#include "qemu/thread.h"
#include "qemu/atomic.h"
#include "qemu/notify.h"
-#include "trace.h"
+#include "qemu-thread-common.h"
+#include "qemu/tsan.h"
+#include "qemu/bitmap.h"
+
+#ifdef CONFIG_PTHREAD_SET_NAME_NP
+#include <pthread_np.h>
+#endif
static bool name_threads;
{
name_threads = enable;
-#ifndef CONFIG_THREAD_SETNAME_BYTHREAD
+#if !defined CONFIG_PTHREAD_SETNAME_NP_W_TID && \
+ !defined CONFIG_PTHREAD_SETNAME_NP_WO_TID && \
+ !defined CONFIG_PTHREAD_SET_NAME_NP
/* This is a debugging option, not fatal */
if (enable) {
+ /* Warn only: missing host support for thread names must not end the process. */
fprintf(stderr, "qemu: thread naming not supported on this host\n");
}
+/*
+ * Clock against which timed waits are measured: CLOCK_MONOTONIC when
+ * CONFIG_PTHREAD_CONDATTR_SETCLOCK is defined, CLOCK_REALTIME otherwise.
+ * compute_abs_deadline() reads this clock and qemu_cond_init() passes it to
+ * pthread_condattr_setclock(), so deadlines and condition variables agree.
+ */
+static inline clockid_t qemu_timedwait_clockid(void)
+{
+#ifdef CONFIG_PTHREAD_CONDATTR_SETCLOCK
+ return CLOCK_MONOTONIC;
+#else
+ return CLOCK_REALTIME;
+#endif
+}
+
+/*
+ * Store in @ts the absolute time lying @ms milliseconds after "now", where
+ * "now" is read from the clock returned by qemu_timedwait_clockid().
+ *
+ * NOTE(review): a negative @ms is not normalised here — confirm callers
+ * only pass non-negative timeouts.
+ */
+static void compute_abs_deadline(struct timespec *ts, int ms)
+{
+ const long nsec_per_sec = 1000000000;
+
+ clock_gettime(qemu_timedwait_clockid(), ts);
+ ts->tv_sec += ms / 1000;
+ ts->tv_nsec += (ms % 1000) * 1000000;
+ /* The sub-second part added is below one second, so one carry suffices. */
+ if (ts->tv_nsec >= nsec_per_sec) {
+ ts->tv_nsec -= nsec_per_sec;
+ ts->tv_sec++;
+ }
+}
+
+/*
+ * Set up @mutex with default pthread attributes (NULL attribute object).
+ * A non-zero pthread_mutex_init() result is handed to error_exit() together
+ * with this function's name.
+ */
void qemu_mutex_init(QemuMutex *mutex)
{
int err;
err = pthread_mutex_init(&mutex->lock, NULL);
if (err)
error_exit(err, __func__);
- mutex->initialized = true;
+ /* NOTE(review): presumably sets mutex->initialized like the removed line — confirm in qemu-thread-common.h */
+ qemu_mutex_post_init(mutex);
}
void qemu_mutex_destroy(QemuMutex *mutex)
int err;
assert(mutex->initialized);
- trace_qemu_mutex_lock(mutex, file, line);
-
+ qemu_mutex_pre_lock(mutex, file, line);
err = pthread_mutex_lock(&mutex->lock);
if (err)
error_exit(err, __func__);
-
- trace_qemu_mutex_locked(mutex, file, line);
+ qemu_mutex_post_lock(mutex, file, line);
}
int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line)
assert(mutex->initialized);
err = pthread_mutex_trylock(&mutex->lock);
if (err == 0) {
- trace_qemu_mutex_locked(mutex, file, line);
+ qemu_mutex_post_lock(mutex, file, line);
return 0;
}
if (err != EBUSY) {
int err;
assert(mutex->initialized);
+ qemu_mutex_pre_unlock(mutex, file, line);
err = pthread_mutex_unlock(&mutex->lock);
if (err)
error_exit(err, __func__);
-
- trace_qemu_mutex_unlock(mutex, file, line);
}
void qemu_rec_mutex_init(QemuRecMutex *mutex)
pthread_mutexattr_init(&attr);
pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
- err = pthread_mutex_init(&mutex->lock, &attr);
+ err = pthread_mutex_init(&mutex->m.lock, &attr);
pthread_mutexattr_destroy(&attr);
if (err) {
error_exit(err, __func__);
}
- mutex->initialized = true;
+ mutex->m.initialized = true;
+}
+
+/* Destroy a recursive mutex by destroying the QemuMutex embedded in it (@mutex->m). */
+void qemu_rec_mutex_destroy(QemuRecMutex *mutex)
+{
+ qemu_mutex_destroy(&mutex->m);
+}
+
+/*
+ * Lock a recursive mutex through its embedded QemuMutex; @file and @line are
+ * forwarded unchanged to qemu_mutex_lock_impl().
+ */
+void qemu_rec_mutex_lock_impl(QemuRecMutex *mutex, const char *file, int line)
+{
+ qemu_mutex_lock_impl(&mutex->m, file, line);
+}
+
+/*
+ * Try to lock a recursive mutex through its embedded QemuMutex.
+ * Returns whatever qemu_mutex_trylock_impl() returns for @mutex->m.
+ */
+int qemu_rec_mutex_trylock_impl(QemuRecMutex *mutex, const char *file, int line)
+{
+ return qemu_mutex_trylock_impl(&mutex->m, file, line);
+}
+
+/*
+ * Unlock a recursive mutex through its embedded QemuMutex; @file and @line are
+ * forwarded unchanged to qemu_mutex_unlock_impl().
+ */
+void qemu_rec_mutex_unlock_impl(QemuRecMutex *mutex, const char *file, int line)
+{
+ qemu_mutex_unlock_impl(&mutex->m, file, line);
}
+/*
+ * Initialise @cond. When CONFIG_PTHREAD_CONDATTR_SETCLOCK is defined the
+ * condition variable is bound to the clock from qemu_timedwait_clockid();
+ * otherwise the attribute object keeps its defaults. Every non-zero pthread
+ * result is handed to error_exit() with this function's name.
+ */
void qemu_cond_init(QemuCond *cond)
{
+ pthread_condattr_t attr;
int err;
- err = pthread_cond_init(&cond->cond, NULL);
- if (err)
+ err = pthread_condattr_init(&attr);
+ if (err) {
+ error_exit(err, __func__);
+ }
+#ifdef CONFIG_PTHREAD_CONDATTR_SETCLOCK
+ err = pthread_condattr_setclock(&attr, qemu_timedwait_clockid());
+ if (err) {
+ error_exit(err, __func__);
+ }
+#endif
+ err = pthread_cond_init(&cond->cond, &attr);
+ if (err) {
error_exit(err, __func__);
+ }
+ /* The attribute object is only needed for the pthread_cond_init() call above. */
+ err = pthread_condattr_destroy(&attr);
+ if (err) {
+ error_exit(err, __func__);
+ }
cond->initialized = true;
}
int err;
assert(cond->initialized);
- trace_qemu_mutex_unlock(mutex, file, line);
+ qemu_mutex_pre_unlock(mutex, file, line);
err = pthread_cond_wait(&cond->cond, &mutex->lock);
- trace_qemu_mutex_locked(mutex, file, line);
+ qemu_mutex_post_lock(mutex, file, line);
if (err)
error_exit(err, __func__);
}
-void qemu_sem_init(QemuSemaphore *sem, int init)
+/*
+ * Wait on @cond, with @mutex passed to pthread_cond_timedwait(), until the
+ * condition is signalled or the absolute deadline @ts passes. @ts is expected
+ * to be on the clock of qemu_timedwait_clockid(), as produced by
+ * compute_abs_deadline().
+ *
+ * Returns false if the wait ended with ETIMEDOUT, true otherwise. Any other
+ * pthread error is handed to error_exit().
+ */
+static bool TSA_NO_TSA
+qemu_cond_timedwait_ts(QemuCond *cond, QemuMutex *mutex, struct timespec *ts,
+ const char *file, const int line)
{
- int rc;
+ int err;
-#ifndef CONFIG_SEM_TIMEDWAIT
- rc = pthread_mutex_init(&sem->lock, NULL);
- if (rc != 0) {
- error_exit(rc, __func__);
- }
- rc = pthread_cond_init(&sem->cond, NULL);
- if (rc != 0) {
- error_exit(rc, __func__);
+ assert(cond->initialized);
+ /*
+ * Use the same mutex hooks as the untimed pthread_cond_wait() path, so the
+ * release and re-acquisition done inside the wait are accounted for in
+ * the same way for both kinds of wait.
+ */
+ qemu_mutex_pre_unlock(mutex, file, line);
+ err = pthread_cond_timedwait(&cond->cond, &mutex->lock, ts);
+ qemu_mutex_post_lock(mutex, file, line);
+ if (err && err != ETIMEDOUT) {
+ error_exit(err, __func__);
}
+ return err != ETIMEDOUT;
+}
+
+/*
+ * Wait on @cond for at most @ms milliseconds. The relative timeout is turned
+ * into an absolute deadline with compute_abs_deadline() and the wait itself
+ * is done by qemu_cond_timedwait_ts(), whose result is returned.
+ */
+bool qemu_cond_timedwait_impl(QemuCond *cond, QemuMutex *mutex, int ms,
+ const char *file, const int line)
+{
+ struct timespec deadline;
+
+ compute_abs_deadline(&deadline, ms);
+ return qemu_cond_timedwait_ts(cond, mutex, &deadline, file, line);
+}
+
+/*
+ * Initialise @sem with a starting count of @init. The semaphore is built from
+ * a QemuMutex and a QemuCond; a negative @init is reported to error_exit()
+ * as EINVAL.
+ */
+void qemu_sem_init(QemuSemaphore *sem, int init)
+{
+ qemu_mutex_init(&sem->mutex);
+ qemu_cond_init(&sem->cond);
+
if (init < 0) {
error_exit(EINVAL, __func__);
}
sem->count = init;
-#else
- rc = sem_init(&sem->sem, 0, init);
- if (rc < 0) {
- error_exit(errno, __func__);
- }
-#endif
- sem->initialized = true;
}
+/* Tear down @sem: destroy its condition variable first, then its mutex. */
void qemu_sem_destroy(QemuSemaphore *sem)
{
- int rc;
-
- assert(sem->initialized);
- sem->initialized = false;
-#ifndef CONFIG_SEM_TIMEDWAIT
- rc = pthread_cond_destroy(&sem->cond);
- if (rc < 0) {
- error_exit(rc, __func__);
- }
- rc = pthread_mutex_destroy(&sem->lock);
- if (rc < 0) {
- error_exit(rc, __func__);
- }
-#else
- rc = sem_destroy(&sem->sem);
- if (rc < 0) {
- error_exit(errno, __func__);
- }
-#endif
+ qemu_cond_destroy(&sem->cond);
+ qemu_mutex_destroy(&sem->mutex);
}
+/*
+ * Add one unit to @sem: under the semaphore's mutex, increment the count and
+ * signal the condition variable. A count already at UINT_MAX is reported to
+ * error_exit() as EINVAL instead of being incremented.
+ */
void qemu_sem_post(QemuSemaphore *sem)
{
- int rc;
-
- assert(sem->initialized);
-#ifndef CONFIG_SEM_TIMEDWAIT
- pthread_mutex_lock(&sem->lock);
+ qemu_mutex_lock(&sem->mutex);
if (sem->count == UINT_MAX) {
- rc = EINVAL;
+ error_exit(EINVAL, __func__);
} else {
sem->count++;
- rc = pthread_cond_signal(&sem->cond);
- }
- pthread_mutex_unlock(&sem->lock);
- if (rc != 0) {
- error_exit(rc, __func__);
- }
-#else
- rc = sem_post(&sem->sem);
- if (rc < 0) {
- error_exit(errno, __func__);
- }
-#endif
-}
-
-static void compute_abs_deadline(struct timespec *ts, int ms)
-{
- struct timeval tv;
- gettimeofday(&tv, NULL);
- ts->tv_nsec = tv.tv_usec * 1000 + (ms % 1000) * 1000000;
- ts->tv_sec = tv.tv_sec + ms / 1000;
- if (ts->tv_nsec >= 1000000000) {
- ts->tv_sec++;
- ts->tv_nsec -= 1000000000;
+ qemu_cond_signal(&sem->cond);
}
+ qemu_mutex_unlock(&sem->mutex);
}
+/*
+ * Take one unit from @sem, waiting at most @ms milliseconds for the count to
+ * become non-zero.
+ *
+ * The deadline is computed once, before the loop, so repeated wake-ups all
+ * wait against the same absolute time.
+ *
+ * Returns 0 if the count was decremented, -1 on timeout.
+ */
int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
{
- int rc;
+ bool rc = true;
struct timespec ts;
- assert(sem->initialized);
-#ifndef CONFIG_SEM_TIMEDWAIT
- rc = 0;
compute_abs_deadline(&ts, ms);
- pthread_mutex_lock(&sem->lock);
+ qemu_mutex_lock(&sem->mutex);
while (sem->count == 0) {
- rc = pthread_cond_timedwait(&sem->cond, &sem->lock, &ts);
- if (rc == ETIMEDOUT) {
- break;
+ /* A zero timeout is a pure poll: report a timeout without waiting. */
+ if (ms == 0) {
+ rc = false;
+ } else {
+ rc = qemu_cond_timedwait_ts(&sem->cond, &sem->mutex, &ts,
+ __FILE__, __LINE__);
}
- if (rc != 0) {
- error_exit(rc, __func__);
+ if (!rc) { /* timeout */
+ break;
}
}
- if (rc != ETIMEDOUT) {
+ if (rc) {
--sem->count;
}
- pthread_mutex_unlock(&sem->lock);
- return (rc == ETIMEDOUT ? -1 : 0);
-#else
- if (ms <= 0) {
- /* This is cheaper than sem_timedwait. */
- do {
- rc = sem_trywait(&sem->sem);
- } while (rc == -1 && errno == EINTR);
- if (rc == -1 && errno == EAGAIN) {
- return -1;
- }
- } else {
- compute_abs_deadline(&ts, ms);
- do {
- rc = sem_timedwait(&sem->sem, &ts);
- } while (rc == -1 && errno == EINTR);
- if (rc == -1 && errno == ETIMEDOUT) {
- return -1;
- }
- }
- if (rc < 0) {
- error_exit(errno, __func__);
- }
- return 0;
-#endif
+ qemu_mutex_unlock(&sem->mutex);
+ return (rc ? 0 : -1);
}
+/*
+ * Take one unit from @sem: under the semaphore's mutex, wait on the condition
+ * variable for as long as the count is zero, then decrement the count.
+ */
void qemu_sem_wait(QemuSemaphore *sem)
{
- int rc;
-
- assert(sem->initialized);
-#ifndef CONFIG_SEM_TIMEDWAIT
- pthread_mutex_lock(&sem->lock);
+ qemu_mutex_lock(&sem->mutex);
while (sem->count == 0) {
- rc = pthread_cond_wait(&sem->cond, &sem->lock);
- if (rc != 0) {
- error_exit(rc, __func__);
- }
+ qemu_cond_wait(&sem->cond, &sem->mutex);
}
--sem->count;
- pthread_mutex_unlock(&sem->lock);
-#else
- do {
- rc = sem_wait(&sem->sem);
- } while (rc == -1 && errno == EINTR);
- if (rc < 0) {
- error_exit(errno, __func__);
- }
-#endif
+ qemu_mutex_unlock(&sem->mutex);
}
#ifdef __linux__
void qemu_event_set(QemuEvent *ev)
{
- /* qemu_event_set has release semantics, but because it *loads*
+ assert(ev->initialized);
+
+ /*
+ * Pairs with both qemu_event_reset() and qemu_event_wait().
+ *
+ * qemu_event_set has release semantics, but because it *loads*
* ev->value we need a full memory barrier here.
*/
- assert(ev->initialized);
smp_mb();
- if (atomic_read(&ev->value) != EV_SET) {
- if (atomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
+ if (qatomic_read(&ev->value) != EV_SET) {
+ int old = qatomic_xchg(&ev->value, EV_SET);
+
+ /* Pairs with memory barrier in kernel futex_wait system call. */
+ smp_mb__after_rmw();
+ if (old == EV_BUSY) {
/* There were waiters, wake them up. */
qemu_futex_wake(ev, INT_MAX);
}
+/*
+ * Reset @ev: atomically OR EV_FREE into ev->value, then issue
+ * smp_mb__after_rmw() so the reset is ordered before whatever the caller
+ * checks next. Asserts that @ev has been initialised.
+ */
void qemu_event_reset(QemuEvent *ev)
{
- unsigned value;
-
assert(ev->initialized);
- value = atomic_read(&ev->value);
- smp_mb_acquire();
- if (value == EV_SET) {
- /*
- * If there was a concurrent reset (or even reset+wait),
- * do nothing. Otherwise change EV_SET->EV_FREE.
- */
- atomic_or(&ev->value, EV_FREE);
- }
+
+ /*
+ * If there was a concurrent reset (or even reset+wait),
+ * do nothing. Otherwise change EV_SET->EV_FREE.
+ */
+ qatomic_or(&ev->value, EV_FREE);
+
+ /*
+ * Order reset before checking the condition in the caller.
+ * Pairs with the first memory barrier in qemu_event_set().
+ */
+ smp_mb__after_rmw();
}
void qemu_event_wait(QemuEvent *ev)
unsigned value;
assert(ev->initialized);
- value = atomic_read(&ev->value);
- smp_mb_acquire();
+
+ /*
+ * qemu_event_wait must synchronize with qemu_event_set even if it does
+ * not go down the slow path, so this load-acquire is needed that
+ * synchronizes with the first memory barrier in qemu_event_set().
+ *
+ * If we do go down the slow path, there is no requirement at all: we
+ * might miss a qemu_event_set() here but ultimately the memory barrier in
+ * qemu_futex_wait() will ensure the check is done correctly.
+ */
+ value = qatomic_load_acquire(&ev->value);
if (value != EV_SET) {
if (value == EV_FREE) {
/*
- * Leave the event reset and tell qemu_event_set that there
- * are waiters. No need to retry, because there cannot be
- * a concurrent busy->free transition. After the CAS, the
- * event will be either set or busy.
+ * Leave the event reset and tell qemu_event_set that there are
+ * waiters. No need to retry, because there cannot be a concurrent
+ * busy->free transition. After the CAS, the event will be either
+ * set or busy.
+ *
+ * This cmpxchg doesn't have particular ordering requirements if it
+ * succeeds (moving the store earlier can only cause qemu_event_set()
+ * to issue _more_ wakeups), the failing case needs acquire semantics
+ * like the load above.
*/
- if (atomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
+ if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
return;
}
}
+
+ /*
+ * This is the final check for a concurrent set, so it does need
+ * a smp_mb() pairing with the second barrier of qemu_event_set().
+ * The barrier is inside the FUTEX_WAIT system call.
+ */
qemu_futex_wait(ev, EV_BUSY);
}
}
-static pthread_key_t exit_key;
-
-union NotifierThreadData {
- void *ptr;
- NotifierList list;
-};
-QEMU_BUILD_BUG_ON(sizeof(union NotifierThreadData) != sizeof(void *));
+static __thread NotifierList thread_exit;
+/*
+ * Note that in this implementation you can register a thread-exit
+ * notifier for the main thread, but it will never be called.
+ * This is OK because main thread exit can only happen when the
+ * entire process is exiting, and the API allows notifiers to not
+ * be called on process exit.
+ */
void qemu_thread_atexit_add(Notifier *notifier)
{
- union NotifierThreadData ntd;
- ntd.ptr = pthread_getspecific(exit_key);
- notifier_list_add(&ntd.list, notifier);
- pthread_setspecific(exit_key, ntd.ptr);
+ /* thread_exit is declared __thread, so this adds to the calling thread's own list. */
+ notifier_list_add(&thread_exit, notifier);
}
+/*
+ * Unregister a thread-exit notifier. Only notifier_remove() is called on
+ * @notifier; the per-thread list itself is not looked up here.
+ */
void qemu_thread_atexit_remove(Notifier *notifier)
{
- union NotifierThreadData ntd;
- ntd.ptr = pthread_getspecific(exit_key);
notifier_remove(notifier);
- pthread_setspecific(exit_key, ntd.ptr);
-}
-
-static void qemu_thread_atexit_run(void *arg)
-{
- union NotifierThreadData ntd = { .ptr = arg };
- notifier_list_notify(&ntd.list, NULL);
}
-static void __attribute__((constructor)) qemu_thread_atexit_init(void)
+static void qemu_thread_atexit_notify(void *arg)
{
- pthread_key_create(&exit_key, qemu_thread_atexit_run);
+ /*
+ * Called when non-main thread exits (via qemu_thread_exit()
+ * or by returning from its start routine.)
+ */
+ /* @arg is unused; NULL is passed as notifier_list_notify()'s second argument. */
+ notifier_list_notify(&thread_exit, NULL);
}
-
-#ifdef CONFIG_PTHREAD_SETNAME_NP
typedef struct {
void *(*start_routine)(void *);
void *arg;
QemuThreadArgs *qemu_thread_args = args;
void *(*start_routine)(void *) = qemu_thread_args->start_routine;
void *arg = qemu_thread_args->arg;
+ void *r;
/* Attempt to set the threads name; note that this is for debug, so
* we're not going to fail if we can't set it.
*/
- pthread_setname_np(pthread_self(), qemu_thread_args->name);
+ if (name_threads && qemu_thread_args->name) {
+# if defined(CONFIG_PTHREAD_SETNAME_NP_W_TID)
+ pthread_setname_np(pthread_self(), qemu_thread_args->name);
+# elif defined(CONFIG_PTHREAD_SETNAME_NP_WO_TID)
+ pthread_setname_np(qemu_thread_args->name);
+# elif defined(CONFIG_PTHREAD_SET_NAME_NP)
+ pthread_set_name_np(pthread_self(), qemu_thread_args->name);
+# endif
+ }
+ QEMU_TSAN_ANNOTATE_THREAD_NAME(qemu_thread_args->name);
g_free(qemu_thread_args->name);
g_free(qemu_thread_args);
- return start_routine(arg);
-}
+
+ /*
+ * GCC 11 with glibc 2.17 on PowerPC reports
+ *
+ * qemu-thread-posix.c:540:5: error: ‘__sigsetjmp’ accessing 656 bytes
+ * in a region of size 528 [-Werror=stringop-overflow=]
+ * 540 | pthread_cleanup_push(qemu_thread_atexit_notify, NULL);
+ * | ^~~~~~~~~~~~~~~~~~~~
+ *
+ * which is clearly nonsense.
+ */
+#pragma GCC diagnostic push
+#ifndef __clang__
+#pragma GCC diagnostic ignored "-Wstringop-overflow"
#endif
+ pthread_cleanup_push(qemu_thread_atexit_notify, NULL);
+ r = start_routine(arg);
+ pthread_cleanup_pop(1);
+
+#pragma GCC diagnostic pop
+
+ return r;
+}
void qemu_thread_create(QemuThread *thread, const char *name,
void *(*start_routine)(void*),
sigset_t set, oldset;
int err;
pthread_attr_t attr;
+ QemuThreadArgs *qemu_thread_args;
err = pthread_attr_init(&attr);
if (err) {
/* Leave signal handling to the iothread. */
sigfillset(&set);
+ /* Blocking the signals can result in undefined behaviour. */
+ sigdelset(&set, SIGSEGV);
+ sigdelset(&set, SIGFPE);
+ sigdelset(&set, SIGILL);
+ /* TODO avoid SIGBUS loss on macOS */
pthread_sigmask(SIG_SETMASK, &set, &oldset);
-#ifdef CONFIG_PTHREAD_SETNAME_NP
- if (name_threads) {
- QemuThreadArgs *qemu_thread_args;
- qemu_thread_args = g_new0(QemuThreadArgs, 1);
- qemu_thread_args->name = g_strdup(name);
- qemu_thread_args->start_routine = start_routine;
- qemu_thread_args->arg = arg;
-
- err = pthread_create(&thread->thread, &attr,
- qemu_thread_start, qemu_thread_args);
- } else
-#endif
- {
- err = pthread_create(&thread->thread, &attr,
- start_routine, arg);
- }
+ qemu_thread_args = g_new0(QemuThreadArgs, 1);
+ qemu_thread_args->name = g_strdup(name);
+ qemu_thread_args->start_routine = start_routine;
+ qemu_thread_args->arg = arg;
+
+ err = pthread_create(&thread->thread, &attr,
+ qemu_thread_start, qemu_thread_args);
if (err)
error_exit(err, __func__);
pthread_attr_destroy(&attr);
}
+/*
+ * Pin @thread to the host CPUs whose bits are set in the @nbits-wide bitmap
+ * @host_cpus.
+ *
+ * Returns the result of pthread_setaffinity_np(), or -ENOSYS when
+ * CONFIG_PTHREAD_AFFINITY_NP is not defined.
+ */
+int qemu_thread_set_affinity(QemuThread *thread, unsigned long *host_cpus,
+ unsigned long nbits)
+{
+#if defined(CONFIG_PTHREAD_AFFINITY_NP)
+ const size_t setsize = CPU_ALLOC_SIZE(nbits);
+ cpu_set_t *cpuset = CPU_ALLOC(nbits);
+ unsigned long cpu;
+ int err;
+
+ g_assert(cpuset);
+ CPU_ZERO_S(setsize, cpuset);
+
+ /* Mirror every set bit of the bitmap into the dynamically sized CPU set. */
+ for (cpu = find_first_bit(host_cpus, nbits);
+ cpu < nbits;
+ cpu = find_next_bit(host_cpus, nbits, cpu + 1)) {
+ CPU_SET_S(cpu, setsize, cpuset);
+ }
+
+ err = pthread_setaffinity_np(thread->thread, setsize, cpuset);
+ CPU_FREE(cpuset);
+ return err;
+#else
+ return -ENOSYS;
+#endif
+}
+
+/*
+ * Read the CPU affinity of @thread into a bitmap.
+ *
+ * On success returns 0, stores a bitmap obtained from bitmap_new() in
+ * *@host_cpus and its width in bits in *@nbits (presumably the caller then
+ * owns the bitmap — confirm against callers). On failure returns the error
+ * number from pthread_getaffinity_np() and leaves both outputs untouched.
+ * Returns -ENOSYS when CONFIG_PTHREAD_AFFINITY_NP is not defined.
+ */
+int qemu_thread_get_affinity(QemuThread *thread, unsigned long **host_cpus,
+ unsigned long *nbits)
+{
+#if defined(CONFIG_PTHREAD_AFFINITY_NP)
+ unsigned long tmpbits, i;
+ cpu_set_t *cpuset;
+ size_t setsize;
+ int err;
+
+ tmpbits = CPU_SETSIZE;
+ while (true) {
+ setsize = CPU_ALLOC_SIZE(tmpbits);
+ cpuset = CPU_ALLOC(tmpbits);
+ g_assert(cpuset);
+
+ err = pthread_getaffinity_np(thread->thread, setsize, cpuset);
+ if (!err) {
+ break;
+ }
+ CPU_FREE(cpuset);
+ /*
+ * pthread functions return a positive error number. EINVAL means the
+ * set is smaller than the kernel's affinity mask, so retry with a
+ * set twice as large; anything else is a real failure.
+ */
+ if (err != EINVAL) {
+ return err;
+ }
+ tmpbits *= 2;
+ }
+
+ /* Convert the result into a proper bitmap. */
+ *nbits = tmpbits;
+ *host_cpus = bitmap_new(tmpbits);
+ for (i = 0; i < tmpbits; i++) {
+ /* Sets from CPU_ALLOC() must be tested with the _S variant and their size. */
+ if (CPU_ISSET_S(i, setsize, cpuset)) {
+ set_bit(i, *host_cpus);
+ }
+ }
+ CPU_FREE(cpuset);
+ return 0;
+#else
+ return -ENOSYS;
+#endif
+}
+
void qemu_thread_get_self(QemuThread *thread)
{
thread->thread = pthread_self();