mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-02-07 06:53:33 +00:00
This change introduces a new deadlock detector for Cosmo's POSIX threads implementation. Error check mutexes will now track a DAG of nested locks and report EDEADLK when a deadlock is theoretically possible. These will occur rarely, but it's important for production hardening your code. You don't even need to change your mutexes to use the POSIX error check mode because `cosmocc -mdbg` will enable error checking on mutexes by default globally. When cycles are found, an error message showing your demangled symbols describing the strongly connected component are printed and then the SIGTRAP is raised, which means you'll also get a backtrace if you're using ShowCrashReports() too. This new error checker is so low-level and so pure that it's able to verify the relationships of every libc runtime lock, including those locks upon which the mutex implementation depends.
389 lines
11 KiB
C
389 lines
11 KiB
C
#define USE POSIX
|
|
#define ITERATIONS 100000
|
|
#define THREADS 30
|
|
|
|
#define SPIN 1
|
|
#define FUTEX 2
|
|
#define FUTEX_SHARED 3
|
|
#define POSIX 4
|
|
#define POSIX_RECURSIVE 5
|
|
#define RWLOCK 6
|
|
#define RWLOCK_SHARED 7
|
|
|
|
#ifdef __COSMOPOLITAN__
|
|
#include <cosmo.h>
|
|
#endif
|
|
|
|
#include <assert.h>
|
|
#include <pthread.h>
|
|
#include <stdatomic.h>
|
|
#include <stdio.h>
|
|
#include <sys/resource.h>
|
|
#include <sys/time.h>
|
|
#include <time.h>
|
|
#include <unistd.h>
|
|
|
|
#ifdef __linux__
|
|
#include <linux/futex.h>
|
|
#include <sys/syscall.h>
|
|
static inline long nsync_futex_wait_(atomic_int *uaddr, int val, char pshare,
|
|
int clock,
|
|
const struct timespec *timeout) {
|
|
return syscall(SYS_futex, uaddr, pshare ? FUTEX_WAIT : FUTEX_WAIT_PRIVATE,
|
|
val, timeout, NULL, 0);
|
|
}
|
|
static inline long nsync_futex_wake_(atomic_int *uaddr, int num_to_wake,
|
|
char pshare) {
|
|
return syscall(SYS_futex, uaddr, pshare ? FUTEX_WAKE : FUTEX_WAKE_PRIVATE,
|
|
num_to_wake, NULL, NULL, 0);
|
|
}
|
|
#endif
|
|
|
|
// x86 fleet
|
|
// with spin lock
|
|
// 30 threads / 100000 iterations
|
|
//
|
|
// footek_test on Linux 6.8 AMD Ryzen Threadripper PRO 7995WX
|
|
// 1,570,224 us real
|
|
// 42,690,880 us user
|
|
// 1,999 us sys
|
|
//
|
|
// footek_test on rhel7.test. 423 µs 2'638 µs 912'241 µs
|
|
// 897,815 us real
|
|
// 1,763,705 us user
|
|
// 9,696 us sys
|
|
//
|
|
// footek_test on xnu.test. 98'468 µs 5'242 µs 5'191'724 µs
|
|
// 4,225,726 us real
|
|
// 16,679,456 us user
|
|
// 16,265 us sys
|
|
//
|
|
// footek_test on freebsd.test. 690 µs 3'011 µs 2'925'997 µs
|
|
// 2,916,033 us real
|
|
// 17,236,103 us user
|
|
// 0 us sys
|
|
//
|
|
// footek_test on netbsd.test. 1'151 µs 2'634 µs 1'014'867 µs
|
|
// 790,332 us real
|
|
// 2,359,967 us user
|
|
// 0 us sys
|
|
//
|
|
// footek_test on openbsd.test. 557 µs 3'020 µs 2'554'648 µs
|
|
// 2,332,724 us real
|
|
// 9,150,000 us user
|
|
// 10,000 us sys
|
|
//
|
|
// footek_test on win10.test. 962 µs 9'698 µs 2'751'905 µs
|
|
// 2,528,863 us real
|
|
// 56,546,875 us user
|
|
// 1,671,875 us sys
|
|
|
|
// x86 fleet
|
|
// with futexes
|
|
// 30 threads / 100000 iterations
|
|
//
|
|
// footek_test on Linux 6.8 AMD Ryzen Threadripper PRO 7995WX
|
|
// 100,746 us real
|
|
// 234,451 us user
|
|
// 2,638,333 us sys
|
|
//
|
|
// footek_test on rhel7.test. 376 µs 2'259 µs 153'024 µs
|
|
// 146,015 us real
|
|
// 169,427 us user
|
|
// 68,939 us sys
|
|
//
|
|
// footek_test on xnu.test. 11'143 µs 9'159 µs 164'865 µs
|
|
// 144,917 us real
|
|
// 383,317 us user
|
|
// 191,203 us sys
|
|
//
|
|
// footek_test on freebsd.test. 394 µs 2'165 µs 256'227 µs
|
|
// 244,286 us real
|
|
// 405,395 us user
|
|
// 956,122 us sys
|
|
//
|
|
// footek_test on netbsd.test. 502 µs 2'020 µs 261'895 µs
|
|
// 209,095 us real
|
|
// 616,634 us user
|
|
// 9,945 us sys
|
|
//
|
|
// footek_test on openbsd.test. 457 µs 2'737 µs 396'342 µs
|
|
// 344,876 us real
|
|
// 50,000 us user
|
|
// 1,240,000 us sys
|
|
//
|
|
// footek_test on win10.test. 462 µs 59'528 µs 1'348'265 µs
|
|
// 1,193,906 us real
|
|
// 17,546,875 us user
|
|
// 3,000,000 us sys
|
|
|
|
// x86 fleet
|
|
// with posix
|
|
// 30 threads / 100000 iterations
|
|
//
|
|
// footek_test on Linux 6.8 AMD Ryzen Threadripper PRO 7995WX (glibc)
|
|
// 111,560 us real
|
|
// 153,985 us user
|
|
// 2,988,121 us sys
|
|
//
|
|
// footek_test on Linux 6.8 AMD Ryzen Threadripper PRO 7995WX (musl)
|
|
// 392,765 us real
|
|
// 1,885,558 us user
|
|
// 9,667,865 us sys
|
|
//
|
|
// footek_test on Linux 6.8 AMD Ryzen Threadripper PRO 7995WX (cosmo)
|
|
// 40,965 us real
|
|
// 47,168 us user
|
|
// 25,398 us sys
|
|
//
|
|
// footek_test on rhel7.test. 683 µs 1'340 µs 105'977 µs
|
|
// 101,934 us real
|
|
// 104,771 us user
|
|
// 4,068 us sys
|
|
//
|
|
// footek_test on xnu.test. 2'054 µs 5'352 µs 210'306 µs
|
|
// 181,540 us real
|
|
// 216,236 us user
|
|
// 127,344 us sys
|
|
//
|
|
// footek_test on freebsd.test. (cosmo)
|
|
// 126,803 us real
|
|
// 3,100 us user
|
|
// 176,744 us sys
|
|
//
|
|
// footek_test on freebsd.test. (freebsd libc)
|
|
// 219,073 us real
|
|
// 158,103 us user
|
|
// 1,146,252 us sys
|
|
//
|
|
// footek_test on netbsd.test. 350 µs 3'570 µs 262'186 µs
|
|
// 199,882 us real
|
|
// 138,178 us user
|
|
// 329,501 us sys
|
|
//
|
|
// footek_test on openbsd.test. (cosmo)
|
|
// 138,619 us real
|
|
// 30,000 us user
|
|
// 110,000 us sys
|
|
//
|
|
// footek_test on openbsd.test. (openbsd libc)
|
|
// 385,431 us real
|
|
// 80,000 us user
|
|
// 1,350,000 us sys
|
|
//
|
|
// footek_test on win10.test. (cosmo)
|
|
// 156,382 us real
|
|
// 312,500 us user
|
|
// 31,250 us sys
|
|
//
|
|
// footek_test on win10.test. (cygwin)
|
|
// 9,334,610 us real
|
|
// 1,562,000 us user
|
|
// 6,093,000 us sys
|
|
|
|
// arm fleet
|
|
// with spin lock
|
|
// 30 threads / 100000 iterations
|
|
//
|
|
// footek_test on studio.test. 961 µs 12'907 µs 1'287'983 µs
|
|
// 1,282,084 us real
|
|
// 29,359,582 us user
|
|
// 34,553 us sys
|
|
//
|
|
// footek_test on pi.test. 459 µs 16'376 µs 4'095'512 µs
|
|
// 4,070,988 us real
|
|
// 16,203,990 us user
|
|
// 7,999 us sys
|
|
//
|
|
// footek_test on freebsdarm.test. 502 µs 16'446 µs 7'051'545 µs
|
|
// 7,012,493 us real
|
|
// 27,936,725 us user
|
|
// 7,871 us sys
|
|
|
|
// arm fleet
|
|
// with futexes
|
|
// 30 threads / 100000 iterations
|
|
//
|
|
// footek_test on studio.test. 585 µs 13'597 µs 57'473 µs
|
|
// 46,481 us real
|
|
// 68,745 us user
|
|
// 586,871 us sys
|
|
//
|
|
// footek_test on pi5.test. 335 µs 13'034 µs 432'358 µs
|
|
// 389,619 us real
|
|
// 839,848 us user
|
|
// 679,112 us sys
|
|
//
|
|
// footek_test on pi.test. 479 µs 16'539 µs 476'395 µs
|
|
// 463,799 us real
|
|
// 1,259,267 us user
|
|
// 547,681 us sys
|
|
//
|
|
// footek_test on freebsdarm.test. 364 µs 16'898 µs 1'288'594 µs
|
|
// 1,256,134 us real
|
|
// 3,770,473 us user
|
|
// 1,214,755 us sys
|
|
|
|
// arm fleet
|
|
// with posix
|
|
// 30 threads / 100000 iterations
|
|
//
|
|
// footek_test on Apple M2 Ultra (Apple Libc)
|
|
// 45,443 us real
|
|
// 30,201 us user
|
|
// 864,650 us sys
|
|
//
|
|
// footek_test on Apple M2 Ultra (Cosmo Libc)
|
|
// 65,118 us real
|
|
// 77,891 us user
|
|
// 1,023,575 us sys
|
|
//
|
|
// footek_test on pi5.test. 407 µs 12'661 µs 198'133 µs
|
|
// 152,791 us real
|
|
// 143,678 us user
|
|
// 14,736 us sys
|
|
//
|
|
// footek_test on studio.test. 463 µs 13'286 µs 234'742 µs
|
|
// 227,916 us real
|
|
// 294,162 us user
|
|
// 155,062 us sys
|
|
//
|
|
// footek_test on pi.test. 374 µs 15'720 µs 249'245 µs
|
|
// 233,504 us real
|
|
// 301,072 us user
|
|
// 187,153 us sys
|
|
//
|
|
// footek_test on freebsdarm.test. 328 µs 16'614 µs 918'647 µs
|
|
// 877,124 us real
|
|
// 1,377,338 us user
|
|
// 798,230 us sys
|
|
|
|
#define MUTEX_LOCKED(word) ((word) & 8)
|
|
#define MUTEX_WAITING(word) ((word) & 16)
|
|
|
|
#define MUTEX_LOCK(word) ((word) | 8)
|
|
#define MUTEX_SET_WAITING(word) ((word) | 16)
|
|
#define MUTEX_UNLOCK(word) ((word) & ~(8 | 16))
|
|
|
|
void lock(atomic_int *futex) {
|
|
int word, cs;
|
|
for (int i = 0; i < 4; ++i) {
|
|
word = 0;
|
|
if (atomic_compare_exchange_strong_explicit(
|
|
futex, &word, 1, memory_order_acquire, memory_order_acquire))
|
|
return;
|
|
}
|
|
if (word == 1)
|
|
word = atomic_exchange_explicit(futex, 2, memory_order_acquire);
|
|
while (word > 0) {
|
|
pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
|
|
#if USE == FUTEX || USE == FUTEX_SHARED
|
|
cosmo_futex_wait(
|
|
futex, 2,
|
|
USE == FUTEX_SHARED ? PTHREAD_PROCESS_SHARED : PTHREAD_PROCESS_PRIVATE,
|
|
0, 0);
|
|
#else
|
|
pthread_yield_np();
|
|
#endif
|
|
pthread_setcancelstate(cs, 0);
|
|
word = atomic_exchange_explicit(futex, 2, memory_order_acquire);
|
|
}
|
|
}
|
|
|
|
void unlock(atomic_int *futex) {
|
|
int word = atomic_fetch_sub_explicit(futex, 1, memory_order_release);
|
|
if (word == 2) {
|
|
atomic_store_explicit(futex, 0, memory_order_release);
|
|
#if USE == FUTEX || USE == FUTEX_SHARED
|
|
cosmo_futex_wake(
|
|
futex, 1,
|
|
USE == FUTEX_SHARED ? PTHREAD_PROCESS_SHARED : PTHREAD_PROCESS_PRIVATE);
|
|
#endif
|
|
}
|
|
}
|
|
|
|
int g_chores;
|
|
atomic_int g_lock;
|
|
pthread_mutex_t g_locker;
|
|
pthread_rwlock_t g_rwlocker;
|
|
|
|
void *worker(void *arg) {
|
|
for (int i = 0; i < ITERATIONS; ++i) {
|
|
#if USE == POSIX || USE == POSIX_RECURSIVE
|
|
pthread_mutex_lock(&g_locker);
|
|
++g_chores;
|
|
pthread_mutex_unlock(&g_locker);
|
|
#elif USE == RWLOCK || USE == RWLOCK_SHARED
|
|
pthread_rwlock_wrlock(&g_rwlocker);
|
|
++g_chores;
|
|
pthread_rwlock_unlock(&g_rwlocker);
|
|
#else
|
|
lock(&g_lock);
|
|
++g_chores;
|
|
unlock(&g_lock);
|
|
#endif
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
struct timeval tub(struct timeval a, struct timeval b) {
|
|
a.tv_sec -= b.tv_sec;
|
|
if (a.tv_usec < b.tv_usec) {
|
|
a.tv_usec += 1000000;
|
|
a.tv_sec--;
|
|
}
|
|
a.tv_usec -= b.tv_usec;
|
|
return a;
|
|
}
|
|
|
|
long tomicros(struct timeval x) {
|
|
return x.tv_sec * 1000000ul + x.tv_usec;
|
|
}
|
|
|
|
int main() {
|
|
struct timeval start;
|
|
gettimeofday(&start, 0);
|
|
|
|
pthread_mutexattr_t attr;
|
|
pthread_mutexattr_init(&attr);
|
|
#if USE == POSIX_RECURSIVE
|
|
pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
|
|
#else
|
|
pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_DEFAULT);
|
|
#endif
|
|
pthread_mutex_init(&g_locker, &attr);
|
|
pthread_mutexattr_destroy(&attr);
|
|
|
|
pthread_rwlockattr_t rwattr;
|
|
pthread_rwlockattr_init(&rwattr);
|
|
#if USE == RWLOCK_SHARED
|
|
pthread_rwlockattr_setpshared(&rwattr, PTHREAD_PROCESS_SHARED);
|
|
#endif
|
|
pthread_rwlock_init(&g_rwlocker, &rwattr);
|
|
pthread_rwlockattr_destroy(&rwattr);
|
|
|
|
pthread_t th[THREADS];
|
|
for (int i = 0; i < THREADS; ++i)
|
|
pthread_create(&th[i], 0, worker, 0);
|
|
for (int i = 0; i < THREADS; ++i)
|
|
pthread_join(th[i], 0);
|
|
assert(g_chores == THREADS * ITERATIONS);
|
|
|
|
struct rusage ru;
|
|
struct timeval end;
|
|
gettimeofday(&end, 0);
|
|
getrusage(RUSAGE_SELF, &ru);
|
|
printf("%16ld us real\n"
|
|
"%16ld us user\n"
|
|
"%16ld us sys\n",
|
|
tomicros(tub(end, start)), //
|
|
tomicros(ru.ru_utime), //
|
|
tomicros(ru.ru_stime));
|
|
|
|
pthread_rwlock_destroy(&g_rwlocker);
|
|
pthread_mutex_destroy(&g_locker);
|
|
|
|
#ifdef __COSMOPOLITAN__
|
|
CheckForMemoryLeaks();
|
|
#endif
|
|
}
|