#if 0 /*─────────────────────────────────────────────────────────────────╗ │ To the extent possible under law, Justine Tunney has waived │ │ all copyright and related or neighboring rights to this file, │ │ as it is written in the following disclaimers: │ │ • http://unlicense.org/ │ │ • http://creativecommons.org/publicdomain/zero/1.0/ │ ╚─────────────────────────────────────────────────────────────────*/ #endif #include #include #include #include #include #include #include #include // lockless push / pop tutorial // // this file demonstrates how to create a singly linked list that can be // pushed and popped across multiple threads, using only atomics. atomic // operations (rather than using a mutex) make push/pop go faster and it // ensures asynchronous signal safety too. therefore it will be safe for // use in a variety of contexts, such as signal handlers. #define THREADS 128 #define ITERATIONS 10000 // adjust mask based on alignment of list struct // // - 0x00fffffffffffff0 may be used if List* is always 16-byte aligned. // We know that's the case here, because we call malloc() to create // every List* object, and malloc() results are always max aligned. // // - 0x00fffffffffffff8 may be used if List* is always 8-byte aligned. // This might be the case if you're pushing and popping stuff that was // allocated from an array, to avoid malloc() calls. This has one // fewer byte of safeguards against the ABA problem though. // // - 0x00fffffffffff000 may be used if List* is always page aligned. // This is a good choice if you use mmap() to allocate each List* // element, since it offers maximum protection against ABA. // // - only the highest byte of a 64-bit pointer is safe to use on our // supported platforms. on most x86 and arm systems, it's possible to // use the top sixteen bits. however that's not the case on more // recent high end x86-64 systems that have pml5t. // #define MASQUE 0x00fffffffffffff0 #define PTR(x) ((uintptr_t)(x) & MASQUE) #define TAG(x) ROL((uintptr_t)(x) & ~MASQUE, 8) #define ABA(p, t) ((uintptr_t)(p) | (ROR((uintptr_t)(t), 8) & ~MASQUE)) #define ROL(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) #define ROR(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) struct List { struct List* next; int count; }; atomic_uintptr_t list; void push(struct List* elem) { uintptr_t tip; assert(!TAG(elem)); for (tip = atomic_load_explicit(&list, memory_order_relaxed);;) { elem->next = (struct List*)PTR(tip); if (atomic_compare_exchange_weak_explicit( &list, &tip, ABA(elem, TAG(tip) + 1), memory_order_release, memory_order_relaxed)) break; pthread_pause_np(); } } struct List* pop(void) { uintptr_t tip; struct List* elem; tip = atomic_load_explicit(&list, memory_order_relaxed); while ((elem = (struct List*)PTR(tip))) { if (atomic_compare_exchange_weak_explicit( &list, &tip, ABA(elem->next, TAG(tip) + 1), memory_order_acquire, memory_order_relaxed)) break; pthread_pause_np(); } return elem; } void* tester(void* arg) { struct List* elem; for (int i = 0; i < ITERATIONS; ++i) { while (!(elem = pop())) { elem = malloc(sizeof(*elem)); elem->count = 0; push(elem); } elem->count++; push(elem); } return 0; } int main() { printf("testing aba problem..."); fflush(stdout); pthread_t th[THREADS]; for (int i = 0; i < THREADS; ++i) pthread_create(&th[i], 0, tester, 0); for (int i = 0; i < THREADS; ++i) pthread_join(th[i], 0); int sum = 0; struct List* elem; while ((elem = pop())) { printf(" %d", elem->count); sum += elem->count; free(elem); } printf("\n"); assert(sum == ITERATIONS * THREADS); printf("you are the dancing queen\n"); CheckForMemoryLeaks(); }