diff --git a/libc/calls/semaphore.internal.h b/libc/calls/semaphore.internal.h new file mode 100644 index 000000000..d4c595ae9 --- /dev/null +++ b/libc/calls/semaphore.internal.h @@ -0,0 +1,25 @@ +#ifndef COSMOPOLITAN_LIBC_CALLS_SEMAPHORE_H_ +#define COSMOPOLITAN_LIBC_CALLS_SEMAPHORE_H_ +#if !(__ASSEMBLER__ + __LINKER__ + 0) +COSMOPOLITAN_C_START_ + +#define SEM_FAILED ((sem_t *)0) + +typedef struct { + volatile int __val[4 * sizeof(long) / sizeof(int)]; +} sem_t; + +int sem_close(sem_t *); +int sem_destroy(sem_t *); +int sem_getvalue(sem_t *, int *); +int sem_init(sem_t *, int, unsigned); +sem_t *sem_open(const char *, int, ...); +int sem_post(sem_t *); +int sem_timedwait(sem_t *, const struct timespec *); +int sem_trywait(sem_t *); +int sem_unlink(const char *); +int sem_wait(sem_t *); + +COSMOPOLITAN_C_END_ +#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ +#endif /* COSMOPOLITAN_LIBC_CALLS_SEMAPHORE_H_ */ diff --git a/libc/fmt/conv.h b/libc/fmt/conv.h index 630cbac7b..71fb95067 100644 --- a/libc/fmt/conv.h +++ b/libc/fmt/conv.h @@ -118,7 +118,8 @@ imaxdiv_t imaxdiv(intmax_t, intmax_t) pureconst; #define lldiv(num, den) ((lldiv_t){(num) / (den), (num) % (den)}) #endif -#if __GNUC__ * 100 + __GNUC_MINOR__ >= 406 || defined(__llvm__) +#if (__GNUC__ * 100 + __GNUC_MINOR__ >= 406 || defined(__llvm__)) && \ + !defined(__STRICT_ANSI__) int128_t i128abs(int128_t) libcesque pureconst; int128_t strtoi128(const char *, char **, int) paramsnonnull((1)); uint128_t strtou128(const char *, char **, int) paramsnonnull((1)); diff --git a/libc/integral/c.inc b/libc/integral/c.inc index 68bcd7090..cfc14baf5 100644 --- a/libc/integral/c.inc +++ b/libc/integral/c.inc @@ -123,7 +123,9 @@ typedef __UINT64_TYPE__ uint64_t; typedef __INTMAX_TYPE__ intmax_t; typedef __UINTMAX_TYPE__ uintmax_t; -#if (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 406 || defined(__llvm__) +#if ((__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 406 || \ + defined(__llvm__)) && \ + !defined(__STRICT_ANSI__) typedef signed __int128 int128_t; typedef unsigned __int128 uint128_t; #endif diff --git a/libc/isystem/linux/futex.h b/libc/isystem/linux/futex.h new file mode 100644 index 000000000..b07fc1454 --- /dev/null +++ b/libc/isystem/linux/futex.h @@ -0,0 +1,5 @@ +#ifndef COSMOPOLITAN_LIBC_ISYSTEM_LINUX_FUTEX_H_ +#define COSMOPOLITAN_LIBC_ISYSTEM_LINUX_FUTEX_H_ +#include "libc/sysv/consts/futex.h" +#include "libc/sysv/consts/nr.h" +#endif /* COSMOPOLITAN_LIBC_ISYSTEM_LINUX_FUTEX_H_ */ diff --git a/libc/isystem/semaphore.h b/libc/isystem/semaphore.h new file mode 100644 index 000000000..a672765e1 --- /dev/null +++ b/libc/isystem/semaphore.h @@ -0,0 +1,4 @@ +#ifndef COSMOPOLITAN_LIBC_ISYSTEM_SEMAPHORE_H_ +#define COSMOPOLITAN_LIBC_ISYSTEM_SEMAPHORE_H_ +#include "libc/calls/semaphore.internal.h" +#endif /* COSMOPOLITAN_LIBC_ISYSTEM_SEMAPHORE_H_ */ diff --git a/libc/isystem/time.h b/libc/isystem/time.h index 89449f24c..64a6b69fa 100644 --- a/libc/isystem/time.h +++ b/libc/isystem/time.h @@ -3,6 +3,7 @@ #include "libc/calls/struct/timespec.h" #include "libc/calls/struct/timeval.h" #include "libc/calls/weirdtypes.h" +#include "libc/sysv/consts/clock.h" #include "libc/sysv/consts/sched.h" #include "libc/time/struct/tm.h" #include "libc/time/time.h" diff --git a/libc/thread/atomic.h b/libc/thread/atomic.h new file mode 100644 index 000000000..c1219f280 --- /dev/null +++ b/libc/thread/atomic.h @@ -0,0 +1,103 @@ +// clang-format off +/* Copyright 2016 Google Inc. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#ifndef NSYNC_PLATFORM_C11_ATOMIC_H_ +#define NSYNC_PLATFORM_C11_ATOMIC_H_ + +/* Atomic operations on nsync_atomic_uint32_ quantities + CAS, load, and store. + + Normally, these are used only on nsync_atomic_uint32_ values, but on Linux they may be + invoked on int values, because futexes operate on int values. A + compile-time check in the futex code ensures that both int and + nsync_atomic_uint32_ are 32 bits. + + Memory barriers: + Operations with the suffixes _ACQ and _RELACQ ensure that the operation + appears to complete before other memory operations subsequently performed by + the same thread, as seen by other threads. (In the case of ATM_CAS_ACQ, + this applies only if the operation returns a non-zero value.) + + Operations with the suffixes _REL and _RELACQ ensure that the operation + appears to complete after other memory operations previously performed by + the same thread, as seen by other threads. (In the case of ATM_CAS_REL, + this applies only if the operation returns a non-zero value.) + + // Atomically, + // int ATM_CAS (nsync_atomic_uint32_ *p, uint32_t old_value, uint32_t new_value) { + // if (*p == old_value) { + // *p = new_value; + // return (some-non-zero-value); + // } else { + // return (0); + // } + // } + // *_ACQ, *_REL, *_RELACQ variants are available, + // with the barrier semantics described above. + int ATM_CAS (nsync_atomic_uint32_ *p, uint32_t old_value, uint32_t new_value); + + // Atomically, + // uint32_t ATM_LOAD (nsync_atomic_uint32_ *p) { return (*p); } + // A *_ACQ variant is available, + // with the barrier semantics described above. + uint32_t ATM_LOAD (nsync_atomic_uint32_ *p); + + // Atomically, + // void ATM_STORE (nsync_atomic_uint32_ *p, uint32_t value) { *p = value; } + // A *_REL variant is available, + // with the barrier semantics described above. 
+ void ATM_STORE (nsync_atomic_uint32_ *p, uint32_t value); + */ + +#include "libc/thread/compiler.h" +#include "libc/intrin/atomic.h" +#include "libc/thread/nsync_atomic.h" + +NSYNC_CPP_START_ + +static __inline__ int atm_cas_nomb_u32_ (nsync_atomic_uint32_ *p, uint32_t o, uint32_t n) { + return (atomic_compare_exchange_strong_explicit (NSYNC_ATOMIC_UINT32_PTR_ (p), &o, n, + memory_order_relaxed, memory_order_relaxed)); +} +static __inline__ int atm_cas_acq_u32_ (nsync_atomic_uint32_ *p, uint32_t o, uint32_t n) { + return (atomic_compare_exchange_strong_explicit (NSYNC_ATOMIC_UINT32_PTR_ (p), &o, n, + memory_order_acquire, memory_order_relaxed)); +} +static __inline__ int atm_cas_rel_u32_ (nsync_atomic_uint32_ *p, uint32_t o, uint32_t n) { + return (atomic_compare_exchange_strong_explicit (NSYNC_ATOMIC_UINT32_PTR_ (p), &o, n, + memory_order_release, memory_order_relaxed)); +} +static __inline__ int atm_cas_relacq_u32_ (nsync_atomic_uint32_ *p, uint32_t o, uint32_t n) { + return (atomic_compare_exchange_strong_explicit (NSYNC_ATOMIC_UINT32_PTR_ (p), &o, n, + memory_order_acq_rel, memory_order_relaxed)); +} + +#define ATM_CAS_HELPER_(barrier, p, o, n) (atm_cas_##barrier##_u32_ ((p), (o), (n))) + +#define ATM_CAS(p,o,n) ATM_CAS_HELPER_ (nomb, (p), (o), (n)) +#define ATM_CAS_ACQ(p,o,n) ATM_CAS_HELPER_ (acq, (p), (o), (n)) +#define ATM_CAS_REL(p,o,n) ATM_CAS_HELPER_ (rel, (p), (o), (n)) +#define ATM_CAS_RELACQ(p,o,n) ATM_CAS_HELPER_ (relacq, (p), (o), (n)) + +/* Need a cast to remove "const" from some uses. */ +#define ATM_LOAD(p) (atomic_load_explicit ((nsync_atomic_uint32_ *) NSYNC_ATOMIC_UINT32_PTR_ (p), memory_order_relaxed)) +#define ATM_LOAD_ACQ(p) (atomic_load_explicit ((nsync_atomic_uint32_ *) NSYNC_ATOMIC_UINT32_PTR_ (p), memory_order_acquire)) + +#define ATM_STORE(p,v) (atomic_store_explicit (NSYNC_ATOMIC_UINT32_PTR_ (p), (v), memory_order_relaxed)) +#define ATM_STORE_REL(p,v) (atomic_store_explicit (NSYNC_ATOMIC_UINT32_PTR_ (p), (v), memory_order_release)) + +NSYNC_CPP_END_ + +#endif /*NSYNC_PLATFORM_C11_ATOMIC_H_*/ diff --git a/libc/thread/common.c b/libc/thread/common.c new file mode 100644 index 000000000..a96b028b2 --- /dev/null +++ b/libc/thread/common.c @@ -0,0 +1,279 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +/* This package provides a mutex nsync_mu and a Mesa-style condition variable nsync_cv. */ + +#include "libc/thread/nsync_cpp.h" +#include "libc/thread/platform.h" +#include "libc/thread/compiler.h" +#include "libc/thread/cputype.h" +#include "libc/thread/nsync.h" +#include "libc/thread/atomic.h" +#include "libc/thread/sem.h" +#include "libc/thread/dll.h" +#include "libc/thread/wait_internal.h" +#include "libc/thread/common.h" + +NSYNC_CPP_START_ + +/* Implementation notes + + The implementations of nsync_mu and nsync_cv both use spinlocks to protect + their waiter queues. The spinlocks are implemented with atomic operations + and a delay loop found below. 
They could use pthread_mutex_t, but I wished + to have an implementation independent of pthread mutexes and condition + variables. + + nsync_mu and nsync_cv use the same type of doubly-linked list of waiters + (see waiter.c). This allows waiters to be transferred from the cv queue to + the mu queue when a thread is logically woken from the cv but would + immediately go to sleep on the mu. See the wake_waiters() call. + + In mu, the "designated waker" is a thread that was waiting on mu, has been + woken up, but as yet has neither acquired nor gone back to waiting. The + presence of such a thread is indicated by the MU_DESIG_WAKER bit in the mu + word. This bit allows the nsync_mu_unlock() code to avoid waking a second + waiter when there's already one that will wake the next thread when the time + comes. This speeds things up when the lock is heavily contended, and the + critical sections are small. + + The weasel words "with high probability" in the specification of + nsync_mu_trylock() and nsync_mu_rtrylock() prevent clients from believing + that they can determine with certainty whether another thread has given up a + lock yet. This, together with the requirement that a thread that acquired a + mutex must release it (rather than it being released by another thread), + prohibits clients from using mu as a sort of semaphore. The intent is that + it be used only for traditional mutual exclusion, and that clients that need + a semaphore should use one. This leaves room for certain future + optimizations, and makes it easier to apply detection of potential races via + candidate lock-set algorithms, should that ever be desired. + + The nsync_mu_wait() and nsync_mu_wait_with_deadline() calls use an + absolute rather than a relative timeout. This is less error prone, as + described in the comment on nsync_cv_wait_with_deadline(). Alas, relative + timeouts are seductive in trivial examples (such as tests). These are the + first things that people try, so they are likely to be requested. If enough + people complain we could give them that particular piece of rope. + + Excessive evaluations of the same wait condition are avoided by maintaining + waiter.same_condition as a doubly-linked list of waiters with the same + non-NULL wait condition that are also adjacent in the waiter list. This does + well even with large numbers of threads if there is at most one + wait condition that can be false at any given time (such as in a + producer/consumer queue, which cannot be both empty and full + simultaneously). One could imagine a queueing mechanism that would + guarantee to evaluate each condition at most once per wakeup, but that would + be substantially more complex, and would still degrade if the number of + distinct wakeup conditions were high. So clients are advised to resort to + condition variables if they have many distinct wakeup conditions. */ + +/* Used in spinloops to delay resumption of the loop. + Usage: + unsigned attempts = 0; + while (try_something) { + attempts = nsync_spin_delay_ (attempts); + } */ +unsigned nsync_spin_delay_ (unsigned attempts) { + if (attempts < 7) { + volatile int i; + for (i = 0; i != 1 << attempts; i++) { + } + attempts++; + } else { + sched_yield (); + } + return (attempts); +} + +/* Spin until (*w & test) == 0, then atomically perform *w = ((*w | set) & + ~clear), perform an acquire barrier, and return the previous value of *w.
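+ + Illustrative usage, a sketch of the pattern used with free_waiters_mu below, where bit 0 of the word acts as a spinlock (lock_word is a hypothetical name): + + static nsync_atomic_uint32_ lock_word; + nsync_spin_test_and_set_ (&lock_word, 1, 1, 0); + ...critical section... + ATM_STORE_REL (&lock_word, 0); + + The acquire spins until bit 0 is clear and then sets it with an acquire barrier; the plain release store clears it again.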
+ */ +uint32_t nsync_spin_test_and_set_ (nsync_atomic_uint32_ *w, uint32_t test, + uint32_t set, uint32_t clear) { + unsigned attempts = 0; /* CV_SPINLOCK retry count */ + uint32_t old = ATM_LOAD (w); + while ((old & test) != 0 || !ATM_CAS_ACQ (w, old, (old | set) & ~clear)) { + attempts = nsync_spin_delay_ (attempts); + old = ATM_LOAD (w); + } + return (old); +} + +/* ====================================================================================== */ + +struct nsync_waiter_s *nsync_dll_nsync_waiter_ (nsync_dll_element_ *e) { + struct nsync_waiter_s *nw = (struct nsync_waiter_s *) e->container; + ASSERT (nw->tag == NSYNC_WAITER_TAG); + ASSERT (e == &nw->q); + return (nw); +} +waiter *nsync_dll_waiter_ (nsync_dll_element_ *e) { + struct nsync_waiter_s *nw = DLL_NSYNC_WAITER (e); + waiter *w = CONTAINER (waiter, nw, nw); + ASSERT ((nw->flags & NSYNC_WAITER_FLAG_MUCV) != 0); + ASSERT (w->tag == WAITER_TAG); + ASSERT (e == &w->nw.q); + return (w); +} + +waiter *nsync_dll_waiter_samecond_ (nsync_dll_element_ *e) { + waiter *w = (waiter *) e->container; + ASSERT (w->tag == WAITER_TAG); + ASSERT (e == &w->same_condition); + return (w); +} + +/* -------------------------------- */ + +/* free_waiters points to a doubly-linked list of free waiter structs. */ +static nsync_dll_list_ free_waiters = NULL; + +static nsync_atomic_uint32_ free_waiters_mu; /* spinlock; protects free_waiters */ + +static THREAD_LOCAL waiter *waiter_for_thread; +static void waiter_destroy (void *v) { + waiter *w = (waiter *) v; + /* Reset waiter_for_thread in case another thread-local variable reuses + the waiter in its destructor while the waiter is taken by the other + thread from free_waiters. This can happen as the destruction order + of thread-local variables can be arbitrary on some platforms, e.g. + POSIX. */ + waiter_for_thread = NULL; + IGNORE_RACES_START (); + ASSERT ((w->flags & (WAITER_RESERVED|WAITER_IN_USE)) == WAITER_RESERVED); + w->flags &= ~WAITER_RESERVED; + nsync_spin_test_and_set_ (&free_waiters_mu, 1, 1, 0); + free_waiters = nsync_dll_make_first_in_list_ (free_waiters, &w->nw.q); + ATM_STORE_REL (&free_waiters_mu, 0); /* release store */ + IGNORE_RACES_END (); +} + +/* If non-nil, nsync_malloc_ptr_ points to a malloc-like routine that allocates + memory, used by mutex and condition variable code to allocate waiter + structs. This would allow nsync's mutexes to be used inside an + implementation of malloc(), by providing another, simpler allocator here. + The intent is that the implicit NULL value here can be overridden by a + client declaration that uses an initializer. */ +void *(*nsync_malloc_ptr_) (size_t size); + +// [jart] give our leak detector a helping hand +static void release_waiters(void) { + waiter *w; + nsync_dll_element_ *q; + nsync_spin_test_and_set_ (&free_waiters_mu, 1, 1, 0); + for (;;) { + q = nsync_dll_first_ (free_waiters); + if (q != NULL) { /* If free list is non-empty, dequeue an item. */ + free_waiters = nsync_dll_remove_ (free_waiters, q); + w = DLL_WAITER (q); + free (w); + } else { + break; + } + } + ATM_STORE_REL (&free_waiters_mu, 0); /* release store */ +} + +__attribute__((__constructor__)) static void init(void) { + atexit (release_waiters); +} + +/* Return a pointer to an unused waiter struct. + Ensures that the enclosed timer is stopped and its channel drained.
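+ + A sketch of the wait protocol these waiter structs support, following the description of the waiter type in common.h (the enqueue step depends on the caller): + + waiter *w = nsync_waiter_new_ (); + ATM_STORE (&w->nw.waiting, 1); + ...enqueue &w->nw.q on the relevant queue... + while (ATM_LOAD_ACQ (&w->nw.waiting) != 0) { + nsync_mu_semaphore_p (&w->sem); + } + nsync_waiter_free_ (w);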
*/ +waiter *nsync_waiter_new_ (void) { + nsync_dll_element_ *q; + waiter *tw; + waiter *w; + if (HAVE_THREAD_LOCAL) { + tw = waiter_for_thread; + } else { + tw = (waiter *) nsync_per_thread_waiter_ (&waiter_destroy); + } + w = tw; + if (w == NULL || (w->flags & (WAITER_RESERVED|WAITER_IN_USE)) != WAITER_RESERVED) { + w = NULL; + nsync_spin_test_and_set_ (&free_waiters_mu, 1, 1, 0); + q = nsync_dll_first_ (free_waiters); + if (q != NULL) { /* If free list is non-empty, dequeue an item. */ + free_waiters = nsync_dll_remove_ (free_waiters, q); + w = DLL_WAITER (q); + } + ATM_STORE_REL (&free_waiters_mu, 0); /* release store */ + if (w == NULL) { /* If free list was empty, allocate an item. */ + if (nsync_malloc_ptr_ != NULL) { /* Use client's malloc() */ + w = (waiter *) (*nsync_malloc_ptr_) (sizeof (*w)); + } else { /* standard malloc () */ + w = (waiter *) malloc (sizeof (*w)); + } + w->tag = WAITER_TAG; + w->nw.tag = NSYNC_WAITER_TAG; + nsync_mu_semaphore_init (&w->sem); + w->nw.sem = &w->sem; + nsync_dll_init_ (&w->nw.q, &w->nw); + NSYNC_ATOMIC_UINT32_STORE_ (&w->nw.waiting, 0); + w->nw.flags = NSYNC_WAITER_FLAG_MUCV; + ATM_STORE (&w->remove_count, 0); + nsync_dll_init_ (&w->same_condition, w); + w->flags = 0; + } + if (tw == NULL) { + w->flags |= WAITER_RESERVED; + nsync_set_per_thread_waiter_ (w, &waiter_destroy); + if (HAVE_THREAD_LOCAL) { + waiter_for_thread = w; + } + } + } + w->flags |= WAITER_IN_USE; + return (w); +} + +/* Return an unused waiter struct *w to the free pool. */ +void nsync_waiter_free_ (waiter *w) { + ASSERT ((w->flags & WAITER_IN_USE) != 0); + w->flags &= ~WAITER_IN_USE; + if ((w->flags & WAITER_RESERVED) == 0) { + nsync_spin_test_and_set_ (&free_waiters_mu, 1, 1, 0); + free_waiters = nsync_dll_make_first_in_list_ (free_waiters, &w->nw.q); + ATM_STORE_REL (&free_waiters_mu, 0); /* release store */ + } +} + +/* ====================================================================================== */ + +/* writer_type points to a lock_type that describes how to manipulate a mu for a writer. */ +static lock_type Xwriter_type = { + MU_WZERO_TO_ACQUIRE, + MU_WADD_TO_ACQUIRE, + MU_WHELD_IF_NON_ZERO, + MU_WSET_WHEN_WAITING, + MU_WCLEAR_ON_ACQUIRE, + MU_WCLEAR_ON_UNCONTENDED_RELEASE +}; +lock_type *nsync_writer_type_ = &Xwriter_type; + + +/* reader_type points to a lock_type that describes how to manipulate a mu for a reader. */ +static lock_type Xreader_type = { + MU_RZERO_TO_ACQUIRE, + MU_RADD_TO_ACQUIRE, + MU_RHELD_IF_NON_ZERO, + MU_RSET_WHEN_WAITING, + MU_RCLEAR_ON_ACQUIRE, + MU_RCLEAR_ON_UNCONTENDED_RELEASE +}; +lock_type *nsync_reader_type_ = &Xreader_type; + +NSYNC_CPP_END_ diff --git a/libc/thread/common.h b/libc/thread/common.h new file mode 100644 index 000000000..1c672f54c --- /dev/null +++ b/libc/thread/common.h @@ -0,0 +1,293 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#ifndef NSYNC_INTERNAL_COMMON_H_ +#define NSYNC_INTERNAL_COMMON_H_ + +#include "libc/thread/nsync_cpp.h" +#include "libc/thread/platform.h" +#include "libc/thread/nsync_atomic.h" +#include "libc/thread/sem.h" +#include "libc/thread/nsync_waiter.h" +#include "libc/thread/dll.h" +#include "libc/thread/nsync_mu.h" +#include "libc/thread/nsync_cv.h" +#include "libc/thread/nsync_note.h" +#include "libc/thread/wait_internal.h" + +/* Annotations for race detectors. */ +#if defined(__has_feature) && !defined(__SANITIZE_THREAD__) +#if __has_feature(thread_sanitizer) /* used by clang */ +#define __SANITIZE_THREAD__ 1 /* GCC uses this; fake it in clang */ +#endif +#endif +#if defined(__SANITIZE_THREAD__) +NSYNC_C_START_ +void AnnotateIgnoreWritesBegin(const char* file, int line); +void AnnotateIgnoreWritesEnd(const char* file, int line); +void AnnotateIgnoreReadsBegin(const char* file, int line); +void AnnotateIgnoreReadsEnd(const char* file, int line); +NSYNC_C_END_ +#define IGNORE_RACES_START() \ + do { \ + AnnotateIgnoreReadsBegin(__FILE__, __LINE__); \ + AnnotateIgnoreWritesBegin(__FILE__, __LINE__); \ + } while (0) +#define IGNORE_RACES_END() \ + do { \ + AnnotateIgnoreWritesEnd(__FILE__, __LINE__); \ + AnnotateIgnoreReadsEnd(__FILE__, __LINE__); \ + } while (0) +#else +#define IGNORE_RACES_START() +#define IGNORE_RACES_END() +#endif + +#ifndef NSYNC_DEBUG +#define NSYNC_DEBUG 0 +#endif + +NSYNC_CPP_START_ + +/* Yield the CPU. Platform specific. */ +void nsync_yield_ (void); + +/* Retrieve the per-thread cache of the waiter object. Platform specific. */ +void *nsync_per_thread_waiter_ (void (*dest) (void *)); + +/* Set the per-thread cache of the waiter object. Platform specific. */ +void nsync_set_per_thread_waiter_ (void *v, void (*dest) (void *)); + +/* Used in spinloops to delay resumption of the loop. + Usage: + unsigned attempts = 0; + while (try_something) { + attempts = nsync_spin_delay_ (attempts); + } */ +unsigned nsync_spin_delay_ (unsigned attempts); + +/* Spin until (*w & test) == 0, then atomically perform *w = ((*w | set) & + ~clear), perform an acquire barrier, and return the previous value of *w. + */ +uint32_t nsync_spin_test_and_set_ (nsync_atomic_uint32_ *w, uint32_t test, + uint32_t set, uint32_t clear); + +/* Abort after printing the nul-terminated string s[]. */ +void nsync_panic_ (const char *s); + +/* ---------- */ + +#define MIN_(a_,b_) ((a_) < (b_)? (a_) : (b_)) +#define MAX_(a_,b_) ((a_) > (b_)? (a_) : (b_)) + +/* ---------- */ + +/* Fields in nsync_mu.word. + + - At least one of the MU_WLOCK or MU_RLOCK_FIELD fields must be zero. + - MU_WLOCK indicates that a write lock is held. + - MU_RLOCK_FIELD is a count of readers with read locks. + + - MU_SPINLOCK represents a spinlock that must be held when manipulating the + waiter queue. + + - MU_DESIG_WAKER indicates that a former waiter has been woken, but has + neither acquired the lock nor gone back to sleep. Legal to fail to set it; + illegal to set it when no such waiter exists. + + - MU_WAITING indicates whether the waiter queue is non-empty. + The following bits should be zero if MU_WAITING is zero. + - MU_CONDITION indicates that some waiter may have an associated condition + (from nsync_mu_wait, etc.). Legal to set it when no such waiter exists, + but illegal to fail to set it when such a waiter exists. + - MU_WRITER_WAITING indicates that a reader that has not yet blocked + at least once should not acquire in order not to starve waiting writers.
It is set when a writer blocks or a reader is woken with a writer waiting. + It is reset when a writer acquires, but set again when that writer + releases if it wakes readers and there is a waiting writer. + - MU_LONG_WAIT indicates that a waiter has been woken many times but + repeatedly failed to acquire when competing for the lock. This is used + only to prevent long-term starvation by writers. The thread that sets it + clears it when it acquires. + - MU_ALL_FALSE indicates that a complete scan of the waiter list found no + waiters with true conditions, and the lock has not been acquired by a + writer since then. This allows a reader lock to be released without + testing conditions again. It is legal to fail to set this, but illegal + to set it inappropriately. + */ +#define MU_WLOCK ((uint32_t) (1 << 0)) /* writer lock is held. */ +#define MU_SPINLOCK ((uint32_t) (1 << 1)) /* spinlock is held (protects waiters). */ +#define MU_WAITING ((uint32_t) (1 << 2)) /* waiter list is non-empty. */ +#define MU_DESIG_WAKER ((uint32_t) (1 << 3)) /* a former waiter awoke, and hasn't yet acquired or slept anew */ +#define MU_CONDITION ((uint32_t) (1 << 4)) /* the wait list contains some conditional waiters. */ +#define MU_WRITER_WAITING ((uint32_t) (1 << 5)) /* there is a writer waiting */ +#define MU_LONG_WAIT ((uint32_t) (1 << 6)) /* the waiter at the head of the queue has been waiting a long time */ +#define MU_ALL_FALSE ((uint32_t) (1 << 7)) /* all waiter conditions are false */ +#define MU_RLOCK ((uint32_t) (1 << 8)) /* low-order bit of reader count, which uses rest of word */ + +/* The constants below are derived from those above. */ +#define MU_RLOCK_FIELD (~(uint32_t) (MU_RLOCK - 1)) /* mask of reader count field */ + +#define MU_ANY_LOCK (MU_WLOCK | MU_RLOCK_FIELD) /* mask for any lock held */ + +#define MU_WZERO_TO_ACQUIRE (MU_ANY_LOCK | MU_LONG_WAIT) /* bits to be zero to acquire write lock */ +#define MU_WADD_TO_ACQUIRE (MU_WLOCK) /* add to acquire a write lock */ +#define MU_WHELD_IF_NON_ZERO (MU_WLOCK) /* if any of these bits are set, write lock is held */ +#define MU_WSET_WHEN_WAITING (MU_WAITING | MU_WRITER_WAITING) /* a writer is waiting */ +#define MU_WCLEAR_ON_ACQUIRE (MU_WRITER_WAITING) /* clear MU_WRITER_WAITING when a writer acquires */ +#define MU_WCLEAR_ON_UNCONTENDED_RELEASE (MU_ALL_FALSE) /* clear if a writer releases w/o waking */ + +/* bits to be zero to acquire read lock */ +#define MU_RZERO_TO_ACQUIRE (MU_WLOCK | MU_WRITER_WAITING | MU_LONG_WAIT) +#define MU_RADD_TO_ACQUIRE (MU_RLOCK) /* add to acquire a read lock */ +#define MU_RHELD_IF_NON_ZERO (MU_RLOCK_FIELD) /* if any of these bits are set, read lock is held */ +#define MU_RSET_WHEN_WAITING (MU_WAITING) /* indicate that some thread is waiting */ +#define MU_RCLEAR_ON_ACQUIRE ((uint32_t) 0) /* nothing to clear when a read acquires */ +#define MU_RCLEAR_ON_UNCONTENDED_RELEASE ((uint32_t) 0) /* nothing to clear when a read releases */ + + +/* A lock_type holds the values needed to manipulate a mu in some mode (read or + write). This allows some of the code to be generic, and parameterized by + the lock type.
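+ + For example, an uncontended try-acquire can be written once for both modes; this is a sketch of that pattern, not the exact text of the mu code: + + static int try_acquire (nsync_mu *mu, lock_type *l_type) { + uint32_t old = ATM_LOAD (&mu->word); + return ((old & l_type->zero_to_acquire) == 0 && + ATM_CAS_ACQ (&mu->word, old, + (old + l_type->add_to_acquire) & + ~l_type->clear_on_acquire)); + }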
*/ +typedef struct lock_type_s { + uint32_t zero_to_acquire; /* bits that must be zero to acquire */ + uint32_t add_to_acquire; /* constant to add to acquire */ + uint32_t held_if_non_zero; /* if any of these bits are set, the lock is held */ + uint32_t set_when_waiting; /* set when thread waits */ + uint32_t clear_on_acquire; /* clear when thread acquires */ + uint32_t clear_on_uncontended_release; /* clear when thread releases without waking */ +} lock_type; + + +/* writer_type points to a lock_type that describes how to manipulate a mu for a writer. */ +extern lock_type *nsync_writer_type_; + +/* reader_type points to a lock_type that describes how to manipulate a mu for a reader. */ +extern lock_type *nsync_reader_type_; + +/* ---------- */ + +/* Bits in nsync_cv.word */ + +#define CV_SPINLOCK ((uint32_t) (1 << 0)) /* protects waiters */ +#define CV_NON_EMPTY ((uint32_t) (1 << 1)) /* waiters list is non-empty */ + +/* ---------- */ + +/* Hold a pair of condition function and its argument. */ +struct wait_condition_s { + int (*f) (const void *v); + const void *v; + int (*eq) (const void *a, const void *b); +}; + +/* Return whether wait conditions *a_ and *b_ are equal and non-null. */ +#define WAIT_CONDITION_EQ(a_, b_) ((a_)->f != NULL && (a_)->f == (b_)->f && \ + ((a_)->v == (b_)->v || \ + ((a_)->eq != NULL && (*(a_)->eq) ((a_)->v, (b_)->v)))) + +/* If a waiter has waited this many times, it may set the MU_LONG_WAIT bit. */ +#define LONG_WAIT_THRESHOLD 30 + +/* ---------- */ + +#define NOTIFIED_TIME(n_) (ATM_LOAD_ACQ (&(n_)->notified) != 0? nsync_time_zero : \ + (n_)->expiry_time_valid? (n_)->expiry_time : nsync_time_no_deadline) + +/* A waiter represents a single waiter on a cv or a mu. + + To wait: + Allocate a waiter struct *w with new_waiter(), set w.waiting=1, and + w.cv_mu=nil or to the associated mu if waiting on a condition variable, then + queue w.nsync_dll on some queue, and then wait using: + while (ATM_LOAD_ACQ (&w.waiting) != 0) { nsync_mu_semaphore_p (&w.sem); } + Return *w to the freepool by calling free_waiter (w). + + To wakeup: + Remove *w from the relevant queue then: + ATM_STORE_REL (&w.waiting, 0); + nsync_mu_semaphore_v (&w.sem); */ +typedef struct { + uint32_t tag; /* debug DLL_NSYNC_WAITER, DLL_WAITER, DLL_WAITER_SAMECOND */ + nsync_semaphore sem; /* Thread waits on this semaphore. */ + struct nsync_waiter_s nw; /* An embedded nsync_waiter_s. */ + struct nsync_mu_s_ *cv_mu; /* pointer to nsync_mu associated with a cv wait */ + lock_type *l_type; /* Lock type of the mu, or nil if not associated with a mu. */ + nsync_atomic_uint32_ remove_count; /* count of removals from queue */ + struct wait_condition_s cond; /* A condition on which to acquire a mu. */ + nsync_dll_element_ same_condition; /* Links neighbours in nw.q with same non-nil condition. */ + int flags; /* see WAITER_* bits below */ +} waiter; +static const uint32_t WAITER_TAG = 0x0590239f; +static const uint32_t NSYNC_WAITER_TAG = 0x726d2ba9; + +#define WAITER_RESERVED 0x1 /* waiter reserved by a thread, even when not in use */ +#define WAITER_IN_USE 0x2 /* waiter in use by a thread */ + +#define CONTAINER(t_,f_,p_) ((t_ *) (((char *) (p_)) - offsetof (t_, f_))) +#define ASSERT(x) do { if (!(x)) { *(volatile int *)0 = 0; } } while (0) + +/* Return a pointer to the nsync_waiter_s containing nsync_dll_element_ *e. */ +#define DLL_NSYNC_WAITER(e) (NSYNC_DEBUG? 
nsync_dll_nsync_waiter_ (e) : \ + ((struct nsync_waiter_s *)((e)->container))) +struct nsync_waiter_s *nsync_dll_nsync_waiter_ (nsync_dll_element_ *e); + +/* Return a pointer to the waiter struct that *e is embedded in, where *e is an nw.q field. */ +#define DLL_WAITER(e) (NSYNC_DEBUG? nsync_dll_waiter_ (e) : \ + CONTAINER (waiter, nw, DLL_NSYNC_WAITER(e))) +waiter *nsync_dll_waiter_ (nsync_dll_element_ *e); + +/* Return a pointer to the waiter struct that *e is embedded in, where *e is a + same_condition field. */ +#define DLL_WAITER_SAMECOND(e) (NSYNC_DEBUG? nsync_dll_waiter_samecond_ (e) : \ + ((waiter *) ((e)->container))) +waiter *nsync_dll_waiter_samecond_ (nsync_dll_element_ *e); + +/* Return a pointer to an unused waiter struct. + Ensures that the enclosed timer is stopped and its channel drained. */ +waiter *nsync_waiter_new_ (void); + +/* Return an unused waiter struct *w to the free pool. */ +void nsync_waiter_free_ (waiter *w); + +/* ---------- */ + +/* The internals of an nsync_note. See internal/note.c for details of locking + discipline. */ +struct nsync_note_s_ { + nsync_dll_element_ parent_child_link; /* parent's children, under parent->note_mu */ + int expiry_time_valid; /* whether expiry_time is valid; r/o after init */ + nsync_time expiry_time; /* expiry time, if expiry_time_valid != 0; r/o after init */ + nsync_mu note_mu; /* protects fields below except "notified" */ + nsync_cv no_children_cv; /* signalled when children becomes empty */ + uint32_t disconnecting; /* non-zero => node is being disconnected */ + nsync_atomic_uint32_ notified; /* non-zero if the note has been notified */ + struct nsync_note_s_ *parent; /* points to parent, if any */ + nsync_dll_element_ *children; /* list of children */ + nsync_dll_element_ *waiters; /* list of waiters */ +}; + +/* ---------- */ + +void nsync_mu_lock_slow_ (nsync_mu *mu, waiter *w, uint32_t clear, lock_type *l_type); +void nsync_mu_unlock_slow_ (nsync_mu *mu, lock_type *l_type); +nsync_dll_list_ nsync_remove_from_mu_queue_ (nsync_dll_list_ mu_queue, nsync_dll_element_ *e); +void nsync_maybe_merge_conditions_ (nsync_dll_element_ *p, nsync_dll_element_ *n); +nsync_time nsync_note_notified_deadline_ (nsync_note n); +int nsync_sem_wait_with_cancel_ (waiter *w, nsync_time abs_deadline, + nsync_note cancel_note); +NSYNC_CPP_END_ + +#endif /*NSYNC_INTERNAL_COMMON_H_*/ diff --git a/libc/thread/compiler.h b/libc/thread/compiler.h new file mode 100644 index 000000000..41c938485 --- /dev/null +++ b/libc/thread/compiler.h @@ -0,0 +1,24 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
*/ + +#ifndef NSYNC_PLATFORM_GCC_COMPILER_H_ +#define NSYNC_PLATFORM_GCC_COMPILER_H_ + +#define INLINE __inline +#define UNUSED __attribute__((unused)) +#define THREAD_LOCAL __thread +#define HAVE_THREAD_LOCAL 1 + +#endif /*NSYNC_PLATFORM_GCC_COMPILER_H_*/ diff --git a/libc/thread/counter.c b/libc/thread/counter.c new file mode 100644 index 000000000..b6a41c14a --- /dev/null +++ b/libc/thread/counter.c @@ -0,0 +1,151 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "libc/thread/nsync_cpp.h" +#include "libc/thread/platform.h" +#include "libc/thread/compiler.h" +#include "libc/thread/cputype.h" +#include "libc/thread/nsync.h" +#include "libc/thread/atomic.h" +#include "libc/thread/dll.h" +#include "libc/thread/sem.h" +#include "libc/thread/wait_internal.h" +#include "libc/thread/common.h" + +NSYNC_CPP_START_ + +/* Internal details of nsync_counter. */ +struct nsync_counter_s_ { + nsync_atomic_uint32_ waited; /* wait has been called */ + nsync_mu counter_mu; /* protects fields below except reads of "value" */ + nsync_atomic_uint32_ value; /* value of counter */ + struct nsync_dll_element_s_ *waiters; /* list of waiters */ +}; + +nsync_counter nsync_counter_new (uint32_t value) { + nsync_counter c = (nsync_counter) malloc (sizeof (*c)); + if (c != NULL) { + memset ((void *) c, 0, sizeof (*c)); + ATM_STORE (&c->value, value); + } + return (c); +} + +void nsync_counter_free (nsync_counter c) { + nsync_mu_lock (&c->counter_mu); + ASSERT (nsync_dll_is_empty_ (c->waiters)); + nsync_mu_unlock (&c->counter_mu); + free (c); +} + +uint32_t nsync_counter_add (nsync_counter c, int32_t delta) { + uint32_t value; + IGNORE_RACES_START (); + if (delta == 0) { + value = ATM_LOAD_ACQ (&c->value); + } else { + nsync_mu_lock (&c->counter_mu); + do { + value = ATM_LOAD (&c->value); + } while (!ATM_CAS_RELACQ (&c->value, value, value+delta)); + value += delta; + if (delta > 0) { + /* It's illegal to increase the count from zero if + there has been a waiter. */ + ASSERT (value != (uint32_t) delta || !ATM_LOAD (&c->waited)); + ASSERT (value > value - delta); /* Crash on overflow. */ + } else { + ASSERT (value < value - delta); /* Crash on overflow. 
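+ (value is the post-add count, so value - delta recovers the old count; an unsigned wraparound inverts these comparisons, which is how the assertions detect overflow.)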
*/ + } + if (value == 0) { + nsync_dll_element_ *p; + while ((p = nsync_dll_first_ (c->waiters)) != NULL) { + struct nsync_waiter_s *nw = DLL_NSYNC_WAITER (p); + c->waiters = nsync_dll_remove_ (c->waiters, p); + ATM_STORE_REL (&nw->waiting, 0); + nsync_mu_semaphore_v (nw->sem); + } + } + nsync_mu_unlock (&c->counter_mu); + } + IGNORE_RACES_END (); + return (value); +} + +uint32_t nsync_counter_value (nsync_counter c) { + uint32_t result; + IGNORE_RACES_START (); + result = ATM_LOAD_ACQ (&c->value); + IGNORE_RACES_END (); + return (result); +} + +uint32_t nsync_counter_wait (nsync_counter c, nsync_time abs_deadline) { + struct nsync_waitable_s waitable; + struct nsync_waitable_s *pwaitable = &waitable; + uint32_t result = 0; + waitable.v = c; + waitable.funcs = &nsync_counter_waitable_funcs; + if (nsync_wait_n (NULL, NULL, NULL, abs_deadline, 1, &pwaitable) != 0) { + IGNORE_RACES_START (); + result = ATM_LOAD_ACQ (&c->value); + IGNORE_RACES_END (); + } + return (result); +} + +static nsync_time counter_ready_time (void *v, struct nsync_waiter_s *nw UNUSED) { + nsync_counter c = (nsync_counter) v; + nsync_time r; + ATM_STORE (&c->waited, 1); + r = (ATM_LOAD_ACQ (&c->value) == 0? nsync_time_zero : nsync_time_no_deadline); + return (r); +} + +static int counter_enqueue (void *v, struct nsync_waiter_s *nw) { + nsync_counter c = (nsync_counter) v; + int32_t value; + nsync_mu_lock (&c->counter_mu); + value = ATM_LOAD_ACQ (&c->value); + if (value != 0) { + c->waiters = nsync_dll_make_last_in_list_ (c->waiters, &nw->q); + ATM_STORE (&nw->waiting, 1); + } else { + ATM_STORE (&nw->waiting, 0); + } + nsync_mu_unlock (&c->counter_mu); + return (value != 0); +} + +static int counter_dequeue (void *v, struct nsync_waiter_s *nw) { + nsync_counter c = (nsync_counter) v; + int32_t value; + nsync_mu_lock (&c->counter_mu); + value = ATM_LOAD_ACQ (&c->value); + if (ATM_LOAD_ACQ (&nw->waiting) != 0) { + c->waiters = nsync_dll_remove_ (c->waiters, &nw->q); + ATM_STORE (&nw->waiting, 0); + } + nsync_mu_unlock (&c->counter_mu); + return (value != 0); +} + +const struct nsync_waitable_funcs_s nsync_counter_waitable_funcs = { + &counter_ready_time, + &counter_enqueue, + &counter_dequeue +}; + +NSYNC_CPP_END_ diff --git a/libc/thread/cputype.h b/libc/thread/cputype.h new file mode 100644 index 000000000..e954ebf80 --- /dev/null +++ b/libc/thread/cputype.h @@ -0,0 +1,21 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#ifndef NSYNC_PLATFORM_X86_64_CPUTYPE_H_ +#define NSYNC_PLATFORM_X86_64_CPUTYPE_H_ + +#define ATM_LD_IS_ACQ_ST_IS_REL_ 1 + +#endif /*NSYNC_PLATFORM_X86_64_CPUTYPE_H_*/ diff --git a/libc/thread/cv.c b/libc/thread/cv.c new file mode 100644 index 000000000..660c8fb97 --- /dev/null +++ b/libc/thread/cv.c @@ -0,0 +1,495 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "libc/thread/nsync_cpp.h" +#include "libc/thread/platform.h" +#include "libc/thread/compiler.h" +#include "libc/thread/cputype.h" +#include "libc/thread/nsync.h" +#include "libc/thread/dll.h" +#include "libc/thread/sem.h" +#include "libc/thread/wait_internal.h" +#include "libc/thread/common.h" +#include "libc/thread/atomic.h" + +NSYNC_CPP_START_ + +/* Initialize *cv. */ +void nsync_cv_init (nsync_cv *cv) { + memset ((void *) cv, 0, sizeof (*cv)); +} + +/* Wake the cv waiters in the circular list pointed to by + to_wake_list, which may not be NULL. If the waiter is associated with a + nsync_mu, the "wakeup" may consist of transferring the waiters to the nsync_mu's + queue. Requires that every waiter is associated with the same mutex. + all_readers indicates whether all the waiters on the list are readers. */ +static void wake_waiters (nsync_dll_list_ to_wake_list, int all_readers) { + nsync_dll_element_ *p = NULL; + nsync_dll_element_ *next = NULL; + nsync_dll_element_ *first_waiter = nsync_dll_first_ (to_wake_list); + struct nsync_waiter_s *first_nw = DLL_NSYNC_WAITER (first_waiter); + waiter *first_w = NULL; + nsync_mu *pmu = NULL; + if ((first_nw->flags & NSYNC_WAITER_FLAG_MUCV) != 0) { + first_w = DLL_WAITER (first_waiter); + pmu = first_w->cv_mu; + } + if (pmu != NULL) { /* waiter is associated with the nsync_mu *pmu. */ + /* We will transfer elements of to_wake_list to *pmu if all of: + - some thread holds the lock, and + - *pmu's spinlock is not held, and + - either *pmu cannot be acquired in the mode of the first + waiter, or there's more than one thread on to_wake_list + and not all are readers, and + - we acquire the spinlock on the first try. + The spinlock acquisition also marks *pmu as having waiters. + The requirement that some thread holds the lock ensures + that at least one of the transferred waiters will be woken. + */ + uint32_t old_mu_word = ATM_LOAD (&pmu->word); + int first_cant_acquire = ((old_mu_word & first_w->l_type->zero_to_acquire) != 0); + next = nsync_dll_next_ (to_wake_list, first_waiter); + if ((old_mu_word&MU_ANY_LOCK) != 0 && + (old_mu_word&MU_SPINLOCK) == 0 && + (first_cant_acquire || (next != NULL && !all_readers)) && + ATM_CAS_ACQ (&pmu->word, old_mu_word, + (old_mu_word|MU_SPINLOCK|MU_WAITING) & + ~MU_ALL_FALSE)) { + + uint32_t set_on_release = 0; + + /* For any waiter that should be transferred, rather + than woken, move it from to_wake_list to pmu->waiters. */ + int first_is_writer = first_w->l_type == nsync_writer_type_; + int transferred_a_writer = 0; + int woke_areader = 0; + /* Transfer the first waiter iff it can't acquire *pmu. */ + if (first_cant_acquire) { + to_wake_list = nsync_dll_remove_ (to_wake_list, first_waiter); + pmu->waiters = nsync_dll_make_last_in_list_ (pmu->waiters, first_waiter); + /* tell nsync_cv_wait_with_deadline() that we + moved the waiter to *pmu's queue. */ + first_w->cv_mu = NULL; + /* first_nw.waiting is already 1, from being on + cv's waiter queue. */ + transferred_a_writer = first_is_writer; + } else { + woke_areader = !first_is_writer; + } + /* Now process the other waiters. 
*/ + for (p = next; p != NULL; p = next) { + int p_is_writer; + struct nsync_waiter_s *p_nw = DLL_NSYNC_WAITER (p); + waiter *p_w = NULL; + if ((p_nw->flags & NSYNC_WAITER_FLAG_MUCV) != 0) { + p_w = DLL_WAITER (p); + } + next = nsync_dll_next_ (to_wake_list, p); + p_is_writer = (p_w != NULL && + DLL_WAITER (p)->l_type == nsync_writer_type_); + /* We transfer this element if any of: + - the first waiter can't acquire *pmu, or + - the first waiter is a writer, or + - this element is a writer. */ + if (p_w == NULL) { + /* wake non-native waiter */ + } else if (first_cant_acquire || first_is_writer || p_is_writer) { + to_wake_list = nsync_dll_remove_ (to_wake_list, p); + pmu->waiters = nsync_dll_make_last_in_list_ (pmu->waiters, p); + /* tell nsync_cv_wait_with_deadline() + that we moved the waiter to *pmu's + queue. */ + p_w->cv_mu = NULL; + /* p_nw->waiting is already 1, from + being on cv's waiter queue. */ + transferred_a_writer = transferred_a_writer || p_is_writer; + } else { + woke_areader = woke_areader || !p_is_writer; + } + } + + /* Claim a waiting writer if we transferred one, except if we woke readers, + in which case we want those readers to be able to acquire immediately. */ + if (transferred_a_writer && !woke_areader) { + set_on_release |= MU_WRITER_WAITING; + } + + /* release *pmu's spinlock (MU_WAITING was set by CAS above) */ + old_mu_word = ATM_LOAD (&pmu->word); + while (!ATM_CAS_REL (&pmu->word, old_mu_word, + (old_mu_word|set_on_release) & ~MU_SPINLOCK)) { + old_mu_word = ATM_LOAD (&pmu->word); + } + } + } + + /* Wake any waiters we didn't manage to enqueue on the mu. */ + for (p = nsync_dll_first_ (to_wake_list); p != NULL; p = next) { + struct nsync_waiter_s *p_nw = DLL_NSYNC_WAITER (p); + next = nsync_dll_next_ (to_wake_list, p); + to_wake_list = nsync_dll_remove_ (to_wake_list, p); + /* Wake the waiter. */ + ATM_STORE_REL (&p_nw->waiting, 0); /* release store */ + nsync_mu_semaphore_v (p_nw->sem); + } +} + +/* ------------------------------------------ */ + +/* Versions of nsync_mu_lock() and nsync_mu_unlock() that take "void *" + arguments, to avoid call through a function pointer of a different type, + which is undefined. */ +static void void_mu_lock (void *mu) { + nsync_mu_lock ((nsync_mu *) mu); +} +static void void_mu_unlock (void *mu) { + nsync_mu_unlock ((nsync_mu *) mu); +} + +/* Atomically release *pmu (which must be held on entry) + and block the calling thread on *pcv. Then wait until awakened by a + call to nsync_cv_signal() or nsync_cv_broadcast() (or a spurious wakeup), or by the time + reaching abs_deadline, or by cancel_note being notified. In all cases, + reacquire *pmu, and return the reason the call returned (0, ETIMEDOUT, + or ECANCELED). Callers should use abs_deadline==nsync_time_no_deadline for no + deadline, and cancel_note==NULL for no cancellation. nsync_cv_wait_with_deadline() + should be used in a loop, as with all Mesa-style condition variables. See + examples above. + + There are two reasons for using an absolute deadline, rather than a relative + timeout---these are why pthread_cond_timedwait() also uses an absolute + deadline. First, condition variable waits have to be used in a loop; with + an absolute time, the deadline does not have to be recomputed on each + iteration.
Second, in most real programmes, some activity (such as an RPC + to a server, or guaranteeing response time in a UI) has a + deadline imposed by the specification or the caller/user; relative delays + can shift arbitrarily with scheduling delays, and so after multiple waits + might extend beyond the expected deadline. Relative delays tend to be more + convenient mostly in tests and trivial examples than they are in real + programmes. */ +int nsync_cv_wait_with_deadline_generic (nsync_cv *pcv, void *pmu, + void (*lock) (void *), void (*unlock) (void *), + nsync_time abs_deadline, + nsync_note cancel_note) { + nsync_mu *cv_mu = NULL; + int is_reader_mu; + uint32_t old_word; + uint32_t remove_count; + int sem_outcome; + unsigned attempts; + int outcome = 0; + waiter *w; + IGNORE_RACES_START (); + w = nsync_waiter_new_ (); + ATM_STORE (&w->nw.waiting, 1); + w->cond.f = NULL; /* Not using a conditional critical section. */ + w->cond.v = NULL; + w->cond.eq = NULL; + if (lock == &void_mu_lock || + lock == (void (*) (void *)) &nsync_mu_lock || + lock == (void (*) (void *)) &nsync_mu_rlock) { + cv_mu = (nsync_mu *) pmu; + } + w->cv_mu = cv_mu; /* If *pmu is an nsync_mu, record its address, else record NULL. */ + is_reader_mu = 0; /* If true, an nsync_mu in reader mode. */ + if (cv_mu == NULL) { + w->l_type = NULL; + } else { + uint32_t old_mu_word = ATM_LOAD (&cv_mu->word); + int is_writer = (old_mu_word & MU_WHELD_IF_NON_ZERO) != 0; + int is_reader = (old_mu_word & MU_RHELD_IF_NON_ZERO) != 0; + if (is_writer) { + if (is_reader) { + nsync_panic_ ("mu held in reader and writer mode simultaneously " + "on entry to nsync_cv_wait_with_deadline()\n"); + } + w->l_type = nsync_writer_type_; + } else if (is_reader) { + w->l_type = nsync_reader_type_; + is_reader_mu = 1; + } else { + nsync_panic_ ("mu not held on entry to nsync_cv_wait_with_deadline()\n"); + } + } + + /* acquire spinlock, set non-empty */ + old_word = nsync_spin_test_and_set_ (&pcv->word, CV_SPINLOCK, CV_SPINLOCK|CV_NON_EMPTY, 0); + pcv->waiters = nsync_dll_make_last_in_list_ (pcv->waiters, &w->nw.q); + remove_count = ATM_LOAD (&w->remove_count); + /* Release the spin lock. */ + ATM_STORE_REL (&pcv->word, old_word|CV_NON_EMPTY); /* release store */ + + /* Release *pmu. */ + if (is_reader_mu) { + nsync_mu_runlock (cv_mu); + } else { + (*unlock) (pmu); + } + + /* wait until awoken or a timeout. */ + sem_outcome = 0; + attempts = 0; + while (ATM_LOAD_ACQ (&w->nw.waiting) != 0) { /* acquire load */ + if (sem_outcome == 0) { + sem_outcome = nsync_sem_wait_with_cancel_ (w, abs_deadline, cancel_note); + } + + if (sem_outcome != 0 && ATM_LOAD (&w->nw.waiting) != 0) { + /* A timeout or cancellation occurred, and no wakeup. + Acquire *pcv's spinlock, and confirm. */ + old_word = nsync_spin_test_and_set_ (&pcv->word, CV_SPINLOCK, + CV_SPINLOCK, 0); + /* Check that w wasn't removed from the queue after we + checked above, but before we acquired the spinlock. + The test of remove_count confirms that the waiter *w + is still governed by *pcv's spinlock; otherwise, some + other thread is about to set w.waiting==0. */ + if (ATM_LOAD (&w->nw.waiting) != 0) { + if (remove_count == ATM_LOAD (&w->remove_count)) { + uint32_t old_value; + /* still in cv waiter queue */ + /* Not woken, so remove *w from cv + queue, and declare a + timeout/cancellation.
*/ + outcome = sem_outcome; + pcv->waiters = nsync_dll_remove_ (pcv->waiters, + &w->nw.q); + do { + old_value = ATM_LOAD (&w->remove_count); + } while (!ATM_CAS (&w->remove_count, old_value, old_value+1)); + if (nsync_dll_is_empty_ (pcv->waiters)) { + old_word &= ~(CV_NON_EMPTY); + } + ATM_STORE_REL (&w->nw.waiting, 0); /* release store */ + } + } + /* Release spinlock. */ + ATM_STORE_REL (&pcv->word, old_word); /* release store */ + } + + if (ATM_LOAD (&w->nw.waiting) != 0) { + /* The delay here causes this thread ultimately to + yield to another that has dequeued this thread, but + has not yet set the waiting field to zero; a + cancellation or timeout may prevent this thread + from blocking above on the semaphore. */ + attempts = nsync_spin_delay_ (attempts); + } + } + + if (cv_mu != NULL && w->cv_mu == NULL) { /* waiter was moved to *pmu's queue, and woken. */ + /* Requeue on *pmu using existing waiter struct; current thread + is the designated waker. */ + nsync_mu_lock_slow_ (cv_mu, w, MU_DESIG_WAKER, w->l_type); + nsync_waiter_free_ (w); + } else { + /* Traditional case: We've woken from the cv, and need to reacquire *pmu. */ + nsync_waiter_free_ (w); + if (is_reader_mu) { + nsync_mu_rlock (cv_mu); + } else { + (*lock) (pmu); + } + } + IGNORE_RACES_END (); + return (outcome); +} + +/* Wake at least one thread if any are currently blocked on *pcv. If + the chosen thread is a reader on an nsync_mu, wake all readers and, if + possible, a writer. */ +void nsync_cv_signal (nsync_cv *pcv) { + IGNORE_RACES_START (); + if ((ATM_LOAD_ACQ (&pcv->word) & CV_NON_EMPTY) != 0) { /* acquire load */ + nsync_dll_list_ to_wake_list = NULL; /* waiters that we will wake */ + int all_readers = 0; + /* acquire spinlock */ + uint32_t old_word = nsync_spin_test_and_set_ (&pcv->word, CV_SPINLOCK, + CV_SPINLOCK, 0); + if (!nsync_dll_is_empty_ (pcv->waiters)) { + /* Point to first waiter that enqueued itself, and + detach it from all others. */ + struct nsync_waiter_s *first_nw; + nsync_dll_element_ *first = nsync_dll_first_ (pcv->waiters); + pcv->waiters = nsync_dll_remove_ (pcv->waiters, first); + first_nw = DLL_NSYNC_WAITER (first); + if ((first_nw->flags & NSYNC_WAITER_FLAG_MUCV) != 0) { + uint32_t old_value; + do { + old_value = + ATM_LOAD (&DLL_WAITER (first)->remove_count); + } while (!ATM_CAS (&DLL_WAITER (first)->remove_count, + old_value, old_value+1)); + } + to_wake_list = nsync_dll_make_last_in_list_ (to_wake_list, first); + if ((first_nw->flags & NSYNC_WAITER_FLAG_MUCV) != 0 && + DLL_WAITER (first)->l_type == nsync_reader_type_) { + int woke_writer; + /* If the first waiter is a reader, wake all readers, and, + if possible, one writer. This allows reader-regions + to be added to a monitor without invalidating code in which + a client has optimized broadcast calls by converting them to + signal calls. In particular, we wake a writer when waking + readers because the readers will not invalidate the condition + that motivated the client to call nsync_cv_signal(). But we + wake at most one writer because the first writer may invalidate + the condition; the client is expecting only one writer to be + able to make use of the wakeup, or he would have called + nsync_cv_broadcast().
*/ + nsync_dll_element_ *p = NULL; + nsync_dll_element_ *next = NULL; + all_readers = 1; + woke_writer = 0; + for (p = nsync_dll_first_ (pcv->waiters); p != NULL; p = next) { + struct nsync_waiter_s *p_nw = DLL_NSYNC_WAITER (p); + int should_wake; + next = nsync_dll_next_ (pcv->waiters, p); + should_wake = 0; + if ((p_nw->flags & NSYNC_WAITER_FLAG_MUCV) != 0 && + DLL_WAITER (p)->l_type == nsync_reader_type_) { + should_wake = 1; + } else if (!woke_writer) { + woke_writer = 1; + all_readers = 0; + should_wake = 1; + } + if (should_wake) { + pcv->waiters = nsync_dll_remove_ (pcv->waiters, p); + if ((p_nw->flags & NSYNC_WAITER_FLAG_MUCV) != 0) { + uint32_t old_value; + do { + old_value = ATM_LOAD ( + &DLL_WAITER (p)->remove_count); + } while (!ATM_CAS (&DLL_WAITER (p)->remove_count, + old_value, old_value+1)); + } + to_wake_list = nsync_dll_make_last_in_list_ ( + to_wake_list, p); + } + } + } + if (nsync_dll_is_empty_ (pcv->waiters)) { + old_word &= ~(CV_NON_EMPTY); + } + } + /* Release spinlock. */ + ATM_STORE_REL (&pcv->word, old_word); /* release store */ + if (!nsync_dll_is_empty_ (to_wake_list)) { + wake_waiters (to_wake_list, all_readers); + } + } + IGNORE_RACES_END (); +} + +/* Wake all threads currently blocked on *pcv. */ +void nsync_cv_broadcast (nsync_cv *pcv) { + IGNORE_RACES_START (); + if ((ATM_LOAD_ACQ (&pcv->word) & CV_NON_EMPTY) != 0) { /* acquire load */ + nsync_dll_element_ *p; + nsync_dll_element_ *next; + int all_readers; + nsync_dll_list_ to_wake_list = NULL; /* waiters that we will wake */ + /* acquire spinlock */ + nsync_spin_test_and_set_ (&pcv->word, CV_SPINLOCK, CV_SPINLOCK, 0); + p = NULL; + next = NULL; + all_readers = 1; + /* Wake entire waiter list, which we leave empty. */ + for (p = nsync_dll_first_ (pcv->waiters); p != NULL; p = next) { + struct nsync_waiter_s *p_nw = DLL_NSYNC_WAITER (p); + next = nsync_dll_next_ (pcv->waiters, p); + all_readers = all_readers && (p_nw->flags & NSYNC_WAITER_FLAG_MUCV) != 0 && + (DLL_WAITER (p)->l_type == nsync_reader_type_); + pcv->waiters = nsync_dll_remove_ (pcv->waiters, p); + if ((p_nw->flags & NSYNC_WAITER_FLAG_MUCV) != 0) { + uint32_t old_value; + do { + old_value = ATM_LOAD (&DLL_WAITER (p)->remove_count); + } while (!ATM_CAS (&DLL_WAITER (p)->remove_count, + old_value, old_value+1)); + } + to_wake_list = nsync_dll_make_last_in_list_ (to_wake_list, p); + } + /* Release spinlock and mark queue empty. */ + ATM_STORE_REL (&pcv->word, 0); /* release store */ + if (!nsync_dll_is_empty_ (to_wake_list)) { /* Wake them. */ + wake_waiters (to_wake_list, all_readers); + } + } + IGNORE_RACES_END (); +} + +/* Wait with deadline, using an nsync_mu. */ +int nsync_cv_wait_with_deadline (nsync_cv *pcv, nsync_mu *pmu, + nsync_time abs_deadline, + nsync_note cancel_note) { + return (nsync_cv_wait_with_deadline_generic (pcv, pmu, &void_mu_lock, + &void_mu_unlock, + abs_deadline, cancel_note)); +} + +/* Atomically release *pmu and block the caller on *pcv. Wait + until awakened by a call to nsync_cv_signal() or nsync_cv_broadcast(), or a spurious + wakeup. Then reacquire *pmu, and return. The call is equivalent to a call + to nsync_cv_wait_with_deadline() with abs_deadline==nsync_time_no_deadline, and a NULL + cancel_note. It should be used in a loop, as with all standard Mesa-style + condition variables. See examples above.
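+ + For instance, the canonical Mesa-style loop looks like this (a sketch; mu, cv, q, and queue_is_empty are hypothetical): + + nsync_mu_lock (&mu); + while (queue_is_empty (&q)) { + nsync_cv_wait (&cv, &mu); + } + ...use the state guarded by mu; the condition is guaranteed only while mu is held... + nsync_mu_unlock (&mu);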
*/ +void nsync_cv_wait (nsync_cv *pcv, nsync_mu *pmu) { + nsync_cv_wait_with_deadline (pcv, pmu, nsync_time_no_deadline, NULL); +} + +static nsync_time cv_ready_time (void *v UNUSED, struct nsync_waiter_s *nw) { + nsync_time r; + r = (nw == NULL || ATM_LOAD_ACQ (&nw->waiting) != 0? nsync_time_no_deadline : nsync_time_zero); + return (r); +} + +static int cv_enqueue (void *v, struct nsync_waiter_s *nw) { + nsync_cv *pcv = (nsync_cv *) v; + /* acquire spinlock */ + uint32_t old_word = nsync_spin_test_and_set_ (&pcv->word, CV_SPINLOCK, CV_SPINLOCK, 0); + pcv->waiters = nsync_dll_make_last_in_list_ (pcv->waiters, &nw->q); + ATM_STORE (&nw->waiting, 1); + /* Release spinlock. */ + ATM_STORE_REL (&pcv->word, old_word | CV_NON_EMPTY); /* release store */ + return (1); +} + +static int cv_dequeue (void *v, struct nsync_waiter_s *nw) { + nsync_cv *pcv = (nsync_cv *) v; + int was_queued = 0; + /* acquire spinlock */ + uint32_t old_word = nsync_spin_test_and_set_ (&pcv->word, CV_SPINLOCK, CV_SPINLOCK, 0); + if (ATM_LOAD_ACQ (&nw->waiting) != 0) { + pcv->waiters = nsync_dll_remove_ (pcv->waiters, &nw->q); + ATM_STORE (&nw->waiting, 0); + was_queued = 1; + } + if (nsync_dll_is_empty_ (pcv->waiters)) { + old_word &= ~(CV_NON_EMPTY); + } + /* Release spinlock. */ + ATM_STORE_REL (&pcv->word, old_word); /* release store */ + return (was_queued); +} + +const struct nsync_waitable_funcs_s nsync_cv_waitable_funcs = { + &cv_ready_time, + &cv_enqueue, + &cv_dequeue +}; + +NSYNC_CPP_END_ diff --git a/libc/thread/debug.c b/libc/thread/debug.c new file mode 100644 index 000000000..c2a617744 --- /dev/null +++ b/libc/thread/debug.c @@ -0,0 +1,294 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +/* Routines for debugging. */ + +#include "libc/thread/nsync_cpp.h" +#include "libc/thread/platform.h" +#include "libc/thread/compiler.h" +#include "libc/thread/cputype.h" +#include "libc/thread/nsync.h" +#include "libc/thread/dll.h" +#include "libc/thread/sem.h" +#include "libc/thread/wait_internal.h" +#include "libc/thread/common.h" +#include "libc/thread/atomic.h" + +NSYNC_CPP_START_ + +/* ---------- */ + +/* An emit_buf represents a buffer into which debug information can + be written. */ +struct emit_buf { + char *start; /* start of buffer */ + int len; /* length of buffer */ + int pos; /* position of next character to be written */ + int overflow; /* non-zero iff buffer overflow has occurred */ +}; + +/* Initialize *b to point to start[0, .., len-1], and return b. */ +static struct emit_buf *emit_init (struct emit_buf *b, char *start, int len) { + b->start = start; + b->len = len; + b->pos = 0; + b->overflow = 0; + return (b); +} + + +/* Write character c to buffer *b.
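+ If the buffer is full, the final bytes already written are overwritten with "..." and b->overflow is set, so truncated output is visibly marked.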
+ */
+static void emit_c (struct emit_buf *b, int c) {
+	if (b->pos < b->len) {
+		b->start[b->pos++] = c;
+	} else if (!b->overflow) {
+		static const char suffix[] = "...";
+		const char *s = &suffix[sizeof (suffix)]; /* past nul */
+		char *p = &b->start[b->len]; /* past end */
+		while (s > suffix && p > b->start) {
+			*--p = *--s;
+		}
+		b->overflow = 1;
+	}
+}
+
+/* A printf-like function that writes to an emit_buf.
+   It understands only the format specifiers %s (const char *), and %i
+   (uintptr_t in hex), with no modifiers. */
+static void emit_print (struct emit_buf *b, const char *fmt, ...) {
+	va_list ap;
+	va_start (ap, fmt);
+	while (*fmt != 0) {
+		int c = *fmt++;
+		if (c != '%') {
+			emit_c (b, c);
+		} else {
+			c = *fmt++;
+			if (c == 's') {
+				const char *s = va_arg (ap, const char *);
+				while (*s != 0) {
+					emit_c (b, *s++);
+				}
+			} else if (c == 'i') {
+				uintptr_t n = va_arg (ap, uintptr_t);
+				int i;
+				for (i = 0; (n >> i) >= 0x10; i += 4) {
+				}
+				for (; i >= 0; i -= 4) {
+					emit_c (b, "0123456789abcdef"[(n >> i) & 0xf]);
+				}
+			} else {
+				ASSERT (0);
+			}
+		}
+	}
+	va_end (ap);
+}
+
+/* Map a bit in a uint32_t to a human-readable name. */
+struct bit_name {
+	uint32_t mask;
+	const char *name;
+};
+
+/* names for bits in a mu word */
+static const struct bit_name mu_bit[] = {
+	{ MU_WLOCK, "wlock" },
+	{ MU_SPINLOCK, "spin" },
+	{ MU_WAITING, "wait" },
+	{ MU_DESIG_WAKER, "desig" },
+	{ MU_CONDITION, "cond" },
+	{ MU_WRITER_WAITING, "writer" },
+	{ MU_LONG_WAIT, "long" },
+	{ MU_ALL_FALSE, "false" },
+	{ 0, "" } /* sentinel */
+};
+
+/* names for bits in a cv word */
+static const struct bit_name cv_bit[] = {
+	{ CV_SPINLOCK, "spin" },
+	{ CV_NON_EMPTY, "wait" },
+	{ 0, "" } /* sentinel */
+};
+
+/* names for bits in a waiter flags word */
+static const struct bit_name waiter_flags_bit[] = {
+	{ WAITER_RESERVED, "rsrvd" },
+	{ WAITER_IN_USE, "in_use" },
+	{ 0, "" } /* sentinel */
+};
+
+/* Emit the names of bits in word to buffer *b using names[] */
+static void emit_word (struct emit_buf *b, const struct bit_name *name, uint32_t word) {
+	int i;
+	for (i = 0; name[i].mask != 0; i++) {
+		if ((word & name[i].mask) != 0) {
+			emit_print (b, " %s", name[i].name);
+		}
+	}
+}
+
+/* Emit the waiter queue *q to *b. */
+static void emit_waiters (struct emit_buf *b, nsync_dll_list_ list) {
+	nsync_dll_element_ *p = nsync_dll_first_ (list);
+	nsync_dll_element_ *next;
+	if (p != NULL) {
+		emit_print (b, "\nwaiters =\n");
+	}
+	for (; p != NULL && !b->overflow; p = next) {
+		struct nsync_waiter_s *nw = DLL_NSYNC_WAITER (p);
+		waiter *w = DLL_WAITER (p);
+		next = NULL;
+		emit_print (b, " %i", (uintptr_t) w);
+		if (w->tag != WAITER_TAG) {
+			emit_print (b, " bad WAITER_TAG %i",
+				    (uintptr_t) w->tag);
+		} else {
+			next = nsync_dll_next_ (list, p);
+			if (nw->tag != NSYNC_WAITER_TAG) {
+				emit_print (b, " bad NSYNC_WAITER_TAG %i",
+					    (uintptr_t) nw->tag);
+			} else {
+				emit_print (b, " embedded=%i waiting=%i",
+					    (uintptr_t) (nw->flags & NSYNC_WAITER_FLAG_MUCV),
+					    (uintptr_t) ATM_LOAD (&nw->waiting));
+			}
+			emit_word (b, waiter_flags_bit, w->flags);
+			emit_print (b, " %s removes=%i cond=(%i %i %i)",
+				    w->l_type == nsync_writer_type_? "writer" :
+				    w->l_type == nsync_reader_type_?
"reader" : + "??????", + (uintptr_t) ATM_LOAD (&w->remove_count), + (uintptr_t) w->cond.f, + (uintptr_t) w->cond.v, + (uintptr_t) w->cond.eq); + if (w->same_condition.next != &w->same_condition) { + emit_print (b, " same_as %i", + (uintptr_t) DLL_WAITER_SAMECOND ( + w->same_condition.next)); + } + } + emit_c (b, '\n'); + } +} + +/* Emit to *b the state of *mu, and return a pointer to *b's buffer. + + If blocking!=0, print_waiters!=0, and *mu's waiter list is non-empty, the + call will block until it can acquire the spinlock. + If print_waiters!=0, the waiter list is printed. + The spinlock is released before return if it was acquired. + blocking==0 && print_waiters!=0 is unsafe and is intended for use within + interactive debuggers. */ +static char *emit_mu_state (struct emit_buf *b, nsync_mu *mu, + int blocking, int print_waiters) { + uintptr_t word; + uintptr_t readers; + int acquired = 0; + IGNORE_RACES_START (); + word = ATM_LOAD (&mu->word); + if ((word & MU_WAITING) != 0 && print_waiters && /* can benefit from lock */ + (blocking || (word & MU_SPINLOCK) == 0)) { /* willing, or no need to wait */ + word = nsync_spin_test_and_set_ (&mu->word, MU_SPINLOCK, MU_SPINLOCK, 0); + acquired = 1; + } + readers = word / MU_RLOCK; + emit_print (b, "mu 0x%i -> 0x%i = {", (uintptr_t) mu, word); + emit_word (b, mu_bit, word); + if (readers != 0) { + emit_print (b, " readers=0x%i", readers); + } + emit_print (b, " }"); + if (print_waiters) { + emit_waiters (b, mu->waiters); + } + if (acquired) { + ATM_STORE_REL (&mu->word, word); /* release store */ + } + emit_c (b, 0); + IGNORE_RACES_END (); + return (b->start); +} + +/* Emit to *b the state of *cv, and return a pointer to *b's buffer. + + If blocking!=0, print_waiters!=0, and *cv's waiter list is non-empty, the + call will block until it can acquire the spinlock. + If print_waiters!=0, the waiter list is printed. + The spinlock is released before return if it was acquired. + blocking==0 && print_waiters!=0 is unsafe and is intended for use within + interactive debuggers. 
*/ +static char *emit_cv_state (struct emit_buf *b, nsync_cv *cv, + int blocking, int print_waiters) { + uintptr_t word; + int acquired = 0; + IGNORE_RACES_START (); + word = ATM_LOAD (&cv->word); + if ((word & CV_NON_EMPTY) != 0 && print_waiters && /* can benefit from lock */ + (blocking || (word & CV_SPINLOCK) == 0)) { /* willing, or no need to wait */ + word = nsync_spin_test_and_set_ (&cv->word, CV_SPINLOCK, CV_SPINLOCK, 0); + acquired = 1; + } + emit_print (b, "cv 0x%i -> 0x%i = {", (uintptr_t) cv, word); + emit_word (b, cv_bit, word); + emit_print (b, " }"); + if (print_waiters) { + emit_waiters (b, cv->waiters); + } + if (acquired) { + ATM_STORE_REL (&cv->word, word); /* release store */ + } + emit_c (b, 0); + IGNORE_RACES_END (); + return (b->start); +} + +char *nsync_mu_debug_state (nsync_mu *mu, char *buf, int n) { + struct emit_buf b; + return (emit_mu_state (emit_init (&b, buf, n), mu, 0, 0)); +} + +char *nsync_cv_debug_state (nsync_cv *cv, char *buf, int n) { + struct emit_buf b; + return (emit_cv_state (emit_init (&b, buf, n), cv, 0, 0)); +} + +char *nsync_mu_debug_state_and_waiters (nsync_mu *mu, char *buf, int n) { + struct emit_buf b; + return (emit_mu_state (emit_init (&b, buf, n), mu, 1, 1)); +} + +char *nsync_cv_debug_state_and_waiters (nsync_cv *cv, char *buf, int n) { + struct emit_buf b; + return (emit_cv_state (emit_init (&b, buf, n), cv, 1, 1)); +} + +static char nsync_debug_buf[1024]; + +char *nsync_mu_debugger (nsync_mu *mu) { + struct emit_buf b; + return (emit_mu_state (emit_init (&b, nsync_debug_buf, + (int) sizeof (nsync_debug_buf)), + mu, 0, 1)); +} +char *nsync_cv_debugger (nsync_cv *cv) { + struct emit_buf b; + return (emit_cv_state (emit_init (&b, nsync_debug_buf, + (int) sizeof (nsync_debug_buf)), + cv, 0, 1)); +} + +NSYNC_CPP_END_ diff --git a/libc/thread/dll.c b/libc/thread/dll.c new file mode 100644 index 000000000..9fc7e0482 --- /dev/null +++ b/libc/thread/dll.c @@ -0,0 +1,143 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "libc/thread/nsync_cpp.h" +#include "libc/thread/platform.h" +#include "libc/thread/compiler.h" +#include "libc/thread/cputype.h" +#include "libc/thread/dll.h" + +NSYNC_CPP_START_ + +/* Initialize *e. */ +void nsync_dll_init_ (nsync_dll_element_ *e, void *container) { + e->next = e; + e->prev = e; + e->container = container; +} + +/* Return whether list is empty. */ +int nsync_dll_is_empty_ (nsync_dll_list_ list) { + return (list == NULL); +} + +/* Remove *e from list, and returns the new list. */ +nsync_dll_list_ nsync_dll_remove_ (nsync_dll_list_ list, nsync_dll_element_ *e) { + if (list == e) { /* removing tail of list */ + if (list->prev == list) { + list = NULL; /* removing only element of list */ + } else { + list = list->prev; + } + } + e->next->prev = e->prev; + e->prev->next = e->next; + e->next = e; + e->prev = e; + return (list); +} + +/* Cause element *n and its successors to come after element *p. 
+ Requires n and p are non-NULL and do not point at elements of the same list. + + Unlike the other operations in this API, this operation acts on + two circular lists of elements, rather than on a "head" location that points + to such a circular list. + + If the two lists are p->p_2nd->p_mid->p_last->p and n->n_2nd->n_mid->n_last->n, + then after nsync_dll_splice_after_ (p, n), the p list would be: + p->n->n_2nd->n_mid->n_last->p_2nd->p_mid->p_last->p. */ +void nsync_dll_splice_after_ (nsync_dll_element_ *p, nsync_dll_element_ *n) { + nsync_dll_element_ *p_2nd = p->next; + nsync_dll_element_ *n_last = n->prev; + p->next = n; /* n follows p */ + n->prev = p; + n_last->next = p_2nd; /* remainder of p-list follows last of n-list */ + p_2nd->prev = n_last; +} + +/* Make element *e the first element of list, and return + the list. The resulting list will have *e as its first element, followed by + any elements in the same list as *e, followed by the elements that were + previously in list. Requires that *e not be in list. If e==NULL, list is + returned unchanged. + + Suppose the e list is e->e_2nd->e_mid->e_last->e. + Recall that a head "list" points to the last element of its list. + If list is initially null, then the outcome is: + result = e_last->e->e_2nd->e_mid->e_last + If list is initially list->list_last->list_1st->list_mid->list_last, + then the outcome is: + result = list_last->e->e_2nd->e_mid->e_last->list_1st->list_mid->list_last + */ +nsync_dll_list_ nsync_dll_make_first_in_list_ (nsync_dll_list_ list, nsync_dll_element_ *e) { + if (e != NULL) { + if (list == NULL) { + list = e->prev; /*e->prev is e_last*/ + } else { + nsync_dll_splice_after_ (list, e); + } + } + return (list); +} + +/* Make element *e the last element of list, and return + the list. The resulting list will have *e as its last element, preceded by + any elements in the same list as *e, preceded by the elements that were + previously in list. Requires that *e not be in list. If e==NULL, list is + returned unchanged. */ +nsync_dll_list_ nsync_dll_make_last_in_list_ (nsync_dll_list_ list, nsync_dll_element_ *e) { + if (e != NULL) { + nsync_dll_make_first_in_list_ (list, e->next); + list = e; + } + return (list); +} + +/* Return a pointer to the first element of list, or NULL if list is empty. */ +nsync_dll_element_ *nsync_dll_first_ (nsync_dll_list_ list) { + nsync_dll_element_ *first = NULL; + if (list != NULL) { + first = list->next; + } + return (first); +} + +/* Return a pointer to the last element of list, or NULL if list is empty. */ +nsync_dll_element_ *nsync_dll_last_ (nsync_dll_list_ list) { + return (list); +} + +/* Return a pointer to the next element of list following *e, + or NULL if there is no such element. */ +nsync_dll_element_ *nsync_dll_next_ (nsync_dll_list_ list, nsync_dll_element_ *e) { + nsync_dll_element_ *next = NULL; + if (e != list) { + next = e->next; + } + return (next); +} + +/* Return a pointer to the previous element of list following *e, + or NULL if there is no such element. */ +nsync_dll_element_ *nsync_dll_prev_ (nsync_dll_list_ list, nsync_dll_element_ *e) { + nsync_dll_element_ *prev = NULL; + if (e != list->next) { + prev = e->prev; + } + return (prev); +} + +NSYNC_CPP_END_ diff --git a/libc/thread/dll.h b/libc/thread/dll.h new file mode 100644 index 000000000..95c7a53ad --- /dev/null +++ b/libc/thread/dll.h @@ -0,0 +1,78 @@ +// clang-format off +/* Copyright 2016 Google Inc. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#ifndef NSYNC_INTERNAL_DLL_H_ +#define NSYNC_INTERNAL_DLL_H_ + +/* Doubly linked lists. */ + +#include "libc/thread/nsync_cpp.h" +NSYNC_CPP_START_ + +/* A nsync_dll_element_ represents an element of a doubly-linked list of waiters. */ +typedef struct nsync_dll_element_s_ { + struct nsync_dll_element_s_ *next; + struct nsync_dll_element_s_ *prev; + void *container; /* points to the struct this nsync_dll struct is embedded in. */ +} nsync_dll_element_; + +/* A nsync_dll_list_ represents a list of nsync_dll_elements_. */ +typedef nsync_dll_element_ *nsync_dll_list_; /* last elem of circular list; nil => empty; first is x.next. */ + + +/* Initialize *e. */ +void nsync_dll_init_ (nsync_dll_element_ *e, void *container); + +/* Return whether list is empty. */ +int nsync_dll_is_empty_ (nsync_dll_list_ list); + +/* Remove *e from list, and returns the new list. */ +nsync_dll_list_ nsync_dll_remove_ (nsync_dll_list_ list, nsync_dll_element_ *e); + +/* Cause element *n and its successors to come after element *p. + Requires n and p are non-NULL and do not point at elements of the same list. */ +void nsync_dll_splice_after_ (nsync_dll_element_ *p, nsync_dll_element_ *n); + +/* Make element *e the first element of list, and return + the list. The resulting list will have *e as its first element, followed by + any elements in the same list as *e, followed by the elements that were + previously in list. Requires that *e not be in list. If e==NULL, list is + returned unchanged. */ +nsync_dll_list_ nsync_dll_make_first_in_list_ (nsync_dll_list_ list, nsync_dll_element_ *e); + +/* Make element *e the last element of list, and return + the list. The resulting list will have *e as its last element, preceded by + any elements in the same list as *e, preceded by the elements that were + previously in list. Requires that *e not be in list. If e==NULL, list is + returned unchanged. */ +nsync_dll_list_ nsync_dll_make_last_in_list_ (nsync_dll_list_ list, nsync_dll_element_ *e); + +/* Return a pointer to the first element of list, or NULL if list is empty. */ +nsync_dll_element_ *nsync_dll_first_ (nsync_dll_list_ list); + +/* Return a pointer to the last element of list, or NULL if list is empty. */ +nsync_dll_element_ *nsync_dll_last_ (nsync_dll_list_ list); + +/* Return a pointer to the next element of list following *e, + or NULL if there is no such element. */ +nsync_dll_element_ *nsync_dll_next_ (nsync_dll_list_ list, nsync_dll_element_ *e); + +/* Return a pointer to the previous element of list following *e, + or NULL if there is no such element. */ +nsync_dll_element_ *nsync_dll_prev_ (nsync_dll_list_ list, nsync_dll_element_ *e); + +NSYNC_CPP_END_ + +#endif /*NSYNC_INTERNAL_DLL_H_*/ diff --git a/libc/thread/headers.h b/libc/thread/headers.h new file mode 100644 index 000000000..5f74394d2 --- /dev/null +++ b/libc/thread/headers.h @@ -0,0 +1,27 @@ +// clang-format off +/* Copyright 2016 Google Inc. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#ifndef NSYNC_INTERNAL_HEADERS_H_ +#define NSYNC_INTERNAL_HEADERS_H_ + +#include "libc/thread/nsync_cpp.h" +#include "libc/thread/platform.h" +#include "libc/thread/compiler.h" +#include "libc/thread/cputype.h" +#include "libc/thread/nsync.h" +#include "libc/thread/atomic.h" +#include "libc/thread/sem.h" + +#endif /*NSYNC_INTERNAL_HEADERS_H_*/ diff --git a/libc/thread/mu.c b/libc/thread/mu.c new file mode 100644 index 000000000..8fd1b5196 --- /dev/null +++ b/libc/thread/mu.c @@ -0,0 +1,547 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "libc/thread/nsync_cpp.h" +#include "libc/thread/platform.h" +#include "libc/thread/compiler.h" +#include "libc/thread/cputype.h" +#include "libc/thread/nsync.h" +#include "libc/thread/dll.h" +#include "libc/thread/sem.h" +#include "libc/thread/wait_internal.h" +#include "libc/thread/common.h" +#include "libc/thread/atomic.h" + +NSYNC_CPP_START_ + +/* Initialize *mu. */ +void nsync_mu_init (nsync_mu *mu) { + memset ((void *) mu, 0, sizeof (*mu)); +} + +/* Release the mutex spinlock. */ +static void mu_release_spinlock (nsync_mu *mu) { + uint32_t old_word = ATM_LOAD (&mu->word); + while (!ATM_CAS_REL (&mu->word, old_word, old_word & ~MU_SPINLOCK)) { + old_word = ATM_LOAD (&mu->word); + } +} + +/* Lock *mu using the specified lock_type, waiting on *w if necessary. + "clear" should be zero if the thread has not previously slept on *mu, and + MU_DESIG_WAKER if it has; this represents bits that nsync_mu_lock_slow_() must clear when + it either acquires or sleeps on *mu. The caller owns *w on return; it is in a valid + state to be returned to the free pool. */ +void nsync_mu_lock_slow_ (nsync_mu *mu, waiter *w, uint32_t clear, lock_type *l_type) { + uint32_t zero_to_acquire; + uint32_t wait_count; + uint32_t long_wait; + unsigned attempts = 0; /* attempt count; used for spinloop backoff */ + w->cv_mu = NULL; /* not a cv wait */ + w->cond.f = NULL; /* Not using a conditional critical section. */ + w->cond.v = NULL; + w->cond.eq = NULL; + w->l_type = l_type; + zero_to_acquire = l_type->zero_to_acquire; + if (clear != 0) { + /* Only the constraints of mutual exclusion should stop a designated waker. */ + zero_to_acquire &= ~(MU_WRITER_WAITING | MU_LONG_WAIT); + } + wait_count = 0; /* number of times we waited, and were woken. 
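+				   Compared with LONG_WAIT_THRESHOLD below to
+				   trigger the starvation-avoidance mechanism.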
+ */
+	long_wait = 0; /* set to MU_LONG_WAIT when wait_count gets large */
+	for (;;) {
+		uint32_t old_word = ATM_LOAD (&mu->word);
+		if ((old_word & zero_to_acquire) == 0) {
+			/* lock can be acquired; try to acquire, possibly
+			   clearing MU_DESIG_WAKER and MU_LONG_WAIT. */
+			if (ATM_CAS_ACQ (&mu->word, old_word,
+					 (old_word+l_type->add_to_acquire) &
+					 ~(clear|long_wait|l_type->clear_on_acquire))) {
+				return;
+			}
+		} else if ((old_word&MU_SPINLOCK) == 0 &&
+			   ATM_CAS_ACQ (&mu->word, old_word,
+					(old_word|MU_SPINLOCK|long_wait|
+					 l_type->set_when_waiting) & ~(clear | MU_ALL_FALSE))) {
+
+			/* Spinlock is now held, and lock is held by someone
+			   else; MU_WAITING has also been set; queue ourselves.
+			   There's no need to adjust same_condition here,
+			   because w.condition==NULL. */
+			ATM_STORE (&w->nw.waiting, 1);
+			if (wait_count == 0) {
+				/* first wait goes to end of queue */
+				mu->waiters = nsync_dll_make_last_in_list_ (mu->waiters,
+									    &w->nw.q);
+			} else {
+				/* subsequent waits go to front of queue */
+				mu->waiters = nsync_dll_make_first_in_list_ (mu->waiters,
+									     &w->nw.q);
+			}
+
+			/* Release spinlock. Cannot use a store here, because
+			   the current thread does not hold the mutex. If
+			   another thread were a designated waker, the mutex
+			   holder could be concurrently unlocking, even though
+			   we hold the spinlock. */
+			mu_release_spinlock (mu);
+
+			/* wait until awoken. */
+			while (ATM_LOAD_ACQ (&w->nw.waiting) != 0) { /* acquire load */
+				nsync_mu_semaphore_p (&w->sem);
+			}
+			wait_count++;
+			/* If the thread has been woken more than this many
+			   times, and still not acquired, it sets the
+			   MU_LONG_WAIT bit to prevent threads that have not
+			   waited from acquiring. This is the starvation
+			   avoidance mechanism. The number is fairly high so
+			   that we continue to benefit from the throughput of
+			   not having running threads wait unless absolutely
+			   necessary. */
+			if (wait_count == LONG_WAIT_THRESHOLD) { /* repeatedly woken */
+				long_wait = MU_LONG_WAIT; /* force others to wait at least once */
+			}
+
+			attempts = 0;
+			clear = MU_DESIG_WAKER;
+			/* Threads that have been woken at least once don't care
+			   about waiting writers or long waiters. */
+			zero_to_acquire &= ~(MU_WRITER_WAITING | MU_LONG_WAIT);
+		}
+		attempts = nsync_spin_delay_ (attempts);
+	}
+}
+
+/* Attempt to acquire *mu in writer mode without blocking, and return non-zero
+   iff successful. Return non-zero with high probability if *mu was free on
+   entry. */
+int nsync_mu_trylock (nsync_mu *mu) {
+	int result;
+	IGNORE_RACES_START ();
+	if (ATM_CAS_ACQ (&mu->word, 0, MU_WADD_TO_ACQUIRE)) { /* acquire CAS */
+		result = 1;
+	} else {
+		uint32_t old_word = ATM_LOAD (&mu->word);
+		result = ((old_word & MU_WZERO_TO_ACQUIRE) == 0 &&
+			  ATM_CAS_ACQ (&mu->word, old_word,
+				       (old_word + MU_WADD_TO_ACQUIRE) & ~MU_WCLEAR_ON_ACQUIRE));
+	}
+	IGNORE_RACES_END ();
+	return (result);
+}
+
+/* Block until *mu is free and then acquire it in writer mode. */
+void nsync_mu_lock (nsync_mu *mu) {
+	IGNORE_RACES_START ();
+	if (!ATM_CAS_ACQ (&mu->word, 0, MU_WADD_TO_ACQUIRE)) { /* acquire CAS */
+		uint32_t old_word = ATM_LOAD (&mu->word);
+		if ((old_word&MU_WZERO_TO_ACQUIRE) != 0 ||
+		    !ATM_CAS_ACQ (&mu->word, old_word,
+				  (old_word+MU_WADD_TO_ACQUIRE) & ~MU_WCLEAR_ON_ACQUIRE)) {
+			waiter *w = nsync_waiter_new_ ();
+			nsync_mu_lock_slow_ (mu, w, 0, nsync_writer_type_);
+			nsync_waiter_free_ (w);
+		}
+	}
+	IGNORE_RACES_END ();
+}
+
+/* Attempt to acquire *mu in reader mode without blocking, and return non-zero
+   iff successful. Return non-zero with high probability if *mu was free on
+   entry. It may fail to acquire if a writer is waiting, to avoid starvation. */
+int nsync_mu_rtrylock (nsync_mu *mu) {
+	int result;
+	IGNORE_RACES_START ();
+	if (ATM_CAS_ACQ (&mu->word, 0, MU_RADD_TO_ACQUIRE)) { /* acquire CAS */
+		result = 1;
+	} else {
+		uint32_t old_word = ATM_LOAD (&mu->word);
+		result = ((old_word&MU_RZERO_TO_ACQUIRE) == 0 &&
+			  ATM_CAS_ACQ (&mu->word, old_word,
+				       (old_word+MU_RADD_TO_ACQUIRE) & ~MU_RCLEAR_ON_ACQUIRE));
+	}
+	IGNORE_RACES_END ();
+	return (result);
+}
+
+/* Block until *mu can be acquired in reader mode and then acquire it. */
+void nsync_mu_rlock (nsync_mu *mu) {
+	IGNORE_RACES_START ();
+	if (!ATM_CAS_ACQ (&mu->word, 0, MU_RADD_TO_ACQUIRE)) { /* acquire CAS */
+		uint32_t old_word = ATM_LOAD (&mu->word);
+		if ((old_word&MU_RZERO_TO_ACQUIRE) != 0 ||
+		    !ATM_CAS_ACQ (&mu->word, old_word,
+				  (old_word+MU_RADD_TO_ACQUIRE) & ~MU_RCLEAR_ON_ACQUIRE)) {
+			waiter *w = nsync_waiter_new_ ();
+			nsync_mu_lock_slow_ (mu, w, 0, nsync_reader_type_);
+			nsync_waiter_free_ (w);
+		}
+	}
+	IGNORE_RACES_END ();
+}
+
+/* Invoke the condition associated with *p, which is an element of
+   a "waiter" list. */
+static int condition_true (nsync_dll_element_ *p) {
+	return ((*DLL_WAITER (p)->cond.f) (DLL_WAITER (p)->cond.v));
+}
+
+/* If *p is an element of waiter_list (a list of "waiter" structs), return a
+   pointer to the next element of the list that has a different condition. */
+static nsync_dll_element_ *skip_past_same_condition (
+	nsync_dll_list_ waiter_list, nsync_dll_element_ *p) {
+	nsync_dll_element_ *next;
+	nsync_dll_element_ *last_with_same_condition =
+		&DLL_WAITER_SAMECOND (DLL_WAITER (p)->same_condition.prev)->nw.q;
+	if (last_with_same_condition != p && last_with_same_condition != p->prev) {
+		/* First in set with same condition, so skip to end. */
+		next = nsync_dll_next_ (waiter_list, last_with_same_condition);
+	} else {
+		next = nsync_dll_next_ (waiter_list, p);
+	}
+	return (next);
+}
+
+/* Merge the same_condition lists of *p and *n if they have the same non-NULL
+   condition. */
+void nsync_maybe_merge_conditions_ (nsync_dll_element_ *p, nsync_dll_element_ *n) {
+	if (p != NULL && n != NULL &&
+	    WAIT_CONDITION_EQ (&DLL_WAITER (p)->cond, &DLL_WAITER (n)->cond)) {
+		nsync_dll_splice_after_ (&DLL_WAITER (p)->same_condition,
+					 &DLL_WAITER (n)->same_condition);
+	}
+}
+
+/* Remove element *e from nsync_mu waiter queue mu_queue, fixing
+   up the same_condition list by merging the lists on either side if possible.
+   Also increment the waiter's remove_count. */
+nsync_dll_list_ nsync_remove_from_mu_queue_ (nsync_dll_list_ mu_queue, nsync_dll_element_ *e) {
+	/* Record previous and next elements in the original queue. */
+	nsync_dll_element_ *prev = e->prev;
+	nsync_dll_element_ *next = e->next;
+	uint32_t old_value;
+	/* Remove. */
+	mu_queue = nsync_dll_remove_ (mu_queue, e);
+	do {
+		old_value = ATM_LOAD (&DLL_WAITER (e)->remove_count);
+	} while (!ATM_CAS (&DLL_WAITER (e)->remove_count, old_value, old_value+1));
+	if (!nsync_dll_is_empty_ (mu_queue)) {
+		/* Fix up same_condition. */
+		nsync_dll_element_ *e_same_condition = &DLL_WAITER (e)->same_condition;
+
+		if (e_same_condition->next != e_same_condition) {
+			/* *e is linked to a same_condition neighbour---just remove it.
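+			   same_condition is a ring threaded through the waiters
+			   that share one condition, separate from the main queue
+			   links, so unlinking *e here cannot disturb the queue.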
*/ + e_same_condition->next->prev = e_same_condition->prev; + e_same_condition->prev->next = e_same_condition->next; + e_same_condition->next = e_same_condition; + e_same_condition->prev = e_same_condition; + } else if (prev != nsync_dll_last_ (mu_queue)) { + /* Merge the new neighbours together if we can. */ + nsync_maybe_merge_conditions_ (prev, next); + } + } + return (mu_queue); +} + +/* Unlock *mu and wake one or more waiters as appropriate after an unlock. + It is called with *mu held in mode l_type. */ +void nsync_mu_unlock_slow_ (nsync_mu *mu, lock_type *l_type) { + unsigned attempts = 0; /* attempt count; used for backoff */ + for (;;) { + uint32_t old_word = ATM_LOAD (&mu->word); + int testing_conditions = ((old_word & MU_CONDITION) != 0); + uint32_t early_release_mu = l_type->add_to_acquire; + uint32_t late_release_mu = 0; + if (testing_conditions) { + /* Convert to a writer lock, and release later. + - A writer lock is currently needed to test conditions + because exclusive access is needed to the list to + allow modification. The spinlock cannot be used + to achieve that, because an internal lock should not + be held when calling the external predicates. + - We must test conditions even though a reader region + cannot have made any new ones true because some + might have been true before the reader region started. + The MU_ALL_FALSE test below shortcuts the case where + the conditions are known all to be false. */ + early_release_mu = l_type->add_to_acquire - MU_WLOCK; + late_release_mu = MU_WLOCK; + } + if ((old_word&MU_WAITING) == 0 || (old_word&MU_DESIG_WAKER) != 0 || + (old_word & MU_RLOCK_FIELD) > MU_RLOCK || + (old_word & (MU_RLOCK|MU_ALL_FALSE)) == (MU_RLOCK|MU_ALL_FALSE)) { + /* no one to wake, there's a designated waker waking + up, there are still readers, or it's a reader and all waiters + have false conditions */ + if (ATM_CAS_REL (&mu->word, old_word, + (old_word - l_type->add_to_acquire) & + ~l_type->clear_on_uncontended_release)) { + return; + } + } else if ((old_word&MU_SPINLOCK) == 0 && + ATM_CAS_ACQ (&mu->word, old_word, + (old_word-early_release_mu)|MU_SPINLOCK|MU_DESIG_WAKER)) { + nsync_dll_list_ wake; + lock_type *wake_type; + uint32_t clear_on_release; + uint32_t set_on_release; + /* The spinlock is now held, and we've set the + designated wake flag, since we're likely to wake a + thread that will become that designated waker. If + there are conditions to check, the mutex itself is + still held. */ + + nsync_dll_element_ *p = NULL; + nsync_dll_element_ *next = NULL; + + /* Swap the entire mu->waiters list into the local + "new_waiters" list. This gives us exclusive access + to the list, even if we unlock the spinlock, which + we may do if checking conditions. The loop below + will grab more new waiters that arrived while we + were checking conditions, and terminates only if no + new waiters arrive in one loop iteration. */ + nsync_dll_list_ waiters = NULL; + nsync_dll_list_ new_waiters = mu->waiters; + mu->waiters = NULL; + + /* Remove a waiter from the queue, if possible. */ + wake = NULL; /* waiters to wake. */ + wake_type = NULL; /* type of waiter(s) on wake, or NULL if wake is empty. */ + clear_on_release = MU_SPINLOCK; + set_on_release = MU_ALL_FALSE; + while (!nsync_dll_is_empty_ (new_waiters)) { /* some new waiters to consider */ + p = nsync_dll_first_ (new_waiters); + if (testing_conditions) { + /* Should we continue to test conditions? 
*/ + if (wake_type == nsync_writer_type_) { + /* No, because we're already waking a writer, + and need wake no others.*/ + testing_conditions = 0; + } else if (wake_type == NULL && + DLL_WAITER (p)->l_type != nsync_reader_type_ && + DLL_WAITER (p)->cond.f == NULL) { + /* No, because we've woken no one, but the + first waiter is a writer with no condition, + so we will certainly wake it, and need wake + no others. */ + testing_conditions = 0; + } + } + /* If testing waiters' conditions, release the + spinlock while still holding the write lock. + This is so that the spinlock is not held + while the conditions are evaluated. */ + if (testing_conditions) { + mu_release_spinlock (mu); + } + + /* Process the new waiters picked up in this iteration of the + "while (!nsync_dll_is_empty_ (new_waiters))" loop, + and stop looking when we run out of waiters, or we find + a writer to wake up. */ + while (p != NULL && wake_type != nsync_writer_type_) { + int p_has_condition; + next = nsync_dll_next_ (new_waiters, p); + p_has_condition = (DLL_WAITER (p)->cond.f != NULL); + if (p_has_condition && !testing_conditions) { + nsync_panic_ ("checking a waiter condition " + "while unlocked\n"); + } + if (p_has_condition && !condition_true (p)) { + /* condition is false */ + /* skip to the end of the same_condition group. */ + next = skip_past_same_condition (new_waiters, p); + } else if (wake_type == NULL || + DLL_WAITER (p)->l_type == nsync_reader_type_) { + /* Wake this thread. */ + new_waiters = nsync_remove_from_mu_queue_ ( + new_waiters, p); + wake = nsync_dll_make_last_in_list_ (wake, p); + wake_type = DLL_WAITER (p)->l_type; + } else { + /* Failing to wake a writer + that could acquire if it + were first. */ + set_on_release |= MU_WRITER_WAITING; + set_on_release &= ~MU_ALL_FALSE; + } + p = next; + } + + if (p != NULL) { + /* Didn't search to end of list, so can't be sure + all conditions are false. */ + set_on_release &= ~MU_ALL_FALSE; + } + + /* If testing waiters' conditions, reacquire the spinlock + released above. */ + if (testing_conditions) { + nsync_spin_test_and_set_ (&mu->word, MU_SPINLOCK, + MU_SPINLOCK, 0); + } + + /* add the new_waiters to the last of the waiters. */ + nsync_maybe_merge_conditions_ (nsync_dll_last_ (waiters), + nsync_dll_first_ (new_waiters)); + waiters = nsync_dll_make_last_in_list_ (waiters, + nsync_dll_last_ (new_waiters)); + /* Pick up the next set of new waiters. */ + new_waiters = mu->waiters; + mu->waiters = NULL; + } + + /* Return the local waiter list to *mu. */ + mu->waiters = waiters; + + if (nsync_dll_is_empty_ (wake)) { + /* not waking a waiter => no designated waker */ + clear_on_release |= MU_DESIG_WAKER; + } + + if ((set_on_release & MU_ALL_FALSE) == 0) { + /* If not explicitly setting MU_ALL_FALSE, clear it. */ + clear_on_release |= MU_ALL_FALSE; + } + + if (nsync_dll_is_empty_ (mu->waiters)) { + /* no waiters left */ + clear_on_release |= MU_WAITING | MU_WRITER_WAITING | + MU_CONDITION | MU_ALL_FALSE; + } + + /* Release the spinlock, and possibly the lock if + late_release_mu is non-zero. Other bits are set or + cleared according to whether we woke any threads, + whether any waiters remain, and whether any of them + are writers. */ + old_word = ATM_LOAD (&mu->word); + while (!ATM_CAS_REL (&mu->word, old_word, + ((old_word-late_release_mu)|set_on_release) & + ~clear_on_release)) { /* release CAS */ + old_word = ATM_LOAD (&mu->word); + } + /* Wake the waiters. 
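+		   Each waiter is unlinked from the local wake list before its
+		   semaphore is V'd, because the woken thread owns its waiter
+		   struct and may reuse or free it as soon as it runs.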
*/ + for (p = nsync_dll_first_ (wake); p != NULL; p = next) { + next = nsync_dll_next_ (wake, p); + wake = nsync_dll_remove_ (wake, p); + ATM_STORE_REL (&DLL_NSYNC_WAITER (p)->waiting, 0); + nsync_mu_semaphore_v (&DLL_WAITER (p)->sem); + } + return; + } + attempts = nsync_spin_delay_ (attempts); + } +} + +/* Unlock *mu, which must be held in write mode, and wake waiters, if appropriate. */ +void nsync_mu_unlock (nsync_mu *mu) { + IGNORE_RACES_START (); + /* C is not a garbage-collected language, so we cannot release until we + can be sure that we will not have to touch the mutex again to wake a + waiter. Another thread could acquire, decrement a reference count + and deallocate the mutex before the current thread touched the mutex + word again. */ + if (!ATM_CAS_REL (&mu->word, MU_WLOCK, 0)) { + uint32_t old_word = ATM_LOAD (&mu->word); + /* Clear MU_ALL_FALSE because the critical section we're just + leaving may have made some conditions true. */ + uint32_t new_word = (old_word - MU_WLOCK) & ~MU_ALL_FALSE; + /* Sanity check: mutex must be held in write mode, and there + must be no readers. */ + if ((new_word & (MU_RLOCK_FIELD | MU_WLOCK)) != 0) { + if ((old_word & MU_RLOCK_FIELD) != 0) { + nsync_panic_ ("attempt to nsync_mu_unlock() an nsync_mu " + "held in read mode\n"); + } else { + nsync_panic_ ("attempt to nsync_mu_unlock() an nsync_mu " + "not held in write mode\n"); + } + } else if ((old_word & (MU_WAITING|MU_DESIG_WAKER)) == MU_WAITING || + !ATM_CAS_REL (&mu->word, old_word, new_word)) { + /* There are waiters and no designated waker, or + our initial CAS attempt failed, to use slow path. */ + nsync_mu_unlock_slow_ (mu, nsync_writer_type_); + } + } + IGNORE_RACES_END (); +} + +/* Unlock *mu, which must be held in read mode, and wake waiters, if appropriate. */ +void nsync_mu_runlock (nsync_mu *mu) { + IGNORE_RACES_START (); + /* See comment in nsync_mu_unlock(). */ + if (!ATM_CAS_REL (&mu->word, MU_RLOCK, 0)) { + uint32_t old_word = ATM_LOAD (&mu->word); + /* Sanity check: mutex must not be held in write mode and + reader count must not be 0. */ + if (((old_word ^ MU_WLOCK) & (MU_WLOCK | MU_RLOCK_FIELD)) == 0) { + if ((old_word & MU_WLOCK) != 0) { + nsync_panic_ ("attempt to nsync_mu_runlock() an nsync_mu " + "held in write mode\n"); + } else { + nsync_panic_ ("attempt to nsync_mu_runlock() an nsync_mu " + "not held in read mode\n"); + } + } else if ((old_word & (MU_WAITING | MU_DESIG_WAKER)) == MU_WAITING && + (old_word & (MU_RLOCK_FIELD|MU_ALL_FALSE)) == MU_RLOCK) { + /* There are waiters and no designated waker, the last + reader is unlocking, and not all waiters have a + false condition. So we must take the slow path to + attempt to wake a waiter. */ + nsync_mu_unlock_slow_ (mu, nsync_reader_type_); + } else if (!ATM_CAS_REL (&mu->word, old_word, old_word - MU_RLOCK)) { + /* CAS attempt failed, so take slow path. */ + nsync_mu_unlock_slow_ (mu, nsync_reader_type_); + } + } + IGNORE_RACES_END (); +} + +/* Abort if *mu is not held in write mode. */ +void nsync_mu_assert_held (const nsync_mu *mu) { + IGNORE_RACES_START (); + if ((ATM_LOAD (&mu->word) & MU_WHELD_IF_NON_ZERO) == 0) { + nsync_panic_ ("nsync_mu not held in write mode\n"); + } + IGNORE_RACES_END (); +} + +/* Abort if *mu is not held in read or write mode. */ +void nsync_mu_rassert_held (const nsync_mu *mu) { + IGNORE_RACES_START (); + if ((ATM_LOAD (&mu->word) & MU_ANY_LOCK) == 0) { + nsync_panic_ ("nsync_mu not held in some mode\n"); + } + IGNORE_RACES_END (); +} + +/* Return whether *mu is held in read mode. 
+   Requires that *mu is held in some mode. */
+int nsync_mu_is_reader (const nsync_mu *mu) {
+	uint32_t word;
+	IGNORE_RACES_START ();
+	word = ATM_LOAD (&mu->word);
+	if ((word & MU_ANY_LOCK) == 0) {
+		nsync_panic_ ("nsync_mu not held in some mode\n");
+	}
+	IGNORE_RACES_END ();
+	return ((word & MU_WLOCK) == 0);
+}
+
+NSYNC_CPP_END_
diff --git a/libc/thread/mu_wait.c b/libc/thread/mu_wait.c
new file mode 100644
index 000000000..f2c966238
--- /dev/null
+++ b/libc/thread/mu_wait.c
@@ -0,0 +1,320 @@
+// clang-format off
+/* Copyright 2016 Google Inc.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "libc/thread/nsync_cpp.h"
+#include "libc/thread/platform.h"
+#include "libc/thread/compiler.h"
+#include "libc/thread/cputype.h"
+#include "libc/thread/nsync.h"
+#include "libc/thread/dll.h"
+#include "libc/thread/sem.h"
+#include "libc/thread/wait_internal.h"
+#include "libc/thread/common.h"
+#include "libc/thread/atomic.h"
+
+NSYNC_CPP_START_
+
+/* Attempt to remove waiter *w from *mu's
+   waiter queue. If successful, leave the lock held in mode *l_type, and
+   return non-zero; otherwise return zero. Requires that the current thread
+   hold neither *mu nor its spinlock, and that remove_count be the value of
+   w.remove_count when *w was inserted into the queue (which it will still be if
+   it has not been removed).
+
+   This is a tricky part of the design. Here is the rationale.
+
+   When a condition times out or is cancelled, we must "turn off" the
+   condition, making it always true, so the lock will be acquired in the normal
+   way. The naive approach would be to set a field atomically to tell future
+   waiters to ignore the condition. Unfortunately, that would violate the
+   same_condition list invariants, and the same_condition optimization is
+   probably worth keeping.
+
+   To fix up the same_condition list, we must have mutual exclusion with the loop
+   in nsync_mu_unlock_slow_() that is examining waiters, evaluating their conditions, and
+   removing them from the queue. That loop uses both the spinlock (to allow
+   queue changes), and the mutex itself (to allow condition evaluation).
+   Therefore, to "turn off" the condition requires acquiring both the spinlock
+   and the mutex. This has two consequences:
+   - If we must acquire *mu to "turn off" the condition, we might as well give
+     the lock to this waiter and return from nsync_mu_wait_with_deadline() after we've
+     done so. It would be wasted work to put it back on the waiter queue, and
+     have it wake up and acquire yet again. (There are possibilities for
+     starvation here that we ignore, under the assumption that the client
+     avoids timeouts that are extremely short relative to the durations of his
+     critical sections.)
+   - We can't use *w to wait for the lock to be free, because *w is already on
+     the waiter queue with the wrong condition; we now want to wait with no
+     condition. So either we must spin to acquire the lock, or we must
+     allocate _another_ waiter object. The latter option is feasible, but
The latter option is feasible, but + delicate: the thread would have two waiter objects, and would have to + handle being woken by either one or both, and possibly removing one that + was not awoken. For the moment, we spin, because it's easier, and seems + not to cause problems in practice, since the spinloop backs off + aggressively. */ +static int mu_try_acquire_after_timeout_or_cancel (nsync_mu *mu, lock_type *l_type, + waiter *w, uint32_t remove_count) { + int success = 0; + unsigned spin_attempts = 0; + uint32_t old_word = ATM_LOAD (&mu->word); + /* Spin until we can acquire the spinlock and a writer lock on *mu. */ + while ((old_word&(MU_WZERO_TO_ACQUIRE|MU_SPINLOCK)) != 0 || + !ATM_CAS_ACQ (&mu->word, old_word, + (old_word+MU_WADD_TO_ACQUIRE+MU_SPINLOCK) & + ~MU_WCLEAR_ON_ACQUIRE)) { + /* Failed to acquire. If we can, set the MU_WRITER_WAITING bit + to avoid being starved by readers. */ + if ((old_word & (MU_WRITER_WAITING | MU_SPINLOCK)) == 0) { + /* If the following CAS succeeds, it effectively + acquires and releases the spinlock atomically, so + must be both an acquire and release barrier. + MU_WRITER_WAITING will be cleared via + MU_WCLEAR_ON_ACQUIRE when this loop succeeds. + An optimization; failures are ignored. */ + ATM_CAS_RELACQ (&mu->word, old_word, + old_word|MU_WRITER_WAITING); + } + spin_attempts = nsync_spin_delay_ (spin_attempts); + old_word = ATM_LOAD (&mu->word); + } + /* Check that w wasn't removed from the queue after our caller checked, + but before we acquired the spinlock. + The check of remove_count confirms that the waiter *w is still + governed by *mu's spinlock. Otherwise, some other thread may be + about to set w.waiting==0. */ + if (ATM_LOAD (&w->nw.waiting) != 0 && remove_count == ATM_LOAD (&w->remove_count)) { + /* This thread's condition is now irrelevant, and it + holds a writer lock. Remove it from the queue, + and possibly convert back to a reader lock. */ + mu->waiters = nsync_remove_from_mu_queue_ (mu->waiters, &w->nw.q); + ATM_STORE (&w->nw.waiting, 0); + + /* Release spinlock but keep desired lock type. */ + ATM_STORE_REL (&mu->word, old_word+l_type->add_to_acquire); /* release store */ + success = 1; + } else { + /* Release spinlock and *mu. */ + ATM_STORE_REL (&mu->word, old_word); /* release store */ + } + return (success); +} + +/* Return when at least one of: the condition is true, the + deadline expires, or cancel_note is notified. It may unlock and relock *mu + while blocked waiting for one of these events, but always returns with *mu + held. It returns 0 iff the condition is true on return, and otherwise + either ETIMEDOUT or ECANCELED, depending on why the call returned early. Use + abs_deadline==nsync_time_no_deadline for no deadline, and cancel_note==NULL for no + cancellation. + + Requires that *mu be held on entry. + Requires that condition.eval() neither modify state protected by *mu, nor + return a value dependent on state not protected by *mu. To depend on time, + use the abs_deadline parameter. + (Conventional use of condition variables have the same restrictions on the + conditions tested by the while-loop.) + The implementation calls condition.eval() only with *mu held, though not + always from the calling thread, and may elect to hold only a read lock + during the call, even if the client is attempting to acquire only write + locks. + + The nsync_mu_wait() and nsync_mu_wait_with_deadline() calls can be used instead of condition + variables. 
In many straightforward situations they are of equivalent + performance and are somewhat easier to use, because unlike condition + variables, they do not require that the waits be placed in a loop, and they + do not require explicit wakeup calls. In the current implementation, use of + nsync_mu_wait() and nsync_mu_wait_with_deadline() can take longer if many distinct + wait conditions are used. In such cases, use an explicit condition variable + per wakeup condition for best performance. */ +int nsync_mu_wait_with_deadline (nsync_mu *mu, + int (*condition) (const void *condition_arg), + const void *condition_arg, + int (*condition_arg_eq) (const void *a, const void *b), + nsync_time abs_deadline, nsync_note cancel_note) { + lock_type *l_type; + int first_wait; + int condition_is_true; + waiter *w; + int outcome; + /* Work out in which mode the lock is held. */ + uint32_t old_word; + IGNORE_RACES_START (); + old_word = ATM_LOAD (&mu->word); + if ((old_word & MU_ANY_LOCK) == 0) { + nsync_panic_ ("nsync_mu not held in some mode when calling " + "nsync_mu_wait_with_deadline()\n"); + } + l_type = nsync_writer_type_; + if ((old_word & MU_RHELD_IF_NON_ZERO) != 0) { + l_type = nsync_reader_type_; + } + + first_wait = 1; /* first time through the loop below. */ + condition_is_true = (condition == NULL || (*condition) (condition_arg)); + + /* Loop until either the condition becomes true, or "outcome" indicates + cancellation or timeout. */ + w = NULL; + outcome = 0; + while (outcome == 0 && !condition_is_true) { + uint32_t has_condition; + uint32_t remove_count; + uint32_t add_to_acquire; + int had_waiters; + int sem_outcome; + unsigned attempts; + int have_lock; + if (w == NULL) { + w = nsync_waiter_new_ (); /* get a waiter struct if we need one. */ + } + + /* Prepare to wait. */ + w->cv_mu = NULL; /* not a condition variable wait */ + w->l_type = l_type; + w->cond.f = condition; + w->cond.v = condition_arg; + w->cond.eq = condition_arg_eq; + has_condition = 0; /* set to MU_CONDITION if condition is non-NULL */ + if (condition != NULL) { + has_condition = MU_CONDITION; + } + ATM_STORE (&w->nw.waiting, 1); + remove_count = ATM_LOAD (&w->remove_count); + + /* Acquire spinlock. */ + old_word = nsync_spin_test_and_set_ (&mu->word, MU_SPINLOCK, + MU_SPINLOCK|MU_WAITING|has_condition, MU_ALL_FALSE); + had_waiters = ((old_word & (MU_DESIG_WAKER | MU_WAITING)) == MU_WAITING); + /* Queue the waiter. */ + if (first_wait) { + nsync_maybe_merge_conditions_ (nsync_dll_last_ (mu->waiters), + &w->nw.q); + /* first wait goes to end of queue */ + mu->waiters = nsync_dll_make_last_in_list_ (mu->waiters, + &w->nw.q); + first_wait = 0; + } else { + nsync_maybe_merge_conditions_ (&w->nw.q, + nsync_dll_first_ (mu->waiters)); + /* subsequent waits go to front of queue */ + mu->waiters = nsync_dll_make_first_in_list_ (mu->waiters, + &w->nw.q); + } + /* Release spinlock and *mu. */ + do { + old_word = ATM_LOAD (&mu->word); + add_to_acquire = l_type->add_to_acquire; + if (((old_word-l_type->add_to_acquire)&MU_ANY_LOCK) == 0 && had_waiters) { + add_to_acquire = 0; /* release happens in nsync_mu_unlock_slow_ */ + } + } while (!ATM_CAS_REL (&mu->word, old_word, + (old_word - add_to_acquire) & ~MU_SPINLOCK)); + if (add_to_acquire == 0) { + /* The lock will be fully released, there are waiters, and + no designated waker, so wake waiters. */ + nsync_mu_unlock_slow_ (mu, l_type); + } + + /* wait until awoken or a timeout. 
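+		   A wakeup is observed as w->nw.waiting becoming zero;
+		   timeouts and cancellations are reported by
+		   nsync_sem_wait_with_cancel_() and then confirmed under the
+		   spinlock by mu_try_acquire_after_timeout_or_cancel().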
*/ + sem_outcome = 0; + attempts = 0; + have_lock = 0; + while (ATM_LOAD_ACQ (&w->nw.waiting) != 0) { /* acquire load */ + if (sem_outcome == 0) { + sem_outcome = nsync_sem_wait_with_cancel_ (w, abs_deadline, + cancel_note); + if (sem_outcome != 0 && ATM_LOAD (&w->nw.waiting) != 0) { + /* A timeout or cancellation occurred, and no wakeup. + Acquire the spinlock and mu, and confirm. */ + have_lock = mu_try_acquire_after_timeout_or_cancel ( + mu, l_type, w, remove_count); + if (have_lock) { /* Successful acquire. */ + outcome = sem_outcome; + } + } + } + + if (ATM_LOAD (&w->nw.waiting) != 0) { + attempts = nsync_spin_delay_ (attempts); /* will ultimately yield */ + } + } + + if (!have_lock) { + /* If we didn't reacquire due to a cancellation/timeout, acquire now. */ + nsync_mu_lock_slow_ (mu, w, MU_DESIG_WAKER, l_type); + } + condition_is_true = (condition == NULL || (*condition) (condition_arg)); + } + if (w != NULL) { + nsync_waiter_free_ (w); /* free waiter if we allocated one. */ + } + if (condition_is_true) { + outcome = 0; /* condition is true trumps other outcomes. */ + } + IGNORE_RACES_END (); + return (outcome); +} + +/* Return when the condition is true. Perhaps unlock and relock *mu + while blocked waiting for the condition to become true. It is equivalent to + a call to nsync_mu_wait_with_deadline() with abs_deadline==nsync_time_no_deadline, and + cancel_note==NULL. + + Requires that *mu be held on entry. + Calls condition.eval() only with *mu held, though not always from the + calling thread. + See wait_with_deadline() for the restrictions on condition and performance + considerations. */ +void nsync_mu_wait (nsync_mu *mu, int (*condition) (const void *condition_arg), + const void *condition_arg, + int (*condition_arg_eq) (const void *a, const void *b)) { + if (nsync_mu_wait_with_deadline (mu, condition, condition_arg, condition_arg_eq, + nsync_time_no_deadline, NULL) != 0) { + nsync_panic_ ("nsync_mu_wait woke but condition not true\n"); + } +} + +/* Unlock *mu, which must be held in write mode, and wake waiters, if + appropriate. Unlike nsync_mu_unlock(), this call is not required to wake + nsync_mu_wait/nsync_mu_wait_with_deadline calls on conditions that were + false before this thread acquired the lock. This call should be used only + at the end of critical sections for which: + - nsync_mu_wait/nsync_mu_wait_with_deadline are in use on the same mutex, + - this critical section cannot make the condition true for any of those + nsync_mu_wait/nsync_mu_wait_with_deadline waits, and + - when performance is significantly improved by doing so. */ +void nsync_mu_unlock_without_wakeup (nsync_mu *mu) { + IGNORE_RACES_START (); + /* See comment in nsync_mu_unlock(). 
+	*/
+	if (!ATM_CAS_REL (&mu->word, MU_WLOCK, 0)) {
+		uint32_t old_word = ATM_LOAD (&mu->word);
+		uint32_t new_word = old_word - MU_WLOCK;
+		if ((new_word & (MU_RLOCK_FIELD | MU_WLOCK)) != 0) {
+			if ((old_word & MU_RLOCK_FIELD) != 0) {
+				nsync_panic_ ("attempt to nsync_mu_unlock() an nsync_mu "
+					      "held in read mode\n");
+			} else {
+				nsync_panic_ ("attempt to nsync_mu_unlock() an nsync_mu "
+					      "not held in write mode\n");
+			}
+		} else if ((old_word & (MU_WAITING | MU_DESIG_WAKER | MU_ALL_FALSE)) ==
+			   MU_WAITING || !ATM_CAS_REL (&mu->word, old_word, new_word)) {
+			nsync_mu_unlock_slow_ (mu, nsync_writer_type_);
+		}
+	}
+	IGNORE_RACES_END ();
+}
+
+NSYNC_CPP_END_
diff --git a/libc/thread/note.c b/libc/thread/note.c
new file mode 100644
index 000000000..9b7299c90
--- /dev/null
+++ b/libc/thread/note.c
@@ -0,0 +1,303 @@
+// clang-format off
+/* Copyright 2016 Google Inc.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "libc/thread/nsync_cpp.h"
+#include "libc/thread/platform.h"
+#include "libc/thread/compiler.h"
+#include "libc/thread/cputype.h"
+#include "libc/thread/nsync.h"
+#include "libc/thread/dll.h"
+#include "libc/thread/sem.h"
+#include "libc/thread/wait_internal.h"
+#include "libc/thread/common.h"
+#include "libc/thread/atomic.h"
+
+NSYNC_CPP_START_
+
+/* Locking discipline for the nsync_note implementation:
+
+   Each nsync_note has a lock "note_mu" which protects the "parent" pointer,
+   "waiters" list, and "disconnecting" count. It also protects the "children"
+   list; thus each node's "parent_child_link", which links together the
+   children of a single parent, is protected by the parent's "note_mu".
+
+   To connect a parent to a child, or to disconnect one, the parent's lock must
+   be held to manipulate its child list, and the child's lock must be held to
+   change the parent pointer, so both must be held simultaneously.
+   The locking order is "parent before child".
+
+   Operations like notify and free are given a node pointer n and must
+   disconnect *n from its parent n->parent. The call must hold n->note_mu to
+   read n->parent, but must release n->note_mu to acquire
+   n->parent->note_mu. The parent could be disconnected and freed while
+   n->note_mu is not held. The n->disconnecting count handles this; the
+   operation acquires n->note_mu, increments n->disconnecting, and can then
+   release n->note_mu, and acquire n->parent->note_mu and n->note_mu in the
+   correct order. n->disconnecting!=0 indicates that a thread is already in
+   the process of disconnecting n from n->parent. A thread freeing or
+   notifying the parent should not perform the disconnection of that child, but
+   should instead wait for the "children" list to become empty via
+   WAIT_FOR_NO_CHILDREN(). WAKEUP_NO_CHILDREN() should be used whenever this
+   condition could become true. */
+
+/* Set the expiry time in *n to t */
+static void set_expiry_time (nsync_note n, nsync_time t) {
+	n->expiry_time = t;
+	n->expiry_time_valid = 1;
+}
+
+/* Return a pointer to the note containing nsync_dll_element_ *e. */
+#define DLL_NOTE(e) ((nsync_note)((e)->container))
+
+/* Return whether n->children is empty. Assumes n->note_mu held. */
+static int no_children (const void *v) {
+	return (nsync_dll_is_empty_ (((nsync_note)v)->children));
+}
+
+#define WAIT_FOR_NO_CHILDREN(pred_, n_) nsync_mu_wait (&(n_)->note_mu, &pred_, (n_), NULL)
+#define WAKEUP_NO_CHILDREN(n_) do { } while (0)
+
+/*
+// These lines can be used in place of those above if conditional critical
+// sections have been removed from the source.
+#define WAIT_FOR_NO_CHILDREN(pred_, n_) do { \
+	while (!pred_ (n_)) { nsync_cv_wait (&(n_)->no_children_cv, &(n_)->note_mu); } \
+	} while (0)
+#define WAKEUP_NO_CHILDREN(n_) nsync_cv_broadcast (&(n_)->no_children_cv)
+*/
+
+/* Notify *n and all its descendants that are not already disconnecting.
+   n->note_mu is held. May release and reacquire n->note_mu.
+   parent->note_mu is held if parent != NULL. */
+static void note_notify_child (nsync_note n, nsync_note parent) {
+	nsync_time t;
+	t = NOTIFIED_TIME (n);
+	if (nsync_time_cmp (t, nsync_time_zero) > 0) {
+		nsync_dll_element_ *p;
+		nsync_dll_element_ *next;
+		ATM_STORE_REL (&n->notified, 1);
+		while ((p = nsync_dll_first_ (n->waiters)) != NULL) {
+			struct nsync_waiter_s *nw = DLL_NSYNC_WAITER (p);
+			n->waiters = nsync_dll_remove_ (n->waiters, p);
+			ATM_STORE_REL (&nw->waiting, 0);
+			nsync_mu_semaphore_v (nw->sem);
+		}
+		for (p = nsync_dll_first_ (n->children); p != NULL; p = next) {
+			nsync_note child = DLL_NOTE (p);
+			next = nsync_dll_next_ (n->children, p);
+			nsync_mu_lock (&child->note_mu);
+			if (child->disconnecting == 0) {
+				note_notify_child (child, n);
+			}
+			nsync_mu_unlock (&child->note_mu);
+		}
+		WAIT_FOR_NO_CHILDREN (no_children, n);
+		if (parent != NULL) {
+			parent->children = nsync_dll_remove_ (parent->children,
+							      &n->parent_child_link);
+			WAKEUP_NO_CHILDREN (parent);
+			n->parent = NULL;
+		}
+	}
+}
+
+/* Notify *n and all its descendants that are not already disconnecting.
+   No locks are held. */
+static void notify (nsync_note n) {
+	nsync_time t;
+	nsync_mu_lock (&n->note_mu);
+	t = NOTIFIED_TIME (n);
+	if (nsync_time_cmp (t, nsync_time_zero) > 0) {
+		nsync_note parent;
+		n->disconnecting++;
+		parent = n->parent;
+		if (parent != NULL && !nsync_mu_trylock (&parent->note_mu)) {
+			nsync_mu_unlock (&n->note_mu);
+			nsync_mu_lock (&parent->note_mu);
+			nsync_mu_lock (&n->note_mu);
+		}
+		note_notify_child (n, parent);
+		if (parent != NULL) {
+			nsync_mu_unlock (&parent->note_mu);
+		}
+		n->disconnecting--;
+	}
+	nsync_mu_unlock (&n->note_mu);
+}
+
+/* Return the deadline by which *n is certain to be notified,
+   setting it to zero if it already has passed that time.
+   Requires n->note_mu not held on entry.
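+   A result equal to nsync_time_zero therefore means "already notified";
+   nsync_note_is_notified() below relies on exactly this encoding.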
+ + Not static; used in sem_wait.c */ +nsync_time nsync_note_notified_deadline_ (nsync_note n) { + nsync_time ntime; + if (ATM_LOAD_ACQ (&n->notified) != 0) { + ntime = nsync_time_zero; + } else { + nsync_mu_lock (&n->note_mu); + ntime = NOTIFIED_TIME (n); + nsync_mu_unlock (&n->note_mu); + if (nsync_time_cmp (ntime, nsync_time_zero) > 0) { + if (nsync_time_cmp (ntime, nsync_time_now ()) <= 0) { + notify (n); + ntime = nsync_time_zero; + } + } + } + return (ntime); +} + +int nsync_note_is_notified (nsync_note n) { + int result; + IGNORE_RACES_START (); + result = (nsync_time_cmp (nsync_note_notified_deadline_ (n), nsync_time_zero) <= 0); + IGNORE_RACES_END (); + return (result); +} + +nsync_note nsync_note_new (nsync_note parent, + nsync_time abs_deadline) { + nsync_note n = (nsync_note) malloc (sizeof (*n)); + if (n != NULL) { + memset ((void *) n, 0, sizeof (*n)); + nsync_dll_init_ (&n->parent_child_link, n); + set_expiry_time (n, abs_deadline); + if (!nsync_note_is_notified (n) && parent != NULL) { + nsync_time parent_time; + nsync_mu_lock (&parent->note_mu); + parent_time = NOTIFIED_TIME (parent); + if (nsync_time_cmp (parent_time, abs_deadline) < 0) { + set_expiry_time (n, parent_time); + } + if (nsync_time_cmp (parent_time, nsync_time_zero) > 0) { + n->parent = parent; + parent->children = nsync_dll_make_last_in_list_ (parent->children, + &n->parent_child_link); + } + nsync_mu_unlock (&parent->note_mu); + } + } + return (n); +} + +void nsync_note_free (nsync_note n) { + nsync_note parent; + nsync_dll_element_ *p; + nsync_dll_element_ *next; + nsync_mu_lock (&n->note_mu); + n->disconnecting++; + ASSERT (nsync_dll_is_empty_ (n->waiters)); + parent = n->parent; + if (parent != NULL && !nsync_mu_trylock (&parent->note_mu)) { + nsync_mu_unlock (&n->note_mu); + nsync_mu_lock (&parent->note_mu); + nsync_mu_lock (&n->note_mu); + } + for (p = nsync_dll_first_ (n->children); p != NULL; p = next) { + nsync_note child = DLL_NOTE (p); + next = nsync_dll_next_ (n->children, p); + nsync_mu_lock (&child->note_mu); + if (child->disconnecting == 0) { + n->children = nsync_dll_remove_ (n->children, + &child->parent_child_link); + if (parent != NULL) { + child->parent = parent; + parent->children = nsync_dll_make_last_in_list_ ( + parent->children, &child->parent_child_link); + } else { + child->parent = NULL; + } + } + nsync_mu_unlock (&child->note_mu); + } + WAIT_FOR_NO_CHILDREN (no_children, n); + if (parent != NULL) { + parent->children = nsync_dll_remove_ (parent->children, + &n->parent_child_link); + WAKEUP_NO_CHILDREN (parent); + n->parent = NULL; + nsync_mu_unlock (&parent->note_mu); + } + n->disconnecting--; + nsync_mu_unlock (&n->note_mu); + free (n); +} + +void nsync_note_notify (nsync_note n) { + IGNORE_RACES_START (); + if (nsync_time_cmp (nsync_note_notified_deadline_ (n), nsync_time_zero) > 0) { + notify (n); + } + IGNORE_RACES_END (); +} + +int nsync_note_wait (nsync_note n, nsync_time abs_deadline) { + struct nsync_waitable_s waitable; + struct nsync_waitable_s *pwaitable = &waitable; + waitable.v = n; + waitable.funcs = &nsync_note_waitable_funcs; + return (nsync_wait_n (NULL, NULL, NULL, abs_deadline, 1, &pwaitable) == 0); +} + +nsync_time nsync_note_expiry (nsync_note n) { + return (n->expiry_time); +} + +static nsync_time note_ready_time (void *v, struct nsync_waiter_s *nw UNUSED) { + return (nsync_note_notified_deadline_ ((nsync_note)v)); +} + +static int note_enqueue (void *v, struct nsync_waiter_s *nw) { + int waiting = 0; + nsync_note n = (nsync_note) v; + nsync_time ntime; + 
nsync_mu_lock (&n->note_mu);
+        ntime = NOTIFIED_TIME (n);
+        if (nsync_time_cmp (ntime, nsync_time_zero) > 0) {
+                n->waiters = nsync_dll_make_last_in_list_ (n->waiters, &nw->q);
+                ATM_STORE (&nw->waiting, 1);
+                waiting = 1;
+        } else {
+                ATM_STORE (&nw->waiting, 0);
+                waiting = 0;
+        }
+        nsync_mu_unlock (&n->note_mu);
+        return (waiting);
+}
+
+static int note_dequeue (void *v, struct nsync_waiter_s *nw) {
+        int was_queued = 0;
+        nsync_note n = (nsync_note) v;
+        nsync_time ntime;
+        nsync_note_notified_deadline_ (n);
+        nsync_mu_lock (&n->note_mu);
+        ntime = NOTIFIED_TIME (n);
+        if (nsync_time_cmp (ntime, nsync_time_zero) > 0) {
+                n->waiters = nsync_dll_remove_ (n->waiters, &nw->q);
+                ATM_STORE (&nw->waiting, 0);
+                was_queued = 1;
+        }
+        nsync_mu_unlock (&n->note_mu);
+        return (was_queued);
+}
+
+const struct nsync_waitable_funcs_s nsync_note_waitable_funcs = {
+        &note_ready_time,
+        &note_enqueue,
+        &note_dequeue
+};
+
+NSYNC_CPP_END_
diff --git a/libc/thread/nsync.h b/libc/thread/nsync.h
new file mode 100644
index 000000000..ed4db3e41
--- /dev/null
+++ b/libc/thread/nsync.h
@@ -0,0 +1,28 @@
+// clang-format off
+/* Copyright 2016 Google Inc.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef NSYNC_PUBLIC_NSYNC_H_
+#define NSYNC_PUBLIC_NSYNC_H_
+
+#include "libc/thread/nsync_mu.h"
+#include "libc/thread/nsync_mu_wait.h"
+#include "libc/thread/nsync_cv.h"
+#include "libc/thread/nsync_note.h"
+#include "libc/thread/nsync_counter.h"
+#include "libc/thread/nsync_waiter.h"
+#include "libc/thread/nsync_once.h"
+#include "libc/thread/nsync_debug.h"
+
+#endif /*NSYNC_PUBLIC_NSYNC_H_*/
diff --git a/libc/thread/nsync_atomic.h b/libc/thread/nsync_atomic.h
new file mode 100644
index 000000000..46c04865c
--- /dev/null
+++ b/libc/thread/nsync_atomic.h
@@ -0,0 +1,67 @@
+// clang-format off
+/* Copyright 2016 Google Inc.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef NSYNC_PUBLIC_NSYNC_ATOMIC_H_
+#define NSYNC_PUBLIC_NSYNC_ATOMIC_H_
+
+#include "libc/thread/nsync_cpp.h"
+
+/* This file is not to be included directly by the client.  It exists because
+   on some platforms, one cannot use a simple uint32_t with atomic operations.
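+
+   Four alternatives follow: a struct wrapper used for typechecking builds,
+   C11 atomics, C++11 std::atomic, and a plain uint32_t fallback.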
+ */
+#if NSYNC_ATOMIC_TYPECHECK
+#include "libc/fmt/conv.h"
+#include "libc/inttypes.h"
+NSYNC_CPP_START_
+typedef struct { uint32_t value; } nsync_atomic_uint32_;
+NSYNC_CPP_END_
+#define NSYNC_ATOMIC_UINT32_INIT_ { 0 }
+#define NSYNC_ATOMIC_UINT32_LOAD_(p) ((p)->value)
+#define NSYNC_ATOMIC_UINT32_STORE_(p,v) ((p)->value = (v))
+#define NSYNC_ATOMIC_UINT32_PTR_(p) (&(p)->value)
+
+#elif NSYNC_ATOMIC_C11
+#include "libc/intrin/atomic.h"
+NSYNC_CPP_START_
+typedef atomic_uint_least32_t nsync_atomic_uint32_;
+NSYNC_CPP_END_
+#define NSYNC_ATOMIC_UINT32_INIT_ 0
+#define NSYNC_ATOMIC_UINT32_LOAD_(p) (*(p))
+#define NSYNC_ATOMIC_UINT32_STORE_(p,v) (*(p) = (v))
+#define NSYNC_ATOMIC_UINT32_PTR_(p) (p)
+
+#elif NSYNC_ATOMIC_CPP11
+#include "third_party/libcxx/atomic"
+NSYNC_CPP_START_
+typedef std::atomic<uint32_t> nsync_atomic_uint32_;
+NSYNC_CPP_END_
+#define NSYNC_ATOMIC_UINT32_INIT_ ATOMIC_VAR_INIT (0)
+#define NSYNC_ATOMIC_UINT32_LOAD_(p) (std::atomic_load (p))
+#define NSYNC_ATOMIC_UINT32_STORE_(p,v) (std::atomic_store ((p), (uint32_t) (v)))
+#define NSYNC_ATOMIC_UINT32_PTR_(p) (p)
+
+#else
+#include "libc/fmt/conv.h"
+#include "libc/inttypes.h"
+NSYNC_CPP_START_
+typedef uint32_t nsync_atomic_uint32_;
+NSYNC_CPP_END_
+#define NSYNC_ATOMIC_UINT32_INIT_ 0
+#define NSYNC_ATOMIC_UINT32_LOAD_(p) (*(p))
+#define NSYNC_ATOMIC_UINT32_STORE_(p,v) (*(p) = (v))
+#define NSYNC_ATOMIC_UINT32_PTR_(p) (p)
+#endif
+
+#endif /*NSYNC_PUBLIC_NSYNC_ATOMIC_H_*/
diff --git a/libc/thread/nsync_counter.h b/libc/thread/nsync_counter.h
new file mode 100644
index 000000000..4a1910686
--- /dev/null
+++ b/libc/thread/nsync_counter.h
@@ -0,0 +1,64 @@
+// clang-format off
+/* Copyright 2016 Google Inc.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef NSYNC_PUBLIC_NSYNC_COUNTER_H_
+#define NSYNC_PUBLIC_NSYNC_COUNTER_H_
+
+#include "libc/fmt/conv.h"
+#include "libc/inttypes.h"
+#include "libc/thread/nsync_cpp.h"
+#include "libc/thread/nsync_mu.h"
+#include "libc/thread/nsync_atomic.h"
+#include "libc/thread/nsync_time.h"
+
+NSYNC_CPP_START_
+
+struct nsync_dll_element_s_;
+
+/* An nsync_counter represents an unsigned integer that can count up and down,
+   and wake waiters when zero. */
+typedef struct nsync_counter_s_ *nsync_counter;
+
+/* Return a freshly allocated nsync_counter with the specified value,
+   or NULL if an nsync_counter cannot be created.
+
+   Any non-NULL returned value should be passed to nsync_counter_free() when no
+   longer needed. */
+nsync_counter nsync_counter_new (uint32_t value);
+
+/* Free resources associated with c.  Requires that c was allocated by
+   nsync_counter_new(), and no concurrent or future operations are applied to
+   c. */
+void nsync_counter_free (nsync_counter c);
+
+/* Add delta to c, and return its new value.  It is a checkable runtime error
+   to decrement c below 0, or to increment c (i.e., apply a delta > 0) after a
+   waiter has waited. */
+uint32_t nsync_counter_add (nsync_counter c, int32_t delta);
+
+/* Return the current value of c.
*/ +uint32_t nsync_counter_value (nsync_counter c); + +/* Wait until c has value 0, or until abs_deadline, then return + the value of c. It is a checkable runtime error to increment c after + a waiter may have been woken due to the counter reaching zero. + If abs_deadline==nsync_time_no_deadline, the deadline + is far in the future. */ +uint32_t nsync_counter_wait (nsync_counter c, nsync_time abs_deadline); + +NSYNC_COUNTER_CPP_OVERLOAD_ +NSYNC_CPP_END_ + +#endif /*NSYNC_PUBLIC_NSYNC_COUNTER_H_*/ diff --git a/libc/thread/nsync_cpp.h b/libc/thread/nsync_cpp.h new file mode 100644 index 000000000..9ee463d94 --- /dev/null +++ b/libc/thread/nsync_cpp.h @@ -0,0 +1,46 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#ifndef NSYNC_PUBLIC_NSYNC_CPP_H_ +#define NSYNC_PUBLIC_NSYNC_CPP_H_ + +/* This header file permits compilation via a C++ compiler using the macros + NSYNC_CPP_START_, NSYNC_CPP_END_, and NSYNC_CPP_USING_. + + NSYNC_CPP_START_ and NSYNC_CPP_END_ surround C code in the public library. + They put all public symbols into the "nsync" name space. + + NSYNC_CPP_USING_ is used before C code (used for testing) that might use + public exports from this package. It makes symbols in the "nsync" + name space available without the "nsync::" prefix. + + NSYNC_C_START_ and NSYNC_C_END_ surround C code in the C++ modules. + */ + +#if defined(__cplusplus) +#define NSYNC_CPP_START_ namespace nsync { +#define NSYNC_CPP_END_ } +#define NSYNC_CPP_USING_ using namespace nsync; +#define NSYNC_C_START_ extern "C" { +#define NSYNC_C_END_ } +#else +#define NSYNC_CPP_START_ +#define NSYNC_CPP_END_ +#define NSYNC_CPP_USING_ +#define NSYNC_C_START_ +#define NSYNC_C_END_ +#endif + +#endif /*NSYNC_PUBLIC_NSYNC_CPP_H_*/ diff --git a/libc/thread/nsync_cv.h b/libc/thread/nsync_cv.h new file mode 100644 index 000000000..a48c3482c --- /dev/null +++ b/libc/thread/nsync_cv.h @@ -0,0 +1,150 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#ifndef NSYNC_PUBLIC_NSYNC_CV_H_ +#define NSYNC_PUBLIC_NSYNC_CV_H_ + +#include "libc/fmt/conv.h" +#include "libc/inttypes.h" +#include "libc/thread/nsync_cpp.h" +#include "libc/thread/nsync_mu.h" +#include "libc/thread/nsync_atomic.h" +#include "libc/thread/nsync_time.h" + +NSYNC_CPP_START_ + +struct nsync_dll_element_s_; +struct nsync_note_s_; + +/* An nsync_cv is a condition variable in the style of Mesa, Java, POSIX, and Go's sync.Cond. 
+   It allows a thread to wait for a condition on state protected by a mutex,
+   and to proceed with the mutex held and the condition true.
+
+   See also nsync_mu_wait() and nsync_mu_wait_with_deadline(), which implement conditional
+   critical sections.  In many cases, they are easier to use than condition
+   variables.
+
+   Usage:
+
+   after making the desired predicate true, call:
+        nsync_cv_signal (&cv); // If at most one thread can make use of the predicate becoming true.
+   or
+        nsync_cv_broadcast (&cv); // If multiple threads can make use of the predicate becoming true.
+
+   To wait for a predicate with no deadline (assuming nsync_cv_broadcast() or
+   nsync_cv_signal() is called whenever the predicate becomes true):
+        nsync_mu_lock (&mu);
+        while (!some_predicate_protected_by_mu) { // the while-loop is required.
+                nsync_cv_wait (&cv, &mu);
+        }
+        // predicate is now true
+        nsync_mu_unlock (&mu);
+
+   To wait for a predicate with a deadline (assuming nsync_cv_broadcast() or
+   nsync_cv_signal() is called whenever the predicate becomes true):
+        nsync_mu_lock (&mu);
+        while (!some_predicate_protected_by_mu &&
+               nsync_cv_wait_with_deadline (&cv, &mu, abs_deadline, cancel_note) == 0) {
+        }
+        if (some_predicate_protected_by_mu) { // predicate is true
+        } else { // predicate is false, and deadline expired, or cancel_note was notified.
+        }
+        nsync_mu_unlock (&mu);
+   or, if the predicate is complex and you wish to write it just once and
+   inline, you could use the following instead of the while-loop above:
+        nsync_mu_lock (&mu);
+        int pred_is_true = 0;
+        int outcome = 0;
+        while (!(pred_is_true = some_predicate_protected_by_mu) && outcome == 0) {
+                outcome = nsync_cv_wait_with_deadline (&cv, &mu, abs_deadline, cancel_note);
+        }
+        if (pred_is_true) { // predicate is true
+        } else { // predicate is false, and deadline expired, or cancel_note was notified.
+        }
+        nsync_mu_unlock (&mu);
+
+   As the examples show, Mesa-style condition variables require that waits use
+   a loop that tests the predicate anew after each wait.  It may be surprising
+   that these are preferred over the precise wakeups offered by the condition
+   variables in Hoare monitors.  Imprecise wakeups make more efficient use of
+   the critical section, because threads can enter it while a woken thread is
+   still emerging from the scheduler, which may take thousands of cycles.
+   Further, they make the programme easier to read and debug by making the
+   predicate explicit locally at the wait, where the predicate is about to be
+   assumed; the reader does not have to infer the predicate by examining all
+   the places where wakeups may occur. */
+typedef struct nsync_cv_s_ {
+        nsync_atomic_uint32_ word; /* see bits below */
+        struct nsync_dll_element_s_ *waiters; /* points to tail of list of waiters; under mu. */
+} nsync_cv;
+
+/* An nsync_cv should be zeroed to initialize, which can be accomplished by
+   initializing with static initializer NSYNC_CV_INIT, or by setting the entire
+   struct to 0, or using nsync_cv_init(). */
+#define NSYNC_CV_INIT { NSYNC_ATOMIC_UINT32_INIT_, 0 }
+void nsync_cv_init (nsync_cv *cv);
+
+/* Wake at least one thread if any are currently blocked on *cv.  If
+   the chosen thread is a reader on an nsync_mu, wake all readers and, if
+   possible, a writer. */
+void nsync_cv_signal (nsync_cv *cv);
+
+/* Wake all threads currently blocked on *cv. */
+void nsync_cv_broadcast (nsync_cv *cv);
+
+/* Atomically release "mu" (which must be held on entry) and block the caller
+   on *cv.
Wait until awakened by a call to nsync_cv_signal() or
+   nsync_cv_broadcast(), or a spurious wakeup; then reacquire "mu", and return.
+   Equivalent to a call to nsync_mu_wait_with_deadline() with
+   abs_deadline==nsync_time_no_deadline, and cancel_note==NULL.  Callers should use
+   nsync_cv_wait() in a loop, as with all standard Mesa-style condition
+   variables.  See examples above. */
+void nsync_cv_wait (nsync_cv *cv, nsync_mu *mu);
+
+/* Atomically release "mu" (which must be held on entry)
+   and block the calling thread on *cv.  It then waits until awakened by a
+   call to nsync_cv_signal() or nsync_cv_broadcast() (or a spurious wakeup), or by the time
+   reaching abs_deadline, or by cancel_note being notified.  In all cases, it
+   reacquires "mu", and returns the reason the call returned (0, ETIMEDOUT,
+   or ECANCELED).  Use abs_deadline==nsync_time_no_deadline for no deadline, and
+   cancel_note==NULL for no cancellation.  wait_with_deadline() should be used in a
+   loop, as with all Mesa-style condition variables.  See examples above.
+
+   There are two reasons for using an absolute deadline, rather than a relative
+   timeout---these are why pthread_cond_timedwait() also uses an absolute
+   deadline.  First, condition variable waits have to be used in a loop; with
+   an absolute time, the deadline does not have to be recomputed on each
+   iteration.  Second, in most real programmes some activity (such as an RPC
+   to a server, or guaranteeing a response time in a UI) has a deadline
+   imposed by the specification or the caller/user; relative delays can shift
+   arbitrarily with scheduling delays, and so after multiple waits might
+   extend beyond the expected deadline.  Relative delays tend to be more
+   convenient mostly in tests and trivial examples than they are in real
+   programmes. */
+int nsync_cv_wait_with_deadline (nsync_cv *cv, nsync_mu *mu,
+                                 nsync_time abs_deadline,
+                                 struct nsync_note_s_ *cancel_note);
+
+/* Like nsync_cv_wait_with_deadline(), but allows an arbitrary lock *mu to be
+   used, given its (*lock)(mu) and (*unlock)(mu) routines. */
+int nsync_cv_wait_with_deadline_generic (nsync_cv *cv,
+                                         void *mu, void (*lock) (void *), void (*unlock) (void *),
+                                         nsync_time abs_deadline,
+                                         struct nsync_note_s_ *cancel_note);
+
+NSYNC_CV_CPP_OVERLOAD_
+NSYNC_CPP_END_
+
+#endif /*NSYNC_PUBLIC_NSYNC_CV_H_*/
diff --git a/libc/thread/nsync_debug.h b/libc/thread/nsync_debug.h
new file mode 100644
index 000000000..d28184306
--- /dev/null
+++ b/libc/thread/nsync_debug.h
@@ -0,0 +1,55 @@
+// clang-format off
+/* Copyright 2016 Google Inc.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef NSYNC_PUBLIC_NSYNC_DEBUG_H_
+#define NSYNC_PUBLIC_NSYNC_DEBUG_H_
+
+/* Debugging operations for mutexes and condition variables.
+
+   These operations should not be relied upon for normal functionality.  The
+   implementation may be slow, output formats may change, and the
+   implementation is free to yield the empty string.
*/ + +#include "libc/thread/nsync_cpp.h" +#include "libc/thread/nsync_mu.h" +#include "libc/thread/nsync_cv.h" + +NSYNC_CPP_START_ + +/* Place in buf[0,..,n-1] a nul-terminated, human readable string indicative of + some of the internal state of the mutex or condition variable, and return + buf. If n>=4, buffer overflow is indicated by placing the characters "..." + at the end of the string. + + The *_and_waiters() variants attempt to output the waiter lists in addition + to the basic state. These variants may acquire internal locks and follow + internal pointers. Thus, they are riskier if invoked in an address space + whose overall health is uncertain. */ +char *nsync_mu_debug_state (nsync_mu *mu, char *buf, int n); +char *nsync_cv_debug_state (nsync_cv *cv, char *buf, int n); +char *nsync_mu_debug_state_and_waiters (nsync_mu *mu, char *buf, int n); +char *nsync_cv_debug_state_and_waiters (nsync_cv *cv, char *buf, int n); + +/* Like nsync_*_debug_state_and_waiters(), but ignoring all locking and safety + considerations, and using an internal, possibly static buffer that may be + overwritten by subsequent or concurrent calls to these routines. These + variants should be used only from an interactive debugger, when all other + threads are stopped; the debugger is expected to recover from errors. */ +char *nsync_mu_debugger (nsync_mu *mu); +char *nsync_cv_debugger (nsync_cv *cv); + +NSYNC_CPP_END_ + +#endif /*NSYNC_PUBLIC_NSYNC_DEBUG_H_*/ diff --git a/libc/thread/nsync_mu.h b/libc/thread/nsync_mu.h new file mode 100644 index 000000000..958c77ee8 --- /dev/null +++ b/libc/thread/nsync_mu.h @@ -0,0 +1,115 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#ifndef NSYNC_PUBLIC_NSYNC_MU_H_ +#define NSYNC_PUBLIC_NSYNC_MU_H_ + +#include "libc/fmt/conv.h" +#include "libc/inttypes.h" +#include "libc/thread/nsync_cpp.h" +#include "libc/thread/nsync_atomic.h" + +NSYNC_CPP_START_ + +struct nsync_dll_element_s_; + +/* An nsync_mu is a lock. If initialized to all zeroes, it is valid and unlocked. + + An nsync_mu can be "free", held by a single thread (aka fiber, goroutine) in + "write" (exclusive) mode, or by many threads in "read" (shared) mode. A + thread that acquires it should eventually release it. It is illegal to + acquire an nsync_mu in one thread and release it in another. It is + illegal for a thread to reacquire an nsync_mu while holding it (even a + second share of a "read" lock). + + Example usage: + static struct foo { + nsync_mu mu; // protects invariant a+b==0 on fields below. + int a; + int b; + } p = { NSYNC_MU_INIT, 0, 0 }; + .... + nsync_mu_lock (&p.mu); + // The current thread now has exclusive access to p.a and p.b; invariant assumed true. + p.a++; + p.b--; // restore invariant p.a+p.b==0 before releasing p.mu + nsync_mu_unlock (&p.mu) + + Mutexes can be used with condition variables; see nsync_cv.h. + + nsync_mu_wait() and nsync_mu_wait_with_deadline() can be used instead of + condition variables. See nsync_mu_wait.h for more details. 
+ Example use of nsync_mu_wait() to wait for p.a==0, using definition above: + int a_is_zero (const void *condition_arg) { + return (((const struct foo *)condition_arg)->a == 0); + } + ... + nsync_mu_lock (&p.mu); + nsync_mu_wait (&p.mu, &a_is_zero, &p, NULL); + // The current thread now has exclusive access to p.a and p.b, and p.a==0. + ... + nsync_mu_unlock (&p.mu); */ +typedef struct nsync_mu_s_ { + nsync_atomic_uint32_ word; /* internal use only */ + struct nsync_dll_element_s_ *waiters; /* internal use only */ +} nsync_mu; + +/* An nsync_mu should be zeroed to initialize, which can be accomplished by + initializing with static initializer NSYNC_MU_INIT, or by setting the entire + structure to all zeroes, or using nsync_mu_init(). */ +#define NSYNC_MU_INIT { NSYNC_ATOMIC_UINT32_INIT_, 0 } +void nsync_mu_init (nsync_mu *mu); + +/* Block until *mu is free and then acquire it in writer mode. + Requires that the calling thread not already hold *mu in any mode. */ +void nsync_mu_lock (nsync_mu *mu); + +/* Unlock *mu, which must have been acquired in write mode by the calling + thread, and wake waiters, if appropriate. */ +void nsync_mu_unlock (nsync_mu *mu); + +/* Attempt to acquire *mu in writer mode without blocking, and return non-zero + iff successful. Return non-zero with high probability if *mu was free + on entry. */ +int nsync_mu_trylock (nsync_mu *mu); + +/* Block until *mu can be acquired in reader mode and then acquire it. + Requires that the calling thread not already hold *mu in any mode. */ +void nsync_mu_rlock (nsync_mu *mu); + +/* Unlock *mu, which must have been acquired in read mode by the calling + thread, and wake waiters, if appropriate. */ +void nsync_mu_runlock (nsync_mu *mu); + +/* Attempt to acquire *mu in reader mode without blocking, and return non-zero + iff successful. Return non-zero with high probability if *mu was free on + entry. Perhaps fail to acquire if a writer is waiting, to avoid starvation. + */ +int nsync_mu_rtrylock (nsync_mu *mu); + +/* May abort if *mu is not held in write mode by the calling thread. */ +void nsync_mu_assert_held (const nsync_mu *mu); + +/* May abort if *mu is not held in read or write mode + by the calling thread. */ +void nsync_mu_rassert_held (const nsync_mu *mu); + +/* Return whether *mu is held in read mode. + Requires that the calling thread holds *mu in some mode. */ +int nsync_mu_is_reader (const nsync_mu *mu); + +NSYNC_CPP_END_ + +#endif /*NSYNC_PUBLIC_NSYNC_MU_H_*/ diff --git a/libc/thread/nsync_mu_wait.h b/libc/thread/nsync_mu_wait.h new file mode 100644 index 000000000..f3432271f --- /dev/null +++ b/libc/thread/nsync_mu_wait.h @@ -0,0 +1,129 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#ifndef NSYNC_PUBLIC_NSYNC_MU_WAIT_H_ +#define NSYNC_PUBLIC_NSYNC_MU_WAIT_H_ + +/* nsync_mu_wait() and nsync_mu_wait_with_deadline() can be used instead of condition + variables. 
In many straightforward situations they are of equivalent
+   performance and are somewhat easier to use, because unlike condition
+   variables, they do not require that the waits be placed in a loop, and they
+   do not require explicit wakeup calls.  Example:
+
+   Definitions:
+        static nsync_mu mu = NSYNC_MU_INIT;
+        static int i = 0; // protected by mu
+        // Condition for use with nsync_mu_wait().
+        static int int_is_zero (const void *v) { return (*(const int *)v == 0); }
+
+   Waiter:
+        nsync_mu_lock (&mu);
+        // Wait until i is zero.
+        nsync_mu_wait (&mu, &int_is_zero, &i, NULL);
+        // i is known to be zero here.
+        // ...
+        nsync_mu_unlock (&mu);
+
+
+   Thread potentially making i zero:
+        nsync_mu_lock (&mu);
+        i--;
+        // No need to signal that i may have become zero.  The unlock call below
+        // will evaluate waiters' conditions to decide which to wake.
+        nsync_mu_unlock (&mu);
+
+   It is legal to use conditional critical sections and condition variables
+   on the same mutex.
+
+   --------------
+
+   The implementation benefits from determining whether waiters are waiting for
+   the same condition; it may then evaluate a condition once on behalf
+   of several waiters.  Two waiters have equal conditions if their "condition"
+   pointers are equal, and either:
+   - their "condition_arg" pointers are equal, or
+   - "condition_arg_eq" is non-null and
+     (*condition_arg_eq) (condition_arg0, condition_arg1) returns non-zero.
+   *condition_arg_eq will not be invoked unless the "condition" pointers
+   are equal, and the "condition_arg" pointers are unequal.
+
+   If many waiters wait for distinct conditions simultaneously, condition
+   variables may be faster.
+ */
+
+#include "libc/thread/nsync_cpp.h"
+#include "libc/thread/nsync_mu.h"
+#include "libc/thread/nsync_time.h"
+
+NSYNC_CPP_START_
+
+struct nsync_note_s_; /* forward declaration for an nsync_note */
+
+/* Return when (*condition) (condition_arg) is true.  Perhaps unlock and relock
+   *mu while blocked waiting for the condition to become true.  nsync_mu_wait()
+   is equivalent to nsync_mu_wait_with_deadline() with
+   abs_deadline==nsync_time_no_deadline, and cancel_note==NULL.
+
+   Requires that *mu be held on entry.
+   See nsync_mu_wait_with_deadline() for more details on *condition and
+   *condition_arg_eq. */
+void nsync_mu_wait (nsync_mu *mu, int (*condition) (const void *condition_arg),
+                    const void *condition_arg,
+                    int (*condition_arg_eq) (const void *a, const void *b));
+
+/* Return when at least one of: (*condition) (condition_arg) is true, the
+   deadline expires, or *cancel_note is notified.  Perhaps unlock and relock *mu
+   while blocked waiting for one of these events, but always return with *mu
+   held.  Return 0 iff (*condition) (condition_arg) is true on return, and
+   otherwise either ETIMEDOUT or ECANCELED, depending on why the call returned
+   early.  Callers should use abs_deadline==nsync_time_no_deadline for no
+   deadline, and cancel_note==NULL for no cancellation.
+
+   Requires that *mu be held on entry.
+
+   The implementation may call *condition from any thread using the mutex, and
+   while holding *mu in either read or write mode; it guarantees that any
+   thread calling *condition will hold *mu in some mode.
+   Requires that (*condition) (condition_arg) neither modify state protected by
+   *mu, nor return a value dependent on state not protected by *mu.  To depend
+   on time, use the abs_deadline parameter.
+   (Conventional use of condition variables has the same restrictions on the
+   conditions tested by the while-loop.)
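+
+   As a purely hypothetical illustration of equal conditions (this helper is
+   not part of the library): two waiters watching the same integer for the
+   same threshold could share one condition function and compare their
+   argument structs with a condition_arg_eq such as:
+
+        struct threshold_arg { int *counter; int threshold; };
+        static int reached (const void *v) {
+                const struct threshold_arg *a = (const struct threshold_arg *) v;
+                return (*a->counter >= a->threshold);
+        }
+        static int threshold_arg_eq (const void *a, const void *b) {
+                const struct threshold_arg *x = (const struct threshold_arg *) a;
+                const struct threshold_arg *y = (const struct threshold_arg *) b;
+                return (x->counter == y->counter && x->threshold == y->threshold);
+        }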
+   If non-null, condition_arg_eq should return whether two condition_arg
+   values passed with the same "condition" pointer are considered equivalent;
+   it should have no side-effects. */
+int nsync_mu_wait_with_deadline (nsync_mu *mu,
+                                 int (*condition) (const void *condition_arg),
+                                 const void *condition_arg,
+                                 int (*condition_arg_eq) (const void *a, const void *b),
+                                 nsync_time abs_deadline,
+                                 struct nsync_note_s_ *cancel_note);
+
+/* Unlock *mu, which must be held in write mode, and wake waiters, if
+   appropriate.  Unlike nsync_mu_unlock(), this call is not required to wake
+   nsync_mu_wait/nsync_mu_wait_with_deadline calls on conditions that were
+   false before this thread acquired the lock.  This call should be used only
+   at the end of critical sections for which:
+   - nsync_mu_wait and/or nsync_mu_wait_with_deadline are in use on the same
+     mutex,
+   - this critical section cannot make the condition true for any of those
+     nsync_mu_wait/nsync_mu_wait_with_deadline waits, and
+   - performance is significantly improved by using this call. */
+void nsync_mu_unlock_without_wakeup (nsync_mu *mu);
+
+NSYNC_MU_WAIT_CPP_OVERLOAD_
+NSYNC_CPP_END_
+
+#endif /*NSYNC_PUBLIC_NSYNC_MU_WAIT_H_*/
diff --git a/libc/thread/nsync_note.h b/libc/thread/nsync_note.h
new file mode 100644
index 000000000..ea4add583
--- /dev/null
+++ b/libc/thread/nsync_note.h
@@ -0,0 +1,68 @@
+// clang-format off
+/* Copyright 2016 Google Inc.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef NSYNC_PUBLIC_NSYNC_NOTE_H_
+#define NSYNC_PUBLIC_NSYNC_NOTE_H_
+
+#include "libc/thread/nsync_cpp.h"
+#include "libc/thread/nsync_time.h"
+
+NSYNC_CPP_START_
+
+/* An nsync_note represents a single bit that can transition from 0 to 1 at
+   most once.  When 1, the note is said to be notified.  There are operations
+   to wait for the transition, which can be triggered either by an explicit
+   call, or by timer expiry.  Notes can have parent notes; a note becomes
+   notified if its parent becomes notified. */
+typedef struct nsync_note_s_ *nsync_note;
+
+/* Return a freshly allocated nsync_note, or NULL if an nsync_note cannot be
+   created.
+
+   If parent!=NULL, the allocated nsync_note's parent will be parent.  The
+   newly allocated note will be automatically notified at abs_deadline, and is
+   notified at initialization if abs_deadline==nsync_time_zero.
+
+   nsync_notes should be passed to nsync_note_free() when no longer needed. */
+nsync_note nsync_note_new (nsync_note parent, nsync_time abs_deadline);
+
+/* Free resources associated with n.  Requires that n was allocated by
+   nsync_note_new(), and no concurrent or future operations are applied to n
+   directly.
+   It is legal to call nsync_note_free() on a note even if it has a parent or
+   children that are in use; if n has both a parent and children, n's
+   parent adopts its children. */
+void nsync_note_free (nsync_note n);
+
+/* Notify n and all its descendants. */
+void nsync_note_notify (nsync_note n);
+
+/* Return whether n has been notified.
*/ +int nsync_note_is_notified (nsync_note n); + +/* Wait until n has been notified or abs_deadline is reached, and return + whether n has been notified. If abs_deadline==nsync_time_no_deadline, + the deadline is far in the future. */ +int nsync_note_wait (nsync_note n, nsync_time abs_deadline); + +/* Return the expiry time associated with n. + This is the minimum of the abs_deadline passed on creation and that of any + of its ancestors. */ +nsync_time nsync_note_expiry (nsync_note n); + +NSYNC_NOTE_CPP_OVERLOAD_ +NSYNC_CPP_END_ + +#endif /*NSYNC_PUBLIC_NSYNC_NOTE_H_*/ diff --git a/libc/thread/nsync_once.h b/libc/thread/nsync_once.h new file mode 100644 index 000000000..5143a82e8 --- /dev/null +++ b/libc/thread/nsync_once.h @@ -0,0 +1,51 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#ifndef NSYNC_PUBLIC_NSYNC_ONCE_H_ +#define NSYNC_PUBLIC_NSYNC_ONCE_H_ + +#include "libc/fmt/conv.h" +#include "libc/inttypes.h" +#include "libc/thread/nsync_cpp.h" +#include "libc/thread/nsync_atomic.h" + +NSYNC_CPP_START_ + +/* An nsync_once allows a function to be called exactly once, when first referenced. */ +typedef nsync_atomic_uint32_ nsync_once; + +/* An initializer for nsync_once; it is guaranteed to be all zeroes. */ +#define NSYNC_ONCE_INIT NSYNC_ATOMIC_UINT32_INIT_ + +/* The first time nsync_run_once() or nsync_run_once_arg() is applied to *once, + the supplied function is run (with argument, in the case of nsync_run_once_arg()). + Other callers will wait until the run of the function is complete, and then + return without running the function again. */ +void nsync_run_once (nsync_once *once, void (*f) (void)); +void nsync_run_once_arg (nsync_once *once, void (*farg) (void *arg), void *arg); + +/* Same as nsync_run_once()/nsync_run_once_arg() but uses a spinloop. + Can be used on the same nsync_once as nsync_run_once/nsync_run_once_arg(). + + These *_spin variants should be used only in contexts where normal blocking + is disallowed, such as within user-space schedulers, when the runtime is + not fully initialized, etc. They provide no significant performance benefit, + and they should be avoided in normal code. */ +void nsync_run_once_spin (nsync_once *once, void (*f) (void)); +void nsync_run_once_arg_spin (nsync_once *once, void (*farg) (void *arg), void *arg); + +NSYNC_CPP_END_ + +#endif /*NSYNC_PUBLIC_NSYNC_ONCE_H_*/ diff --git a/libc/thread/nsync_panic.c b/libc/thread/nsync_panic.c new file mode 100644 index 000000000..a0b144ccc --- /dev/null +++ b/libc/thread/nsync_panic.c @@ -0,0 +1,42 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "libc/thread/headers.h" + +NSYNC_CPP_START_ + +/* Write the nul-terminated string s[] to file descriptor fd. */ +static void writestr (int fd, const char *s) { + int len = strlen (s); + int n = 0; + while (len != 0 && n >= 0) { + n = write (fd, s, len); + if (n >= 0) { + len -= n; + s += n; + } else if (n == -1 && errno == EINTR) { + n = 0; + } + } +} + +/* Abort after printing the nul-terminated string s[]. */ +void nsync_panic_ (const char *s) { + writestr (2, "panic: "); + writestr (2, s); + abort (); +} + +NSYNC_CPP_END_ diff --git a/libc/thread/nsync_semaphore_futex.c b/libc/thread/nsync_semaphore_futex.c new file mode 100644 index 000000000..ac54ae3ce --- /dev/null +++ b/libc/thread/nsync_semaphore_futex.c @@ -0,0 +1,132 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "libc/thread/headers.h" + +NSYNC_CPP_START_ + +static int futex (int *uaddr, int op, int val, const struct timespec *timeout, int *uaddr2, + int val3) { + return (syscall (__NR_futex, uaddr, op, val, timeout, uaddr2, val3)); +} + +/* Check that atomic operations on nsync_atomic_uint32_ can be applied to int. */ +static const int assert_int_size = 1 / + (sizeof (assert_int_size) == sizeof (uint32_t) && + sizeof (nsync_atomic_uint32_) == sizeof (uint32_t)); + +#if defined(FUTEX_PRIVATE_FLAG) +#define FUTEX_PRIVATE_FLAG_ FUTEX_PRIVATE_FLAG +#else +#define FUTEX_PRIVATE_FLAG_ 0 +#endif + +#if defined(FUTEX_WAIT_BITSET) +#define FUTEX_WAIT_ (FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG_ | FUTEX_CLOCK_REALTIME) +#define FUTEX_WAIT_BITS_ FUTEX_BITSET_MATCH_ANY +#else +#define FUTEX_WAIT_ (FUTEX_WAIT | FUTEX_PRIVATE_FLAG_) +#define FUTEX_WAIT_BITS_ 0 +#endif +#define FUTEX_WAKE_ (FUTEX_WAKE | FUTEX_PRIVATE_FLAG_) +#define FUTEX_TIMEOUT_IS_ABSOLUTE (FUTEX_WAIT_BITS_ != 0) + +#define ASSERT(x) do { if (!(x)) { *(volatile int *)0 = 0; } } while (0) + +struct futex { + int i; /* lo half=count; hi half=waiter count */ +}; + +static nsync_semaphore *sem_big_enough_for_futex = (nsync_semaphore *) (uintptr_t)(1 / + (sizeof (struct futex) <= sizeof (*sem_big_enough_for_futex))); + +/* Initialize *s; the initial value is 0. */ +void nsync_mu_semaphore_init (nsync_semaphore *s) { + struct futex *f = (struct futex *) s; + f->i = 0; +} + +/* Wait until the count of *s exceeds 0, and decrement it. 
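+   (The implementation below loads the count, futex-waits while it is zero,
+   and then retries a compare-and-swap to decrement it; the loop is needed
+   because futex wakeups may be spurious and other waiters may win the CAS.)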
*/ +void nsync_mu_semaphore_p (nsync_semaphore *s) { + struct futex *f = (struct futex *) s; + int i; + do { + i = ATM_LOAD ((nsync_atomic_uint32_ *) &f->i); + if (i == 0) { + int futex_result = futex (&f->i, FUTEX_WAIT_, i, NULL, + NULL, FUTEX_WAIT_BITS_); + ASSERT (futex_result == 0 || errno == EINTR || + errno == EWOULDBLOCK); + } + } while (i == 0 || !ATM_CAS_ACQ ((nsync_atomic_uint32_ *) &f->i, i, i-1)); +} + +/* Wait until one of: + the count of *s is non-zero, in which case decrement *s and return 0; + or abs_deadline expires, in which case return ETIMEDOUT. */ +int nsync_mu_semaphore_p_with_deadline (nsync_semaphore *s, nsync_time abs_deadline) { + struct futex *f = (struct futex *)s; + int i; + int result = 0; + do { + i = ATM_LOAD ((nsync_atomic_uint32_ *) &f->i); + if (i == 0) { + int futex_result; + struct timespec ts_buf; + const struct timespec *ts = NULL; + if (nsync_time_cmp (abs_deadline, nsync_time_no_deadline) != 0) { + memset (&ts_buf, 0, sizeof (ts_buf)); + if (FUTEX_TIMEOUT_IS_ABSOLUTE) { + ts_buf.tv_sec = NSYNC_TIME_SEC (abs_deadline); + ts_buf.tv_nsec = NSYNC_TIME_NSEC (abs_deadline); + } else { + nsync_time now; + now = nsync_time_now (); + if (nsync_time_cmp (now, abs_deadline) > 0) { + ts_buf.tv_sec = 0; + ts_buf.tv_nsec = 0; + } else { + nsync_time rel_deadline; + rel_deadline = nsync_time_sub (abs_deadline, now); + ts_buf.tv_sec = NSYNC_TIME_SEC (rel_deadline); + ts_buf.tv_nsec = NSYNC_TIME_NSEC (rel_deadline); + } + } + ts = &ts_buf; + } + futex_result = futex (&f->i, FUTEX_WAIT_, i, ts, NULL, FUTEX_WAIT_BITS_); + ASSERT (futex_result == 0 || errno == EINTR || errno == EWOULDBLOCK || + errno == ETIMEDOUT); + /* Some systems don't wait as long as they are told. */ + if (futex_result == -1 && errno == ETIMEDOUT && + nsync_time_cmp (abs_deadline, nsync_time_now ()) <= 0) { + result = ETIMEDOUT; + } + } + } while (result == 0 && (i == 0 || !ATM_CAS_ACQ ((nsync_atomic_uint32_ *) &f->i, i, i - 1))); + return (result); +} + +/* Ensure that the count of *s is at least 1. */ +void nsync_mu_semaphore_v (nsync_semaphore *s) { + struct futex *f = (struct futex *) s; + uint32_t old_value; + do { + old_value = ATM_LOAD ((nsync_atomic_uint32_ *) &f->i); + } while (!ATM_CAS_REL ((nsync_atomic_uint32_ *) &f->i, old_value, old_value+1)); + ASSERT (futex (&f->i, FUTEX_WAKE_, 1, NULL, NULL, 0) >= 0); +} + +NSYNC_CPP_END_ diff --git a/libc/thread/nsync_time.h b/libc/thread/nsync_time.h new file mode 100644 index 000000000..37cddf526 --- /dev/null +++ b/libc/thread/nsync_time.h @@ -0,0 +1,62 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#ifndef NSYNC_PUBLIC_NSYNC_TIME_H_ +#define NSYNC_PUBLIC_NSYNC_TIME_H_ + +#include "libc/thread/nsync_cpp.h" +#include "libc/thread/nsync_time_internal.h" + +/* The type nsync_time represents the interval elapsed between two moments in + time. 
Often the first such moment is an address-space-wide epoch, such as
+   the Unix epoch, but clients should not rely on the epoch in one address
+   space being the same as that in another.  Intervals relative to the epoch
+   are known as absolute times.
+
+   The internals of nsync_time should be treated as opaque by clients.
+   See nsync_time_internal.h. */
+
+NSYNC_CPP_START_
+
+extern const nsync_time nsync_time_no_deadline; /* A deadline infinitely far in the future. */
+extern const nsync_time nsync_time_zero; /* The zero delay, or an expired deadline. */
+
+nsync_time nsync_time_now (void); /* Return the current time since the epoch. */
+
+/* Sleep for the specified delay.  Returns the unslept time
+   which may be non-zero if the call was interrupted. */
+nsync_time nsync_time_sleep (nsync_time delay);
+
+/* Return a+b */
+nsync_time nsync_time_add (nsync_time a, nsync_time b);
+
+/* Return a-b */
+nsync_time nsync_time_sub (nsync_time a, nsync_time b);
+
+/* Return +ve, 0, or -ve according to whether a>b, a==b, or a<b. */
+int nsync_time_cmp (nsync_time a, nsync_time b);
+
+/* Return the specified number of milliseconds as a time. */
+nsync_time nsync_time_ms (unsigned ms);
+
+NSYNC_CPP_END_
+
+#endif /*NSYNC_PUBLIC_NSYNC_TIME_H_*/
diff --git a/libc/thread/nsync_time_internal.h b/libc/thread/nsync_time_internal.h
new file mode 100644
--- /dev/null
+++ b/libc/thread/nsync_time_internal.h
+// clang-format off
+/* Copyright 2016 Google Inc.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifndef NSYNC_PUBLIC_NSYNC_TIME_INTERNAL_H_
+#define NSYNC_PUBLIC_NSYNC_TIME_INTERNAL_H_
+
+#include "libc/thread/nsync_cpp.h"
+
+#if NSYNC_USE_INT_TIME
+NSYNC_CPP_START_
+typedef NSYNC_USE_INT_TIME nsync_time;
+#define NSYNC_TIME_SEC(t) (sizeof (nsync_time) >= 8? \
+                           (t) / (1000 * 1000 * 1000): \
+                           ((t) / 1000))
+#define NSYNC_TIME_NSEC(t) (sizeof (nsync_time) >= 8? \
+                            (t) % (1000 * 1000 * 1000): \
+                            (((t) % 1000) * 1000 * 1000))
+#define NSYNC_TIME_MAX_ MAX_INT_TYPE (nsync_time)
+NSYNC_CPP_END_
+
+#elif defined(NSYNC_USE_FLOATING_TIME)
+#include "libc/math.h"
+#include "libc/calls/struct/timespec.h"
+#include "libc/calls/struct/timeval.h"
+#include "libc/calls/weirdtypes.h"
+#include "libc/sysv/consts/clock.h"
+#include "libc/sysv/consts/sched.h"
+#include "libc/time/struct/tm.h"
+#include "libc/time/time.h"
+NSYNC_CPP_START_
+typedef NSYNC_USE_FLOATING_TIME nsync_time;
+#define NSYNC_TIME_SEC(t) (trunc ((t) / (nsync_time) (1000 * 1000 * 1000)))
+#define NSYNC_TIME_NSEC(t) ((t) - ((1000 * 1000 * 1000) * NSYNC_TIME_SEC (t)))
+#define NSYNC_TIME_MAX_ DBL_MAX
+NSYNC_CPP_END_
+
+#elif NSYNC_USE_DEBUG_TIME
+/* Check that the library can be built with a different time struct. */
+#include "libc/calls/struct/timespec.h"
+#include "libc/calls/struct/timeval.h"
+#include "libc/calls/weirdtypes.h"
+#include "libc/sysv/consts/clock.h"
+#include "libc/sysv/consts/sched.h"
+#include "libc/time/struct/tm.h"
+#include "libc/time/time.h"
+NSYNC_CPP_START_
+typedef struct {
+        time_t seconds;
+        unsigned nanoseconds;
+} nsync_time;
+#define NSYNC_TIME_SEC(t) ((t).seconds)
+#define NSYNC_TIME_NSEC(t) ((t).nanoseconds)
+NSYNC_CPP_END_
+
+#elif defined(__cplusplus) && \
+    (NSYNC_USE_CPP11_TIMEPOINT || (__cplusplus >= 201103L) || (_MSC_VER >= 1700))
+/* The inline functions below provide function overloads that accept the most
+   likely C++11 time type(s).
+
+   C++11 time types have many variations and subtleties:
+   - There are multiple clocks with potentially differing epochs; these clocks
+     are not necessarily phase-locked to the same rate, making conversion and
+     comparison between clocks tricky.
+   - Relative and absolute times are distinguished in the type system.
+   - Either integral or floating point counters may be used to represent time
+     intervals, and code valid with one may not be valid with the other
+     (see std::chrono::treat_as_floating_point).
+   - A counter increment of one can represent any rational number of seconds
+     (for whatever "seconds" means for this clock).
+   - Conversions between duration types may round or truncate at the
+     implementation's discretion.
+ - As mentioned above, common implementations of the default monotonic clock + ("steady_clock") illegally allow a thread to observe time going backwards, + especially in the face of scheduling on a different CPU, making its use + misleading, at best. + I've chosen to handle this complexity by doing a conversion to absolute + timespec at the interface layer, so all the C++ complication is here, rather + than spread throughout the library. */ + +#include "third_party/libcxx/chrono" +#include "libc/calls/struct/timespec.h" +#include "libc/calls/struct/timeval.h" +#include "libc/calls/weirdtypes.h" +#include "libc/sysv/consts/clock.h" +#include "libc/sysv/consts/sched.h" +#include "libc/time/struct/tm.h" +#include "libc/time/time.h" +NSYNC_CPP_START_ +typedef struct timespec nsync_time; +#define NSYNC_TIME_SEC(t) ((t).tv_sec) +#define NSYNC_TIME_NSEC(t) ((t).tv_nsec) + +typedef std::chrono::system_clock::time_point nsync_cpp_time_point_; +nsync_time nsync_from_time_point_ (nsync_cpp_time_point_); +nsync_cpp_time_point_ nsync_to_time_point_ (nsync_time); +#define NSYNC_COUNTER_CPP_OVERLOAD_ \ + static inline uint32_t nsync_counter_wait (nsync_counter c, \ + nsync_cpp_time_point_ abs_deadline) { \ + return (nsync_counter_wait (c, nsync_from_time_point_ (abs_deadline))); \ + } +#define NSYNC_CV_CPP_OVERLOAD_ \ + static inline int nsync_cv_wait_with_deadline (nsync_cv *cv, nsync_mu *mu, \ + nsync_cpp_time_point_ abs_deadline, struct nsync_note_s_ *cancel_note) { \ + return (nsync_cv_wait_with_deadline (cv, mu, \ + nsync_from_time_point_ (abs_deadline), \ + cancel_note)); \ + } \ + static inline int nsync_cv_wait_with_deadline_generic (nsync_cv *cv, \ + void *mu, void (*lock) (void *), void (*unlock) (void *), \ + nsync_cpp_time_point_ abs_deadline, struct nsync_note_s_ *cancel_note) { \ + return (nsync_cv_wait_with_deadline_generic (cv, mu, lock, unlock, \ + nsync_from_time_point_ (abs_deadline), \ + cancel_note)); \ + } +#define NSYNC_MU_WAIT_CPP_OVERLOAD_ \ + static inline int nsync_mu_wait_with_deadline (nsync_mu *mu, \ + int (*condition) (const void *condition_arg), const void *condition_arg, \ + int (*condition_arg_eq) (const void *a, const void *b), \ + nsync_cpp_time_point_ abs_deadline, struct nsync_note_s_ *cancel_note) { \ + return (nsync_mu_wait_with_deadline (mu, condition, condition_arg, \ + condition_arg_eq, \ + nsync_from_time_point_ (abs_deadline), \ + cancel_note)); \ + } +#define NSYNC_NOTE_CPP_OVERLOAD_ \ + static inline nsync_note nsync_note_new (nsync_note parent, \ + nsync_cpp_time_point_ abs_deadline) { \ + return (nsync_note_new (parent, nsync_from_time_point_ (abs_deadline))); \ + } \ + static inline int nsync_note_wait (nsync_note n, nsync_cpp_time_point_ abs_deadline) { \ + return (nsync_note_wait (n, nsync_from_time_point_ (abs_deadline))); \ + } \ + static inline nsync_cpp_time_point_ nsync_note_expiry_timepoint (nsync_note n) { \ + return (nsync_to_time_point_ (nsync_note_expiry (n))); \ + } +#define NSYNC_WAITER_CPP_OVERLOAD_ \ + static inline int nsync_wait_n (void *mu, void (*lock) (void *), \ + void (*unlock) (void *), \ + nsync_cpp_time_point_ abs_deadline, \ + int count, struct nsync_waitable_s *waitable[]) { \ + return (nsync_wait_n (mu, lock, unlock, \ + nsync_from_time_point_ (abs_deadline), count, waitable)); \ + } + +NSYNC_CPP_END_ + +#else +/* Default is to use timespec. 
*/ +#include "libc/calls/struct/timespec.h" +#include "libc/calls/struct/timeval.h" +#include "libc/calls/weirdtypes.h" +#include "libc/sysv/consts/clock.h" +#include "libc/sysv/consts/sched.h" +#include "libc/time/struct/tm.h" +#include "libc/time/time.h" +NSYNC_CPP_START_ +typedef struct timespec nsync_time; +#define NSYNC_TIME_SEC(t) ((t).tv_sec) +#define NSYNC_TIME_NSEC(t) ((t).tv_nsec) +NSYNC_CPP_END_ + +#endif + +#if !defined(NSYNC_COUNTER_CPP_OVERLOAD_) +#define NSYNC_COUNTER_CPP_OVERLOAD_ +#define NSYNC_CV_CPP_OVERLOAD_ +#define NSYNC_MU_WAIT_CPP_OVERLOAD_ +#define NSYNC_NOTE_CPP_OVERLOAD_ +#define NSYNC_WAITER_CPP_OVERLOAD_ +#endif + +#endif /*NSYNC_PUBLIC_NSYNC_TIME_INTERNAL_H_*/ diff --git a/libc/thread/nsync_waiter.h b/libc/thread/nsync_waiter.h new file mode 100644 index 000000000..d028a1f30 --- /dev/null +++ b/libc/thread/nsync_waiter.h @@ -0,0 +1,153 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#ifndef NSYNC_PUBLIC_NSYNC_WAITER_H_ +#define NSYNC_PUBLIC_NSYNC_WAITER_H_ + +/* nsync_wait_n() allows the client to wait on multiple objects (condition + variables, nsync_notes, nsync_counters, etc.) until at least one of them + becomes ready, or a deadline expires. + + It can be thought of as rather like Unix's select() or poll(), + except the the objects being waited for are synchronization + data structures, rather than file descriptors. + + The client can construct new objects that can be waited for by implementing + three routines. + + Examples: + + To wait on two nsync_notes n0, n1, and a nsync_counter c0, + with a deadline of abs_deadline: + + // Form an array of struct nsync_waitable_s, identifying the + // objects and the corresponding descriptors. (static initialization + // syntax is used for brevity) + static struct nsync_waitable_s w[] = { + { &n0, &nsync_note_waitable_funcs }, + { &n1, &nsync_note_waitable_funcs }, + { &c0, &nsync_counter_waitable_funcs } + }; + static struct nsync_waitable_s *pw[] = { &w[0], &w[1], &w[2] }; + int n = sizeof (w) / sizeof (w[0]); + + // Wait. The mu, lock, and unlock arguments are NULL because + // no condition variables are invovled. + int i = nsync_wait_n (NULL, NULL, NULL, abs_deadline, n, pw); + if (i == n) { + // timeout + } else { + // w[i].v became ready. + } + + To wait on multiple condition variables, the mu/lock/unlock parameters are + used. Imagine cv0 and cv1 are signalled when predicates pred0() (under + lock mu0) and pred1() (under lock mu1) become true respectively. Assume + that mu0 is acquired before mu1. + static void lock2 (void *v) { // lock two mutexes in order + nsync_mu **mu = (nsync_mu **) v; + nsync_mu_lock (mu[0]); + nsync_mu_lock (mu[1]); + } + static void unlock2 (void *v) { // unlock two mutexes. + nsync_mu **mu = (nsync_mu **) v; + nsync_mu_unlock (mu[1]); + nsync_mu_unlock (mu[0]); + } + + // Describe the condition variables and the locks. 
+        static struct nsync_waitable_s w[] = {
+                { &cv0, &nsync_cv_waitable_funcs },
+                { &cv1, &nsync_cv_waitable_funcs }
+        };
+        static struct nsync_waitable_s *pw[] = { &w[0], &w[1] };
+        nsync_mu *lock_list[] = { &mu0, &mu1 };
+        int n = sizeof (w) / sizeof (w[0]);
+
+        lock2 (lock_list);
+        while (!pred0 () && !pred1 ()) {
+                // Wait for one of the condition variables to be signalled,
+                // with no timeout.
+                nsync_wait_n (lock_list, &lock2, &unlock2,
+                              nsync_time_no_deadline, n, pw);
+        }
+        if (pred0 ()) { ... }
+        if (pred1 ()) { ... }
+        unlock2 (lock_list);
+
+ */
+
+#include "libc/thread/nsync_time.h"
+#include "libc/thread/nsync_cpp.h"
+
+NSYNC_CPP_START_
+
+struct nsync_waitable_funcs_s; /* forward declaration of struct that contains
+                                  type-dependent wait operations */
+
+/* Clients wait on objects by forming an array of struct nsync_waitable_s.
+   Each element points to one object and its type-dependent functions. */
+struct nsync_waitable_s {
+        void *v; /* pointer to object */
+        /* pointer to type-dependent functions.  Use
+           &nsync_note_waitable_funcs for an nsync_note,
+           &nsync_counter_waitable_funcs for an nsync_counter,
+           &nsync_cv_waitable_funcs for an nsync_cv. */
+        const struct nsync_waitable_funcs_s *funcs;
+};
+
+/* Wait until at least one of *waitable[0,..,count-1] has been notified, or
+   abs_deadline is reached.  Return the index of the notified element of
+   waitable[], or count if no such element exists.
+   If mu!=NULL, (*unlock)(mu) is called after the thread is queued on the
+   various waiters, and (*lock)(mu) is called before return; mu/lock/unlock are
+   used to acquire and release the relevant locks when waiting on condition
+   variables. */
+int nsync_wait_n (void *mu, void (*lock) (void *), void (*unlock) (void *),
+                  nsync_time abs_deadline, int count,
+                  struct nsync_waitable_s *waitable[]);
+
+/* --------------------------------------------------- */
+
+/* A "struct nsync_waitable_s" implementation must implement these functions.
+   Clients should ignore the internals. */
+struct nsync_waiter_s;
+struct nsync_waitable_funcs_s {
+        /* Return the time when *v will be ready (max time if
+           unknown), or 0 if it is already ready.  The parameter nw may be
+           passed as NULL, in which case the result should indicate whether the
+           thread would block if it were to wait on *v.
+           All calls with the same *v must report the same result until the
+           object becomes ready, from which point calls must report 0. */
+        nsync_time (*ready_time) (void *v, struct nsync_waiter_s *nw);
+
+        /* If *v is ready, return zero; otherwise enqueue *nw on *v and return
+           non-zero. */
+        int (*enqueue) (void *v, struct nsync_waiter_s *nw);
+
+        /* If nw has been previously dequeued, return zero; otherwise dequeue
+           *nw from *v and return non-zero. */
+        int (*dequeue) (void *v, struct nsync_waiter_s *nw);
+};
+
+/* The "struct nsync_waitable_s" for nsync_note, nsync_counter, and nsync_cv. */
+extern const struct nsync_waitable_funcs_s nsync_note_waitable_funcs;
+extern const struct nsync_waitable_funcs_s nsync_counter_waitable_funcs;
+extern const struct nsync_waitable_funcs_s nsync_cv_waitable_funcs;
+
+NSYNC_WAITER_CPP_OVERLOAD_
+NSYNC_CPP_END_
+
+#endif /*NSYNC_PUBLIC_NSYNC_WAITER_H_*/
diff --git a/libc/thread/once.c b/libc/thread/once.c
new file mode 100644
index 000000000..d41ac9e0d
--- /dev/null
+++ b/libc/thread/once.c
@@ -0,0 +1,148 @@
+// clang-format off
+/* Copyright 2016 Google Inc.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "libc/thread/nsync_cpp.h"
+#include "libc/thread/platform.h"
+#include "libc/thread/compiler.h"
+#include "libc/thread/cputype.h"
+#include "libc/thread/nsync.h"
+#include "libc/thread/dll.h"
+#include "libc/thread/sem.h"
+#include "libc/thread/wait_internal.h"
+#include "libc/thread/common.h"
+#include "libc/thread/atomic.h"
+
+NSYNC_CPP_START_
+
+/* A once_sync_s struct contains a lock, and a condition variable on which
+   threads may wait for an nsync_once to be initialized by another thread.
+
+   A separate struct is used only to keep nsync_once small.
+
+   A given nsync_once can be associated with any once_sync_s struct, but cannot
+   be associated with more than one.  nsync_once instances are mapped to
+   once_sync_s instances by a trivial hashing scheme implemented by
+   NSYNC_ONCE_SYNC_().
+
+   The number of once_sync_s structs in the following array is greater than one
+   only to reduce the probability of contention if a great many distinct
+   nsync_once variables are initialized concurrently. */
+static struct once_sync_s {
+        nsync_mu once_mu;
+        nsync_cv once_cv;
+} once_sync[64];
+
+/* Return a pointer to the once_sync_s struct associated with the nsync_once *p. */
+#define NSYNC_ONCE_SYNC_(p) &once_sync[(((uintptr_t) (p)) / sizeof (*(p))) % \
+                                       (sizeof (once_sync) / sizeof (once_sync[0]))]
+
+/* Implement nsync_run_once, nsync_run_once_arg, nsync_run_once_spin, or
+   nsync_run_once_arg_spin, chosen as described below.
+
+   If s!=NULL, s is required to point to the once_sync_s associated with *once,
+   and the semantics of nsync_run_once or nsync_run_once_arg are provided.
+   If s==NULL, the semantics of nsync_run_once_spin, or nsync_run_once_arg_spin
+   are provided.
+
+   If f!=NULL, the semantics of nsync_run_once or nsync_run_once_spin are
+   provided.  Otherwise, farg is required to be non-NULL, and the semantics of
+   nsync_run_once_arg or nsync_run_once_arg_spin are provided.
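+
+   A hypothetical caller-side sketch (names invented for illustration):
+
+        static nsync_once my_once = NSYNC_ONCE_INIT;
+        static void init_tables (void) { .... } // runs at most once
+        ....
+        nsync_run_once (&my_once, &init_tables);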
*/ +static void nsync_run_once_impl (nsync_once *once, struct once_sync_s *s, + void (*f) (void), void (*farg) (void *arg), void *arg) { + uint32_t o = ATM_LOAD_ACQ (once); + if (o != 2) { + unsigned attempts = 0; + if (s != NULL) { + nsync_mu_lock (&s->once_mu); + } + while (o == 0 && !ATM_CAS_ACQ (once, 0, 1)) { + o = ATM_LOAD (once); + } + if (o == 0) { + if (s != NULL) { + nsync_mu_unlock (&s->once_mu); + } + if (f != NULL) { + (*f) (); + } else { + (*farg) (arg); + } + if (s != NULL) { + nsync_mu_lock (&s->once_mu); + nsync_cv_broadcast (&s->once_cv); + } + ATM_STORE_REL (once, 2); + } + while (ATM_LOAD_ACQ (once) != 2) { + if (s != NULL) { + nsync_time deadline; + if (attempts < 50) { + attempts += 10; + } + deadline = nsync_time_add (nsync_time_now (), nsync_time_ms (attempts)); + nsync_cv_wait_with_deadline (&s->once_cv, &s->once_mu, deadline, NULL); + } else { + attempts = nsync_spin_delay_ (attempts); + } + } + if (s != NULL) { + nsync_mu_unlock (&s->once_mu); + } + } +} + +void nsync_run_once (nsync_once *once, void (*f) (void)) { + uint32_t o; + IGNORE_RACES_START (); + o = ATM_LOAD_ACQ (once); + if (o != 2) { + struct once_sync_s *s = NSYNC_ONCE_SYNC_ (once); + nsync_run_once_impl (once, s, f, NULL, NULL); + } + IGNORE_RACES_END (); +} + +void nsync_run_once_arg (nsync_once *once, void (*farg) (void *arg), void *arg) { + uint32_t o; + IGNORE_RACES_START (); + o = ATM_LOAD_ACQ (once); + if (o != 2) { + struct once_sync_s *s = NSYNC_ONCE_SYNC_ (once); + nsync_run_once_impl (once, s, NULL, farg, arg); + } + IGNORE_RACES_END (); +} + +void nsync_run_once_spin (nsync_once *once, void (*f) (void)) { + uint32_t o; + IGNORE_RACES_START (); + o = ATM_LOAD_ACQ (once); + if (o != 2) { + nsync_run_once_impl (once, NULL, f, NULL, NULL); + } + IGNORE_RACES_END (); +} + +void nsync_run_once_arg_spin (nsync_once *once, void (*farg) (void *arg), void *arg) { + uint32_t o; + IGNORE_RACES_START (); + o = ATM_LOAD_ACQ (once); + if (o != 2) { + nsync_run_once_impl (once, NULL, NULL, farg, arg); + } + IGNORE_RACES_END (); +} + +NSYNC_CPP_END_ diff --git a/libc/thread/per_thread_waiter.c b/libc/thread/per_thread_waiter.c new file mode 100644 index 000000000..19fd54f8c --- /dev/null +++ b/libc/thread/per_thread_waiter.c @@ -0,0 +1,49 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include "libc/thread/headers.h" + +NSYNC_CPP_START_ + +static pthread_key_t waiter_key; +static nsync_atomic_uint32_ pt_once; + +static void do_once (nsync_atomic_uint32_ *ponce, void (*dest) (void *)) { + uint32_t o = ATM_LOAD_ACQ (ponce); + if (o != 2) { + while (o == 0 && !ATM_CAS_ACQ (ponce, 0, 1)) { + o = ATM_LOAD (ponce); + } + if (o == 0) { + pthread_key_create (&waiter_key, dest); + ATM_STORE_REL (ponce, 2); + } + while (ATM_LOAD_ACQ (ponce) != 2) { + sched_yield (); + } + } +} + +void *nsync_per_thread_waiter_ (void (*dest) (void *)) { + do_once (&pt_once, dest); + return (pthread_getspecific (waiter_key)); +} + +void nsync_set_per_thread_waiter_ (void *v, void (*dest) (void *)) { + do_once (&pt_once, dest); + pthread_setspecific (waiter_key, v); +} + +NSYNC_CPP_END_ diff --git a/libc/thread/platform.h b/libc/thread/platform.h new file mode 100644 index 000000000..449b55eaf --- /dev/null +++ b/libc/thread/platform.h @@ -0,0 +1,72 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#ifndef NSYNC_PLATFORM_LINUX_PLATFORM_H_ +#define NSYNC_PLATFORM_LINUX_PLATFORM_H_ + +#if !defined(_GNU_SOURCE) +#define _GNU_SOURCE /* for futexes */ +#endif + +#include "libc/mem/alg.h" +#include "libc/str/str.h" +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/runtime/pathconf.h" +#include "libc/runtime/sysconf.h" +#include "libc/sysv/consts/fileno.h" +#include "libc/sysv/consts/o.h" +#include "libc/sysv/consts/ok.h" +#include "third_party/getopt/getopt.h" +#include "libc/errno.h" +#include "libc/mem/alg.h" +#include "libc/fmt/conv.h" +#include "libc/mem/mem.h" +#include "libc/stdio/rand.h" +#include "libc/runtime/runtime.h" +#include "libc/stdio/temp.h" +#include "libc/sysv/consts/exit.h" +#include "third_party/gdtoa/gdtoa.h" + +#include "libc/calls/struct/timespec.h" +#include "libc/calls/struct/timeval.h" +#include "libc/calls/weirdtypes.h" +#include "libc/sysv/consts/clock.h" +#include "libc/sysv/consts/sched.h" +#include "libc/time/struct/tm.h" +#include "libc/time/time.h" +#include "libc/fmt/conv.h" +#include "libc/inttypes.h" +#include "libc/thread/thread.h" +#include "libc/limits.h" +#include "libc/sysv/consts/_posix.h" +#include "libc/sysv/consts/iov.h" +#include "libc/sysv/consts/limits.h" +#include "libc/sysv/consts/xopen.h" +#include "libc/sysv/consts/futex.h" +#include "libc/sysv/consts/nr.h" +#include "libc/calls/calls.h" +#include "libc/thread/thread.h" +#include "libc/thread/thread2.h" +#include "libc/calls/semaphore.internal.h" + +#include "libc/calls/calls.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/lock.internal.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" + + +#endif /*NSYNC_PLATFORM_LINUX_PLATFORM_H_*/ diff --git a/libc/thread/pthread_cond_timedwait.c b/libc/thread/pthread_cond_timedwait.c index 475c30fff..812ebe8d6 100644 --- a/libc/thread/pthread_cond_timedwait.c +++ b/libc/thread/pthread_cond_timedwait.c @@ -48,7 +48,7 @@ */ int pthread_cond_timedwait(pthread_cond_t 
*cond, pthread_mutex_t *mutex, const struct timespec *abstime) { - int rc, err, seq; + int c, rc, err, seq; struct timespec now, rel, *tsp; if (abstime && !(0 <= abstime->tv_nsec && abstime->tv_nsec < 1000000000)) { @@ -56,11 +56,16 @@ int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, return EINVAL; } - if ((err = pthread_mutex_unlock(mutex))) { - return err; + if (mutex->type == PTHREAD_MUTEX_ERRORCHECK) { + c = atomic_load_explicit(&mutex->lock, memory_order_relaxed); + if ((c & 0x000fffff) != gettid()) { + assert(!"permlock"); + return EPERM; + } } atomic_fetch_add(&cond->waits, 1); + if (pthread_mutex_unlock(mutex)) notpossible; rc = 0; seq = atomic_load_explicit(&cond->seq, memory_order_relaxed); @@ -79,11 +84,8 @@ int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, _futex_wait(&cond->seq, seq, cond->pshared, tsp); } while (seq == atomic_load_explicit(&cond->seq, memory_order_relaxed)); + if (pthread_mutex_lock(mutex)) notpossible; atomic_fetch_sub(&cond->waits, 1); - if ((err = pthread_mutex_lock(mutex))) { - return err; - } - return rc; } diff --git a/libc/thread/sem.h b/libc/thread/sem.h new file mode 100644 index 000000000..3659f8445 --- /dev/null +++ b/libc/thread/sem.h @@ -0,0 +1,47 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#ifndef NSYNC_INTERNAL_SEM_H_ +#define NSYNC_INTERNAL_SEM_H_ + +/* A semaphore. + It may be counting or binary, and it need have no destructor. */ + +#include "libc/thread/nsync_time_internal.h" +#include "libc/thread/nsync_cpp.h" + +NSYNC_CPP_START_ + +typedef struct nsync_semaphore_s_ { + void *sem_space[32]; /* space used by implementation */ +} nsync_semaphore; + +/* Initialize *s; the initial value is 0. */ +void nsync_mu_semaphore_init (nsync_semaphore *s); + +/* Wait until the count of *s exceeds 0, and decrement it. */ +void nsync_mu_semaphore_p (nsync_semaphore *s); + +/* Wait until one of: + the count of *s is non-zero, in which case decrement *s and return 0; + or abs_deadline expires, in which case return ETIMEDOUT. */ +int nsync_mu_semaphore_p_with_deadline (nsync_semaphore *s, nsync_time abs_deadline); + +/* Ensure that the count of *s is at least 1. */ +void nsync_mu_semaphore_v (nsync_semaphore *s); + +NSYNC_CPP_END_ + +#endif /*NSYNC_INTERNAL_SEM_H_*/ diff --git a/libc/thread/sem_wait.c b/libc/thread/sem_wait.c new file mode 100644 index 000000000..376d07645 --- /dev/null +++ b/libc/thread/sem_wait.c @@ -0,0 +1,82 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. */ + +#include "libc/thread/nsync_cpp.h" +#include "libc/thread/platform.h" +#include "libc/thread/compiler.h" +#include "libc/thread/cputype.h" +#include "libc/thread/nsync.h" +#include "libc/thread/dll.h" +#include "libc/thread/sem.h" +#include "libc/thread/wait_internal.h" +#include "libc/thread/common.h" +#include "libc/thread/atomic.h" + +NSYNC_CPP_START_ + +/* Wait until one of: + w->sem is non-zero----decrement it and return 0. + abs_deadline expires---return ETIMEDOUT. + cancel_note is non-NULL and *cancel_note becomes notified---return ECANCELED. */ +int nsync_sem_wait_with_cancel_ (waiter *w, nsync_time abs_deadline, + nsync_note cancel_note) { + int sem_outcome; + if (cancel_note == NULL) { + sem_outcome = nsync_mu_semaphore_p_with_deadline (&w->sem, abs_deadline); + } else { + nsync_time cancel_time; + cancel_time = nsync_note_notified_deadline_ (cancel_note); + sem_outcome = ECANCELED; + if (nsync_time_cmp (cancel_time, nsync_time_zero) > 0) { + struct nsync_waiter_s nw; + nw.tag = NSYNC_WAITER_TAG; + nw.sem = &w->sem; + nsync_dll_init_ (&nw.q, &nw); + ATM_STORE (&nw.waiting, 1); + nw.flags = 0; + nsync_mu_lock (&cancel_note->note_mu); + cancel_time = NOTIFIED_TIME (cancel_note); + if (nsync_time_cmp (cancel_time, nsync_time_zero) > 0) { + nsync_time local_abs_deadline; + int deadline_is_nearer = 0; + cancel_note->waiters = nsync_dll_make_last_in_list_ ( + cancel_note->waiters, &nw.q); + local_abs_deadline = cancel_time; + if (nsync_time_cmp (abs_deadline, cancel_time) < 0) { + local_abs_deadline = abs_deadline; + deadline_is_nearer = 1; + } + nsync_mu_unlock (&cancel_note->note_mu); + sem_outcome = nsync_mu_semaphore_p_with_deadline (&w->sem, + local_abs_deadline); + if (sem_outcome == ETIMEDOUT && !deadline_is_nearer) { + sem_outcome = ECANCELED; + nsync_note_notify (cancel_note); + } + nsync_mu_lock (&cancel_note->note_mu); + cancel_time = NOTIFIED_TIME (cancel_note); + if (nsync_time_cmp (cancel_time, + nsync_time_zero) > 0) { + cancel_note->waiters = nsync_dll_remove_ ( + cancel_note->waiters, &nw.q); + } + } + nsync_mu_unlock (&cancel_note->note_mu); + } + } + return (sem_outcome); +} + +NSYNC_CPP_END_ diff --git a/libc/thread/sem_wait_no_note.c b/libc/thread/sem_wait_no_note.c new file mode 100644 index 000000000..76f07b328 --- /dev/null +++ b/libc/thread/sem_wait_no_note.c @@ -0,0 +1,37 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "libc/thread/nsync_cpp.h" +#include "libc/thread/platform.h" +#include "libc/thread/compiler.h" +#include "libc/thread/cputype.h" +#include "libc/thread/nsync.h" +#include "libc/thread/dll.h" +#include "libc/thread/sem.h" +#include "libc/thread/wait_internal.h" +#include "libc/thread/common.h" +#include "libc/thread/atomic.h" + +NSYNC_CPP_START_ + +/* Wait until one of: + w->sem is non-zero----decrement it and return 0. + abs_deadline expires---return ETIMEDOUT. + Ignores cancel_note. 
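+ + For example, a minimal sketch (assuming a waiter w obtained from + nsync_waiter_new_ (), as in wait.c): + + int outcome = nsync_sem_wait_with_cancel_ (w, abs_deadline, NULL); + // outcome is 0 if w->sem was decremented, ETIMEDOUT otherwise + + With cancel_note == NULL, this behaves identically to the full + implementation in sem_wait.c.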
*/ +int nsync_sem_wait_with_cancel_ (waiter *w, nsync_time abs_deadline, nsync_note cancel_note UNUSED) { + return (nsync_mu_semaphore_p_with_deadline (&w->sem, abs_deadline)); +} + +NSYNC_CPP_END_ diff --git a/libc/thread/start_thread.c b/libc/thread/start_thread.c new file mode 100644 index 000000000..c24a2667e --- /dev/null +++ b/libc/thread/start_thread.c @@ -0,0 +1,41 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "libc/thread/headers.h" + +NSYNC_CPP_START_ + +struct thd_args { + void (*f) (void *); + void *arg; +}; + +static void *body (void *v) { + struct thd_args *args = (struct thd_args *) v; + (*args->f) (args->arg); + free (args); + return (NULL); +} + +void nsync_start_thread_ (void (*f) (void *), void *arg) { + struct thd_args *args = (struct thd_args *) malloc (sizeof (*args)); + pthread_t t; + args->f = f; + args->arg = arg; + pthread_create (&t, NULL, body, args); + pthread_detach (t); +} + +NSYNC_CPP_END_ diff --git a/libc/thread/thread.mk b/libc/thread/thread.mk index c0cff1e2e..3e4e2ca44 100644 --- a/libc/thread/thread.mk +++ b/libc/thread/thread.mk @@ -47,10 +47,6 @@ $(LIBC_THREAD_A).pkg: \ $(LIBC_THREAD_A_OBJS) \ $(foreach x,$(LIBC_THREAD_A_DIRECTDEPS),$($(x)_A).pkg) -o/tinylinux/libc/thread/clone.o: private \ - OVERRIDE_CFLAGS += \ - -ffunction-sections - LIBC_THREAD_LIBS = $(foreach x,$(LIBC_THREAD_ARTIFACTS),$($(x))) LIBC_THREAD_SRCS = $(foreach x,$(LIBC_THREAD_ARTIFACTS),$($(x)_SRCS)) LIBC_THREAD_HDRS = $(foreach x,$(LIBC_THREAD_ARTIFACTS),$($(x)_HDRS)) diff --git a/libc/thread/time_internal.c b/libc/thread/time_internal.c new file mode 100644 index 000000000..8b17c12ed --- /dev/null +++ b/libc/thread/time_internal.c @@ -0,0 +1,34 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include "libc/thread/nsync_cpp.h" +#include "libc/thread/platform.h" +#include "libc/thread/compiler.h" +#include "libc/thread/cputype.h" +#include "libc/thread/nsync_time.h" + +NSYNC_CPP_START_ + +nsync_time nsync_time_ms (unsigned ms) { + unsigned s = ms / 1000; + return (nsync_time_s_ns (s, 1000 * 1000 * (ms % 1000))); +} + +nsync_time nsync_time_us (unsigned us) { + unsigned s = us / (1000 * 1000); + return (nsync_time_s_ns (s, 1000 * (us % (1000 * 1000)))); +} + +NSYNC_CPP_END_ diff --git a/libc/thread/time_rep.c b/libc/thread/time_rep.c new file mode 100644 index 000000000..74c5d45d2 --- /dev/null +++ b/libc/thread/time_rep.c @@ -0,0 +1,96 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "libc/thread/nsync_cpp.h" +#include "libc/thread/platform.h" +#include "libc/thread/compiler.h" +#include "libc/thread/cputype.h" +#include "libc/thread/nsync_time_init.h" +#include "libc/thread/nsync_time.h" + +NSYNC_CPP_START_ + +#define NSYNC_NS_IN_S_ (1000 * 1000 * 1000) + +/* Return the maximum t, assuming it's an integral + type, and the representation is not too strange. */ +#define MAX_INT_TYPE(t) (((t)~(t)0) > 1? /*is t unsigned?*/ \ + (t)~(t)0 : /*unsigned*/ \ + (t) ((((uintmax_t)1) << (sizeof (t) * CHAR_BIT - 1)) - 1)) /*signed*/ + +const nsync_time nsync_time_no_deadline = + NSYNC_TIME_STATIC_INIT (MAX_INT_TYPE (time_t), NSYNC_NS_IN_S_ - 1); + +const nsync_time nsync_time_zero = NSYNC_TIME_STATIC_INIT (0, 0); + +nsync_time nsync_time_s_ns (time_t s, unsigned ns) { + nsync_time t; + memset (&t, 0, sizeof (t)); + t.tv_sec = s; + t.tv_nsec = ns; + return (t); +} + +nsync_time nsync_time_now (void) { + struct timespec ts; + clock_gettime (CLOCK_REALTIME, &ts); + return (ts); +} + +nsync_time nsync_time_sleep (nsync_time delay) { + struct timespec ts; + struct timespec remain; + memset (&ts, 0, sizeof (ts)); + ts.tv_sec = NSYNC_TIME_SEC (delay); + ts.tv_nsec = NSYNC_TIME_NSEC (delay); + if (nanosleep (&ts, &remain) == 0) { + /* nanosleep() is not required to fill in "remain" + if it returns 0. 
*/ + memset (&remain, 0, sizeof (remain)); + } + return (remain); +} + +nsync_time nsync_time_add (nsync_time a, nsync_time b) { + a.tv_sec += b.tv_sec; + a.tv_nsec += b.tv_nsec; + if (a.tv_nsec >= NSYNC_NS_IN_S_) { + a.tv_nsec -= NSYNC_NS_IN_S_; + a.tv_sec++; + } + return (a); +} + +nsync_time nsync_time_sub (nsync_time a, nsync_time b) { + a.tv_sec -= b.tv_sec; + if (a.tv_nsec < b.tv_nsec) { + a.tv_nsec += NSYNC_NS_IN_S_; + a.tv_sec--; + } + a.tv_nsec -= b.tv_nsec; + return (a); +} + +int nsync_time_cmp (nsync_time a, nsync_time b) { + int cmp = (NSYNC_TIME_SEC (a) > NSYNC_TIME_SEC (b)) - + (NSYNC_TIME_SEC (a) < NSYNC_TIME_SEC (b)); + if (cmp == 0) { + cmp = (NSYNC_TIME_NSEC (a) > NSYNC_TIME_NSEC (b)) - + (NSYNC_TIME_NSEC (a) < NSYNC_TIME_NSEC (b)); + } + return (cmp); +} + +NSYNC_CPP_END_ diff --git a/libc/thread/wait.c b/libc/thread/wait.c new file mode 100644 index 000000000..4b562c0f0 --- /dev/null +++ b/libc/thread/wait.c @@ -0,0 +1,104 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "libc/thread/nsync_cpp.h" +#include "libc/thread/platform.h" +#include "libc/thread/compiler.h" +#include "libc/thread/cputype.h" +#include "libc/thread/nsync.h" +#include "libc/thread/sem.h" +#include "libc/thread/dll.h" +#include "libc/thread/wait_internal.h" +#include "libc/thread/common.h" +#include "libc/thread/atomic.h" + +NSYNC_CPP_START_ + +int nsync_wait_n (void *mu, void (*lock) (void *), void (*unlock) (void *), + nsync_time abs_deadline, + int count, struct nsync_waitable_s *waitable[]) { + int ready; + IGNORE_RACES_START (); + for (ready = 0; ready != count && + nsync_time_cmp ((*waitable[ready]->funcs->ready_time) ( + waitable[ready]->v, NULL), + nsync_time_zero) > 0; + ready++) { + } + if (ready == count && nsync_time_cmp (abs_deadline, nsync_time_zero) > 0) { + int i; + int unlocked = 0; + int j; + int enqueued = 1; + waiter *w = nsync_waiter_new_ (); + struct nsync_waiter_s nw_set[4]; + struct nsync_waiter_s *nw = nw_set; + if (count > (int) (sizeof (nw_set) / sizeof (nw_set[0]))) { + nw = (struct nsync_waiter_s *) malloc (count * sizeof (nw[0])); + } + for (i = 0; i != count && enqueued; i++) { + nw[i].tag = NSYNC_WAITER_TAG; + nw[i].sem = &w->sem; + nsync_dll_init_ (&nw[i].q, &nw[i]); + ATM_STORE (&nw[i].waiting, 0); + nw[i].flags = 0; + enqueued = (*waitable[i]->funcs->enqueue) (waitable[i]->v, &nw[i]); + } + + if (i == count) { + nsync_time min_ntime; + if (mu != NULL) { + (*unlock) (mu); + unlocked = 1; + } + do { + min_ntime = abs_deadline; + for (j = 0; j != count; j++) { + nsync_time ntime; + ntime = (*waitable[j]->funcs->ready_time) ( + waitable[j]->v, &nw[j]); + if (nsync_time_cmp (ntime, min_ntime) < 0) { + min_ntime = ntime; + } + } + } while (nsync_time_cmp (min_ntime, nsync_time_zero) > 0 && + nsync_mu_semaphore_p_with_deadline (&w->sem, + min_ntime) == 0); + } + + /* An attempt was made above to enqueue waitable[0..i-1]. 
+ Dequeue any that are still enqueued, and remember the index + of the first ready (i.e., not still enqueued) object, if any. */ + for (j = 0; j != i; j++) { + int was_still_enqueued = + (*waitable[j]->funcs->dequeue) (waitable[j]->v, &nw[j]); + if (!was_still_enqueued && ready == count) { + ready = j; + } + } + + if (nw != nw_set) { + free (nw); + } + nsync_waiter_free_ (w); + if (unlocked) { + (*lock) (mu); + } + } + IGNORE_RACES_END (); + return (ready); +} + +NSYNC_CPP_END_ diff --git a/libc/thread/wait_internal.h b/libc/thread/wait_internal.h new file mode 100644 index 000000000..828614444 --- /dev/null +++ b/libc/thread/wait_internal.h @@ -0,0 +1,40 @@ +// clang-format off +/* Copyright 2016 Google Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#ifndef NSYNC_INTERNAL_WAIT_INTERNAL_H_ +#define NSYNC_INTERNAL_WAIT_INTERNAL_H_ +#include "libc/thread/dll.h" +#include "libc/thread/nsync_atomic.h" +#include "libc/thread/nsync_cpp.h" + +NSYNC_CPP_START_ + +/* Implementations of "struct nsync_waitable_s" must provide functions in struct + nsync_waitable_funcs_s (see public/nsync_wait.h). When nsync_wait_n() waits + on a client's object, those functions are called with v pointing to the + client's object and nw pointing to a struct nsync_waiter_s. */ +struct nsync_waiter_s { + uint32_t tag; /* used for debugging */ + nsync_dll_element_ q; /* used to link children of parent */ + nsync_atomic_uint32_ waiting; /* non-zero <=> the waiter is waiting */ + struct nsync_semaphore_s_ *sem; /* *sem will be Ved when waiter is woken */ + uint32_t flags; /* see below */ +}; + +#define NSYNC_WAITER_FLAG_MUCV 0x1 /* set if waiter is embedded in Mu/CV's internal structures */ + +NSYNC_CPP_END_ + +#endif /*NSYNC_INTERNAL_WAIT_INTERNAL_H_*/ diff --git a/third_party/third_party.mk b/third_party/third_party.mk index c7f65bec0..d4c57215a 100644 --- a/third_party/third_party.mk +++ b/third_party/third_party.mk @@ -23,9 +23,9 @@ o/$(MODE)/third_party: \ o/$(MODE)/third_party/python \ o/$(MODE)/third_party/quickjs \ o/$(MODE)/third_party/regex \ + o/$(MODE)/third_party/sed \ o/$(MODE)/third_party/smallz4 \ o/$(MODE)/third_party/sqlite3 \ - o/$(MODE)/third_party/sed \ o/$(MODE)/third_party/stb \ o/$(MODE)/third_party/tidy \ o/$(MODE)/third_party/unzip \