From 58352df0a44f295fd52696b11fd0cc1c20f389ae Mon Sep 17 00:00:00 2001
From: Justine Tunney <jtunney@gmail.com>
Date: Sun, 30 Jul 2023 08:55:01 -0700
Subject: [PATCH] Make forking off threads reliable on Windows

This change makes posix_spawn_test no longer flaky on Windows by (1)
fixing a race condition in wait() and (2) removing a misguided vfork
implementation that was letting Windows bypass pthread_atfork().
---
 libc/calls/__sig2.c                |  30 +++---
 libc/calls/clock_nanosleep-nt.c    |   4 +-
 libc/calls/fdatasync-nt.c          |   2 +-
 libc/calls/internal.h              |   5 +-
 libc/calls/interrupts-nt.c         |   6 +-
 libc/calls/kill-nt.c               |   4 +-
 libc/calls/pause-nt.c              |   2 +-
 libc/calls/poll-nt.c               |   4 +-
 libc/calls/read-nt.c               |   4 +-
 libc/calls/sig.internal.h          |   4 +-
 libc/calls/sigchld-nt.c            |  50 +++++-----
 libc/calls/sigprocmask.c           |   2 +-
 libc/calls/sigsuspend.c            |   2 +-
 libc/calls/tcdrain.c               |   2 +-
 libc/calls/wait4-nt.c              | 153 +++++++++++++++--------------
 libc/calls/wait4.c                 |   4 +-
 libc/runtime/vfork.S               |  48 ++-------
 libc/sock/accept-nt.c              |   2 +-
 libc/sock/recv-nt.c                |   3 +-
 libc/sock/recvfrom-nt.c            |   3 +-
 libc/sock/send-nt.c                |   3 +-
 libc/sock/sendfile.c               |   2 +-
 libc/sock/sendto-nt.c              |   3 +-
 libc/sock/syscall_fd.internal.h    |   2 +-
 libc/sock/wsablock.c               |   7 +-
 libc/stdio/getrandom.c             |   4 +-
 test/libc/stdio/fds_torture_test.c |  56 +++++++++++
 test/libc/stdio/posix_spawn_test.c |   3 +
 test/libc/stdio/test.mk            |   1 +
 third_party/nsync/futex.c          |   2 +-
 30 files changed, 230 insertions(+), 187 deletions(-)
 create mode 100644 test/libc/stdio/fds_torture_test.c

diff --git a/libc/calls/__sig2.c b/libc/calls/__sig2.c
index 1b6439665..b0af84096 100644
--- a/libc/calls/__sig2.c
+++ b/libc/calls/__sig2.c
@@ -19,6 +19,7 @@
 #include "ape/sections.internal.h"
 #include "libc/assert.h"
 #include "libc/calls/calls.h"
+#include "libc/calls/internal.h"
 #include "libc/calls/sig.internal.h"
 #include "libc/calls/state.internal.h"
 #include "libc/calls/struct/sigaction.h"
@@ -75,12 +76,14 @@ textwindows int __sig_is_applicable(struct Signal *s) {
  * Dequeues signal that isn't masked.
  * @return signal or null if empty or none unmasked
  */
-static textwindows struct Signal *__sig_remove(void) {
+static textwindows struct Signal *__sig_remove(int sigops) {
   struct Signal *prev, *res;
   if (__sig.queue) {
     __sig_lock();
     for (prev = 0, res = __sig.queue; res; prev = res, res = res->next) {
-      if (__sig_is_applicable(res) && !__sig_is_masked(res->sig)) {
+      if (__sig_is_applicable(res) &&    //
+          !__sig_is_masked(res->sig) &&  //
+          !((sigops & kSigOpNochld) && res->sig == SIGCHLD)) {
         if (res == __sig.queue) {
           __sig.queue = res->next;
         } else if (prev) {
@@ -102,8 +105,7 @@ static textwindows struct Signal *__sig_remove(void) {
  * @note called from main thread
  * @return true if EINTR should be returned by caller
  */
-static bool __sig_deliver(bool restartable, int sig, int si_code,
-                          ucontext_t *ctx) {
+static bool __sig_deliver(int sigops, int sig, int si_code, ucontext_t *ctx) {
   unsigned rva, flags;
   siginfo_t info, *infop;
   STRACE("delivering %G", sig);
@@ -145,7 +147,7 @@ static bool __sig_deliver(bool restartable, int sig, int si_code,
     }
   }
 
-  if (!restartable) {
+  if (!(sigops & kSigOpRestartable)) {
     return true;  // always send EINTR for wait4(), poll(), etc.
   } else if (flags & SA_RESTART) {
     STRACE("restarting syscall on %G", sig);
@@ -168,11 +170,9 @@ static textwindows bool __sig_is_fatal(int sig) {
 
 /**
  * Handles signal.
- *
- * @param restartable can be used to suppress true return if SA_RESTART
  * @return true if signal was delivered
  */
-bool __sig_handle(bool restartable, int sig, int si_code, ucontext_t *ctx) {
+bool __sig_handle(int sigops, int sig, int si_code, ucontext_t *ctx) {
   bool delivered;
   switch (__sighandrvas[sig]) {
     case (intptr_t)SIG_DFL:
@@ -186,7 +186,7 @@ bool __sig_handle(bool restartable, int sig, int si_code, ucontext_t *ctx) {
       delivered = false;
       break;
     default:
-      delivered = __sig_deliver(restartable, sig, si_code, ctx);
+      delivered = __sig_deliver(sigops, sig, si_code, ctx);
       break;
   }
   return delivered;
@@ -226,7 +226,9 @@ textwindows int __sig_add(int tid, int sig, int si_code) {
   int rc;
   struct Signal *mem;
   if (1 <= sig && sig <= 64) {
-    if (__sighandrvas[sig] == (unsigned)(intptr_t)SIG_IGN) {
+    if (__sighandrvas[sig] == (unsigned)(uintptr_t)SIG_IGN ||
+        (__sighandrvas[sig] == (unsigned)(uintptr_t)SIG_DFL &&
+         !__sig_is_fatal(sig))) {
       STRACE("ignoring %G", sig);
       rc = 0;
     } else {
@@ -253,19 +255,17 @@ textwindows int __sig_add(int tid, int sig, int si_code) {
 
 /**
  * Checks for unblocked signals and delivers them on New Technology.
- *
- * @param restartable is for functions like read() but not poll()
  * @return true if EINTR should be returned by caller
  * @note called from main thread
  * @threadsafe
  */
-textwindows bool __sig_check(bool restartable) {
+textwindows bool __sig_check(int sigops) {
   unsigned rva;
   bool delivered;
   struct Signal *sig;
   delivered = false;
-  while ((sig = __sig_remove())) {
-    delivered |= __sig_handle(restartable, sig->sig, sig->si_code, 0);
+  while ((sig = __sig_remove(sigops))) {
+    delivered |= __sig_handle(sigops, sig->sig, sig->si_code, 0);
     __sig_free(sig);
   }
   return delivered;
diff --git a/libc/calls/clock_nanosleep-nt.c b/libc/calls/clock_nanosleep-nt.c
index 5bdac12ae..bc87ab855 100644
--- a/libc/calls/clock_nanosleep-nt.c
+++ b/libc/calls/clock_nanosleep-nt.c
@@ -34,7 +34,7 @@ textwindows int sys_clock_nanosleep_nt(int clock, int flags,
     for (;;) {
       if (sys_clock_gettime_nt(clock, &now)) return -1;
       if (timespec_cmp(now, abs) >= 0) return 0;
-      if (_check_interrupts(false, g_fds.p)) return -1;
+      if (_check_interrupts(0, g_fds.p)) return -1;
       SleepEx(MIN(__SIG_POLLING_INTERVAL_MS,
                   timespec_tomillis(timespec_sub(abs, now))),
               false);
@@ -45,7 +45,7 @@ textwindows int sys_clock_nanosleep_nt(int clock, int flags,
     for (;;) {
       sys_clock_gettime_nt(clock, &now);
       if (timespec_cmp(now, abs) >= 0) return 0;
-      if (_check_interrupts(false, g_fds.p)) {
+      if (_check_interrupts(0, g_fds.p)) {
         if (rem) *rem = timespec_sub(abs, now);
         return -1;
       }
diff --git a/libc/calls/fdatasync-nt.c b/libc/calls/fdatasync-nt.c
index 2cc02a178..7df26c05d 100644
--- a/libc/calls/fdatasync-nt.c
+++ b/libc/calls/fdatasync-nt.c
@@ -23,6 +23,6 @@
 textwindows int sys_fdatasync_nt(int fd) {
   // TODO(jart): what should we do with worker pipes?
   if (!__isfdkind(fd, kFdFile)) return ebadf();
-  if (_check_interrupts(false, 0)) return -1;
+  if (_check_interrupts(0, 0)) return -1;
   return FlushFileBuffers(g_fds.p[fd].handle) ? 0 : -1;
 }
diff --git a/libc/calls/internal.h b/libc/calls/internal.h
index 7025c6ccb..69ec14cfe 100644
--- a/libc/calls/internal.h
+++ b/libc/calls/internal.h
@@ -7,6 +7,9 @@
 
 #define kSigactionMinRva 8 /* >SIG_{ERR,DFL,IGN,...} */
 
+#define kSigOpRestartable 1
+#define kSigOpNochld      2
+
 #if !(__ASSEMBLER__ + __LINKER__ + 0)
 COSMOPOLITAN_C_START_
 
@@ -35,7 +38,7 @@ forceinline bool __isfdkind(int fd, int kind) {
 }
 
 int sys_close_nt(struct Fd *, int);
-int _check_interrupts(bool, struct Fd *);
+int _check_interrupts(int, struct Fd *);
 int sys_openat_metal(int, const char *, int, unsigned);
 
 COSMOPOLITAN_C_END_
diff --git a/libc/calls/interrupts-nt.c b/libc/calls/interrupts-nt.c
index 4757e74df..4341d37bc 100644
--- a/libc/calls/interrupts-nt.c
+++ b/libc/calls/interrupts-nt.c
@@ -33,7 +33,7 @@
 #include "libc/thread/thread.h"
 #include "libc/thread/tls.h"
 
-textwindows int _check_interrupts(bool restartable, struct Fd *fd) {
+textwindows int _check_interrupts(int sigops, struct Fd *fd) {
   int e, rc;
   e = errno;
   if (_weaken(pthread_testcancel_np) &&
@@ -45,14 +45,14 @@ textwindows int _check_interrupts(bool restartable, struct Fd *fd) {
     _weaken(_check_sigalrm)();
   }
   if (!__tls_enabled || !(__get_tls()->tib_flags & TIB_FLAG_TIME_CRITICAL)) {
-    if (_weaken(_check_sigchld)) {
+    if (!(sigops & kSigOpNochld) && _weaken(_check_sigchld)) {
       _weaken(_check_sigchld)();
     }
     if (fd && _weaken(_check_sigwinch)) {
       _weaken(_check_sigwinch)(fd);
     }
   }
-  if (_weaken(__sig_check) && _weaken(__sig_check)(restartable)) {
+  if (_weaken(__sig_check) && _weaken(__sig_check)(sigops)) {
     return eintr();
   }
   errno = e;
diff --git a/libc/calls/kill-nt.c b/libc/calls/kill-nt.c
index 1aef29ce7..b60ef1522 100644
--- a/libc/calls/kill-nt.c
+++ b/libc/calls/kill-nt.c
@@ -88,10 +88,9 @@ textwindows int sys_kill_nt(int pid, int sig) {
     // since windows can't execve we need to kill the grandchildren
     // TODO(jart): should we just kill the whole tree too? there's
     //             no obvious way to tell if it's the execve shell
-    int64_t hSnap, hProc, hChildProc;
     struct NtProcessEntry32 pe = {.dwSize = sizeof(struct NtProcessEntry32)};
     ntpid = GetProcessId(g_fds.p[pid].handle);
-    hSnap = CreateToolhelp32Snapshot(kNtTh32csSnapprocess, 0);
+    int64_t hSnap = CreateToolhelp32Snapshot(kNtTh32csSnapprocess, 0);
     if (Process32First(hSnap, &pe)) {
       do {
         if (pe.th32ParentProcessID == ntpid) {
@@ -102,6 +101,7 @@ textwindows int sys_kill_nt(int pid, int sig) {
         }
       } while (Process32Next(hSnap, &pe));
     }
+    CloseHandle(hSnap);
     ok = TerminateProcess(g_fds.p[pid].handle, 128 + sig);
     if (!ok && GetLastError() == kNtErrorAccessDenied) ok = true;
     return 0;
diff --git a/libc/calls/pause-nt.c b/libc/calls/pause-nt.c
index b1b7d2300..6013c870f 100644
--- a/libc/calls/pause-nt.c
+++ b/libc/calls/pause-nt.c
@@ -28,7 +28,7 @@
 textwindows int sys_pause_nt(void) {
   for (;;) {
 
-    if (_check_interrupts(false, g_fds.p)) {
+    if (_check_interrupts(0, g_fds.p)) {
       return -1;
     }
 
diff --git a/libc/calls/poll-nt.c b/libc/calls/poll-nt.c
index 05b85223b..beb1fd66c 100644
--- a/libc/calls/poll-nt.c
+++ b/libc/calls/poll-nt.c
@@ -65,7 +65,7 @@ textwindows int sys_poll_nt(struct pollfd *fds, uint64_t nfds, uint64_t *ms,
   if (sigmask) {
     __sig_mask(SIG_SETMASK, sigmask, &oldmask);
   }
-  if ((rc = _check_interrupts(false, g_fds.p))) {
+  if ((rc = _check_interrupts(0, g_fds.p))) {
     goto ReturnPath;
   }
 
@@ -190,7 +190,7 @@ textwindows int sys_poll_nt(struct pollfd *fds, uint64_t nfds, uint64_t *ms,
     }
     // otherwise loop limitlessly for timeout to elapse while
     // checking for signal delivery interrupts, along the way
-    if ((rc = _check_interrupts(false, g_fds.p))) {
+    if ((rc = _check_interrupts(0, g_fds.p))) {
       goto ReturnPath;
     }
   }
diff --git a/libc/calls/read-nt.c b/libc/calls/read-nt.c
index 4f965661a..ba0ecdd3d 100644
--- a/libc/calls/read-nt.c
+++ b/libc/calls/read-nt.c
@@ -53,7 +53,7 @@ static textwindows ssize_t sys_read_nt_impl(struct Fd *fd, void *data,
       if (fd->flags & O_NONBLOCK) {
         return eagain();
       }
-      if (_check_interrupts(true, g_fds.p)) {
+      if (_check_interrupts(kSigOpRestartable, g_fds.p)) {
         POLLTRACE("sys_read_nt interrupted");
         return -1;
       }
@@ -105,7 +105,7 @@ textwindows ssize_t sys_read_nt(struct Fd *fd, const struct iovec *iov,
   uint32_t size;
   size_t i, total;
   if (opt_offset < -1) return einval();
-  if (_check_interrupts(true, fd)) return -1;
+  if (_check_interrupts(kSigOpRestartable, fd)) return -1;
   while (iovlen && !iov[0].iov_len) iov++, iovlen--;
   if (iovlen) {
     for (total = i = 0; i < iovlen; ++i) {
diff --git a/libc/calls/sig.internal.h b/libc/calls/sig.internal.h
index 508e8568b..bfd15461b 100644
--- a/libc/calls/sig.internal.h
+++ b/libc/calls/sig.internal.h
@@ -28,8 +28,8 @@ struct Signals {
 extern struct Signals __sig;
 extern atomic_long __sig_count;
 
-bool __sig_check(bool);
-bool __sig_handle(bool, int, int, ucontext_t *);
+bool __sig_check(int);
+bool __sig_handle(int, int, int, ucontext_t *);
 int __sig_add(int, int, int);
 int __sig_mask(int, const sigset_t *, sigset_t *);
 int __sig_raise(int, int);
diff --git a/libc/calls/sigchld-nt.c b/libc/calls/sigchld-nt.c
index 06f5eedbb..92a16195f 100644
--- a/libc/calls/sigchld-nt.c
+++ b/libc/calls/sigchld-nt.c
@@ -24,7 +24,6 @@
 #include "libc/calls/struct/siginfo.h"
 #include "libc/calls/syscall_support-nt.internal.h"
 #include "libc/dce.h"
-#include "libc/intrin/strace.internal.h"
 #include "libc/nt/enum/wait.h"
 #include "libc/nt/runtime.h"
 #include "libc/nt/synchronization.h"
@@ -34,40 +33,37 @@
 
 #ifdef __x86_64__
 
+static textwindows bool CheckForExitedChildProcess(void) {
+  int pids[64];
+  uint32_t i, n;
+  int64_t handles[64];
+  if (!(n = __sample_pids(pids, handles, true))) return false;
+  i = WaitForMultipleObjects(n, handles, false, 0);
+  if (i == kNtWaitFailed) return false;
+  if (i == kNtWaitTimeout) return false;
+  if ((__sighandrvas[SIGCHLD] >= kSigactionMinRva) &&
+      (__sighandflags[SIGCHLD] & SA_NOCLDWAIT)) {
+    CloseHandle(handles[i]);
+    __releasefd(pids[i]);
+  } else {
+    g_fds.p[pids[i]].zombie = true;
+  }
+  return true;
+}
+
 /**
  * Checks to see if SIGCHLD should be raised on Windows.
  * @return true if a signal was raised
  * @note yoinked by fork-nt.c
  */
-void _check_sigchld(void) {
-  siginfo_t si;
-  int pids[64];
-  uint32_t i, n;
-  int64_t handles[64];
+textwindows void _check_sigchld(void) {
+  bool should_signal;
   __fds_lock();
-  n = __sample_pids(pids, handles, true);
+  should_signal = CheckForExitedChildProcess();
   __fds_unlock();
-  if (!n) return;
-  i = WaitForMultipleObjects(n, handles, false, 0);
-  if (i == kNtWaitTimeout) return;
-  if (i == kNtWaitFailed) {
-    NTTRACE("%s failed %u", "WaitForMultipleObjects", GetLastError());
-    return;
+  if (should_signal) {
+    __sig_add(0, SIGCHLD, CLD_EXITED);
   }
-  if (__sighandrvas[SIGCHLD] == (intptr_t)SIG_IGN ||
-      __sighandrvas[SIGCHLD] == (intptr_t)SIG_DFL) {
-    NTTRACE("new zombie fd=%d handle=%ld", pids[i], handles[i]);
-    return;
-  }
-  if (__sighandflags[SIGCHLD] & SA_NOCLDWAIT) {
-    NTTRACE("SIGCHILD SA_NOCLDWAIT fd=%d handle=%ld", pids[i], handles[i]);
-    CloseHandle(handles[i]);
-    __releasefd(pids[i]);
-  }
-  __fds_lock();
-  g_fds.p[pids[i]].zombie = true;
-  __fds_unlock();
-  __sig_add(0, SIGCHLD, CLD_EXITED);
 }
 
 #endif /* __x86_64__ */
diff --git a/libc/calls/sigprocmask.c b/libc/calls/sigprocmask.c
index 268490e59..389ab619d 100644
--- a/libc/calls/sigprocmask.c
+++ b/libc/calls/sigprocmask.c
@@ -61,7 +61,7 @@ int sigprocmask(int how, const sigset_t *opt_set, sigset_t *opt_out_oldset) {
   } else if (IsMetal() || IsWindows()) {
     rc = __sig_mask(how, opt_set, &old);
     if (_weaken(__sig_check)) {
-      _weaken(__sig_check)(true);
+      _weaken(__sig_check)(kSigOpRestartable);
     }
   } else {
     rc = sys_sigprocmask(how, opt_set, opt_out_oldset ? &old : 0);
diff --git a/libc/calls/sigsuspend.c b/libc/calls/sigsuspend.c
index 82457d0a5..e8f53d9a9 100644
--- a/libc/calls/sigsuspend.c
+++ b/libc/calls/sigsuspend.c
@@ -77,7 +77,7 @@ int sigsuspend(const sigset_t *ignore) {
       long totoms = 0;
 #endif
       do {
-        if ((rc = _check_interrupts(false, g_fds.p))) {
+        if ((rc = _check_interrupts(0, g_fds.p))) {
           break;
         }
         if (SleepEx(__SIG_POLLING_INTERVAL_MS, true) == kNtWaitIoCompletion) {
diff --git a/libc/calls/tcdrain.c b/libc/calls/tcdrain.c
index aa4d774ce..a077850f6 100644
--- a/libc/calls/tcdrain.c
+++ b/libc/calls/tcdrain.c
@@ -32,7 +32,7 @@
 
 static dontinline textwindows int sys_tcdrain_nt(int fd) {
   if (!__isfdopen(fd)) return ebadf();
-  if (_check_interrupts(false, g_fds.p)) return -1;
+  if (_check_interrupts(0, g_fds.p)) return -1;
   if (!FlushFileBuffers(g_fds.p[fd].handle)) return __winerr();
   return 0;
 }
diff --git a/libc/calls/wait4-nt.c b/libc/calls/wait4-nt.c
index 8ca5a5bdc..9f8c557e2 100644
--- a/libc/calls/wait4-nt.c
+++ b/libc/calls/wait4-nt.c
@@ -24,16 +24,19 @@
 #include "libc/calls/struct/rusage.h"
 #include "libc/calls/syscall_support-nt.internal.h"
 #include "libc/fmt/conv.h"
+#include "libc/intrin/kprintf.h"
 #include "libc/intrin/strace.internal.h"
 #include "libc/macros.internal.h"
 #include "libc/nt/accounting.h"
 #include "libc/nt/enum/accessmask.h"
 #include "libc/nt/enum/processaccess.h"
 #include "libc/nt/enum/status.h"
+#include "libc/nt/enum/th32cs.h"
 #include "libc/nt/enum/wait.h"
 #include "libc/nt/process.h"
 #include "libc/nt/runtime.h"
 #include "libc/nt/struct/filetime.h"
+#include "libc/nt/struct/processentry32.h"
 #include "libc/nt/struct/processmemorycounters.h"
 #include "libc/nt/synchronization.h"
 #include "libc/runtime/runtime.h"
@@ -46,105 +49,101 @@
 
 #ifdef __x86_64__
 
-static textwindows int sys_wait4_nt_impl(int pid, int *opt_out_wstatus,
+static textwindows void AddProcessStats(int64_t h, struct rusage *ru) {
+  struct NtProcessMemoryCountersEx memcount = {
+      .cb = sizeof(struct NtProcessMemoryCountersEx)};
+  if (GetProcessMemoryInfo(h, &memcount, sizeof(memcount))) {
+    ru->ru_maxrss = MAX(ru->ru_maxrss, memcount.PeakWorkingSetSize / 1024);
+    ru->ru_majflt += memcount.PageFaultCount;
+  } else {
+    STRACE("%s failed %u", "GetProcessMemoryInfo", GetLastError());
+  }
+  struct NtFileTime createfiletime, exitfiletime;
+  struct NtFileTime kernelfiletime, userfiletime;
+  if (GetProcessTimes(h, &createfiletime, &exitfiletime, &kernelfiletime,
+                      &userfiletime)) {
+    ru->ru_utime = timeval_add(
+        ru->ru_utime, WindowsDurationToTimeVal(ReadFileTime(userfiletime)));
+    ru->ru_stime = timeval_add(
+        ru->ru_stime, WindowsDurationToTimeVal(ReadFileTime(kernelfiletime)));
+  } else {
+    STRACE("%s failed %u", "GetProcessTimes", GetLastError());
+  }
+}
+
+static textwindows int sys_wait4_nt_impl(int *pid, int *opt_out_wstatus,
                                          int options,
                                          struct rusage *opt_out_rusage) {
   int64_t handle;
   int rc, pids[64];
   int64_t handles[64];
   uint32_t dwExitCode;
-  bool shouldinterrupt;
-  uint32_t i, j, base, count, timeout;
-  struct NtProcessMemoryCountersEx memcount;
-  struct NtFileTime createfiletime, exitfiletime, kernelfiletime, userfiletime;
-  if (_check_interrupts(true, g_fds.p)) return -1;
-  __fds_lock();
-  if (pid != -1 && pid != 0) {
-    if (pid < 0) {
-      /* XXX: this is sloppy */
-      pid = -pid;
+  uint32_t i, j, count;
+  if (*pid != -1 && *pid != 0) {
+    if (*pid < 0) {
+      // XXX: this is sloppy
+      *pid = -*pid;
     }
-    if (!__isfdkind(pid, kFdProcess)) {
-      /* XXX: this is sloppy (see fork-nt.c) */
-      if (!__isfdopen(pid) &&
+    if (!__isfdkind(*pid, kFdProcess)) {
+      // XXX: this is sloppy (see fork-nt.c)
+      if (!__isfdopen(*pid) &&
           (handle = OpenProcess(kNtSynchronize | kNtProcessQueryInformation,
-                                true, pid))) {
-        if ((pid = __reservefd_unlocked(-1)) != -1) {
-          g_fds.p[pid].kind = kFdProcess;
-          g_fds.p[pid].handle = handle;
-          g_fds.p[pid].flags = O_CLOEXEC;
+                                true, *pid))) {
+        if ((*pid = __reservefd_unlocked(-1)) != -1) {
+          g_fds.p[*pid].kind = kFdProcess;
+          g_fds.p[*pid].handle = handle;
+          g_fds.p[*pid].flags = O_CLOEXEC;
         } else {
-          __fds_unlock();
           CloseHandle(handle);
           return echild();
         }
       } else {
-        __fds_unlock();
         return echild();
       }
     }
-    handles[0] = g_fds.p[pid].handle;
-    pids[0] = pid;
+    handles[0] = g_fds.p[*pid].handle;
+    pids[0] = *pid;
     count = 1;
   } else {
     count = __sample_pids(pids, handles, false);
     if (!count) {
-      __fds_unlock();
       return echild();
     }
   }
-  __fds_unlock();
-  for (;;) {
-    if (_check_interrupts(true, 0)) return -1;
-    dwExitCode = kNtStillActive;
-    if (options & WNOHANG) {
-      i = WaitForMultipleObjects(count, handles, false, 0);
-      if (i == kNtWaitTimeout) {
-        return 0;
-      }
-    } else {
-      i = WaitForMultipleObjects(count, handles, false,
-                                 __SIG_POLLING_INTERVAL_MS);
-      if (i == kNtWaitTimeout) {
-        continue;
-      }
+  dwExitCode = kNtStillActive;
+  if (options & WNOHANG) {
+    i = WaitForMultipleObjects(count, handles, false, 0);
+    if (i == kNtWaitTimeout) {
+      return 0;
     }
-    if (i == kNtWaitFailed) {
-      STRACE("%s failed %u", "WaitForMultipleObjects", GetLastError());
-      return __winerr();
+  } else {
+    i = WaitForMultipleObjects(count, handles, false,
+                               __SIG_POLLING_INTERVAL_MS);
+    if (i == kNtWaitTimeout) {
+      return -2;
     }
-    if (!GetExitCodeProcess(handles[i], &dwExitCode)) {
-      STRACE("%s failed %u", "GetExitCodeProcess", GetLastError());
-      return __winerr();
-    }
-    if (dwExitCode == kNtStillActive) continue;
-    if (opt_out_wstatus) { /* @see WEXITSTATUS() */
-      *opt_out_wstatus = (dwExitCode & 0xff) << 8;
-    }
-    if (opt_out_rusage) {
-      bzero(opt_out_rusage, sizeof(*opt_out_rusage));
-      bzero(&memcount, sizeof(memcount));
-      memcount.cb = sizeof(struct NtProcessMemoryCountersEx);
-      if (GetProcessMemoryInfo(handles[i], &memcount, sizeof(memcount))) {
-        opt_out_rusage->ru_maxrss = memcount.PeakWorkingSetSize / 1024;
-        opt_out_rusage->ru_majflt = memcount.PageFaultCount;
-      } else {
-        STRACE("%s failed %u", "GetProcessMemoryInfo", GetLastError());
-      }
-      if (GetProcessTimes(handles[i], &createfiletime, &exitfiletime,
-                          &kernelfiletime, &userfiletime)) {
-        opt_out_rusage->ru_utime =
-            WindowsDurationToTimeVal(ReadFileTime(userfiletime));
-        opt_out_rusage->ru_stime =
-            WindowsDurationToTimeVal(ReadFileTime(kernelfiletime));
-      } else {
-        STRACE("%s failed %u", "GetProcessTimes", GetLastError());
-      }
-    }
-    CloseHandle(handles[i]);
-    __releasefd(pids[i]);
-    return pids[i];
   }
+  if (i == kNtWaitFailed) {
+    STRACE("%s failed %u", "WaitForMultipleObjects", GetLastError());
+    return __winerr();
+  }
+  if (!GetExitCodeProcess(handles[i], &dwExitCode)) {
+    STRACE("%s failed %u", "GetExitCodeProcess", GetLastError());
+    return __winerr();
+  }
+  if (dwExitCode == kNtStillActive) {
+    return -2;
+  }
+  if (opt_out_wstatus) {  // @see WEXITSTATUS()
+    *opt_out_wstatus = (dwExitCode & 0xff) << 8;
+  }
+  if (opt_out_rusage) {
+    bzero(opt_out_rusage, sizeof(*opt_out_rusage));
+    AddProcessStats(handles[i], opt_out_rusage);
+  }
+  CloseHandle(handles[i]);
+  __releasefd(pids[i]);
+  return pids[i];
 }
 
 textwindows int sys_wait4_nt(int pid, int *opt_out_wstatus, int options,
@@ -153,7 +152,13 @@ textwindows int sys_wait4_nt(int pid, int *opt_out_wstatus, int options,
   sigset_t oldmask, mask = {0};
   sigaddset(&mask, SIGCHLD);
   __sig_mask(SIG_BLOCK, &mask, &oldmask);
-  rc = sys_wait4_nt_impl(pid, opt_out_wstatus, options, opt_out_rusage);
+  do {
+    rc = _check_interrupts(kSigOpRestartable | kSigOpNochld, 0);
+    if (rc == -1) break;
+    __fds_lock();
+    rc = sys_wait4_nt_impl(&pid, opt_out_wstatus, options, opt_out_rusage);
+    __fds_unlock();
+  } while (rc == -2);
   __sig_mask(SIG_SETMASK, &oldmask, 0);
   return rc;
 }
diff --git a/libc/calls/wait4.c b/libc/calls/wait4.c
index 5cfcf209b..cf2bff5dd 100644
--- a/libc/calls/wait4.c
+++ b/libc/calls/wait4.c
@@ -55,7 +55,9 @@ int wait4(int pid, int *opt_out_wstatus, int options,
   } else {
     rc = sys_wait4_nt(pid, &ws, options, opt_out_rusage);
   }
-  if (rc != -1 && opt_out_wstatus) *opt_out_wstatus = ws;
+  if (rc != -1 && opt_out_wstatus) {
+    *opt_out_wstatus = ws;
+  }
 
   END_CANCELLATION_POINT;
   STRACE("wait4(%d, [%#x], %d, %p) → %d% m", pid, ws, options, opt_out_rusage,
diff --git a/libc/runtime/vfork.S b/libc/runtime/vfork.S
index d72849820..80f1bb79d 100644
--- a/libc/runtime/vfork.S
+++ b/libc/runtime/vfork.S
@@ -56,8 +56,18 @@
 vfork:
 	.ftrace2
 
+#ifdef __SANITIZE_ADDRESS__
+	jmp	fork
+#endif
+
 #ifdef __x86_64__
 
+#if SupportsWindows()
+//	these platforms disagree with vfork
+	testb	$_HOSTXNU|_HOSTOPENBSD|_HOSTWINDOWS,__hostos(%rip)
+	jnz	fork
+#endif
+
 #if !IsTiny()
 	push	%rbp
 	mov	%rsp,%rbp
@@ -69,21 +79,6 @@ vfork:
 	pop	%rbp
 #endif
 	mov	%fs:0,%r9		// get thread information block
-#if SupportsWindows()
-	testb	IsWindows()
-	jnz	6f			// and we're lucky to have that
-#endif
-#ifdef __SANITIZE_ADDRESS__
-	jmp	5f			// TODO: asan and vfork don't mix?
-#endif
-#if SupportsXnu()
-	testb	IsXnu()
-	jnz	5f
-#endif
-#if SupportsOpenbsd()
-	testb	IsOpenbsd()
-	jnz	5f			// fake vfork plus msyscall issues
-#endif
 	mov	0x3c(%r9),%r8d		// avoid question of @vforksafe errno
 	pop	%rsi			// saves return address in a register
 	mov	__NR_vfork(%rip),%eax
@@ -106,29 +101,6 @@ vfork:
 	ret
 .Lpar:	andb	$~TIB_FLAG_VFORKED,0x40(%r9)
 	ret
-#if SupportsXnu() || SupportsOpenbsd() || defined(__SANITIZE_ADDRESS__)
-5:	push	%rbp
-	mov	%rsp,%rbp
-	push	%r9
-	push	%r9
-	call	sys_fork
-	pop	%r9
-	pop	%r9
-	pop	%rbp
-	jmp	1b
-#endif
-#if SupportsWindows()
-6:	push	%rbp
-	mov	%rsp,%rbp
-	push	%r9
-	push	%r9
-	xor	%edi,%edi		// dwCreationFlags
-	call	sys_fork_nt
-	pop	%r9
-	pop	%r9
-	pop	%rbp
-	jmp	1b
-#endif
 
 #elif defined(__aarch64__)
 
diff --git a/libc/sock/accept-nt.c b/libc/sock/accept-nt.c
index 2be018140..60d9c16fa 100644
--- a/libc/sock/accept-nt.c
+++ b/libc/sock/accept-nt.c
@@ -67,7 +67,7 @@ textwindows int sys_accept_nt(struct Fd *fd, struct sockaddr_storage *addr,
   if (!AcceptEx(fd->handle, handle, &buffer, 0, sizeof(buffer.local),
                 sizeof(buffer.remote), &bytes_received, &overlapped)) {
     sockfd = (struct SockFd *)fd->extra;
-    if (__wsablock(fd, &overlapped, &completion_flags, true,
+    if (__wsablock(fd, &overlapped, &completion_flags, kSigOpRestartable,
                    sockfd->rcvtimeo) == -1) {
       WSACloseEvent(overlapped.hEvent);
       __sys_closesocket_nt(handle);
diff --git a/libc/sock/recv-nt.c b/libc/sock/recv-nt.c
index 86e8c8806..7705a51e7 100644
--- a/libc/sock/recv-nt.c
+++ b/libc/sock/recv-nt.c
@@ -47,7 +47,8 @@ textwindows ssize_t sys_recv_nt(struct Fd *fd, const struct iovec *iov,
   } else {
     errno = err;
     sockfd = (struct SockFd *)fd->extra;
-    rc = __wsablock(fd, &overlapped, &flags, true, sockfd->rcvtimeo);
+    rc = __wsablock(fd, &overlapped, &flags, kSigOpRestartable,
+                    sockfd->rcvtimeo);
   }
   unassert(WSACloseEvent(overlapped.hEvent));
   return rc;
diff --git a/libc/sock/recvfrom-nt.c b/libc/sock/recvfrom-nt.c
index c1346e796..4c4e36b1a 100644
--- a/libc/sock/recvfrom-nt.c
+++ b/libc/sock/recvfrom-nt.c
@@ -46,7 +46,8 @@ textwindows ssize_t sys_recvfrom_nt(struct Fd *fd, const struct iovec *iov,
   } else {
     errno = err;
     sockfd = (struct SockFd *)fd->extra;
-    rc = __wsablock(fd, &overlapped, &flags, true, sockfd->rcvtimeo);
+    rc = __wsablock(fd, &overlapped, &flags, kSigOpRestartable,
+                    sockfd->rcvtimeo);
   }
   WSACloseEvent(overlapped.hEvent);
   return rc;
diff --git a/libc/sock/send-nt.c b/libc/sock/send-nt.c
index 479fcfad5..0bc391b21 100644
--- a/libc/sock/send-nt.c
+++ b/libc/sock/send-nt.c
@@ -41,7 +41,8 @@ textwindows ssize_t sys_send_nt(int fd, const struct iovec *iov, size_t iovlen,
     }
   } else {
     sockfd = (struct SockFd *)g_fds.p[fd].extra;
-    rc = __wsablock(g_fds.p + fd, &overlapped, &flags, true, sockfd->sndtimeo);
+    rc = __wsablock(g_fds.p + fd, &overlapped, &flags, kSigOpRestartable,
+                    sockfd->sndtimeo);
   }
   WSACloseEvent(overlapped.hEvent);
   return rc;
diff --git a/libc/sock/sendfile.c b/libc/sock/sendfile.c
index 1edc8eede..8724589cf 100644
--- a/libc/sock/sendfile.c
+++ b/libc/sock/sendfile.c
@@ -57,7 +57,7 @@ static textwindows int SendfileBlock(int64_t handle,
       NTTRACE("WSAWaitForMultipleEvents failed %lm");
       return __winsockerr();
     } else if (i == kNtWaitTimeout || i == kNtWaitIoCompletion) {
-      if (_check_interrupts(true, g_fds.p)) return -1;
+      if (_check_interrupts(kSigOpRestartable, g_fds.p)) return -1;
 #if _NTTRACE
       POLLTRACE("WSAWaitForMultipleEvents...");
 #endif
diff --git a/libc/sock/sendto-nt.c b/libc/sock/sendto-nt.c
index 875ee284d..f9182a993 100644
--- a/libc/sock/sendto-nt.c
+++ b/libc/sock/sendto-nt.c
@@ -42,7 +42,8 @@ textwindows ssize_t sys_sendto_nt(int fd, const struct iovec *iov,
     }
   } else {
     sockfd = (struct SockFd *)g_fds.p[fd].extra;
-    rc = __wsablock(g_fds.p + fd, &overlapped, &flags, true, sockfd->sndtimeo);
+    rc = __wsablock(g_fds.p + fd, &overlapped, &flags, kSigOpRestartable,
+                    sockfd->sndtimeo);
   }
   WSACloseEvent(overlapped.hEvent);
   return rc;
diff --git a/libc/sock/syscall_fd.internal.h b/libc/sock/syscall_fd.internal.h
index f3907bbcb..b6c66d039 100644
--- a/libc/sock/syscall_fd.internal.h
+++ b/libc/sock/syscall_fd.internal.h
@@ -20,7 +20,7 @@ int sys_shutdown_nt(struct Fd *, int);
 ssize_t sys_recv_nt(struct Fd *, const struct iovec *, size_t, uint32_t);
 ssize_t sys_recvfrom_nt(struct Fd *, const struct iovec *, size_t, uint32_t,
                         void *, uint32_t *);
-int __wsablock(struct Fd *, struct NtOverlapped *, uint32_t *, bool, uint32_t);
+int __wsablock(struct Fd *, struct NtOverlapped *, uint32_t *, int, uint32_t);
 
 COSMOPOLITAN_C_END_
 #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
diff --git a/libc/sock/wsablock.c b/libc/sock/wsablock.c
index e343ff0f6..61a29110f 100644
--- a/libc/sock/wsablock.c
+++ b/libc/sock/wsablock.c
@@ -37,8 +37,7 @@
 #include "libc/sysv/errfuns.h"
 
 textwindows int __wsablock(struct Fd *fd, struct NtOverlapped *overlapped,
-                           uint32_t *flags, bool restartable,
-                           uint32_t timeout) {
+                           uint32_t *flags, int sigops, uint32_t timeout) {
   int e, rc;
   uint32_t i, got;
   if (WSAGetLastError() != kNtErrorIoPending) {
@@ -51,7 +50,7 @@ textwindows int __wsablock(struct Fd *fd, struct NtOverlapped *overlapped,
              WSAGetLastError() == kNtErrorNotFound);
     errno = e;
   } else {
-    if (_check_interrupts(restartable, g_fds.p)) {
+    if (_check_interrupts(sigops, g_fds.p)) {
       return -1;
     }
   }
@@ -62,7 +61,7 @@ textwindows int __wsablock(struct Fd *fd, struct NtOverlapped *overlapped,
       NTTRACE("WSAWaitForMultipleEvents failed %lm");
       return __winsockerr();
     } else if (i == kNtWaitTimeout || i == kNtWaitIoCompletion) {
-      if (_check_interrupts(restartable, g_fds.p)) {
+      if (_check_interrupts(sigops, g_fds.p)) {
         return -1;
       }
       if (timeout) {
diff --git a/libc/stdio/getrandom.c b/libc/stdio/getrandom.c
index 4e93be046..697cca69a 100644
--- a/libc/stdio/getrandom.c
+++ b/libc/stdio/getrandom.c
@@ -176,7 +176,9 @@ static ssize_t GetDevUrandom(char *p, size_t n) {
 ssize_t __getrandom(void *p, size_t n, unsigned f) {
   ssize_t rc;
   if (IsWindows()) {
-    if (_check_interrupts(true, 0)) return -1;
+    if (_check_interrupts(kSigOpRestartable, 0)) {
+      return -1;
+    }
     rc = RtlGenRandom(p, n) ? n : __winerr();
   } else if (have_getrandom) {
     if (IsXnu() || IsOpenbsd()) {
diff --git a/test/libc/stdio/fds_torture_test.c b/test/libc/stdio/fds_torture_test.c
new file mode 100644
index 000000000..72adeb425
--- /dev/null
+++ b/test/libc/stdio/fds_torture_test.c
@@ -0,0 +1,56 @@
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8                                :vi│
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2023 Justine Alexandra Roberts Tunney                              │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/calls/calls.h"
+#include "libc/runtime/runtime.h"
+#include "libc/runtime/syslib.internal.h"
+#include "libc/stdio/stdio.h"
+#include "libc/sysv/consts/o.h"
+#include "libc/testlib/testlib.h"
+#include "libc/thread/thread.h"
+
+#define THREADS 5
+#define FDS     10
+
+void *Torturer(void *arg) {  // thread body: open FDS fds, fork per fd, close
+  int i, pid, fd[FDS];
+  for (i = 0; i < FDS; ++i) {
+    ASSERT_NE(-1, (fd[i] = open("/dev/null", O_WRONLY)));
+  }
+  for (i = 0; i < FDS; ++i) {
+    ASSERT_EQ(2, write(fd[i], "hi", 2));
+    ASSERT_NE(-1, (pid = fork()));  // don't wait() for a child that never was
+    if (!pid) _Exit(0);             // child: exit immediately
+    ASSERT_NE(-1, wait(0));         // parent: reap one child per fork
+  }
+  for (i = 0; i < FDS; ++i) {
+    ASSERT_NE(-1, close(fd[i]));
+  }
+  return 0;
+}
+
+TEST(fds_torture, test) {
+  // Run THREADS copies of Torturer at once, then join every one of them.
+  pthread_t th[THREADS];
+  for (int k = 0; k < THREADS; ++k) {
+    ASSERT_EQ(0, pthread_create(&th[k], 0, Torturer, 0));
+  }
+  for (int k = 0; k < THREADS; ++k) {
+    ASSERT_EQ(0, pthread_join(th[k], 0));
+  }
+}
diff --git a/test/libc/stdio/posix_spawn_test.c b/test/libc/stdio/posix_spawn_test.c
index 7e893a921..10d25fad1 100644
--- a/test/libc/stdio/posix_spawn_test.c
+++ b/test/libc/stdio/posix_spawn_test.c
@@ -28,14 +28,17 @@
 #include "libc/mem/gc.h"
 #include "libc/mem/mem.h"
 #include "libc/runtime/internal.h"
+#include "libc/runtime/memtrack.internal.h"
 #include "libc/runtime/runtime.h"
 #include "libc/stdio/stdio.h"
+#include "libc/str/str.h"
 #include "libc/sysv/consts/auxv.h"
 #include "libc/sysv/consts/o.h"
 #include "libc/sysv/consts/sig.h"
 #include "libc/testlib/ezbench.h"
 #include "libc/testlib/testlib.h"
 #include "libc/thread/thread.h"
+#include "third_party/nsync/mu.h"
 #ifdef __x86_64__
 
 char testlib_enable_tmp_setup_teardown;
diff --git a/test/libc/stdio/test.mk b/test/libc/stdio/test.mk
index b2ad38268..22eb3e939 100644
--- a/test/libc/stdio/test.mk
+++ b/test/libc/stdio/test.mk
@@ -43,6 +43,7 @@ TEST_LIBC_STDIO_DIRECTDEPS =					\
 	THIRD_PARTY_MBEDTLS					\
 	THIRD_PARTY_MUSL					\
 	THIRD_PARTY_TR						\
+	THIRD_PARTY_NSYNC					\
 	THIRD_PARTY_ZLIB					\
 	THIRD_PARTY_ZLIB_GZ
 
diff --git a/third_party/nsync/futex.c b/third_party/nsync/futex.c
index 326bbf049..c69e9405f 100644
--- a/third_party/nsync/futex.c
+++ b/third_party/nsync/futex.c
@@ -174,7 +174,7 @@ static int nsync_futex_wait_win32_ (atomic_int *w, int expect, char pshare, stru
 		deadline = timespec_max;
 	}
 
-	while (!(rc = _check_interrupts (false, 0))) {
+	while (!(rc = _check_interrupts (0, 0))) {
 		now = timespec_real ();
 		if (timespec_cmp (now, deadline) > 0) {
 			rc = etimedout();