Use CLK_TCK for clock_nanosleep() spin threshold

This more accurately reflects how the kernels actually implement this function and it most importantly avoids incurring startup latency.
2025-08-22 09:32:17 +00:00 · 2023-10-03 16:58:42 -07:00 · 2023-10-03 16:58:42 -07:00 · 695f74035d
commit 695f74035d
parent 11c18fa644
4 changed files with 108 additions and 55 deletions
--- a/libc/runtime/clktck.c
+++ b/libc/runtime/clktck.c
--- a/libc/calls/clock_nanosleep.c
+++ b/libc/calls/clock_nanosleep.c
@ -17,31 +17,20 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/assert.h"
-#include "libc/calls/asan.internal.h"
-#include "libc/calls/blockcancel.internal.h"
-#include "libc/calls/blocksigs.internal.h"
-#include "libc/calls/calls.h"
 #include "libc/calls/cp.internal.h"
-#include "libc/calls/state.internal.h"
-#include "libc/calls/struct/timespec.h"
 #include "libc/calls/struct/timespec.internal.h"
-#include "libc/calls/struct/timeval.h"
-#include "libc/calls/struct/timeval.internal.h"
 #include "libc/dce.h"
 #include "libc/errno.h"
 #include "libc/intrin/describeflags.internal.h"
 #include "libc/intrin/strace.internal.h"
 #include "libc/intrin/weaken.h"
-#include "libc/macros.internal.h"
-#include "libc/nt/ntdll.h"
+#include "libc/nexgen32e/yield.h"
+#include "libc/runtime/clktck.h"
 #include "libc/str/str.h"
 #include "libc/sysv/consts/clock.h"
 #include "libc/sysv/consts/timer.h"
 #include "libc/sysv/errfuns.h"
 #include "libc/thread/thread.h"
-#include "libc/thread/tls.h"
-
-static int64_t g_nanosleep_latency;

 static errno_t sys_clock_nanosleep(int clock, int flags,
                                   const struct timespec *req,
@ -65,33 +54,21 @@ static errno_t sys_clock_nanosleep(int clock, int flags,
    errno = e;
  }
  END_CANCELLATION_POINT;
+#if 0
+  STRACE("sys_clock_nanosleep(%s, %s, %s, [%s]) → %d% m",
+         DescribeClockName(clock), DescribeSleepFlags(flags),
+         DescribeTimespec(0, req), DescribeTimespec(rc, rem), rc);
+#endif
  return rc;
 }

-// determine sched_yield() vs. clock_nanosleep() threshold
-// 1ns sys_clock_nanosleep() on Windows takes milliseconds :'(
-// 1ns sys_clock_nanosleep() on Linux/FreeBSD takes tens of microseconds
-// 1ns sys_clock_nanosleep() on OpenBSD/NetBSD takes tens of milliseconds D:
-static struct timespec GetNanosleepLatency(void) {
-  errno_t rc;
-  int64_t nanos;
-  struct timespec x, y, w = {0, 1};
-  if (!(nanos = g_nanosleep_latency)) {
-    BLOCK_SIGNALS;
-    for (;;) {
-      unassert(!clock_gettime(CLOCK_REALTIME_PRECISE, &x));
-      rc = sys_clock_nanosleep(CLOCK_REALTIME, 0, &w, 0);
-      unassert(!rc || rc == EINTR);
-      if (!rc) {
-        unassert(!clock_gettime(CLOCK_REALTIME_PRECISE, &y));
-        nanos = timespec_tonanos(timespec_sub(y, x));
-        g_nanosleep_latency = nanos;
-        break;
-      }
-    }
-    ALLOW_SIGNALS;
-  }
-  return timespec_fromnanos(nanos);
+// determine how many nanoseconds it takes before clock_nanosleep()
+// starts sleeping with 90 percent accuracy; in other words when we
+// ask it to sleep 1 second, it (a) must NEVER sleep for less time,
+// and (b) does not sleep for longer than 1.1 seconds of time. what
+// ever is below that, thanks but no thanks, we'll just spin yield,
+static struct timespec GetNanosleepThreshold(void) {
+  return timespec_fromnanos(1000000000 / CLK_TCK);
 }

 static errno_t CheckCancel(void) {
@ -114,7 +91,7 @@ static errno_t SpinNanosleep(int clock, int flags, const struct timespec *req,
  }
  unassert(!clock_gettime(CLOCK_REALTIME, &start));
  for (;;) {
-    pthread_yield();
+    spin_yield();
    unassert(!clock_gettime(CLOCK_REALTIME, &now));
    if (flags & TIMER_ABSTIME) {
      if (timespec_cmp(now, *req) >= 0) {
@ -143,19 +120,13 @@ static errno_t SpinNanosleep(int clock, int flags, const struct timespec *req,
  }
 }

+// clock_gettime() takes a few nanoseconds but sys_clock_nanosleep()
+// is incapable of sleeping for less than a millisecond on platforms
+// such as windows and it's not much prettior on unix systems either
 static bool ShouldUseSpinNanosleep(int clock, int flags,
                                   const struct timespec *req) {
  errno_t e;
  struct timespec now;
-  if (IsWindows()) {
-    // Our spin technique here is intended to take advantage of the fact
-    // that sched_yield() takes about a hundred nanoseconds. But Windows
-    // SleepEx(0, 0) a.k.a. NtYieldExecution() takes a whole millisecond
-    // and it matters not whether our intent is to yielding or sleeping,
-    // since we use the SleepEx() function to implement both. Therefore,
-    // there's no reason to use SpinNanosleep() on Windows.
-    return false;
-  }
  if (clock != CLOCK_REALTIME &&          //
      clock != CLOCK_REALTIME_PRECISE &&  //
      clock != CLOCK_MONOTONIC &&         //
@ -164,13 +135,7 @@ static bool ShouldUseSpinNanosleep(int clock, int flags,
    return false;
  }
  if (!flags) {
-    return timespec_cmp(*req, GetNanosleepLatency()) < 0;
-  }
-  // We need a clock_gettime() system call to perform this check if the
-  // sleep request is an absolute timestamp. So we avoid doing that on
-  // systems where sleep latency isn't too outrageous.
-  if (timespec_cmp(GetNanosleepLatency(), timespec_fromnanos(50 * 1000)) < 0) {
-    return false;
+    return timespec_cmp(*req, GetNanosleepThreshold()) < 0;
  }
  e = errno;
  if (clock_gettime(clock, &now)) {
@ -179,7 +144,7 @@ static bool ShouldUseSpinNanosleep(int clock, int flags,
    return false;
  }
  return timespec_cmp(*req, now) < 0 ||
-         timespec_cmp(timespec_sub(*req, now), GetNanosleepLatency()) < 0;
+         timespec_cmp(timespec_sub(*req, now), GetNanosleepThreshold()) < 0;
 }

 /**
--- a/libc/nexgen32e/yield.h
+++ b/libc/nexgen32e/yield.h
@ -0,0 +1,16 @@
+#ifndef COSMOPOLITAN_LIBC_YIELD_H_
+#define COSMOPOLITAN_LIBC_YIELD_H_
+#if !(__ASSEMBLER__ + __LINKER__ + 0)
+#ifdef _COSMO_SOURCE
+
+static inline void spin_yield(void) {
+#if defined(__GNUC__) && defined(__aarch64__)
+  __asm__ volatile("yield");
+#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
+  __asm__ volatile("pause");
+#endif
+}
+
+#endif /* _COSMO_SOURCE */
+#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
+#endif /* COSMOPOLITAN_LIBC_YIELD_H_ */