Mirror of https://github.com/jart/cosmopolitan.git (synced 2025-07-26 12:30:30 +00:00)
Use CLK_TCK for clock_nanosleep() spin threshold
This more accurately reflects how the kernels actually implement this function and, most importantly, avoids incurring startup latency.
parent 11c18fa644
commit 695f74035d
4 changed files with 108 additions and 55 deletions
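To make the intent of the diff easier to follow, here is a rough sketch of the idea, not Cosmopolitan's actual code: rather than timing a 1 ns sleep at startup to learn the kernel's sleep latency, the spin-versus-syscall cutoff is simply one scheduler tick, i.e. 1s / CLK_TCK. The sketch uses the portable sysconf(_SC_CLK_TCK) as a stand-in for the library's CLK_TCK macro, and the helper names spin_threshold/should_spin are illustrative.

// Hedged sketch of the commit's idea (illustrative, not the library's code).
#include <stdbool.h>
#include <time.h>
#include <unistd.h>

// One scheduler tick, e.g. 10 ms at 100 Hz: the kernel can't wake a sleeper
// much more precisely than this, so it's a natural spin-vs-syscall cutoff
// that needs no startup measurement.
static struct timespec spin_threshold(void) {
  long hz = sysconf(_SC_CLK_TCK);  // ticks per second, typically 100
  if (hz < 1) hz = 100;            // defensive fallback, as in the new clktck.c
  return (struct timespec){.tv_sec = 0, .tv_nsec = 1000000000L / hz};
}

// Relative sleep requests shorter than one tick are better served by
// yield-spinning than by a clock_nanosleep() system call.
static bool should_spin(const struct timespec *req) {
  struct timespec t = spin_threshold();
  return req->tv_sec < t.tv_sec ||
         (req->tv_sec == t.tv_sec && req->tv_nsec < t.tv_nsec);
}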
libc/calls/clktck.c (Normal file, +80)
@@ -0,0 +1,80 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2021 Justine Alexandra Roberts Tunney                              │
│                                                                              │
│ Permission to use, copy, modify, and/or distribute this software for        │
│ any purpose with or without fee is hereby granted, provided that the        │
│ above copyright notice and this permission notice appear in all copies.     │
│                                                                              │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL               │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED               │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE            │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL        │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR       │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER              │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR            │
│ PERFORMANCE OF THIS SOFTWARE.                                               │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/runtime/clktck.h"
#include "libc/calls/calls.h"
#include "libc/dce.h"
#include "libc/fmt/wintime.internal.h"
#include "libc/intrin/getauxval.internal.h"
#include "libc/runtime/runtime.h"
#include "libc/sysv/consts/auxv.h"

struct clockinfo_netbsd {
  int32_t hz;       // number of clock ticks per second
  int32_t tick;     // µs per tick
  int32_t tickadj;  // skew rate for adjtime()
  int32_t stathz;   // statistics clock frequency
  int32_t profhz;   // profiling clock frequency
};

static int clk_tck;

static dontinline int __clk_tck_init(void) {
  int x;
  int cmd[2];
  size_t len;
  struct clockinfo_netbsd clock;
  if (IsWindows()) {
    x = HECTONANOSECONDS;
  } else if (IsXnu() || IsOpenbsd()) {
    x = 100;
  } else if (IsFreebsd()) {
    x = 128;
  } else if (IsNetbsd()) {
    cmd[0] = 1;   // CTL_KERN
    cmd[1] = 12;  // KERN_CLOCKRATE
    len = sizeof(clock);
    if (sys_sysctl(cmd, 2, &clock, &len, NULL, 0) != -1) {
      x = clock.hz;
    } else {
      x = -1;
    }
  } else {
    x = __getauxval(AT_CLKTCK).value;
  }
  if (x < 1) x = 100;
  clk_tck = x;
  return x;
}

/**
 * Returns system clock ticks per second.
 *
 * The returned value is memoized. This function is intended to be
 * used via the `CLK_TCK` macro wrapper.
 *
 * The returned value is always greater than zero. It's usually 100
 * hertz which means each clock tick is 10 milliseconds long.
 */
int __clk_tck(void) {
  if (clk_tck) {
    return clk_tck;
  } else {
    return __clk_tck_init();
  }
}
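A brief aside on the accessor above: per its doc comment, __clk_tck() is meant to be reached through the CLK_TCK macro from "libc/runtime/clktck.h". A minimal, hypothetical caller (not part of the commit) might look like this:

// Hypothetical usage of the memoized tick-rate accessor added above.
#include <stdio.h>
#include "libc/runtime/clktck.h"

int main(void) {
  int hz = CLK_TCK;  // first use initializes, later uses hit the cached value
  printf("scheduler tick: %d Hz (%d ns per tick)\n", hz, 1000000000 / hz);
}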
libc/calls/clock_nanosleep.c
@@ -17,31 +17,20 @@
│ PERFORMANCE OF THIS SOFTWARE.                                               │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/assert.h"
#include "libc/calls/asan.internal.h"
#include "libc/calls/blockcancel.internal.h"
#include "libc/calls/blocksigs.internal.h"
#include "libc/calls/calls.h"
#include "libc/calls/cp.internal.h"
#include "libc/calls/state.internal.h"
#include "libc/calls/struct/timespec.h"
#include "libc/calls/struct/timespec.internal.h"
#include "libc/calls/struct/timeval.h"
#include "libc/calls/struct/timeval.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/intrin/weaken.h"
#include "libc/macros.internal.h"
#include "libc/nt/ntdll.h"
#include "libc/nexgen32e/yield.h"
#include "libc/runtime/clktck.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/clock.h"
#include "libc/sysv/consts/timer.h"
#include "libc/sysv/errfuns.h"
#include "libc/thread/thread.h"
#include "libc/thread/tls.h"

static int64_t g_nanosleep_latency;

static errno_t sys_clock_nanosleep(int clock, int flags,
                                   const struct timespec *req,
@@ -65,33 +54,21 @@ static errno_t sys_clock_nanosleep(int clock, int flags,
    errno = e;
  }
  END_CANCELLATION_POINT;
#if 0
  STRACE("sys_clock_nanosleep(%s, %s, %s, [%s]) → %d% m",
         DescribeClockName(clock), DescribeSleepFlags(flags),
         DescribeTimespec(0, req), DescribeTimespec(rc, rem), rc);
#endif
  return rc;
}

// determine sched_yield() vs. clock_nanosleep() threshold
// 1ns sys_clock_nanosleep() on Windows takes milliseconds :'(
// 1ns sys_clock_nanosleep() on Linux/FreeBSD takes tens of microseconds
// 1ns sys_clock_nanosleep() on OpenBSD/NetBSD takes tens of milliseconds D:
static struct timespec GetNanosleepLatency(void) {
  errno_t rc;
  int64_t nanos;
  struct timespec x, y, w = {0, 1};
  if (!(nanos = g_nanosleep_latency)) {
    BLOCK_SIGNALS;
    for (;;) {
      unassert(!clock_gettime(CLOCK_REALTIME_PRECISE, &x));
      rc = sys_clock_nanosleep(CLOCK_REALTIME, 0, &w, 0);
      unassert(!rc || rc == EINTR);
      if (!rc) {
        unassert(!clock_gettime(CLOCK_REALTIME_PRECISE, &y));
        nanos = timespec_tonanos(timespec_sub(y, x));
        g_nanosleep_latency = nanos;
        break;
      }
    }
    ALLOW_SIGNALS;
  }
  return timespec_fromnanos(nanos);
// determine how many nanoseconds it takes before clock_nanosleep()
// starts sleeping with 90 percent accuracy; in other words when we
// ask it to sleep 1 second, it (a) must NEVER sleep for less time,
// and (b) does not sleep for longer than 1.1 seconds of time. what
// ever is below that, thanks but no thanks, we'll just spin yield,
static struct timespec GetNanosleepThreshold(void) {
  return timespec_fromnanos(1000000000 / CLK_TCK);
}

static errno_t CheckCancel(void) {
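The new comment above pins down what "good enough to sleep" means: asked to sleep one second, the kernel must never wake us early and should overshoot by at most ten percent. A small, hypothetical predicate expressing that criterion (not part of the commit; the name is illustrative):

// Hypothetical check of the comment's accuracy criterion: never sleep less
// than requested, and overshoot by at most 10% (1.0s..1.1s for a 1s request).
#include <stdbool.h>
#include <time.h>

static bool meets_accuracy_criterion(struct timespec asked,
                                     struct timespec slept) {
  long long want = asked.tv_sec * 1000000000LL + asked.tv_nsec;
  long long got = slept.tv_sec * 1000000000LL + slept.tv_nsec;
  return got >= want && got <= want + want / 10;
}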
@@ -114,7 +91,7 @@ static errno_t SpinNanosleep(int clock, int flags, const struct timespec *req,
  }
  unassert(!clock_gettime(CLOCK_REALTIME, &start));
  for (;;) {
    pthread_yield();
    spin_yield();
    unassert(!clock_gettime(CLOCK_REALTIME, &now));
    if (flags & TIMER_ABSTIME) {
      if (timespec_cmp(now, *req) >= 0) {
@@ -143,19 +120,13 @@ static errno_t SpinNanosleep(int clock, int flags, const struct timespec *req,
  }
}

// clock_gettime() takes a few nanoseconds but sys_clock_nanosleep()
// is incapable of sleeping for less than a millisecond on platforms
// such as windows and it's not much prettier on unix systems either
static bool ShouldUseSpinNanosleep(int clock, int flags,
                                   const struct timespec *req) {
  errno_t e;
  struct timespec now;
  if (IsWindows()) {
    // Our spin technique here is intended to take advantage of the fact
    // that sched_yield() takes about a hundred nanoseconds. But Windows
    // SleepEx(0, 0) a.k.a. NtYieldExecution() takes a whole millisecond
    // and it matters not whether our intent is yielding or sleeping,
    // since we use the SleepEx() function to implement both. Therefore,
    // there's no reason to use SpinNanosleep() on Windows.
    return false;
  }
  if (clock != CLOCK_REALTIME &&          //
      clock != CLOCK_REALTIME_PRECISE &&  //
      clock != CLOCK_MONOTONIC &&         //
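The Windows comment above turns on yield cost: spinning only pays off when a single yield is on the order of a hundred nanoseconds, which SleepEx(0, 0) is not. A hypothetical measurement harness along these lines could confirm that on a given unix system (not part of the commit; the numbers quoted in the comment are the source's claims, not output of this program):

// Rough micro-benchmark sketch: average the cost of sched_yield() to judge
// whether yield-spinning can beat a real sleep system call.
#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void) {
  enum { N = 100000 };
  struct timespec a, b;
  clock_gettime(CLOCK_MONOTONIC, &a);
  for (int i = 0; i < N; ++i) sched_yield();
  clock_gettime(CLOCK_MONOTONIC, &b);
  long long ns =
      (b.tv_sec - a.tv_sec) * 1000000000LL + (b.tv_nsec - a.tv_nsec);
  printf("sched_yield(): ~%lld ns per call\n", ns / N);
  return 0;
}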
@@ -164,13 +135,7 @@ static bool ShouldUseSpinNanosleep(int clock, int flags,
    return false;
  }
  if (!flags) {
    return timespec_cmp(*req, GetNanosleepLatency()) < 0;
  }
  // We need a clock_gettime() system call to perform this check if the
  // sleep request is an absolute timestamp. So we avoid doing that on
  // systems where sleep latency isn't too outrageous.
  if (timespec_cmp(GetNanosleepLatency(), timespec_fromnanos(50 * 1000)) < 0) {
    return false;
    return timespec_cmp(*req, GetNanosleepThreshold()) < 0;
  }
  e = errno;
  if (clock_gettime(clock, &now)) {
@@ -179,7 +144,7 @@ static bool ShouldUseSpinNanosleep(int clock, int flags,
    return false;
  }
  return timespec_cmp(*req, now) < 0 ||
         timespec_cmp(timespec_sub(*req, now), GetNanosleepLatency()) < 0;
         timespec_cmp(timespec_sub(*req, now), GetNanosleepThreshold()) < 0;
}

/**