Mirror of https://github.com/jart/cosmopolitan.git (synced 2025-07-26 12:30:30 +00:00)
Use CLK_TCK for clock_nanosleep() spin threshold
This more accurately reflects how the kernels actually implement this function and, most importantly, avoids incurring startup latency.
parent 11c18fa644
commit 695f74035d
4 changed files with 108 additions and 55 deletions
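To make the intent of the diff easier to follow, here is a rough sketch of the idea, not Cosmopolitan's actual code: rather than timing a 1 ns sleep at startup to learn the kernel's sleep latency, the spin-versus-syscall cutoff is simply one scheduler tick, i.e. 1s / CLK_TCK. The sketch uses the portable sysconf(_SC_CLK_TCK) as a stand-in for the library's CLK_TCK macro, and the helper names spin_threshold/should_spin are illustrative.

// Hedged sketch of the commit's idea (illustrative, not the library's code).
#include <stdbool.h>
#include <time.h>
#include <unistd.h>

// One scheduler tick, e.g. 10 ms at 100 Hz: the kernel can't wake a sleeper
// much more precisely than this, so it's a natural spin-vs-syscall cutoff
// that needs no startup measurement.
static struct timespec spin_threshold(void) {
  long hz = sysconf(_SC_CLK_TCK);  // ticks per second, typically 100
  if (hz < 1) hz = 100;            // defensive fallback, as in the new clktck.c
  return (struct timespec){.tv_sec = 0, .tv_nsec = 1000000000L / hz};
}

// Relative sleep requests shorter than one tick are better served by
// yield-spinning than by a clock_nanosleep() system call.
static bool should_spin(const struct timespec *req) {
  struct timespec t = spin_threshold();
  return req->tv_sec < t.tv_sec ||
         (req->tv_sec == t.tv_sec && req->tv_nsec < t.tv_nsec);
}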
libc/calls/clktck.c (Normal file, +80)
@@ -0,0 +1,80 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2021 Justine Alexandra Roberts Tunney                              │
│                                                                              │
│ Permission to use, copy, modify, and/or distribute this software for        │
│ any purpose with or without fee is hereby granted, provided that the        │
│ above copyright notice and this permission notice appear in all copies.     │
│                                                                              │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL               │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED               │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE            │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL        │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR       │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER              │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR            │
│ PERFORMANCE OF THIS SOFTWARE.                                               │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/runtime/clktck.h"
#include "libc/calls/calls.h"
#include "libc/dce.h"
#include "libc/fmt/wintime.internal.h"
#include "libc/intrin/getauxval.internal.h"
#include "libc/runtime/runtime.h"
#include "libc/sysv/consts/auxv.h"

struct clockinfo_netbsd {
  int32_t hz;       // number of clock ticks per second
  int32_t tick;     // µs per tick
  int32_t tickadj;  // skew rate for adjtime()
  int32_t stathz;   // statistics clock frequency
  int32_t profhz;   // profiling clock frequency
};

static int clk_tck;

static dontinline int __clk_tck_init(void) {
  int x;
  int cmd[2];
  size_t len;
  struct clockinfo_netbsd clock;
  if (IsWindows()) {
    x = HECTONANOSECONDS;
  } else if (IsXnu() || IsOpenbsd()) {
    x = 100;
  } else if (IsFreebsd()) {
    x = 128;
  } else if (IsNetbsd()) {
    cmd[0] = 1;   // CTL_KERN
    cmd[1] = 12;  // KERN_CLOCKRATE
    len = sizeof(clock);
    if (sys_sysctl(cmd, 2, &clock, &len, NULL, 0) != -1) {
      x = clock.hz;
    } else {
      x = -1;
    }
  } else {
    x = __getauxval(AT_CLKTCK).value;
  }
  if (x < 1) x = 100;
  clk_tck = x;
  return x;
}

/**
 * Returns system clock ticks per second.
 *
 * The returned value is memoized. This function is intended to be
 * used via the `CLK_TCK` macro wrapper.
 *
 * The returned value is always greater than zero. It's usually 100
 * hertz which means each clock tick is 10 milliseconds long.
 */
int __clk_tck(void) {
  if (clk_tck) {
    return clk_tck;
  } else {
    return __clk_tck_init();
  }
}
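A brief aside on the accessor above: per its doc comment, __clk_tck() is meant to be reached through the CLK_TCK macro from "libc/runtime/clktck.h". A minimal, hypothetical caller (not part of the commit) might look like this:

// Hypothetical usage of the memoized tick-rate accessor added above.
#include <stdio.h>
#include "libc/runtime/clktck.h"

int main(void) {
  int hz = CLK_TCK;  // first use initializes, later uses hit the cached value
  printf("scheduler tick: %d Hz (%d ns per tick)\n", hz, 1000000000 / hz);
}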
libc/calls/clock_nanosleep.c
@@ -17,31 +17,20 @@
│ PERFORMANCE OF THIS SOFTWARE.                                               │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/assert.h"
#include "libc/calls/asan.internal.h"
#include "libc/calls/blockcancel.internal.h"
#include "libc/calls/blocksigs.internal.h"
#include "libc/calls/calls.h"
#include "libc/calls/cp.internal.h"
#include "libc/calls/state.internal.h"
#include "libc/calls/struct/timespec.h"
#include "libc/calls/struct/timespec.internal.h"
#include "libc/calls/struct/timeval.h"
#include "libc/calls/struct/timeval.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/intrin/weaken.h"
#include "libc/macros.internal.h"
#include "libc/nt/ntdll.h"
#include "libc/nexgen32e/yield.h"
#include "libc/runtime/clktck.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/clock.h"
#include "libc/sysv/consts/timer.h"
#include "libc/sysv/errfuns.h"
#include "libc/thread/thread.h"
#include "libc/thread/tls.h"

static int64_t g_nanosleep_latency;

static errno_t sys_clock_nanosleep(int clock, int flags,
                                   const struct timespec *req,
@@ -65,33 +54,21 @@ static errno_t sys_clock_nanosleep(int clock, int flags,
    errno = e;
  }
  END_CANCELLATION_POINT;
#if 0
  STRACE("sys_clock_nanosleep(%s, %s, %s, [%s]) → %d% m",
         DescribeClockName(clock), DescribeSleepFlags(flags),
         DescribeTimespec(0, req), DescribeTimespec(rc, rem), rc);
#endif
  return rc;
}

// determine sched_yield() vs. clock_nanosleep() threshold
// 1ns sys_clock_nanosleep() on Windows takes milliseconds :'(
// 1ns sys_clock_nanosleep() on Linux/FreeBSD takes tens of microseconds
// 1ns sys_clock_nanosleep() on OpenBSD/NetBSD takes tens of milliseconds D:
static struct timespec GetNanosleepLatency(void) {
  errno_t rc;
  int64_t nanos;
  struct timespec x, y, w = {0, 1};
  if (!(nanos = g_nanosleep_latency)) {
    BLOCK_SIGNALS;
    for (;;) {
      unassert(!clock_gettime(CLOCK_REALTIME_PRECISE, &x));
      rc = sys_clock_nanosleep(CLOCK_REALTIME, 0, &w, 0);
      unassert(!rc || rc == EINTR);
      if (!rc) {
        unassert(!clock_gettime(CLOCK_REALTIME_PRECISE, &y));
        nanos = timespec_tonanos(timespec_sub(y, x));
        g_nanosleep_latency = nanos;
        break;
      }
    }
    ALLOW_SIGNALS;
  }
  return timespec_fromnanos(nanos);
// determine how many nanoseconds it takes before clock_nanosleep()
// starts sleeping with 90 percent accuracy; in other words when we
// ask it to sleep 1 second, it (a) must NEVER sleep for less time,
// and (b) does not sleep for longer than 1.1 seconds of time. what
// ever is below that, thanks but no thanks, we'll just spin yield,
static struct timespec GetNanosleepThreshold(void) {
  return timespec_fromnanos(1000000000 / CLK_TCK);
}

static errno_t CheckCancel(void) {
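The new comment above pins down what "good enough to sleep" means: asked to sleep one second, the kernel must never wake us early and should overshoot by at most ten percent. A small, hypothetical predicate expressing that criterion (not part of the commit; the name is illustrative):

// Hypothetical check of the comment's accuracy criterion: never sleep less
// than requested, and overshoot by at most 10% (1.0s..1.1s for a 1s request).
#include <stdbool.h>
#include <time.h>

static bool meets_accuracy_criterion(struct timespec asked,
                                     struct timespec slept) {
  long long want = asked.tv_sec * 1000000000LL + asked.tv_nsec;
  long long got = slept.tv_sec * 1000000000LL + slept.tv_nsec;
  return got >= want && got <= want + want / 10;
}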
@@ -114,7 +91,7 @@ static errno_t SpinNanosleep(int clock, int flags, const struct timespec *req,
  }
  unassert(!clock_gettime(CLOCK_REALTIME, &start));
  for (;;) {
    pthread_yield();
    spin_yield();
    unassert(!clock_gettime(CLOCK_REALTIME, &now));
    if (flags & TIMER_ABSTIME) {
      if (timespec_cmp(now, *req) >= 0) {
@@ -143,19 +120,13 @@ static errno_t SpinNanosleep(int clock, int flags, const struct timespec *req,
  }
}

// clock_gettime() takes a few nanoseconds but sys_clock_nanosleep()
// is incapable of sleeping for less than a millisecond on platforms
// such as windows and it's not much prettier on unix systems either
static bool ShouldUseSpinNanosleep(int clock, int flags,
                                   const struct timespec *req) {
  errno_t e;
  struct timespec now;
  if (IsWindows()) {
    // Our spin technique here is intended to take advantage of the fact
    // that sched_yield() takes about a hundred nanoseconds. But Windows
    // SleepEx(0, 0) a.k.a. NtYieldExecution() takes a whole millisecond
    // and it matters not whether our intent is yielding or sleeping,
    // since we use the SleepEx() function to implement both. Therefore,
    // there's no reason to use SpinNanosleep() on Windows.
    return false;
  }
  if (clock != CLOCK_REALTIME &&          //
      clock != CLOCK_REALTIME_PRECISE &&  //
      clock != CLOCK_MONOTONIC &&         //
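The Windows comment above turns on yield cost: spinning only pays off when a single yield is on the order of a hundred nanoseconds, which SleepEx(0, 0) is not. A hypothetical measurement harness along these lines could confirm that on a given unix system (not part of the commit; the numbers quoted in the comment are the source's claims, not output of this program):

// Rough micro-benchmark sketch: average the cost of sched_yield() to judge
// whether yield-spinning can beat a real sleep system call.
#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void) {
  enum { N = 100000 };
  struct timespec a, b;
  clock_gettime(CLOCK_MONOTONIC, &a);
  for (int i = 0; i < N; ++i) sched_yield();
  clock_gettime(CLOCK_MONOTONIC, &b);
  long long ns =
      (b.tv_sec - a.tv_sec) * 1000000000LL + (b.tv_nsec - a.tv_nsec);
  printf("sched_yield(): ~%lld ns per call\n", ns / N);
  return 0;
}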
@@ -164,13 +135,7 @@ static bool ShouldUseSpinNanosleep(int clock, int flags,
    return false;
  }
  if (!flags) {
    return timespec_cmp(*req, GetNanosleepLatency()) < 0;
  }
  // We need a clock_gettime() system call to perform this check if the
  // sleep request is an absolute timestamp. So we avoid doing that on
  // systems where sleep latency isn't too outrageous.
  if (timespec_cmp(GetNanosleepLatency(), timespec_fromnanos(50 * 1000)) < 0) {
    return false;
    return timespec_cmp(*req, GetNanosleepThreshold()) < 0;
  }
  e = errno;
  if (clock_gettime(clock, &now)) {
@@ -179,7 +144,7 @@ static bool ShouldUseSpinNanosleep(int clock, int flags,
    return false;
  }
  return timespec_cmp(*req, now) < 0 ||
         timespec_cmp(timespec_sub(*req, now), GetNanosleepLatency()) < 0;
         timespec_cmp(timespec_sub(*req, now), GetNanosleepThreshold()) < 0;
}

/**