mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-06 11:18:30 +00:00
Use CLK_TCK for clock_nanosleep() spin threshold
This more accurately reflects how the kernels actually implement this function and it most importantly avoids incurring startup latency.
This commit is contained in:
parent
11c18fa644
commit
695f74035d
4 changed files with 108 additions and 55 deletions
|
@ -17,31 +17,20 @@
|
||||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/assert.h"
|
#include "libc/assert.h"
|
||||||
#include "libc/calls/asan.internal.h"
|
|
||||||
#include "libc/calls/blockcancel.internal.h"
|
|
||||||
#include "libc/calls/blocksigs.internal.h"
|
|
||||||
#include "libc/calls/calls.h"
|
|
||||||
#include "libc/calls/cp.internal.h"
|
#include "libc/calls/cp.internal.h"
|
||||||
#include "libc/calls/state.internal.h"
|
|
||||||
#include "libc/calls/struct/timespec.h"
|
|
||||||
#include "libc/calls/struct/timespec.internal.h"
|
#include "libc/calls/struct/timespec.internal.h"
|
||||||
#include "libc/calls/struct/timeval.h"
|
|
||||||
#include "libc/calls/struct/timeval.internal.h"
|
|
||||||
#include "libc/dce.h"
|
#include "libc/dce.h"
|
||||||
#include "libc/errno.h"
|
#include "libc/errno.h"
|
||||||
#include "libc/intrin/describeflags.internal.h"
|
#include "libc/intrin/describeflags.internal.h"
|
||||||
#include "libc/intrin/strace.internal.h"
|
#include "libc/intrin/strace.internal.h"
|
||||||
#include "libc/intrin/weaken.h"
|
#include "libc/intrin/weaken.h"
|
||||||
#include "libc/macros.internal.h"
|
#include "libc/nexgen32e/yield.h"
|
||||||
#include "libc/nt/ntdll.h"
|
#include "libc/runtime/clktck.h"
|
||||||
#include "libc/str/str.h"
|
#include "libc/str/str.h"
|
||||||
#include "libc/sysv/consts/clock.h"
|
#include "libc/sysv/consts/clock.h"
|
||||||
#include "libc/sysv/consts/timer.h"
|
#include "libc/sysv/consts/timer.h"
|
||||||
#include "libc/sysv/errfuns.h"
|
#include "libc/sysv/errfuns.h"
|
||||||
#include "libc/thread/thread.h"
|
#include "libc/thread/thread.h"
|
||||||
#include "libc/thread/tls.h"
|
|
||||||
|
|
||||||
static int64_t g_nanosleep_latency;
|
|
||||||
|
|
||||||
static errno_t sys_clock_nanosleep(int clock, int flags,
|
static errno_t sys_clock_nanosleep(int clock, int flags,
|
||||||
const struct timespec *req,
|
const struct timespec *req,
|
||||||
|
@ -65,33 +54,21 @@ static errno_t sys_clock_nanosleep(int clock, int flags,
|
||||||
errno = e;
|
errno = e;
|
||||||
}
|
}
|
||||||
END_CANCELLATION_POINT;
|
END_CANCELLATION_POINT;
|
||||||
|
#if 0
|
||||||
|
STRACE("sys_clock_nanosleep(%s, %s, %s, [%s]) → %d% m",
|
||||||
|
DescribeClockName(clock), DescribeSleepFlags(flags),
|
||||||
|
DescribeTimespec(0, req), DescribeTimespec(rc, rem), rc);
|
||||||
|
#endif
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
// determine sched_yield() vs. clock_nanosleep() threshold
|
// determine how many nanoseconds it takes before clock_nanosleep()
|
||||||
// 1ns sys_clock_nanosleep() on Windows takes milliseconds :'(
|
// starts sleeping with 90 percent accuracy; in other words when we
|
||||||
// 1ns sys_clock_nanosleep() on Linux/FreeBSD takes tens of microseconds
|
// ask it to sleep 1 second, it (a) must NEVER sleep for less time,
|
||||||
// 1ns sys_clock_nanosleep() on OpenBSD/NetBSD takes tens of milliseconds D:
|
// and (b) does not sleep for longer than 1.1 seconds of time.
|
||||||
static struct timespec GetNanosleepLatency(void) {
|
// whatever is below that, thanks but no thanks, we'll just spin yield.
|
||||||
errno_t rc;
|
static struct timespec GetNanosleepThreshold(void) {
|
||||||
int64_t nanos;
|
return timespec_fromnanos(1000000000 / CLK_TCK);
|
||||||
struct timespec x, y, w = {0, 1};
|
|
||||||
if (!(nanos = g_nanosleep_latency)) {
|
|
||||||
BLOCK_SIGNALS;
|
|
||||||
for (;;) {
|
|
||||||
unassert(!clock_gettime(CLOCK_REALTIME_PRECISE, &x));
|
|
||||||
rc = sys_clock_nanosleep(CLOCK_REALTIME, 0, &w, 0);
|
|
||||||
unassert(!rc || rc == EINTR);
|
|
||||||
if (!rc) {
|
|
||||||
unassert(!clock_gettime(CLOCK_REALTIME_PRECISE, &y));
|
|
||||||
nanos = timespec_tonanos(timespec_sub(y, x));
|
|
||||||
g_nanosleep_latency = nanos;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ALLOW_SIGNALS;
|
|
||||||
}
|
|
||||||
return timespec_fromnanos(nanos);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static errno_t CheckCancel(void) {
|
static errno_t CheckCancel(void) {
|
||||||
|
@ -114,7 +91,7 @@ static errno_t SpinNanosleep(int clock, int flags, const struct timespec *req,
|
||||||
}
|
}
|
||||||
unassert(!clock_gettime(CLOCK_REALTIME, &start));
|
unassert(!clock_gettime(CLOCK_REALTIME, &start));
|
||||||
for (;;) {
|
for (;;) {
|
||||||
pthread_yield();
|
spin_yield();
|
||||||
unassert(!clock_gettime(CLOCK_REALTIME, &now));
|
unassert(!clock_gettime(CLOCK_REALTIME, &now));
|
||||||
if (flags & TIMER_ABSTIME) {
|
if (flags & TIMER_ABSTIME) {
|
||||||
if (timespec_cmp(now, *req) >= 0) {
|
if (timespec_cmp(now, *req) >= 0) {
|
||||||
|
@ -143,19 +120,13 @@ static errno_t SpinNanosleep(int clock, int flags, const struct timespec *req,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// clock_gettime() takes a few nanoseconds but sys_clock_nanosleep()
|
||||||
|
// is incapable of sleeping for less than a millisecond on platforms
|
||||||
|
// such as windows and it's not much prettier on unix systems either
|
||||||
static bool ShouldUseSpinNanosleep(int clock, int flags,
|
static bool ShouldUseSpinNanosleep(int clock, int flags,
|
||||||
const struct timespec *req) {
|
const struct timespec *req) {
|
||||||
errno_t e;
|
errno_t e;
|
||||||
struct timespec now;
|
struct timespec now;
|
||||||
if (IsWindows()) {
|
|
||||||
// Our spin technique here is intended to take advantage of the fact
|
|
||||||
// that sched_yield() takes about a hundred nanoseconds. But Windows
|
|
||||||
// SleepEx(0, 0) a.k.a. NtYieldExecution() takes a whole millisecond
|
|
||||||
// and it matters not whether our intent is to yielding or sleeping,
|
|
||||||
// since we use the SleepEx() function to implement both. Therefore,
|
|
||||||
// there's no reason to use SpinNanosleep() on Windows.
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (clock != CLOCK_REALTIME && //
|
if (clock != CLOCK_REALTIME && //
|
||||||
clock != CLOCK_REALTIME_PRECISE && //
|
clock != CLOCK_REALTIME_PRECISE && //
|
||||||
clock != CLOCK_MONOTONIC && //
|
clock != CLOCK_MONOTONIC && //
|
||||||
|
@ -164,13 +135,7 @@ static bool ShouldUseSpinNanosleep(int clock, int flags,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!flags) {
|
if (!flags) {
|
||||||
return timespec_cmp(*req, GetNanosleepLatency()) < 0;
|
return timespec_cmp(*req, GetNanosleepThreshold()) < 0;
|
||||||
}
|
|
||||||
// We need a clock_gettime() system call to perform this check if the
|
|
||||||
// sleep request is an absolute timestamp. So we avoid doing that on
|
|
||||||
// systems where sleep latency isn't too outrageous.
|
|
||||||
if (timespec_cmp(GetNanosleepLatency(), timespec_fromnanos(50 * 1000)) < 0) {
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
e = errno;
|
e = errno;
|
||||||
if (clock_gettime(clock, &now)) {
|
if (clock_gettime(clock, &now)) {
|
||||||
|
@ -179,7 +144,7 @@ static bool ShouldUseSpinNanosleep(int clock, int flags,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return timespec_cmp(*req, now) < 0 ||
|
return timespec_cmp(*req, now) < 0 ||
|
||||||
timespec_cmp(timespec_sub(*req, now), GetNanosleepLatency()) < 0;
|
timespec_cmp(timespec_sub(*req, now), GetNanosleepThreshold()) < 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
16
libc/nexgen32e/yield.h
Normal file
16
libc/nexgen32e/yield.h
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
#ifndef COSMOPOLITAN_LIBC_YIELD_H_
|
||||||
|
#define COSMOPOLITAN_LIBC_YIELD_H_
|
||||||
|
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||||
|
#ifdef _COSMO_SOURCE
|
||||||
|
|
||||||
|
/**
 * Issues the processor's spin-wait hint instruction.
 *
 * With a GNU-compatible compiler this emits `pause` on x86 targets and
 * `yield` on AArch64. On any other compiler or architecture the body
 * is empty and the call does nothing.
 */
static inline void spin_yield(void) {
#if defined(__GNUC__)
#if defined(__x86_64__) || defined(__i386__)
  __asm__ __volatile__("pause");
#elif defined(__aarch64__)
  __asm__ __volatile__("yield");
#endif
#endif
}
|
||||||
|
|
||||||
|
#endif /* _COSMO_SOURCE */
|
||||||
|
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||||
|
#endif /* COSMOPOLITAN_LIBC_YIELD_H_ */
|
72
tool/viz/clock_nanosleep_accuracy.c
Normal file
72
tool/viz/clock_nanosleep_accuracy.c
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||||
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||||
|
│ Copyright 2023 Justine Alexandra Roberts Tunney │
|
||||||
|
│ │
|
||||||
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||||
|
│ any purpose with or without fee is hereby granted, provided that the │
|
||||||
|
│ above copyright notice and this permission notice appear in all copies. │
|
||||||
|
│ │
|
||||||
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||||
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||||
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||||
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||||
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||||
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||||
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||||
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||||
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
|
#include "libc/assert.h"
|
||||||
|
#include "libc/calls/struct/timespec.h"
|
||||||
|
#include "libc/intrin/kprintf.h"
|
||||||
|
#include "libc/runtime/runtime.h"
|
||||||
|
#include "libc/stdio/stdio.h"
|
||||||
|
#include "libc/sysv/consts/clock.h"
|
||||||
|
|
||||||
|
#define MAXIMUM 1e8
|
||||||
|
#define ITERATIONS 10
|
||||||
|
|
||||||
|
void WarmUp(void) {
|
||||||
|
struct timespec wf = {0, 1};
|
||||||
|
npassert(!clock_nanosleep(CLOCK_REALTIME, 0, &wf, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
void TestSleepRealRelative(void) {
|
||||||
|
printf("\n");
|
||||||
|
printf("testing: clock_nanosleep(CLOCK_REALTIME) with relative timeout\n");
|
||||||
|
for (long nanos = 1; nanos < (long)MAXIMUM; nanos *= 2) {
|
||||||
|
struct timespec t1, t2, wf;
|
||||||
|
wf = timespec_fromnanos(nanos);
|
||||||
|
clock_gettime(CLOCK_REALTIME_PRECISE, &t1);
|
||||||
|
for (int i = 0; i < ITERATIONS; ++i) {
|
||||||
|
npassert(!clock_nanosleep(CLOCK_REALTIME, 0, &wf, 0));
|
||||||
|
}
|
||||||
|
clock_gettime(CLOCK_REALTIME_PRECISE, &t2);
|
||||||
|
long took = timespec_tonanos(timespec_sub(t2, t1)) / ITERATIONS;
|
||||||
|
printf("%,11ld ns sleep took %,11ld ns delta %,11ld ns\n", nanos, took,
|
||||||
|
took - nanos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void TestSleepMonoRelative(void) {
|
||||||
|
printf("\n");
|
||||||
|
printf("testing: clock_nanosleep(CLOCK_MONOTONIC) with relative timeout\n");
|
||||||
|
for (long nanos = 1; nanos < (long)MAXIMUM; nanos *= 2) {
|
||||||
|
struct timespec t1, t2, wf;
|
||||||
|
wf = timespec_fromnanos(nanos);
|
||||||
|
clock_gettime(CLOCK_REALTIME_PRECISE, &t1);
|
||||||
|
for (int i = 0; i < ITERATIONS; ++i) {
|
||||||
|
npassert(!clock_nanosleep(CLOCK_MONOTONIC, 0, &wf, 0));
|
||||||
|
}
|
||||||
|
clock_gettime(CLOCK_REALTIME_PRECISE, &t2);
|
||||||
|
long took = timespec_tonanos(timespec_sub(t2, t1)) / ITERATIONS;
|
||||||
|
printf("%,11ld ns sleep took %,11ld ns delta %,11ld ns\n", nanos, took,
|
||||||
|
took - nanos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Runs the warm-up sleep, then the CLOCK_REALTIME and CLOCK_MONOTONIC
 * relative-sleep measurements in that order.
 */
int main(int argc, char *argv[]) {
  (void)argc;
  (void)argv;
  WarmUp();
  TestSleepRealRelative();
  TestSleepMonoRelative();
  return 0;
}
|
Loading…
Add table
Add a link
Reference in a new issue