mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-02-07 06:53:33 +00:00
Use CLK_TCK for clock_nanosleep() spin threshold
This more accurately reflects how the kernels actually implement this function and it most importantly avoids incurring startup latency.
This commit is contained in:
parent
11c18fa644
commit
695f74035d
4 changed files with 108 additions and 55 deletions
|
@ -17,31 +17,20 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/assert.h"
|
||||
#include "libc/calls/asan.internal.h"
|
||||
#include "libc/calls/blockcancel.internal.h"
|
||||
#include "libc/calls/blocksigs.internal.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/calls/cp.internal.h"
|
||||
#include "libc/calls/state.internal.h"
|
||||
#include "libc/calls/struct/timespec.h"
|
||||
#include "libc/calls/struct/timespec.internal.h"
|
||||
#include "libc/calls/struct/timeval.h"
|
||||
#include "libc/calls/struct/timeval.internal.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/intrin/describeflags.internal.h"
|
||||
#include "libc/intrin/strace.internal.h"
|
||||
#include "libc/intrin/weaken.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/nt/ntdll.h"
|
||||
#include "libc/nexgen32e/yield.h"
|
||||
#include "libc/runtime/clktck.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/sysv/consts/clock.h"
|
||||
#include "libc/sysv/consts/timer.h"
|
||||
#include "libc/sysv/errfuns.h"
|
||||
#include "libc/thread/thread.h"
|
||||
#include "libc/thread/tls.h"
|
||||
|
||||
static int64_t g_nanosleep_latency;
|
||||
|
||||
static errno_t sys_clock_nanosleep(int clock, int flags,
|
||||
const struct timespec *req,
|
||||
|
@ -65,33 +54,21 @@ static errno_t sys_clock_nanosleep(int clock, int flags,
|
|||
errno = e;
|
||||
}
|
||||
END_CANCELLATION_POINT;
|
||||
#if 0
|
||||
STRACE("sys_clock_nanosleep(%s, %s, %s, [%s]) → %d% m",
|
||||
DescribeClockName(clock), DescribeSleepFlags(flags),
|
||||
DescribeTimespec(0, req), DescribeTimespec(rc, rem), rc);
|
||||
#endif
|
||||
return rc;
|
||||
}
|
||||
|
||||
// determine sched_yield() vs. clock_nanosleep() threshold
|
||||
// 1ns sys_clock_nanosleep() on Windows takes milliseconds :'(
|
||||
// 1ns sys_clock_nanosleep() on Linux/FreeBSD takes tens of microseconds
|
||||
// 1ns sys_clock_nanosleep() on OpenBSD/NetBSD takes tens of milliseconds D:
|
||||
static struct timespec GetNanosleepLatency(void) {
|
||||
errno_t rc;
|
||||
int64_t nanos;
|
||||
struct timespec x, y, w = {0, 1};
|
||||
if (!(nanos = g_nanosleep_latency)) {
|
||||
BLOCK_SIGNALS;
|
||||
for (;;) {
|
||||
unassert(!clock_gettime(CLOCK_REALTIME_PRECISE, &x));
|
||||
rc = sys_clock_nanosleep(CLOCK_REALTIME, 0, &w, 0);
|
||||
unassert(!rc || rc == EINTR);
|
||||
if (!rc) {
|
||||
unassert(!clock_gettime(CLOCK_REALTIME_PRECISE, &y));
|
||||
nanos = timespec_tonanos(timespec_sub(y, x));
|
||||
g_nanosleep_latency = nanos;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ALLOW_SIGNALS;
|
||||
}
|
||||
return timespec_fromnanos(nanos);
|
||||
// determine how many nanoseconds it takes before clock_nanosleep()
|
||||
// starts sleeping with 90 percent accuracy; in other words when we
|
||||
// ask it to sleep 1 second, it (a) must NEVER sleep for less time,
|
||||
// and (b) does not sleep for longer than 1.1 seconds of time. what
|
||||
// ever is below that, thanks but no thanks, we'll just spin yield,
|
||||
static struct timespec GetNanosleepThreshold(void) {
|
||||
return timespec_fromnanos(1000000000 / CLK_TCK);
|
||||
}
|
||||
|
||||
static errno_t CheckCancel(void) {
|
||||
|
@ -114,7 +91,7 @@ static errno_t SpinNanosleep(int clock, int flags, const struct timespec *req,
|
|||
}
|
||||
unassert(!clock_gettime(CLOCK_REALTIME, &start));
|
||||
for (;;) {
|
||||
pthread_yield();
|
||||
spin_yield();
|
||||
unassert(!clock_gettime(CLOCK_REALTIME, &now));
|
||||
if (flags & TIMER_ABSTIME) {
|
||||
if (timespec_cmp(now, *req) >= 0) {
|
||||
|
@ -143,19 +120,13 @@ static errno_t SpinNanosleep(int clock, int flags, const struct timespec *req,
|
|||
}
|
||||
}
|
||||
|
||||
// clock_gettime() takes a few nanoseconds but sys_clock_nanosleep()
|
||||
// is incapable of sleeping for less than a millisecond on platforms
|
||||
// such as windows and it's not much prettier on unix systems either
|
||||
static bool ShouldUseSpinNanosleep(int clock, int flags,
|
||||
const struct timespec *req) {
|
||||
errno_t e;
|
||||
struct timespec now;
|
||||
if (IsWindows()) {
|
||||
// Our spin technique here is intended to take advantage of the fact
|
||||
// that sched_yield() takes about a hundred nanoseconds. But Windows
|
||||
// SleepEx(0, 0) a.k.a. NtYieldExecution() takes a whole millisecond
|
||||
// and it matters not whether our intent is to yield or to sleep,
|
||||
// since we use the SleepEx() function to implement both. Therefore,
|
||||
// there's no reason to use SpinNanosleep() on Windows.
|
||||
return false;
|
||||
}
|
||||
if (clock != CLOCK_REALTIME && //
|
||||
clock != CLOCK_REALTIME_PRECISE && //
|
||||
clock != CLOCK_MONOTONIC && //
|
||||
|
@ -164,13 +135,7 @@ static bool ShouldUseSpinNanosleep(int clock, int flags,
|
|||
return false;
|
||||
}
|
||||
if (!flags) {
|
||||
return timespec_cmp(*req, GetNanosleepLatency()) < 0;
|
||||
}
|
||||
// We need a clock_gettime() system call to perform this check if the
|
||||
// sleep request is an absolute timestamp. So we avoid doing that on
|
||||
// systems where sleep latency isn't too outrageous.
|
||||
if (timespec_cmp(GetNanosleepLatency(), timespec_fromnanos(50 * 1000)) < 0) {
|
||||
return false;
|
||||
return timespec_cmp(*req, GetNanosleepThreshold()) < 0;
|
||||
}
|
||||
e = errno;
|
||||
if (clock_gettime(clock, &now)) {
|
||||
|
@ -179,7 +144,7 @@ static bool ShouldUseSpinNanosleep(int clock, int flags,
|
|||
return false;
|
||||
}
|
||||
return timespec_cmp(*req, now) < 0 ||
|
||||
timespec_cmp(timespec_sub(*req, now), GetNanosleepLatency()) < 0;
|
||||
timespec_cmp(timespec_sub(*req, now), GetNanosleepThreshold()) < 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
16
libc/nexgen32e/yield.h
Normal file
16
libc/nexgen32e/yield.h
Normal file
|
@ -0,0 +1,16 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_YIELD_H_
|
||||
#define COSMOPOLITAN_LIBC_YIELD_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifdef _COSMO_SOURCE
|
||||
|
||||
/**
 * Issues the CPU's spin-wait hint instruction, if one is known.
 *
 * Emits `yield` on aarch64 and `pause` on x86-64 / i386 when building
 * with a GNU-compatible compiler. On any other compiler or architecture
 * this function has an empty body.
 */
static inline void spin_yield(void) {
#if defined(__GNUC__)
#if defined(__aarch64__)
  __asm__ volatile("yield");
#elif defined(__x86_64__) || defined(__i386__)
  __asm__ volatile("pause");
#endif
#endif /* __GNUC__ */
}
|
||||
|
||||
#endif /* _COSMO_SOURCE */
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_YIELD_H_ */
|
72
tool/viz/clock_nanosleep_accuracy.c
Normal file
72
tool/viz/clock_nanosleep_accuracy.c
Normal file
|
@ -0,0 +1,72 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2023 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/assert.h"
|
||||
#include "libc/calls/struct/timespec.h"
|
||||
#include "libc/intrin/kprintf.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/stdio/stdio.h"
|
||||
#include "libc/sysv/consts/clock.h"
|
||||
|
||||
#define MAXIMUM 1e8
|
||||
#define ITERATIONS 10
|
||||
|
||||
void WarmUp(void) {
|
||||
struct timespec wf = {0, 1};
|
||||
npassert(!clock_nanosleep(CLOCK_REALTIME, 0, &wf, 0));
|
||||
}
|
||||
|
||||
void TestSleepRealRelative(void) {
|
||||
printf("\n");
|
||||
printf("testing: clock_nanosleep(CLOCK_REALTIME) with relative timeout\n");
|
||||
for (long nanos = 1; nanos < (long)MAXIMUM; nanos *= 2) {
|
||||
struct timespec t1, t2, wf;
|
||||
wf = timespec_fromnanos(nanos);
|
||||
clock_gettime(CLOCK_REALTIME_PRECISE, &t1);
|
||||
for (int i = 0; i < ITERATIONS; ++i) {
|
||||
npassert(!clock_nanosleep(CLOCK_REALTIME, 0, &wf, 0));
|
||||
}
|
||||
clock_gettime(CLOCK_REALTIME_PRECISE, &t2);
|
||||
long took = timespec_tonanos(timespec_sub(t2, t1)) / ITERATIONS;
|
||||
printf("%,11ld ns sleep took %,11ld ns delta %,11ld ns\n", nanos, took,
|
||||
took - nanos);
|
||||
}
|
||||
}
|
||||
|
||||
void TestSleepMonoRelative(void) {
|
||||
printf("\n");
|
||||
printf("testing: clock_nanosleep(CLOCK_MONOTONIC) with relative timeout\n");
|
||||
for (long nanos = 1; nanos < (long)MAXIMUM; nanos *= 2) {
|
||||
struct timespec t1, t2, wf;
|
||||
wf = timespec_fromnanos(nanos);
|
||||
clock_gettime(CLOCK_REALTIME_PRECISE, &t1);
|
||||
for (int i = 0; i < ITERATIONS; ++i) {
|
||||
npassert(!clock_nanosleep(CLOCK_MONOTONIC, 0, &wf, 0));
|
||||
}
|
||||
clock_gettime(CLOCK_REALTIME_PRECISE, &t2);
|
||||
long took = timespec_tonanos(timespec_sub(t2, t1)) / ITERATIONS;
|
||||
printf("%,11ld ns sleep took %,11ld ns delta %,11ld ns\n", nanos, took,
|
||||
took - nanos);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Runs the warm-up sleep, then prints the relative-timeout accuracy
 * tables for CLOCK_REALTIME and CLOCK_MONOTONIC, in that order.
 *
 * @param argc is unused
 * @param argv is unused
 * @return 0 always
 */
int main(int argc, char *argv[]) {
  (void)argc;
  (void)argv;
  WarmUp();
  TestSleepRealRelative();
  TestSleepMonoRelative();
  return 0;
}
|
Loading…
Reference in a new issue