Use CLK_TCK for clock_nanosleep() spin threshold

This more accurately reflects how the kernels actually implement this
function and it most importantly avoids incurring startup latency.
This commit is contained in:
Justine Tunney 2023-10-03 16:58:42 -07:00
parent 11c18fa644
commit 695f74035d
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
4 changed files with 108 additions and 55 deletions

View file

@ -17,31 +17,20 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/calls/asan.internal.h"
#include "libc/calls/blockcancel.internal.h"
#include "libc/calls/blocksigs.internal.h"
#include "libc/calls/calls.h"
#include "libc/calls/cp.internal.h"
#include "libc/calls/state.internal.h"
#include "libc/calls/struct/timespec.h"
#include "libc/calls/struct/timespec.internal.h"
#include "libc/calls/struct/timeval.h"
#include "libc/calls/struct/timeval.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/intrin/weaken.h"
#include "libc/macros.internal.h"
#include "libc/nt/ntdll.h"
#include "libc/nexgen32e/yield.h"
#include "libc/runtime/clktck.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/clock.h"
#include "libc/sysv/consts/timer.h"
#include "libc/sysv/errfuns.h"
#include "libc/thread/thread.h"
#include "libc/thread/tls.h"
static int64_t g_nanosleep_latency;
static errno_t sys_clock_nanosleep(int clock, int flags,
const struct timespec *req,
@ -65,33 +54,21 @@ static errno_t sys_clock_nanosleep(int clock, int flags,
errno = e;
}
END_CANCELLATION_POINT;
#if 0
STRACE("sys_clock_nanosleep(%s, %s, %s, [%s]) → %d% m",
DescribeClockName(clock), DescribeSleepFlags(flags),
DescribeTimespec(0, req), DescribeTimespec(rc, rem), rc);
#endif
return rc;
}
// determine sched_yield() vs. clock_nanosleep() threshold
// 1ns sys_clock_nanosleep() on Windows takes milliseconds :'(
// 1ns sys_clock_nanosleep() on Linux/FreeBSD takes tens of microseconds
// 1ns sys_clock_nanosleep() on OpenBSD/NetBSD takes tens of milliseconds D:
static struct timespec GetNanosleepLatency(void) {
errno_t rc;
int64_t nanos;
struct timespec x, y, w = {0, 1};
if (!(nanos = g_nanosleep_latency)) {
BLOCK_SIGNALS;
for (;;) {
unassert(!clock_gettime(CLOCK_REALTIME_PRECISE, &x));
rc = sys_clock_nanosleep(CLOCK_REALTIME, 0, &w, 0);
unassert(!rc || rc == EINTR);
if (!rc) {
unassert(!clock_gettime(CLOCK_REALTIME_PRECISE, &y));
nanos = timespec_tonanos(timespec_sub(y, x));
g_nanosleep_latency = nanos;
break;
}
}
ALLOW_SIGNALS;
}
return timespec_fromnanos(nanos);
// determine how many nanoseconds it takes before clock_nanosleep()
// starts sleeping with 90 percent accuracy; in other words when we
// ask it to sleep 1 second, it (a) must NEVER sleep for less time,
// and (b) does not sleep for longer than 1.1 seconds of time. what
// ever is below that, thanks but no thanks, we'll just spin yield,
static struct timespec GetNanosleepThreshold(void) {
return timespec_fromnanos(1000000000 / CLK_TCK);
}
static errno_t CheckCancel(void) {
@ -114,7 +91,7 @@ static errno_t SpinNanosleep(int clock, int flags, const struct timespec *req,
}
unassert(!clock_gettime(CLOCK_REALTIME, &start));
for (;;) {
pthread_yield();
spin_yield();
unassert(!clock_gettime(CLOCK_REALTIME, &now));
if (flags & TIMER_ABSTIME) {
if (timespec_cmp(now, *req) >= 0) {
@ -143,19 +120,13 @@ static errno_t SpinNanosleep(int clock, int flags, const struct timespec *req,
}
}
// clock_gettime() takes a few nanoseconds but sys_clock_nanosleep()
// is incapable of sleeping for less than a millisecond on platforms
// such as windows and it's not much prettior on unix systems either
static bool ShouldUseSpinNanosleep(int clock, int flags,
const struct timespec *req) {
errno_t e;
struct timespec now;
if (IsWindows()) {
// Our spin technique here is intended to take advantage of the fact
// that sched_yield() takes about a hundred nanoseconds. But Windows
// SleepEx(0, 0) a.k.a. NtYieldExecution() takes a whole millisecond
// and it matters not whether our intent is to yielding or sleeping,
// since we use the SleepEx() function to implement both. Therefore,
// there's no reason to use SpinNanosleep() on Windows.
return false;
}
if (clock != CLOCK_REALTIME && //
clock != CLOCK_REALTIME_PRECISE && //
clock != CLOCK_MONOTONIC && //
@ -164,13 +135,7 @@ static bool ShouldUseSpinNanosleep(int clock, int flags,
return false;
}
if (!flags) {
return timespec_cmp(*req, GetNanosleepLatency()) < 0;
}
// We need a clock_gettime() system call to perform this check if the
// sleep request is an absolute timestamp. So we avoid doing that on
// systems where sleep latency isn't too outrageous.
if (timespec_cmp(GetNanosleepLatency(), timespec_fromnanos(50 * 1000)) < 0) {
return false;
return timespec_cmp(*req, GetNanosleepThreshold()) < 0;
}
e = errno;
if (clock_gettime(clock, &now)) {
@ -179,7 +144,7 @@ static bool ShouldUseSpinNanosleep(int clock, int flags,
return false;
}
return timespec_cmp(*req, now) < 0 ||
timespec_cmp(timespec_sub(*req, now), GetNanosleepLatency()) < 0;
timespec_cmp(timespec_sub(*req, now), GetNanosleepThreshold()) < 0;
}
/**

16
libc/nexgen32e/yield.h Normal file
View file

@ -0,0 +1,16 @@
#ifndef COSMOPOLITAN_LIBC_YIELD_H_
#define COSMOPOLITAN_LIBC_YIELD_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
#ifdef _COSMO_SOURCE
static inline void spin_yield(void) {
#if defined(__GNUC__) && defined(__aarch64__)
__asm__ volatile("yield");
#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
__asm__ volatile("pause");
#endif
}
#endif /* _COSMO_SOURCE */
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_YIELD_H_ */