Make fixes and improvements

- clock_nanosleep() is now much faster on OpenBSD and NetBSD
- Thread joining is now much faster on NetBSD
- FreeBSD timestamps are now more accurate
- Thread spawning now goes faster on XNU
- Clean up the clone() code
This commit is contained in:
Justine Tunney 2022-11-08 10:09:47 -08:00
parent aee50b1327
commit b407327972
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
47 changed files with 645 additions and 306 deletions

View file

@ -50,20 +50,23 @@
* sys_clock_gettime l: 220𝑐 71𝑛𝑠
*
* @param clock can be one of:
* - `CLOCK_REALTIME`: universally supported
* - `CLOCK_REALTIME_FAST`: ditto but faster on freebsd
* - `CLOCK_MONOTONIC`: universally supported
* - `CLOCK_MONOTONIC_FAST`: ditto but faster on freebsd
* - `CLOCK_MONOTONIC_RAW`: nearly universally supported
* - `CLOCK_PROCESS_CPUTIME_ID`: linux and bsd
* - `CLOCK_THREAD_CPUTIME_ID`: linux and bsd
* - `CLOCK_REALTIME_COARSE`: : linux and openbsd
* - `CLOCK_MONOTONIC_COARSE`: linux
* - `CLOCK_PROF`: linux and netbsd
* - `CLOCK_BOOTTIME`: linux and openbsd
* - `CLOCK_REALTIME_ALARM`: linux-only
* - `CLOCK_BOOTTIME_ALARM`: linux-only
* - `CLOCK_TAI`: linux-only
* - `CLOCK_REALTIME`: universally supported
* - `CLOCK_REALTIME_FAST`: ditto but faster on freebsd
* - `CLOCK_REALTIME_PRECISE`: ditto but better on freebsd
* - `CLOCK_REALTIME_COARSE`: : like `CLOCK_REALTIME_FAST` w/ Linux 2.6.32+
* - `CLOCK_MONOTONIC`: universally supported
* - `CLOCK_MONOTONIC_FAST`: ditto but faster on freebsd
* - `CLOCK_MONOTONIC_PRECISE`: ditto but better on freebsd
* - `CLOCK_MONOTONIC_COARSE`: : like `CLOCK_MONOTONIC_FAST` w/ Linux 2.6.32+
* - `CLOCK_MONOTONIC_RAW`: is actually monotonic but needs Linux 2.6.28+
* - `CLOCK_PROCESS_CPUTIME_ID`: linux and bsd
* - `CLOCK_THREAD_CPUTIME_ID`: linux and bsd
* - `CLOCK_MONOTONIC_COARSE`: linux, freebsd
* - `CLOCK_PROF`: linux and netbsd
* - `CLOCK_BOOTTIME`: linux and openbsd
* - `CLOCK_REALTIME_ALARM`: linux-only
* - `CLOCK_BOOTTIME_ALARM`: linux-only
* - `CLOCK_TAI`: linux-only
* @param ts is where the result is stored
* @return 0 on success, or -1 w/ errno
* @error EPERM if pledge() is in play without stdio promise

View file

@ -16,7 +16,11 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/calls/asan.internal.h"
#include "libc/calls/blockcancel.internal.h"
#include "libc/calls/calls.h"
#include "libc/calls/clock_gettime.internal.h"
#include "libc/calls/cp.internal.h"
#include "libc/calls/state.internal.h"
#include "libc/calls/struct/timespec.h"
@ -27,11 +31,161 @@
#include "libc/errno.h"
#include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/intrin/weaken.h"
#include "libc/macros.internal.h"
#include "libc/nt/ntdll.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/clock.h"
#include "libc/sysv/consts/timer.h"
#include "libc/sysv/errfuns.h"
#include "libc/thread/thread.h"
#include "libc/thread/tls.h"
static int64_t g_nanosleep_latency;
static errno_t sys_clock_nanosleep(int clock, int flags,
const struct timespec *req,
struct timespec *rem) {
int e, rc;
BEGIN_CANCELLATION_POINT;
e = errno;
STRACE("clock_nanosleep(%s, %s, %s, %s) → ...", DescribeClockName(clock),
DescribeSleepFlags(flags), DescribeTimespec(0, req),
DescribeTimespec(0, rem));
if (IsLinux() || IsFreebsd() || IsNetbsd()) {
rc = __sys_clock_nanosleep(clock, flags, req, rem);
} else if (IsXnu()) {
rc = sys_clock_nanosleep_xnu(clock, flags, req, rem);
} else if (IsOpenbsd()) {
rc = sys_clock_nanosleep_openbsd(clock, flags, req, rem);
} else {
rc = sys_clock_nanosleep_nt(clock, flags, req, rem);
}
if (rc == -1) {
rc = errno;
errno = e;
}
END_CANCELLATION_POINT;
return rc;
}
// determine sched_yield() vs. clock_nanosleep() threshold
// 1ns sys_clock_nanosleep() on Windows takes milliseconds :'(
// 1ns sys_clock_nanosleep() on Linux/FreeBSD takes tens of microseconds
// 1ns sys_clock_nanosleep() on OpenBSD/NetBSD takes tens of milliseconds D:
static struct timespec GetNanosleepLatency(void) {
errno_t rc;
int64_t nanos;
clock_gettime_f *cgt;
struct timespec x, y, w = {0, 1};
if (!(nanos = g_nanosleep_latency)) {
BLOCK_CANCELLATIONS;
for (cgt = __clock_gettime_get(0);;) {
_npassert(!cgt(CLOCK_REALTIME_PRECISE, &x));
rc = sys_clock_nanosleep(CLOCK_REALTIME, 0, &w, 0);
_npassert(!rc || rc == EINTR);
if (!rc) {
_npassert(!cgt(CLOCK_REALTIME_PRECISE, &y));
nanos = timespec_tonanos(timespec_sub(y, x));
g_nanosleep_latency = nanos;
break;
}
}
ALLOW_CANCELLATIONS;
}
return timespec_fromnanos(nanos);
}
static errno_t CheckCancel(void) {
if (_weaken(pthread_testcancel_np)) {
return _weaken(pthread_testcancel_np)();
} else {
return 0;
}
}
static errno_t SpinNanosleep(int clock, int flags, const struct timespec *req,
struct timespec *rem) {
errno_t rc;
clock_gettime_f *cgt;
struct timespec now, start, elapsed;
if ((rc = CheckCancel())) {
if (rc == EINTR && !flags && rem) {
*rem = *req;
}
return rc;
}
cgt = __clock_gettime_get(0);
_npassert(!cgt(CLOCK_REALTIME, &start));
for (;;) {
sched_yield();
_npassert(!cgt(CLOCK_REALTIME, &now));
if (flags & TIMER_ABSTIME) {
if (timespec_cmp(now, *req) >= 0) {
return 0;
}
if ((rc = CheckCancel())) {
return rc;
}
} else {
if (timespec_cmp(now, start) < 0) continue;
elapsed = timespec_sub(now, start);
if ((rc = CheckCancel())) {
if (rc == EINTR && rem) {
if (timespec_cmp(elapsed, *req) >= 0) {
bzero(rem, sizeof(*rem));
} else {
*rem = elapsed;
}
}
return rc;
}
if (timespec_cmp(elapsed, *req) >= 0) {
return 0;
}
}
}
}
static bool ShouldUseSpinNanosleep(int clock, int flags,
const struct timespec *req) {
errno_t e;
struct timespec now;
if (IsWindows()) {
// Our spin technique here is intended to take advantage of the fact
// that sched_yield() takes about a hundred nanoseconds. But Windows
// SleepEx(0, 0) a.k.a. NtYieldExecution() takes a whole millisecond
// and it matters not whether our intent is to yielding or sleeping,
// since we use the SleepEx() function to implement both. Therefore,
// there's no reason to use SpinNanosleep() on Windows.
return false;
}
if (clock != CLOCK_REALTIME && //
clock != CLOCK_REALTIME_PRECISE && //
clock != CLOCK_MONOTONIC && //
clock != CLOCK_MONOTONIC_RAW && //
clock != CLOCK_MONOTONIC_PRECISE) {
return false;
}
if (!flags) {
return timespec_cmp(*req, GetNanosleepLatency()) < 0;
}
// We need a clock_gettime() system call to perform this check if the
// sleep request is an absolute timestamp. So we avoid doing that on
// systems where sleep latency isn't too outrageous.
if (timespec_cmp(GetNanosleepLatency(), timespec_fromnanos(50 * 1000)) < 0) {
return false;
}
e = errno;
if (__clock_gettime_get(0)(clock, &now)) {
// punt to the nanosleep system call
errno = e;
return false;
}
return timespec_cmp(*req, now) < 0 ||
timespec_cmp(timespec_sub(*req, now), GetNanosleepLatency()) < 0;
}
/**
* Sleeps for particular amount of time.
*
@ -88,36 +242,22 @@
*/
errno_t clock_nanosleep(int clock, int flags, const struct timespec *req,
struct timespec *rem) {
int rc, e = errno;
BEGIN_CANCELLATION_POINT;
if (!req || (IsAsan() && (!__asan_is_valid_timespec(req) ||
(rem && !__asan_is_valid_timespec(rem))))) {
rc = efault();
int rc;
if (IsMetal()) {
rc = ENOSYS;
} else if (!req || (IsAsan() && (!__asan_is_valid_timespec(req) ||
(rem && !__asan_is_valid_timespec(rem))))) {
rc = EFAULT;
} else if (clock == 127 || //
(flags & ~TIMER_ABSTIME) || //
req->tv_sec < 0 || //
!(0 <= req->tv_nsec && req->tv_nsec <= 999999999)) {
rc = einval();
} else if (IsLinux() || IsFreebsd() || IsNetbsd()) {
rc = sys_clock_nanosleep(clock, flags, req, rem);
} else if (IsXnu()) {
rc = sys_clock_nanosleep_xnu(clock, flags, req, rem);
} else if (IsOpenbsd()) {
rc = sys_clock_nanosleep_openbsd(clock, flags, req, rem);
} else if (IsMetal()) {
rc = enosys();
rc = EINVAL;
} else if (ShouldUseSpinNanosleep(clock, flags, req)) {
rc = SpinNanosleep(clock, flags, req, rem);
} else {
rc = sys_clock_nanosleep_nt(clock, flags, req, rem);
rc = sys_clock_nanosleep(clock, flags, req, rem);
}
if (rc == -1) {
rc = errno;
errno = e;
}
END_CANCELLATION_POINT;
#if SYSDEBUG
if (__tls_enabled && !(__get_tls()->tib_flags & TIB_FLAG_TIME_CRITICAL)) {
STRACE("clock_nanosleep(%s, %s, %s, [%s]) → %s", DescribeClockName(clock),
@ -125,6 +265,5 @@ errno_t clock_nanosleep(int clock, int flags, const struct timespec *req,
DescribeTimespec(rc, rem), DescribeErrnoResult(rc));
}
#endif
return rc;
}

View file

@ -0,0 +1,25 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/calls/blockcancel.internal.h"
#include "libc/calls/calls.h"
#include "libc/calls/clock_gettime.internal.h"
#include "libc/errno.h"
#include "libc/macros.internal.h"
#include "libc/sysv/consts/clock.h"

View file

@ -45,10 +45,10 @@ static long double GetTimeSample(void) {
uint64_t tick1, tick2;
long double time1, time2;
sched_yield();
time1 = dtime(CLOCK_REALTIME_FAST);
time1 = dtime(CLOCK_REALTIME_PRECISE);
tick1 = rdtsc();
nanosleep(&(struct timespec){0, 1000000}, NULL);
time2 = dtime(CLOCK_REALTIME_FAST);
time2 = dtime(CLOCK_REALTIME_PRECISE);
tick2 = rdtsc();
return (time2 - time1) * 1e9 / MAX(1, tick2 - tick1);
}
@ -74,13 +74,13 @@ static long double MeasureNanosPerCycle(void) {
void RefreshTime(void) {
struct Now now;
now.cpn = MeasureNanosPerCycle();
now.r0 = dtime(CLOCK_REALTIME_FAST);
now.r0 = dtime(CLOCK_REALTIME_PRECISE);
now.k0 = rdtsc();
memcpy(&g_now, &now, sizeof(now));
}
static long double nowl_sys(void) {
return dtime(CLOCK_REALTIME_FAST);
return dtime(CLOCK_REALTIME_PRECISE);
}
static long double nowl_art(void) {
@ -92,7 +92,7 @@ static long double nowl_vdso(void) {
long double secs;
struct timespec tv;
_unassert(__gettime);
__gettime(CLOCK_REALTIME_FAST, &tv);
__gettime(CLOCK_REALTIME_PRECISE, &tv);
secs = tv.tv_nsec;
secs *= 1 / 1e9L;
secs += tv.tv_sec;

View file

@ -6,13 +6,13 @@
COSMOPOLITAN_C_START_
/* clang-format off */
int __sys_clock_nanosleep(int, int, const struct timespec *, struct timespec *) hidden;
int __sys_utimensat(int, const char *, const struct timespec[2], int) hidden;
int __utimens(int, const char *, const struct timespec[2], int) hidden;
int sys_clock_getres(int, struct timespec *) hidden;
int sys_clock_gettime(int, struct timespec *) hidden;
int sys_clock_gettime_nt(int, struct timespec *) hidden;
int sys_clock_gettime_xnu(int, struct timespec *) hidden;
int sys_clock_nanosleep(int, int, const struct timespec *, struct timespec *) hidden;
int sys_clock_nanosleep_nt(int, int, const struct timespec *, struct timespec *) hidden;
int sys_clock_nanosleep_openbsd(int, int, const struct timespec *, struct timespec *) hidden;
int sys_clock_nanosleep_xnu(int, int, const struct timespec *, struct timespec *) hidden;

View file

@ -29,6 +29,6 @@
*/
struct timespec timespec_mono(void) {
struct timespec ts;
_npassert(!clock_gettime(CLOCK_MONOTONIC_FAST, &ts));
_npassert(!clock_gettime(CLOCK_MONOTONIC, &ts));
return ts;
}

View file

@ -31,6 +31,6 @@
*/
struct timespec timespec_real(void) {
struct timespec ts;
_npassert(!clock_gettime(CLOCK_REALTIME_FAST, &ts));
_npassert(!clock_gettime(CLOCK_REALTIME, &ts));
return ts;
}