Make fixes and improvements

- clock_nanosleep() is now much faster on OpenBSD and NetBSD
- Thread joining is now much faster on NetBSD
- FreeBSD timestamps are now more accurate
- Thread spawning now goes faster on XNU
- Clean up the clone() code
This commit is contained in:
Justine Tunney 2022-11-08 10:09:47 -08:00
parent aee50b1327
commit b407327972
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
47 changed files with 645 additions and 306 deletions

View file

@ -16,7 +16,11 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/calls/asan.internal.h"
#include "libc/calls/blockcancel.internal.h"
#include "libc/calls/calls.h"
#include "libc/calls/clock_gettime.internal.h"
#include "libc/calls/cp.internal.h"
#include "libc/calls/state.internal.h"
#include "libc/calls/struct/timespec.h"
@ -27,11 +31,161 @@
#include "libc/errno.h"
#include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/intrin/weaken.h"
#include "libc/macros.internal.h"
#include "libc/nt/ntdll.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/clock.h"
#include "libc/sysv/consts/timer.h"
#include "libc/sysv/errfuns.h"
#include "libc/thread/thread.h"
#include "libc/thread/tls.h"
static int64_t g_nanosleep_latency;
/**
 * Issues the platform-specific sleep call and reports failure as a
 * returned error code rather than through errno.
 *
 * The whole call is bracketed as a cancellation point. When the
 * underlying implementation returns -1, the errno value it left behind
 * becomes the return value and errno is put back to what it was on
 * entry.
 *
 * @param clock is the clock id forwarded to the implementation
 * @param flags is the flag word forwarded to the implementation
 * @param req is the requested interval or deadline
 * @param rem is forwarded to the implementation and may be null
 * @return whatever the implementation returned, except that -1 is
 *     replaced by the errno code it set
 */
static errno_t sys_clock_nanosleep(int clock, int flags,
                                   const struct timespec *req,
                                   struct timespec *rem) {
  int saved_errno;
  int result;
  BEGIN_CANCELLATION_POINT;
  saved_errno = errno;
  // log the request up front, since the call below may block
  STRACE("clock_nanosleep(%s, %s, %s, %s) → ...", DescribeClockName(clock),
         DescribeSleepFlags(flags), DescribeTimespec(0, req),
         DescribeTimespec(0, rem));
  if (IsLinux() || IsFreebsd() || IsNetbsd()) {
    result = __sys_clock_nanosleep(clock, flags, req, rem);
  } else if (IsXnu()) {
    result = sys_clock_nanosleep_xnu(clock, flags, req, rem);
  } else if (IsOpenbsd()) {
    result = sys_clock_nanosleep_openbsd(clock, flags, req, rem);
  } else {
    result = sys_clock_nanosleep_nt(clock, flags, req, rem);
  }
  if (result == -1) {
    // convert the -1/errno convention into a returned error code
    result = errno;
    errno = saved_errno;
  }
  END_CANCELLATION_POINT;
  return result;
}
// determine sched_yield() vs. clock_nanosleep() threshold
// 1ns sys_clock_nanosleep() on Windows takes milliseconds :'(
// 1ns sys_clock_nanosleep() on Linux/FreeBSD takes tens of microseconds
// 1ns sys_clock_nanosleep() on OpenBSD/NetBSD takes tens of milliseconds D:
/**
 * Returns how long a one nanosecond sys_clock_nanosleep() really takes.
 *
 * The first successful measurement is stored in g_nanosleep_latency and
 * reused by later calls. Because zero is the "not measured yet" marker
 * for that cache, the stored value is clamped to at least one
 * nanosecond; otherwise a zero reading would be re-measured (with a
 * real sleep) on every call, and a negative reading, e.g. from the
 * wall clock stepping backwards between the two timestamps, would be
 * cached permanently.
 *
 * @return measured latency, always at least one nanosecond
 */
static struct timespec GetNanosleepLatency(void) {
  errno_t rc;
  int64_t nanos;
  clock_gettime_f *cgt;
  struct timespec x, y, w = {0, 1};
  if (!(nanos = g_nanosleep_latency)) {
    BLOCK_CANCELLATIONS;
    for (cgt = __clock_gettime_get(0);;) {
      _npassert(!cgt(CLOCK_REALTIME_PRECISE, &x));
      rc = sys_clock_nanosleep(CLOCK_REALTIME, 0, &w, 0);
      _npassert(!rc || rc == EINTR);
      if (!rc) {
        _npassert(!cgt(CLOCK_REALTIME_PRECISE, &y));
        nanos = timespec_tonanos(timespec_sub(y, x));
        if (nanos < 1) {
          // keep the cached value distinguishable from "unmeasured"
          nanos = 1;
        }
        g_nanosleep_latency = nanos;
        break;
      }
      // EINTR: the sample is unusable, so measure again
    }
    ALLOW_CANCELLATIONS;
  }
  return timespec_fromnanos(nanos);
}
/**
 * Polls for a pending cancellation, if the polling routine is linked.
 *
 * pthread_testcancel_np() is referenced weakly, so it is only called
 * when that reference is non-null.
 *
 * @return result of pthread_testcancel_np(), or 0 when it isn't linked
 */
static errno_t CheckCancel(void) {
  if (!_weaken(pthread_testcancel_np)) {
    return 0;
  }
  return _weaken(pthread_testcancel_np)();
}
/**
 * Sleeps by polling a clock between sched_yield() calls, rather than
 * asking the kernel to block the thread.
 *
 * CheckCancel() is consulted before the first yield and on every
 * iteration; a nonzero result from it ends the wait early and is
 * returned as-is.
 *
 * @param clock is the clock an absolute deadline is expressed in
 * @param flags is 0 for a relative sleep, or TIMER_ABSTIME
 * @param req is the interval to wait, or the deadline if TIMER_ABSTIME
 * @param rem if non-null receives the time left to sleep when a
 *     relative sleep ends early with EINTR
 * @return 0 once the interval elapsed or the deadline passed, otherwise
 *     the error code reported by CheckCancel()
 */
static errno_t SpinNanosleep(int clock, int flags, const struct timespec *req,
                             struct timespec *rem) {
  errno_t rc;
  int timer;
  clock_gettime_f *cgt;
  struct timespec now, start, elapsed;
  if ((rc = CheckCancel())) {
    if (rc == EINTR && !flags && rem) {
      // nothing has been slept yet, so the whole interval remains
      *rem = *req;
    }
    return rc;
  }
  // An absolute deadline can only be compared against readings of the
  // clock it was expressed in. Relative sleeps only need a difference
  // between two readings, which is taken on the realtime clock.
  timer = (flags & TIMER_ABSTIME) ? clock : CLOCK_REALTIME;
  cgt = __clock_gettime_get(0);
  _npassert(!cgt(timer, &start));
  for (;;) {
    sched_yield();
    _npassert(!cgt(timer, &now));
    if (flags & TIMER_ABSTIME) {
      if (timespec_cmp(now, *req) >= 0) {
        return 0;
      }
      if ((rc = CheckCancel())) {
        return rc;
      }
    } else {
      if (timespec_cmp(now, start) < 0) {
        // clock reads earlier than when we started; sample again
        continue;
      }
      elapsed = timespec_sub(now, start);
      if ((rc = CheckCancel())) {
        if (rc == EINTR && rem) {
          if (timespec_cmp(elapsed, *req) >= 0) {
            bzero(rem, sizeof(*rem));
          } else {
            // report what is left of the request, not what was slept
            *rem = timespec_sub(*req, elapsed);
          }
        }
        return rc;
      }
      if (timespec_cmp(elapsed, *req) >= 0) {
        return 0;
      }
    }
  }
}
static bool ShouldUseSpinNanosleep(int clock, int flags,
const struct timespec *req) {
errno_t e;
struct timespec now;
if (IsWindows()) {
// Our spin technique here is intended to take advantage of the fact
// that sched_yield() takes about a hundred nanoseconds. But Windows
// SleepEx(0, 0) a.k.a. NtYieldExecution() takes a whole millisecond
// and it matters not whether our intent is to yielding or sleeping,
// since we use the SleepEx() function to implement both. Therefore,
// there's no reason to use SpinNanosleep() on Windows.
return false;
}
if (clock != CLOCK_REALTIME && //
clock != CLOCK_REALTIME_PRECISE && //
clock != CLOCK_MONOTONIC && //
clock != CLOCK_MONOTONIC_RAW && //
clock != CLOCK_MONOTONIC_PRECISE) {
return false;
}
if (!flags) {
return timespec_cmp(*req, GetNanosleepLatency()) < 0;
}
// We need a clock_gettime() system call to perform this check if the
// sleep request is an absolute timestamp. So we avoid doing that on
// systems where sleep latency isn't too outrageous.
if (timespec_cmp(GetNanosleepLatency(), timespec_fromnanos(50 * 1000)) < 0) {
return false;
}
e = errno;
if (__clock_gettime_get(0)(clock, &now)) {
// punt to the nanosleep system call
errno = e;
return false;
}
return timespec_cmp(*req, now) < 0 ||
timespec_cmp(timespec_sub(*req, now), GetNanosleepLatency()) < 0;
}
/**
* Sleeps for a particular amount of time.
*
@ -88,36 +242,22 @@
*/
errno_t clock_nanosleep(int clock, int flags, const struct timespec *req,
struct timespec *rem) {
int rc, e = errno;
BEGIN_CANCELLATION_POINT;
if (!req || (IsAsan() && (!__asan_is_valid_timespec(req) ||
(rem && !__asan_is_valid_timespec(rem))))) {
rc = efault();
int rc;
if (IsMetal()) {
rc = ENOSYS;
} else if (!req || (IsAsan() && (!__asan_is_valid_timespec(req) ||
(rem && !__asan_is_valid_timespec(rem))))) {
rc = EFAULT;
} else if (clock == 127 || //
(flags & ~TIMER_ABSTIME) || //
req->tv_sec < 0 || //
!(0 <= req->tv_nsec && req->tv_nsec <= 999999999)) {
rc = einval();
} else if (IsLinux() || IsFreebsd() || IsNetbsd()) {
rc = sys_clock_nanosleep(clock, flags, req, rem);
} else if (IsXnu()) {
rc = sys_clock_nanosleep_xnu(clock, flags, req, rem);
} else if (IsOpenbsd()) {
rc = sys_clock_nanosleep_openbsd(clock, flags, req, rem);
} else if (IsMetal()) {
rc = enosys();
rc = EINVAL;
} else if (ShouldUseSpinNanosleep(clock, flags, req)) {
rc = SpinNanosleep(clock, flags, req, rem);
} else {
rc = sys_clock_nanosleep_nt(clock, flags, req, rem);
rc = sys_clock_nanosleep(clock, flags, req, rem);
}
if (rc == -1) {
rc = errno;
errno = e;
}
END_CANCELLATION_POINT;
#if SYSDEBUG
if (__tls_enabled && !(__get_tls()->tib_flags & TIB_FLAG_TIME_CRITICAL)) {
STRACE("clock_nanosleep(%s, %s, %s, [%s]) → %s", DescribeClockName(clock),
@ -125,6 +265,5 @@ errno_t clock_nanosleep(int clock, int flags, const struct timespec *req,
DescribeTimespec(rc, rem), DescribeErrnoResult(rc));
}
#endif
return rc;
}