Improve Windows sleep accuracy from 15ms to 15µs

This commit is contained in:
Justine Tunney 2024-12-06 23:00:07 -08:00
parent b40140e6c5
commit b490e23d63
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
16 changed files with 189 additions and 67 deletions

View file

@ -16,6 +16,7 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/atomic.h"
#include "libc/calls/internal.h"
#include "libc/calls/struct/sigset.internal.h"
#include "libc/calls/struct/timespec.h"
@ -23,26 +24,37 @@
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/errno.h"
#include "libc/intrin/atomic.h"
#include "libc/nt/enum/status.h"
#include "libc/nt/ntdll.h"
#include "libc/stdio/sysparam.h"
#include "libc/sysv/consts/clock.h"
#include "libc/sysv/consts/timer.h"
#include "libc/thread/tls.h"
#ifdef __x86_64__
// Number of threads currently between the fetch_add and fetch_sub below,
// i.e. sleepers that asked for the raised timer resolution.
static atomic_int usingRes;
// Written by the sleeper that moved usingRes from 0 to 1: true when both
// the NtQueryTimerResolution() and NtSetTimerResolution() calls succeeded.
static atomic_bool changedRes;
// Sleeps until `abs`, an absolute time on `clock`, has been reached.
//
// NOTE(review): this text is a rendered diff without +/- markers. The
// `msdelay` polling loop and the code after it (which declares `now` a
// second time) appear to be the pre-commit and post-commit bodies shown
// back to back rather than one compilable function -- confirm against
// the repository before reading control flow across the two halves.
//
// Returns -1 when reading `clock` fails. The loop-based body returns 0
// once `now >= abs`; the deadline-based body returns whatever
// _park_norestart() returns.
static textwindows int sys_clock_nanosleep_nt_impl(int clock,
struct timespec abs,
sigset_t waitmask) {
// loop-based body: re-read the clock, park for the remaining number of
// milliseconds, and repeat until the deadline has passed
uint32_t msdelay;
struct timespec now;
for (;;) {
if (sys_clock_gettime_nt(clock, &now))
return -1;
if (timespec_cmp(now, abs) >= 0)
return 0;
msdelay = timespec_tomillis(timespec_sub(abs, now));
// keep the delay within what a 32-bit millisecond count can express
msdelay = MIN(msdelay, -1u);
if (_park_norestart(msdelay, waitmask) == -1)
return -1;
}
// deadline-based body: translate the remaining interval into a deadline
// on clock 0 and park once
struct timespec now, wall;
uint32_t minRes, maxRes, oldRes;
// NOTE(review): the result of this call is ignored, so `wall` is used
// below even if the read failed; clock 0 is presumably CLOCK_REALTIME
sys_clock_gettime_nt(0, &wall);
if (sys_clock_gettime_nt(clock, &now))
return -1;
// only these three clocks request the raised timer resolution
bool wantRes = clock == CLOCK_REALTIME || //
clock == CLOCK_MONOTONIC || //
clock == CLOCK_BOOTTIME;
// the sleeper that moves the count 0 -> 1 queries the supported range
// and requests `maxRes` (NOTE(review): in NT naming this is presumably
// the finest interval -- confirm)
if (wantRes && !atomic_fetch_add(&usingRes, 1))
changedRes = NtSuccess(NtQueryTimerResolution(&minRes, &maxRes, &oldRes)) &&
NtSuccess(NtSetTimerResolution(maxRes, true, &oldRes));
// when `abs` is still ahead, push `wall` forward by (abs - now);
// otherwise `wall` stays at the reading taken above
if (timespec_cmp(abs, now) > 0)
wall = timespec_add(wall, timespec_sub(abs, now));
int rc = _park_norestart(wall, waitmask);
// the sleeper that moves the count 1 -> 0 withdraws the request, but
// only if the request had succeeded
// NOTE(review): another thread can become the new first sleeper and
// raise the resolution between this fetch_sub and the call below, in
// which case the withdrawal lands while that thread is parked -- TODO
// confirm whether this window matters
if (wantRes && atomic_fetch_sub(&usingRes, 1) == 1 && changedRes)
NtSetTimerResolution(0, false, &minRes);
return rc;
}
textwindows int sys_clock_nanosleep_nt(int clock, int flags,

View file

@ -57,6 +57,7 @@
*
* @param clock may be
* - `CLOCK_REALTIME`
* - `CLOCK_BOOTTIME`
* - `CLOCK_MONOTONIC`
* - `CLOCK_REALTIME_COARSE` but is likely to sleep negative time
* - `CLOCK_MONOTONIC_COARSE` but is likely to sleep negative time

View file

@ -3,6 +3,7 @@
#include "libc/atomic.h"
#include "libc/calls/struct/sigset.h"
#include "libc/calls/struct/sigval.h"
#include "libc/calls/struct/timespec.h"
#include "libc/dce.h"
#include "libc/intrin/fds.h"
#include "libc/macros.h"
@ -46,8 +47,8 @@ int _check_signal(bool);
int _check_cancel(void);
bool _is_canceled(void);
int sys_close_nt(int, int);
// NOTE(review): two prototype pairs with the same names are visible
// because this is a diff rendering without +/- markers. The uint32_t
// forms presumably take a millisecond delay and the struct timespec
// forms a deadline; the second argument is the signal wait mask.
int _park_norestart(uint32_t, uint64_t);
int _park_restartable(uint32_t, uint64_t);
int _park_norestart(struct timespec, uint64_t);
int _park_restartable(struct timespec, uint64_t);
int sys_openat_metal(int, const char *, int, unsigned);
#ifdef __x86_64__

View file

@ -19,65 +19,96 @@
#include "libc/calls/internal.h"
#include "libc/calls/sig.internal.h"
#include "libc/calls/struct/sigset.h"
#include "libc/calls/struct/timespec.h"
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/fmt/wintime.internal.h"
#include "libc/intrin/atomic.h"
#include "libc/intrin/weaken.h"
#include "libc/nt/enum/wait.h"
#include "libc/nt/events.h"
#include "libc/nt/runtime.h"
#include "libc/nt/synchronization.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/sicode.h"
#include "libc/sysv/errfuns.h"
#include "libc/thread/posixthread.internal.h"
#ifdef __x86_64__
// Parks the calling thread on a freshly created event object until a
// signal arrives, the thread is canceled, or the time limit is reached.
//
// NOTE(review): this text is a rendered diff without +/- markers. Lines
// of the millisecond-delay version (WaitForSingleObject on `msdelay`)
// and of the deadline version (waitable timer + WaitForMultipleObjects)
// are interleaved below, including duplicated declarations; it is not
// one compilable function. Confirm against the repository.
//
// returns 0 on timeout or spurious wakeup
// returns 0 if deadline is reached
// raises EINTR if a signal delivery interrupted wait operation
// raises ECANCELED if this POSIX thread was canceled in masked mode
textwindows static int _park_thread(uint32_t msdelay, sigset_t waitmask,
textwindows static int _park_thread(struct timespec deadline, sigset_t waitmask,
bool restartable) {
struct PosixThread *pt = _pthread_self();
for (;;) {
// `handl` counts the handles stored in `hands`: the signal event goes
// in first and the optional timer second
uint32_t handl = 0;
intptr_t hands[2];
// perform the wait operation
intptr_t sigev;
if (!(sigev = CreateEvent(0, 0, 0, 0)))
return __winerr();
// publish the event and mask on the thread object before marking the
// thread as blocked on an event
pt->pt_event = sigev;
pt->pt_blkmask = waitmask;
atomic_store_explicit(&pt->pt_blocker, PT_BLOCKER_EVENT,
memory_order_release);
//!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!//
int sig = 0;
uint32_t ws = 0;
// skip the wait entirely if already canceled or a signal is pending
if (!_is_canceled() &&
!(_weaken(__sig_get) && (sig = _weaken(__sig_get)(waitmask))))
ws = WaitForSingleObject(sigev, msdelay);
//!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!//
atomic_store_explicit(&pt->pt_blocker, 0, memory_order_release);
CloseHandle(sigev);
// create event object
intptr_t sigev;
if (!(sigev = CreateEvent(0, 0, 0, 0)))
return __winerr();
hands[handl++] = sigev;
// recursion is now safe
if (ws == -1u)
return __winerr();
int handler_was_called = 0;
if (sig)
handler_was_called = _weaken(__sig_relay)(sig, SI_KERNEL, waitmask);
if (_check_cancel())
return -1;
if (handler_was_called & SIG_HANDLED_NO_RESTART)
return eintr();
if (handler_was_called & SIG_HANDLED_SA_RESTART)
if (!restartable)
// create high precision timer if needed
// a deadline equal to timespec_max is treated as "no deadline", so no
// timer is created for it
// NOTE(review): if CreateWaitableTimer() or SetWaitableTimer() fails,
// only the event handle is waited on with a -1u timeout, so the
// deadline is not enforced in that case -- TODO confirm this is intended
if (memcmp(&deadline, &timespec_max, sizeof(struct timespec))) {
intptr_t hTimer;
if ((hTimer = CreateWaitableTimer(NULL, true, NULL))) {
// the deadline converted to Windows time is handed to the timer as
// its due time
int64_t due = TimeSpecToWindowsTime(deadline);
if (SetWaitableTimer(hTimer, &due, 0, NULL, NULL, false)) {
hands[handl++] = hTimer;
} else {
CloseHandle(hTimer);
}
}
}
// perform wait operation
struct PosixThread *pt = _pthread_self();
pt->pt_event = sigev;
pt->pt_blkmask = waitmask;
atomic_store_explicit(&pt->pt_blocker, PT_BLOCKER_EVENT,
memory_order_release);
//!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!//
int sig = 0;
uint32_t wi = 0;
// skip the wait if already canceled or a signal is pending; otherwise
// wait for any one of the handles with no timeout of its own
if (!_is_canceled() &&
!(_weaken(__sig_get) && (sig = _weaken(__sig_get)(waitmask))))
wi = WaitForMultipleObjects(handl, hands, false, -1u);
//!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!/!//
atomic_store_explicit(&pt->pt_blocker, 0, memory_order_release);
for (int i = 0; i < handl; ++i)
CloseHandle(hands[i]);
// recursion is now safe
// index 1 is the timer slot in `hands`, so this is the deadline case
if (wi == 1)
return 0;
if (wi == -1u)
return __winerr();
int handler_was_called = 0;
// no signal was picked up before waiting: check for cancelation and
// look again for a deliverable signal now that the wait is over
if (!sig) {
if (_check_cancel())
return -1;
if (_weaken(__sig_get))
sig = _weaken(__sig_get)(waitmask);
}
if (sig)
handler_was_called = _weaken(__sig_relay)(sig, SI_KERNEL, waitmask);
if (_check_cancel())
return -1;
if (handler_was_called & SIG_HANDLED_NO_RESTART)
return eintr();
// NOTE(review): this `return 0;` presumably belongs to the
// millisecond-delay version shown above -- confirm
return 0;
// a handler installed with SA_RESTART still interrupts callers that
// asked for the non-restartable flavor
if (handler_was_called & SIG_HANDLED_SA_RESTART)
if (!restartable)
return eintr();
}
}
// Parks with restartable=false (millisecond-delay signature).
textwindows int _park_norestart(uint32_t msdelay, sigset_t waitmask) {
return _park_thread(msdelay, waitmask, false);
// Parks with restartable=false (deadline signature).
textwindows int _park_norestart(struct timespec deadline, sigset_t waitmask) {
return _park_thread(deadline, waitmask, false);
}
// Parks with restartable=true (millisecond-delay signature).
textwindows int _park_restartable(uint32_t msdelay, sigset_t waitmask) {
return _park_thread(msdelay, waitmask, true);
// Parks with restartable=true (deadline signature).
textwindows int _park_restartable(struct timespec deadline, sigset_t waitmask) {
return _park_thread(deadline, waitmask, true);
}
#endif /* __x86_64__ */

View file

@ -18,21 +18,20 @@
*/
#include "libc/calls/internal.h"
#include "libc/calls/struct/sigset.internal.h"
#include "libc/calls/struct/timespec.h"
#include "libc/calls/syscall_support-nt.internal.h"
#ifdef __x86_64__
// Parks the calling thread with an empty wait mask and no time limit.
//
// NOTE(review): this is a rendered diff without +/- markers. The `while`
// loop with `return rc;` and the single timespec_max call with
// `return -1;` look like the before and after bodies -- confirm against
// the repository.
textwindows int sys_pause_nt(void) {
int rc;
// we don't strictly need to block signals, but it reduces signal
// delivery latency, by preventing other threads from delivering a
// signal asynchronously. it takes about ~5us to deliver a signal
// using SetEvent() whereas it takes ~30us to use SuspendThread(),
// GetThreadContext(), SetThreadContext(), and ResumeThread().
BLOCK_SIGNALS;
// loop form: keep parking for as long as the park call returns 0
while (!(rc = _park_norestart(-1u, 0)))
donothing;
// single-call form: timespec_max is passed as the deadline and the
// park result is discarded
_park_norestart(timespec_max, 0);
ALLOW_SIGNALS;
return rc;
return -1;
}
#endif /* __x86_64__ */

View file

@ -318,8 +318,8 @@ textwindows static int sys_poll_nt_actual(struct pollfd *fds, uint64_t nfds,
textwindows static int sys_poll_nt_impl(struct pollfd *fds, uint64_t nfds,
struct timespec deadline,
const sigset_t waitmask) {
uint32_t waitms;
int i, n, rc, got = 0;
struct timespec now, next, target;
// we normally don't check for signals until we decide to wait, since
// it's nice to have functions like write() be unlikely to EINTR, but
@ -344,9 +344,16 @@ textwindows static int sys_poll_nt_impl(struct pollfd *fds, uint64_t nfds,
}
if (got)
return got;
if (!(waitms = sys_poll_nt_waitms(deadline)))
now = sys_clock_gettime_monotonic_nt();
if (timespec_cmp(now, deadline) >= 0)
return 0;
if (_park_norestart(waitms, waitmask) == -1)
next = timespec_add(now, timespec_frommillis(POLL_INTERVAL_MS));
if (timespec_cmp(next, deadline) >= 0) {
target = deadline;
} else {
target = next;
}
if (_park_norestart(target, waitmask) == -1)
return -1;
}
}

View file

@ -21,6 +21,7 @@
#include "libc/calls/sig.internal.h"
#include "libc/calls/struct/sigset.h"
#include "libc/calls/struct/sigset.internal.h"
#include "libc/calls/struct/timespec.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/atomic.h"
@ -59,8 +60,7 @@ int sigsuspend(const sigset_t *ignore) {
// using SetEvent() whereas it takes ~30us to use SuspendThread(),
// GetThreadContext(), SetThreadContext(), and ResumeThread().
BLOCK_SIGNALS;
while (!(rc = _park_norestart(-1u, waitmask)))
donothing;
rc = _park_norestart(timespec_max, waitmask);
ALLOW_SIGNALS;
} else {
rc = sys_sigsuspend((uint64_t[2]){waitmask}, 8);

View file

@ -26,7 +26,7 @@ long __get_minsigstksz(void) {
struct AuxiliaryValue x;
x = __getauxval(AT_MINSIGSTKSZ);
if (x.isfound) {
return MAX(_MINSIGSTKSZ, x.value);
return MAX(_MINSIGSTKSZ - 1024, x.value) + 1024;
} else {
return _MINSIGSTKSZ;
}

View file

@ -17,7 +17,14 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/fmt/wintime.internal.h"
#include "libc/limits.h"
#include "libc/stdckdint.h"
int64_t TimeSpecToWindowsTime(struct timespec t) {
return t.tv_nsec / 100 + (t.tv_sec + MODERNITYSECONDS) * HECTONANOSECONDS;
/**
 * Converts timespec to Windows time without wrapping around.
 *
 * The result is `(tv_sec + MODERNITYSECONDS) * HECTONANOSECONDS +
 * tv_nsec / 100`. If any step would overflow `int64_t`, the result is
 * clamped in the direction of the overflow: `INT64_MAX` for times too
 * far in the future and `INT64_MIN` for times too far in the past, so
 * the conversion stays monotonic across the whole input range.
 *
 * @param time is the timestamp to convert
 * @return Windows time, saturated to the `int64_t` range
 */
int64_t TimeSpecToWindowsTime(struct timespec time) {
  int64_t secs, ticks, wt;
  int64_t frac = time.tv_nsec / 100;
  // an addition can only overflow when both operands share a sign, so
  // the sign of either operand tells which way it went
  if (ckd_add(&secs, time.tv_sec, MODERNITYSECONDS))
    return time.tv_sec < 0 ? INT64_MIN : INT64_MAX;
  // the wrapped product is useless for deciding direction, so derive it
  // from the operand signs instead
  if (ckd_mul(&ticks, secs, HECTONANOSECONDS))
    return (secs < 0) != (HECTONANOSECONDS < 0) ? INT64_MIN : INT64_MAX;
  if (ckd_add(&wt, ticks, frac))
    return frac < 0 ? INT64_MIN : INT64_MAX;
  return wt;
}

View file

@ -751,6 +751,7 @@ imp 'NtQuerySecurityObject' NtQuerySecurityObject ntdll 5
imp 'NtQuerySymbolicLinkObject' NtQuerySymbolicLinkObject ntdll 3
imp 'NtQuerySystemInformation' NtQuerySystemInformation ntdll 4
imp 'NtQuerySystemTime' NtQuerySystemTime ntdll 1
imp 'NtQueryTimerResolution' NtQueryTimerResolution ntdll 3
imp 'NtQueryValueKey' NtQueryValueKey ntdll 6
imp 'NtQueryVirtualMemory' NtQueryVirtualMemory ntdll 6
imp 'NtQueryVolumeInformationFile' NtQueryVolumeInformationFile ntdll 5
@ -767,6 +768,7 @@ imp 'NtSetInformationFile' NtSetInformationFile ntdll 5
imp 'NtSetInformationThread' NtSetInformationThread ntdll 4
imp 'NtSetIntervalProfile' NtSetIntervalProfile ntdll 2
imp 'NtSetTimer' NtSetTimer ntdll 7
imp 'NtSetTimerResolution' NtSetTimerResolution ntdll 3
imp 'NtSetValueKey' NtSetValueKey ntdll 6
imp 'NtSignalAndWaitForSingleObject' NtSignalAndWaitForSingleObject ntdll 4
imp 'NtStartProfile' NtStartProfile ntdll 1

View file

@ -224,6 +224,16 @@ NtStatus RtlUnlockHeap(int64_t heap);
NtStatus RtlGetProcessHeaps(uint32_t count, void **out_Heaps);
NtStatus RtlWalkHeap(int64_t heap, void *out_Info);
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § new technology » beyond the pale » i am the time lorde
*/
// Requests (SetResolution true) or withdraws (SetResolution false) a
// timer resolution; a resolution value is written back through
// out_CurrentResolution.
// NOTE(review): values are presumably expressed in 100ns units -- confirm
// against NT documentation.
NtStatus NtSetTimerResolution(uint32_t DesiredResolution, bool32 SetResolution,
uint32_t *out_CurrentResolution);
// Reports the supported range of timer resolutions together with the
// current one.
// NOTE(review): in NT naming the "maximum" resolution is presumably the
// finest (smallest) interval -- confirm before comparing the two bounds.
NtStatus NtQueryTimerResolution(uint32_t *out_MinimumResolution,
uint32_t *out_MaximumResolution,
uint32_t *out_CurrentResolution);
#if ShouldUseMsabiAttribute()
#include "libc/nt/thunk/ntdll.inc"
#endif /* ShouldUseMsabiAttribute() */

View file

@ -0,0 +1,18 @@
#include "libc/nt/ntdllimport.h"
// Import thunk for the ntdll function NtQueryTimerResolution.
// (.ntimp, .ftrace1, .ftrace2 and .endfn are project macros defined
// outside this file.)
.ntimp NtQueryTimerResolution,NtQueryTimerResolution
.text.windows
.ftrace1
NtQueryTimerResolution:
.ftrace2
#ifdef __x86_64__
// set up a frame, load the pointer stored in the import slot, and
// tail-jump to __sysv2nt (presumably the System V to NT calling
// convention shim, which finishes the call and returns to our caller)
push %rbp
mov %rsp,%rbp
mov __imp_NtQueryTimerResolution(%rip),%rax
jmp __sysv2nt
#elif defined(__aarch64__)
// no import is called on this architecture: return 0 in x0
mov x0,#0
ret
#endif
.endfn NtQueryTimerResolution,globl
.previous

View file

@ -0,0 +1,18 @@
#include "libc/nt/ntdllimport.h"
// Import thunk for the ntdll function NtSetTimerResolution.
// (.ntimp, .ftrace1, .ftrace2 and .endfn are project macros defined
// outside this file.)
.ntimp NtSetTimerResolution,NtSetTimerResolution
.text.windows
.ftrace1
NtSetTimerResolution:
.ftrace2
#ifdef __x86_64__
// set up a frame, load the pointer stored in the import slot, and
// tail-jump to __sysv2nt (presumably the System V to NT calling
// convention shim, which finishes the call and returns to our caller)
push %rbp
mov %rsp,%rbp
mov __imp_NtSetTimerResolution(%rip),%rax
jmp __sysv2nt
#elif defined(__aarch64__)
// no import is called on this architecture: return 0 in x0
mov x0,#0
ret
#endif
.endfn NtSetTimerResolution,globl
.previous

View file

@ -17,6 +17,7 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/cosmo.h"
#include "libc/dce.h"
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/rand.h"
@ -177,6 +178,8 @@ TEST(cosmo_args, dquote_plain_old_newline) {
#define CHARSET "abc#'\"$.\\{} \r\n"
TEST(cosmo_args, fuzz) {
if (IsWindows())
return; // not worth it fs too slow
char s[LENGTH + 1] = {0};
for (int i = 0; i < ITERATIONS; ++i) {
for (int j = 0; j < LENGTH; ++j)

View file

@ -28,6 +28,7 @@ TOOL_VIZ_DIRECTDEPS = \
LIBC_MEM \
LIBC_NEXGEN32E \
LIBC_NT_COMDLG32 \
LIBC_NT_NTDLL \
LIBC_NT_GDI32 \
LIBC_NT_KERNEL32 \
LIBC_NT_USER32 \

View file

@ -20,8 +20,17 @@
#include <stdio.h>
#include <time.h>
#include "libc/assert.h"
#include "libc/dce.h"
#include "libc/nt/enum/processcreationflags.h"
#include "libc/nt/enum/status.h"
#include "libc/nt/enum/threadpriority.h"
#include "libc/nt/ntdll.h"
#include "libc/nt/process.h"
#include "libc/nt/runtime.h"
#include "libc/nt/thread.h"
#include "libc/nt/windows.h"
#define MAXIMUM 1e9
#define MAXIMUM 1e8
#define ITERATIONS 10
const char *MyDescribeClockName(int clock) {
@ -29,6 +38,8 @@ const char *MyDescribeClockName(int clock) {
return "CLOCK_REALTIME";
if (clock == CLOCK_MONOTONIC)
return "CLOCK_MONOTONIC";
if (clock == CLOCK_BOOTTIME)
return "CLOCK_BOOTTIME";
if (clock == CLOCK_REALTIME_COARSE)
return "CLOCK_REALTIME_COARSE";
if (clock == CLOCK_MONOTONIC_COARSE)
@ -40,7 +51,7 @@ void TestSleepRelative(int clock) {
printf("\n");
printf("testing: clock_nanosleep(%s) with relative timeout\n",
MyDescribeClockName(clock));
for (long nanos = 1; nanos < (long)MAXIMUM; nanos *= 2) {
for (long nanos = 1; nanos < (long)MAXIMUM; nanos *= 4) {
struct timespec t1, t2, wf;
wf = timespec_fromnanos(nanos);
if (clock_gettime(clock, &t1))
@ -57,7 +68,8 @@ void TestSleepRelative(int clock) {
// Runs the relative-timeout sleep test once for each clock listed below.
// NOTE(review): CLOCK_MONOTONIC is listed twice; since this is a rendered
// diff without +/- markers, one of the two calls is presumably a removed
// line -- confirm against the repository.
int main(int argc, char *argv[]) {
TestSleepRelative(CLOCK_REALTIME);
TestSleepRelative(CLOCK_MONOTONIC);
TestSleepRelative(CLOCK_REALTIME_COARSE);
TestSleepRelative(CLOCK_MONOTONIC);
TestSleepRelative(CLOCK_BOOTTIME);
TestSleepRelative(CLOCK_MONOTONIC_COARSE);
}