Make malloc scalable on all platforms

It turns out sched_getcpu() didn't work on many platforms. So the system
call now has tests and is well documented. Our malloc() implementation
now employs new workarounds on platforms where sched_getcpu() isn't
supported. Previously, malloc() was only scalable on Linux and Windows
for x86-64. Now the other platforms are scalable too.
This commit is contained in:
Justine Tunney 2024-08-15 21:32:30 -07:00
parent 3fd275f59f
commit 0a79c6961f
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
9 changed files with 459 additions and 99 deletions

View file

@ -23,32 +23,82 @@
#include "libc/nexgen32e/x86feature.h"
#include "libc/nt/struct/processornumber.h"
#include "libc/nt/synchronization.h"
#include "libc/runtime/syslib.internal.h"
#include "libc/sysv/errfuns.h"
// Prototype only; the definition is not part of this listing. It is
// called below with a pointer for the cpu output and zero for the other
// two arguments, and a result of -1 is treated as failure.
int sys_getcpu(unsigned *opt_cpu, unsigned *opt_node, void *tcache);
/**
* Returns ID of CPU on which thread is currently scheduled.
*
* This function is supported on the following platforms:
*
* - x86-64
*
* - Linux: rdtscp
* - FreeBSD: rdtscp
* - Windows: win32
* - OpenBSD: unsupported
* - NetBSD: unsupported
* - MacOS: unsupported
*
* - aarch64
*
* - Linux: syscall
* - FreeBSD: syscall
* - MacOS: supported
*
* Any path that does not return earlier ends in a call to sys_getcpu().
* Whether that call succeeds on the platforms marked unsupported is not
* visible from this listing.
*
* @return cpu number on success, or -1 w/ errno
*/
int sched_getcpu(void) {
// NOTE(review): the statements below read like two revisions of this
// function interleaved: an if / else-if chain, then an #ifdef-guarded
// sequence. IsWindows() is tested on two consecutive lines and the
// braces do not balance. Confirm against the real file before relying
// on the statement order shown here.

// Reads the TSC_AUX value with rdtscp whenever the RDTSCP feature is
// present, without checking which operating system is running.
// TSC_AUX_CORE() presumably extracts the core field -- confirm.
if (X86_HAVE(RDTSCP)) {
unsigned tsc_aux;
rdtscp(&tsc_aux);
return TSC_AUX_CORE(tsc_aux);
} else if (IsAarch64()) {
// Returns the low 8 bits of the tpidr_el0 system register.
// NOTE(review): assumes the cpu number is stored there -- confirm.
long tpidr_el0;
asm("mrs\t%0,tpidr_el0" : "=r"(tpidr_el0));
return tpidr_el0 & 255;
} else if (IsWindows()) {
if (IsWindows()) {
// Flattens the (Group, Number) pair reported by Windows into a single
// index, using 64 slots per group.
struct NtProcessorNumber pn;
GetCurrentProcessorNumberEx(&pn);
return 64 * pn.Group + pn.Number;
} else {
// Asks sys_getcpu() for the cpu only; a -1 result is passed through
// unchanged.
unsigned cpu = 0;
int rc = sys_getcpu(&cpu, 0, 0);
if (rc == -1)
return -1;
return cpu;
}
#ifdef __x86_64__
if (X86_HAVE(RDTSCP) && (IsLinux() || IsFreebsd())) {
// Only the Linux, FreeBSD, and Windows kernels can be counted upon
// to populate the TSC_AUX register with the current thread number.
unsigned tsc_aux;
rdtscp(&tsc_aux);
return TSC_AUX_CORE(tsc_aux);
}
#endif
#ifdef __aarch64__
if (IsXnu()) {
// pthread_cpu_number_np() is defined by MacOS 11.0+ (Big Sur) in
// the SDK pthread.h header file, even though there's no man page
// The call goes through the __syslib table, and is only attempted when
// that table exists and reports version 9 or newer.
if (__syslib && __syslib->__version >= 9) {
errno_t err;
size_t out = 0;
// A nonzero return is treated as an error number and stored in errno.
if ((err = __syslib->__pthread_cpu_number_np(&out))) {
errno = err;
return -1;
}
// NOTE(review): size_t is narrowed to int here.
return out;
} else {
errno = ENOSYS; // upgrade your ape loader
return -1; // cc -o /usr/local/bin/ape ape/ape-m1.c
}
}
#endif
#ifdef __aarch64__
if (IsFreebsd()) {
// Issues the system call directly with svc: the number goes in x8 and
// the result is read back from x0.
// NOTE(review): x0 is returned as-is with no failure check, so it is
// unclear how a kernel error would become -1 w/ errno -- confirm.
register int x0 asm("x0");
register int x8 asm("x8") = 581; // sched_getcpu
asm volatile("svc\t0" : "=r"(x0) : "r"(x8) : "memory");
return x0;
}
#endif
// Fallback for everything that did not return above: ask sys_getcpu()
// for the cpu only, passing a -1 result through unchanged.
unsigned cpu = 0;
int rc = sys_getcpu(&cpu, 0, 0);
if (rc == -1)
return -1;
return cpu;
}