mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-28 00:02:28 +00:00
Make mmap() scalable
It's now possible to create thousands upon thousands of sparse, independent memory mappings without any slowdown. The memory manager is now better at tracking memory protection, particularly on Windows, where protection is tracked precisely enough that it can be restored during fork(). You now have the highest quality memory manager possible. It's even better than some OSes like XNU, where mmap() is implemented as an O(n) operation, which sadly means things aren't much improved over there. With this change, the llamafile HTTP server endpoint at /tokenize with a prompt of 50 tokens is now able to handle 2.6m r/sec.
This commit is contained in:
parent
3756870635
commit
8c645fa1ee
59 changed files with 1238 additions and 1067 deletions
|
@ -22,6 +22,7 @@
|
|||
#include "libc/calls/state.internal.h"
|
||||
#include "libc/calls/struct/sigset.h"
|
||||
#include "libc/calls/struct/sigset.internal.h"
|
||||
#include "libc/calls/struct/timespec.h"
|
||||
#include "libc/calls/syscall-nt.internal.h"
|
||||
#include "libc/calls/syscall-sysv.internal.h"
|
||||
#include "libc/dce.h"
|
||||
|
@ -45,7 +46,6 @@
|
|||
#include "libc/thread/posixthread.internal.h"
|
||||
#include "libc/thread/tls.h"
|
||||
|
||||
extern pthread_mutex_t nsync_waiters_mu;
|
||||
extern pthread_mutex_t _pthread_lock_obj;
|
||||
|
||||
static void _onfork_prepare(void) {
|
||||
|
@ -54,11 +54,10 @@ static void _onfork_prepare(void) {
|
|||
_pthread_lock();
|
||||
__maps_lock();
|
||||
__fds_lock();
|
||||
pthread_mutex_lock(&nsync_waiters_mu);
|
||||
LOCKTRACE("READY TO ROCK AND ROLL");
|
||||
}
|
||||
|
||||
static void _onfork_parent(void) {
|
||||
pthread_mutex_unlock(&nsync_waiters_mu);
|
||||
__fds_unlock();
|
||||
__maps_unlock();
|
||||
_pthread_unlock();
|
||||
|
@ -68,7 +67,6 @@ static void _onfork_parent(void) {
|
|||
|
||||
static void _onfork_child(void) {
|
||||
__fds_lock_obj = (pthread_mutex_t)PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
|
||||
nsync_waiters_mu = (pthread_mutex_t)PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
|
||||
_pthread_lock_obj = (pthread_mutex_t)PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
|
||||
atomic_store_explicit(&__maps.lock, 0, memory_order_relaxed);
|
||||
atomic_store_explicit(&__get_tls()->tib_relock_maps, 0, memory_order_relaxed);
|
||||
|
@ -77,7 +75,9 @@ static void _onfork_child(void) {
|
|||
}
|
||||
|
||||
int _fork(uint32_t dwCreationFlags) {
|
||||
long micros;
|
||||
struct Dll *e;
|
||||
struct timespec started;
|
||||
int ax, dx, tid, parent;
|
||||
parent = __pid;
|
||||
BLOCK_SIGNALS;
|
||||
|
@ -85,11 +85,13 @@ int _fork(uint32_t dwCreationFlags) {
|
|||
__proc_lock();
|
||||
if (__threaded)
|
||||
_onfork_prepare();
|
||||
started = timespec_real();
|
||||
if (!IsWindows()) {
|
||||
ax = sys_fork();
|
||||
} else {
|
||||
ax = sys_fork_nt(dwCreationFlags);
|
||||
}
|
||||
micros = timespec_tomicros(timespec_sub(timespec_real(), started));
|
||||
if (!ax) {
|
||||
|
||||
// get new process id
|
||||
|
@ -136,15 +138,14 @@ int _fork(uint32_t dwCreationFlags) {
|
|||
// run user fork callbacks
|
||||
if (__threaded)
|
||||
_onfork_child();
|
||||
STRACE("fork() → 0 (child of %d)", parent);
|
||||
STRACE("fork() → 0 (child of %d; took %ld us)", parent, micros);
|
||||
} else {
|
||||
// this is the parent process
|
||||
if (__threaded) {
|
||||
if (__threaded)
|
||||
_onfork_parent();
|
||||
}
|
||||
if (IsWindows())
|
||||
__proc_unlock();
|
||||
STRACE("fork() → %d% m", ax);
|
||||
STRACE("fork() → %d% m (took %ld us)", ax, micros);
|
||||
}
|
||||
ALLOW_SIGNALS;
|
||||
return ax;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue