Make fork() go 30% faster

This change makes fork() go nearly as fast as sys_fork() on UNIX. On
Windows, this change shaves about 4-5ms off fork() + wait() latency. This
is accomplished by using WriteProcessMemory() from the parent process to
set up the address space of a suspended child process, which is better
than streaming the memory over a pipe.
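
Here is a rough sketch of the approach, under the assumption that the
parent walks its memory map tree and injects each forkable mapping into
the suspended child (hChild and the elided error handling are
illustrative, not this commit's literal code):

    // hedged sketch: clone the address space into a suspended child
    for (struct Map *map = __maps_first(); map; map = __maps_next(map)) {
      if (map->flags & MAP_NOFORK)
        continue;  // image/system memory already exists in the child
      // hChild is hypothetical, e.g. CreateProcess() w/ kNtCreateSuspended
      VirtualAllocEx(hChild, map->addr, map->size,
                     kNtMemReserve | kNtMemCommit, kNtPageReadwrite);
      WriteProcessMemory(hChild, map->addr, map->addr, map->size, 0);
    }
    // the child then resumes inside fork() and wipes inherited locks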
Justine Tunney 2025-01-01 04:59:38 -08:00
parent 98c5847727
commit 0b3c81dd4e
44 changed files with 769 additions and 649 deletions

View file

@ -17,6 +17,7 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/describeflags.h"
#include "libc/intrin/maps.h"
#include "libc/runtime/memtrack.internal.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
@ -24,12 +25,13 @@
static char DescribeMapType(int flags) {
switch (flags & MAP_TYPE) {
case MAP_FILE:
if (flags & MAP_NOFORK)
return 'i'; // executable image
return '-';
case MAP_PRIVATE:
if (flags & MAP_NOFORK)
return 'P';
else
return 'p';
return 'w'; // windows memory
return 'p';
case MAP_SHARED:
return 's';
default:

View file

@ -19,7 +19,7 @@
#include "libc/thread/posixthread.internal.h"
#include "libc/thread/thread.h"
pthread_mutex_t __dlopen_lock_obj = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t __dlopen_lock_obj = PTHREAD_MUTEX_INITIALIZER;
void __dlopen_lock(void) {
_pthread_mutex_lock(&__dlopen_lock_obj);
@ -28,3 +28,7 @@ void __dlopen_lock(void) {
void __dlopen_unlock(void) {
_pthread_mutex_unlock(&__dlopen_lock_obj);
}
void __dlopen_wipe(void) {
_pthread_mutex_wipe_np(&__dlopen_lock_obj);
}

View file

@ -19,7 +19,7 @@
#include "libc/thread/posixthread.internal.h"
#include "third_party/tz/lock.h"
pthread_mutex_t __localtime_lock_obj = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t __localtime_lock_obj = PTHREAD_MUTEX_INITIALIZER;
void __localtime_lock(void) {
_pthread_mutex_lock(&__localtime_lock_obj);
@ -28,3 +28,7 @@ void __localtime_lock(void) {
void __localtime_unlock(void) {
_pthread_mutex_unlock(&__localtime_lock_obj);
}
void __localtime_wipe(void) {
_pthread_mutex_wipe_np(&__localtime_lock_obj);
}

View file

@ -30,6 +30,7 @@
#include "libc/nexgen32e/rdtsc.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/stack.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/thread/lock.h"
#include "libc/thread/tls.h"
@ -40,10 +41,6 @@ __static_yoink("_init_maps");
#define ABI privileged optimizespeed
// take great care if you enable this
// especially if you're using --ftrace too
#define DEBUG_MAPS_LOCK 0
struct Maps __maps;
void __maps_add(struct Map *map) {
@ -61,14 +58,18 @@ void __maps_stack(char *stackaddr, int pagesz, int guardsize, size_t stacksize,
__maps.stack.addr = stackaddr + guardsize;
__maps.stack.size = stacksize - guardsize;
__maps.stack.prot = stackprot;
__maps.stack.hand = -1;
__maps.stack.hand = MAPS_SUBREGION;
__maps.stack.flags = MAP_PRIVATE | MAP_ANONYMOUS;
__maps_adder(&__maps.stack, pagesz);
if (guardsize) {
__maps.guard.addr = stackaddr;
__maps.guard.size = guardsize;
__maps.guard.prot = PROT_NONE;
__maps.guard.prot = PROT_NONE | PROT_GUARD;
__maps.guard.hand = stackhand;
__maps.guard.flags = MAP_PRIVATE | MAP_ANONYMOUS;
__maps_adder(&__maps.guard, pagesz);
} else {
__maps.stack.hand = stackhand;
}
}
@ -102,29 +103,14 @@ void __maps_init(void) {
}
// record .text and .data mappings
static struct Map text, data;
text.addr = (char *)__executable_start;
text.size = _etext - __executable_start;
text.prot = PROT_READ | PROT_EXEC;
__maps_track((char *)__executable_start, _etext - __executable_start,
PROT_READ | PROT_EXEC, MAP_NOFORK);
uintptr_t ds = ((uintptr_t)_etext + pagesz - 1) & -pagesz;
if (ds < (uintptr_t)_end) {
data.addr = (char *)ds;
data.size = (uintptr_t)_end - ds;
data.prot = PROT_READ | PROT_WRITE;
__maps_adder(&data, pagesz);
}
__maps_adder(&text, pagesz);
if (ds < (uintptr_t)_end)
__maps_track((char *)ds, (uintptr_t)_end - ds, PROT_READ | PROT_WRITE,
MAP_NOFORK);
}
#if DEBUG_MAPS_LOCK
privileged static void __maps_panic(const char *msg) {
// it's only safe to pass a format string. if we use directives such
// as %s, %t etc. then kprintf() will recursively call __maps_lock()
kprintf(msg);
DebugBreak();
}
#endif
bool __maps_held(void) {
return __tls_enabled && !(__get_tls()->tib_flags & TIB_FLAG_VFORKED) &&
MUTEX_OWNER(
@ -143,7 +129,12 @@ ABI void __maps_lock(void) {
if (tib->tib_flags & TIB_FLAG_VFORKED)
return;
me = atomic_load_explicit(&tib->tib_ptid, memory_order_relaxed);
if (me <= 0)
word = 0;
lock = MUTEX_LOCK(word);
lock = MUTEX_SET_OWNER(lock, me);
if (atomic_compare_exchange_strong_explicit(&__maps.lock.word, &word, lock,
memory_order_acquire,
memory_order_relaxed))
return;
word = atomic_load_explicit(&__maps.lock.word, memory_order_relaxed);
for (;;) {
@ -154,24 +145,13 @@ ABI void __maps_lock(void) {
return;
continue;
}
#if DEBUG_MAPS_LOCK
if (__deadlock_tracked(&__maps.lock) == 1)
__maps_panic("error: maps lock already held\n");
if (__deadlock_check(&__maps.lock, 1))
__maps_panic("error: maps lock is cyclic\n");
#endif
word = 0;
lock = MUTEX_LOCK(word);
lock = MUTEX_SET_OWNER(lock, me);
if (atomic_compare_exchange_weak_explicit(&__maps.lock.word, &word, lock,
memory_order_acquire,
memory_order_relaxed)) {
#if DEBUG_MAPS_LOCK
__deadlock_track(&__maps.lock, 0);
__deadlock_record(&__maps.lock, 0);
#endif
memory_order_relaxed))
return;
}
for (;;) {
word = atomic_load_explicit(&__maps.lock.word, memory_order_relaxed);
if (MUTEX_OWNER(word) == me)
@ -183,7 +163,6 @@ ABI void __maps_lock(void) {
}
ABI void __maps_unlock(void) {
int me;
uint64_t word;
struct CosmoTib *tib;
if (!__tls_enabled)
@ -192,28 +171,16 @@ ABI void __maps_unlock(void) {
return;
if (tib->tib_flags & TIB_FLAG_VFORKED)
return;
me = atomic_load_explicit(&tib->tib_ptid, memory_order_relaxed);
if (me <= 0)
return;
word = atomic_load_explicit(&__maps.lock.word, memory_order_relaxed);
#if DEBUG_MAPS_LOCK
if (__deadlock_tracked(&__maps.lock) == 0)
__maps_panic("error: maps lock not owned by caller\n");
#endif
for (;;) {
if (MUTEX_DEPTH(word)) {
if (MUTEX_DEPTH(word))
if (atomic_compare_exchange_weak_explicit(
&__maps.lock.word, &word, MUTEX_DEC_DEPTH(word),
memory_order_relaxed, memory_order_relaxed))
break;
}
if (atomic_compare_exchange_weak_explicit(&__maps.lock.word, &word, 0,
memory_order_release,
memory_order_relaxed)) {
#if DEBUG_MAPS_LOCK
__deadlock_untrack(&__maps.lock);
#endif
memory_order_relaxed))
break;
}
}
}

View file

@ -5,6 +5,28 @@
#include "libc/runtime/runtime.h"
COSMOPOLITAN_C_START_
/* size of dynamic memory that is used internally by your memory manager */
#define MAPS_SIZE 65536
/* when map->hand is MAPS_RESERVATION it means mmap() is transactionally
reserving address space it is in the process of requesting from win32 */
#define MAPS_RESERVATION -2
/* when map->hand is MAPS_SUBREGION it means that an allocation has been
broken into multiple fragments by mprotect(). the first fragment must
have its hand set to MAPS_VIRTUAL or your CreateFileMapping() handle.
the fragments must be perfectly contiguous in memory and should have
the same flags */
#define MAPS_SUBREGION -3
/* indicates an allocation was created by VirtualAlloc() and so munmap()
must call VirtualFree() when destroying it. use it on the hand field. */
#define MAPS_VIRTUAL -4
/* if this is used on MAP_PRIVATE memory, then it's assumed to be memory
that win32 allocated, e.g. a CreateThread() stack. if this is used on
MAP_FILE memory, then it's assumed to be part of the executable image */
#define MAP_NOFORK 0x10000000
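// a minimal sketch (not part of this commit) of how cleanup is intended
// to dispatch on the hand sentinels above; it mirrors __maps_destroy_all()
// in mmap.c, and the helper name here is hypothetical
static void __maps_release(struct Map *map) {
  switch (map->hand) {
    case MAPS_SUBREGION:    // freed with its allocation's first fragment
    case MAPS_RESERVATION:  // nothing was ever obtained from win32
      break;
    case MAPS_VIRTUAL:      // VirtualAlloc()'d, so VirtualFree() it
      VirtualFree(map->addr, 0, kNtMemRelease);
      break;
    default:                // a real CreateFileMapping() handle
      UnmapViewOfFile(map->addr);
      CloseHandle(map->hand);
      break;
  }
}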
#define MAP_TREE_CONTAINER(e) TREE_CONTAINER(struct Map, tree, e)
struct Map {
@ -12,9 +34,8 @@ struct Map {
size_t size; /* must be nonzero */
int64_t off; /* ignore for anon */
int flags; /* memory map flag */
char prot; /* memory protects */
short prot; /* memory protects */
bool iscow; /* windows nt only */
bool precious; /* windows nt only */
bool readonlyfile; /* windows nt only */
unsigned visited; /* checks and fork */
intptr_t hand; /* windows nt only */
@ -29,11 +50,17 @@ struct MapLock {
_Atomic(uint64_t) word;
};
struct MapSlab {
struct MapSlab *next;
struct Map maps[(MAPS_SIZE - sizeof(struct MapSlab *)) / sizeof(struct Map)];
};
struct Maps {
uint128_t rand;
struct Tree *maps;
struct MapLock lock;
_Atomic(uintptr_t) freed;
_Atomic(struct MapSlab *) slabs;
size_t count;
size_t pages;
struct Map stack;
@ -76,33 +103,37 @@ forceinline optimizespeed int __maps_search(const void *key,
return (addr > map->addr) - (addr < map->addr);
}
static inline struct Map *__maps_next(struct Map *map) {
dontinstrument static inline struct Map *__maps_next(struct Map *map) {
struct Tree *node;
if ((node = tree_next(&map->tree)))
return MAP_TREE_CONTAINER(node);
return 0;
}
static inline struct Map *__maps_prev(struct Map *map) {
dontinstrument static inline struct Map *__maps_prev(struct Map *map) {
struct Tree *node;
if ((node = tree_prev(&map->tree)))
return MAP_TREE_CONTAINER(node);
return 0;
}
static inline struct Map *__maps_first(void) {
dontinstrument static inline struct Map *__maps_first(void) {
struct Tree *node;
if ((node = tree_first(__maps.maps)))
return MAP_TREE_CONTAINER(node);
return 0;
}
static inline struct Map *__maps_last(void) {
dontinstrument static inline struct Map *__maps_last(void) {
struct Tree *node;
if ((node = tree_last(__maps.maps)))
return MAP_TREE_CONTAINER(node);
return 0;
}
static inline bool __maps_isalloc(struct Map *map) {
return map->hand != MAPS_SUBREGION;
}
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_MAPS_H_ */

View file

@ -19,6 +19,7 @@
#include "libc/calls/calls.h"
#include "libc/calls/internal.h"
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/atomic.h"
@ -32,6 +33,7 @@
#include "libc/intrin/weaken.h"
#include "libc/limits.h"
#include "libc/macros.h"
#include "libc/nt/enum/memflags.h"
#include "libc/nt/memory.h"
#include "libc/nt/runtime.h"
#include "libc/runtime/runtime.h"
@ -44,9 +46,10 @@
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/errfuns.h"
#include "libc/thread/lock.h"
#include "libc/thread/thread.h"
#include "libc/thread/tls.h"
#define MMDEBUG 0
#define MMDEBUG 1
#define MAX_SIZE 0x0ff800000000ul
#define MAP_FIXED_NOREPLACE_linux 0x100000
@ -99,6 +102,31 @@ static bool __maps_overlaps(const char *addr, size_t size) {
return false;
}
// returns true if all fragments of all allocations which overlap
// [addr,addr+size) are completely contained by [addr,addr+size).
textwindows static bool __maps_envelops(const char *addr, size_t size) {
struct Map *map, *next;
size = PGUP(size);
if (!(map = __maps_floor(addr)))
if (!(map = __maps_first()))
return true;
do {
if (MAX(addr, map->addr) >= MIN(addr + size, map->addr + map->size))
break; // didn't overlap mapping
if (!__maps_isalloc(map))
return false; // didn't include first fragment of alloc
if (addr > map->addr)
return false; // excluded leading pages of first fragment
// set map to last fragment in allocation
for (; (next = __maps_next(map)) && !__maps_isalloc(next); map = next)
// fragments within an allocation must be perfectly contiguous
ASSERT(map->addr + map->size == next->addr);
if (addr + size < map->addr + PGUP(map->size))
return false; // excluded trailing pages of allocation
} while ((map = next));
return true;
}
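// for example (hedged illustration, not in this commit): on windows, if
// mprotect() splits one anonymous allocation into three fragments, then
// unmapping only the middle fragment must fail, while unmapping the whole
// interval succeeds; gransz stands in for __gransize
//
//   char *p = mmap(0, 3 * gransz, PROT_READ | PROT_WRITE,
//                  MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
//   mprotect(p + gransz, gransz, PROT_READ);  // now three fragments
//   munmap(p + gransz, gransz);   // ENOTSUP: doesn't envelop allocation
//   munmap(p, 3 * gransz);        // ok: envelops the whole allocation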
void __maps_check(void) {
#if MMDEBUG
size_t maps = 0;
@ -130,17 +158,17 @@ static int __muntrack(char *addr, size_t size, struct Map **deleted,
size_t ti = 0;
struct Map *map;
struct Map *next;
struct Map *floor;
size = PGUP(size);
floor = __maps_floor(addr);
for (map = floor; map && map->addr <= addr + size; map = next) {
if (!(map = __maps_floor(addr)))
map = __maps_first();
for (; map && map->addr <= addr + size; map = next) {
next = __maps_next(map);
char *map_addr = map->addr;
size_t map_size = map->size;
if (!(MAX(addr, map_addr) < MIN(addr + size, map_addr + PGUP(map_size))))
continue;
if (addr <= map_addr && addr + size >= map_addr + PGUP(map_size)) {
if (map->precious)
if (map->hand == MAPS_RESERVATION)
continue;
// remove mapping completely
tree_remove(&__maps.maps, &map->tree);
@ -149,9 +177,6 @@ static int __muntrack(char *addr, size_t size, struct Map **deleted,
__maps.pages -= (map_size + __pagesize - 1) / __pagesize;
__maps.count -= 1;
__maps_check();
} else if (IsWindows()) {
STRACE("you can't carve up memory maps on windows ;_;");
rc = enotsup();
} else if (addr <= map_addr) {
// shave off lefthand side of mapping
ASSERT(addr + size < map_addr + PGUP(map_size));
@ -229,6 +254,7 @@ void __maps_free(struct Map *map) {
ASSERT(!TAG(map));
map->size = 0;
map->addr = MAP_FAILED;
map->hand = kNtInvalidHandleValue;
for (tip = atomic_load_explicit(&__maps.freed, memory_order_relaxed);;) {
map->freed = (struct Map *)PTR(tip);
if (atomic_compare_exchange_weak_explicit(
@ -261,11 +287,23 @@ static int __maps_destroy_all(struct Map *list) {
if (!IsWindows()) {
if (sys_munmap(map->addr, map->size))
rc = -1;
} else if (map->hand != -1) {
if (!UnmapViewOfFile(map->addr))
rc = -1;
if (!CloseHandle(map->hand))
rc = -1;
} else {
switch (map->hand) {
case MAPS_SUBREGION:
case MAPS_RESERVATION:
break;
case MAPS_VIRTUAL:
if (!VirtualFree(map->addr, 0, kNtMemRelease))
rc = __winerr();
break;
default:
ASSERT(map->hand > 0);
if (!UnmapViewOfFile(map->addr))
rc = -1;
if (!CloseHandle(map->hand))
rc = -1;
break;
}
}
}
return rc;
@ -345,10 +383,9 @@ void __maps_insert(struct Map *map) {
if (!map && left && right)
if (__maps_mergeable(left, right)) {
left->size = PGUP(left->size);
right->addr -= left->size;
right->size += left->size;
tree_remove(&__maps.maps, &left->tree);
__maps_free(left);
left->size += right->size;
tree_remove(&__maps.maps, &right->tree);
__maps_free(right);
__maps.count -= 1;
}
@ -369,7 +406,7 @@ bool __maps_track(char *addr, size_t size, int prot, int flags) {
map->size = size;
map->prot = prot;
map->flags = flags;
map->hand = -1;
map->hand = MAPS_VIRTUAL;
__maps_lock();
__maps_insert(map);
__maps_unlock();
@ -396,22 +433,23 @@ struct Map *__maps_alloc(void) {
return map;
pthread_pause_np();
}
int size = 65536;
// we're creating sudden surprise memory. the user might be in the
// middle of carefully planning a fixed memory structure. we don't
// want the system allocator to put our surprise memory inside it,
// and we also want to avoid the chances of accidentally unmapping it
struct DirectMap sys =
sys_mmap(__maps_randaddr(), size, PROT_READ | PROT_WRITE,
sys_mmap(__maps_randaddr(), MAPS_SIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (sys.addr == MAP_FAILED)
return 0;
map = sys.addr;
if (IsWindows())
CloseHandle(sys.maphandle);
for (int i = 1; i < size / sizeof(struct Map); ++i)
__maps_free(map + i);
return map;
struct MapSlab *slab = sys.addr;
while (!atomic_compare_exchange_weak(&__maps.slabs, &slab->next, slab)) {
}
for (size_t i = 1; i < ARRAYLEN(slab->maps); ++i)
__maps_free(&slab->maps[i]);
return &slab->maps[0];
}
static int __munmap(char *addr, size_t size) {
@ -429,13 +467,10 @@ static int __munmap(char *addr, size_t size) {
__maps_lock();
__maps_check();
// normalize size
// abort if size doesn't include all pages in granule
if (GRUP(size) > PGUP(size))
if (__maps_overlaps(addr + PGUP(size), GRUP(size) - PGUP(size))) {
__maps_unlock();
return einval();
}
// on windows we can only unmap whole allocations
if (IsWindows())
if (!__maps_envelops(addr, size)) {
__maps_unlock();
return enotsup();
}
// untrack mappings
int rc;
@ -572,6 +607,11 @@ static void *__mmap_impl(char *addr, size_t size, int prot, int flags, int fd,
}
} else {
// remove existing mappings and their tracking objects
if (!__maps_envelops(addr, size)) {
__maps_unlock();
__maps_free(map);
return (void *)enotsup();
}
struct Map *deleted = 0;
if (__muntrack(addr, size, &deleted, 0, 0)) {
__maps_insert_all(deleted);
@ -592,8 +632,7 @@ static void *__mmap_impl(char *addr, size_t size, int prot, int flags, int fd,
map->size = size;
map->prot = 0;
map->flags = 0;
map->hand = -1;
map->precious = true;
map->hand = MAPS_RESERVATION;
__maps_insert(map);
__maps_unlock();
}
@ -610,7 +649,6 @@ static void *__mmap_impl(char *addr, size_t size, int prot, int flags, int fd,
__maps_lock();
tree_remove(&__maps.maps, &map->tree);
__maps.pages -= (map->size + __pagesize - 1) / __pagesize;
map->precious = false;
__maps_unlock();
if (errno == EADDRNOTAVAIL) {
// we've encountered mystery memory
@ -649,7 +687,6 @@ static void *__mmap_impl(char *addr, size_t size, int prot, int flags, int fd,
map->prot = prot;
map->flags = flags;
map->hand = res.maphandle;
map->precious = false;
if (IsWindows()) {
map->iscow = (flags & MAP_TYPE) != MAP_SHARED && fd != -1;
map->readonlyfile = (flags & MAP_TYPE) == MAP_SHARED && fd != -1 &&
@ -710,21 +747,6 @@ static void *__mmap(char *addr, size_t size, int prot, int flags, int fd,
static void *__mremap_impl(char *old_addr, size_t old_size, size_t new_size,
int flags, char *new_addr) {
// normalize and validate old size
// abort if size doesn't include all pages in granule
if (GRUP(old_size) > PGUP(old_size))
if (__maps_overlaps(old_addr + PGUP(old_size),
GRUP(old_size) - PGUP(old_size)))
return (void *)einval();
// validate new size
// abort if size doesn't include all pages in granule
if (flags & MREMAP_FIXED)
if (GRUP(new_size) > PGUP(new_size))
if (__maps_overlaps(new_addr + PGUP(new_size),
GRUP(new_size) - PGUP(new_size)))
return (void *)einval();
// allocate object for tracking new mapping
struct Map *map;
if (!(map = __maps_alloc()))
@ -787,6 +809,7 @@ static void *__mremap_impl(char *old_addr, size_t old_size, size_t new_size,
map->off = old_off;
map->prot = old_prot;
map->flags = old_flags;
map->hand = kNtInvalidHandleValue;
__maps_insert(map);
return res;
@ -945,8 +968,8 @@ static void *__mremap(char *old_addr, size_t old_size, size_t new_size,
*
* @raise ENOMEM if `RUSAGE_AS` or similar limits are exceeded
* @raise EEXIST if `flags` has `MAP_FIXED_NOREPLACE` and `addr` is used
* @raise ENOTSUP if interval overlapped without enveloping win32 alloc
* @raise EPERM if `addr` is null and `flags` has `MAP_FIXED`
* @raise ENOTSUP if memory map is cleaved on windows with `MAP_FIXED`
* @raise EINVAL if `addr` isn't granularity aligned with `MAP_FIXED`
* @raise EINVAL if `size` is zero
* @raise EINVAL if `flags` or `prot` hold invalid values
@ -1000,10 +1023,9 @@ void *mremap(void *old_addr, size_t old_size, size_t new_size, int flags, ...) {
*
* @return 0 on success, or -1 w/ errno.
* @raise ENOMEM if OOM happened when punching hole in existing mapping
* @raise ENOTSUP if memory map is cleaved on windows with `MAP_FIXED`
* @raise ENOTSUP if interval overlapped without enveloping win32 alloc
* @raise EDEADLK if called from signal handler interrupting mmap()
* @raise EINVAL if `addr` isn't granularity aligned
* @raise EINVAL if `size` didn't include all pages in granule
*/
int munmap(void *addr, size_t size) {
int rc = __munmap(addr, size);

View file

@ -108,7 +108,7 @@ int __mprotect(char *addr, size_t size, int prot) {
leftmap->hand = map->hand;
map->addr += left;
map->size = right;
map->hand = -1;
map->hand = MAPS_SUBREGION;
if (!(map->flags & MAP_ANONYMOUS))
map->off += left;
tree_insert(&__maps.maps, &leftmap->tree, __maps_compare);
@ -139,7 +139,7 @@ int __mprotect(char *addr, size_t size, int prot) {
map->addr += left;
map->size = right;
map->prot = prot;
map->hand = -1;
map->hand = MAPS_SUBREGION;
if (!(map->flags & MAP_ANONYMOUS))
map->off += left;
tree_insert(&__maps.maps, &leftmap->tree, __maps_compare);
@ -175,10 +175,10 @@ int __mprotect(char *addr, size_t size, int prot) {
midlmap->off = (map->flags & MAP_ANONYMOUS) ? 0 : map->off + left;
midlmap->prot = prot;
midlmap->flags = map->flags;
midlmap->hand = -1;
midlmap->hand = MAPS_SUBREGION;
map->addr += left + middle;
map->size = right;
map->hand = -1;
map->hand = MAPS_SUBREGION;
if (!(map->flags & MAP_ANONYMOUS))
map->off += left + middle;
tree_insert(&__maps.maps, &leftmap->tree, __maps_compare);

View file

@ -23,6 +23,7 @@
#include "libc/runtime/runtime.h"
#include "libc/stdio/sysparam.h"
#include "libc/sysv/consts/auxv.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/errfuns.h"
textwindows int sys_msync_nt(char *addr, size_t size, int flags) {
@ -35,14 +36,32 @@ textwindows int sys_msync_nt(char *addr, size_t size, int flags) {
int rc = 0;
__maps_lock();
struct Map *map, *floor;
floor = __maps_floor(addr);
for (map = floor; map && map->addr <= addr + size; map = __maps_next(map)) {
char *beg = MAX(addr, map->addr);
char *end = MIN(addr + size, map->addr + map->size);
if (beg < end)
if (!FlushViewOfFile(beg, end - beg))
rc = -1;
struct Map *map, *next;
if (!(map = __maps_floor(addr)))
if (!(map = __maps_first())) {
__maps_unlock();
return rc;
}
for (; map; map = next) {
next = __maps_next(map);
if (!__maps_isalloc(map))
continue;
if (map->flags & MAP_ANONYMOUS)
continue;
if (MAX(addr, map->addr) >= MIN(addr + size, map->addr + map->size))
break; // didn't overlap mapping
// get true size of win32 allocation
size_t allocsize = map->size;
for (struct Map *map2 = next; map2; map2 = __maps_next(map2)) {
if (!__maps_isalloc(map2) && map->addr + allocsize == map2->addr) {
allocsize += map2->size;
} else {
break;
}
}
// perform the flush
if (!FlushViewOfFile(map->addr, allocsize))
rc = -1;
// TODO(jart): FlushFileBuffers too on g_fds handle if MS_SYNC?
}
__maps_unlock();

View file

@ -16,6 +16,7 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/dce.h"
#include "libc/fmt/conv.h"
#include "libc/fmt/itoa.h"
#include "libc/intrin/bsr.h"
@ -51,13 +52,14 @@ void __print_maps(size_t limit) {
char mappingbuf[8];
struct Map *last = 0;
int pagesz = __pagesize;
int gransz = __gransize;
int digs = get_address_digits(pagesz);
for (struct Tree *e = tree_first(__maps.maps); e; e = tree_next(e)) {
struct Map *map = MAP_TREE_CONTAINER(e);
// show gaps between maps
if (last) {
char *beg = last->addr + ((last->size + pagesz - 1) & -pagesz);
char *beg = last->addr + ((last->size + gransz - 1) & -gransz);
char *end = map->addr;
if (end > beg) {
size_t gap = end - beg;
@ -72,8 +74,21 @@ void __print_maps(size_t limit) {
_DescribeMapping(mappingbuf, map->prot, map->flags));
sizefmt(sb, map->size, 1024);
kprintf(" %!sb", sb);
if (map->hand && map->hand != -1)
kprintf(" hand=%ld", map->hand);
if (IsWindows()) {
switch (map->hand) {
case MAPS_RESERVATION:
kprintf(" reservation");
break;
case MAPS_SUBREGION:
break;
case MAPS_VIRTUAL:
kprintf(" virtual");
break;
default:
kprintf(" hand=%ld", map->hand);
break;
}
}
if (map->iscow)
kprintf(" cow");
if (map->readonlyfile)

View file

@ -23,6 +23,7 @@
#include "libc/nt/enum/memflags.h"
#include "libc/nt/memory.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/sysparam.h"
#include "libc/str/str.h"
static const struct DescribeFlags kNtMemState[] = {
@ -46,20 +47,25 @@ const char *DescribeNtMemType(char buf[64], uint32_t x) {
return _DescribeFlags(buf, 64, kNtMemType, ARRAYLEN(kNtMemType), "kNtMem", x);
}
void __print_maps_win32(void) {
void __print_maps_win32(int64_t hProcess, const char *addr, size_t size) {
char *p, b[5][64];
struct NtMemoryBasicInformation mi;
kprintf("%-12s %-12s %10s %16s %16s %32s %32s\n", "Allocation", "BaseAddress",
"RegionSize", "State", "Type", "AllocationProtect", "Protect");
for (p = 0;; p = (char *)mi.BaseAddress + mi.RegionSize) {
bzero(&mi, sizeof(mi));
if (!VirtualQuery(p, &mi, sizeof(mi)))
if (!VirtualQueryEx(hProcess, p, &mi, sizeof(mi)))
break;
sizefmt(b[0], mi.RegionSize, 1024);
kprintf("%.12lx %.12lx %10s %16s %16s %32s %32s\n", mi.AllocationBase,
kprintf("%.12lx %.12lx %10s %16s %16s %32s %32s%s\n", mi.AllocationBase,
mi.BaseAddress, b[0], DescribeNtMemState(b[1], mi.State),
DescribeNtMemType(b[2], mi.Type),
_DescribeNtPageFlags(b[3], mi.AllocationProtect),
_DescribeNtPageFlags(b[4], mi.Protect));
_DescribeNtPageFlags(b[4], mi.Protect),
(mi.State != kNtMemFree &&
MAX(addr, (const char *)mi.BaseAddress) <
MIN(addr + size, (const char *)mi.BaseAddress + mi.RegionSize))
? " [OVERLAPS]"
: "");
}
}

View file

@ -16,6 +16,7 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/atomic.h"
#include "libc/str/str.h"
#include "libc/thread/lock.h"
#include "libc/thread/posixthread.internal.h"
@ -25,11 +26,13 @@
* Unlocks mutex from child process after fork.
*/
int _pthread_mutex_wipe_np(pthread_mutex_t *mutex) {
void *edges = mutex->_edges;
uint64_t word = mutex->_word;
bzero(mutex, sizeof(*mutex));
mutex->_word = MUTEX_UNLOCK(word);
mutex->_edges = edges;
atomic_init(&mutex->_word, MUTEX_UNLOCK(atomic_load_explicit(
&mutex->_word, memory_order_relaxed)));
atomic_init(&mutex->_futex, 0);
mutex->_pid = 0;
mutex->_nsync[0] = 0;
atomic_signal_fence(memory_order_relaxed); // avoid xmm
mutex->_nsync[1] = 0;
return 0;
}
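// hedged sketch of how wipe helpers like _pthread_mutex_wipe_np() get
// used: the freshly forked child resets each global lock it may have
// inherited in a locked state (the handler name here is hypothetical;
// the real child path calls these wipers directly)
static void on_fork_child(void) {
  __dlopen_wipe();
  __localtime_wipe();
  __rand64_wipe();
}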

View file

@ -47,28 +47,30 @@
* @asyncsignalsafe
*/
errno_t pthread_setcancelstate(int state, int *oldstate) {
int old;
errno_t err;
struct PosixThread *pt;
if (__tls_enabled && (pt = _pthread_self())) {
if (pt->pt_flags & PT_NOCANCEL) {
old = PTHREAD_CANCEL_DISABLE;
} else if (pt->pt_flags & PT_MASKED) {
old = PTHREAD_CANCEL_MASKED;
} else {
old = PTHREAD_CANCEL_ENABLE;
}
switch (state) {
case PTHREAD_CANCEL_ENABLE:
case PTHREAD_CANCEL_DISABLE:
case PTHREAD_CANCEL_MASKED:
if (oldstate) {
if (pt->pt_flags & PT_NOCANCEL) {
*oldstate = PTHREAD_CANCEL_DISABLE;
} else if (pt->pt_flags & PT_MASKED) {
*oldstate = PTHREAD_CANCEL_MASKED;
} else {
*oldstate = PTHREAD_CANCEL_ENABLE;
}
}
pt->pt_flags &= ~(PT_NOCANCEL | PT_MASKED);
if (state == PTHREAD_CANCEL_MASKED) {
pt->pt_flags |= PT_MASKED;
} else if (state == PTHREAD_CANCEL_DISABLE) {
pt->pt_flags |= PT_NOCANCEL;
}
err = 0;
break;
case PTHREAD_CANCEL_DISABLE:
pt->pt_flags &= ~(PT_NOCANCEL | PT_MASKED);
pt->pt_flags |= PT_NOCANCEL;
err = 0;
break;
case PTHREAD_CANCEL_MASKED:
pt->pt_flags &= ~(PT_NOCANCEL | PT_MASKED);
pt->pt_flags |= PT_MASKED;
err = 0;
break;
default:
@ -76,11 +78,12 @@ errno_t pthread_setcancelstate(int state, int *oldstate) {
break;
}
} else {
if (oldstate) {
*oldstate = 0;
}
old = 0;
err = 0;
}
if (!err)
if (oldstate)
*oldstate = old;
#if IsModeDbg() && 0
STRACE("pthread_setcancelstate(%s, [%s]) → %s",
DescribeCancelState(0, &state), DescribeCancelState(err, oldstate),

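// typical POSIX usage of this API, for context (do_critical_work() is a
// hypothetical stand-in):
//
//   int old;
//   pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old);
//   do_critical_work();  // can't be interrupted by pthread_cancel()
//   pthread_setcancelstate(old, 0);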
View file

@ -28,7 +28,19 @@
static int _rand64_pid;
static unsigned __int128 _rand64_pool;
pthread_mutex_t __rand64_lock_obj = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t __rand64_lock_obj = PTHREAD_MUTEX_INITIALIZER;
void __rand64_lock(void) {
_pthread_mutex_lock(&__rand64_lock_obj);
}
void __rand64_unlock(void) {
_pthread_mutex_unlock(&__rand64_lock_obj);
}
void __rand64_wipe(void) {
_pthread_mutex_wipe_np(&__rand64_lock_obj);
}
/**
* Returns nondeterministic random data.
@ -43,7 +55,7 @@ pthread_mutex_t __rand64_lock_obj = PTHREAD_MUTEX_INITIALIZER;
uint64_t _rand64(void) {
void *p;
uint128_t s;
_pthread_mutex_lock(&__rand64_lock_obj);
__rand64_lock();
if (__pid == _rand64_pid) {
s = _rand64_pool; // normal path
} else {
@ -64,6 +76,6 @@ uint64_t _rand64(void) {
_rand64_pid = __pid;
}
_rand64_pool = (s *= 15750249268501108917ull); // lemur64
_pthread_mutex_unlock(&__rand64_lock_obj);
__rand64_unlock();
return s >> 64;
}
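// the pool update above is lemur64, a 128-bit multiplicative congruential
// generator; a standalone sketch, assuming the state gets seeded with
// real entropy first, as _rand64() does (an odd seed gives full period):
static uint128_t state = 1;
static uint64_t lemur64(void) {
  state *= 15750249268501108917ull;  // multiplicative congruential step
  return state >> 64;                // the high 64 bits are best mixed
}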

View file

@ -54,7 +54,8 @@ struct Tree *tree_prev(struct Tree *node) {
return parent;
}
static void tree_rotate_left(struct Tree **root, struct Tree *x) {
dontinstrument static void tree_rotate_left(struct Tree **root,
struct Tree *x) {
struct Tree *y = x->right;
x->right = tree_get_left(y);
if (tree_get_left(y))
@ -71,7 +72,8 @@ static void tree_rotate_left(struct Tree **root, struct Tree *x) {
x->parent = y;
}
static void tree_rotate_right(struct Tree **root, struct Tree *y) {
dontinstrument static void tree_rotate_right(struct Tree **root,
struct Tree *y) {
struct Tree *x = tree_get_left(y);
tree_set_left(y, x->right);
if (x->right)
@ -88,7 +90,8 @@ static void tree_rotate_right(struct Tree **root, struct Tree *y) {
x->right = y;
}
static void tree_rebalance_insert(struct Tree **root, struct Tree *node) {
dontinstrument static void tree_rebalance_insert(struct Tree **root,
struct Tree *node) {
struct Tree *uncle;
tree_set_red(node, 1);
while (node != *root && tree_get_red(node->parent)) {
@ -157,8 +160,8 @@ void tree_insert(struct Tree **root, struct Tree *node, tree_cmp_f *cmp) {
}
}
static void tree_transplant(struct Tree **root, struct Tree *u,
struct Tree *v) {
dontinstrument static void tree_transplant(struct Tree **root, struct Tree *u,
struct Tree *v) {
if (!u->parent) {
*root = v;
} else if (u == tree_get_left(u->parent)) {
@ -170,8 +173,9 @@ static void tree_transplant(struct Tree **root, struct Tree *u,
v->parent = u->parent;
}
static void tree_rebalance_remove(struct Tree **root, struct Tree *node,
struct Tree *parent) {
dontinstrument static void tree_rebalance_remove(struct Tree **root,
struct Tree *node,
struct Tree *parent) {
struct Tree *sibling;
while (node != *root && (!node || !tree_get_red(node))) {
if (node == tree_get_left(parent)) {

View file

@ -0,0 +1,50 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2024 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/intrin/describeflags.h"
#include "libc/intrin/strace.h"
#include "libc/macros.h"
#include "libc/mem/alloca.h"
#include "libc/nt/enum/memflags.h"
#include "libc/nt/memory.h"
#include "libc/nt/thunk/msabi.h"
__msabi extern typeof(VirtualAllocEx) *const __imp_VirtualAllocEx;
static const char *DescribeAllocationType(char buf[48], uint32_t x) {
const struct DescribeFlags kAllocationTypeFlags[] = {
{kNtMemCommit, "Commit"}, //
{kNtMemReserve, "Reserve"}, //
{kNtMemReset, "Reset"}, //
};
return _DescribeFlags(buf, 48, kAllocationTypeFlags,
ARRAYLEN(kAllocationTypeFlags), "kNtMem", x);
}
void *VirtualAllocEx(int64_t hProcess, void *lpAddress, uint64_t dwSize,
uint32_t flAllocationType, uint32_t flProtect) {
void *res = __imp_VirtualAllocEx(hProcess, lpAddress, dwSize,
flAllocationType, flProtect);
if (!res)
__winerr();
NTTRACE("VirtualAllocEx(%ld, %p, %'lu, %s, %s) → %p% m", hProcess, lpAddress,
dwSize, DescribeAllocationType(alloca(48), flAllocationType),
DescribeNtPageFlags(flProtect), res);
return res;
}

View file

@ -16,13 +16,8 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/intrin/describeflags.h"
#include "libc/intrin/strace.h"
#include "libc/log/libfatal.internal.h"
#include "libc/nt/memory.h"
__msabi extern typeof(VirtualProtect) *const __imp_VirtualProtect;
#include "libc/nt/runtime.h"
/**
* Protects memory on the New Technology.
@ -31,12 +26,6 @@ __msabi extern typeof(VirtualProtect) *const __imp_VirtualProtect;
textwindows bool32 VirtualProtect(void *lpAddress, uint64_t dwSize,
uint32_t flNewProtect,
uint32_t *lpflOldProtect) {
bool32 bOk;
bOk = __imp_VirtualProtect(lpAddress, dwSize, flNewProtect, lpflOldProtect);
if (!bOk)
__winerr();
NTTRACE("VirtualProtect(%p, %'zu, %s, [%s]) → %hhhd% m", lpAddress, dwSize,
DescribeNtPageFlags(flNewProtect),
DescribeNtPageFlags(*lpflOldProtect), bOk);
return bOk;
return VirtualProtectEx(GetCurrentProcess(), lpAddress, dwSize, flNewProtect,
lpflOldProtect);
}

View file

@ -0,0 +1,43 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/intrin/describeflags.h"
#include "libc/intrin/strace.h"
#include "libc/log/libfatal.internal.h"
#include "libc/nt/memory.h"
__msabi extern typeof(VirtualProtectEx) *const __imp_VirtualProtectEx;
/**
* Protects memory on the New Technology.
* @note this wrapper takes care of ABI, STRACE(), and __winerr()
*/
textwindows bool32 VirtualProtectEx(int64_t hProcess, void *lpAddress,
uint64_t dwSize, uint32_t flNewProtect,
uint32_t *lpflOldProtect) {
bool32 bOk;
bOk = __imp_VirtualProtectEx(hProcess, lpAddress, dwSize, flNewProtect,
lpflOldProtect);
if (!bOk)
__winerr();
NTTRACE("VirtualProtectEx(%ld, %p, %'zu, %s, [%s]) → %hhhd% m", hProcess,
lpAddress, dwSize, DescribeNtPageFlags(flNewProtect),
DescribeNtPageFlags(*lpflOldProtect), bOk);
return bOk;
}

View file

@ -35,8 +35,8 @@ textwindows dontinstrument void __bootstrap_tls(struct CosmoTib *tib,
tib->tib_self = tib;
tib->tib_self2 = tib;
tib->tib_sigmask = -1;
tib->tib_strace = __strace;
tib->tib_ftrace = __ftrace;
tib->tib_strace = -100;
tib->tib_ftrace = -100;
tib->tib_sigstack_size = 57344;
tib->tib_sigstack_addr = bp - 57344;
int tid = __imp_GetCurrentThreadId();

View file

@ -0,0 +1,36 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2024 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/intrin/strace.h"
#include "libc/nt/memory.h"
#include "libc/nt/thunk/msabi.h"
__msabi extern typeof(WriteProcessMemory) *const __imp_WriteProcessMemory;
bool32 WriteProcessMemory(int64_t hProcess, void *lpBaseAddress,
const void *lpBuffer, uint64_t nSize,
uint64_t *opt_out_lpNumberOfBytesWritten) {
bool32 ok = __imp_WriteProcessMemory(hProcess, lpBaseAddress, lpBuffer, nSize,
opt_out_lpNumberOfBytesWritten);
if (!ok)
__winerr();
NTTRACE("WriteProcessMemory(%ld, %p, %p, %'lu, %p) → %hhhd% m", hProcess,
lpBaseAddress, lpBuffer, nSize, opt_out_lpNumberOfBytesWritten, ok);
return ok;
}