From 0b3c81dd4e4a630d541c6f24abd0708984b16b4d Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Wed, 1 Jan 2025 04:59:38 -0800 Subject: [PATCH] Make fork() go 30% faster This change makes fork() go nearly as fast as sys_fork() on UNIX. As for Windows this change shaves about 4-5ms off fork() + wait() latency. This is accomplished by using WriteProcessMemory() from the parent process to setup the address space of a suspended process; it is better than a pipe --- Makefile | 2 +- libc/intrin/describemapping.c | 8 +- libc/intrin/dlopen.c | 6 +- libc/intrin/localtime_lock.c | 6 +- libc/intrin/maps.c | 75 +--- libc/intrin/maps.h | 43 +- libc/intrin/mmap.c | 128 +++--- libc/intrin/mprotect.c | 8 +- libc/intrin/msync-nt.c | 35 +- libc/intrin/printmaps.c | 21 +- libc/intrin/printmapswin32.c | 14 +- libc/intrin/pthread_mutex_wipe_np.c | 13 +- libc/intrin/pthread_setcancelstate.c | 41 +- libc/intrin/rand64.c | 18 +- libc/intrin/tree.c | 18 +- libc/intrin/virtualallocex.c | 50 +++ libc/intrin/virtualprotect.c | 17 +- libc/intrin/virtualprotectex.c | 43 ++ libc/intrin/wintlsinit.c | 4 +- libc/intrin/writeprocessmemory.c | 36 ++ libc/nexgen32e/threaded.c | 6 +- libc/nt/kernel32/VirtualAllocEx.S | 16 - libc/nt/kernel32/VirtualProtectEx.S | 2 + libc/nt/kernel32/VirtualQueryEx.S | 18 + libc/nt/kernel32/WriteProcessMemory.S | 2 + libc/nt/master.sh | 8 +- libc/nt/memory.h | 9 + libc/proc/fork-nt.c | 582 +++++++++----------------- libc/proc/fork.c | 72 ++-- libc/runtime/runtime.h | 2 +- libc/runtime/winmain.greg.c | 30 +- libc/sock/kntwsadata.c | 4 + libc/sysv/consts.sh | 1 - libc/sysv/consts/MAP_NOFORK.S | 2 - libc/sysv/consts/map.h | 1 - libc/sysv/hostos.S | 8 +- libc/thread/itimer.c | 1 + test/libc/proc/BUILD.mk | 5 +- test/libc/proc/fork_test.c | 31 +- test/posix/file_offset_exec_test.c | 4 - third_party/gdtoa/lock.c | 16 +- third_party/gdtoa/lock.h | 6 +- third_party/nsync/common.c | 3 +- third_party/tz/lock.h | 3 +- 44 files changed, 769 insertions(+), 649 deletions(-) create mode 100644 libc/intrin/virtualallocex.c create mode 100644 libc/intrin/virtualprotectex.c create mode 100644 libc/intrin/writeprocessmemory.c create mode 100644 libc/nt/kernel32/VirtualProtectEx.S create mode 100644 libc/nt/kernel32/VirtualQueryEx.S create mode 100644 libc/nt/kernel32/WriteProcessMemory.S delete mode 100644 libc/sysv/consts/MAP_NOFORK.S diff --git a/Makefile b/Makefile index c29c238ab..27b241b77 100644 --- a/Makefile +++ b/Makefile @@ -135,7 +135,7 @@ ARCH = aarch64 HOSTS ?= pi pi5 studio freebsdarm else ARCH = x86_64 -HOSTS ?= freebsd rhel7 xnu openbsd netbsd win10 +HOSTS ?= freebsd rhel7 xnu openbsd netbsd win10 luna endif ZIPOBJ_FLAGS += -a$(ARCH) diff --git a/libc/intrin/describemapping.c b/libc/intrin/describemapping.c index 6510e9848..9371028b8 100644 --- a/libc/intrin/describemapping.c +++ b/libc/intrin/describemapping.c @@ -17,6 +17,7 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/intrin/describeflags.h" +#include "libc/intrin/maps.h" #include "libc/runtime/memtrack.internal.h" #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/prot.h" @@ -24,12 +25,13 @@ static char DescribeMapType(int flags) { switch (flags & MAP_TYPE) { case MAP_FILE: + if (flags & MAP_NOFORK) + return 'i'; // executable image return '-'; case MAP_PRIVATE: if (flags & MAP_NOFORK) - return 'P'; - else - return 'p'; + return 'w'; // windows memory + return 'p'; case MAP_SHARED: return 's'; default: diff --git a/libc/intrin/dlopen.c b/libc/intrin/dlopen.c index 7191d0ffb..3e93f8be3 100644 --- a/libc/intrin/dlopen.c +++ b/libc/intrin/dlopen.c @@ -19,7 +19,7 @@ #include "libc/thread/posixthread.internal.h" #include "libc/thread/thread.h" -pthread_mutex_t __dlopen_lock_obj = PTHREAD_MUTEX_INITIALIZER; +static pthread_mutex_t __dlopen_lock_obj = PTHREAD_MUTEX_INITIALIZER; void __dlopen_lock(void) { _pthread_mutex_lock(&__dlopen_lock_obj); @@ -28,3 +28,7 @@ void __dlopen_lock(void) { void __dlopen_unlock(void) { _pthread_mutex_unlock(&__dlopen_lock_obj); } + +void __dlopen_wipe(void) { + _pthread_mutex_wipe_np(&__dlopen_lock_obj); +} diff --git a/libc/intrin/localtime_lock.c b/libc/intrin/localtime_lock.c index b7064c9a4..bbc0a04d1 100644 --- a/libc/intrin/localtime_lock.c +++ b/libc/intrin/localtime_lock.c @@ -19,7 +19,7 @@ #include "libc/thread/posixthread.internal.h" #include "third_party/tz/lock.h" -pthread_mutex_t __localtime_lock_obj = PTHREAD_MUTEX_INITIALIZER; +static pthread_mutex_t __localtime_lock_obj = PTHREAD_MUTEX_INITIALIZER; void __localtime_lock(void) { _pthread_mutex_lock(&__localtime_lock_obj); @@ -28,3 +28,7 @@ void __localtime_lock(void) { void __localtime_unlock(void) { _pthread_mutex_unlock(&__localtime_lock_obj); } + +void __localtime_wipe(void) { + _pthread_mutex_wipe_np(&__localtime_lock_obj); +} diff --git a/libc/intrin/maps.c b/libc/intrin/maps.c index 8a3f0b054..f1709a665 100644 --- a/libc/intrin/maps.c +++ b/libc/intrin/maps.c @@ -30,6 +30,7 @@ #include "libc/nexgen32e/rdtsc.h" #include "libc/runtime/runtime.h" #include "libc/runtime/stack.h" +#include "libc/sysv/consts/map.h" #include "libc/sysv/consts/prot.h" #include "libc/thread/lock.h" #include "libc/thread/tls.h" @@ -40,10 +41,6 @@ __static_yoink("_init_maps"); #define ABI privileged optimizespeed -// take great care if you enable this -// especially if you're using --ftrace too -#define DEBUG_MAPS_LOCK 0 - struct Maps __maps; void __maps_add(struct Map *map) { @@ -61,14 +58,18 @@ void __maps_stack(char *stackaddr, int pagesz, int guardsize, size_t stacksize, __maps.stack.addr = stackaddr + guardsize; __maps.stack.size = stacksize - guardsize; __maps.stack.prot = stackprot; - __maps.stack.hand = -1; + __maps.stack.hand = MAPS_SUBREGION; + __maps.stack.flags = MAP_PRIVATE | MAP_ANONYMOUS; __maps_adder(&__maps.stack, pagesz); if (guardsize) { __maps.guard.addr = stackaddr; __maps.guard.size = guardsize; - __maps.guard.prot = PROT_NONE; + __maps.guard.prot = PROT_NONE | PROT_GUARD; __maps.guard.hand = stackhand; + __maps.guard.flags = MAP_PRIVATE | MAP_ANONYMOUS; __maps_adder(&__maps.guard, pagesz); + } else { + __maps.stack.hand = stackhand; } } @@ -102,29 +103,14 @@ void __maps_init(void) { } // record .text and .data mappings - static struct Map text, data; - text.addr = (char *)__executable_start; - text.size = _etext - __executable_start; - text.prot = PROT_READ | PROT_EXEC; + __maps_track((char *)__executable_start, _etext - __executable_start, + PROT_READ | PROT_EXEC, MAP_NOFORK); uintptr_t ds = ((uintptr_t)_etext + pagesz - 1) & -pagesz; - if (ds < (uintptr_t)_end) { - data.addr = (char *)ds; - data.size = (uintptr_t)_end - ds; - data.prot = PROT_READ | PROT_WRITE; - __maps_adder(&data, pagesz); - } - __maps_adder(&text, pagesz); + if (ds < (uintptr_t)_end) + __maps_track((char *)ds, (uintptr_t)_end - ds, PROT_READ | PROT_WRITE, + MAP_NOFORK); } -#if DEBUG_MAPS_LOCK -privileged static void __maps_panic(const char *msg) { - // it's only safe to pass a format string. if we use directives such - // as %s, %t etc. then kprintf() will recursively call __maps_lock() - kprintf(msg); - DebugBreak(); -} -#endif - bool __maps_held(void) { return __tls_enabled && !(__get_tls()->tib_flags & TIB_FLAG_VFORKED) && MUTEX_OWNER( @@ -143,7 +129,12 @@ ABI void __maps_lock(void) { if (tib->tib_flags & TIB_FLAG_VFORKED) return; me = atomic_load_explicit(&tib->tib_ptid, memory_order_relaxed); - if (me <= 0) + word = 0; + lock = MUTEX_LOCK(word); + lock = MUTEX_SET_OWNER(lock, me); + if (atomic_compare_exchange_strong_explicit(&__maps.lock.word, &word, lock, + memory_order_acquire, + memory_order_relaxed)) return; word = atomic_load_explicit(&__maps.lock.word, memory_order_relaxed); for (;;) { @@ -154,24 +145,13 @@ ABI void __maps_lock(void) { return; continue; } -#if DEBUG_MAPS_LOCK - if (__deadlock_tracked(&__maps.lock) == 1) - __maps_panic("error: maps lock already held\n"); - if (__deadlock_check(&__maps.lock, 1)) - __maps_panic("error: maps lock is cyclic\n"); -#endif word = 0; lock = MUTEX_LOCK(word); lock = MUTEX_SET_OWNER(lock, me); if (atomic_compare_exchange_weak_explicit(&__maps.lock.word, &word, lock, memory_order_acquire, - memory_order_relaxed)) { -#if DEBUG_MAPS_LOCK - __deadlock_track(&__maps.lock, 0); - __deadlock_record(&__maps.lock, 0); -#endif + memory_order_relaxed)) return; - } for (;;) { word = atomic_load_explicit(&__maps.lock.word, memory_order_relaxed); if (MUTEX_OWNER(word) == me) @@ -183,7 +163,6 @@ ABI void __maps_lock(void) { } ABI void __maps_unlock(void) { - int me; uint64_t word; struct CosmoTib *tib; if (!__tls_enabled) @@ -192,28 +171,16 @@ ABI void __maps_unlock(void) { return; if (tib->tib_flags & TIB_FLAG_VFORKED) return; - me = atomic_load_explicit(&tib->tib_ptid, memory_order_relaxed); - if (me <= 0) - return; word = atomic_load_explicit(&__maps.lock.word, memory_order_relaxed); -#if DEBUG_MAPS_LOCK - if (__deadlock_tracked(&__maps.lock) == 0) - __maps_panic("error: maps lock not owned by caller\n"); -#endif for (;;) { - if (MUTEX_DEPTH(word)) { + if (MUTEX_DEPTH(word)) if (atomic_compare_exchange_weak_explicit( &__maps.lock.word, &word, MUTEX_DEC_DEPTH(word), memory_order_relaxed, memory_order_relaxed)) break; - } if (atomic_compare_exchange_weak_explicit(&__maps.lock.word, &word, 0, memory_order_release, - memory_order_relaxed)) { -#if DEBUG_MAPS_LOCK - __deadlock_untrack(&__maps.lock); -#endif + memory_order_relaxed)) break; - } } } diff --git a/libc/intrin/maps.h b/libc/intrin/maps.h index 303a89476..5244f0d11 100644 --- a/libc/intrin/maps.h +++ b/libc/intrin/maps.h @@ -5,6 +5,28 @@ #include "libc/runtime/runtime.h" COSMOPOLITAN_C_START_ +/* size of dynamic memory that is used internally by your memory manager */ +#define MAPS_SIZE 65536 + +/* when map->hand is MAPS_RESERVATION it means mmap() is transactionally + reserving address space it is in the process of requesting from win32 */ +#define MAPS_RESERVATION -2 + +/* when map->hand is MAPS_SUBREGION it means that an allocation has been + broken into multiple fragments by mprotect(). the first fragment must + be set to MAPS_VIRTUAL or your CreateFileMapping() handle. your frags + must be perfectly contiguous in memory and should have the same flags */ +#define MAPS_SUBREGION -3 + +/* indicates an allocation was created by VirtualAlloc() and so munmap() + must call VirtualFree() when destroying it. use it on the hand field. */ +#define MAPS_VIRTUAL -4 + +/* if this is used on MAP_PRIVATE memory, then it's assumed to be memory + that win32 allocated, e.g. a CreateThread() stack. if this is used on + MAP_FILE memory, then it's assumed to be part of the executable image */ +#define MAP_NOFORK 0x10000000 + #define MAP_TREE_CONTAINER(e) TREE_CONTAINER(struct Map, tree, e) struct Map { @@ -12,9 +34,8 @@ struct Map { size_t size; /* must be nonzero */ int64_t off; /* ignore for anon */ int flags; /* memory map flag */ - char prot; /* memory protects */ + short prot; /* memory protects */ bool iscow; /* windows nt only */ - bool precious; /* windows nt only */ bool readonlyfile; /* windows nt only */ unsigned visited; /* checks and fork */ intptr_t hand; /* windows nt only */ @@ -29,11 +50,17 @@ struct MapLock { _Atomic(uint64_t) word; }; +struct MapSlab { + struct MapSlab *next; + struct Map maps[(MAPS_SIZE - sizeof(struct MapSlab *)) / sizeof(struct Map)]; +}; + struct Maps { uint128_t rand; struct Tree *maps; struct MapLock lock; _Atomic(uintptr_t) freed; + _Atomic(struct MapSlab *) slabs; size_t count; size_t pages; struct Map stack; @@ -76,33 +103,37 @@ forceinline optimizespeed int __maps_search(const void *key, return (addr > map->addr) - (addr < map->addr); } -static inline struct Map *__maps_next(struct Map *map) { +dontinstrument static inline struct Map *__maps_next(struct Map *map) { struct Tree *node; if ((node = tree_next(&map->tree))) return MAP_TREE_CONTAINER(node); return 0; } -static inline struct Map *__maps_prev(struct Map *map) { +dontinstrument static inline struct Map *__maps_prev(struct Map *map) { struct Tree *node; if ((node = tree_prev(&map->tree))) return MAP_TREE_CONTAINER(node); return 0; } -static inline struct Map *__maps_first(void) { +dontinstrument static inline struct Map *__maps_first(void) { struct Tree *node; if ((node = tree_first(__maps.maps))) return MAP_TREE_CONTAINER(node); return 0; } -static inline struct Map *__maps_last(void) { +dontinstrument static inline struct Map *__maps_last(void) { struct Tree *node; if ((node = tree_last(__maps.maps))) return MAP_TREE_CONTAINER(node); return 0; } +static inline bool __maps_isalloc(struct Map *map) { + return map->hand != MAPS_SUBREGION; +} + COSMOPOLITAN_C_END_ #endif /* COSMOPOLITAN_MAPS_H_ */ diff --git a/libc/intrin/mmap.c b/libc/intrin/mmap.c index 6f246e07b..ef7867b84 100644 --- a/libc/intrin/mmap.c +++ b/libc/intrin/mmap.c @@ -19,6 +19,7 @@ #include "libc/calls/calls.h" #include "libc/calls/internal.h" #include "libc/calls/syscall-sysv.internal.h" +#include "libc/calls/syscall_support-nt.internal.h" #include "libc/dce.h" #include "libc/errno.h" #include "libc/intrin/atomic.h" @@ -32,6 +33,7 @@ #include "libc/intrin/weaken.h" #include "libc/limits.h" #include "libc/macros.h" +#include "libc/nt/enum/memflags.h" #include "libc/nt/memory.h" #include "libc/nt/runtime.h" #include "libc/runtime/runtime.h" @@ -44,9 +46,10 @@ #include "libc/sysv/consts/prot.h" #include "libc/sysv/errfuns.h" #include "libc/thread/lock.h" +#include "libc/thread/thread.h" #include "libc/thread/tls.h" -#define MMDEBUG 0 +#define MMDEBUG 1 #define MAX_SIZE 0x0ff800000000ul #define MAP_FIXED_NOREPLACE_linux 0x100000 @@ -99,6 +102,31 @@ static bool __maps_overlaps(const char *addr, size_t size) { return false; } +// returns true if all fragments of all allocations which overlap +// [addr,addr+size) are completely contained by [addr,addr+size). +textwindows static bool __maps_envelops(const char *addr, size_t size) { + struct Map *map, *next; + size = PGUP(size); + if (!(map = __maps_floor(addr))) + if (!(map = __maps_first())) + return true; + do { + if (MAX(addr, map->addr) >= MIN(addr + size, map->addr + map->size)) + break; // didn't overlap mapping + if (!__maps_isalloc(map)) + return false; // didn't include first fragment of alloc + if (addr > map->addr) + return false; // excluded leading pages of first fragment + // set map to last fragment in allocation + for (; (next = __maps_next(map)) && !__maps_isalloc(next); map = next) + // fragments within an allocation must be perfectly contiguous + ASSERT(map->addr + map->size == next->addr); + if (addr + size < map->addr + PGUP(map->size)) + return false; // excluded trailing pages of allocation + } while ((map = next)); + return true; +} + void __maps_check(void) { #if MMDEBUG size_t maps = 0; @@ -130,17 +158,17 @@ static int __muntrack(char *addr, size_t size, struct Map **deleted, size_t ti = 0; struct Map *map; struct Map *next; - struct Map *floor; size = PGUP(size); - floor = __maps_floor(addr); - for (map = floor; map && map->addr <= addr + size; map = next) { + if (!(map = __maps_floor(addr))) + map = __maps_first(); + for (; map && map->addr <= addr + size; map = next) { next = __maps_next(map); char *map_addr = map->addr; size_t map_size = map->size; if (!(MAX(addr, map_addr) < MIN(addr + size, map_addr + PGUP(map_size)))) continue; if (addr <= map_addr && addr + size >= map_addr + PGUP(map_size)) { - if (map->precious) + if (map->hand == MAPS_RESERVATION) continue; // remove mapping completely tree_remove(&__maps.maps, &map->tree); @@ -149,9 +177,6 @@ static int __muntrack(char *addr, size_t size, struct Map **deleted, __maps.pages -= (map_size + __pagesize - 1) / __pagesize; __maps.count -= 1; __maps_check(); - } else if (IsWindows()) { - STRACE("you can't carve up memory maps on windows ;_;"); - rc = enotsup(); } else if (addr <= map_addr) { // shave off lefthand side of mapping ASSERT(addr + size < map_addr + PGUP(map_size)); @@ -229,6 +254,7 @@ void __maps_free(struct Map *map) { ASSERT(!TAG(map)); map->size = 0; map->addr = MAP_FAILED; + map->hand = kNtInvalidHandleValue; for (tip = atomic_load_explicit(&__maps.freed, memory_order_relaxed);;) { map->freed = (struct Map *)PTR(tip); if (atomic_compare_exchange_weak_explicit( @@ -261,11 +287,23 @@ static int __maps_destroy_all(struct Map *list) { if (!IsWindows()) { if (sys_munmap(map->addr, map->size)) rc = -1; - } else if (map->hand != -1) { - if (!UnmapViewOfFile(map->addr)) - rc = -1; - if (!CloseHandle(map->hand)) - rc = -1; + } else { + switch (map->hand) { + case MAPS_SUBREGION: + case MAPS_RESERVATION: + break; + case MAPS_VIRTUAL: + if (!VirtualFree(map->addr, 0, kNtMemRelease)) + rc = __winerr(); + break; + default: + ASSERT(map->hand > 0); + if (!UnmapViewOfFile(map->addr)) + rc = -1; + if (!CloseHandle(map->hand)) + rc = -1; + break; + } } } return rc; @@ -345,10 +383,9 @@ void __maps_insert(struct Map *map) { if (!map && left && right) if (__maps_mergeable(left, right)) { left->size = PGUP(left->size); - right->addr -= left->size; - right->size += left->size; - tree_remove(&__maps.maps, &left->tree); - __maps_free(left); + left->size += right->size; + tree_remove(&__maps.maps, &right->tree); + __maps_free(right); __maps.count -= 1; } @@ -369,7 +406,7 @@ bool __maps_track(char *addr, size_t size, int prot, int flags) { map->size = size; map->prot = prot; map->flags = flags; - map->hand = -1; + map->hand = MAPS_VIRTUAL; __maps_lock(); __maps_insert(map); __maps_unlock(); @@ -396,22 +433,23 @@ struct Map *__maps_alloc(void) { return map; pthread_pause_np(); } - int size = 65536; // we're creating sudden surprise memory. the user might be in the // middle of carefully planning a fixed memory structure. we don't // want the system allocator to put our surprise memory inside it, // and we also want to avoid the chances of accidentally unmapping struct DirectMap sys = - sys_mmap(__maps_randaddr(), size, PROT_READ | PROT_WRITE, + sys_mmap(__maps_randaddr(), MAPS_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (sys.addr == MAP_FAILED) return 0; - map = sys.addr; if (IsWindows()) CloseHandle(sys.maphandle); - for (int i = 1; i < size / sizeof(struct Map); ++i) - __maps_free(map + i); - return map; + struct MapSlab *slab = sys.addr; + while (!atomic_compare_exchange_weak(&__maps.slabs, &slab->next, slab)) { + } + for (size_t i = 1; i < ARRAYLEN(slab->maps); ++i) + __maps_free(&slab->maps[i]); + return &slab->maps[0]; } static int __munmap(char *addr, size_t size) { @@ -429,13 +467,10 @@ static int __munmap(char *addr, size_t size) { __maps_lock(); __maps_check(); - // normalize size - // abort if size doesn't include all pages in granule - if (GRUP(size) > PGUP(size)) - if (__maps_overlaps(addr + PGUP(size), GRUP(size) - PGUP(size))) { - __maps_unlock(); - return einval(); - } + // on windows we can only unmap whole allocations + if (IsWindows()) + if (!__maps_envelops(addr, size)) + return enotsup(); // untrack mappings int rc; @@ -572,6 +607,11 @@ static void *__mmap_impl(char *addr, size_t size, int prot, int flags, int fd, } } else { // remove existing mappings and their tracking objects + if (!__maps_envelops(addr, size)) { + __maps_unlock(); + __maps_free(map); + return (void *)enotsup(); + } struct Map *deleted = 0; if (__muntrack(addr, size, &deleted, 0, 0)) { __maps_insert_all(deleted); @@ -592,8 +632,7 @@ static void *__mmap_impl(char *addr, size_t size, int prot, int flags, int fd, map->size = size; map->prot = 0; map->flags = 0; - map->hand = -1; - map->precious = true; + map->hand = MAPS_RESERVATION; __maps_insert(map); __maps_unlock(); } @@ -610,7 +649,6 @@ static void *__mmap_impl(char *addr, size_t size, int prot, int flags, int fd, __maps_lock(); tree_remove(&__maps.maps, &map->tree); __maps.pages -= (map->size + __pagesize - 1) / __pagesize; - map->precious = false; __maps_unlock(); if (errno == EADDRNOTAVAIL) { // we've encountered mystery memory @@ -649,7 +687,6 @@ static void *__mmap_impl(char *addr, size_t size, int prot, int flags, int fd, map->prot = prot; map->flags = flags; map->hand = res.maphandle; - map->precious = false; if (IsWindows()) { map->iscow = (flags & MAP_TYPE) != MAP_SHARED && fd != -1; map->readonlyfile = (flags & MAP_TYPE) == MAP_SHARED && fd != -1 && @@ -710,21 +747,6 @@ static void *__mmap(char *addr, size_t size, int prot, int flags, int fd, static void *__mremap_impl(char *old_addr, size_t old_size, size_t new_size, int flags, char *new_addr) { - // normalize and validate old size - // abort if size doesn't include all pages in granule - if (GRUP(old_size) > PGUP(old_size)) - if (__maps_overlaps(old_addr + PGUP(old_size), - GRUP(old_size) - PGUP(old_size))) - return (void *)einval(); - - // validate new size - // abort if size doesn't include all pages in granule - if (flags & MREMAP_FIXED) - if (GRUP(new_size) > PGUP(new_size)) - if (__maps_overlaps(new_addr + PGUP(new_size), - GRUP(new_size) - PGUP(new_size))) - return (void *)einval(); - // allocate object for tracking new mapping struct Map *map; if (!(map = __maps_alloc())) @@ -787,6 +809,7 @@ static void *__mremap_impl(char *old_addr, size_t old_size, size_t new_size, map->off = old_off; map->prot = old_prot; map->flags = old_flags; + map->hand = kNtInvalidHandleValue; __maps_insert(map); return res; @@ -945,8 +968,8 @@ static void *__mremap(char *old_addr, size_t old_size, size_t new_size, * * @raise ENOMEM if `RUSAGE_AS` or similar limits are exceeded * @raise EEXIST if `flags` has `MAP_FIXED_NOREPLACE` and `addr` is used + * @raise ENOTSUP if interval overlapped without enveloping win32 alloc * @raise EPERM if `addr` is null and `flags` has `MAP_FIXED` - * @raise ENOTSUP if memory map is cleaved on windows with `MAP_FIXED` * @raise EINVAL if `addr` isn't granularity aligned with `MAP_FIXED` * @raise EINVAL if `size` is zero * @raise EINVAL if `flags` or `prot` hold invalid values @@ -1000,10 +1023,9 @@ void *mremap(void *old_addr, size_t old_size, size_t new_size, int flags, ...) { * * @return 0 on success, or -1 w/ errno. * @raise ENOMEM if OOM happened when punching hole in existing mapping - * @raise ENOTSUP if memory map is cleaved on windows with `MAP_FIXED` + * @raise ENOTSUP if interval overlapped without enveloping win32 alloc * @raise EDEADLK if called from signal handler interrupting mmap() * @raise EINVAL if `addr` isn't granularity aligned - * @raise EINVAL if `size` didn't include all pages in granule */ int munmap(void *addr, size_t size) { int rc = __munmap(addr, size); diff --git a/libc/intrin/mprotect.c b/libc/intrin/mprotect.c index d4faf24f5..847607e61 100644 --- a/libc/intrin/mprotect.c +++ b/libc/intrin/mprotect.c @@ -108,7 +108,7 @@ int __mprotect(char *addr, size_t size, int prot) { leftmap->hand = map->hand; map->addr += left; map->size = right; - map->hand = -1; + map->hand = MAPS_SUBREGION; if (!(map->flags & MAP_ANONYMOUS)) map->off += left; tree_insert(&__maps.maps, &leftmap->tree, __maps_compare); @@ -139,7 +139,7 @@ int __mprotect(char *addr, size_t size, int prot) { map->addr += left; map->size = right; map->prot = prot; - map->hand = -1; + map->hand = MAPS_SUBREGION; if (!(map->flags & MAP_ANONYMOUS)) map->off += left; tree_insert(&__maps.maps, &leftmap->tree, __maps_compare); @@ -175,10 +175,10 @@ int __mprotect(char *addr, size_t size, int prot) { midlmap->off = (map->flags & MAP_ANONYMOUS) ? 0 : map->off + left; midlmap->prot = prot; midlmap->flags = map->flags; - midlmap->hand = -1; + midlmap->hand = MAPS_SUBREGION; map->addr += left + middle; map->size = right; - map->hand = -1; + map->hand = MAPS_SUBREGION; if (!(map->flags & MAP_ANONYMOUS)) map->off += left + middle; tree_insert(&__maps.maps, &leftmap->tree, __maps_compare); diff --git a/libc/intrin/msync-nt.c b/libc/intrin/msync-nt.c index a6ead01a6..ea8c6c15f 100644 --- a/libc/intrin/msync-nt.c +++ b/libc/intrin/msync-nt.c @@ -23,6 +23,7 @@ #include "libc/runtime/runtime.h" #include "libc/stdio/sysparam.h" #include "libc/sysv/consts/auxv.h" +#include "libc/sysv/consts/map.h" #include "libc/sysv/errfuns.h" textwindows int sys_msync_nt(char *addr, size_t size, int flags) { @@ -35,14 +36,32 @@ textwindows int sys_msync_nt(char *addr, size_t size, int flags) { int rc = 0; __maps_lock(); - struct Map *map, *floor; - floor = __maps_floor(addr); - for (map = floor; map && map->addr <= addr + size; map = __maps_next(map)) { - char *beg = MAX(addr, map->addr); - char *end = MIN(addr + size, map->addr + map->size); - if (beg < end) - if (!FlushViewOfFile(beg, end - beg)) - rc = -1; + struct Map *map, *next; + if (!(map = __maps_floor(addr))) + if (!(map = __maps_first())) + return true; + for (; map; map = next) { + next = __maps_next(map); + if (!__maps_isalloc(map)) + continue; + if (map->flags & MAP_ANONYMOUS) + continue; + if (MAX(addr, map->addr) >= MIN(addr + size, map->addr + map->size)) + break; // didn't overlap mapping + + // get true size of win32 allocation + size_t allocsize = map->size; + for (struct Map *map2 = next; map2; map2 = __maps_next(map2)) { + if (!__maps_isalloc(map2) && map->addr + allocsize == map2->addr) { + allocsize += map2->size; + } else { + break; + } + } + + // perform the flush + if (!FlushViewOfFile(map->addr, allocsize)) + rc = -1; // TODO(jart): FlushFileBuffers too on g_fds handle if MS_SYNC? } __maps_unlock(); diff --git a/libc/intrin/printmaps.c b/libc/intrin/printmaps.c index fbd30d179..7503876ed 100644 --- a/libc/intrin/printmaps.c +++ b/libc/intrin/printmaps.c @@ -16,6 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/dce.h" #include "libc/fmt/conv.h" #include "libc/fmt/itoa.h" #include "libc/intrin/bsr.h" @@ -51,13 +52,14 @@ void __print_maps(size_t limit) { char mappingbuf[8]; struct Map *last = 0; int pagesz = __pagesize; + int gransz = __gransize; int digs = get_address_digits(pagesz); for (struct Tree *e = tree_first(__maps.maps); e; e = tree_next(e)) { struct Map *map = MAP_TREE_CONTAINER(e); // show gaps between maps if (last) { - char *beg = last->addr + ((last->size + pagesz - 1) & -pagesz); + char *beg = last->addr + ((last->size + gransz - 1) & -gransz); char *end = map->addr; if (end > beg) { size_t gap = end - beg; @@ -72,8 +74,21 @@ void __print_maps(size_t limit) { _DescribeMapping(mappingbuf, map->prot, map->flags)); sizefmt(sb, map->size, 1024); kprintf(" %!sb", sb); - if (map->hand && map->hand != -1) - kprintf(" hand=%ld", map->hand); + if (IsWindows()) { + switch (map->hand) { + case MAPS_RESERVATION: + kprintf(" reservation"); + break; + case MAPS_SUBREGION: + break; + case MAPS_VIRTUAL: + kprintf(" virtual"); + break; + default: + kprintf(" hand=%ld", map->hand); + break; + } + } if (map->iscow) kprintf(" cow"); if (map->readonlyfile) diff --git a/libc/intrin/printmapswin32.c b/libc/intrin/printmapswin32.c index 65fbcd1e3..8f03b7db0 100644 --- a/libc/intrin/printmapswin32.c +++ b/libc/intrin/printmapswin32.c @@ -23,6 +23,7 @@ #include "libc/nt/enum/memflags.h" #include "libc/nt/memory.h" #include "libc/runtime/runtime.h" +#include "libc/stdio/sysparam.h" #include "libc/str/str.h" static const struct DescribeFlags kNtMemState[] = { @@ -46,20 +47,25 @@ const char *DescribeNtMemType(char buf[64], uint32_t x) { return _DescribeFlags(buf, 64, kNtMemType, ARRAYLEN(kNtMemType), "kNtMem", x); } -void __print_maps_win32(void) { +void __print_maps_win32(int64_t hProcess, const char *addr, size_t size) { char *p, b[5][64]; struct NtMemoryBasicInformation mi; kprintf("%-12s %-12s %10s %16s %16s %32s %32s\n", "Allocation", "BaseAddress", "RegionSize", "State", "Type", "AllocationProtect", "Protect"); for (p = 0;; p = (char *)mi.BaseAddress + mi.RegionSize) { bzero(&mi, sizeof(mi)); - if (!VirtualQuery(p, &mi, sizeof(mi))) + if (!VirtualQueryEx(hProcess, p, &mi, sizeof(mi))) break; sizefmt(b[0], mi.RegionSize, 1024); - kprintf("%.12lx %.12lx %10s %16s %16s %32s %32s\n", mi.AllocationBase, + kprintf("%.12lx %.12lx %10s %16s %16s %32s %32s%s\n", mi.AllocationBase, mi.BaseAddress, b[0], DescribeNtMemState(b[1], mi.State), DescribeNtMemType(b[2], mi.Type), _DescribeNtPageFlags(b[3], mi.AllocationProtect), - _DescribeNtPageFlags(b[4], mi.Protect)); + _DescribeNtPageFlags(b[4], mi.Protect), + (mi.State != kNtMemFree && + MAX(addr, (const char *)mi.BaseAddress) < + MIN(addr + size, (const char *)mi.BaseAddress + mi.RegionSize)) + ? " [OVERLAPS]" + : ""); } } diff --git a/libc/intrin/pthread_mutex_wipe_np.c b/libc/intrin/pthread_mutex_wipe_np.c index e49c3512f..9c19f6d0a 100644 --- a/libc/intrin/pthread_mutex_wipe_np.c +++ b/libc/intrin/pthread_mutex_wipe_np.c @@ -16,6 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/intrin/atomic.h" #include "libc/str/str.h" #include "libc/thread/lock.h" #include "libc/thread/posixthread.internal.h" @@ -25,11 +26,13 @@ * Unlocks mutex from child process after fork. */ int _pthread_mutex_wipe_np(pthread_mutex_t *mutex) { - void *edges = mutex->_edges; - uint64_t word = mutex->_word; - bzero(mutex, sizeof(*mutex)); - mutex->_word = MUTEX_UNLOCK(word); - mutex->_edges = edges; + atomic_init(&mutex->_word, MUTEX_UNLOCK(atomic_load_explicit( + &mutex->_word, memory_order_relaxed))); + atomic_init(&mutex->_futex, 0); + mutex->_pid = 0; + mutex->_nsync[0] = 0; + atomic_signal_fence(memory_order_relaxed); // avoid xmm + mutex->_nsync[1] = 0; return 0; } diff --git a/libc/intrin/pthread_setcancelstate.c b/libc/intrin/pthread_setcancelstate.c index e6d478c47..6e2a35f35 100644 --- a/libc/intrin/pthread_setcancelstate.c +++ b/libc/intrin/pthread_setcancelstate.c @@ -47,28 +47,30 @@ * @asyncsignalsafe */ errno_t pthread_setcancelstate(int state, int *oldstate) { + int old; errno_t err; struct PosixThread *pt; if (__tls_enabled && (pt = _pthread_self())) { + if (pt->pt_flags & PT_NOCANCEL) { + old = PTHREAD_CANCEL_DISABLE; + } else if (pt->pt_flags & PT_MASKED) { + old = PTHREAD_CANCEL_MASKED; + } else { + old = PTHREAD_CANCEL_ENABLE; + } switch (state) { case PTHREAD_CANCEL_ENABLE: - case PTHREAD_CANCEL_DISABLE: - case PTHREAD_CANCEL_MASKED: - if (oldstate) { - if (pt->pt_flags & PT_NOCANCEL) { - *oldstate = PTHREAD_CANCEL_DISABLE; - } else if (pt->pt_flags & PT_MASKED) { - *oldstate = PTHREAD_CANCEL_MASKED; - } else { - *oldstate = PTHREAD_CANCEL_ENABLE; - } - } pt->pt_flags &= ~(PT_NOCANCEL | PT_MASKED); - if (state == PTHREAD_CANCEL_MASKED) { - pt->pt_flags |= PT_MASKED; - } else if (state == PTHREAD_CANCEL_DISABLE) { - pt->pt_flags |= PT_NOCANCEL; - } + err = 0; + break; + case PTHREAD_CANCEL_DISABLE: + pt->pt_flags &= ~(PT_NOCANCEL | PT_MASKED); + pt->pt_flags |= PT_NOCANCEL; + err = 0; + break; + case PTHREAD_CANCEL_MASKED: + pt->pt_flags &= ~(PT_NOCANCEL | PT_MASKED); + pt->pt_flags |= PT_MASKED; err = 0; break; default: @@ -76,11 +78,12 @@ errno_t pthread_setcancelstate(int state, int *oldstate) { break; } } else { - if (oldstate) { - *oldstate = 0; - } + old = 0; err = 0; } + if (!err) + if (oldstate) + *oldstate = old; #if IsModeDbg() && 0 STRACE("pthread_setcancelstate(%s, [%s]) → %s", DescribeCancelState(0, &state), DescribeCancelState(err, oldstate), diff --git a/libc/intrin/rand64.c b/libc/intrin/rand64.c index e0da32f7d..53252327e 100644 --- a/libc/intrin/rand64.c +++ b/libc/intrin/rand64.c @@ -28,7 +28,19 @@ static int _rand64_pid; static unsigned __int128 _rand64_pool; -pthread_mutex_t __rand64_lock_obj = PTHREAD_MUTEX_INITIALIZER; +static pthread_mutex_t __rand64_lock_obj = PTHREAD_MUTEX_INITIALIZER; + +void __rand64_lock(void) { + _pthread_mutex_lock(&__rand64_lock_obj); +} + +void __rand64_unlock(void) { + _pthread_mutex_unlock(&__rand64_lock_obj); +} + +void __rand64_wipe(void) { + _pthread_mutex_wipe_np(&__rand64_lock_obj); +} /** * Returns nondeterministic random data. @@ -43,7 +55,7 @@ pthread_mutex_t __rand64_lock_obj = PTHREAD_MUTEX_INITIALIZER; uint64_t _rand64(void) { void *p; uint128_t s; - _pthread_mutex_lock(&__rand64_lock_obj); + __rand64_lock(); if (__pid == _rand64_pid) { s = _rand64_pool; // normal path } else { @@ -64,6 +76,6 @@ uint64_t _rand64(void) { _rand64_pid = __pid; } _rand64_pool = (s *= 15750249268501108917ull); // lemur64 - _pthread_mutex_unlock(&__rand64_lock_obj); + __rand64_unlock(); return s >> 64; } diff --git a/libc/intrin/tree.c b/libc/intrin/tree.c index 23e25f7f5..2c3e3fecc 100644 --- a/libc/intrin/tree.c +++ b/libc/intrin/tree.c @@ -54,7 +54,8 @@ struct Tree *tree_prev(struct Tree *node) { return parent; } -static void tree_rotate_left(struct Tree **root, struct Tree *x) { +dontinstrument static void tree_rotate_left(struct Tree **root, + struct Tree *x) { struct Tree *y = x->right; x->right = tree_get_left(y); if (tree_get_left(y)) @@ -71,7 +72,8 @@ static void tree_rotate_left(struct Tree **root, struct Tree *x) { x->parent = y; } -static void tree_rotate_right(struct Tree **root, struct Tree *y) { +dontinstrument static void tree_rotate_right(struct Tree **root, + struct Tree *y) { struct Tree *x = tree_get_left(y); tree_set_left(y, x->right); if (x->right) @@ -88,7 +90,8 @@ static void tree_rotate_right(struct Tree **root, struct Tree *y) { x->right = y; } -static void tree_rebalance_insert(struct Tree **root, struct Tree *node) { +dontinstrument static void tree_rebalance_insert(struct Tree **root, + struct Tree *node) { struct Tree *uncle; tree_set_red(node, 1); while (node != *root && tree_get_red(node->parent)) { @@ -157,8 +160,8 @@ void tree_insert(struct Tree **root, struct Tree *node, tree_cmp_f *cmp) { } } -static void tree_transplant(struct Tree **root, struct Tree *u, - struct Tree *v) { +dontinstrument static void tree_transplant(struct Tree **root, struct Tree *u, + struct Tree *v) { if (!u->parent) { *root = v; } else if (u == tree_get_left(u->parent)) { @@ -170,8 +173,9 @@ static void tree_transplant(struct Tree **root, struct Tree *u, v->parent = u->parent; } -static void tree_rebalance_remove(struct Tree **root, struct Tree *node, - struct Tree *parent) { +dontinstrument static void tree_rebalance_remove(struct Tree **root, + struct Tree *node, + struct Tree *parent) { struct Tree *sibling; while (node != *root && (!node || !tree_get_red(node))) { if (node == tree_get_left(parent)) { diff --git a/libc/intrin/virtualallocex.c b/libc/intrin/virtualallocex.c new file mode 100644 index 000000000..b55caf9aa --- /dev/null +++ b/libc/intrin/virtualallocex.c @@ -0,0 +1,50 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/syscall_support-nt.internal.h" +#include "libc/intrin/describeflags.h" +#include "libc/intrin/strace.h" +#include "libc/macros.h" +#include "libc/mem/alloca.h" +#include "libc/nt/enum/memflags.h" +#include "libc/nt/memory.h" +#include "libc/nt/thunk/msabi.h" + +__msabi extern typeof(VirtualAllocEx) *const __imp_VirtualAllocEx; + +static const char *DescribeAllocationType(char buf[48], uint32_t x) { + const struct DescribeFlags kAllocationTypeFlags[] = { + {kNtMemCommit, "Commit"}, // + {kNtMemReserve, "Reserve"}, // + {kNtMemReset, "Reset"}, // + }; + return _DescribeFlags(buf, 48, kAllocationTypeFlags, + ARRAYLEN(kAllocationTypeFlags), "kNtMem", x); +} + +void *VirtualAllocEx(int64_t hProcess, void *lpAddress, uint64_t dwSize, + uint32_t flAllocationType, uint32_t flProtect) { + void *res = __imp_VirtualAllocEx(hProcess, lpAddress, dwSize, + flAllocationType, flProtect); + if (!res) + __winerr(); + NTTRACE("VirtualAllocEx(%ld, %p, %'lu, %s, %s) → %p% m", hProcess, lpAddress, + dwSize, DescribeAllocationType(alloca(48), flAllocationType), + DescribeNtPageFlags(flProtect), res); + return res; +} diff --git a/libc/intrin/virtualprotect.c b/libc/intrin/virtualprotect.c index 4b1aaa1a0..5f653afff 100644 --- a/libc/intrin/virtualprotect.c +++ b/libc/intrin/virtualprotect.c @@ -16,13 +16,8 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/calls/syscall_support-nt.internal.h" -#include "libc/intrin/describeflags.h" -#include "libc/intrin/strace.h" -#include "libc/log/libfatal.internal.h" #include "libc/nt/memory.h" - -__msabi extern typeof(VirtualProtect) *const __imp_VirtualProtect; +#include "libc/nt/runtime.h" /** * Protects memory on the New Technology. @@ -31,12 +26,6 @@ __msabi extern typeof(VirtualProtect) *const __imp_VirtualProtect; textwindows bool32 VirtualProtect(void *lpAddress, uint64_t dwSize, uint32_t flNewProtect, uint32_t *lpflOldProtect) { - bool32 bOk; - bOk = __imp_VirtualProtect(lpAddress, dwSize, flNewProtect, lpflOldProtect); - if (!bOk) - __winerr(); - NTTRACE("VirtualProtect(%p, %'zu, %s, [%s]) → %hhhd% m", lpAddress, dwSize, - DescribeNtPageFlags(flNewProtect), - DescribeNtPageFlags(*lpflOldProtect), bOk); - return bOk; + return VirtualProtectEx(GetCurrentProcess(), lpAddress, dwSize, flNewProtect, + lpflOldProtect); } diff --git a/libc/intrin/virtualprotectex.c b/libc/intrin/virtualprotectex.c new file mode 100644 index 000000000..44615c730 --- /dev/null +++ b/libc/intrin/virtualprotectex.c @@ -0,0 +1,43 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2022 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/syscall_support-nt.internal.h" +#include "libc/intrin/describeflags.h" +#include "libc/intrin/strace.h" +#include "libc/log/libfatal.internal.h" +#include "libc/nt/memory.h" + +__msabi extern typeof(VirtualProtectEx) *const __imp_VirtualProtectEx; + +/** + * Protects memory on the New Technology. + * @note this wrapper takes care of ABI, STRACE(), and __winerr() + */ +textwindows bool32 VirtualProtectEx(int64_t hProcess, void *lpAddress, + uint64_t dwSize, uint32_t flNewProtect, + uint32_t *lpflOldProtect) { + bool32 bOk; + bOk = __imp_VirtualProtectEx(hProcess, lpAddress, dwSize, flNewProtect, + lpflOldProtect); + if (!bOk) + __winerr(); + NTTRACE("VirtualProtectEx(%ld, %p, %'zu, %s, [%s]) → %hhhd% m", hProcess, + lpAddress, dwSize, DescribeNtPageFlags(flNewProtect), + DescribeNtPageFlags(*lpflOldProtect), bOk); + return bOk; +} diff --git a/libc/intrin/wintlsinit.c b/libc/intrin/wintlsinit.c index d14798d06..eb19331ff 100644 --- a/libc/intrin/wintlsinit.c +++ b/libc/intrin/wintlsinit.c @@ -35,8 +35,8 @@ textwindows dontinstrument void __bootstrap_tls(struct CosmoTib *tib, tib->tib_self = tib; tib->tib_self2 = tib; tib->tib_sigmask = -1; - tib->tib_strace = __strace; - tib->tib_ftrace = __ftrace; + tib->tib_strace = -100; + tib->tib_ftrace = -100; tib->tib_sigstack_size = 57344; tib->tib_sigstack_addr = bp - 57344; int tid = __imp_GetCurrentThreadId(); diff --git a/libc/intrin/writeprocessmemory.c b/libc/intrin/writeprocessmemory.c new file mode 100644 index 000000000..ec99b583b --- /dev/null +++ b/libc/intrin/writeprocessmemory.c @@ -0,0 +1,36 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/syscall_support-nt.internal.h" +#include "libc/intrin/strace.h" +#include "libc/nt/memory.h" +#include "libc/nt/thunk/msabi.h" + +__msabi extern typeof(WriteProcessMemory) *const __imp_WriteProcessMemory; + +bool32 WriteProcessMemory(int64_t hProcess, void *lpBaseAddress, + const void *lpBuffer, uint64_t nSize, + uint64_t *opt_out_lpNumberOfBytesWritten) { + bool32 ok = __imp_WriteProcessMemory(hProcess, lpBaseAddress, lpBuffer, nSize, + opt_out_lpNumberOfBytesWritten); + if (!ok) + __winerr(); + NTTRACE("WriteProcessMemory(%ld, %p, %p, %'lu, %p) → %hhhd% m", hProcess, + lpBaseAddress, lpBuffer, nSize, opt_out_lpNumberOfBytesWritten, ok); + return ok; +} diff --git a/libc/nexgen32e/threaded.c b/libc/nexgen32e/threaded.c index 1fad2aa80..b2c53384b 100644 --- a/libc/nexgen32e/threaded.c +++ b/libc/nexgen32e/threaded.c @@ -18,8 +18,6 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/thread/tls.h" -#ifdef __x86_64__ -char __tls_enabled; -#endif - +#ifndef __x86_64__ unsigned __tls_index; +#endif diff --git a/libc/nt/kernel32/VirtualAllocEx.S b/libc/nt/kernel32/VirtualAllocEx.S index bdf00950b..239913a84 100644 --- a/libc/nt/kernel32/VirtualAllocEx.S +++ b/libc/nt/kernel32/VirtualAllocEx.S @@ -1,18 +1,2 @@ #include "libc/nt/codegen.h" .imp kernel32,__imp_VirtualAllocEx,VirtualAllocEx - - .text.windows - .ftrace1 -VirtualAllocEx: - .ftrace2 -#ifdef __x86_64__ - push %rbp - mov %rsp,%rbp - mov __imp_VirtualAllocEx(%rip),%rax - jmp __sysv2nt6 -#elif defined(__aarch64__) - mov x0,#0 - ret -#endif - .endfn VirtualAllocEx,globl - .previous diff --git a/libc/nt/kernel32/VirtualProtectEx.S b/libc/nt/kernel32/VirtualProtectEx.S new file mode 100644 index 000000000..8d22b1789 --- /dev/null +++ b/libc/nt/kernel32/VirtualProtectEx.S @@ -0,0 +1,2 @@ +#include "libc/nt/codegen.h" +.imp kernel32,__imp_VirtualProtectEx,VirtualProtectEx diff --git a/libc/nt/kernel32/VirtualQueryEx.S b/libc/nt/kernel32/VirtualQueryEx.S new file mode 100644 index 000000000..d810cf97a --- /dev/null +++ b/libc/nt/kernel32/VirtualQueryEx.S @@ -0,0 +1,18 @@ +#include "libc/nt/codegen.h" +.imp kernel32,__imp_VirtualQueryEx,VirtualQueryEx + + .text.windows + .ftrace1 +VirtualQueryEx: + .ftrace2 +#ifdef __x86_64__ + push %rbp + mov %rsp,%rbp + mov __imp_VirtualQueryEx(%rip),%rax + jmp __sysv2nt +#elif defined(__aarch64__) + mov x0,#0 + ret +#endif + .endfn VirtualQueryEx,globl + .previous diff --git a/libc/nt/kernel32/WriteProcessMemory.S b/libc/nt/kernel32/WriteProcessMemory.S new file mode 100644 index 000000000..222dd5e72 --- /dev/null +++ b/libc/nt/kernel32/WriteProcessMemory.S @@ -0,0 +1,2 @@ +#include "libc/nt/codegen.h" +.imp kernel32,__imp_WriteProcessMemory,WriteProcessMemory diff --git a/libc/nt/master.sh b/libc/nt/master.sh index eb05cfd07..d13447f2d 100755 --- a/libc/nt/master.sh +++ b/libc/nt/master.sh @@ -9,6 +9,7 @@ # KERNEL32.DLL # # Name Actual DLL Arity + imp '' CreateDirectoryW kernel32 2 imp '' CreateFileA kernel32 7 imp '' CreateFileMappingNumaW kernel32 7 @@ -40,9 +41,12 @@ imp '' SetCurrentDirectoryW kernel32 1 imp '' TerminateProcess kernel32 2 imp '' UnlockFileEx kernel32 5 imp '' UnmapViewOfFile kernel32 1 +imp '' VirtualAllocEx kernel32 5 imp '' VirtualProtect kernel32 4 +imp '' VirtualProtectEx kernel32 5 imp '' WaitForMultipleObjects kernel32 4 imp '' WaitForSingleObject kernel32 2 +imp '' WriteProcessMemory kernel32 5 imp 'AcquireSRWLockExclusive' AcquireSRWLockExclusive kernel32 1 imp 'AcquireSRWLockShared' AcquireSRWLockShared kernel32 1 imp 'AddDllDirectory' AddDllDirectory kernel32 1 @@ -185,8 +189,8 @@ imp 'GetWindowsDirectory' GetWindowsDirectoryW kernel32 2 imp 'GetWindowsDirectoryA' GetWindowsDirectoryA kernel32 2 imp 'GlobalAlloc' GlobalAlloc kernel32 2 imp 'GlobalFree' GlobalFree kernel32 1 -imp 'GlobalMemoryStatusEx' GlobalMemoryStatusEx kernel32 1 imp 'GlobalLock' GlobalLock kernel32 1 +imp 'GlobalMemoryStatusEx' GlobalMemoryStatusEx kernel32 1 imp 'GlobalUnlock' GlobalUnlock kernel32 1 imp 'HeapAlloc' HeapAlloc kernel32 3 imp 'HeapCompact' HeapCompact kernel32 2 @@ -300,10 +304,10 @@ imp 'UnmapViewOfFile2' UnmapViewOfFile2 kernel32 2 imp 'UnmapViewOfFileEx' UnmapViewOfFileEx kernel32 3 imp 'UpdateProcThreadAttribute' UpdateProcThreadAttribute kernel32 7 imp 'VirtualAlloc' VirtualAlloc kernel32 4 -imp 'VirtualAllocEx' VirtualAllocEx kernel32 5 imp 'VirtualFree' VirtualFree kernel32 3 imp 'VirtualLock' VirtualLock kernel32 2 imp 'VirtualQuery' VirtualQuery kernel32 3 +imp 'VirtualQueryEx' VirtualQueryEx kernel32 4 imp 'VirtualUnlock' VirtualUnlock kernel32 2 imp 'WaitForMultipleObjectsEx' WaitForMultipleObjectsEx kernel32 5 imp 'WaitForSingleObjectEx' WaitForSingleObjectEx kernel32 3 diff --git a/libc/nt/memory.h b/libc/nt/memory.h index 376f0fb16..9f6792657 100644 --- a/libc/nt/memory.h +++ b/libc/nt/memory.h @@ -71,8 +71,17 @@ bool32 VirtualUnlock(const void *lpAddress, size_t dwSize); uint64_t VirtualQuery(const void *lpAddress, struct NtMemoryBasicInformation *lpBuffer, uint64_t dwLength); +uint64_t VirtualQueryEx(int64_t hProcess, const void *lpAddress, + struct NtMemoryBasicInformation *lpBuffer, + uint64_t dwLength); + void *VirtualAllocEx(int64_t hProcess, void *lpAddress, uint64_t dwSize, uint32_t flAllocationType, uint32_t flProtect); +bool32 VirtualProtectEx(int64_t hProcess, void *lpAddress, uint64_t dwSize, + uint32_t flNewProtect, uint32_t *out_lpflOldProtect); +bool32 WriteProcessMemory(int64_t hProcess, void *lpBaseAddress, + const void *lpBuffer, uint64_t nSize, + uint64_t *opt_out_lpNumberOfBytesWritten); int64_t GetProcessHeap(void); void *HeapAlloc(int64_t hHeap, uint32_t dwFlags, size_t dwBytes) __wur; diff --git a/libc/proc/fork-nt.c b/libc/proc/fork-nt.c index d527e641a..3bb1c4176 100644 --- a/libc/proc/fork-nt.c +++ b/libc/proc/fork-nt.c @@ -16,61 +16,53 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "ape/sections.internal.h" -#include "libc/assert.h" -#include "libc/atomic.h" #include "libc/calls/internal.h" #include "libc/calls/sig.internal.h" -#include "libc/calls/state.internal.h" #include "libc/calls/syscall_support-nt.internal.h" #include "libc/errno.h" -#include "libc/fmt/itoa.h" -#include "libc/intrin/atomic.h" #include "libc/intrin/directmap.h" +#include "libc/intrin/dll.h" #include "libc/intrin/kprintf.h" #include "libc/intrin/maps.h" #include "libc/intrin/strace.h" -#include "libc/intrin/tree.h" #include "libc/intrin/weaken.h" +#include "libc/limits.h" #include "libc/macros.h" -#include "libc/nt/createfile.h" -#include "libc/nt/enum/accessmask.h" #include "libc/nt/enum/creationdisposition.h" #include "libc/nt/enum/filemapflags.h" +#include "libc/nt/enum/memflags.h" #include "libc/nt/enum/pageflags.h" +#include "libc/nt/enum/processcreationflags.h" #include "libc/nt/enum/startf.h" #include "libc/nt/errors.h" -#include "libc/nt/ipc.h" #include "libc/nt/memory.h" #include "libc/nt/process.h" #include "libc/nt/runtime.h" -#include "libc/nt/signals.h" -#include "libc/nt/struct/ntexceptionpointers.h" +#include "libc/nt/struct/processinformation.h" +#include "libc/nt/struct/startupinfo.h" #include "libc/nt/thread.h" #include "libc/nt/thunk/msabi.h" -#include "libc/proc/ntspawn.h" +#include "libc/nt/winsock.h" #include "libc/proc/proc.internal.h" #include "libc/runtime/internal.h" -#include "libc/runtime/memtrack.internal.h" -#include "libc/runtime/runtime.h" -#include "libc/runtime/stack.h" #include "libc/runtime/symbols.internal.h" -#include "libc/str/str.h" -#include "libc/sysv/consts/at.h" -#include "libc/sysv/consts/limits.h" #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/prot.h" #include "libc/sysv/consts/sig.h" #include "libc/sysv/errfuns.h" -#include "libc/thread/itimer.h" -#include "libc/thread/posixthread.internal.h" #include "libc/thread/tls.h" #ifdef __x86_64__ extern long __klog_handle; -__msabi extern typeof(GetCurrentProcessId) *const __imp_GetCurrentProcessId; +extern bool __winmain_isfork; +extern intptr_t __winmain_jmpbuf[5]; +extern struct CosmoTib *__winmain_tib; -static textwindows wontreturn void AbortFork(const char *func, void *addr) { +__msabi extern typeof(TlsAlloc) *const __imp_TlsAlloc; +__msabi extern typeof(MapViewOfFileEx) *const __imp_MapViewOfFileEx; +__msabi extern typeof(VirtualProtectEx) *const __imp_VirtualProtectEx; + +textwindows wontreturn static void AbortFork(const char *func, void *addr) { #if SYSDEBUG kprintf("fork() %!s(%lx) failed with win32 error %u\n", func, addr, GetLastError()); @@ -78,93 +70,10 @@ static textwindows wontreturn void AbortFork(const char *func, void *addr) { TerminateThisProcess(SIGSTKFLT); } -static textwindows char16_t *ParseInt(char16_t *p, int64_t *x) { - *x = 0; - while (*p == ' ') - p++; - while ('0' <= *p && *p <= '9') { - *x *= 10; - *x += *p++ - '0'; - } - return p; -} - -static inline textwindows ssize_t ForkIo(int64_t h, char *p, size_t n, - bool32 (*f)(int64_t, void *, uint32_t, - uint32_t *, - struct NtOverlapped *)) { - size_t i; - uint32_t x; - for (i = 0; i < n; i += x) { - if (!f(h, p + i, n - i, &x, 0)) - return __winerr(); - if (!x) - break; - } - return i; -} - -static dontinline textwindows ssize_t ForkIo2( - int64_t h, void *buf, size_t n, - bool32 (*fn)(int64_t, void *, uint32_t, uint32_t *, struct NtOverlapped *), - const char *sf, bool ischild) { - ssize_t rc = ForkIo(h, buf, n, fn); - if (ischild) { - // prevent crashes - __tls_enabled = false; - __pid = __imp_GetCurrentProcessId(); - __klog_handle = 0; - __maps.maps = 0; - } - NTTRACE("%s(%ld, %p, %'zu) → %'zd% m", sf, h, buf, n, rc); - return rc; -} - -static dontinline textwindows bool WriteAll(int64_t h, void *buf, size_t n) { - bool ok; - ok = ForkIo2(h, buf, n, (void *)WriteFile, "WriteFile", false) != -1; - if (!ok) - STRACE("fork() failed in parent due to WriteAll(%ld, %p, %'zu) → %u", h, - buf, n, GetLastError()); - return ok; -} - -static textwindows dontinline void ReadOrDie(int64_t h, void *buf, size_t n) { - ssize_t got; - if ((got = ForkIo2(h, buf, n, ReadFile, "ReadFile", true)) == -1) - AbortFork("ReadFile1", buf); - if (got != n) - AbortFork("ReadFile2", buf); -} - -static textwindows int64_t MapOrDie(uint32_t prot, uint64_t size) { - int64_t h; - for (;;) { - if ((h = CreateFileMapping(-1, 0, prot, size >> 32, size, 0))) - return h; - if (GetLastError() == kNtErrorAccessDenied) { - switch (prot) { - case kNtPageExecuteWritecopy: - prot = kNtPageWritecopy; - continue; - case kNtPageExecuteReadwrite: - prot = kNtPageReadwrite; - continue; - case kNtPageExecuteRead: - prot = kNtPageReadonly; - continue; - default: - break; - } - } - AbortFork("MapOrDie", (void *)size); - } -} - -static textwindows void ViewOrDie(int64_t h, uint32_t access, size_t pos, +textwindows static void ViewOrDie(int64_t h, uint32_t access, size_t pos, size_t size, void *base) { TryAgain: - if (!MapViewOfFileEx(h, access, pos >> 32, pos, size, base)) { + if (!__imp_MapViewOfFileEx(h, access, pos >> 32, pos, size, base)) { if ((access & kNtFileMapExecute) && GetLastError() == kNtErrorAccessDenied) { access &= ~kNtFileMapExecute; @@ -174,302 +83,215 @@ TryAgain: } } -static __msabi textwindows int OnForkCrash(struct NtExceptionPointers *ep) { - kprintf("error: fork() child crashed!%n" - "\tExceptionCode = %#x%n" - "\tRip = %x%n", - ep->ExceptionRecord->ExceptionCode, - ep->ContextRecord ? ep->ContextRecord->Rip : -1); - TerminateThisProcess(SIGSTKFLT); -} +textwindows static void sys_fork_nt_child(void) { -static textwindows void *Malloc(size_t size) { - return HeapAlloc(GetProcessHeap(), 0, size); -} + // setup runtime + __klog_handle = 0; + __tls_index = __imp_TlsAlloc(); + __set_tls_win32(__winmain_tib); + __tls_enabled = true; -textwindows void WinMainForked(void) { - intptr_t jb[5]; - int64_t reader; - int64_t savetsc; - uint32_t varlen; - atomic_ulong *sigproc; - char16_t fvar[21 + 1 + 21 + 1]; - struct Fds *fds = __veil("r", &g_fds); + // resurrect shared memory mappings + struct Map *next; + for (struct Map *map = __maps_first(); map; map = next) { + next = __maps_next(map); - // save signal pointer - sigproc = __sig.process; - - // check to see if the process was actually forked - // this variable should have the pipe handle numba - varlen = GetEnvironmentVariable(u"_FORK", fvar, ARRAYLEN(fvar)); - if (!varlen || varlen >= ARRAYLEN(fvar)) - return; - /* STRACE("WinMainForked()"); */ - SetEnvironmentVariable(u"_FORK", NULL); -#if SYSDEBUG - int64_t oncrash = AddVectoredExceptionHandler(1, (void *)OnForkCrash); -#endif - ParseInt(fvar, &reader); - - // read the cpu state from the parent process & plus - ReadOrDie(reader, jb, sizeof(jb)); - - // read memory mappings from parent process - struct Tree *maps = 0; - for (;;) { - struct Map *map = Malloc(sizeof(struct Map)); - ReadOrDie(reader, map, sizeof(struct Map)); - if (map->addr == MAP_FAILED) - break; - tree_insert(&maps, &map->tree, __maps_compare); - } - - // map memory into process - int granularity = __gransize; - for (struct Tree *e = tree_first(maps); e; e = tree_next(e)) { - struct Map *map = MAP_TREE_CONTAINER(e); - if ((uintptr_t)map->addr & (granularity - 1)) - continue; - // get true length in case mprotect() chopped up actual win32 map - size_t size = map->size; - for (struct Tree *e2 = tree_next(e); e2; e2 = tree_next(e2)) { - struct Map *map2 = MAP_TREE_CONTAINER(e2); - if (map2->hand == -1 && map->addr + size == map2->addr) { - size += map2->size; - } else { - break; + // cleanup nofork mappings + if (map->flags & MAP_NOFORK) { + if ((map->flags & MAP_TYPE) != MAP_FILE) { + tree_remove(&__maps.maps, &map->tree); + __maps.pages -= (map->size + __pagesize - 1) / __pagesize; + __maps.count -= 1; + __maps_free(map); } + continue; } - // obtain the most permissive access possible - unsigned prot, access; - if (map->readonlyfile) { - prot = kNtPageExecuteRead; - access = kNtFileMapRead | kNtFileMapExecute; - } else { - prot = kNtPageExecuteReadwrite; - access = kNtFileMapWrite | kNtFileMapExecute; - } + + // private maps already copied/protected to child by parent if ((map->flags & MAP_TYPE) != MAP_SHARED) { - // we don't need to close the map handle because sys_mmap_nt - // doesn't mark it inheritable across fork() for MAP_PRIVATE - map->hand = MapOrDie(prot, size); - ViewOrDie(map->hand, access, 0, size, map->addr); - ReadOrDie(reader, map->addr, size); - } else { - // we can however safely inherit MAP_SHARED with zero copy - ViewOrDie(map->hand, access, map->off, size, map->addr); + // it's not copy-on-write anymore + map->iscow = false; + // but it used VirtualAlloc() so munmap() must VirtualFree() + if (map->hand > 0) { + CloseHandle(map->hand); + map->hand = MAPS_VIRTUAL; + } + continue; } - } - // read the .data and .bss program image sections - savetsc = kStartTsc; - ReadOrDie(reader, __data_start, __data_end - __data_start); - ReadOrDie(reader, __bss_start, __bss_end - __bss_start); - kStartTsc = savetsc; - __tls_enabled = false; + // handle granularity aligned shared mapping + if (__maps_isalloc(map)) { - // fixup memory manager - __maps.maps = 0; - __maps.freed = 0; - __maps.count = 0; - __maps.pages = 0; - for (struct Tree *e = tree_first(maps); e; e = tree_next(e)) { - struct Map *map = MAP_TREE_CONTAINER(e); - __maps.count += 1; - __maps.pages += (map->size + __pagesize - 1) / __pagesize; + // get true size of win32 allocation + size_t allocsize = map->size; + for (struct Map *map2 = next; map2; map2 = __maps_next(map2)) { + if (!__maps_isalloc(map2) && map->addr + allocsize == map2->addr) { + allocsize += map2->size; + } else { + break; + } + } + + // create allocation with most permissive access possible + // if we don't create as rwx then we can't mprotect(rwx) later + unsigned access; + if (map->readonlyfile) { + access = kNtFileMapRead | kNtFileMapExecute; + } else { + access = kNtFileMapWrite | kNtFileMapExecute; + } + + // resurrect copyless memory via inherited win32 handle + ViewOrDie(map->hand, access, map->off, allocsize, map->addr); + } + + // restore memory protection status on pages unsigned old_protect; - if (!VirtualProtect(map->addr, map->size, __prot2nt(map->prot, map->iscow), - &old_protect)) - AbortFork("VirtualProtect", map->addr); + if (!__imp_VirtualProtectEx(GetCurrentProcess(), map->addr, map->size, + __prot2nt(map->prot, false), &old_protect)) + AbortFork("VirtualProtectEx", map->addr); } - __maps.maps = maps; - __maps_init(); - // mitosis complete - if (!CloseHandle(reader)) - AbortFork("CloseHandle", (void *)reader); + // function tracing is now safe + ftrace_enabled(+1); + + // initialize winsock + void WinSockFork(void); + if (_weaken(WinSockFork)) + _weaken(WinSockFork)(); // rewrap the stdin named pipe hack // since the handles closed on fork - fds->p[0].handle = GetStdHandle(kNtStdInputHandle); - fds->p[1].handle = GetStdHandle(kNtStdOutputHandle); - fds->p[2].handle = GetStdHandle(kNtStdErrorHandle); + g_fds.p[0].handle = GetStdHandle(kNtStdInputHandle); + g_fds.p[1].handle = GetStdHandle(kNtStdOutputHandle); + g_fds.p[2].handle = GetStdHandle(kNtStdErrorHandle); +} - // restore signal pointer - __sig.process = sigproc; +textwindows static int sys_fork_nt_parent(uint32_t dwCreationFlags) { - // restore the crash reporting stuff -#if SYSDEBUG - RemoveVectoredExceptionHandler(oncrash); -#endif + // allocate process object + struct Proc *proc; + if (!(proc = __proc_new())) + return -1; - // jump back into function below - __builtin_longjmp(jb, 1); + // get path of this executable + char16_t prog[PATH_MAX]; + unsigned got = GetModuleFileName(0, prog, ARRAYLEN(prog)); + if (!got || got >= ARRAYLEN(prog)) { + dll_make_first(&__proc.free, &proc->elem); + enomem(); + return -1; + } + + // spawn new process in suspended state + struct NtProcessInformation procinfo; + struct NtStartupInfo startinfo = { + .cb = sizeof(struct NtStartupInfo), + .dwFlags = kNtStartfUsestdhandles, + .hStdInput = g_fds.p[0].handle, + .hStdOutput = g_fds.p[1].handle, + .hStdError = g_fds.p[2].handle, + }; + if (!CreateProcess(prog, 0, 0, 0, true, + dwCreationFlags | kNtCreateSuspended | + kNtInheritParentAffinity | + kNtCreateUnicodeEnvironment | + GetPriorityClass(GetCurrentProcess()), + 0, 0, &startinfo, &procinfo)) { + STRACE("fork() %s() failed w/ %m %d", "CreateProcess", GetLastError()); + dll_make_first(&__proc.free, &proc->elem); + if (errno != ENOMEM) + eagain(); + return -1; + } + + // ensure process can be signaled before returning + UnmapViewOfFile(__sig_map_process(procinfo.dwProcessId, kNtOpenAlways)); + + // let's go + bool ok = true; + uint32_t child_old_protect; + uint32_t parent_old_protect; + + // copy memory manager maps + for (struct MapSlab *slab = + atomic_load_explicit(&__maps.slabs, memory_order_acquire); + slab; slab = slab->next) { + ok = ok && !!VirtualAllocEx(procinfo.hProcess, slab, MAPS_SIZE, + kNtMemReserve | kNtMemCommit, kNtPageReadwrite); + ok = + ok && !!WriteProcessMemory(procinfo.hProcess, slab, slab, MAPS_SIZE, 0); + } + + // copy private memory maps + for (struct Map *map = __maps_first(); map; map = __maps_next(map)) { + if ((map->flags & MAP_TYPE) == MAP_SHARED) + continue; + if ((map->flags & MAP_NOFORK) && (map->flags & MAP_TYPE) != MAP_FILE) + continue; + if (__maps_isalloc(map)) { + size_t allocsize = map->size; + for (struct Map *m2 = __maps_next(map); m2; m2 = __maps_next(m2)) { + if (!__maps_isalloc(m2) && map->addr + allocsize == m2->addr) { + allocsize += m2->size; + } else { + break; + } + } + if ((map->flags & MAP_NOFORK) && (map->flags & MAP_TYPE) == MAP_FILE) { + ok = ok && !!VirtualProtectEx(procinfo.hProcess, map->addr, allocsize, + kNtPageReadwrite, &child_old_protect); + } else { + ok = ok && !!VirtualAllocEx(procinfo.hProcess, map->addr, allocsize, + kNtMemReserve | kNtMemCommit, + kNtPageExecuteReadwrite); + } + } + if (!(map->prot & PROT_READ)) + ok = ok && !!VirtualProtect(map->addr, map->size, kNtPageReadwrite, + &parent_old_protect); + ok = ok && !!WriteProcessMemory(procinfo.hProcess, map->addr, map->addr, + map->size, 0); + ok = ok && + !!VirtualProtectEx(procinfo.hProcess, map->addr, map->size, + __prot2nt(map->prot, false), &child_old_protect); + if (!(map->prot & PROT_READ)) + ok = ok && !!VirtualProtect(map->addr, map->size, parent_old_protect, + &parent_old_protect); + } + + // set process loose + ok = ok && ResumeThread(procinfo.hThread) != -1u; + ok &= !!CloseHandle(procinfo.hThread); + + // return pid of new process + if (ok) { + proc->wasforked = true; + proc->handle = procinfo.hProcess; + proc->pid = procinfo.dwProcessId; + __proc_add(proc); + return procinfo.dwProcessId; + } else { + if (errno != ENOMEM) + eagain(); // posix fork() only specifies two errors + TerminateProcess(procinfo.hProcess, SIGKILL); + CloseHandle(procinfo.hProcess); + dll_make_first(&__proc.free, &proc->elem); + return -1; + } } textwindows int sys_fork_nt(uint32_t dwCreationFlags) { - char ok; - char **args; - int rc = -1; - intptr_t jb[5]; - struct Proc *proc; - struct CosmoTib *tib; - char16_t pipename[64]; - int64_t reader, writer; - struct NtStartupInfo startinfo; - struct NtProcessInformation procinfo; - char *p, forkvar[6 + 21 + 1 + 21 + 1]; - tib = __get_tls(); - if (!(proc = __proc_new())) - return -1; - ftrace_enabled(-1); - strace_enabled(-1); - if (!__builtin_setjmp(jb)) { - reader = CreateNamedPipe(__create_pipe_name(pipename), kNtPipeAccessInbound, - kNtPipeTypeByte | kNtPipeReadmodeByte, 1, PIPE_BUF, - PIPE_BUF, 0, &kNtIsInheritable); - writer = CreateFile(pipename, kNtGenericWrite, 0, 0, kNtOpenExisting, 0, 0); - if (reader != -1 && writer != -1) { - p = stpcpy(forkvar, "_FORK="); - p = FormatUint64(p, reader); - bzero(&startinfo, sizeof(startinfo)); - startinfo.cb = sizeof(struct NtStartupInfo); - startinfo.dwFlags = kNtStartfUsestdhandles; - startinfo.hStdInput = g_fds.p[0].handle; - startinfo.hStdOutput = g_fds.p[1].handle; - startinfo.hStdError = g_fds.p[2].handle; - args = __argv; -#if SYSDEBUG - int i; - // If --strace was passed to this program, then propagate it the - // forked process since the flag was removed by __intercept_flag - if (strace_enabled(0) > 0) { - int n; - for (n = 0; args[n];) - ++n; -#pragma GCC push_options -#pragma GCC diagnostic ignored "-Walloca-larger-than=" - int nbytes = (n + 2) * sizeof(char *); - char **args2 = alloca(nbytes); - CheckLargeStackAllocation(args2, nbytes); -#pragma GCC pop_options - for (i = 0; i < n; ++i) - args2[i] = args[i]; - args2[i++] = "--strace"; - args2[i] = 0; - args = args2; - } -#endif - NTTRACE("STARTING SPAWN"); - int spawnrc = ntspawn(&(struct NtSpawnArgs){ - AT_FDCWD, GetProgramExecutableName(), args, environ, - (char *[]){forkvar, 0}, dwCreationFlags, 0, 0, 0, 0, &startinfo, - &procinfo}); - if (spawnrc != -1) { - CloseHandle(procinfo.hThread); - ok = WriteAll(writer, jb, sizeof(jb)); - // this list will be populated with the maps we're transferring - for (struct Map *map = __maps_first(); ok && map; - map = __maps_next(map)) { - if (map->flags & MAP_NOFORK) - continue; - if (MAX((char *)__executable_start, map->addr) < - MIN((char *)_end, map->addr + map->size)) - continue; // executable image is loaded by windows - ok = WriteAll(writer, map, sizeof(*map)); - } - // send a terminating Map struct to child - if (ok) { - struct Map map; - map.addr = MAP_FAILED; - ok = WriteAll(writer, &map, sizeof(map)); - } - // now write content of each map to child - int granularity = __gransize; - for (struct Map *map = __maps_first(); ok && map; - map = __maps_next(map)) { - if (map->flags & MAP_NOFORK) - continue; - // we only need to worry about the base mapping - if ((uintptr_t)map->addr & (granularity - 1)) - continue; - if (MAX((char *)__executable_start, map->addr) < - MIN((char *)_end, map->addr + map->size)) - continue; // executable image is loaded by windows - // shared mappings don't need to be copied - if ((map->flags & MAP_TYPE) == MAP_SHARED) - continue; - // get true length in case mprotect() chopped up actual win32 map - size_t size = map->size; - for (struct Map *map2 = __maps_next(map); map2; - map2 = __maps_next(map2)) { - if (map2->hand == -1 && map->addr + size == map2->addr) { - size += map2->size; - } else { - break; - } - } - for (struct Map *map2 = map; ok && map2; map2 = __maps_next(map2)) { - if (!(map2->prot & PROT_READ)) - if (map->addr >= map2->addr && map->addr < map->addr + size) - ok = VirtualProtect( - map2->addr, map2->size, - __prot2nt(map2->prot | PROT_READ, map2->iscow), - &map2->visited); - } - if (ok) - ok = WriteAll(writer, map->addr, size); - for (struct Map *map2 = map; ok && map2; map2 = __maps_next(map2)) { - if (!(map2->prot & PROT_READ)) - if (map->addr >= map2->addr && map->addr < map->addr + size) - ok = VirtualProtect(map2->addr, map2->size, map2->visited, - &map2->visited); - } - } - if (ok) - ok = WriteAll(writer, __data_start, __data_end - __data_start); - if (ok) - ok = WriteAll(writer, __bss_start, __bss_end - __bss_start); - if (ok) { - if (!CloseHandle(writer)) - ok = false; - writer = -1; - } - if (ok) { - proc->wasforked = true; - proc->handle = procinfo.hProcess; - rc = proc->pid = procinfo.dwProcessId; - __proc_add(proc); - } else { - TerminateProcess(procinfo.hProcess, SIGKILL); - CloseHandle(procinfo.hProcess); - rc = -1; - } - } - } - if (reader != -1) - CloseHandle(reader); - if (writer != -1) - CloseHandle(writer); - if (rc == -1 && errno != ENOMEM) - eagain(); // posix fork() only specifies two errors + int rc; + __winmain_isfork = true; + __winmain_tib = __get_tls(); + if (!__builtin_setjmp(__winmain_jmpbuf)) { + rc = sys_fork_nt_parent(dwCreationFlags); } else { + sys_fork_nt_child(); rc = 0; - // re-apply code morphing for thread-local storage - __tls_index = TlsAlloc(); - __set_tls_win32(tib); - __morph_tls(); - __tls_enabled = true; - // the child's pending signals is initially empty - atomic_store_explicit(&tib->tib_sigpending, 0, memory_order_relaxed); - // re-apply code morphing for function tracing - if (ftrace_stackdigs) - _weaken(__hook)(_weaken(ftrace_hook), _weaken(GetSymbolTable)()); } - if (rc == -1) - dll_make_first(&__proc.free, &proc->elem); - ftrace_enabled(+1); - strace_enabled(+1); + __winmain_isfork = false; return rc; } diff --git a/libc/proc/fork.c b/libc/proc/fork.c index cefa51fb6..eb2213c94 100644 --- a/libc/proc/fork.c +++ b/libc/proc/fork.c @@ -39,6 +39,7 @@ #include "libc/nt/thunk/msabi.h" #include "libc/proc/proc.internal.h" #include "libc/runtime/internal.h" +#include "libc/runtime/runtime.h" #include "libc/runtime/syslib.internal.h" #include "libc/stdio/internal.h" #include "libc/str/str.h" @@ -52,13 +53,16 @@ __msabi extern typeof(GetCurrentProcessId) *const __imp_GetCurrentProcessId; extern pthread_mutex_t __cxa_lock_obj; -extern pthread_mutex_t __dlopen_lock_obj; extern pthread_mutex_t __pthread_lock_obj; -extern pthread_mutex_t __rand64_lock_obj; extern pthread_mutex_t __sig_worker_lock; +void __rand64_lock(void); +void __rand64_unlock(void); +void __rand64_wipe(void); + void __dlopen_lock(void); void __dlopen_unlock(void); +void __dlopen_wipe(void); // first and last and always // it is the lord of all locks @@ -111,34 +115,46 @@ static void fork_prepare(void) { if (_weaken(_pthread_onfork_prepare)) _weaken(_pthread_onfork_prepare)(); fork_prepare_stdio(); - __localtime_lock(); - __dlopen_lock(); + if (_weaken(__localtime_lock)) + _weaken(__localtime_lock)(); + if (_weaken(__dlopen_lock)) + _weaken(__dlopen_lock)(); if (_weaken(cosmo_stack_lock)) _weaken(cosmo_stack_lock)(); __cxa_lock(); - __gdtoa_lock1(); - __gdtoa_lock(); + if (_weaken(__gdtoa_lock)) { + _weaken(__gdtoa_lock1)(); + _weaken(__gdtoa_lock)(); + } _pthread_lock(); - dlmalloc_pre_fork(); + if (_weaken(dlmalloc_pre_fork)) + _weaken(dlmalloc_pre_fork)(); __fds_lock(); - _pthread_mutex_lock(&__rand64_lock_obj); + if (_weaken(__rand64_lock)) + _weaken(__rand64_lock)(); __maps_lock(); LOCKTRACE("READY TO LOCK AND ROLL"); } static void fork_parent(void) { __maps_unlock(); - _pthread_mutex_unlock(&__rand64_lock_obj); + if (_weaken(__rand64_unlock)) + _weaken(__rand64_unlock)(); __fds_unlock(); - dlmalloc_post_fork_parent(); + if (_weaken(dlmalloc_post_fork_parent)) + _weaken(dlmalloc_post_fork_parent)(); _pthread_unlock(); - __gdtoa_unlock(); - __gdtoa_unlock1(); + if (_weaken(__gdtoa_unlock)) { + _weaken(__gdtoa_unlock)(); + _weaken(__gdtoa_unlock1)(); + } __cxa_unlock(); if (_weaken(cosmo_stack_unlock)) _weaken(cosmo_stack_unlock)(); - __dlopen_unlock(); - __localtime_unlock(); + if (_weaken(__dlopen_unlock)) + _weaken(__dlopen_unlock)(); + if (_weaken(__localtime_unlock)) + _weaken(__localtime_unlock)(); fork_parent_stdio(); if (_weaken(_pthread_onfork_parent)) _weaken(_pthread_onfork_parent)(); @@ -146,18 +162,23 @@ static void fork_parent(void) { } static void fork_child(void) { - _pthread_mutex_wipe_np(&__dlopen_lock_obj); - _pthread_mutex_wipe_np(&__rand64_lock_obj); + if (_weaken(__rand64_wipe)) + _weaken(__rand64_wipe)(); _pthread_mutex_wipe_np(&__fds_lock_obj); dlmalloc_post_fork_child(); - _pthread_mutex_wipe_np(&__gdtoa_lock_obj); - _pthread_mutex_wipe_np(&__gdtoa_lock1_obj); + if (_weaken(__gdtoa_wipe)) { + _weaken(__gdtoa_wipe)(); + _weaken(__gdtoa_wipe1)(); + } fork_child_stdio(); _pthread_mutex_wipe_np(&__pthread_lock_obj); _pthread_mutex_wipe_np(&__cxa_lock_obj); if (_weaken(cosmo_stack_wipe)) _weaken(cosmo_stack_wipe)(); - _pthread_mutex_wipe_np(&__localtime_lock_obj); + if (_weaken(__dlopen_wipe)) + _weaken(__dlopen_wipe)(); + if (_weaken(__localtime_wipe)) + _weaken(__localtime_wipe)(); if (IsWindows()) { // we don't bother locking the proc/itimer/sig locks above since // their state is reset in the forked child. nothing to protect. @@ -174,12 +195,9 @@ static void fork_child(void) { } int _fork(uint32_t dwCreationFlags) { - long micros; struct Dll *e; - struct timespec started; int ax, dx, tid, parent; parent = __pid; - started = timespec_mono(); BLOCK_SIGNALS; fork_prepare(); if (!IsWindows()) { @@ -187,7 +205,6 @@ int _fork(uint32_t dwCreationFlags) { } else { ax = sys_fork_nt(dwCreationFlags); } - micros = timespec_tomicros(timespec_sub(timespec_mono(), started)); if (!ax) { // get new process id @@ -237,11 +254,14 @@ int _fork(uint32_t dwCreationFlags) { } atomic_init(&tib->tib_syshand, syshand); + // the child's pending signals is initially empty + atomic_init(&tib->tib_sigpending, 0); + // we can't be canceled if the canceler no longer exists atomic_init(&pt->pt_canceled, false); // forget locks - memset(tib->tib_locks, 0, sizeof(tib->tib_locks)); + bzero(tib->tib_locks, sizeof(tib->tib_locks)); // run user fork callbacks fork_child(); @@ -256,11 +276,11 @@ int _fork(uint32_t dwCreationFlags) { } } - STRACE("fork() → 0 (child of %d; took %ld us)", parent, micros); + STRACE("fork() → 0 (child of %d)", parent); } else { // this is the parent process fork_parent(); - STRACE("fork() → %d% m (took %ld us)", ax, micros); + STRACE("fork() → %d% m", ax); } ALLOW_SIGNALS; return ax; diff --git a/libc/runtime/runtime.h b/libc/runtime/runtime.h index 58fde8c23..8a0dc5fc3 100644 --- a/libc/runtime/runtime.h +++ b/libc/runtime/runtime.h @@ -95,7 +95,7 @@ int ftrace_install(void) libcesque; int ftrace_enabled(int) libcesque; int strace_enabled(int) libcesque; void __print_maps(size_t) libcesque; -void __print_maps_win32(void) libcesque; +void __print_maps_win32(int64_t, const char *, size_t) libcesque; void __printargs(const char *) libcesque; /* builtin sh-like system/popen dsl */ int _cocmd(int, char **, char **) libcesque; diff --git a/libc/runtime/winmain.greg.c b/libc/runtime/winmain.greg.c index 41fa5776d..3e85b6860 100644 --- a/libc/runtime/winmain.greg.c +++ b/libc/runtime/winmain.greg.c @@ -52,6 +52,7 @@ #include "libc/sock/internal.h" #include "libc/str/str.h" #include "libc/sysv/consts/prot.h" +#include "libc/thread/tls.h" #ifdef __x86_64__ #define abi __msabi textwindows dontinstrument @@ -87,11 +88,15 @@ void __stack_call(int, char **, char **, long (*)[2], void (*)(int, char **, char **, long (*)[2]), intptr_t) wontreturn; +bool __winmain_isfork; +intptr_t __winmain_jmpbuf[5]; +struct CosmoTib *__winmain_tib; + __funline int IsAlpha(int c) { return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'); } -static abi char16_t *StrStr(const char16_t *haystack, const char16_t *needle) { +abi static char16_t *StrStr(const char16_t *haystack, const char16_t *needle) { size_t i; for (;;) { for (i = 0;; ++i) { @@ -108,13 +113,13 @@ static abi char16_t *StrStr(const char16_t *haystack, const char16_t *needle) { return 0; } -static abi void PrintError(const char *s, size_t n) { +abi static void PrintError(const char *s, size_t n) { #define PrintError(s) PrintError(s, sizeof(s) - 1) __imp_WriteFile(__imp_GetStdHandle(kNtStdErrorHandle), s, n, 0, 0); } // detect the unholiest of environments -static abi bool32 IsWslChimera(void) { +abi static bool32 IsWslChimera(void) { char16_t path[PATH_MAX]; return __imp_GetCurrentDirectoryW(PATH_MAX, path) && // path[0] == '\\' && // @@ -125,7 +130,7 @@ static abi bool32 IsWslChimera(void) { } // returns true if utf-8 path is a win32-style path that exists -static abi bool32 WinFileExists(const char *path) { +abi static bool32 WinFileExists(const char *path) { uint16_t path16[PATH_MAX]; size_t z = ARRAYLEN(path16); size_t n = tprecode8to16(path16, z, path).ax; @@ -135,7 +140,7 @@ static abi bool32 WinFileExists(const char *path) { } // this ensures close(1) won't accidentally close(2) for example -static abi void DeduplicateStdioHandles(void) { +abi static void DeduplicateStdioHandles(void) { for (long i = 0; i < 3; ++i) { int64_t h1 = __imp_GetStdHandle(kNtStdio[i]); for (long j = i + 1; j < 3; ++j) { @@ -150,19 +155,19 @@ static abi void DeduplicateStdioHandles(void) { } } -static bool32 HasEnvironmentVariable(const char16_t *name) { +abi static bool32 HasEnvironmentVariable(const char16_t *name) { char16_t buf[4]; return __imp_GetEnvironmentVariableW(name, buf, ARRAYLEN(buf)); } -static abi unsigned OnWinCrash(struct NtExceptionPointers *ep) { +abi static unsigned OnWinCrash(struct NtExceptionPointers *ep) { int code, sig = __sig_crash_sig(ep->ExceptionRecord->ExceptionCode, &code); TerminateThisProcess(sig); } // main function of windows init process // i.e. first process spawned that isn't forked -static abi wontreturn void WinInit(const char16_t *cmdline) { +abi wontreturn static void WinInit(const char16_t *cmdline) { __oldstack = (intptr_t)__builtin_frame_address(0); __imp_SetConsoleOutputCP(kNtCpUtf8); @@ -314,7 +319,7 @@ static int Atoi(const char16_t *str) { return x; } -static abi int WinGetPid(const char16_t *var, bool *out_is_inherited) { +abi static int WinGetPid(const char16_t *var, bool *out_is_inherited) { uint32_t len; char16_t val[12]; if ((len = __imp_GetEnvironmentVariableW(var, val, ARRAYLEN(val)))) { @@ -338,6 +343,8 @@ abi int64_t WinMain(int64_t hInstance, int64_t hPrevInstance, extern char os asm("__hostos"); os = _HOSTWINDOWS; // madness https://news.ycombinator.com/item?id=21019722 kStartTsc = rdtsc(); + __tls_enabled = false; + ftrace_enabled(-1); if (!IsTiny() && IsWslChimera()) { PrintError("error: APE is running on WIN32 inside WSL. You need to run: " "sudo sh -c 'echo -1 > /proc/sys/fs/binfmt_misc/WSLInterop'\n"); @@ -351,6 +358,8 @@ abi int64_t WinMain(int64_t hInstance, int64_t hPrevInstance, __pid = WinGetPid(u"_COSMO_PID", &pid_is_inherited); if (!(__sig.process = __sig_map_process(__pid, kNtOpenAlways))) __sig.process = &fake_process_signals; + if (__winmain_isfork) + __builtin_longjmp(__winmain_jmpbuf, 1); if (!pid_is_inherited) atomic_store_explicit(__sig.process, 0, memory_order_release); cmdline = __imp_GetCommandLineW(); @@ -359,11 +368,10 @@ abi int64_t WinMain(int64_t hInstance, int64_t hPrevInstance, if (StrStr(cmdline, u"--strace")) ++__strace; #endif + ftrace_enabled(+1); if (_weaken(WinSockInit)) _weaken(WinSockInit)(); DeduplicateStdioHandles(); - if (_weaken(WinMainForked)) - _weaken(WinMainForked)(); WinInit(cmdline); } diff --git a/libc/sock/kntwsadata.c b/libc/sock/kntwsadata.c index 2c08015e1..6e03dc588 100644 --- a/libc/sock/kntwsadata.c +++ b/libc/sock/kntwsadata.c @@ -51,3 +51,7 @@ textwindows void WinSockInit(void) { _Exit(1); } } + +textwindows dontinstrument void WinSockFork(void) { + WSAStartup(VERSION, &kNtWsaData); +} diff --git a/libc/sysv/consts.sh b/libc/sysv/consts.sh index 48742fc3f..b89f6c742 100755 --- a/libc/sysv/consts.sh +++ b/libc/sysv/consts.sh @@ -227,7 +227,6 @@ syscon mmap MAP_LOCKED 0x00002000 0x00002000 0 0 0 0 0 0 syscon mmap MAP_NORESERVE 0x00004000 0x00004000 0x00000040 0x00000040 0 0 0x00000040 0 # Linux calls it "reserve"; NT calls it "commit"? which is default? syscon mmap MAP_POPULATE 0x00008000 0x00008000 0 0 0x00040000 0 0 0 # MAP_PREFAULT_READ on FreeBSD; can avoid madvise(MADV_WILLNEED) on private file mapping syscon mmap MAP_NONBLOCK 0x00010000 0x00010000 0 0 0 0 0 0 -syscon mmap MAP_NOFORK 0 0 0 0 0 0 0 0x10000000 # used on pages internal to our mmap() implemention on windows syscon mmap MAP_SYNC 0x00080000 0x00080000 0 0 0 0 0 0 # perform synchronous page faults for mapping (Linux 4.15+) syscon mmap MAP_HUGETLB 0x00040000 -1 -1 -1 -1 -1 -1 -1 # make it inherit across execve() syscon mmap MAP_INHERIT -1 -1 -1 -1 -1 -1 0x00000080 -1 # make it inherit across execve() diff --git a/libc/sysv/consts/MAP_NOFORK.S b/libc/sysv/consts/MAP_NOFORK.S deleted file mode 100644 index 04b0363b6..000000000 --- a/libc/sysv/consts/MAP_NOFORK.S +++ /dev/null @@ -1,2 +0,0 @@ -#include "libc/sysv/consts/syscon.internal.h" -.syscon mmap,MAP_NOFORK,0,0,0,0,0,0,0,0x10000000 diff --git a/libc/sysv/consts/map.h b/libc/sysv/consts/map.h index ae719ea0b..20ed8bf51 100644 --- a/libc/sysv/consts/map.h +++ b/libc/sysv/consts/map.h @@ -19,7 +19,6 @@ extern const int MAP_JIT; extern const int MAP_LOCKED; extern const int MAP_NOCACHE; extern const int MAP_NOEXTEND; -extern const int MAP_NOFORK; extern const int MAP_NONBLOCK; extern const int MAP_NORESERVE; extern const int MAP_NOSYNC; diff --git a/libc/sysv/hostos.S b/libc/sysv/hostos.S index e4550d488..5adcfc603 100644 --- a/libc/sysv/hostos.S +++ b/libc/sysv/hostos.S @@ -22,4 +22,10 @@ .balign 8 __hostos: .quad 0 - .endfn __hostos,globl + .endobj __hostos,globl +__tls_index: + .long 0 + .endobj __tls_index,globl +__tls_enabled: + .long 0 + .endobj __tls_enabled,globl diff --git a/libc/thread/itimer.c b/libc/thread/itimer.c index a820f9151..7e4d331c6 100644 --- a/libc/thread/itimer.c +++ b/libc/thread/itimer.c @@ -28,6 +28,7 @@ #include "libc/intrin/strace.h" #include "libc/nt/enum/processcreationflags.h" #include "libc/nt/thread.h" +#include "libc/runtime/runtime.h" #include "libc/str/str.h" #include "libc/sysv/consts/clock.h" #include "libc/sysv/consts/map.h" diff --git a/test/libc/proc/BUILD.mk b/test/libc/proc/BUILD.mk index dc8a42cee..52857c1f7 100644 --- a/test/libc/proc/BUILD.mk +++ b/test/libc/proc/BUILD.mk @@ -29,15 +29,16 @@ TEST_LIBC_PROC_DIRECTDEPS = \ LIBC_MEM \ LIBC_NEXGEN32E \ LIBC_NT_KERNEL32 \ - LIBC_RUNTIME \ LIBC_PROC \ + LIBC_RUNTIME \ + LIBC_STDIO \ LIBC_STR \ LIBC_SYSV \ LIBC_TESTLIB \ LIBC_THREAD \ LIBC_X \ THIRD_PARTY_MUSL \ - THIRD_PARTY_TR + THIRD_PARTY_TR \ TEST_LIBC_PROC_DEPS := \ $(call uniq,$(foreach x,$(TEST_LIBC_PROC_DIRECTDEPS),$($(x)))) diff --git a/test/libc/proc/fork_test.c b/test/libc/proc/fork_test.c index 1bb7d61ee..0beae3889 100644 --- a/test/libc/proc/fork_test.c +++ b/test/libc/proc/fork_test.c @@ -21,6 +21,7 @@ #include "libc/calls/struct/sigaction.h" #include "libc/calls/struct/sigset.h" #include "libc/calls/struct/timespec.h" +#include "libc/calls/syscall-sysv.internal.h" #include "libc/dce.h" #include "libc/errno.h" #include "libc/log/check.h" @@ -32,6 +33,7 @@ #include "libc/sysv/consts/msync.h" #include "libc/sysv/consts/prot.h" #include "libc/sysv/consts/sig.h" +#include "libc/testlib/benchmark.h" #include "libc/testlib/ezbench.h" #include "libc/testlib/subprocess.h" #include "libc/testlib/testlib.h" @@ -150,6 +152,31 @@ void ForkInSerial(void) { ASSERT_EQ(0, WEXITSTATUS(ws)); } -BENCH(fork, bench) { - EZBENCH2("fork a", donothing, ForkInSerial()); +void VforkInSerial(void) { + int pid, ws; + ASSERT_NE(-1, (pid = vfork())); + if (!pid) + _Exit(0); + ASSERT_NE(-1, waitpid(pid, &ws, 0)); + ASSERT_TRUE(WIFEXITED(ws)); + ASSERT_EQ(0, WEXITSTATUS(ws)); +} + +void SysForkInSerial(void) { + int pid, ws; + ASSERT_NE(-1, (pid = sys_fork())); + if (!pid) + _Exit(0); + ASSERT_NE(-1, waitpid(pid, &ws, 0)); + ASSERT_TRUE(WIFEXITED(ws)); + ASSERT_EQ(0, WEXITSTATUS(ws)); +} + +TEST(fork, bench) { + VforkInSerial(); + BENCHMARK(10, 1, VforkInSerial()); + if (!IsWindows()) + BENCHMARK(10, 1, SysForkInSerial()); + ForkInSerial(); + BENCHMARK(10, 1, ForkInSerial()); } diff --git a/test/posix/file_offset_exec_test.c b/test/posix/file_offset_exec_test.c index 7cfc6b88d..e9b9e94ba 100644 --- a/test/posix/file_offset_exec_test.c +++ b/test/posix/file_offset_exec_test.c @@ -38,10 +38,6 @@ void on_unexpected_death(int sig) { int main() { - // TODO(jart): fix flakes - if (IsWindows()) - return 0; - signal(SIGCHLD, on_unexpected_death); // extract test program diff --git a/third_party/gdtoa/lock.c b/third_party/gdtoa/lock.c index e30dcb7c7..1e5cc36de 100644 --- a/third_party/gdtoa/lock.c +++ b/third_party/gdtoa/lock.c @@ -32,8 +32,8 @@ #include "libc/thread/posixthread.internal.h" #include "third_party/gdtoa/lock.h" -pthread_mutex_t __gdtoa_lock_obj = PTHREAD_MUTEX_INITIALIZER; -pthread_mutex_t __gdtoa_lock1_obj = PTHREAD_MUTEX_INITIALIZER; +static pthread_mutex_t __gdtoa_lock_obj = PTHREAD_MUTEX_INITIALIZER; +static pthread_mutex_t __gdtoa_lock1_obj = PTHREAD_MUTEX_INITIALIZER; void __gdtoa_lock(void) @@ -47,6 +47,12 @@ __gdtoa_unlock(void) _pthread_mutex_unlock(&__gdtoa_lock_obj); } +void +__gdtoa_wipe(void) +{ + _pthread_mutex_wipe_np(&__gdtoa_lock_obj); +} + void __gdtoa_lock1(void) { @@ -58,3 +64,9 @@ __gdtoa_unlock1(void) { _pthread_mutex_unlock(&__gdtoa_lock1_obj); } + +void +__gdtoa_wipe1(void) +{ + _pthread_mutex_wipe_np(&__gdtoa_lock1_obj); +} diff --git a/third_party/gdtoa/lock.h b/third_party/gdtoa/lock.h index e630e31e1..71af847aa 100644 --- a/third_party/gdtoa/lock.h +++ b/third_party/gdtoa/lock.h @@ -3,13 +3,13 @@ #include "libc/thread/thread.h" COSMOPOLITAN_C_START_ -extern pthread_mutex_t __gdtoa_lock_obj; -extern pthread_mutex_t __gdtoa_lock1_obj; - void __gdtoa_lock(void); void __gdtoa_unlock(void); +void __gdtoa_wipe(void); + void __gdtoa_lock1(void); void __gdtoa_unlock1(void); +void __gdtoa_wipe1(void); COSMOPOLITAN_C_END_ #endif /* COSMOPOLITAN_THIRD_PARTY_GDTOA_LOCK_H_ */ diff --git a/third_party/nsync/common.c b/third_party/nsync/common.c index 352168049..ad7fb0176 100644 --- a/third_party/nsync/common.c +++ b/third_party/nsync/common.c @@ -238,7 +238,8 @@ static bool free_waiters_populate (void) { // netbsd semaphores are file descriptors n = 1; } else { - n = __pagesize / sizeof(waiter); + // don't create too much fork() overhead + n = 16; } waiter *waiters = mmap (0, n * sizeof(waiter), PROT_READ | PROT_WRITE, diff --git a/third_party/tz/lock.h b/third_party/tz/lock.h index 60070aad1..501505478 100644 --- a/third_party/tz/lock.h +++ b/third_party/tz/lock.h @@ -3,10 +3,9 @@ #include "libc/thread/thread.h" COSMOPOLITAN_C_START_ -extern pthread_mutex_t __localtime_lock_obj; - void __localtime_lock(void); void __localtime_unlock(void); +void __localtime_wipe(void); COSMOPOLITAN_C_END_ #endif /* COSMOPOLITAN_THIRD_PARTY_TZ_LOCK_H_ */