Make fork() go 30% faster

This change makes fork() go nearly as fast as sys_fork() on UNIX. As for
Windows, this change shaves about 4-5ms off fork() + wait() latency. This
is accomplished by using WriteProcessMemory() from the parent process to
set up the address space of a suspended process; it is better than a pipe.
This commit is contained in:
Justine Tunney 2025-01-01 04:59:38 -08:00
parent 98c5847727
commit 0b3c81dd4e
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
44 changed files with 769 additions and 649 deletions

View file

@ -135,7 +135,7 @@ ARCH = aarch64
HOSTS ?= pi pi5 studio freebsdarm
else
ARCH = x86_64
HOSTS ?= freebsd rhel7 xnu openbsd netbsd win10
HOSTS ?= freebsd rhel7 xnu openbsd netbsd win10 luna
endif
ZIPOBJ_FLAGS += -a$(ARCH)

View file

@ -17,6 +17,7 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/describeflags.h"
#include "libc/intrin/maps.h"
#include "libc/runtime/memtrack.internal.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
@ -24,12 +25,13 @@
static char DescribeMapType(int flags) {
switch (flags & MAP_TYPE) {
case MAP_FILE:
if (flags & MAP_NOFORK)
return 'i'; // executable image
return '-';
case MAP_PRIVATE:
if (flags & MAP_NOFORK)
return 'P';
else
return 'p';
return 'w'; // windows memory
return 'p';
case MAP_SHARED:
return 's';
default:

View file

@ -19,7 +19,7 @@
#include "libc/thread/posixthread.internal.h"
#include "libc/thread/thread.h"
pthread_mutex_t __dlopen_lock_obj = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t __dlopen_lock_obj = PTHREAD_MUTEX_INITIALIZER;
void __dlopen_lock(void) {
_pthread_mutex_lock(&__dlopen_lock_obj);
@ -28,3 +28,7 @@ void __dlopen_lock(void) {
// Releases the file-private dlopen mutex (__dlopen_lock_obj).
void __dlopen_unlock(void) {
_pthread_mutex_unlock(&__dlopen_lock_obj);
}
// Resets the dlopen mutex via _pthread_mutex_wipe_np().
// NOTE(review): presumably called in the fork() child so the lock
// starts out unlocked there -- confirm against the caller.
void __dlopen_wipe(void) {
_pthread_mutex_wipe_np(&__dlopen_lock_obj);
}

View file

@ -19,7 +19,7 @@
#include "libc/thread/posixthread.internal.h"
#include "third_party/tz/lock.h"
pthread_mutex_t __localtime_lock_obj = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t __localtime_lock_obj = PTHREAD_MUTEX_INITIALIZER;
void __localtime_lock(void) {
_pthread_mutex_lock(&__localtime_lock_obj);
@ -28,3 +28,7 @@ void __localtime_lock(void) {
// Releases the file-private localtime mutex (__localtime_lock_obj).
void __localtime_unlock(void) {
_pthread_mutex_unlock(&__localtime_lock_obj);
}
// Resets the localtime mutex via _pthread_mutex_wipe_np().
// NOTE(review): presumably called in the fork() child so the lock
// starts out unlocked there -- confirm against the caller.
void __localtime_wipe(void) {
_pthread_mutex_wipe_np(&__localtime_lock_obj);
}

View file

@ -30,6 +30,7 @@
#include "libc/nexgen32e/rdtsc.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/stack.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/thread/lock.h"
#include "libc/thread/tls.h"
@ -40,10 +41,6 @@ __static_yoink("_init_maps");
#define ABI privileged optimizespeed
// take great care if you enable this
// especially if you're using --ftrace too
#define DEBUG_MAPS_LOCK 0
struct Maps __maps;
void __maps_add(struct Map *map) {
@ -61,14 +58,18 @@ void __maps_stack(char *stackaddr, int pagesz, int guardsize, size_t stacksize,
__maps.stack.addr = stackaddr + guardsize;
__maps.stack.size = stacksize - guardsize;
__maps.stack.prot = stackprot;
__maps.stack.hand = -1;
__maps.stack.hand = MAPS_SUBREGION;
__maps.stack.flags = MAP_PRIVATE | MAP_ANONYMOUS;
__maps_adder(&__maps.stack, pagesz);
if (guardsize) {
__maps.guard.addr = stackaddr;
__maps.guard.size = guardsize;
__maps.guard.prot = PROT_NONE;
__maps.guard.prot = PROT_NONE | PROT_GUARD;
__maps.guard.hand = stackhand;
__maps.guard.flags = MAP_PRIVATE | MAP_ANONYMOUS;
__maps_adder(&__maps.guard, pagesz);
} else {
__maps.stack.hand = stackhand;
}
}
@ -102,29 +103,14 @@ void __maps_init(void) {
}
// record .text and .data mappings
static struct Map text, data;
text.addr = (char *)__executable_start;
text.size = _etext - __executable_start;
text.prot = PROT_READ | PROT_EXEC;
__maps_track((char *)__executable_start, _etext - __executable_start,
PROT_READ | PROT_EXEC, MAP_NOFORK);
uintptr_t ds = ((uintptr_t)_etext + pagesz - 1) & -pagesz;
if (ds < (uintptr_t)_end) {
data.addr = (char *)ds;
data.size = (uintptr_t)_end - ds;
data.prot = PROT_READ | PROT_WRITE;
__maps_adder(&data, pagesz);
}
__maps_adder(&text, pagesz);
if (ds < (uintptr_t)_end)
__maps_track((char *)ds, (uintptr_t)_end - ds, PROT_READ | PROT_WRITE,
MAP_NOFORK);
}
#if DEBUG_MAPS_LOCK
privileged static void __maps_panic(const char *msg) {
// it's only safe to pass a format string. if we use directives such
// as %s, %t etc. then kprintf() will recursively call __maps_lock()
kprintf(msg);
DebugBreak();
}
#endif
bool __maps_held(void) {
return __tls_enabled && !(__get_tls()->tib_flags & TIB_FLAG_VFORKED) &&
MUTEX_OWNER(
@ -143,7 +129,12 @@ ABI void __maps_lock(void) {
if (tib->tib_flags & TIB_FLAG_VFORKED)
return;
me = atomic_load_explicit(&tib->tib_ptid, memory_order_relaxed);
if (me <= 0)
word = 0;
lock = MUTEX_LOCK(word);
lock = MUTEX_SET_OWNER(lock, me);
if (atomic_compare_exchange_strong_explicit(&__maps.lock.word, &word, lock,
memory_order_acquire,
memory_order_relaxed))
return;
word = atomic_load_explicit(&__maps.lock.word, memory_order_relaxed);
for (;;) {
@ -154,24 +145,13 @@ ABI void __maps_lock(void) {
return;
continue;
}
#if DEBUG_MAPS_LOCK
if (__deadlock_tracked(&__maps.lock) == 1)
__maps_panic("error: maps lock already held\n");
if (__deadlock_check(&__maps.lock, 1))
__maps_panic("error: maps lock is cyclic\n");
#endif
word = 0;
lock = MUTEX_LOCK(word);
lock = MUTEX_SET_OWNER(lock, me);
if (atomic_compare_exchange_weak_explicit(&__maps.lock.word, &word, lock,
memory_order_acquire,
memory_order_relaxed)) {
#if DEBUG_MAPS_LOCK
__deadlock_track(&__maps.lock, 0);
__deadlock_record(&__maps.lock, 0);
#endif
memory_order_relaxed))
return;
}
for (;;) {
word = atomic_load_explicit(&__maps.lock.word, memory_order_relaxed);
if (MUTEX_OWNER(word) == me)
@ -183,7 +163,6 @@ ABI void __maps_lock(void) {
}
ABI void __maps_unlock(void) {
int me;
uint64_t word;
struct CosmoTib *tib;
if (!__tls_enabled)
@ -192,28 +171,16 @@ ABI void __maps_unlock(void) {
return;
if (tib->tib_flags & TIB_FLAG_VFORKED)
return;
me = atomic_load_explicit(&tib->tib_ptid, memory_order_relaxed);
if (me <= 0)
return;
word = atomic_load_explicit(&__maps.lock.word, memory_order_relaxed);
#if DEBUG_MAPS_LOCK
if (__deadlock_tracked(&__maps.lock) == 0)
__maps_panic("error: maps lock not owned by caller\n");
#endif
for (;;) {
if (MUTEX_DEPTH(word)) {
if (MUTEX_DEPTH(word))
if (atomic_compare_exchange_weak_explicit(
&__maps.lock.word, &word, MUTEX_DEC_DEPTH(word),
memory_order_relaxed, memory_order_relaxed))
break;
}
if (atomic_compare_exchange_weak_explicit(&__maps.lock.word, &word, 0,
memory_order_release,
memory_order_relaxed)) {
#if DEBUG_MAPS_LOCK
__deadlock_untrack(&__maps.lock);
#endif
memory_order_relaxed))
break;
}
}
}

View file

@ -5,6 +5,28 @@
#include "libc/runtime/runtime.h"
COSMOPOLITAN_C_START_
/* size of dynamic memory that is used internally by your memory manager */
#define MAPS_SIZE 65536
/* when map->hand is MAPS_RESERVATION it means mmap() is transactionally
reserving address space it is in the process of requesting from win32 */
#define MAPS_RESERVATION -2
/* when map->hand is MAPS_SUBREGION it means that an allocation has been
broken into multiple fragments by mprotect(). the first fragment must
be set to MAPS_VIRTUAL or your CreateFileMapping() handle. your frags
must be perfectly contiguous in memory and should have the same flags */
#define MAPS_SUBREGION -3
/* indicates an allocation was created by VirtualAlloc() and so munmap()
must call VirtualFree() when destroying it. use it on the hand field. */
#define MAPS_VIRTUAL -4
/* if this is used on MAP_PRIVATE memory, then it's assumed to be memory
that win32 allocated, e.g. a CreateThread() stack. if this is used on
MAP_FILE memory, then it's assumed to be part of the executable image */
#define MAP_NOFORK 0x10000000
#define MAP_TREE_CONTAINER(e) TREE_CONTAINER(struct Map, tree, e)
struct Map {
@ -12,9 +34,8 @@ struct Map {
size_t size; /* must be nonzero */
int64_t off; /* ignore for anon */
int flags; /* memory map flag */
char prot; /* memory protects */
short prot; /* memory protects */
bool iscow; /* windows nt only */
bool precious; /* windows nt only */
bool readonlyfile; /* windows nt only */
unsigned visited; /* checks and fork */
intptr_t hand; /* windows nt only */
@ -29,11 +50,17 @@ struct MapLock {
_Atomic(uint64_t) word;
};
/* a slab of map tracking objects; the array length is chosen so the
   link pointer plus the array take up at most MAPS_SIZE bytes */
struct MapSlab {
struct MapSlab *next;
struct Map maps[(MAPS_SIZE - sizeof(struct MapSlab *)) / sizeof(struct Map)];
};
struct Maps {
uint128_t rand;
struct Tree *maps;
struct MapLock lock;
_Atomic(uintptr_t) freed;
_Atomic(struct MapSlab *) slabs;
size_t count;
size_t pages;
struct Map stack;
@ -76,33 +103,37 @@ forceinline optimizespeed int __maps_search(const void *key,
return (addr > map->addr) - (addr < map->addr);
}
static inline struct Map *__maps_next(struct Map *map) {
dontinstrument static inline struct Map *__maps_next(struct Map *map) {
struct Tree *node;
if ((node = tree_next(&map->tree)))
return MAP_TREE_CONTAINER(node);
return 0;
}
static inline struct Map *__maps_prev(struct Map *map) {
dontinstrument static inline struct Map *__maps_prev(struct Map *map) {
struct Tree *node;
if ((node = tree_prev(&map->tree)))
return MAP_TREE_CONTAINER(node);
return 0;
}
static inline struct Map *__maps_first(void) {
dontinstrument static inline struct Map *__maps_first(void) {
struct Tree *node;
if ((node = tree_first(__maps.maps)))
return MAP_TREE_CONTAINER(node);
return 0;
}
static inline struct Map *__maps_last(void) {
dontinstrument static inline struct Map *__maps_last(void) {
struct Tree *node;
if ((node = tree_last(__maps.maps)))
return MAP_TREE_CONTAINER(node);
return 0;
}
/* returns true unless map->hand is MAPS_SUBREGION, i.e. true when the
   map is not one of the trailing fragments of a split allocation */
static inline bool __maps_isalloc(struct Map *map) {
return map->hand != MAPS_SUBREGION;
}
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_MAPS_H_ */

View file

@ -19,6 +19,7 @@
#include "libc/calls/calls.h"
#include "libc/calls/internal.h"
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/atomic.h"
@ -32,6 +33,7 @@
#include "libc/intrin/weaken.h"
#include "libc/limits.h"
#include "libc/macros.h"
#include "libc/nt/enum/memflags.h"
#include "libc/nt/memory.h"
#include "libc/nt/runtime.h"
#include "libc/runtime/runtime.h"
@ -44,9 +46,10 @@
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/errfuns.h"
#include "libc/thread/lock.h"
#include "libc/thread/thread.h"
#include "libc/thread/tls.h"
#define MMDEBUG 0
#define MMDEBUG 1
#define MAX_SIZE 0x0ff800000000ul
#define MAP_FIXED_NOREPLACE_linux 0x100000
@ -99,6 +102,31 @@ static bool __maps_overlaps(const char *addr, size_t size) {
return false;
}
// returns true if all fragments of all allocations which overlap
// [addr,addr+size) are completely contained by [addr,addr+size).
//
// the walk starts at the mapping at or below addr (or the first mapping
// if there is none) and visits every mapping that begins before the end
// of the interval. mappings that don't intersect the interval, such as
// a floor mapping that ends before addr, are skipped rather than ending
// the walk, so later mappings inside the interval are still checked.
//
// @param addr is start of interval
// @param size is byte length of interval, rounded up to page size
// @return false if the interval cuts through any allocation
textwindows static bool __maps_envelops(const char *addr, size_t size) {
  struct Map *map, *next;
  size = PGUP(size);
  if (!(map = __maps_floor(addr)))
    map = __maps_first();
  for (; map && map->addr < addr + size; map = next) {
    next = __maps_next(map);
    if (MAX(addr, map->addr) >= MIN(addr + size, map->addr + map->size))
      continue;  // didn't overlap mapping; keep scanning the interval
    if (!__maps_isalloc(map))
      return false;  // didn't include first fragment of alloc
    if (addr > map->addr)
      return false;  // excluded leading pages of first fragment
    // set map to last fragment in allocation
    for (; (next = __maps_next(map)) && !__maps_isalloc(next); map = next)
      // fragments within an allocation must be perfectly contiguous
      ASSERT(map->addr + map->size == next->addr);
    if (addr + size < map->addr + PGUP(map->size))
      return false;  // excluded trailing pages of allocation
  }
  return true;
}
void __maps_check(void) {
#if MMDEBUG
size_t maps = 0;
@ -130,17 +158,17 @@ static int __muntrack(char *addr, size_t size, struct Map **deleted,
size_t ti = 0;
struct Map *map;
struct Map *next;
struct Map *floor;
size = PGUP(size);
floor = __maps_floor(addr);
for (map = floor; map && map->addr <= addr + size; map = next) {
if (!(map = __maps_floor(addr)))
map = __maps_first();
for (; map && map->addr <= addr + size; map = next) {
next = __maps_next(map);
char *map_addr = map->addr;
size_t map_size = map->size;
if (!(MAX(addr, map_addr) < MIN(addr + size, map_addr + PGUP(map_size))))
continue;
if (addr <= map_addr && addr + size >= map_addr + PGUP(map_size)) {
if (map->precious)
if (map->hand == MAPS_RESERVATION)
continue;
// remove mapping completely
tree_remove(&__maps.maps, &map->tree);
@ -149,9 +177,6 @@ static int __muntrack(char *addr, size_t size, struct Map **deleted,
__maps.pages -= (map_size + __pagesize - 1) / __pagesize;
__maps.count -= 1;
__maps_check();
} else if (IsWindows()) {
STRACE("you can't carve up memory maps on windows ;_;");
rc = enotsup();
} else if (addr <= map_addr) {
// shave off lefthand side of mapping
ASSERT(addr + size < map_addr + PGUP(map_size));
@ -229,6 +254,7 @@ void __maps_free(struct Map *map) {
ASSERT(!TAG(map));
map->size = 0;
map->addr = MAP_FAILED;
map->hand = kNtInvalidHandleValue;
for (tip = atomic_load_explicit(&__maps.freed, memory_order_relaxed);;) {
map->freed = (struct Map *)PTR(tip);
if (atomic_compare_exchange_weak_explicit(
@ -261,11 +287,23 @@ static int __maps_destroy_all(struct Map *list) {
if (!IsWindows()) {
if (sys_munmap(map->addr, map->size))
rc = -1;
} else if (map->hand != -1) {
if (!UnmapViewOfFile(map->addr))
rc = -1;
if (!CloseHandle(map->hand))
rc = -1;
} else {
switch (map->hand) {
case MAPS_SUBREGION:
case MAPS_RESERVATION:
break;
case MAPS_VIRTUAL:
if (!VirtualFree(map->addr, 0, kNtMemRelease))
rc = __winerr();
break;
default:
ASSERT(map->hand > 0);
if (!UnmapViewOfFile(map->addr))
rc = -1;
if (!CloseHandle(map->hand))
rc = -1;
break;
}
}
}
return rc;
@ -345,10 +383,9 @@ void __maps_insert(struct Map *map) {
if (!map && left && right)
if (__maps_mergeable(left, right)) {
left->size = PGUP(left->size);
right->addr -= left->size;
right->size += left->size;
tree_remove(&__maps.maps, &left->tree);
__maps_free(left);
left->size += right->size;
tree_remove(&__maps.maps, &right->tree);
__maps_free(right);
__maps.count -= 1;
}
@ -369,7 +406,7 @@ bool __maps_track(char *addr, size_t size, int prot, int flags) {
map->size = size;
map->prot = prot;
map->flags = flags;
map->hand = -1;
map->hand = MAPS_VIRTUAL;
__maps_lock();
__maps_insert(map);
__maps_unlock();
@ -396,22 +433,23 @@ struct Map *__maps_alloc(void) {
return map;
pthread_pause_np();
}
int size = 65536;
// we're creating sudden surprise memory. the user might be in the
// middle of carefully planning a fixed memory structure. we don't
// want the system allocator to put our surprise memory inside it,
// and we also want to avoid the chances of accidentally unmapping
struct DirectMap sys =
sys_mmap(__maps_randaddr(), size, PROT_READ | PROT_WRITE,
sys_mmap(__maps_randaddr(), MAPS_SIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (sys.addr == MAP_FAILED)
return 0;
map = sys.addr;
if (IsWindows())
CloseHandle(sys.maphandle);
for (int i = 1; i < size / sizeof(struct Map); ++i)
__maps_free(map + i);
return map;
struct MapSlab *slab = sys.addr;
while (!atomic_compare_exchange_weak(&__maps.slabs, &slab->next, slab)) {
}
for (size_t i = 1; i < ARRAYLEN(slab->maps); ++i)
__maps_free(&slab->maps[i]);
return &slab->maps[0];
}
static int __munmap(char *addr, size_t size) {
@ -429,13 +467,10 @@ static int __munmap(char *addr, size_t size) {
__maps_lock();
__maps_check();
// normalize size
// abort if size doesn't include all pages in granule
if (GRUP(size) > PGUP(size))
if (__maps_overlaps(addr + PGUP(size), GRUP(size) - PGUP(size))) {
__maps_unlock();
return einval();
}
// on windows we can only unmap whole allocations
if (IsWindows())
if (!__maps_envelops(addr, size))
return enotsup();
// untrack mappings
int rc;
@ -572,6 +607,11 @@ static void *__mmap_impl(char *addr, size_t size, int prot, int flags, int fd,
}
} else {
// remove existing mappings and their tracking objects
if (!__maps_envelops(addr, size)) {
__maps_unlock();
__maps_free(map);
return (void *)enotsup();
}
struct Map *deleted = 0;
if (__muntrack(addr, size, &deleted, 0, 0)) {
__maps_insert_all(deleted);
@ -592,8 +632,7 @@ static void *__mmap_impl(char *addr, size_t size, int prot, int flags, int fd,
map->size = size;
map->prot = 0;
map->flags = 0;
map->hand = -1;
map->precious = true;
map->hand = MAPS_RESERVATION;
__maps_insert(map);
__maps_unlock();
}
@ -610,7 +649,6 @@ static void *__mmap_impl(char *addr, size_t size, int prot, int flags, int fd,
__maps_lock();
tree_remove(&__maps.maps, &map->tree);
__maps.pages -= (map->size + __pagesize - 1) / __pagesize;
map->precious = false;
__maps_unlock();
if (errno == EADDRNOTAVAIL) {
// we've encountered mystery memory
@ -649,7 +687,6 @@ static void *__mmap_impl(char *addr, size_t size, int prot, int flags, int fd,
map->prot = prot;
map->flags = flags;
map->hand = res.maphandle;
map->precious = false;
if (IsWindows()) {
map->iscow = (flags & MAP_TYPE) != MAP_SHARED && fd != -1;
map->readonlyfile = (flags & MAP_TYPE) == MAP_SHARED && fd != -1 &&
@ -710,21 +747,6 @@ static void *__mmap(char *addr, size_t size, int prot, int flags, int fd,
static void *__mremap_impl(char *old_addr, size_t old_size, size_t new_size,
int flags, char *new_addr) {
// normalize and validate old size
// abort if size doesn't include all pages in granule
if (GRUP(old_size) > PGUP(old_size))
if (__maps_overlaps(old_addr + PGUP(old_size),
GRUP(old_size) - PGUP(old_size)))
return (void *)einval();
// validate new size
// abort if size doesn't include all pages in granule
if (flags & MREMAP_FIXED)
if (GRUP(new_size) > PGUP(new_size))
if (__maps_overlaps(new_addr + PGUP(new_size),
GRUP(new_size) - PGUP(new_size)))
return (void *)einval();
// allocate object for tracking new mapping
struct Map *map;
if (!(map = __maps_alloc()))
@ -787,6 +809,7 @@ static void *__mremap_impl(char *old_addr, size_t old_size, size_t new_size,
map->off = old_off;
map->prot = old_prot;
map->flags = old_flags;
map->hand = kNtInvalidHandleValue;
__maps_insert(map);
return res;
@ -945,8 +968,8 @@ static void *__mremap(char *old_addr, size_t old_size, size_t new_size,
*
* @raise ENOMEM if `RUSAGE_AS` or similar limits are exceeded
* @raise EEXIST if `flags` has `MAP_FIXED_NOREPLACE` and `addr` is used
* @raise ENOTSUP if interval overlapped without enveloping win32 alloc
* @raise EPERM if `addr` is null and `flags` has `MAP_FIXED`
* @raise ENOTSUP if memory map is cleaved on windows with `MAP_FIXED`
* @raise EINVAL if `addr` isn't granularity aligned with `MAP_FIXED`
* @raise EINVAL if `size` is zero
* @raise EINVAL if `flags` or `prot` hold invalid values
@ -1000,10 +1023,9 @@ void *mremap(void *old_addr, size_t old_size, size_t new_size, int flags, ...) {
*
* @return 0 on success, or -1 w/ errno.
* @raise ENOMEM if OOM happened when punching hole in existing mapping
* @raise ENOTSUP if memory map is cleaved on windows with `MAP_FIXED`
* @raise ENOTSUP if interval overlapped without enveloping win32 alloc
* @raise EDEADLK if called from signal handler interrupting mmap()
* @raise EINVAL if `addr` isn't granularity aligned
* @raise EINVAL if `size` didn't include all pages in granule
*/
int munmap(void *addr, size_t size) {
int rc = __munmap(addr, size);

View file

@ -108,7 +108,7 @@ int __mprotect(char *addr, size_t size, int prot) {
leftmap->hand = map->hand;
map->addr += left;
map->size = right;
map->hand = -1;
map->hand = MAPS_SUBREGION;
if (!(map->flags & MAP_ANONYMOUS))
map->off += left;
tree_insert(&__maps.maps, &leftmap->tree, __maps_compare);
@ -139,7 +139,7 @@ int __mprotect(char *addr, size_t size, int prot) {
map->addr += left;
map->size = right;
map->prot = prot;
map->hand = -1;
map->hand = MAPS_SUBREGION;
if (!(map->flags & MAP_ANONYMOUS))
map->off += left;
tree_insert(&__maps.maps, &leftmap->tree, __maps_compare);
@ -175,10 +175,10 @@ int __mprotect(char *addr, size_t size, int prot) {
midlmap->off = (map->flags & MAP_ANONYMOUS) ? 0 : map->off + left;
midlmap->prot = prot;
midlmap->flags = map->flags;
midlmap->hand = -1;
midlmap->hand = MAPS_SUBREGION;
map->addr += left + middle;
map->size = right;
map->hand = -1;
map->hand = MAPS_SUBREGION;
if (!(map->flags & MAP_ANONYMOUS))
map->off += left + middle;
tree_insert(&__maps.maps, &leftmap->tree, __maps_compare);

View file

@ -23,6 +23,7 @@
#include "libc/runtime/runtime.h"
#include "libc/stdio/sysparam.h"
#include "libc/sysv/consts/auxv.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/errfuns.h"
textwindows int sys_msync_nt(char *addr, size_t size, int flags) {
@ -35,14 +36,32 @@ textwindows int sys_msync_nt(char *addr, size_t size, int flags) {
int rc = 0;
__maps_lock();
struct Map *map, *floor;
floor = __maps_floor(addr);
for (map = floor; map && map->addr <= addr + size; map = __maps_next(map)) {
char *beg = MAX(addr, map->addr);
char *end = MIN(addr + size, map->addr + map->size);
if (beg < end)
if (!FlushViewOfFile(beg, end - beg))
rc = -1;
struct Map *map, *next;
if (!(map = __maps_floor(addr)))
if (!(map = __maps_first()))
return true;
for (; map; map = next) {
next = __maps_next(map);
if (!__maps_isalloc(map))
continue;
if (map->flags & MAP_ANONYMOUS)
continue;
if (MAX(addr, map->addr) >= MIN(addr + size, map->addr + map->size))
break; // didn't overlap mapping
// get true size of win32 allocation
size_t allocsize = map->size;
for (struct Map *map2 = next; map2; map2 = __maps_next(map2)) {
if (!__maps_isalloc(map2) && map->addr + allocsize == map2->addr) {
allocsize += map2->size;
} else {
break;
}
}
// perform the flush
if (!FlushViewOfFile(map->addr, allocsize))
rc = -1;
// TODO(jart): FlushFileBuffers too on g_fds handle if MS_SYNC?
}
__maps_unlock();

View file

@ -16,6 +16,7 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/dce.h"
#include "libc/fmt/conv.h"
#include "libc/fmt/itoa.h"
#include "libc/intrin/bsr.h"
@ -51,13 +52,14 @@ void __print_maps(size_t limit) {
char mappingbuf[8];
struct Map *last = 0;
int pagesz = __pagesize;
int gransz = __gransize;
int digs = get_address_digits(pagesz);
for (struct Tree *e = tree_first(__maps.maps); e; e = tree_next(e)) {
struct Map *map = MAP_TREE_CONTAINER(e);
// show gaps between maps
if (last) {
char *beg = last->addr + ((last->size + pagesz - 1) & -pagesz);
char *beg = last->addr + ((last->size + gransz - 1) & -gransz);
char *end = map->addr;
if (end > beg) {
size_t gap = end - beg;
@ -72,8 +74,21 @@ void __print_maps(size_t limit) {
_DescribeMapping(mappingbuf, map->prot, map->flags));
sizefmt(sb, map->size, 1024);
kprintf(" %!sb", sb);
if (map->hand && map->hand != -1)
kprintf(" hand=%ld", map->hand);
if (IsWindows()) {
switch (map->hand) {
case MAPS_RESERVATION:
kprintf(" reservation");
break;
case MAPS_SUBREGION:
break;
case MAPS_VIRTUAL:
kprintf(" virtual");
break;
default:
kprintf(" hand=%ld", map->hand);
break;
}
}
if (map->iscow)
kprintf(" cow");
if (map->readonlyfile)

View file

@ -23,6 +23,7 @@
#include "libc/nt/enum/memflags.h"
#include "libc/nt/memory.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/sysparam.h"
#include "libc/str/str.h"
static const struct DescribeFlags kNtMemState[] = {
@ -46,20 +47,25 @@ const char *DescribeNtMemType(char buf[64], uint32_t x) {
return _DescribeFlags(buf, 64, kNtMemType, ARRAYLEN(kNtMemType), "kNtMem", x);
}
void __print_maps_win32(void) {
void __print_maps_win32(int64_t hProcess, const char *addr, size_t size) {
char *p, b[5][64];
struct NtMemoryBasicInformation mi;
kprintf("%-12s %-12s %10s %16s %16s %32s %32s\n", "Allocation", "BaseAddress",
"RegionSize", "State", "Type", "AllocationProtect", "Protect");
for (p = 0;; p = (char *)mi.BaseAddress + mi.RegionSize) {
bzero(&mi, sizeof(mi));
if (!VirtualQuery(p, &mi, sizeof(mi)))
if (!VirtualQueryEx(hProcess, p, &mi, sizeof(mi)))
break;
sizefmt(b[0], mi.RegionSize, 1024);
kprintf("%.12lx %.12lx %10s %16s %16s %32s %32s\n", mi.AllocationBase,
kprintf("%.12lx %.12lx %10s %16s %16s %32s %32s%s\n", mi.AllocationBase,
mi.BaseAddress, b[0], DescribeNtMemState(b[1], mi.State),
DescribeNtMemType(b[2], mi.Type),
_DescribeNtPageFlags(b[3], mi.AllocationProtect),
_DescribeNtPageFlags(b[4], mi.Protect));
_DescribeNtPageFlags(b[4], mi.Protect),
(mi.State != kNtMemFree &&
MAX(addr, (const char *)mi.BaseAddress) <
MIN(addr + size, (const char *)mi.BaseAddress + mi.RegionSize))
? " [OVERLAPS]"
: "");
}
}

View file

@ -16,6 +16,7 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/atomic.h"
#include "libc/str/str.h"
#include "libc/thread/lock.h"
#include "libc/thread/posixthread.internal.h"
@ -25,11 +26,13 @@
* Unlocks mutex from child process after fork.
*/
int _pthread_mutex_wipe_np(pthread_mutex_t *mutex) {
void *edges = mutex->_edges;
uint64_t word = mutex->_word;
bzero(mutex, sizeof(*mutex));
mutex->_word = MUTEX_UNLOCK(word);
mutex->_edges = edges;
atomic_init(&mutex->_word, MUTEX_UNLOCK(atomic_load_explicit(
&mutex->_word, memory_order_relaxed)));
atomic_init(&mutex->_futex, 0);
mutex->_pid = 0;
mutex->_nsync[0] = 0;
atomic_signal_fence(memory_order_relaxed); // avoid xmm
mutex->_nsync[1] = 0;
return 0;
}

View file

@ -47,28 +47,30 @@
* @asyncsignalsafe
*/
errno_t pthread_setcancelstate(int state, int *oldstate) {
int old;
errno_t err;
struct PosixThread *pt;
if (__tls_enabled && (pt = _pthread_self())) {
if (pt->pt_flags & PT_NOCANCEL) {
old = PTHREAD_CANCEL_DISABLE;
} else if (pt->pt_flags & PT_MASKED) {
old = PTHREAD_CANCEL_MASKED;
} else {
old = PTHREAD_CANCEL_ENABLE;
}
switch (state) {
case PTHREAD_CANCEL_ENABLE:
case PTHREAD_CANCEL_DISABLE:
case PTHREAD_CANCEL_MASKED:
if (oldstate) {
if (pt->pt_flags & PT_NOCANCEL) {
*oldstate = PTHREAD_CANCEL_DISABLE;
} else if (pt->pt_flags & PT_MASKED) {
*oldstate = PTHREAD_CANCEL_MASKED;
} else {
*oldstate = PTHREAD_CANCEL_ENABLE;
}
}
pt->pt_flags &= ~(PT_NOCANCEL | PT_MASKED);
if (state == PTHREAD_CANCEL_MASKED) {
pt->pt_flags |= PT_MASKED;
} else if (state == PTHREAD_CANCEL_DISABLE) {
pt->pt_flags |= PT_NOCANCEL;
}
err = 0;
break;
case PTHREAD_CANCEL_DISABLE:
pt->pt_flags &= ~(PT_NOCANCEL | PT_MASKED);
pt->pt_flags |= PT_NOCANCEL;
err = 0;
break;
case PTHREAD_CANCEL_MASKED:
pt->pt_flags &= ~(PT_NOCANCEL | PT_MASKED);
pt->pt_flags |= PT_MASKED;
err = 0;
break;
default:
@ -76,11 +78,12 @@ errno_t pthread_setcancelstate(int state, int *oldstate) {
break;
}
} else {
if (oldstate) {
*oldstate = 0;
}
old = 0;
err = 0;
}
if (!err)
if (oldstate)
*oldstate = old;
#if IsModeDbg() && 0
STRACE("pthread_setcancelstate(%s, [%s]) → %s",
DescribeCancelState(0, &state), DescribeCancelState(err, oldstate),

View file

@ -28,7 +28,19 @@
static int _rand64_pid;
static unsigned __int128 _rand64_pool;
pthread_mutex_t __rand64_lock_obj = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t __rand64_lock_obj = PTHREAD_MUTEX_INITIALIZER;
// Acquires the file-private mutex that guards the _rand64() state.
void __rand64_lock(void) {
_pthread_mutex_lock(&__rand64_lock_obj);
}
// Releases the mutex taken by __rand64_lock().
void __rand64_unlock(void) {
_pthread_mutex_unlock(&__rand64_lock_obj);
}
// Resets the _rand64() mutex via _pthread_mutex_wipe_np().
// NOTE(review): presumably called in the fork() child so the lock
// starts out unlocked there -- confirm against the caller.
void __rand64_wipe(void) {
_pthread_mutex_wipe_np(&__rand64_lock_obj);
}
/**
* Returns nondeterministic random data.
@ -43,7 +55,7 @@ pthread_mutex_t __rand64_lock_obj = PTHREAD_MUTEX_INITIALIZER;
uint64_t _rand64(void) {
void *p;
uint128_t s;
_pthread_mutex_lock(&__rand64_lock_obj);
__rand64_lock();
if (__pid == _rand64_pid) {
s = _rand64_pool; // normal path
} else {
@ -64,6 +76,6 @@ uint64_t _rand64(void) {
_rand64_pid = __pid;
}
_rand64_pool = (s *= 15750249268501108917ull); // lemur64
_pthread_mutex_unlock(&__rand64_lock_obj);
__rand64_unlock();
return s >> 64;
}

View file

@ -54,7 +54,8 @@ struct Tree *tree_prev(struct Tree *node) {
return parent;
}
static void tree_rotate_left(struct Tree **root, struct Tree *x) {
dontinstrument static void tree_rotate_left(struct Tree **root,
struct Tree *x) {
struct Tree *y = x->right;
x->right = tree_get_left(y);
if (tree_get_left(y))
@ -71,7 +72,8 @@ static void tree_rotate_left(struct Tree **root, struct Tree *x) {
x->parent = y;
}
static void tree_rotate_right(struct Tree **root, struct Tree *y) {
dontinstrument static void tree_rotate_right(struct Tree **root,
struct Tree *y) {
struct Tree *x = tree_get_left(y);
tree_set_left(y, x->right);
if (x->right)
@ -88,7 +90,8 @@ static void tree_rotate_right(struct Tree **root, struct Tree *y) {
x->right = y;
}
static void tree_rebalance_insert(struct Tree **root, struct Tree *node) {
dontinstrument static void tree_rebalance_insert(struct Tree **root,
struct Tree *node) {
struct Tree *uncle;
tree_set_red(node, 1);
while (node != *root && tree_get_red(node->parent)) {
@ -157,8 +160,8 @@ void tree_insert(struct Tree **root, struct Tree *node, tree_cmp_f *cmp) {
}
}
static void tree_transplant(struct Tree **root, struct Tree *u,
struct Tree *v) {
dontinstrument static void tree_transplant(struct Tree **root, struct Tree *u,
struct Tree *v) {
if (!u->parent) {
*root = v;
} else if (u == tree_get_left(u->parent)) {
@ -170,8 +173,9 @@ static void tree_transplant(struct Tree **root, struct Tree *u,
v->parent = u->parent;
}
static void tree_rebalance_remove(struct Tree **root, struct Tree *node,
struct Tree *parent) {
dontinstrument static void tree_rebalance_remove(struct Tree **root,
struct Tree *node,
struct Tree *parent) {
struct Tree *sibling;
while (node != *root && (!node || !tree_get_red(node))) {
if (node == tree_get_left(parent)) {

View file

@ -0,0 +1,50 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2024 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/intrin/describeflags.h"
#include "libc/intrin/strace.h"
#include "libc/macros.h"
#include "libc/mem/alloca.h"
#include "libc/nt/enum/memflags.h"
#include "libc/nt/memory.h"
#include "libc/nt/thunk/msabi.h"
__msabi extern typeof(VirtualAllocEx) *const __imp_VirtualAllocEx;
// Describes the flAllocationType bits passed to VirtualAllocEx() by
// handing a Commit/Reserve/Reset table to _DescribeFlags().
//
// @param buf is a 48 byte output buffer supplied by the caller
// @param x is the allocation type bitmask to describe
// @return whatever _DescribeFlags() returns for `buf`
static const char *DescribeAllocationType(char buf[48], uint32_t x) {
  // static so the table is built once rather than on every call
  static const struct DescribeFlags kAllocationTypeFlags[] = {
      {kNtMemCommit, "Commit"},    //
      {kNtMemReserve, "Reserve"},  //
      {kNtMemReset, "Reset"},      //
  };
  return _DescribeFlags(buf, 48, kAllocationTypeFlags,
                        ARRAYLEN(kAllocationTypeFlags), "kNtMem", x);
}
/**
 * Calls the imported Win32 VirtualAllocEx() on behalf of the caller.
 *
 * A null result is passed through __winerr(), and the call with its
 * arguments and result is logged with NTTRACE().
 *
 * @return pointer returned by the import, or null on failure
 */
void *VirtualAllocEx(int64_t hProcess, void *lpAddress, uint64_t dwSize,
                     uint32_t flAllocationType, uint32_t flProtect) {
  void *lpBase;
  lpBase = __imp_VirtualAllocEx(hProcess, lpAddress, dwSize, flAllocationType,
                                flProtect);
  if (!lpBase) {
    __winerr();
  }
  NTTRACE("VirtualAllocEx(%ld, %p, %'lu, %s, %s) → %p% m", hProcess, lpAddress,
          dwSize, DescribeAllocationType(alloca(48), flAllocationType),
          DescribeNtPageFlags(flProtect), lpBase);
  return lpBase;
}

View file

@ -16,13 +16,8 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/intrin/describeflags.h"
#include "libc/intrin/strace.h"
#include "libc/log/libfatal.internal.h"
#include "libc/nt/memory.h"
__msabi extern typeof(VirtualProtect) *const __imp_VirtualProtect;
#include "libc/nt/runtime.h"
/**
* Protects memory on the New Technology.
@ -31,12 +26,6 @@ __msabi extern typeof(VirtualProtect) *const __imp_VirtualProtect;
textwindows bool32 VirtualProtect(void *lpAddress, uint64_t dwSize,
uint32_t flNewProtect,
uint32_t *lpflOldProtect) {
bool32 bOk;
bOk = __imp_VirtualProtect(lpAddress, dwSize, flNewProtect, lpflOldProtect);
if (!bOk)
__winerr();
NTTRACE("VirtualProtect(%p, %'zu, %s, [%s]) → %hhhd% m", lpAddress, dwSize,
DescribeNtPageFlags(flNewProtect),
DescribeNtPageFlags(*lpflOldProtect), bOk);
return bOk;
return VirtualProtectEx(GetCurrentProcess(), lpAddress, dwSize, flNewProtect,
lpflOldProtect);
}

View file

@ -0,0 +1,43 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/intrin/describeflags.h"
#include "libc/intrin/strace.h"
#include "libc/log/libfatal.internal.h"
#include "libc/nt/memory.h"
__msabi extern typeof(VirtualProtectEx) *const __imp_VirtualProtectEx;
/**
 * Protects memory in a process on the New Technology.
 *
 * @param hProcess is handle of the process whose pages are changed
 * @param lpAddress is base address of the region, passed through as-is
 * @param dwSize is size of the region in bytes
 * @param flNewProtect is new page protection, e.g. kNtPageReadwrite
 * @param lpflOldProtect receives the previous protection; Win32 fails
 *     the call when this is null, so it is only read back after success
 * @return true if the import succeeded, otherwise false after calling
 *     __winerr()
 * @note this wrapper takes care of ABI, STRACE(), and __winerr()
 */
textwindows bool32 VirtualProtectEx(int64_t hProcess, void *lpAddress,
                                    uint64_t dwSize, uint32_t flNewProtect,
                                    uint32_t *lpflOldProtect) {
  bool32 bOk;
  bOk = __imp_VirtualProtectEx(hProcess, lpAddress, dwSize, flNewProtect,
                               lpflOldProtect);
  if (!bOk)
    __winerr();
  // the output parameter is only meaningful (and only guaranteed to be
  // a valid pointer) once win32 has reported success, so don't touch it
  // on the failure path just to build the trace line
  NTTRACE("VirtualProtectEx(%ld, %p, %'zu, %s, [%s]) → %hhhd% m", hProcess,
          lpAddress, dwSize, DescribeNtPageFlags(flNewProtect),
          DescribeNtPageFlags(bOk ? *lpflOldProtect : 0), bOk);
  return bOk;
}

View file

@ -35,8 +35,8 @@ textwindows dontinstrument void __bootstrap_tls(struct CosmoTib *tib,
tib->tib_self = tib;
tib->tib_self2 = tib;
tib->tib_sigmask = -1;
tib->tib_strace = __strace;
tib->tib_ftrace = __ftrace;
tib->tib_strace = -100;
tib->tib_ftrace = -100;
tib->tib_sigstack_size = 57344;
tib->tib_sigstack_addr = bp - 57344;
int tid = __imp_GetCurrentThreadId();

View file

@ -0,0 +1,36 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2024 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/intrin/strace.h"
#include "libc/nt/memory.h"
#include "libc/nt/thunk/msabi.h"
__msabi extern typeof(WriteProcessMemory) *const __imp_WriteProcessMemory;
/**
 * Calls the kernel32 WriteProcessMemory() import on behalf of the caller.
 *
 * @param hProcess is handle of the process whose memory is written
 * @param lpBaseAddress is destination address, passed through as-is
 * @param lpBuffer is source buffer in the calling process
 * @param nSize is number of bytes to write
 * @param opt_out_lpNumberOfBytesWritten is passed through as-is; the
 *     fork() code in this change passes 0 here
 * @return true if the import succeeded, otherwise false after calling
 *     __winerr()
 * @note this wrapper takes care of NTTRACE() and __winerr()
 */
bool32 WriteProcessMemory(int64_t hProcess, void *lpBaseAddress,
                          const void *lpBuffer, uint64_t nSize,
                          uint64_t *opt_out_lpNumberOfBytesWritten) {
  bool32 wrote;
  wrote = __imp_WriteProcessMemory(hProcess, lpBaseAddress, lpBuffer, nSize,
                                   opt_out_lpNumberOfBytesWritten);
  if (!wrote) {
    __winerr();
  }
  NTTRACE("WriteProcessMemory(%ld, %p, %p, %'lu, %p) → %hhhd% m", hProcess,
          lpBaseAddress, lpBuffer, nSize, opt_out_lpNumberOfBytesWritten,
          wrote);
  return wrote;
}

View file

@ -18,8 +18,6 @@
*/
#include "libc/thread/tls.h"
#ifdef __x86_64__
char __tls_enabled;
#endif
#ifndef __x86_64__
unsigned __tls_index;
#endif

View file

@ -1,18 +1,2 @@
#include "libc/nt/codegen.h"
.imp kernel32,__imp_VirtualAllocEx,VirtualAllocEx
.text.windows
.ftrace1
VirtualAllocEx:
.ftrace2
#ifdef __x86_64__
push %rbp
mov %rsp,%rbp
mov __imp_VirtualAllocEx(%rip),%rax
jmp __sysv2nt6
#elif defined(__aarch64__)
mov x0,#0
ret
#endif
.endfn VirtualAllocEx,globl
.previous

View file

@ -0,0 +1,2 @@
#include "libc/nt/codegen.h"
.imp kernel32,__imp_VirtualProtectEx,VirtualProtectEx

View file

@ -0,0 +1,18 @@
#include "libc/nt/codegen.h"
.imp kernel32,__imp_VirtualQueryEx,VirtualQueryEx
// Thunk exposing the kernel32 VirtualQueryEx import as a callable
// symbol named VirtualQueryEx, emitted into the .text.windows section.
.text.windows
// NOTE(review): .ftrace1/.ftrace2 presumably come from codegen.h and
// reserve the function tracing hook points; confirm there.
.ftrace1
VirtualQueryEx:
.ftrace2
#ifdef __x86_64__
// Establish a frame, load the import pointer into %rax, then jump to
// __sysv2nt rather than calling the import directly.
// NOTE(review): __sysv2nt presumably translates the System V calling
// convention to the Microsoft x64 one for this four-argument import;
// confirm against its definition.
push %rbp
mov %rsp,%rbp
mov __imp_VirtualQueryEx(%rip),%rax
jmp __sysv2nt
#elif defined(__aarch64__)
// The aarch64 build does not call the import: it just returns 0.
mov x0,#0
ret
#endif
.endfn VirtualQueryEx,globl
.previous

View file

@ -0,0 +1,2 @@
#include "libc/nt/codegen.h"
.imp kernel32,__imp_WriteProcessMemory,WriteProcessMemory

View file

@ -9,6 +9,7 @@
# KERNEL32.DLL
#
# Name Actual DLL Arity
imp '' CreateDirectoryW kernel32 2
imp '' CreateFileA kernel32 7
imp '' CreateFileMappingNumaW kernel32 7
@ -40,9 +41,12 @@ imp '' SetCurrentDirectoryW kernel32 1
imp '' TerminateProcess kernel32 2
imp '' UnlockFileEx kernel32 5
imp '' UnmapViewOfFile kernel32 1
imp '' VirtualAllocEx kernel32 5
imp '' VirtualProtect kernel32 4
imp '' VirtualProtectEx kernel32 5
imp '' WaitForMultipleObjects kernel32 4
imp '' WaitForSingleObject kernel32 2
imp '' WriteProcessMemory kernel32 5
imp 'AcquireSRWLockExclusive' AcquireSRWLockExclusive kernel32 1
imp 'AcquireSRWLockShared' AcquireSRWLockShared kernel32 1
imp 'AddDllDirectory' AddDllDirectory kernel32 1
@ -185,8 +189,8 @@ imp 'GetWindowsDirectory' GetWindowsDirectoryW kernel32 2
imp 'GetWindowsDirectoryA' GetWindowsDirectoryA kernel32 2
imp 'GlobalAlloc' GlobalAlloc kernel32 2
imp 'GlobalFree' GlobalFree kernel32 1
imp 'GlobalMemoryStatusEx' GlobalMemoryStatusEx kernel32 1
imp 'GlobalLock' GlobalLock kernel32 1
imp 'GlobalMemoryStatusEx' GlobalMemoryStatusEx kernel32 1
imp 'GlobalUnlock' GlobalUnlock kernel32 1
imp 'HeapAlloc' HeapAlloc kernel32 3
imp 'HeapCompact' HeapCompact kernel32 2
@ -300,10 +304,10 @@ imp 'UnmapViewOfFile2' UnmapViewOfFile2 kernel32 2
imp 'UnmapViewOfFileEx' UnmapViewOfFileEx kernel32 3
imp 'UpdateProcThreadAttribute' UpdateProcThreadAttribute kernel32 7
imp 'VirtualAlloc' VirtualAlloc kernel32 4
imp 'VirtualAllocEx' VirtualAllocEx kernel32 5
imp 'VirtualFree' VirtualFree kernel32 3
imp 'VirtualLock' VirtualLock kernel32 2
imp 'VirtualQuery' VirtualQuery kernel32 3
imp 'VirtualQueryEx' VirtualQueryEx kernel32 4
imp 'VirtualUnlock' VirtualUnlock kernel32 2
imp 'WaitForMultipleObjectsEx' WaitForMultipleObjectsEx kernel32 5
imp 'WaitForSingleObjectEx' WaitForSingleObjectEx kernel32 3

View file

@ -71,8 +71,17 @@ bool32 VirtualUnlock(const void *lpAddress, size_t dwSize);
uint64_t VirtualQuery(const void *lpAddress,
struct NtMemoryBasicInformation *lpBuffer,
uint64_t dwLength);
uint64_t VirtualQueryEx(int64_t hProcess, const void *lpAddress,
struct NtMemoryBasicInformation *lpBuffer,
uint64_t dwLength);
void *VirtualAllocEx(int64_t hProcess, void *lpAddress, uint64_t dwSize,
uint32_t flAllocationType, uint32_t flProtect);
bool32 VirtualProtectEx(int64_t hProcess, void *lpAddress, uint64_t dwSize,
uint32_t flNewProtect, uint32_t *out_lpflOldProtect);
bool32 WriteProcessMemory(int64_t hProcess, void *lpBaseAddress,
const void *lpBuffer, uint64_t nSize,
uint64_t *opt_out_lpNumberOfBytesWritten);
int64_t GetProcessHeap(void);
void *HeapAlloc(int64_t hHeap, uint32_t dwFlags, size_t dwBytes) __wur;

View file

@ -16,61 +16,53 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "ape/sections.internal.h"
#include "libc/assert.h"
#include "libc/atomic.h"
#include "libc/calls/internal.h"
#include "libc/calls/sig.internal.h"
#include "libc/calls/state.internal.h"
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/errno.h"
#include "libc/fmt/itoa.h"
#include "libc/intrin/atomic.h"
#include "libc/intrin/directmap.h"
#include "libc/intrin/dll.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/maps.h"
#include "libc/intrin/strace.h"
#include "libc/intrin/tree.h"
#include "libc/intrin/weaken.h"
#include "libc/limits.h"
#include "libc/macros.h"
#include "libc/nt/createfile.h"
#include "libc/nt/enum/accessmask.h"
#include "libc/nt/enum/creationdisposition.h"
#include "libc/nt/enum/filemapflags.h"
#include "libc/nt/enum/memflags.h"
#include "libc/nt/enum/pageflags.h"
#include "libc/nt/enum/processcreationflags.h"
#include "libc/nt/enum/startf.h"
#include "libc/nt/errors.h"
#include "libc/nt/ipc.h"
#include "libc/nt/memory.h"
#include "libc/nt/process.h"
#include "libc/nt/runtime.h"
#include "libc/nt/signals.h"
#include "libc/nt/struct/ntexceptionpointers.h"
#include "libc/nt/struct/processinformation.h"
#include "libc/nt/struct/startupinfo.h"
#include "libc/nt/thread.h"
#include "libc/nt/thunk/msabi.h"
#include "libc/proc/ntspawn.h"
#include "libc/nt/winsock.h"
#include "libc/proc/proc.internal.h"
#include "libc/runtime/internal.h"
#include "libc/runtime/memtrack.internal.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/stack.h"
#include "libc/runtime/symbols.internal.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/at.h"
#include "libc/sysv/consts/limits.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/consts/sig.h"
#include "libc/sysv/errfuns.h"
#include "libc/thread/itimer.h"
#include "libc/thread/posixthread.internal.h"
#include "libc/thread/tls.h"
#ifdef __x86_64__
extern long __klog_handle;
__msabi extern typeof(GetCurrentProcessId) *const __imp_GetCurrentProcessId;
extern bool __winmain_isfork;
extern intptr_t __winmain_jmpbuf[5];
extern struct CosmoTib *__winmain_tib;
static textwindows wontreturn void AbortFork(const char *func, void *addr) {
__msabi extern typeof(TlsAlloc) *const __imp_TlsAlloc;
__msabi extern typeof(MapViewOfFileEx) *const __imp_MapViewOfFileEx;
__msabi extern typeof(VirtualProtectEx) *const __imp_VirtualProtectEx;
textwindows wontreturn static void AbortFork(const char *func, void *addr) {
#if SYSDEBUG
kprintf("fork() %!s(%lx) failed with win32 error %u\n", func, addr,
GetLastError());
@ -78,93 +70,10 @@ static textwindows wontreturn void AbortFork(const char *func, void *addr) {
TerminateThisProcess(SIGSTKFLT);
}
static textwindows char16_t *ParseInt(char16_t *p, int64_t *x) {
*x = 0;
while (*p == ' ')
p++;
while ('0' <= *p && *p <= '9') {
*x *= 10;
*x += *p++ - '0';
}
return p;
}
static inline textwindows ssize_t ForkIo(int64_t h, char *p, size_t n,
bool32 (*f)(int64_t, void *, uint32_t,
uint32_t *,
struct NtOverlapped *)) {
size_t i;
uint32_t x;
for (i = 0; i < n; i += x) {
if (!f(h, p + i, n - i, &x, 0))
return __winerr();
if (!x)
break;
}
return i;
}
static dontinline textwindows ssize_t ForkIo2(
int64_t h, void *buf, size_t n,
bool32 (*fn)(int64_t, void *, uint32_t, uint32_t *, struct NtOverlapped *),
const char *sf, bool ischild) {
ssize_t rc = ForkIo(h, buf, n, fn);
if (ischild) {
// prevent crashes
__tls_enabled = false;
__pid = __imp_GetCurrentProcessId();
__klog_handle = 0;
__maps.maps = 0;
}
NTTRACE("%s(%ld, %p, %'zu) → %'zd% m", sf, h, buf, n, rc);
return rc;
}
static dontinline textwindows bool WriteAll(int64_t h, void *buf, size_t n) {
bool ok;
ok = ForkIo2(h, buf, n, (void *)WriteFile, "WriteFile", false) != -1;
if (!ok)
STRACE("fork() failed in parent due to WriteAll(%ld, %p, %'zu) → %u", h,
buf, n, GetLastError());
return ok;
}
static textwindows dontinline void ReadOrDie(int64_t h, void *buf, size_t n) {
ssize_t got;
if ((got = ForkIo2(h, buf, n, ReadFile, "ReadFile", true)) == -1)
AbortFork("ReadFile1", buf);
if (got != n)
AbortFork("ReadFile2", buf);
}
static textwindows int64_t MapOrDie(uint32_t prot, uint64_t size) {
int64_t h;
for (;;) {
if ((h = CreateFileMapping(-1, 0, prot, size >> 32, size, 0)))
return h;
if (GetLastError() == kNtErrorAccessDenied) {
switch (prot) {
case kNtPageExecuteWritecopy:
prot = kNtPageWritecopy;
continue;
case kNtPageExecuteReadwrite:
prot = kNtPageReadwrite;
continue;
case kNtPageExecuteRead:
prot = kNtPageReadonly;
continue;
default:
break;
}
}
AbortFork("MapOrDie", (void *)size);
}
}
static textwindows void ViewOrDie(int64_t h, uint32_t access, size_t pos,
textwindows static void ViewOrDie(int64_t h, uint32_t access, size_t pos,
size_t size, void *base) {
TryAgain:
if (!MapViewOfFileEx(h, access, pos >> 32, pos, size, base)) {
if (!__imp_MapViewOfFileEx(h, access, pos >> 32, pos, size, base)) {
if ((access & kNtFileMapExecute) &&
GetLastError() == kNtErrorAccessDenied) {
access &= ~kNtFileMapExecute;
@ -174,302 +83,215 @@ TryAgain:
}
}
static __msabi textwindows int OnForkCrash(struct NtExceptionPointers *ep) {
kprintf("error: fork() child crashed!%n"
"\tExceptionCode = %#x%n"
"\tRip = %x%n",
ep->ExceptionRecord->ExceptionCode,
ep->ContextRecord ? ep->ContextRecord->Rip : -1);
TerminateThisProcess(SIGSTKFLT);
}
textwindows static void sys_fork_nt_child(void) {
static textwindows void *Malloc(size_t size) {
return HeapAlloc(GetProcessHeap(), 0, size);
}
// setup runtime
__klog_handle = 0;
__tls_index = __imp_TlsAlloc();
__set_tls_win32(__winmain_tib);
__tls_enabled = true;
textwindows void WinMainForked(void) {
intptr_t jb[5];
int64_t reader;
int64_t savetsc;
uint32_t varlen;
atomic_ulong *sigproc;
char16_t fvar[21 + 1 + 21 + 1];
struct Fds *fds = __veil("r", &g_fds);
// resurrect shared memory mappings
struct Map *next;
for (struct Map *map = __maps_first(); map; map = next) {
next = __maps_next(map);
// save signal pointer
sigproc = __sig.process;
// check to see if the process was actually forked
// this variable should have the pipe handle numba
varlen = GetEnvironmentVariable(u"_FORK", fvar, ARRAYLEN(fvar));
if (!varlen || varlen >= ARRAYLEN(fvar))
return;
/* STRACE("WinMainForked()"); */
SetEnvironmentVariable(u"_FORK", NULL);
#if SYSDEBUG
int64_t oncrash = AddVectoredExceptionHandler(1, (void *)OnForkCrash);
#endif
ParseInt(fvar, &reader);
// read the cpu state from the parent process & plus
ReadOrDie(reader, jb, sizeof(jb));
// read memory mappings from parent process
struct Tree *maps = 0;
for (;;) {
struct Map *map = Malloc(sizeof(struct Map));
ReadOrDie(reader, map, sizeof(struct Map));
if (map->addr == MAP_FAILED)
break;
tree_insert(&maps, &map->tree, __maps_compare);
}
// map memory into process
int granularity = __gransize;
for (struct Tree *e = tree_first(maps); e; e = tree_next(e)) {
struct Map *map = MAP_TREE_CONTAINER(e);
if ((uintptr_t)map->addr & (granularity - 1))
continue;
// get true length in case mprotect() chopped up actual win32 map
size_t size = map->size;
for (struct Tree *e2 = tree_next(e); e2; e2 = tree_next(e2)) {
struct Map *map2 = MAP_TREE_CONTAINER(e2);
if (map2->hand == -1 && map->addr + size == map2->addr) {
size += map2->size;
} else {
break;
// cleanup nofork mappings
if (map->flags & MAP_NOFORK) {
if ((map->flags & MAP_TYPE) != MAP_FILE) {
tree_remove(&__maps.maps, &map->tree);
__maps.pages -= (map->size + __pagesize - 1) / __pagesize;
__maps.count -= 1;
__maps_free(map);
}
continue;
}
// obtain the most permissive access possible
unsigned prot, access;
if (map->readonlyfile) {
prot = kNtPageExecuteRead;
access = kNtFileMapRead | kNtFileMapExecute;
} else {
prot = kNtPageExecuteReadwrite;
access = kNtFileMapWrite | kNtFileMapExecute;
}
// private maps already copied/protected to child by parent
if ((map->flags & MAP_TYPE) != MAP_SHARED) {
// we don't need to close the map handle because sys_mmap_nt
// doesn't mark it inheritable across fork() for MAP_PRIVATE
map->hand = MapOrDie(prot, size);
ViewOrDie(map->hand, access, 0, size, map->addr);
ReadOrDie(reader, map->addr, size);
} else {
// we can however safely inherit MAP_SHARED with zero copy
ViewOrDie(map->hand, access, map->off, size, map->addr);
// it's not copy-on-write anymore
map->iscow = false;
// but it used VirtualAlloc() so munmap() must VirtualFree()
if (map->hand > 0) {
CloseHandle(map->hand);
map->hand = MAPS_VIRTUAL;
}
continue;
}
}
// read the .data and .bss program image sections
savetsc = kStartTsc;
ReadOrDie(reader, __data_start, __data_end - __data_start);
ReadOrDie(reader, __bss_start, __bss_end - __bss_start);
kStartTsc = savetsc;
__tls_enabled = false;
// handle granularity aligned shared mapping
if (__maps_isalloc(map)) {
// fixup memory manager
__maps.maps = 0;
__maps.freed = 0;
__maps.count = 0;
__maps.pages = 0;
for (struct Tree *e = tree_first(maps); e; e = tree_next(e)) {
struct Map *map = MAP_TREE_CONTAINER(e);
__maps.count += 1;
__maps.pages += (map->size + __pagesize - 1) / __pagesize;
// get true size of win32 allocation
size_t allocsize = map->size;
for (struct Map *map2 = next; map2; map2 = __maps_next(map2)) {
if (!__maps_isalloc(map2) && map->addr + allocsize == map2->addr) {
allocsize += map2->size;
} else {
break;
}
}
// create allocation with most permissive access possible
// if we don't create as rwx then we can't mprotect(rwx) later
unsigned access;
if (map->readonlyfile) {
access = kNtFileMapRead | kNtFileMapExecute;
} else {
access = kNtFileMapWrite | kNtFileMapExecute;
}
// resurrect copyless memory via inherited win32 handle
ViewOrDie(map->hand, access, map->off, allocsize, map->addr);
}
// restore memory protection status on pages
unsigned old_protect;
if (!VirtualProtect(map->addr, map->size, __prot2nt(map->prot, map->iscow),
&old_protect))
AbortFork("VirtualProtect", map->addr);
if (!__imp_VirtualProtectEx(GetCurrentProcess(), map->addr, map->size,
__prot2nt(map->prot, false), &old_protect))
AbortFork("VirtualProtectEx", map->addr);
}
__maps.maps = maps;
__maps_init();
// mitosis complete
if (!CloseHandle(reader))
AbortFork("CloseHandle", (void *)reader);
// function tracing is now safe
ftrace_enabled(+1);
// initialize winsock
void WinSockFork(void);
if (_weaken(WinSockFork))
_weaken(WinSockFork)();
// rewrap the stdin named pipe hack
// since the handles closed on fork
fds->p[0].handle = GetStdHandle(kNtStdInputHandle);
fds->p[1].handle = GetStdHandle(kNtStdOutputHandle);
fds->p[2].handle = GetStdHandle(kNtStdErrorHandle);
g_fds.p[0].handle = GetStdHandle(kNtStdInputHandle);
g_fds.p[1].handle = GetStdHandle(kNtStdOutputHandle);
g_fds.p[2].handle = GetStdHandle(kNtStdErrorHandle);
}
// restore signal pointer
__sig.process = sigproc;
textwindows static int sys_fork_nt_parent(uint32_t dwCreationFlags) {
// restore the crash reporting stuff
#if SYSDEBUG
RemoveVectoredExceptionHandler(oncrash);
#endif
// allocate process object
struct Proc *proc;
if (!(proc = __proc_new()))
return -1;
// jump back into function below
__builtin_longjmp(jb, 1);
// get path of this executable
char16_t prog[PATH_MAX];
unsigned got = GetModuleFileName(0, prog, ARRAYLEN(prog));
if (!got || got >= ARRAYLEN(prog)) {
dll_make_first(&__proc.free, &proc->elem);
enomem();
return -1;
}
// spawn new process in suspended state
struct NtProcessInformation procinfo;
struct NtStartupInfo startinfo = {
.cb = sizeof(struct NtStartupInfo),
.dwFlags = kNtStartfUsestdhandles,
.hStdInput = g_fds.p[0].handle,
.hStdOutput = g_fds.p[1].handle,
.hStdError = g_fds.p[2].handle,
};
if (!CreateProcess(prog, 0, 0, 0, true,
dwCreationFlags | kNtCreateSuspended |
kNtInheritParentAffinity |
kNtCreateUnicodeEnvironment |
GetPriorityClass(GetCurrentProcess()),
0, 0, &startinfo, &procinfo)) {
STRACE("fork() %s() failed w/ %m %d", "CreateProcess", GetLastError());
dll_make_first(&__proc.free, &proc->elem);
if (errno != ENOMEM)
eagain();
return -1;
}
// ensure process can be signaled before returning
UnmapViewOfFile(__sig_map_process(procinfo.dwProcessId, kNtOpenAlways));
// let's go
bool ok = true;
uint32_t child_old_protect;
uint32_t parent_old_protect;
// copy memory manager maps
for (struct MapSlab *slab =
atomic_load_explicit(&__maps.slabs, memory_order_acquire);
slab; slab = slab->next) {
ok = ok && !!VirtualAllocEx(procinfo.hProcess, slab, MAPS_SIZE,
kNtMemReserve | kNtMemCommit, kNtPageReadwrite);
ok =
ok && !!WriteProcessMemory(procinfo.hProcess, slab, slab, MAPS_SIZE, 0);
}
// copy private memory maps
for (struct Map *map = __maps_first(); map; map = __maps_next(map)) {
if ((map->flags & MAP_TYPE) == MAP_SHARED)
continue;
if ((map->flags & MAP_NOFORK) && (map->flags & MAP_TYPE) != MAP_FILE)
continue;
if (__maps_isalloc(map)) {
size_t allocsize = map->size;
for (struct Map *m2 = __maps_next(map); m2; m2 = __maps_next(m2)) {
if (!__maps_isalloc(m2) && map->addr + allocsize == m2->addr) {
allocsize += m2->size;
} else {
break;
}
}
if ((map->flags & MAP_NOFORK) && (map->flags & MAP_TYPE) == MAP_FILE) {
ok = ok && !!VirtualProtectEx(procinfo.hProcess, map->addr, allocsize,
kNtPageReadwrite, &child_old_protect);
} else {
ok = ok && !!VirtualAllocEx(procinfo.hProcess, map->addr, allocsize,
kNtMemReserve | kNtMemCommit,
kNtPageExecuteReadwrite);
}
}
if (!(map->prot & PROT_READ))
ok = ok && !!VirtualProtect(map->addr, map->size, kNtPageReadwrite,
&parent_old_protect);
ok = ok && !!WriteProcessMemory(procinfo.hProcess, map->addr, map->addr,
map->size, 0);
ok = ok &&
!!VirtualProtectEx(procinfo.hProcess, map->addr, map->size,
__prot2nt(map->prot, false), &child_old_protect);
if (!(map->prot & PROT_READ))
ok = ok && !!VirtualProtect(map->addr, map->size, parent_old_protect,
&parent_old_protect);
}
// set process loose
ok = ok && ResumeThread(procinfo.hThread) != -1u;
ok &= !!CloseHandle(procinfo.hThread);
// return pid of new process
if (ok) {
proc->wasforked = true;
proc->handle = procinfo.hProcess;
proc->pid = procinfo.dwProcessId;
__proc_add(proc);
return procinfo.dwProcessId;
} else {
if (errno != ENOMEM)
eagain(); // posix fork() only specifies two errors
TerminateProcess(procinfo.hProcess, SIGKILL);
CloseHandle(procinfo.hProcess);
dll_make_first(&__proc.free, &proc->elem);
return -1;
}
}
textwindows int sys_fork_nt(uint32_t dwCreationFlags) {
char ok;
char **args;
int rc = -1;
intptr_t jb[5];
struct Proc *proc;
struct CosmoTib *tib;
char16_t pipename[64];
int64_t reader, writer;
struct NtStartupInfo startinfo;
struct NtProcessInformation procinfo;
char *p, forkvar[6 + 21 + 1 + 21 + 1];
tib = __get_tls();
if (!(proc = __proc_new()))
return -1;
ftrace_enabled(-1);
strace_enabled(-1);
if (!__builtin_setjmp(jb)) {
reader = CreateNamedPipe(__create_pipe_name(pipename), kNtPipeAccessInbound,
kNtPipeTypeByte | kNtPipeReadmodeByte, 1, PIPE_BUF,
PIPE_BUF, 0, &kNtIsInheritable);
writer = CreateFile(pipename, kNtGenericWrite, 0, 0, kNtOpenExisting, 0, 0);
if (reader != -1 && writer != -1) {
p = stpcpy(forkvar, "_FORK=");
p = FormatUint64(p, reader);
bzero(&startinfo, sizeof(startinfo));
startinfo.cb = sizeof(struct NtStartupInfo);
startinfo.dwFlags = kNtStartfUsestdhandles;
startinfo.hStdInput = g_fds.p[0].handle;
startinfo.hStdOutput = g_fds.p[1].handle;
startinfo.hStdError = g_fds.p[2].handle;
args = __argv;
#if SYSDEBUG
int i;
// If --strace was passed to this program, then propagate it to the
// forked process since the flag was removed by __intercept_flag
if (strace_enabled(0) > 0) {
int n;
for (n = 0; args[n];)
++n;
#pragma GCC push_options
#pragma GCC diagnostic ignored "-Walloca-larger-than="
int nbytes = (n + 2) * sizeof(char *);
char **args2 = alloca(nbytes);
CheckLargeStackAllocation(args2, nbytes);
#pragma GCC pop_options
for (i = 0; i < n; ++i)
args2[i] = args[i];
args2[i++] = "--strace";
args2[i] = 0;
args = args2;
}
#endif
NTTRACE("STARTING SPAWN");
int spawnrc = ntspawn(&(struct NtSpawnArgs){
AT_FDCWD, GetProgramExecutableName(), args, environ,
(char *[]){forkvar, 0}, dwCreationFlags, 0, 0, 0, 0, &startinfo,
&procinfo});
if (spawnrc != -1) {
CloseHandle(procinfo.hThread);
ok = WriteAll(writer, jb, sizeof(jb));
// this list will be populated with the maps we're transferring
for (struct Map *map = __maps_first(); ok && map;
map = __maps_next(map)) {
if (map->flags & MAP_NOFORK)
continue;
if (MAX((char *)__executable_start, map->addr) <
MIN((char *)_end, map->addr + map->size))
continue; // executable image is loaded by windows
ok = WriteAll(writer, map, sizeof(*map));
}
// send a terminating Map struct to child
if (ok) {
struct Map map;
map.addr = MAP_FAILED;
ok = WriteAll(writer, &map, sizeof(map));
}
// now write content of each map to child
int granularity = __gransize;
for (struct Map *map = __maps_first(); ok && map;
map = __maps_next(map)) {
if (map->flags & MAP_NOFORK)
continue;
// we only need to worry about the base mapping
if ((uintptr_t)map->addr & (granularity - 1))
continue;
if (MAX((char *)__executable_start, map->addr) <
MIN((char *)_end, map->addr + map->size))
continue; // executable image is loaded by windows
// shared mappings don't need to be copied
if ((map->flags & MAP_TYPE) == MAP_SHARED)
continue;
// get true length in case mprotect() chopped up actual win32 map
size_t size = map->size;
for (struct Map *map2 = __maps_next(map); map2;
map2 = __maps_next(map2)) {
if (map2->hand == -1 && map->addr + size == map2->addr) {
size += map2->size;
} else {
break;
}
}
for (struct Map *map2 = map; ok && map2; map2 = __maps_next(map2)) {
if (!(map2->prot & PROT_READ))
if (map->addr >= map2->addr && map->addr < map->addr + size)
ok = VirtualProtect(
map2->addr, map2->size,
__prot2nt(map2->prot | PROT_READ, map2->iscow),
&map2->visited);
}
if (ok)
ok = WriteAll(writer, map->addr, size);
for (struct Map *map2 = map; ok && map2; map2 = __maps_next(map2)) {
if (!(map2->prot & PROT_READ))
if (map->addr >= map2->addr && map->addr < map->addr + size)
ok = VirtualProtect(map2->addr, map2->size, map2->visited,
&map2->visited);
}
}
if (ok)
ok = WriteAll(writer, __data_start, __data_end - __data_start);
if (ok)
ok = WriteAll(writer, __bss_start, __bss_end - __bss_start);
if (ok) {
if (!CloseHandle(writer))
ok = false;
writer = -1;
}
if (ok) {
proc->wasforked = true;
proc->handle = procinfo.hProcess;
rc = proc->pid = procinfo.dwProcessId;
__proc_add(proc);
} else {
TerminateProcess(procinfo.hProcess, SIGKILL);
CloseHandle(procinfo.hProcess);
rc = -1;
}
}
}
if (reader != -1)
CloseHandle(reader);
if (writer != -1)
CloseHandle(writer);
if (rc == -1 && errno != ENOMEM)
eagain(); // posix fork() only specifies two errors
int rc;
__winmain_isfork = true;
__winmain_tib = __get_tls();
if (!__builtin_setjmp(__winmain_jmpbuf)) {
rc = sys_fork_nt_parent(dwCreationFlags);
} else {
sys_fork_nt_child();
rc = 0;
// re-apply code morphing for thread-local storage
__tls_index = TlsAlloc();
__set_tls_win32(tib);
__morph_tls();
__tls_enabled = true;
// the child's pending signals is initially empty
atomic_store_explicit(&tib->tib_sigpending, 0, memory_order_relaxed);
// re-apply code morphing for function tracing
if (ftrace_stackdigs)
_weaken(__hook)(_weaken(ftrace_hook), _weaken(GetSymbolTable)());
}
if (rc == -1)
dll_make_first(&__proc.free, &proc->elem);
ftrace_enabled(+1);
strace_enabled(+1);
__winmain_isfork = false;
return rc;
}

View file

@ -39,6 +39,7 @@
#include "libc/nt/thunk/msabi.h"
#include "libc/proc/proc.internal.h"
#include "libc/runtime/internal.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/syslib.internal.h"
#include "libc/stdio/internal.h"
#include "libc/str/str.h"
@ -52,13 +53,16 @@
__msabi extern typeof(GetCurrentProcessId) *const __imp_GetCurrentProcessId;
extern pthread_mutex_t __cxa_lock_obj;
extern pthread_mutex_t __dlopen_lock_obj;
extern pthread_mutex_t __pthread_lock_obj;
extern pthread_mutex_t __rand64_lock_obj;
extern pthread_mutex_t __sig_worker_lock;
void __rand64_lock(void);
void __rand64_unlock(void);
void __rand64_wipe(void);
void __dlopen_lock(void);
void __dlopen_unlock(void);
void __dlopen_wipe(void);
// first and last and always
// it is the lord of all locks
@ -111,34 +115,46 @@ static void fork_prepare(void) {
if (_weaken(_pthread_onfork_prepare))
_weaken(_pthread_onfork_prepare)();
fork_prepare_stdio();
__localtime_lock();
__dlopen_lock();
if (_weaken(__localtime_lock))
_weaken(__localtime_lock)();
if (_weaken(__dlopen_lock))
_weaken(__dlopen_lock)();
if (_weaken(cosmo_stack_lock))
_weaken(cosmo_stack_lock)();
__cxa_lock();
__gdtoa_lock1();
__gdtoa_lock();
if (_weaken(__gdtoa_lock)) {
_weaken(__gdtoa_lock1)();
_weaken(__gdtoa_lock)();
}
_pthread_lock();
dlmalloc_pre_fork();
if (_weaken(dlmalloc_pre_fork))
_weaken(dlmalloc_pre_fork)();
__fds_lock();
_pthread_mutex_lock(&__rand64_lock_obj);
if (_weaken(__rand64_lock))
_weaken(__rand64_lock)();
__maps_lock();
LOCKTRACE("READY TO LOCK AND ROLL");
}
static void fork_parent(void) {
__maps_unlock();
_pthread_mutex_unlock(&__rand64_lock_obj);
if (_weaken(__rand64_unlock))
_weaken(__rand64_unlock)();
__fds_unlock();
dlmalloc_post_fork_parent();
if (_weaken(dlmalloc_post_fork_parent))
_weaken(dlmalloc_post_fork_parent)();
_pthread_unlock();
__gdtoa_unlock();
__gdtoa_unlock1();
if (_weaken(__gdtoa_unlock)) {
_weaken(__gdtoa_unlock)();
_weaken(__gdtoa_unlock1)();
}
__cxa_unlock();
if (_weaken(cosmo_stack_unlock))
_weaken(cosmo_stack_unlock)();
__dlopen_unlock();
__localtime_unlock();
if (_weaken(__dlopen_unlock))
_weaken(__dlopen_unlock)();
if (_weaken(__localtime_unlock))
_weaken(__localtime_unlock)();
fork_parent_stdio();
if (_weaken(_pthread_onfork_parent))
_weaken(_pthread_onfork_parent)();
@ -146,18 +162,23 @@ static void fork_parent(void) {
}
static void fork_child(void) {
_pthread_mutex_wipe_np(&__dlopen_lock_obj);
_pthread_mutex_wipe_np(&__rand64_lock_obj);
if (_weaken(__rand64_wipe))
_weaken(__rand64_wipe)();
_pthread_mutex_wipe_np(&__fds_lock_obj);
dlmalloc_post_fork_child();
_pthread_mutex_wipe_np(&__gdtoa_lock_obj);
_pthread_mutex_wipe_np(&__gdtoa_lock1_obj);
if (_weaken(__gdtoa_wipe)) {
_weaken(__gdtoa_wipe)();
_weaken(__gdtoa_wipe1)();
}
fork_child_stdio();
_pthread_mutex_wipe_np(&__pthread_lock_obj);
_pthread_mutex_wipe_np(&__cxa_lock_obj);
if (_weaken(cosmo_stack_wipe))
_weaken(cosmo_stack_wipe)();
_pthread_mutex_wipe_np(&__localtime_lock_obj);
if (_weaken(__dlopen_wipe))
_weaken(__dlopen_wipe)();
if (_weaken(__localtime_wipe))
_weaken(__localtime_wipe)();
if (IsWindows()) {
// we don't bother locking the proc/itimer/sig locks above since
// their state is reset in the forked child. nothing to protect.
@ -174,12 +195,9 @@ static void fork_child(void) {
}
int _fork(uint32_t dwCreationFlags) {
long micros;
struct Dll *e;
struct timespec started;
int ax, dx, tid, parent;
parent = __pid;
started = timespec_mono();
BLOCK_SIGNALS;
fork_prepare();
if (!IsWindows()) {
@ -187,7 +205,6 @@ int _fork(uint32_t dwCreationFlags) {
} else {
ax = sys_fork_nt(dwCreationFlags);
}
micros = timespec_tomicros(timespec_sub(timespec_mono(), started));
if (!ax) {
// get new process id
@ -237,11 +254,14 @@ int _fork(uint32_t dwCreationFlags) {
}
atomic_init(&tib->tib_syshand, syshand);
// the child's set of pending signals is initially empty
atomic_init(&tib->tib_sigpending, 0);
// we can't be canceled if the canceler no longer exists
atomic_init(&pt->pt_canceled, false);
// forget locks
memset(tib->tib_locks, 0, sizeof(tib->tib_locks));
bzero(tib->tib_locks, sizeof(tib->tib_locks));
// run user fork callbacks
fork_child();
@ -256,11 +276,11 @@ int _fork(uint32_t dwCreationFlags) {
}
}
STRACE("fork() → 0 (child of %d; took %ld us)", parent, micros);
STRACE("fork() → 0 (child of %d)", parent);
} else {
// this is the parent process
fork_parent();
STRACE("fork() → %d% m (took %ld us)", ax, micros);
STRACE("fork() → %d% m", ax);
}
ALLOW_SIGNALS;
return ax;

View file

@ -95,7 +95,7 @@ int ftrace_install(void) libcesque;
int ftrace_enabled(int) libcesque;
int strace_enabled(int) libcesque;
void __print_maps(size_t) libcesque;
void __print_maps_win32(void) libcesque;
void __print_maps_win32(int64_t, const char *, size_t) libcesque;
void __printargs(const char *) libcesque;
/* builtin sh-like system/popen dsl */
int _cocmd(int, char **, char **) libcesque;

View file

@ -52,6 +52,7 @@
#include "libc/sock/internal.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/prot.h"
#include "libc/thread/tls.h"
#ifdef __x86_64__
#define abi __msabi textwindows dontinstrument
@ -87,11 +88,15 @@ void __stack_call(int, char **, char **, long (*)[2],
void (*)(int, char **, char **, long (*)[2]),
intptr_t) wontreturn;
bool __winmain_isfork;
intptr_t __winmain_jmpbuf[5];
struct CosmoTib *__winmain_tib;
// returns 1 if c is an ascii letter (A-Z or a-z), otherwise 0
__funline int IsAlpha(int c) {
  if ('a' <= c && c <= 'z')
    return 1;
  return 'A' <= c && c <= 'Z';
}
static abi char16_t *StrStr(const char16_t *haystack, const char16_t *needle) {
abi static char16_t *StrStr(const char16_t *haystack, const char16_t *needle) {
size_t i;
for (;;) {
for (i = 0;; ++i) {
@ -108,13 +113,13 @@ static abi char16_t *StrStr(const char16_t *haystack, const char16_t *needle) {
return 0;
}
static abi void PrintError(const char *s, size_t n) {
// writes the n bytes at s to the handle returned by
// GetStdHandle(kNtStdErrorHandle); the result of WriteFile is ignored
abi static void PrintError(const char *s, size_t n) {
// from this line onward, PrintError(s) expands to the two-argument call
// with n = sizeof(s) - 1, so s needs to be an array such as a string
// literal; the macro is defined after the signature above so that it
// doesn't rewrite the function definition itself
#define PrintError(s) PrintError(s, sizeof(s) - 1)
__imp_WriteFile(__imp_GetStdHandle(kNtStdErrorHandle), s, n, 0, 0);
}
// detect the unholiest of environments
static abi bool32 IsWslChimera(void) {
abi static bool32 IsWslChimera(void) {
char16_t path[PATH_MAX];
return __imp_GetCurrentDirectoryW(PATH_MAX, path) && //
path[0] == '\\' && //
@ -125,7 +130,7 @@ static abi bool32 IsWslChimera(void) {
}
// returns true if utf-8 path is a win32-style path that exists
static abi bool32 WinFileExists(const char *path) {
abi static bool32 WinFileExists(const char *path) {
uint16_t path16[PATH_MAX];
size_t z = ARRAYLEN(path16);
size_t n = tprecode8to16(path16, z, path).ax;
@ -135,7 +140,7 @@ static abi bool32 WinFileExists(const char *path) {
}
// this ensures close(1) won't accidentally close(2) for example
static abi void DeduplicateStdioHandles(void) {
abi static void DeduplicateStdioHandles(void) {
for (long i = 0; i < 3; ++i) {
int64_t h1 = __imp_GetStdHandle(kNtStdio[i]);
for (long j = i + 1; j < 3; ++j) {
@ -150,19 +155,19 @@ static abi void DeduplicateStdioHandles(void) {
}
}
static bool32 HasEnvironmentVariable(const char16_t *name) {
// returns the result of GetEnvironmentVariableW() for name, which
// callers treat as a boolean
// NOTE(review): presumably nonzero means the variable exists with a
// non-empty value; confirm against the win32 documentation
abi static bool32 HasEnvironmentVariable(const char16_t *name) {
  // tiny scratch buffer; its contents are never read back here
  char16_t scratch[4];
  return __imp_GetEnvironmentVariableW(name, scratch, ARRAYLEN(scratch));
}
static abi unsigned OnWinCrash(struct NtExceptionPointers *ep) {
// translates the exception code in ep into a signal number using
// __sig_crash_sig() and then hands it to TerminateThisProcess()
// NOTE(review): there's no return statement, so TerminateThisProcess()
// is presumably declared as not returning; confirm at its declaration
abi static unsigned OnWinCrash(struct NtExceptionPointers *ep) {
  int code;  // filled in by __sig_crash_sig() but not used here
  int sig;
  sig = __sig_crash_sig(ep->ExceptionRecord->ExceptionCode, &code);
  TerminateThisProcess(sig);
}
// main function of windows init process
// i.e. first process spawned that isn't forked
static abi wontreturn void WinInit(const char16_t *cmdline) {
abi wontreturn static void WinInit(const char16_t *cmdline) {
__oldstack = (intptr_t)__builtin_frame_address(0);
__imp_SetConsoleOutputCP(kNtCpUtf8);
@ -314,7 +319,7 @@ static int Atoi(const char16_t *str) {
return x;
}
static abi int WinGetPid(const char16_t *var, bool *out_is_inherited) {
abi static int WinGetPid(const char16_t *var, bool *out_is_inherited) {
uint32_t len;
char16_t val[12];
if ((len = __imp_GetEnvironmentVariableW(var, val, ARRAYLEN(val)))) {
@ -338,6 +343,8 @@ abi int64_t WinMain(int64_t hInstance, int64_t hPrevInstance,
extern char os asm("__hostos");
os = _HOSTWINDOWS; // madness https://news.ycombinator.com/item?id=21019722
kStartTsc = rdtsc();
__tls_enabled = false;
ftrace_enabled(-1);
if (!IsTiny() && IsWslChimera()) {
PrintError("error: APE is running on WIN32 inside WSL. You need to run: "
"sudo sh -c 'echo -1 > /proc/sys/fs/binfmt_misc/WSLInterop'\n");
@ -351,6 +358,8 @@ abi int64_t WinMain(int64_t hInstance, int64_t hPrevInstance,
__pid = WinGetPid(u"_COSMO_PID", &pid_is_inherited);
if (!(__sig.process = __sig_map_process(__pid, kNtOpenAlways)))
__sig.process = &fake_process_signals;
if (__winmain_isfork)
__builtin_longjmp(__winmain_jmpbuf, 1);
if (!pid_is_inherited)
atomic_store_explicit(__sig.process, 0, memory_order_release);
cmdline = __imp_GetCommandLineW();
@ -359,11 +368,10 @@ abi int64_t WinMain(int64_t hInstance, int64_t hPrevInstance,
if (StrStr(cmdline, u"--strace"))
++__strace;
#endif
ftrace_enabled(+1);
if (_weaken(WinSockInit))
_weaken(WinSockInit)();
DeduplicateStdioHandles();
if (_weaken(WinMainForked))
_weaken(WinMainForked)();
WinInit(cmdline);
}

View file

@ -51,3 +51,7 @@ textwindows void WinSockInit(void) {
_Exit(1);
}
}
// calls WSAStartup() with VERSION, storing its output in kNtWsaData;
// the return value of WSAStartup() is not checked
// NOTE(review): going by the name this is meant to run in the forked
// child on windows; confirm against the caller
textwindows dontinstrument void WinSockFork(void) {
WSAStartup(VERSION, &kNtWsaData);
}

View file

@ -227,7 +227,6 @@ syscon mmap MAP_LOCKED 0x00002000 0x00002000 0 0 0 0 0 0
syscon mmap MAP_NORESERVE 0x00004000 0x00004000 0x00000040 0x00000040 0 0 0x00000040 0 # Linux calls it "reserve"; NT calls it "commit"? which is default?
syscon mmap MAP_POPULATE 0x00008000 0x00008000 0 0 0x00040000 0 0 0 # MAP_PREFAULT_READ on FreeBSD; can avoid madvise(MADV_WILLNEED) on private file mapping
syscon mmap MAP_NONBLOCK 0x00010000 0x00010000 0 0 0 0 0 0
syscon mmap MAP_NOFORK 0 0 0 0 0 0 0 0x10000000 # used on pages internal to our mmap() implementation on windows
syscon mmap MAP_SYNC 0x00080000 0x00080000 0 0 0 0 0 0 # perform synchronous page faults for mapping (Linux 4.15+)
syscon mmap MAP_HUGETLB 0x00040000 -1 -1 -1 -1 -1 -1 -1 # back the mapping with huge pages (Linux only)
syscon mmap MAP_INHERIT -1 -1 -1 -1 -1 -1 0x00000080 -1 # make it inherit across execve()

View file

@ -1,2 +0,0 @@
#include "libc/sysv/consts/syscon.internal.h"
.syscon mmap,MAP_NOFORK,0,0,0,0,0,0,0,0x10000000

View file

@ -19,7 +19,6 @@ extern const int MAP_JIT;
extern const int MAP_LOCKED;
extern const int MAP_NOCACHE;
extern const int MAP_NOEXTEND;
extern const int MAP_NOFORK;
extern const int MAP_NONBLOCK;
extern const int MAP_NORESERVE;
extern const int MAP_NOSYNC;

View file

@ -22,4 +22,10 @@
.balign 8
__hostos:
.quad 0
.endfn __hostos,globl
.endobj __hostos,globl
__tls_index:
.long 0
.endobj __tls_index,globl
__tls_enabled:
.long 0
.endobj __tls_enabled,globl

View file

@ -28,6 +28,7 @@
#include "libc/intrin/strace.h"
#include "libc/nt/enum/processcreationflags.h"
#include "libc/nt/thread.h"
#include "libc/runtime/runtime.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/clock.h"
#include "libc/sysv/consts/map.h"

View file

@ -29,15 +29,16 @@ TEST_LIBC_PROC_DIRECTDEPS = \
LIBC_MEM \
LIBC_NEXGEN32E \
LIBC_NT_KERNEL32 \
LIBC_RUNTIME \
LIBC_PROC \
LIBC_RUNTIME \
LIBC_STDIO \
LIBC_STR \
LIBC_SYSV \
LIBC_TESTLIB \
LIBC_THREAD \
LIBC_X \
THIRD_PARTY_MUSL \
THIRD_PARTY_TR
THIRD_PARTY_TR \
TEST_LIBC_PROC_DEPS := \
$(call uniq,$(foreach x,$(TEST_LIBC_PROC_DIRECTDEPS),$($(x))))

View file

@ -21,6 +21,7 @@
#include "libc/calls/struct/sigaction.h"
#include "libc/calls/struct/sigset.h"
#include "libc/calls/struct/timespec.h"
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/log/check.h"
@ -32,6 +33,7 @@
#include "libc/sysv/consts/msync.h"
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/consts/sig.h"
#include "libc/testlib/benchmark.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/subprocess.h"
#include "libc/testlib/testlib.h"
@ -150,6 +152,31 @@ void ForkInSerial(void) {
ASSERT_EQ(0, WEXITSTATUS(ws));
}
BENCH(fork, bench) {
EZBENCH2("fork a", donothing, ForkInSerial());
// vfork()s a child that immediately calls _Exit(0), then waits for it
// and asserts that it exited normally with status zero
void VforkInSerial(void) {
  int pid;
  int ws;
  ASSERT_NE(-1, (pid = vfork()));
  if (!pid) {
    // child process
    _Exit(0);
  }
  // parent process reaps the child and checks its wait status
  ASSERT_NE(-1, waitpid(pid, &ws, 0));
  ASSERT_TRUE(WIFEXITED(ws));
  ASSERT_EQ(0, WEXITSTATUS(ws));
}
// same as the fork/wait round trip above but the child is created by
// calling sys_fork() directly; the child calls _Exit(0) and the parent
// asserts that it exited normally with status zero
void SysForkInSerial(void) {
  int pid;
  int ws;
  ASSERT_NE(-1, (pid = sys_fork()));
  if (!pid) {
    // child process
    _Exit(0);
  }
  // parent process reaps the child and checks its wait status
  ASSERT_NE(-1, waitpid(pid, &ws, 0));
  ASSERT_TRUE(WIFEXITED(ws));
  ASSERT_EQ(0, WEXITSTATUS(ws));
}
// benchmarks the vfork(), sys_fork(), and fork() round trips
// NOTE(review): the meaning of the first two BENCHMARK() arguments
// isn't visible here; see libc/testlib/benchmark.h
TEST(fork, bench) {
// called once outside the benchmark first (presumably as a warm-up)
VforkInSerial();
BENCHMARK(10, 1, VforkInSerial());
// the sys_fork() variant is only benchmarked when not on windows
if (!IsWindows())
BENCHMARK(10, 1, SysForkInSerial());
// called once outside the benchmark first (presumably as a warm-up)
ForkInSerial();
BENCHMARK(10, 1, ForkInSerial());
}

View file

@ -38,10 +38,6 @@ void on_unexpected_death(int sig) {
int main() {
// TODO(jart): fix flakes
if (IsWindows())
return 0;
signal(SIGCHLD, on_unexpected_death);
// extract test program

View file

@ -32,8 +32,8 @@
#include "libc/thread/posixthread.internal.h"
#include "third_party/gdtoa/lock.h"
pthread_mutex_t __gdtoa_lock_obj = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t __gdtoa_lock1_obj = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t __gdtoa_lock_obj = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t __gdtoa_lock1_obj = PTHREAD_MUTEX_INITIALIZER;
void
__gdtoa_lock(void)
@ -47,6 +47,12 @@ __gdtoa_unlock(void)
_pthread_mutex_unlock(&__gdtoa_lock_obj);
}
/*
 * Passes the mutex used by __gdtoa_lock() and __gdtoa_unlock() to
 * _pthread_mutex_wipe_np().
 * NOTE(review): presumably called from the fork() child to reset the
 * lock; confirm against the fork implementation.
 */
void
__gdtoa_wipe(void)
{
_pthread_mutex_wipe_np(&__gdtoa_lock_obj);
}
void
__gdtoa_lock1(void)
{
@ -58,3 +64,9 @@ __gdtoa_unlock1(void)
{
_pthread_mutex_unlock(&__gdtoa_lock1_obj);
}
/*
 * Passes the mutex used by __gdtoa_lock1() and __gdtoa_unlock1() to
 * _pthread_mutex_wipe_np().
 * NOTE(review): presumably called from the fork() child to reset the
 * lock; confirm against the fork implementation.
 */
void
__gdtoa_wipe1(void)
{
_pthread_mutex_wipe_np(&__gdtoa_lock1_obj);
}

View file

@ -3,13 +3,13 @@
#include "libc/thread/thread.h"
COSMOPOLITAN_C_START_
extern pthread_mutex_t __gdtoa_lock_obj;
extern pthread_mutex_t __gdtoa_lock1_obj;
void __gdtoa_lock(void);
void __gdtoa_unlock(void);
void __gdtoa_wipe(void);
void __gdtoa_lock1(void);
void __gdtoa_unlock1(void);
void __gdtoa_wipe1(void);
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_THIRD_PARTY_GDTOA_LOCK_H_ */

View file

@ -238,7 +238,8 @@ static bool free_waiters_populate (void) {
// netbsd semaphores are file descriptors
n = 1;
} else {
n = __pagesize / sizeof(waiter);
// don't create too much fork() overhead
n = 16;
}
waiter *waiters = mmap (0, n * sizeof(waiter),
PROT_READ | PROT_WRITE,

View file

@ -3,10 +3,9 @@
#include "libc/thread/thread.h"
COSMOPOLITAN_C_START_
extern pthread_mutex_t __localtime_lock_obj;
void __localtime_lock(void);
void __localtime_unlock(void);
void __localtime_wipe(void);
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_THIRD_PARTY_TZ_LOCK_H_ */