Make fork() go 30% faster

This change makes fork() go nearly as fast as sys_fork() on UNIX. As for
Windows, this change shaves about 4-5ms off fork() + wait() latency. This
is accomplished by using WriteProcessMemory() from the parent process to
set up the address space of a suspended process; it is better than a pipe
This commit is contained in:
Justine Tunney 2025-01-01 04:59:38 -08:00
parent 98c5847727
commit 0b3c81dd4e
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
44 changed files with 769 additions and 649 deletions

View file

@ -1,18 +1,2 @@
#include "libc/nt/codegen.h"
.imp kernel32,__imp_VirtualAllocEx,VirtualAllocEx
.text.windows
.ftrace1
// Entry point exported as VirtualAllocEx (see .endfn below, globl).
// NOTE(review): .imp, .ftrace1/.ftrace2 and .endfn are macros from
// libc/nt/codegen.h; their expansion is not visible here.
VirtualAllocEx:
.ftrace2
#ifdef __x86_64__
// Set up a frame pointer, then load the 64-bit value stored at
// __imp_VirtualAllocEx into %rax.
push %rbp
mov %rsp,%rbp
mov __imp_VirtualAllocEx(%rip),%rax
// Tail-jump (no call/ret here) with the loaded pointer in %rax.
// NOTE(review): __sysv2nt6 is defined elsewhere; presumably it converts
// System V arguments to the NT convention and unwinds the frame pushed
// above -- confirm against its definition.
jmp __sysv2nt6
#elif defined(__aarch64__)
// AArch64 build: the import is not invoked; x0 is set to 0 and returned.
mov x0,#0
ret
#endif
.endfn VirtualAllocEx,globl
.previous

View file

@ -0,0 +1,2 @@
#include "libc/nt/codegen.h"
.imp kernel32,__imp_VirtualProtectEx,VirtualProtectEx

View file

@ -0,0 +1,18 @@
#include "libc/nt/codegen.h"
.imp kernel32,__imp_VirtualQueryEx,VirtualQueryEx
.text.windows
.ftrace1
// Entry point exported as VirtualQueryEx (see .endfn below, globl).
// NOTE(review): .imp, .ftrace1/.ftrace2 and .endfn are macros from
// libc/nt/codegen.h; their expansion is not visible here.
VirtualQueryEx:
.ftrace2
#ifdef __x86_64__
// Set up a frame pointer, then load the 64-bit value stored at
// __imp_VirtualQueryEx into %rax.
push %rbp
mov %rsp,%rbp
mov __imp_VirtualQueryEx(%rip),%rax
// Tail-jump (no call/ret here) with the loaded pointer in %rax.
// NOTE(review): __sysv2nt is defined elsewhere; presumably it is the
// variant used for functions of up to four arguments -- confirm.
jmp __sysv2nt
#elif defined(__aarch64__)
// AArch64 build: the import is not invoked; x0 is set to 0 and returned.
mov x0,#0
ret
#endif
.endfn VirtualQueryEx,globl
.previous

View file

@ -0,0 +1,2 @@
#include "libc/nt/codegen.h"
.imp kernel32,__imp_WriteProcessMemory,WriteProcessMemory

View file

@ -9,6 +9,7 @@
# KERNEL32.DLL
#
# Name Actual DLL Arity
imp '' CreateDirectoryW kernel32 2
imp '' CreateFileA kernel32 7
imp '' CreateFileMappingNumaW kernel32 7
@ -40,9 +41,12 @@ imp '' SetCurrentDirectoryW kernel32 1
imp '' TerminateProcess kernel32 2
imp '' UnlockFileEx kernel32 5
imp '' UnmapViewOfFile kernel32 1
imp '' VirtualAllocEx kernel32 5
imp '' VirtualProtect kernel32 4
imp '' VirtualProtectEx kernel32 5
imp '' WaitForMultipleObjects kernel32 4
imp '' WaitForSingleObject kernel32 2
imp '' WriteProcessMemory kernel32 5
imp 'AcquireSRWLockExclusive' AcquireSRWLockExclusive kernel32 1
imp 'AcquireSRWLockShared' AcquireSRWLockShared kernel32 1
imp 'AddDllDirectory' AddDllDirectory kernel32 1
@ -185,8 +189,8 @@ imp 'GetWindowsDirectory' GetWindowsDirectoryW kernel32 2
imp 'GetWindowsDirectoryA' GetWindowsDirectoryA kernel32 2
imp 'GlobalAlloc' GlobalAlloc kernel32 2
imp 'GlobalFree' GlobalFree kernel32 1
imp 'GlobalMemoryStatusEx' GlobalMemoryStatusEx kernel32 1
imp 'GlobalLock' GlobalLock kernel32 1
imp 'GlobalMemoryStatusEx' GlobalMemoryStatusEx kernel32 1
imp 'GlobalUnlock' GlobalUnlock kernel32 1
imp 'HeapAlloc' HeapAlloc kernel32 3
imp 'HeapCompact' HeapCompact kernel32 2
@ -300,10 +304,10 @@ imp 'UnmapViewOfFile2' UnmapViewOfFile2 kernel32 2
imp 'UnmapViewOfFileEx' UnmapViewOfFileEx kernel32 3
imp 'UpdateProcThreadAttribute' UpdateProcThreadAttribute kernel32 7
imp 'VirtualAlloc' VirtualAlloc kernel32 4
imp 'VirtualAllocEx' VirtualAllocEx kernel32 5
imp 'VirtualFree' VirtualFree kernel32 3
imp 'VirtualLock' VirtualLock kernel32 2
imp 'VirtualQuery' VirtualQuery kernel32 3
imp 'VirtualQueryEx' VirtualQueryEx kernel32 4
imp 'VirtualUnlock' VirtualUnlock kernel32 2
imp 'WaitForMultipleObjectsEx' WaitForMultipleObjectsEx kernel32 5
imp 'WaitForSingleObjectEx' WaitForSingleObjectEx kernel32 3

View file

@ -71,8 +71,17 @@ bool32 VirtualUnlock(const void *lpAddress, size_t dwSize);
uint64_t VirtualQuery(const void *lpAddress,
struct NtMemoryBasicInformation *lpBuffer,
uint64_t dwLength);
/* Same parameters as VirtualQuery, preceded by a process handle
   (hProcess) selecting whose address space is queried. */
uint64_t VirtualQueryEx(int64_t hProcess, const void *lpAddress,
struct NtMemoryBasicInformation *lpBuffer,
uint64_t dwLength);
/* Allocation call that takes a process handle in addition to the address,
   size, allocation type and protection arguments.
   NOTE(review): flag values are defined elsewhere; see the Win32 docs. */
void *VirtualAllocEx(int64_t hProcess, void *lpAddress, uint64_t dwSize,
uint32_t flAllocationType, uint32_t flProtect);
/* Protection-change call that takes a process handle; the previous
   protection is written through out_lpflOldProtect.
   NOTE(review): the out_ prefix has no opt_, so presumably this pointer
   must be non-NULL -- confirm. */
bool32 VirtualProtectEx(int64_t hProcess, void *lpAddress, uint64_t dwSize,
uint32_t flNewProtect, uint32_t *out_lpflOldProtect);
/* Writes nSize bytes from lpBuffer to lpBaseAddress in the process named
   by hProcess (per the Win32 API of the same name).
   NOTE(review): the opt_out_ prefix suggests the byte-count pointer may be
   NULL -- confirm. */
bool32 WriteProcessMemory(int64_t hProcess, void *lpBaseAddress,
const void *lpBuffer, uint64_t nSize,
uint64_t *opt_out_lpNumberOfBytesWritten);
int64_t GetProcessHeap(void);
void *HeapAlloc(int64_t hHeap, uint32_t dwFlags, size_t dwBytes) __wur;