Implement pthread_atfork()

If threads are being used, then fork() will now acquire and release the
runtime locks so that fork() may be safely used from threads. This also
makes vfork() thread safe, because pthread mutexes will do nothing when
the process is a child of vfork(). More torture tests have been written
to confirm this all works like a charm. Additionally:

- Invent hexpcpy() api
- Rename nsync_malloc_() to kmalloc()
- Complete posix named semaphore implementation
- Make pthread_create() asynchronous signal safe
- Add rm, rmdir, and touch to command interpreter builtins
- Invent sigisprecious() and modify sigset functions to use it
- Add unit tests for posix_spawn() attributes and fix its bugs

One unresolved problem is the reclaiming of *NSYNC waiter memory in the
forked child processes, within apps which have threads waiting on locks.
This commit is contained in:
Justine Tunney 2022-10-16 12:05:08 -07:00
parent 64c284003d
commit 60cb435cb4
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
124 changed files with 2169 additions and 718 deletions

View file

@ -87,21 +87,26 @@ static unsigned char *brk_unlocked(unsigned char *p) {
}
}
int brk_lock(void) {
void brk_lock(void) {
pthread_mutex_lock(&__brk.m);
return 0;
}
/**
 * Unlocks the `__brk.m` mutex.
 *
 * brk_init() also registers this function with pthread_atfork() as the
 * handler that runs in the parent after fork().
 */
void brk_unlock(void) {
pthread_mutex_unlock(&__brk.m);
}
/**
 * Forcibly resets the `__brk.m` mutex.
 *
 * Rather than unlocking, this re-initializes the mutex with default
 * attributes (null attribute pointer). brk_init() calls it once at
 * startup and registers it with pthread_atfork() as the handler that
 * runs in the child after fork().
 */
void brk_funlock(void) {
pthread_mutex_init(&__brk.m, 0);
}
/**
 * Constructor that prepares the `__brk.m` mutex for use with fork().
 *
 * Runs automatically at startup because of the `__constructor__`
 * attribute.
 */
__attribute__((__constructor__)) static void brk_init(void) {
// put the mutex into a freshly initialized state
brk_funlock();
// pthread_atfork(prepare, parent, child): take the lock before fork(),
// release it in the parent, and re-initialize it in the child
pthread_atfork(brk_lock, brk_unlock, brk_funlock);
}
#ifdef _NOPL0
#define brk_lock() _NOPL0("__threadcalls", brk_lock)
#define brk_unlock() _NOPL0("__threadcalls", brk_unlock)
#else
#define brk_lock() (__threaded ? brk_lock() : 0)
#define brk_unlock() (__threaded ? brk_unlock() : 0)
#endif
/**

View file

@ -301,7 +301,7 @@ static int Kill(void) {
int sig, rc = 0, i = 1;
if (i < n && args[i][0] == '-') {
sig = GetSignalByName(args[i++] + 1);
if (!sig) return 1;
if (!sig) return -1; // fallback to system kill command
} else {
sig = SIGTERM;
}
@ -340,15 +340,15 @@ static int Usleep(void) {
}
static int Test(void) {
int w;
int w, m = n;
struct stat st;
if (n && READ16LE(args[n - 1]) == READ16LE("]")) --n;
if (n == 4) {
if (m && READ16LE(args[m - 1]) == READ16LE("]")) --m;
if (m == 4) {
w = READ32LE(args[2]) & 0x00ffffff;
if ((w & 65535) == READ16LE("=")) return !!strcmp(args[1], args[3]);
if (w == READ24("==")) return !!strcmp(args[1], args[3]);
if (w == READ24("!=")) return !strcmp(args[1], args[3]);
} else if (n == 3) {
} else if (m == 3) {
w = READ32LE(args[1]) & 0x00ffffff;
if (w == READ24("-n")) return !(strlen(args[2]) > 0);
if (w == READ24("-z")) return !(strlen(args[2]) == 0);
@ -357,7 +357,52 @@ static int Test(void) {
if (w == READ24("-d")) return !(!stat(args[2], &st) && S_ISDIR(st.st_mode));
if (w == READ24("-h")) return !(!stat(args[2], &st) && S_ISLNK(st.st_mode));
}
return 1;
return -1; // fall back to system test command
}
// Builtin `rm` for the flag-free case: unlinks each operand in order.
//
// Returns 0 once every operand has been unlinked, 1 after logging the
// first unlink() failure (later operands are not attempted), or -1 if
// there are no operands or the first one starts with '-'.
static int Rm(void) {
  int k = 1;
  if (n <= 1 || args[1][0] == '-') {
    return -1;  // fall back to system rm command
  }
  while (k < n) {
    if (unlink(args[k])) {
      Log("rm: ", args[k], ": ", _strerdoc(errno), 0);
      return 1;
    }
    ++k;
  }
  return 0;
}
// Builtin `rmdir` for the flag-free case: removes each operand in order.
//
// Returns 0 once every operand has been removed, 1 after logging the
// first rmdir() failure (later operands are not attempted), or -1 if
// there are no operands or the first one starts with '-'.
static int Rmdir(void) {
  int k = 1;
  if (n <= 1 || args[1][0] == '-') {
    return -1;  // fall back to system rmdir command
  }
  while (k < n) {
    if (rmdir(args[k])) {
      Log("rmdir: ", args[k], ": ", _strerdoc(errno), 0);
      return 1;
    }
    ++k;
  }
  return 0;
}
// Builtin `touch` for the flag-free case: calls touch(path, 0644) on
// each operand in order.
//
// Returns 0 once every operand has been touched, 1 after logging the
// first touch() failure (later operands are not attempted), or -1 if
// there are no operands or the first one starts with '-'.
static int Touch(void) {
  int k = 1;
  if (n <= 1 || args[1][0] == '-') {
    return -1;  // fall back to system touch command
  }
  while (k < n) {
    if (touch(args[k], 0644)) {
      Log("touch: ", args[k], ": ", _strerdoc(errno), 0);
      return 1;
    }
    ++k;
  }
  return 0;
}
static int Fake(int main(int, char **)) {
@ -377,6 +422,7 @@ static int TryBuiltin(void) {
if (!n) return 0;
if (!strcmp(args[0], "exit")) Exit();
if (!strcmp(args[0], "cd")) return Cd();
if (!strcmp(args[0], "rm")) return Rm();
if (!strcmp(args[0], "[")) return Test();
if (!strcmp(args[0], "wait")) return Wait();
if (!strcmp(args[0], "echo")) return Echo();
@ -384,6 +430,8 @@ static int TryBuiltin(void) {
if (!strcmp(args[0], "true")) return True();
if (!strcmp(args[0], "test")) return Test();
if (!strcmp(args[0], "kill")) return Kill();
if (!strcmp(args[0], "touch")) return Touch();
if (!strcmp(args[0], "rmdir")) return Rmdir();
if (!strcmp(args[0], "mkdir")) return Mkdir();
if (!strcmp(args[0], "false")) return False();
if (!strcmp(args[0], "usleep")) return Usleep();

View file

@ -19,6 +19,7 @@
#include "ape/sections.internal.h"
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/runtime/runtime.h"
#include "libc/thread/tls.h"
@ -28,7 +29,7 @@ extern int __threadcalls_start[];
#pragma weak __threadcalls_start
#pragma weak __threadcalls_end
static privileged dontinline void FixupLocks(void) {
static privileged dontinline void FixupLockNops(void) {
__morph_begin();
/*
* _NOPL("__threadcalls", func)
@ -59,6 +60,6 @@ static privileged dontinline void FixupLocks(void) {
void __enable_threads(void) {
if (__threaded) return;
STRACE("__enable_threads()");
FixupLocks();
__threaded = gettid();
FixupLockNops();
__threaded = sys_gettid();
}

View file

@ -273,7 +273,7 @@ textwindows int sys_fork_nt(uint32_t dwCreationFlags) {
char *p, forkvar[6 + 21 + 1 + 21 + 1];
struct NtProcessInformation procinfo;
if (!setjmp(jb)) {
pid = untrackpid = __reservefd(-1);
pid = untrackpid = __reservefd_unlocked(-1);
reader = CreateNamedPipe(CreatePipeName(pipename),
kNtPipeAccessInbound | kNtFileFlagOverlapped,
kNtPipeTypeMessage | kNtPipeReadmodeMessage, 1,
@ -347,7 +347,7 @@ textwindows int sys_fork_nt(uint32_t dwCreationFlags) {
rc = 0;
}
if (untrackpid != -1) {
__releasefd(untrackpid);
__releasefd_unlocked(untrackpid);
}
return rc;
}

View file

@ -16,6 +16,7 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/sigset.h"
#include "libc/calls/struct/sigset.internal.h"
@ -33,14 +34,15 @@
int _fork(uint32_t dwCreationFlags) {
axdx_t ad;
bool threaded;
sigset_t old, all;
int ax, dx, parent, parent_tid = 0;
if (_weaken(_pthread_atfork)) {
parent_tid = gettid();
int ax, dx, parent;
sigfillset(&all);
_unassert(!sigprocmask(SIG_BLOCK, &all, &old));
if (__threaded && _weaken(_pthread_onfork_prepare)) {
_weaken(_pthread_onfork_prepare)();
}
if (!IsWindows()) {
sigfillset(&all);
sys_sigprocmask(SIG_BLOCK, &all, &old);
ad = sys_fork();
ax = ad.ax;
dx = ad.dx;
@ -50,7 +52,11 @@ int _fork(uint32_t dwCreationFlags) {
ax &= dx - 1;
}
} else {
threaded = __threaded;
ax = sys_fork_nt(dwCreationFlags);
if (threaded && !__threaded && _weaken(__enable_threads)) {
_weaken(__enable_threads)();
}
}
if (!ax) {
if (!IsWindows()) {
@ -65,15 +71,17 @@ int _fork(uint32_t dwCreationFlags) {
IsLinux() ? dx : sys_gettid(),
memory_order_relaxed);
}
if (_weaken(_pthread_atfork)) {
_weaken(_pthread_atfork)(parent_tid);
if (__threaded && _weaken(_pthread_onfork_child)) {
_weaken(_pthread_onfork_child)();
}
if (!IsWindows()) sys_sigprocmask(SIG_SETMASK, &old, 0);
STRACE("fork() → 0 (child of %d)", parent);
} else {
if (!IsWindows()) sys_sigprocmask(SIG_SETMASK, &old, 0);
if (__threaded && _weaken(_pthread_onfork_parent)) {
_weaken(_pthread_onfork_parent)();
}
STRACE("fork() → %d% m", ax);
}
_unassert(!sigprocmask(SIG_SETMASK, &old, 0));
return ax;
}

View file

@ -11,19 +11,19 @@
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
#define kAutomapStart 0x100080040000
#define kAutomapSize (kMemtrackStart - kAutomapStart)
#define kMemtrackStart 0x1fe7fffc0000
#define kMemtrackSize (0x1ffffffc0000 - kMemtrackStart)
#define kFixedmapStart 0x300000040000
#define kFixedmapSize (0x400000040000 - kFixedmapStart)
#define kMemtrackFdsStart 0x6fe000040000
#define kMemtrackFdsSize (0x6feffffc0000 - kMemtrackFdsStart)
#define kMemtrackZiposStart 0x6fd000040000
#define kMemtrackZiposSize (0x6fdffffc0000 - kMemtrackZiposStart)
#define kMemtrackNsyncStart 0x6fc000040000
#define kMemtrackNsyncSize (0x6fcffffc0000 - kMemtrackNsyncStart)
#define kMemtrackGran (!IsAsan() ? FRAMESIZE : FRAMESIZE * 8)
#define kAutomapStart 0x100080040000
#define kAutomapSize (kMemtrackStart - kAutomapStart)
#define kMemtrackStart 0x1fe7fffc0000
#define kMemtrackSize (0x1ffffffc0000 - kMemtrackStart)
#define kFixedmapStart 0x300000040000
#define kFixedmapSize (0x400000040000 - kFixedmapStart)
#define kMemtrackFdsStart 0x6fe000040000
#define kMemtrackFdsSize (0x6feffffc0000 - kMemtrackFdsStart)
#define kMemtrackZiposStart 0x6fd000040000
#define kMemtrackZiposSize (0x6fdffffc0000 - kMemtrackZiposStart)
#define kMemtrackKmallocStart 0x6fc000040000
#define kMemtrackKmallocSize (0x6fcffffc0000 - kMemtrackKmallocStart)
#define kMemtrackGran (!IsAsan() ? FRAMESIZE : FRAMESIZE * 8)
struct MemoryInterval {
int x;
@ -47,6 +47,7 @@ extern struct MemoryIntervals _mmi;
void __mmi_lock(void);
void __mmi_unlock(void);
void __mmi_funlock(void);
bool IsMemtracked(int, int);
void PrintSystemMappings(int);
unsigned FindMemoryInterval(const struct MemoryIntervals *, int) nosideeffect;
@ -95,9 +96,9 @@ forceinline pureconst bool IsZiposFrame(int x) {
x <= (int)((kMemtrackZiposStart + kMemtrackZiposSize - 1) >> 16);
}
forceinline pureconst bool IsNsyncFrame(int x) {
return (int)(kMemtrackNsyncStart >> 16) <= x &&
x <= (int)((kMemtrackNsyncStart + kMemtrackNsyncSize - 1) >> 16);
// Tells whether frame index `x` (an address shifted right by 16) lies
// within [kMemtrackKmallocStart, kMemtrackKmallocStart +
// kMemtrackKmallocSize), with both ends of the frame range inclusive.
forceinline pureconst bool IsKmallocFrame(int x) {
  const int first = (int)(kMemtrackKmallocStart >> 16);
  const int last =
      (int)((kMemtrackKmallocStart + kMemtrackKmallocSize - 1) >> 16);
  return !(x < first || last < x);
}
forceinline pureconst bool IsShadowFrame(int x) {

View file

@ -1808,7 +1808,7 @@
6f900000-6f9fffff 64gb free
6fa00000-6fafffff 64gb free
6fb00000-6fbfffff 64gb free
6fc00004-6fcffffb 64gb nsync
6fc00004-6fcffffb 64gb kmalloc
6fd00004-6fdffffb 64gb zipos
6fe00004-6feffffb 64gb g_fds
6ff00004-70000003 64gb free

View file

@ -18,6 +18,7 @@
*/
#include "libc/dce.h"
#include "libc/intrin/strace.internal.h"
#include "libc/thread/tls.h"
#include "libc/macros.internal.h"
.privileged
@ -26,7 +27,10 @@
// This is the same as fork() except it's optimized for the case
// where the caller invokes execve() immediately afterwards. You
// can also call functions like close(), dup2(), etc. Call _exit
// but don't call exit. Look for vforksafe function annotations.
// but don't call exit. Look for vforksafe function annotations.
// For example pthread mutexes are @vforksafe because they don't
// do anything in a vfork()'d child process. TLS memory must not
// be disabled (it's enabled by default) since vfork() needs it.
//
// Do not make the assumption that the parent is suspended until
// the child terminates since this impl calls fork() on Windows,
@ -34,8 +38,10 @@
//
// @return pid of child process or 0 if forked process
// @returnstwice
// @threadsafe
// @vforksafe
vfork: xor %edi,%edi # dwCreationFlags
vfork: call __require_tls
xor %edi,%edi # dwCreationFlags
#ifdef __SANITIZE_ADDRESS__
jmp fork # TODO: asan and vfork don't mix?
.endfn vfork,globl
@ -56,45 +62,31 @@ vfork: xor %edi,%edi # dwCreationFlags
ezlea .Llog,di
call __stracef
#endif /* SYSDEBUG */
mov __NR_vfork(%rip),%eax
mov __errno(%rip),%r8d # avoid question of @vforksafe errno
mov %fs:0,%r9 # get thread information block
mov 0x3c(%r9),%r8d # avoid question of @vforksafe errno
pop %rsi # saves return address in a register
mov __NR_vfork(%rip),%eax
#if SupportsBsd()
testb IsBsd()
jnz vfork.bsd
clc
#endif
syscall
#if SupportsBsd()
jnc 0f
neg %rax
0:
#endif
push %rsi # note it happens twice in same page
#if SupportsLinux()
cmp $-4095,%eax
jae systemfive_error
#endif
0: mov %r8d,__errno(%rip)
ezlea __vforked,di
mov %r8d,0x3c(%r9) # restore errno
test %eax,%eax
jz 1f
decl (%rdi)
jns 2f # openbsd doesn't actually share mem
1: incl (%rdi)
2: ret
jnz .Lprnt
.Lchld: orb $TIB_FLAG_VFORKED,0x40(%r9)
ret
.Lprnt: andb $~TIB_FLAG_VFORKED,0x40(%r9)
ret
.endfn vfork,globl
#if SupportsBsd()
vfork.bsd:
syscall
push %rsi
jc systemfive_errno
#if SupportsXnu()
testb IsXnu()
jz 0b
neg %edx # edx is 0 for parent and 1 for child
not %edx # eax always returned with childs pid
and %edx,%eax
#endif /* XNU */
jmp 0b
.endfn vfork.bsd
#endif /* BSD */
#ifdef SYSDEBUG
.rodata.str1.1
.Llog: .ascii STRACE_PROLOGUE