cosmopolitan/libc/stdio/posix_spawn.c
2023-09-12 08:58:57 -07:00

457 lines
15 KiB
C

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2021 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/stdio/posix_spawn.h"
#include "libc/assert.h"
#include "libc/atomic.h"
#include "libc/calls/calls.h"
#include "libc/calls/internal.h"
#include "libc/calls/ntspawn.h"
#include "libc/calls/state.internal.h"
#include "libc/calls/struct/fd.internal.h"
#include "libc/calls/struct/rlimit.h"
#include "libc/calls/struct/rlimit.internal.h"
#include "libc/calls/struct/rusage.internal.h"
#include "libc/calls/struct/sigaction.h"
#include "libc/calls/struct/sigset.h"
#include "libc/calls/struct/sigset.internal.h"
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/fmt/itoa.h"
#include "libc/fmt/magnumstrs.internal.h"
#include "libc/intrin/asan.internal.h"
#include "libc/intrin/atomic.h"
#include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/handlock.internal.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/strace.internal.h"
#include "libc/intrin/weaken.h"
#include "libc/mem/alloca.h"
#include "libc/nt/createfile.h"
#include "libc/nt/enum/processcreationflags.h"
#include "libc/nt/enum/startf.h"
#include "libc/nt/files.h"
#include "libc/nt/runtime.h"
#include "libc/nt/struct/processinformation.h"
#include "libc/nt/struct/startupinfo.h"
#include "libc/runtime/runtime.h"
#include "libc/sock/sock.h"
#include "libc/stdio/posix_spawn.h"
#include "libc/stdio/posix_spawn.internal.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/at.h"
#include "libc/sysv/consts/f.h"
#include "libc/sysv/consts/fd.h"
#include "libc/sysv/consts/limits.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/ok.h"
#include "libc/sysv/consts/sig.h"
#include "libc/thread/thread.h"
#include "libc/thread/tls.h"
// Unless SYSDEBUG is defined, the names below are redirected to their
// sys_ / __sys_ prefixed counterparts for the remainder of this file.
// NOTE(review): presumably those are the raw system call wrappers that
// bypass the public functions' extra logic — confirm before relying on it.
#ifndef SYSDEBUG
#define read sys_read
#define write sys_write
#define close sys_close
#define pipe2 sys_pipe2
#define getgid sys_getgid
#define setgid sys_setgid
#define getuid sys_getuid
#define setuid sys_setuid
#define setsid sys_setsid
#define setpgid sys_setpgid
#define fcntl __sys_fcntl
#define wait4 __sys_wait4
#define openat __sys_openat
#define setrlimit sys_setrlimit
#define sigprocmask sys_sigprocmask
#endif
// Becomes true once posix_spawn() has seen a vfork() child's store to a
// local variable show up in the parent; from then on posix_spawn() skips
// creating the pipe it otherwise uses to receive the child's error code.
static atomic_bool real_vfork; // i.e. not qemu/wsl/xnu/openbsd
// Closes every handle in the three-slot array that isn't the -1
// placeholder. The array itself is left unmodified.
static void posix_spawn_unhand(int64_t hands[3]) {
  int64_t *slot = hands;
  int64_t *end = hands + 3;
  for (; slot < end; ++slot) {
    if (*slot == -1) continue;  // slot holds nothing to close
    CloseHandle(*slot);
  }
}
// Applies `bInherit` to the kNtHandleFlagInherit flag of every handle
// in the three-slot array, skipping slots that hold the -1 placeholder.
static void posix_spawn_inherit(int64_t hands[3], bool32 bInherit) {
  int slot = 0;
  while (slot < 3) {
    int64_t hand = hands[slot++];
    if (hand == -1) continue;  // nothing stored in this slot
    SetHandleInformation(hand, kNtHandleFlagInherit, bInherit);
  }
}
/**
 * Spawns process on Windows by calling ntspawn() directly.
 *
 * Only descriptors 0, 1, and 2 take part: each one that's open in the
 * caller without O_CLOEXEC is handed to the child as a standard handle,
 * after the caller's file actions (which must also target 0..2) have
 * been applied to a private copy of those three slots.
 *
 * @param pid if non-null receives the descriptor number reserved to
 *     track the child process
 * @param path is the program to launch
 * @param file_actions is optional list of close/dup2/open operations
 * @param attrp is optional; only POSIX_SPAWN_SETSID and
 *     POSIX_SPAWN_SETPGROUP are consulted here
 * @param argv is the argument vector
 * @param envp is environment, or `environ` if null
 * @return 0 on success, or errno code on failure (errno is restored)
 */
static textwindows errno_t posix_spawn_windows_impl(
    int *pid, const char *path, const posix_spawn_file_actions_t *file_actions,
    const posix_spawnattr_t *attrp, char *const argv[], char *const envp[]) {
  int i;

  // create file descriptor work area
  char stdio_kind[3] = {kFdEmpty, kFdEmpty, kFdEmpty};
  intptr_t stdio_handle[3] = {-1, -1, -1};
  for (i = 0; i < 3; ++i) {
    if (g_fds.p[i].kind != kFdEmpty && !(g_fds.p[i].flags & O_CLOEXEC)) {
      stdio_kind[i] = g_fds.p[i].kind;
      stdio_handle[i] = g_fds.p[i].handle;
    }
  }

  // reserve a fake pid for this spawn
  int olderr = errno;
  int child = __reservefd(-1);
  if (child == -1) {
    // without this check the code below would index g_fds.p[-1]
    // NOTE(review): assumes __reservefd() fails with -1 and errno set
    int err = errno;
    errno = olderr;
    return err;
  }

  // apply user file actions
  // close_handle[] remembers handles opened here so they can be
  // released once the child has been created (or on failure)
  intptr_t close_handle[3] = {-1, -1, -1};
  if (file_actions) {
    int err = 0;
    for (struct _posix_faction *a = *file_actions; a && !err; a = a->next) {
      switch (a->action) {
        case _POSIX_SPAWN_CLOSE:
          unassert(a->fildes < 3u);
          stdio_kind[a->fildes] = kFdEmpty;
          stdio_handle[a->fildes] = -1;
          break;
        case _POSIX_SPAWN_DUP2:
          unassert(a->newfildes < 3u);
          if (__isfdopen(a->fildes)) {
            stdio_kind[a->newfildes] = g_fds.p[a->fildes].kind;
            stdio_handle[a->newfildes] = g_fds.p[a->fildes].handle;
          } else {
            err = EBADF;
          }
          break;
        case _POSIX_SPAWN_OPEN: {
          int64_t hand;
          int e = errno;
          char16_t path16[PATH_MAX];
          uint32_t perm, share, disp, attr;
          unassert(a->fildes < 3u);
          // CreateFile() signals failure with -1 (the invalid handle
          // value), so a plain truthiness test would accept it
          if (__mkntpathat(AT_FDCWD, a->path, 0, path16) != -1 &&
              GetNtOpenFlags(a->oflag, a->mode,  //
                             &perm, &share, &disp, &attr) != -1 &&
              (hand = CreateFile(path16, perm, share, 0, disp, attr, 0)) !=
                  -1) {
            // a second open() on the same slot supersedes the first one;
            // release the earlier handle rather than losing track of it
            if (close_handle[a->fildes] != -1) {
              CloseHandle(close_handle[a->fildes]);
            }
            stdio_kind[a->fildes] = kFdFile;
            close_handle[a->fildes] = hand;
            stdio_handle[a->fildes] = hand;
          } else {
            err = errno;
            errno = e;
          }
          break;
        }
        default:
          __builtin_unreachable();
      }
    }
    if (err) {
      posix_spawn_unhand(close_handle);
      __releasefd(child);
      return err;
    }
  }

  // create the windows process start info
  // when socket() is linked, a "__STDIO_SOCKETS=<bitmask>" string is
  // built with bit i set if stdio slot i is a socket, and handed to
  // ntspawn() as its fourth argument
  int bits;
  char buf[32], *v = 0;
  if (_weaken(socket)) {
    for (bits = i = 0; i < 3; ++i) {
      if (stdio_kind[i] == kFdSocket) {
        bits |= 1 << i;
      }
    }
    FormatInt32(stpcpy(buf, "__STDIO_SOCKETS="), bits);
    v = buf;
  }
  struct NtStartupInfo startinfo = {
      .cb = sizeof(struct NtStartupInfo),
      .dwFlags = kNtStartfUsestdhandles,
      .hStdInput = stdio_handle[0],
      .hStdOutput = stdio_handle[1],
      .hStdError = stdio_handle[2],
  };

  // figure out the flags
  // handle inheritance is only requested if there's a handle to pass
  short flags = 0;
  bool bInheritHandles = false;
  uint32_t dwCreationFlags = 0;
  for (i = 0; i < 3; ++i) {
    bInheritHandles |= stdio_handle[i] != -1;
  }
  if (attrp && *attrp) {
    flags = (*attrp)->flags;
    if (flags & POSIX_SPAWN_SETSID) {
      dwCreationFlags |= kNtDetachedProcess;
    }
    if (flags & POSIX_SPAWN_SETPGROUP) {
      dwCreationFlags |= kNtCreateNewProcessGroup;
    }
  }

  // launch the process
  // the stdio handles are made inheritable only for the duration of
  // the ntspawn() call, while the handle lock is held
  int rc, e = errno;
  struct NtProcessInformation procinfo;
  if (!envp) envp = environ;
  __hand_rlock();
  posix_spawn_inherit(stdio_handle, true);
  rc = ntspawn(path, argv, envp, v, 0, 0, bInheritHandles, dwCreationFlags, 0,
               &startinfo, &procinfo);
  posix_spawn_inherit(stdio_handle, false);
  posix_spawn_unhand(close_handle);
  __hand_runlock();
  if (rc == -1) {
    int err = errno;
    __releasefd(child);
    errno = e;
    return err;
  }

  // track the process
  CloseHandle(procinfo.hThread);
  g_fds.p[child].kind = kFdProcess;
  g_fds.p[child].handle = procinfo.hProcess;
  g_fds.p[child].flags = O_CLOEXEC;
  g_fds.p[child].zombie = false;

  // return the result
  if (pid) *pid = child;
  return 0;
}
// Renders the spawned pid for logging. Yields "n/a" when `err` is
// nonzero, "NULL" when no pid pointer was supplied, and otherwise
// formats `*pid` into `buf` and returns `buf`.
static const char *DescribePid(char buf[12], int err, int *pid) {
  const char *text;
  if (err) {
    text = "n/a";
  } else if (!pid) {
    text = "NULL";
  } else {
    FormatInt32(buf, *pid);
    text = buf;
  }
  return text;
}
/**
 * Checks arguments, performs the Windows spawn, and logs the outcome.
 *
 * EFAULT is produced when `path` or `argv` is null, or when running
 * under ASAN and `path`, `argv`, or a non-null `envp` fails the ASAN
 * validity checks. Otherwise the result of posix_spawn_windows_impl()
 * is returned unchanged.
 *
 * @return 0 on success, or errno code on failure
 */
static textwindows dontinline errno_t posix_spawn_windows(
    int *pid, const char *path, const posix_spawn_file_actions_t *file_actions,
    const posix_spawnattr_t *attrp, char *const argv[], char *const envp[]) {
  int err = EFAULT;
  bool usable = path && argv;
  if (usable && IsAsan()) {
    // envp is optional, so it's only inspected when supplied
    usable = __asan_is_valid_str(path) &&      //
             __asan_is_valid_strlist(argv) &&  //
             (!envp || __asan_is_valid_strlist(envp));
  }
  if (usable) {
    err = posix_spawn_windows_impl(pid, path, file_actions, attrp, argv, envp);
  }
  STRACE("posix_spawn([%s], %#s, %s, %s) → %s",
         DescribePid(alloca(12), err, pid), path, DescribeStringList(argv),
         DescribeStringList(envp), !err ? "0" : _strerrno(err));
  return err;
}
/**
 * Spawns process, the POSIX way.
 *
 * This provides superior process creation performance across systems.
 * Processes are normally spawned by calling fork() and execve(), but
 * that goes slow on Windows if the caller has allocated a nontrivial
 * number of memory mappings, all of which need to be copied into the
 * forked child, only to be destroyed a moment later. On UNIX systems
 * fork() bears a similar cost that's 100x less bad, which is copying
 * the page tables. So what this implementation does is on Windows it
 * calls CreateProcess() directly and on UNIX it uses vfork() if it's
 * possible (XNU and OpenBSD don't have it). On UNIX this API has the
 * benefit of avoiding the footguns of using vfork() directly because
 * this implementation will ensure signal handlers can't be called in
 * the child process since that'd likely corrupt the parent's memory.
 *
 * @param pid if non-null shall be set to child pid on success
 * @param path is resolved path of program which is not `$PATH` searched
 * @param file_actions specifies close(), dup2(), and open() operations
 * @param attrp specifies signal masks, user ids, scheduling, etc.
 * @param argv is the argument vector passed to the new program
 * @param envp is environment variables, or `environ` if null
 * @return 0 on success or error number on failure
 * @see posix_spawnp() for `$PATH` searching
 * @returnserrno
 * @tlsrequired
 * @threadsafe
 */
errno_t posix_spawn(int *pid, const char *path,
                    const posix_spawn_file_actions_t *file_actions,
                    const posix_spawnattr_t *attrp, char *const argv[],
                    char *const envp[]) {
  if (IsWindows()) {
    return posix_spawn_windows(pid, path, file_actions, attrp, argv, envp);
  }
  int pfds[2];
  bool use_pipe;
  volatile int status = 0;
  sigset_t blockall, oldmask;
  int child, res, cs, e = errno;
  volatile bool can_clobber = false;

  // block all signals and disable cancelation until we're done
  sigfillset(&blockall);
  sigprocmask(SIG_SETMASK, &blockall, &oldmask);
  pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);

  // until vfork() is known to share memory with the parent, the child
  // reports its errno through a close-on-exec pipe instead of `status`
  if ((use_pipe = !atomic_load_explicit(&real_vfork, memory_order_acquire))) {
    if (pipe2(pfds, O_CLOEXEC)) {
      res = errno;
      goto ParentFailed;
    }
  }

  if (!(child = vfork())) {
    // this store is only visible to the parent if memory is shared
    can_clobber = true;
    sigset_t *childmask;
    bool lost_cloexec = 0;
    struct sigaction dfl = {0};
    short flags = attrp && *attrp ? (*attrp)->flags : 0;
    // the child tracks the pipe's write end in its own variable, so
    // relocating it below never changes the pfds[1] that the parent
    // closes after vfork() returns
    int errfd = use_pipe ? pfds[1] : -1;
    if (use_pipe) close(pfds[0]);

    // reset handled signals to default; ignored signals stay ignored
    // unless the caller asked for them via POSIX_SPAWN_SETSIGDEF
    for (int sig = 1; sig < _NSIG; sig++) {
      if (__sighandrvas[sig] != (long)SIG_DFL &&
          (__sighandrvas[sig] != (long)SIG_IGN ||
           ((flags & POSIX_SPAWN_SETSIGDEF) &&
            sigismember(&(*attrp)->sigdefault, sig) == 1))) {
        sigaction(sig, &dfl, 0);
      }
    }
    if (flags & POSIX_SPAWN_SETSID) {
      setsid();
    }
    if ((flags & POSIX_SPAWN_SETPGROUP) && setpgid(0, (*attrp)->pgroup)) {
      goto ChildFailed;
    }
    if ((flags & POSIX_SPAWN_RESETIDS) && setgid(getgid())) {
      goto ChildFailed;
    }
    if ((flags & POSIX_SPAWN_RESETIDS) && setuid(getuid())) {
      goto ChildFailed;
    }
    if (file_actions) {
      struct _posix_faction *a;
      for (a = *file_actions; a; a = a->next) {
        // move the error pipe out of the way if this action is about to
        // use its descriptor number, which includes being the target of
        // a dup2() that would otherwise silently replace the pipe
        if (use_pipe &&
            (errfd == a->fildes ||
             (a->action == _POSIX_SPAWN_DUP2 && errfd == a->newfildes))) {
          int p2;
          if ((p2 = dup(errfd)) == -1) {
            goto ChildFailed;
          }
          lost_cloexec = true;  // dup() result needs FD_CLOEXEC restored
          close(errfd);
          errfd = p2;
        }
        switch (a->action) {
          case _POSIX_SPAWN_CLOSE:
            if (close(a->fildes)) {
              goto ChildFailed;
            }
            break;
          case _POSIX_SPAWN_DUP2:
            if (dup2(a->fildes, a->newfildes) == -1) {
              goto ChildFailed;
            }
            break;
          case _POSIX_SPAWN_OPEN: {
            int t;
            if ((t = openat(AT_FDCWD, a->path, a->oflag, a->mode)) == -1) {
              goto ChildFailed;
            }
            if (t != a->fildes) {
              if (dup2(t, a->fildes) == -1) {
                close(t);
                goto ChildFailed;
              }
              if (close(t)) {
                goto ChildFailed;
              }
            }
            break;
          }
          default:
            __builtin_unreachable();
        }
      }
    }
    if (IsLinux() || IsFreebsd() || IsNetbsd()) {
      if (flags & POSIX_SPAWN_SETSCHEDULER) {
        if (sched_setscheduler(0, (*attrp)->schedpolicy,
                               &(*attrp)->schedparam) == -1) {
          goto ChildFailed;
        }
      }
      if (flags & POSIX_SPAWN_SETSCHEDPARAM) {
        if (sched_setparam(0, &(*attrp)->schedparam)) {
          goto ChildFailed;
        }
      }
    }
    if (flags & POSIX_SPAWN_SETRLIMIT) {
      // strictly less-than: index ARRAYLEN is one past the last element
      for (int rez = 0; rez < ARRAYLEN((*attrp)->rlim); ++rez) {
        if ((*attrp)->rlimset & (1u << rez)) {
          if (setrlimit(rez, (*attrp)->rlim + rez)) {
            goto ChildFailed;
          }
        }
      }
    }
    if (lost_cloexec) {
      fcntl(errfd, F_SETFD, FD_CLOEXEC);
    }
    if (flags & POSIX_SPAWN_SETSIGMASK) {
      childmask = &(*attrp)->sigmask;
    } else {
      childmask = &oldmask;
    }
    sigprocmask(SIG_SETMASK, childmask, 0);
    if (!envp) envp = environ;
    execve(path, argv, envp);
  ChildFailed:
    // hand errno to the parent, via shared memory or the pipe
    res = errno;
    if (!use_pipe) {
      status = res;
    } else {
      write(errfd, &res, sizeof(res));
    }
    _Exit(127);
  }

  if (use_pipe) {
    close(pfds[1]);
  }
  if (child != -1) {
    if (!use_pipe) {
      res = status;
    } else {
      if (can_clobber) {
        // the child's store reached us, so later calls can skip the pipe
        atomic_store_explicit(&real_vfork, true, memory_order_release);
      }
      // res stays zero if the child closed the pipe without writing
      res = 0;
      read(pfds[0], &res, sizeof(res));
    }
    if (!res) {
      if (pid) *pid = child;
    } else {
      // the child failed before or during execve(), so reap it here
      wait4(child, 0, 0, 0);
    }
  } else {
    res = errno;
  }
  if (use_pipe) {
    close(pfds[0]);
  }
ParentFailed:
  sigprocmask(SIG_SETMASK, &oldmask, 0);
  pthread_setcancelstate(cs, 0);
  errno = e;
  return res;
}