From ed8fadea37032979e5b37ff455826d0311aff7a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C5=8Dshin?= Date: Mon, 4 Dec 2023 15:45:46 -0500 Subject: [PATCH] Keep argv[0], add COSMOPOLITAN_PROGRAM_EXECUTABLE (#980) * Introduce env.com Handy tool for debugging environment issues. * Inject path as COSMOPOLITAN_PROGRAM_EXECUTABLE `argv[0]` was previously being used as a communication channel between the loader and the binary, giving the binary its full path for use e.g. in `GetProgramExecutableName`. But `argv[0]` is not a good channel for this; much of what made 2a3813c6 so gross is due to that. This change fixes the issue by preserving `argv[0]` and establishing a new communication channel: `COSMOPOLITAN_PROGRAM_EXECUTABLE`. The M1 loader will always set this as the first variable. Linux should soon follow. On the other side, `GetProgramExecutableName` checks that variable first. If it sees it, it trusts it as-is. A lot of the churn in `ape/ape-m1.c` in this change is actually backing out hacks introduced in 2a3813c6; the best comparison is: git diff 2a3813c6^.. --- ape/ape-m1.c | 82 +++++++------- examples/env.c | 10 ++ libc/calls/getprogramexecutablename.greg.c | 10 ++ .../calls/getprogramexecutablename_test.c | 104 ++++++++++++++++++ 4 files changed, 168 insertions(+), 38 deletions(-) create mode 100644 examples/env.c create mode 100644 test/libc/calls/getprogramexecutablename_test.c diff --git a/ape/ape-m1.c b/ape/ape-m1.c index 4fdfbf324..8ad15ed2b 100644 --- a/ape/ape-m1.c +++ b/ape/ape-m1.c @@ -16,6 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include #include #include #include @@ -35,6 +36,10 @@ #include #define pagesz 16384 +#define VARNAME "COSMOPOLITAN_PROGRAM_EXECUTABLE=" +#define VARSIZE (sizeof(VARNAME) - 1) +/* maximum path size that cosmo can take */ +#define PATHSIZE (PATH_MAX < 1024 ? 
PATH_MAX : 1024) #define SYSLIB_MAGIC ('s' | 'l' << 8 | 'i' << 16 | 'b' << 24) #define SYSLIB_VERSION 8 @@ -198,8 +203,11 @@ struct PathSearcher { unsigned long namelen; const char *name; const char *syspath; - char path[1024]; + char varname[VARSIZE]; + char path[PATHSIZE]; }; +_Static_assert(offsetof(struct PathSearcher, varname) + VARSIZE == + offsetof(struct PathSearcher, path), "struct layout"); struct ApeLoader { struct PathSearcher ps; @@ -313,7 +321,14 @@ __attribute__((__noreturn__)) static void Pexit(const char *c, int failed, } static char AccessCommand(struct PathSearcher *ps, unsigned long pathlen) { - if (pathlen + 1 + ps->namelen + 1 > sizeof(ps->path)) return 0; + if (!pathlen && *ps->name != '/') { + if (!getcwd(ps->path, sizeof(ps->path) - 1 - ps->namelen)) { + Pexit("getcwd", -errno, "failed"); + } + pathlen = strlen(ps->path); + } else if (pathlen + 1 + ps->namelen + 1 > sizeof(ps->path)) { + return 0; + } if (pathlen && ps->path[pathlen - 1] != '/') ps->path[pathlen++] = '/'; memmove(ps->path + pathlen, ps->name, ps->namelen); ps->path[pathlen + ps->namelen] = 0; @@ -884,8 +899,9 @@ int main(int argc, char **argv, char **envp) { struct ApeLoader *M; long *sp, *sp2, *auxv; union ElfEhdrBuf *ebuf; - int c, islogin, n, fd, rc; - char *p, *pe, *dash_l, *exe, *prog, *shell, *execfn; + int c, n, fd, rc; + char *p, *pe, *exe, *prog, *shell, *execfn; + char **varpos; /* allocate loader memory in program's arg block */ n = sizeof(struct ApeLoader); @@ -947,24 +963,15 @@ int main(int argc, char **argv, char **envp) { M->lib.dlclose = dlclose; M->lib.dlerror = dlerror; - /* there is a common convention of shells being told that they - are login shells via the OS prepending a - to their argv[0]. - the APE system doesn't like it when argv[0] is not the full - path of the binary. to rectify this, the loader puts a "-l" - flag in argv[1] and ignores the dash. 
*/ - if ((islogin = argc > 0 && *argv[0] == '-' && (shell = GetEnv(envp, "SHELL")) - && !StrCmp(argv[0] + 1, BaseName(shell)))) { - execfn = shell; - dash_l = __builtin_alloca(3); - memmove(dash_l, "-l", 3); - } else { - execfn = argc > 0 ? argv[0] : 0; - } - /* getenv("_") is close enough to at_execfn */ + execfn = argc > 0 ? argv[0] : 0; + varpos = 0; for (i = 0; envp[i]; ++i) { if (envp[i][0] == '_' && envp[i][1] == '=') { execfn = envp[i] + 2; + } else if (!memcmp(VARNAME, envp[i], VARSIZE)) { + assert(!varpos); + varpos = envp + i; } } @@ -975,24 +982,22 @@ int main(int argc, char **argv, char **envp) { /* create new bottom of stack for spawned program system v abi aligns this on a 16-byte boundary grows down the alloc by poking the guard pages */ - n = (auxv - sp + islogin + AUXV_WORDS + 1) * sizeof(long); + n = (auxv - sp + !varpos + AUXV_WORDS + 1) * sizeof(long); sp2 = (long *)__builtin_alloca(n); if ((long)sp2 & 15) ++sp2; for (; n > 0; n -= pagesz) { ((char *)sp2)[n - 1] = 0; } - if (islogin) { - memmove(sp2, sp, 2 * sizeof(long)); - *((char **)sp2 + 2) = dash_l; - memmove(sp2 + 3, sp + 2, (auxv - sp - 2) * sizeof(long)); - ++argc; - sp2[0] = argc; - } else { - memmove(sp2, sp, (auxv - sp) * sizeof(long)); - } + memmove(sp2, sp, (auxv - sp) * sizeof(long)); argv = (char **)(sp2 + 1); envp = (char **)(sp2 + 1 + argc + 1); + if (varpos) { + varpos = (char **)((long *)varpos - sp + sp2); + } else { + varpos = envp + i++; + *(envp + i) = 0; + } auxv = (long *)(envp + i + 1); sp = sp2; @@ -1008,13 +1013,14 @@ int main(int argc, char **argv, char **envp) { but it will if you say: ln -sf /usr/local/bin/ape /opt/cosmos/bin/bash.ape and then use #!/opt/cosmos/bin/bash.ape instead. 
*/ - prog = (char *)sp[1]; + if (*argv[0] == '-' && (shell = GetEnv(envp, "SHELL")) && + !StrCmp(argv[0] + 1, BaseName(shell))) { + execfn = prog = shell; + } else { + prog = (char *)sp[1]; + } argc = sp[0]; argv = (char **)(sp + 1); - if (islogin) { - ++argv[0]; - prog = shell; - } } else if ((M->ps.literally = argc >= 3 && !StrCmp(argv[1], "-"))) { /* if the first argument is a hyphen then we give the user the power to change argv[0] or omit it entirely. most operating @@ -1056,11 +1062,11 @@ int main(int argc, char **argv, char **envp) { } pe = ebuf->buf + rc; - /* resolve argv[0] to reflect path search */ - if (argc > 0 && ((*prog != '/' && *exe == '/' && !StrCmp(prog, argv[0])) || - M->ps.indirect || !StrCmp(BaseName(prog), argv[0]))) { - argv[0] = exe; - } + /* inject program executable as first environment variable, + swapping the old first variable for it. */ + memmove(M->ps.varname, VARNAME, VARSIZE); + *varpos = *envp; + *envp = M->ps.varname; /* generate some hard random data */ if ((rc = sys_getentropy(M->rando, sizeof(M->rando))) < 0) { diff --git a/examples/env.c b/examples/env.c new file mode 100644 index 000000000..7fc971663 --- /dev/null +++ b/examples/env.c @@ -0,0 +1,10 @@ +#include "libc/stdio/stdio.h" +#include "libc/runtime/runtime.h" + +int main(int argc, char* argv[]) { + printf("%s\n", argv[0]); + for (char **p = environ; *p; ++p) { + printf(" %s\n", *p); + } + return 0; +} diff --git a/libc/calls/getprogramexecutablename.greg.c b/libc/calls/getprogramexecutablename.greg.c index 983e91b2b..ab72f1727 100644 --- a/libc/calls/getprogramexecutablename.greg.c +++ b/libc/calls/getprogramexecutablename.greg.c @@ -36,6 +36,8 @@ #define KERN_PROC 14 #define KERN_PROC_PATHNAME_FREEBSD 12 #define KERN_PROC_PATHNAME_NETBSD 5 +#define VARNAME "COSMOPOLITAN_PROGRAM_EXECUTABLE=" +#define VARSIZE (sizeof(VARNAME) - 1) static struct { atomic_uint once; @@ -77,6 +79,13 @@ static inline void InitProgramExecutableNameImpl(void) { goto CopyString; } + /* 
new-style loader supplies the full program path as the first + environment variable; if it is defined, trust it as-is. */ + if (*__envp && !strncmp(*__envp, VARNAME, VARSIZE)) { + strlcpy(g_prog.u.buf, *__envp + VARSIZE, sizeof(g_prog.u.buf)); + return; + } + // if argv[0] exists then turn it into an absolute path. we also try // adding a .com suffix since the ape auto-appends it when resolving if ((q = __argv[0])) { @@ -146,6 +155,7 @@ static inline void InitProgramExecutableNameImpl(void) { } } *p = 0; + return; } // if we don't even have that then empty the string diff --git a/test/libc/calls/getprogramexecutablename_test.c b/test/libc/calls/getprogramexecutablename_test.c new file mode 100644 index 000000000..4d43e1454 --- /dev/null +++ b/test/libc/calls/getprogramexecutablename_test.c @@ -0,0 +1,104 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2023 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. 
│
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/calls/calls.h"
+#include "libc/dce.h"
+#include "libc/limits.h"
+#include "libc/runtime/runtime.h"
+#include "libc/stdio/stdio.h"
+#include "libc/str/str.h"
+#include "libc/sysv/consts/o.h"
+#include "libc/testlib/subprocess.h"
+#include "libc/testlib/testlib.h"
+
+static char *self;           // path reported by GetProgramExecutableName()
+static bool skipcosmotests;  // set when the loader variable is absent
+
+void SetUp(void) {
+  self = GetProgramExecutableName();
+}
+
+void SetUpOnce(void) {
+  if (!getenv("COSMOPOLITAN_PROGRAM_EXECUTABLE")) {
+    fprintf(stderr,
+            "warning: old ape loader detected; skipping some tests\n");
+    skipcosmotests = true;
+  }
+  testlib_enable_tmp_setup_teardown();
+}
+
+__attribute__((__constructor__)) static void Child(int argc, char *argv[]) {
+  if (argc >= 2 && !strcmp(argv[1], "Child")) {  // argv: {argv0, "Child", path}
+    ASSERT_EQ(3, argc);
+    EXPECT_STREQ(argv[2], GetProgramExecutableName());  // argv[2] is expected
+    exit(0);
+  }
+}
+
+TEST(GetProgramExecutableName, ofThisFile) {
+  EXPECT_EQ('/', *self);  // reported path must start at the root
+  EXPECT_TRUE(
+      endswith(self, "test/libc/calls/getprogramexecutablename_test.com"));
+}
+
+TEST(GetProgramExecutableName, nullEnv) {
+  SPAWN(fork);  // exec with argv[0] = self and an empty environment
+  execve(self, (char *[]){self, "Child", self, 0}, (char *[]){ 0 });
+  EXITS(0);
+}
+
+TEST(GetProgramExecutableName, weirdArgv0NullEnv) {
+  SPAWN(fork);  // exec with argv[0] = "hello" and an empty environment
+  execve(self, (char *[]){"hello", "Child", self, 0}, (char *[]){ 0 });
+  EXITS(0);
+}
+
+TEST(GetProgramExecutableName, weirdArgv0CosmoVar) {
+  if (skipcosmotests) return;
+  char buf[32 + PATH_MAX];  // 32 = strlen("COSMOPOLITAN_PROGRAM_EXECUTABLE=")
+  stpcpy(stpcpy(buf, "COSMOPOLITAN_PROGRAM_EXECUTABLE="), self);
+  SPAWN(fork);  // exec with argv[0] = "hello" and the variable set to self
+  execve(self, (char *[]){"hello", "Child", self, 0}, (char *[]){ buf, 0});
+  EXITS(0);
+}
+
+TEST(GetProgramExecutableName, weirdArgv0WrongCosmoVar) {
+  if (skipcosmotests) return;
+  char *bad = "COSMOPOLITAN_PROGRAM_EXECUTABLE=hi";
+  SPAWN(fork);  // exec with argv[0] = "hello" and the variable set to "hi"
+  execve(self, (char *[]){"hello", "Child", self, 0}, (char *[]){ bad, 0});
+  EXITS(0);
+}
+
+TEST(GetProgramExecutableName, MovedSelf) {
+  char buf[BUFSIZ];
+  ASSERT_SYS(0, 3, open(GetProgramExecutableName(), O_RDONLY));
+  ASSERT_SYS(0, 4, creat("test", 0755));
+  ssize_t rc;
+  while ((rc = read(3, buf, BUFSIZ)) > 0) {  // copy this binary into "test"
+    ASSERT_SYS(0, rc, write(4, buf, rc));
+  }
+  ASSERT_EQ(0, rc);  // read loop must end on EOF, not on an error
+  ASSERT_SYS(0, 0, close(4));
+  ASSERT_SYS(0, 0, close(3));
+  ASSERT_NE(NULL, getcwd(buf, BUFSIZ - 5));  // leave room for "/test"
+  stpcpy(buf + strlen(buf), "/test");        // buf = <cwd>/test
+  SPAWN(fork);  // exec the copy with argv[0] = "hello" and an empty environment
+  execve(buf, (char *[]){"hello", "Child", buf, 0}, (char *[]){ 0 });
+  EXITS(0);
+}