Keep argv[0], add COSMOPOLITAN_PROGRAM_EXECUTABLE (#980)

* Introduce env.com

Handy tool for debugging environment issues.

* Inject path as COSMOPOLITAN_PROGRAM_EXECUTABLE

`argv[0]` was previously being used as a communication channel between
the loader and the binary, giving the binary its full path for use e.g.
in `GetProgramExecutableName`. But `argv[0]` is not a good channel for
this; much of what made 2a3813c6 so gross is due to that.

This change fixes the issue by preserving `argv[0]` and establishing a
new communication channel: `COSMOPOLITAN_PROGRAM_EXECUTABLE`.

The M1 loader will always set this as the first variable. Linux should
soon follow. On the other side, `GetProgramExecutableName` checks that
variable first. If it sees it, it trusts it as-is.

A lot of the churn in `ape/ape-m1.c` in this change is actually backing
out hacks introduced in 2a3813c6; the best comparison is:

    git diff 2a3813c6^..
This commit is contained in:
Jōshin 2023-12-04 15:45:46 -05:00 committed by GitHub
parent 2a3813c6cf
commit ed8fadea37
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 168 additions and 38 deletions

View file

@ -16,6 +16,7 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include <assert.h>
#include <dispatch/dispatch.h>
#include <dlfcn.h>
#include <errno.h>
@ -35,6 +36,10 @@
#include <unistd.h>
#define pagesz 16384
#define VARNAME "COSMOPOLITAN_PROGRAM_EXECUTABLE="
#define VARSIZE (sizeof(VARNAME) - 1)
/* maximum path size that cosmo can take */
#define PATHSIZE (PATH_MAX < 1024 ? PATH_MAX : 1024)
#define SYSLIB_MAGIC ('s' | 'l' << 8 | 'i' << 16 | 'b' << 24)
#define SYSLIB_VERSION 8
@ -198,8 +203,11 @@ struct PathSearcher {
unsigned long namelen;
const char *name;
const char *syspath;
char path[1024];
char varname[VARSIZE];
char path[PATHSIZE];
};
_Static_assert(offsetof(struct PathSearcher, varname) + VARSIZE ==
offsetof(struct PathSearcher, path), "struct layout");
struct ApeLoader {
struct PathSearcher ps;
@ -313,7 +321,14 @@ __attribute__((__noreturn__)) static void Pexit(const char *c, int failed,
}
static char AccessCommand(struct PathSearcher *ps, unsigned long pathlen) {
if (pathlen + 1 + ps->namelen + 1 > sizeof(ps->path)) return 0;
if (!pathlen && *ps->name != '/') {
if (!getcwd(ps->path, sizeof(ps->path) - 1 - ps->namelen)) {
Pexit("getcwd", -errno, "failed");
}
pathlen = strlen(ps->path);
} else if (pathlen + 1 + ps->namelen + 1 > sizeof(ps->path)) {
return 0;
}
if (pathlen && ps->path[pathlen - 1] != '/') ps->path[pathlen++] = '/';
memmove(ps->path + pathlen, ps->name, ps->namelen);
ps->path[pathlen + ps->namelen] = 0;
@ -884,8 +899,9 @@ int main(int argc, char **argv, char **envp) {
struct ApeLoader *M;
long *sp, *sp2, *auxv;
union ElfEhdrBuf *ebuf;
int c, islogin, n, fd, rc;
char *p, *pe, *dash_l, *exe, *prog, *shell, *execfn;
int c, n, fd, rc;
char *p, *pe, *exe, *prog, *shell, *execfn;
char **varpos;
/* allocate loader memory in program's arg block */
n = sizeof(struct ApeLoader);
@ -947,24 +963,15 @@ int main(int argc, char **argv, char **envp) {
M->lib.dlclose = dlclose;
M->lib.dlerror = dlerror;
/* there is a common convention of shells being told that they
are login shells via the OS prepending a - to their argv[0].
the APE system doesn't like it when argv[0] is not the full
path of the binary. to rectify this, the loader puts a "-l"
flag in argv[1] and ignores the dash. */
if ((islogin = argc > 0 && *argv[0] == '-' && (shell = GetEnv(envp, "SHELL"))
&& !StrCmp(argv[0] + 1, BaseName(shell)))) {
execfn = shell;
dash_l = __builtin_alloca(3);
memmove(dash_l, "-l", 3);
} else {
execfn = argc > 0 ? argv[0] : 0;
}
/* getenv("_") is close enough to at_execfn */
execfn = argc > 0 ? argv[0] : 0;
varpos = 0;
for (i = 0; envp[i]; ++i) {
if (envp[i][0] == '_' && envp[i][1] == '=') {
execfn = envp[i] + 2;
} else if (!memcmp(VARNAME, envp[i], VARSIZE)) {
assert(!varpos);
varpos = envp + i;
}
}
@ -975,24 +982,22 @@ int main(int argc, char **argv, char **envp) {
/* create new bottom of stack for spawned program
system v abi aligns this on a 16-byte boundary
grows down the alloc by poking the guard pages */
n = (auxv - sp + islogin + AUXV_WORDS + 1) * sizeof(long);
n = (auxv - sp + !varpos + AUXV_WORDS + 1) * sizeof(long);
sp2 = (long *)__builtin_alloca(n);
if ((long)sp2 & 15) ++sp2;
for (; n > 0; n -= pagesz) {
((char *)sp2)[n - 1] = 0;
}
if (islogin) {
memmove(sp2, sp, 2 * sizeof(long));
*((char **)sp2 + 2) = dash_l;
memmove(sp2 + 3, sp + 2, (auxv - sp - 2) * sizeof(long));
++argc;
sp2[0] = argc;
} else {
memmove(sp2, sp, (auxv - sp) * sizeof(long));
}
memmove(sp2, sp, (auxv - sp) * sizeof(long));
argv = (char **)(sp2 + 1);
envp = (char **)(sp2 + 1 + argc + 1);
if (varpos) {
varpos = (char **)((long *)varpos - sp + sp2);
} else {
varpos = envp + i++;
*(envp + i) = 0;
}
auxv = (long *)(envp + i + 1);
sp = sp2;
@ -1008,13 +1013,14 @@ int main(int argc, char **argv, char **envp) {
but it will if you say:
ln -sf /usr/local/bin/ape /opt/cosmos/bin/bash.ape
and then use #!/opt/cosmos/bin/bash.ape instead. */
prog = (char *)sp[1];
if (*argv[0] == '-' && (shell = GetEnv(envp, "SHELL")) &&
!StrCmp(argv[0] + 1, BaseName(shell))) {
execfn = prog = shell;
} else {
prog = (char *)sp[1];
}
argc = sp[0];
argv = (char **)(sp + 1);
if (islogin) {
++argv[0];
prog = shell;
}
} else if ((M->ps.literally = argc >= 3 && !StrCmp(argv[1], "-"))) {
/* if the first argument is a hyphen then we give the user the
power to change argv[0] or omit it entirely. most operating
@ -1056,11 +1062,11 @@ int main(int argc, char **argv, char **envp) {
}
pe = ebuf->buf + rc;
/* resolve argv[0] to reflect path search */
if (argc > 0 && ((*prog != '/' && *exe == '/' && !StrCmp(prog, argv[0])) ||
M->ps.indirect || !StrCmp(BaseName(prog), argv[0]))) {
argv[0] = exe;
}
/* inject program executable as first environment variable,
swapping the old first variable for it. */
memmove(M->ps.varname, VARNAME, VARSIZE);
*varpos = *envp;
*envp = M->ps.varname;
/* generate some hard random data */
if ((rc = sys_getentropy(M->rando, sizeof(M->rando))) < 0) {