Remove realpath/getcwd from loaders (#1024)

This implements proposals 1 and 2a from this gist:

https://gist.github.com/mrdomino/2222cab61715fd527e82e036ba4156b1

The only reason to use realpath from the loader was to try to prevent a
TOCTOU between the loader and the binary. But this is only a real issue
in set-id contexts, and in those cases there is already a canonical way
to do it: `/dev/fd`, passed by the kernel to the loader, so all we have
to do is pass that along to the binary.

Aside from realpath, there is no reason to absolutize the path we supply
to the binary, since it can call `getcwd` as well as we can, and on non-
M1 the binary is in a much better position to make that call.

Since we no longer absolutize the path, the binary does need to do this,
so we make its argv-parsing code generic and apply that to the different
possible places the path could come from. This means that `_` is finally
usable as a relative path, as a nice side benefit.

The M1 realpath code had a significant bug - it uses the wrong offset to
truncate the `.ape` in the `$prog.ape` case.

This PR also fixes a regression in `ape $progname` out of `$PATH` on the
two BSDs (Free and Net) that did not implement `RealPath`.
This commit is contained in:
Jōshin 2023-12-18 15:01:16 -05:00 committed by GitHub
parent f73576ab8a
commit 2a11a09d98
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 69 additions and 96 deletions

View file

@ -316,21 +316,12 @@ __attribute__((__noreturn__)) static void Pexit(const char *c, int failed,
}
static char AccessCommand(struct PathSearcher *ps, unsigned long pathlen) {
char buf[PATH_MAX];
size_t n;
if (pathlen + 1 + ps->namelen + 1 > sizeof(ps->path)) {
return 0;
}
if (pathlen && ps->path[pathlen - 1] != '/') ps->path[pathlen++] = '/';
memmove(ps->path + pathlen, ps->name, ps->namelen);
ps->path[pathlen + ps->namelen] = 0;
if (!realpath(ps->path, buf)) {
Pexit(ps->path, -errno, "realpath");
}
if ((n = strlen(buf)) >= sizeof(ps->path)) {
Pexit(buf, 0, "too long");
}
memcpy(ps->path, buf, n + 1);
if (!access(ps->path, X_OK)) {
if (ps->indirect) {
ps->namelen -= 4;

View file

@ -545,40 +545,6 @@ __attribute__((__noreturn__)) static void Pexit(int os, const char *c, int rc,
Exit(127, os);
}
#define PSFD "/proc/self/fd/"
static int RealPath(int os, int fd, char *path, char **resolved) {
char buf[PATH_MAX];
int rc;
if (IsLinux()) {
char psfd[sizeof(PSFD) + 19];
MemMove(psfd, PSFD, sizeof(PSFD) - 1);
Utoa(psfd + sizeof(PSFD) - 1, fd);
rc = SystemCall(-100, (long)psfd, (long)buf, PATH_MAX, 0, 0, 0,
IsAarch64() ? 78 : 267);
if (rc >= 0) {
if (rc == PATH_MAX) {
rc = -36;
} else {
buf[rc] = 0;
}
}
} else if (IsXnu()) {
rc = SystemCall(fd, 50, (long)buf, 0, 0, 0, 0, 92 | 0x2000000);
} else if (IsOpenbsd()) {
rc = SystemCall((long)path, (long)buf, 0, 0, 0, 0, 0, 115);
} else {
*resolved = 0;
return 0;
}
if (rc >= 0) {
MemMove(path, buf, StrLen(buf) + 1);
*resolved = path;
rc = 0;
}
return rc;
}
static char AccessCommand(struct PathSearcher *ps, unsigned long pathlen) {
if (pathlen + 1 + ps->namelen + 1 > sizeof(ps->path)) return 0;
if (pathlen && ps->path[pathlen - 1] != '/') ps->path[pathlen++] = '/';
@ -644,9 +610,8 @@ static char *Commandv(struct PathSearcher *ps, int os, const char *name,
}
}
__attribute__((__noreturn__)) static void Spawn(int os, const char *exe,
char *path, int fd, long *sp,
unsigned long pagesz,
__attribute__((__noreturn__)) static void Spawn(int os, char *exe, int fd,
long *sp, unsigned long pagesz,
struct ElfEhdr *e,
struct ElfPhdr *p) {
long rc;
@ -803,12 +768,12 @@ __attribute__((__noreturn__)) static void Spawn(int os, const char *exe,
Msyscall(dynbase + code, codesize, os);
/* call program entrypoint */
Launch(IsFreebsd() ? sp : 0, dynbase + e->e_entry, path, sp, os);
Launch(IsFreebsd() ? sp : 0, dynbase + e->e_entry, exe, sp, os);
}
static const char *TryElf(struct ApeLoader *M, union ElfEhdrBuf *ebuf,
const char *exe, char *path, int fd, long *sp,
long *auxv, unsigned long pagesz, int os) {
char *exe, int fd, long *sp, long *auxv,
unsigned long pagesz, int os) {
long i, rc;
unsigned size;
struct ElfEhdr *e;
@ -923,7 +888,7 @@ static const char *TryElf(struct ApeLoader *M, union ElfEhdrBuf *ebuf,
}
/* we're now ready to load */
Spawn(os, exe, path, fd, sp, pagesz, e, p);
Spawn(os, exe, fd, sp, pagesz, e, p);
}
__attribute__((__noreturn__)) static void ShowUsage(int os, int fd, int rc) {
@ -1081,8 +1046,6 @@ EXTERN_C __attribute__((__noreturn__)) void ApeLoader(long di, long *sp,
Pexit(os, prog, 0, "not found (maybe chmod +x or ./ needed)");
} else if ((fd = Open(exe, O_RDONLY, 0, os)) < 0) {
Pexit(os, exe, fd, "open");
} else if ((rc = RealPath(os, fd, exe, &prog)) < 0) {
Pexit(os, exe, rc, "realpath");
} else if ((rc = Pread(fd, ebuf->buf, sizeof(ebuf->buf), 0, os)) < 0) {
Pexit(os, exe, rc, "read");
} else if ((unsigned long)rc < sizeof(ebuf->ehdr)) {
@ -1122,9 +1085,9 @@ EXTERN_C __attribute__((__noreturn__)) void ApeLoader(long di, long *sp,
}
}
if (i >= sizeof(ebuf->ehdr)) {
TryElf(M, ebuf, exe, prog, fd, sp, auxv, pagesz, os);
TryElf(M, ebuf, exe, fd, sp, auxv, pagesz, os);
}
}
}
Pexit(os, exe, 0, TryElf(M, ebuf, exe, prog, fd, sp, auxv, pagesz, os));
Pexit(os, exe, 0, TryElf(M, ebuf, exe, fd, sp, auxv, pagesz, os));
}

View file

@ -63,22 +63,52 @@ static inline int AllNumDot(const char *s) {
}
}
static int IsApeLoader(char *s) {
// old loaders do not pass __program_executable_name, so we need to
// check for them when we use KERN_PROC_PATHNAME et al.
static int OldApeLoader(char *s) {
char *b;
return !strcmp(s, "/usr/bin/ape") ||
(!strncmp((b = basename(s)), ".ape-", 5) &&
AllNumDot(b + 5));
}
// if q exists then turn it into an absolute path. we also try adding
// a .com suffix since the ape auto-appends it when resolving
static int TryPath(const char *q, int com) {
char c, *p, *e;
if (!q) return 0;
p = g_prog.u.buf;
e = p + sizeof(g_prog.u.buf);
if (*q != '/') {
if (q[0] == '.' && q[1] == '/') {
q += 2;
}
int got = __getcwd(p, e - p - 1 /* '/' */ - com * 4);
if (got != -1) {
p += got - 1;
*p++ = '/';
}
}
while ((c = *q++)) {
if (p + com * 4 + 1 /* nul */ < e) {
*p++ = c;
} else {
return 0;
}
}
*p = 0;
if (!sys_faccessat(AT_FDCWD, g_prog.u.buf, F_OK, 0)) return 1;
p = WRITE32LE(p, READ32LE(".com"));
*p = 0;
if (!sys_faccessat(AT_FDCWD, g_prog.u.buf, F_OK, 0)) return 1;
return 0;
}
static inline void InitProgramExecutableNameImpl(void) {
size_t n;
ssize_t got;
char c, *q, *b;
if (__program_executable_name) {
/* already set by the loader */
return;
}
if (IsWindows()) {
int n = GetModuleFileName(0, g_prog.u.buf16, ARRAYLEN(g_prog.u.buf16));
for (int i = 0; i < n; ++i) {
@ -103,6 +133,22 @@ static inline void InitProgramExecutableNameImpl(void) {
return;
}
// loader passed us a path. it may be relative.
if (__program_executable_name) {
if (*__program_executable_name == '/') {
return;
}
if (TryPath(__program_executable_name, 0)) {
goto UseBuf;
}
/* if TryPath fails, it probably failed because getcwd() was too long.
we are out of options now; KERN_PROC_PATHNAME et al will return the
name of the loader not the binary, and argv et al will at best have
the same problem. just use the relative path we got from the loader
as-is, and accept that if we chdir then things will break. */
return;
}
b = g_prog.u.buf;
n = sizeof(g_prog.u.buf) - 1;
if (IsFreebsd() || IsNetbsd()) {
@ -116,7 +162,7 @@ static inline void InitProgramExecutableNameImpl(void) {
}
cmd[3] = -1; // current process
if (sys_sysctl(cmd, ARRAYLEN(cmd), b, &n, 0, 0) != -1) {
if (!IsApeLoader(b)) {
if (!OldApeLoader(b)) {
goto UseBuf;
}
}
@ -125,54 +171,27 @@ static inline void InitProgramExecutableNameImpl(void) {
if ((got = sys_readlinkat(AT_FDCWD, "/proc/self/exe", b, n)) > 0 ||
(got = sys_readlinkat(AT_FDCWD, "/proc/curproc/file", b, n)) > 0) {
b[got] = 0;
if (!IsApeLoader(b)) {
if (!OldApeLoader(b)) {
goto UseBuf;
}
}
}
// don't trust argument parsing if set-id.
if (issetugid()) {
/* give up prior to using less secure methods */
goto UseEmpty;
}
// if argv[0] exists then turn it into an absolute path. we also try
// adding a .com suffix since the ape auto-appends it when resolving
if ((q = __argv[0])) {
char *p = g_prog.u.buf;
char *e = p + sizeof(g_prog.u.buf);
if (*q != '/') {
if (q[0] == '.' && q[1] == '/') {
q += 2;
}
int got = __getcwd(p, e - p - 1 - 4); // for / and .com
if (got != -1) {
p += got - 1;
*p++ = '/';
}
}
while ((c = *q++)) {
if (p + 1 + 4 < e) { // for nul and .com
*p++ = c;
}
}
*p = 0;
if (!sys_faccessat(AT_FDCWD, g_prog.u.buf, F_OK, 0)) goto UseBuf;
p = WRITE32LE(p, READ32LE(".com"));
*p = 0;
if (!sys_faccessat(AT_FDCWD, g_prog.u.buf, F_OK, 0)) goto UseBuf;
}
/* the previous loader supplied the full program path as the first
environment variable. we also try "_". */
if ((q = __getenv(__envp, "COSMOPOLITAN_PROGRAM_EXECUTABLE").s) ||
(q = __getenv(__envp, "_").s)) {
goto CopyString;
// try argv[0], then argv[0].com, then $_, then $_.com.
if (TryPath(__argv[0], 1) ||
/* TODO(mrdomino): remove after next loader mint */
TryPath(__getenv(__envp, "COSMOPOLITAN_PROGRAM_EXECUTABLE").s, 0) ||
TryPath(__getenv(__envp, "_").s, 1)) {
goto UseBuf;
}
// give up and just copy argv[0] into it
if ((q = __argv[0])) {
CopyString:
char *p = g_prog.u.buf;
char *e = p + sizeof(g_prog.u.buf);
while ((c = *q++)) {
@ -184,8 +203,8 @@ static inline void InitProgramExecutableNameImpl(void) {
goto UseBuf;
}
// if we don't even have that then empty the string
UseEmpty:
// if we don't even have that then empty the string
g_prog.u.buf[0] = 0;
UseBuf: