Rewrite getcwd()

This change addresses a bug that was reported in #923 where bash on
Windows behaved strangely. It turned out that our weak linking of
malloc() caused bash's configure script to favor its own getcwd()
function, which is implemented in the most astonishing way, using
opendir() and readdir() to recursively construct the current path.

This change moves getcwd() into LIBC_STDIO so it can strongly link
malloc(). A new __getcwd() function is now introduced, so all the
low-level runtime services can still use the actual system call. It
provides the Linux Kernel API convention across platforms, and is
overall a higher-quality implementation than what we had before.

In the future, we should take a closer look at why bash's getcwd()
polyfill wasn't working as intended on Windows, since doing so may
reveal an opportunity to improve our readdir() too.
This commit is contained in:
Justine Tunney 2023-11-02 13:06:23 -07:00
parent a46ec61787
commit 1eb6484c9c
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
13 changed files with 251 additions and 211 deletions

View file

@ -219,6 +219,7 @@ bool32 isexecutable(const char *);
bool32 isregularfile(const char *);
bool32 issymlink(const char *);
char *commandv(const char *, char *, size_t);
int __getcwd(char *, size_t);
int clone(void *, void *, size_t, int, void *, void *, void *, void *);
int fadvise(int, uint64_t, uint64_t, int);
int makedirs(const char *, unsigned);

View file

@ -99,8 +99,6 @@ o//libc/calls/prctl.o: \
# we always want -Os because:
# it's early runtime mandatory and quite huge without it
o//libc/calls/getcwd.greg.o \
o//libc/calls/getcwd-nt.greg.o \
o//libc/calls/getcwd-xnu.greg.o \
o//libc/calls/statfs2cosmo.o: private \
CFLAGS += \
-Os

View file

@ -1,84 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/macros.internal.h"
#include "libc/nt/files.h"
#include "libc/str/str.h"
#include "libc/str/utf16.h"
#include "libc/sysv/errfuns.h"
/**
 * Writes the current directory reported by GetCurrentDirectory() into
 * `buf` as a NUL-terminated, slash-separated byte string.
 *
 * The directory is fetched as UTF-16, a leading drive-letter form is
 * rewritten, backslashes in the ASCII range become forward slashes, and
 * everything else is encoded through tpenc().
 *
 * @param buf is the output buffer
 * @param size is the byte capacity of `buf`
 * @return `buf` on success; NULL after calling erange() when the size
 *     checks fail or the output doesn't fit; NULL after calling
 *     __winerr() when GetCurrentDirectory() returns zero
 */
textwindows char *sys_getcwd_nt(char *buf, size_t size) {
uint64_t w;
wint_t x, y;
uint32_t n, i, j;
char16_t p[PATH_MAX];
// n is the value returned by GetCurrentDirectory(); zero is treated as failure
if ((n = GetCurrentDirectory(ARRAYLEN(p), p))) {
// conservative up-front check: n units plus 5 spare must fit both buffers
if (4 + n + 1 <= size && 4 + n + 1 <= ARRAYLEN(p)) {
// NOTE(review): the loop below rewrites buf starting at index 0, so the
// output of this conversion looks like it gets overwritten; confirm
tprecode16to8(buf, size, p);
// i indexes the UTF-16 input, j indexes the byte output
i = 0;
j = 0;
if (n >= 3 && isalpha(p[0]) && p[1] == ':' && p[2] == '\\') {
// turn c:\... into \c\...
// done in place on the UTF-16 copy; the loop below converts it
p[1] = p[0];
p[0] = '\\';
} else if (n >= 7 && p[0] == '\\' && p[1] == '\\' && p[2] == '?' &&
p[3] == '\\' && isalpha(p[4]) && p[5] == ':' && p[6] == '\\') {
// turn \\?\c:\... into \c\...
// here the prefix is emitted directly and the 7 input units are skipped
buf[j++] = '/';
buf[j++] = p[4];
buf[j++] = '/';
i += 7;
}
while (i < n) {
x = p[i++] & 0xffff;
// a unit that isn't UCS-2 on its own (presumably a surrogate) is merged
// with the following unit, or replaced by U+FFFD if the input ends here
if (!IsUcs2(x)) {
if (i < n) {
y = p[i++] & 0xffff;
x = MergeUtf16(x, y);
} else {
x = 0xfffd;
}
}
// 0200 octal is 128: code points below it are copied as a single byte
if (x < 0200) {
if (x == '\\') {
x = '/';
}
w = x;
} else {
// presumably tpenc() packs the encoded bytes into an integer with the
// first byte in the low bits; the loop below emits them in that order
w = tpenc(x);
}
// emit at least one byte, then keep going while nonzero bits remain;
// bytes that don't fit are dropped and caught by the check after the loop
do {
if (j < size) {
buf[j++] = w;
}
w >>= 8;
} while (w);
}
// succeed only if there is still room for the terminator
if (j < size) {
buf[j] = 0;
return buf;
}
}
erange();
return NULL;
} else {
__winerr();
return NULL;
}
}

View file

@ -16,81 +16,154 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "libc/calls/state.internal.h"
#include "libc/calls/struct/metastat.internal.h"
#include "libc/calls/syscall-nt.internal.h"
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/strace.internal.h"
#include "libc/intrin/weaken.h"
#include "libc/limits.h"
#include "libc/log/backtrace.internal.h"
#include "libc/mem/mem.h"
#include "libc/nt/files.h"
#include "libc/stdio/sysparam.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/at.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/errfuns.h"
/**
* Returns current working directory, e.g.
*
* const char *dirname = gc(getcwd(0,0)); // if malloc is linked
* const char *dirname = getcwd(alloca(PATH_MAX),PATH_MAX);
*
* @param buf is where UTF-8 NUL-terminated path string gets written,
* which may be NULL to ask this function to malloc a buffer
* @param size is number of bytes available in buf, e.g. PATH_MAX+1,
* which may be 0 if buf is NULL
* @return buf containing system-normative path or NULL w/ errno
* @error ERANGE, EINVAL, ENOMEM
*/
char *getcwd(char *buf, size_t size) {
char *p, *r;
if (buf) {
p = buf;
if (!size) {
einval();
STRACE("getcwd(%p, %'zu) %m", buf, size);
return 0;
}
} else if (_weaken(malloc)) {
unassert(!__vforked);
if (!size) size = PATH_MAX;
if (!(p = _weaken(malloc)(size))) {
STRACE("getcwd(%p, %'zu) %m", buf, size);
return 0;
}
} else {
einval();
STRACE("getcwd() needs buf≠0 or __static_yoink(\"malloc\")");
return 0;
}
*p = '\0';
if (!IsWindows()) {
if (IsMetal()) {
r = size >= 5 ? strcpy(p, "/zip") : 0;
} else if (IsXnu()) {
r = sys_getcwd_xnu(p, size);
} else if (sys_getcwd(p, size) != (void *)-1) {
r = p;
} else {
r = 0;
}
} else {
r = sys_getcwd_nt(p, size);
}
if (!buf) {
if (!r) {
if (_weaken(free)) {
_weaken(free)(p);
}
} else {
if (_weaken(realloc)) {
if ((p = _weaken(realloc)(r, strlen(r) + 1))) {
r = p;
#define XNU_F_GETPATH 50
#define XNU_MAXPATHLEN 1024
static int sys_getcwd_xnu(char *res, size_t size) {
int fd, len, rc = -1;
union metastat st[2];
char buf[XNU_MAXPATHLEN];
if ((fd = __sys_openat_nc(AT_FDCWD, ".", O_RDONLY | O_DIRECTORY, 0)) != -1) {
if (__sys_fstat(fd, &st[0]) != -1) {
if (st[0].xnu.st_dev && st[0].xnu.st_ino) {
if (__sys_fcntl(fd, XNU_F_GETPATH, (uintptr_t)buf) != -1) {
if (__sys_fstatat(AT_FDCWD, buf, &st[1], 0) != -1) {
if (st[0].xnu.st_dev == st[1].xnu.st_dev &&
st[0].xnu.st_ino == st[1].xnu.st_ino) {
if ((len = strlen(buf)) < size) {
memcpy(res, buf, (rc = len + 1));
} else {
erange();
}
} else {
einval();
}
}
}
} else {
einval();
}
}
sys_close(fd);
}
STRACE("getcwd(%p, %'zu) → %#s", buf, size, r);
return r;
return rc;
}
/**
 * Writes the fixed path "/zip" into `buf`.
 *
 * NOTE(review): judging by the name this serves the bare metal code
 * path; confirm against the caller.
 *
 * @param buf receives "/zip" and its nul terminator
 * @param size is byte capacity of `buf`
 * @return 5 (bytes written including nul) on success, otherwise
 *     whatever erange() returns
 */
static int sys_getcwd_metal(char *buf, size_t size) {
  // four characters plus the nul terminator need five bytes
  if (size < 5) {
    return erange();
  }
  strcpy(buf, "/zip");
  return 5;
}
/**
 * Tells whether `c` is an ASCII letter, independent of locale.
 *
 * @param c is the character code to classify
 * @return 1 if `c` is in A-Z or a-z, otherwise 0
 */
static inline int IsAlpha(int c) {
  if (c >= 'a' && c <= 'z') {
    return 1;
  }
  if (c >= 'A' && c <= 'Z') {
    return 1;
  }
  return 0;
}
/**
 * Fetches the current directory on Windows as a slash-separated utf-8
 * path, e.g. `c:\foo` or `\\?\c:\foo` becomes `/c/foo`.
 *
 * @param buf receives the path; it's only written once success is certain
 * @param size is byte capacity of `buf`
 * @return number of bytes written including nul, otherwise the result
 *     of eacces() if the system call failed, or of erange() if the
 *     path didn't fit
 */
static dontinline textwindows int sys_getcwd_nt(char *buf, size_t size) {

  // ask the system for the current directory as utf-16
  char16_t wide[PATH_MAX];
  uint32_t len = GetCurrentDirectory(PATH_MAX, wide);
  if (!len) {
    return eacces();  // system call failed
  }
  if (len >= PATH_MAX) {
    return erange();  // not enough room?!?
  }

  // transcode into a private utf-8 scratch buffer, because `buf`
  // must stay untouched until we know the call is going to succeed
  char narrow[PATH_MAX];
  char *path = narrow;
  len = tprecode16to8(path, PATH_MAX, wide).ax;
  if (len >= PATH_MAX) {
    return erange();  // utf-8 explosion
  }

  // strip the \\?\ prefix when a drive letter path follows it
  if (path[0] == '\\' && path[1] == '\\' && path[2] == '?' &&
      path[3] == '\\' && IsAlpha(path[4]) && path[5] == ':' &&
      path[6] == '\\') {
    path += 4;
    len -= 4;
  }

  // rewrite the drive letter form c:\... as \c\...
  if (IsAlpha(path[0]) && path[1] == ':' && path[2] == '\\') {
    char drive = path[0];
    path[0] = '\\';
    path[1] = drive;
  }

  // the final length is settled, so now the caller's capacity can be
  // checked; there must be room for the nul terminator as well
  if (len >= size) {
    return erange();
  }

  // copy out the result while flipping backslashes to slashes
  for (uint32_t i = 0; i < len; ++i) {
    buf[i] = path[i] == '\\' ? '/' : path[i];
  }
  buf[len] = 0;

  // report the byte count including the nul terminator
  return len + 1;
}
/**
 * Obtains the current working directory without linking malloc().
 *
 * Cosmo offers this function to work around the shortcomings of
 * getcwd(). It never pulls in malloc(), and it provides the getcwd()
 * convention used by Linux and NetBSD on every platform, since that
 * convention is the best one.
 *
 * @param buf receives utf-8 path and is left unmodified on error
 * @param size is byte capacity of `buf`
 * @return number of bytes copied, counting the nul terminator, on
 *     success, or -1 w/ errno
 * @raise EACCES if the current directory path couldn't be accessed
 * @raise ERANGE if `size` wasn't big enough for path and nul byte
 * @raise EFAULT if `buf` points to invalid memory
 */
int __getcwd(char *buf, size_t size) {
  int res;
  if (IsLinux() || IsNetbsd()) {
    // the system call result is passed through as-is
    res = sys_getcwd(buf, size);
  } else if (IsXnu()) {
    res = sys_getcwd_xnu(buf, size);
  } else if (IsWindows()) {
    res = sys_getcwd_nt(buf, size);
  } else if (IsFreebsd() || IsOpenbsd()) {
    if (sys_getcwd(buf, size) == -1) {
      if (SupportsFreebsd() && (errno == ENOMEM || errno == EINVAL)) {
        // report these errno values as ERANGE, the code documented
        // above for a buffer that is too small
        res = erange();
      } else {
        res = -1;
      }
    } else {
      // here the length is computed from the string the call wrote
      res = strlen(buf) + 1;
    }
  } else {
    res = sys_getcwd_metal(buf, size);
  }
  STRACE("__getcwd([%#hhs], %'zu) → %d% m", res != -1 ? buf : "n/a", size, res);
  return res;
}

View file

@ -86,8 +86,9 @@ static inline void InitProgramExecutableNameImpl(void) {
if (q[0] == '.' && q[1] == '/') {
q += 2;
}
if (getcwd(p, e - p - 1 - 4)) { // for / and .com
while (*p) ++p;
int got = __getcwd(p, e - p - 1 - 4); // for / and .com
if (got != -1) {
p += got - 1;
*p++ = '/';
}
}

View file

@ -4,7 +4,6 @@
COSMOPOLITAN_C_START_
bool32 sys_isatty(int);
char *sys_getcwd_nt(char *, size_t);
int sys_chdir_nt(const char *);
int sys_close_epoll_nt(int);
int sys_dup_nt(int, int, int, int);

View file

@ -13,8 +13,6 @@ COSMOPOLITAN_C_START_
axdx_t __sys_fork(void);
axdx_t __sys_pipe(i32[hasatleast 2], i32);
axdx_t sys_getpid(void);
char *sys_getcwd(char *, u64);
char *sys_getcwd_xnu(char *, u64);
i32 __sys_dup3(i32, i32, i32);
i32 __sys_execve(const char *, char *const[], char *const[]);
i32 __sys_fcntl(i32, i32, ...);
@ -53,6 +51,7 @@ i32 sys_fork(void);
i32 sys_fsync(i32);
i32 sys_ftruncate(i32, i64, i64);
i32 sys_getcontext(void *);
i32 sys_getcwd(char *, u64);
i32 sys_getpgid(i32);
i32 sys_getppid(void);
i32 sys_getpriority(i32, u32);
@ -85,9 +84,9 @@ i32 sys_posix_openpt(i32);
i32 sys_renameat(i32, const char *, i32, const char *);
i32 sys_sem_close(i64);
i32 sys_sem_destroy(i64);
i32 sys_sem_destroy(i64);
i32 sys_sem_getvalue(i64, u32 *);
i32 sys_sem_init(u32, i64 *);
i32 sys_sem_destroy(i64);
i32 sys_sem_open(const char *, int, u32, i64 *);
i32 sys_sem_post(i64);
i32 sys_sem_trywait(i64);

View file

@ -45,7 +45,7 @@ static void __tmpdir_init(void) {
if ((s = getenv("TMPDIR"))) {
if (*s != '/') {
if (!getcwd(__tmpdir.path, PATH_MAX)) {
if (__getcwd(__tmpdir.path, PATH_MAX) == -1) {
goto GiveUp;
}
strlcat(__tmpdir.path, "/", sizeof(__tmpdir.path));

View file

@ -213,7 +213,7 @@ skip_readlink:
output[q] = 0;
if (output[0] != '/') {
if (!getcwd(stack, sizeof(stack))) return 0;
if (__getcwd(stack, sizeof(stack)) == -1) return 0;
l = strlen(stack);
/* Cancel any initial .. components. */
p = 0;

View file

@ -305,9 +305,11 @@ static int Chmod(void) {
}
static int Pwd(void) {
char path[PATH_MAX + 2];
if (getcwd(path, PATH_MAX)) {
strlcat(path, "\n", sizeof(path));
int got;
char path[PATH_MAX];
if ((got = __getcwd(path, PATH_MAX - 1)) != -1) {
path[got - 1] = '\n';
path[got] = 0;
Write(1, path);
return 0;
} else {
@ -790,7 +792,7 @@ static const char *GetVar(const char *key) {
} else if (key[0] == '?' && !key[1]) {
return IntToStr(exitstatus);
} else if (!strcmp(key, "PWD")) {
npassert(getcwd(vbuf, sizeof(vbuf)));
npassert(__getcwd(vbuf, sizeof(vbuf)) != -1);
return vbuf;
} else if (!strcmp(key, "UID")) {
FormatInt32(vbuf, getuid());

View file

@ -17,21 +17,28 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/calls.h"
#include "libc/calls/struct/stat.h"
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
/**
* Returns current working directory.
*
* If the `PWD` environment variable is set, and it seems legit, then
* that'll be returned.
* If the `PWD` environment variable is set, and it's correct, then
* that'll be returned in its exact canonical or non-canonical form
* instead of calling getcwd().
*
* @return pointer that must be free()'d, or NULL w/ errno
*/
char *get_current_dir_name(void) {
const char *p;
if ((p = getenv("PWD")) && *p == '/') {
return strdup(p);
const char *res;
struct stat st1, st2;
if ((res = getenv("PWD")) && *res && //
!stat(res, &st1) && //
!stat(".", &st2) && //
st1.st_dev == st2.st_dev && //
st1.st_ino == st2.st_ino) {
return strdup(res);
} else {
return getcwd(0, 0);
}

View file

@ -1,7 +1,7 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Copyright 2023 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
@ -17,53 +17,61 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/calls.h"
#include "libc/calls/struct/metastat.internal.h"
#include "libc/calls/struct/stat.h"
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/mem/mem.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/at.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/errfuns.h"
#define XNU_F_GETPATH 50
#define XNU_MAXPATHLEN 1024
/**
* Returns current working directory, e.g.
*
* const char *dirname = gc(getcwd(0,0)); // if malloc is linked
* const char *dirname = getcwd(alloca(PATH_MAX),PATH_MAX);
*
* @param buf is where UTF-8 NUL-terminated path string gets written,
* which may be NULL to ask this function to malloc a buffer
* @param size is number of bytes available in buf, e.g. PATH_MAX+1,
* which may be 0 if buf is NULL
* @return buf containing system-normative path or NULL w/ errno
* @raise EACCES if the current directory path couldn't be accessed
* @raise ERANGE if `size` wasn't big enough for path and nul byte
* @raise ENOMEM on failure to allocate the requested buffer
* @raise EINVAL if `size` is zero and `buf` is non-null
* @raise EFAULT if `buf` points to invalid memory
*/
char *getcwd(char *buf, size_t size) {
static inline bool CopyString(char *d, const char *s, size_t n) {
size_t i;
for (i = 0; i < n; ++i) {
if (!(d[i] = s[i])) {
return true;
// setup memory per api
char *path;
if (buf) {
path = buf;
if (!size) {
einval();
return 0;
}
} else {
if (!size) size = 4096;
if (!(path = malloc(size))) {
return 0; // enomem
}
}
return false;
}
char *sys_getcwd_xnu(char *res, size_t size) {
int fd;
union metastat st[2];
char buf[XNU_MAXPATHLEN], *ret = NULL;
if ((fd = __sys_openat_nc(AT_FDCWD, ".", O_RDONLY | O_DIRECTORY, 0)) != -1) {
if (__sys_fstat(fd, &st[0]) != -1) {
if (st[0].xnu.st_dev && st[0].xnu.st_ino) {
if (__sys_fcntl(fd, XNU_F_GETPATH, (uintptr_t)buf) != -1) {
if (__sys_fstatat(AT_FDCWD, buf, &st[1], 0) != -1) {
if (st[0].xnu.st_dev == st[1].xnu.st_dev &&
st[0].xnu.st_ino == st[1].xnu.st_ino) {
if (CopyString(res, buf, size)) {
ret = res;
} else {
erange();
}
} else {
einval();
}
}
}
} else {
einval();
}
// invoke the real system call
int got = __getcwd(path, size);
if (got == -1) {
if (path != buf) {
free(path);
}
close(fd);
return 0; // eacces, erange, or efault
}
return ret;
// cut allocation down to size
if (path != buf) {
char *p;
if ((p = realloc(path, strlen(path) + 1))) {
path = p;
}
}
// return result
return path;
}

View file

@ -17,21 +17,57 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/calls.h"
#include "libc/fmt/fmt.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/fmt/libgen.h"
#include "libc/intrin/bits.h"
#include "libc/limits.h"
#include "libc/log/check.h"
#include "libc/macros.internal.h"
#include "libc/mem/gc.internal.h"
#include "libc/str/str.h"
#include "libc/testlib/testlib.h"
#include "libc/x/x.h"
// Fixture setup hook for this test file.
// NOTE(review): presumably invoked once by the test runner before any
// TEST() body runs; confirm against testlib.
void SetUpOnce(void) {
// presumably makes each test run inside its own temporary directory
testlib_enable_tmp_setup_teardown();
// restrict the process to the listed pledge() promises; the call is
// expected to return 0 without setting errno
ASSERT_SYS(0, 0, pledge("stdio rpath cpath fattr", 0));
}
// A null buffer with zero capacity is expected to fail with ERANGE and
// return -1.
TEST(__getcwd, zero) {
ASSERT_SYS(ERANGE, -1, __getcwd(0, 0));
}
// The value returned on success is expected to equal strlen(path) + 1,
// i.e. the byte count including the nul terminator.
TEST(__getcwd, returnsLengthIncludingNul) {
char cwd1[PATH_MAX];
char cwd2[PATH_MAX];
// the first call fetches the path so its length can be measured
ASSERT_NE(-1, __getcwd(cwd1, PATH_MAX));
// the second call's return value is compared against that length
ASSERT_EQ(strlen(cwd1) + 1, __getcwd(cwd2, PATH_MAX));
}
// A failed call must leave the caller's buffer untouched.
// NOTE(review): relies on the working directory path needing more than
// 4 bytes including nul; confirm the test's tmp directory guarantees it.
TEST(__getcwd, tooShort_negOneReturned_bufferIsntModified) {
// fill the buffer with a sentinel pattern so that any write is detectable
char cwd[4] = {0x55, 0x55, 0x55, 0x55};
ASSERT_SYS(ERANGE, -1, __getcwd(cwd, 4));
// all four sentinel bytes must still be in place
ASSERT_EQ(0x55555555, READ32LE(cwd));
}
// A capacity of exactly strlen(path) holds the characters but not the
// nul terminator, so the call is expected to fail with ERANGE.
TEST(__getcwd, noRoomForNul) {
char cwd1[PATH_MAX];
char cwd2[PATH_MAX];
// learn the real path length first
ASSERT_NE(-1, __getcwd(cwd1, PATH_MAX));
// then retry with one byte too few
ASSERT_SYS(ERANGE, -1, __getcwd(cwd2, strlen(cwd1)));
}
// The returned path is expected to begin with a forward slash.
TEST(__getcwd, alwaysStartsWithSlash) {
char cwd[PATH_MAX];
ASSERT_NE(-1, __getcwd(cwd, PATH_MAX));
ASSERT_EQ('/', *cwd);
}
// The returned path is expected to have no trailing slash.
// NOTE(review): per the test name this assumes the working directory
// isn't the root directory.
TEST(__getcwd, notInRootDir_neverEndsWithSlash) {
char cwd[PATH_MAX];
ASSERT_NE(-1, __getcwd(cwd, PATH_MAX));
ASSERT_FALSE(endswith(cwd, "/"));
}
TEST(getcwd, test) {
char buf[PATH_MAX];
EXPECT_SYS(0, 0, mkdir("subdir", 0755));