From af3f62a71ae87134826c54628d5558deb2cc67d0 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Sun, 26 May 2024 16:53:13 -0700 Subject: [PATCH] Ensure io requests are always capped at 0x7ffff000 This gives us the Linux behavior across platforms. Fixes #1189 --- Makefile | 2 +- libc/calls/pread.c | 7 ++- libc/calls/preadv.c | 50 +++++++++++++++++--- libc/calls/pwrite.c | 7 ++- libc/calls/pwritev.c | 54 +++++++++++++++++---- libc/calls/read.c | 7 ++- libc/calls/readv.c | 96 +++++++++++++++++++++++++++----------- libc/calls/write.c | 6 +++ libc/calls/writev.c | 94 +++++++++++++++++++++++++++---------- test/posix/fread3gb_test.c | 26 +++++++---- 10 files changed, 268 insertions(+), 81 deletions(-) diff --git a/Makefile b/Makefile index 8bbcd2b2b..c59ae71e8 100644 --- a/Makefile +++ b/Makefile @@ -133,7 +133,7 @@ endif ifneq ($(findstring aarch64,$(MODE)),) ARCH = aarch64 -HOSTS ?= pi pi5 studio freebsdarm +HOSTS ?= pi studio freebsdarm else ARCH = x86_64 HOSTS ?= freebsd rhel7 xnu openbsd netbsd win10 diff --git a/libc/calls/pread.c b/libc/calls/pread.c index c90fe6108..00de0e652 100644 --- a/libc/calls/pread.c +++ b/libc/calls/pread.c @@ -30,6 +30,7 @@ #include "libc/macros.internal.h" #include "libc/runtime/runtime.h" #include "libc/runtime/zipos.internal.h" +#include "libc/stdio/sysparam.h" #include "libc/sysv/errfuns.h" /** @@ -39,7 +40,7 @@ * * @param fd is something open()'d earlier, noting pipes might not work * @param buf is copied into, cf. copy_file_range(), sendfile(), etc. - * @param size in range [1..0x7ffff000] is reasonable + * @param size is always saturated to 0x7ffff000 automatically * @param offset is bytes from start of file at which read begins * @return [1..size] bytes on success, 0 on EOF, or -1 w/ errno; with * exception of size==0, in which case return zero means no error @@ -58,6 +59,10 @@ ssize_t pread(int fd, void *buf, size_t size, int64_t offset) { ssize_t rc; BEGIN_CANCELATION_POINT; + // XNU and BSDs will EINVAL if requested bytes exceeds INT_MAX + // this is inconsistent with Linux which ignores huge requests + size = MIN(size, 0x7ffff000); + if (offset < 0) { rc = einval(); } else if (fd < 0) { diff --git a/libc/calls/preadv.c b/libc/calls/preadv.c index b0fc220dd..cad6a4ac2 100644 --- a/libc/calls/preadv.c +++ b/libc/calls/preadv.c @@ -28,24 +28,55 @@ #include "libc/intrin/likely.h" #include "libc/intrin/strace.internal.h" #include "libc/intrin/weaken.h" +#include "libc/limits.h" +#include "libc/mem/alloca.h" +#include "libc/runtime/stack.h" #include "libc/runtime/zipos.internal.h" +#include "libc/stdckdint.h" #include "libc/sysv/errfuns.h" +static size_t SumIovecBytes(const struct iovec *iov, int iovlen) { + size_t count = 0; + for (int i = 0; i < iovlen; ++i) + if (ckd_add(&count, count, iov[i].iov_len)) + count = SIZE_MAX; + return count; +} + static ssize_t Preadv(int fd, struct iovec *iov, int iovlen, int64_t off) { int e, i; size_t got; ssize_t rc, toto; - if (fd < 0) { + if (fd < 0) return ebadf(); - } - - if (iovlen < 0) { + if (iovlen < 0) return einval(); - } - - if (IsAsan() && !__asan_is_valid_iov(iov, iovlen)) { + if (IsAsan() && !__asan_is_valid_iov(iov, iovlen)) return efault(); + + // XNU and BSDs will EINVAL if requested bytes exceeds INT_MAX + // this is inconsistent with Linux which ignores huge requests + if (!IsLinux()) { + size_t sum, remain = 0x7ffff000; + if ((sum = SumIovecBytes(iov, iovlen)) > remain) { + struct iovec *iov2; +#pragma GCC push_options +#pragma GCC diagnostic ignored "-Walloca-larger-than=" + iov2 = alloca(iovlen * sizeof(struct iovec)); + CheckLargeStackAllocation(iov2, iovlen * sizeof(struct iovec)); +#pragma GCC pop_options + for (int i = 0; i < iovlen; ++i) { + iov2[i] = iov[i]; + if (remain >= iov2[i].iov_len) { + remain -= iov2[i].iov_len; + } else { + iov2[i].iov_len = remain; + remain = 0; + } + } + iov = iov2; + } } if (fd < g_fds.n && g_fds.p[fd].kind == kFdZip) { @@ -112,6 +143,11 @@ static ssize_t Preadv(int fd, struct iovec *iov, int iovlen, int64_t off) { /** * Reads with maximum generality. * + * It's possible for file write request to be partially completed. For + * example, if the sum of `iov` lengths exceeds 0x7ffff000 then bytes + * beyond that will be ignored. This is a Linux behavior that Cosmo + * polyfills across platforms. + * * @return number of bytes actually read, or -1 w/ errno * @cancelationpoint * @asyncsignalsafe diff --git a/libc/calls/pwrite.c b/libc/calls/pwrite.c index 472472c6d..8bf05ac53 100644 --- a/libc/calls/pwrite.c +++ b/libc/calls/pwrite.c @@ -28,6 +28,7 @@ #include "libc/intrin/asan.internal.h" #include "libc/intrin/strace.internal.h" #include "libc/macros.internal.h" +#include "libc/stdio/sysparam.h" #include "libc/sysv/errfuns.h" /** @@ -37,7 +38,7 @@ * * @param fd is something open()'d earlier, noting pipes might not work * @param buf is copied from, cf. copy_file_range(), sendfile(), etc. - * @param size in range [1..0x7ffff000] is reasonable + * @param size is always saturated to 0x7ffff000 automatically * @param offset is bytes from start of file at which write begins, * which can exceed or overlap the end of file, in which case your * file will be extended @@ -53,6 +54,10 @@ ssize_t pwrite(int fd, const void *buf, size_t size, int64_t offset) { size_t wrote; BEGIN_CANCELATION_POINT; + // XNU and BSDs will EINVAL if requested bytes exceeds INT_MAX + // this is inconsistent with Linux which ignores huge requests + size = MIN(size, 0x7ffff000); + if (offset < 0) { rc = einval(); } else if (fd == -1) { diff --git a/libc/calls/pwritev.c b/libc/calls/pwritev.c index fd70331fb..b53d5de64 100644 --- a/libc/calls/pwritev.c +++ b/libc/calls/pwritev.c @@ -28,28 +28,57 @@ #include "libc/intrin/likely.h" #include "libc/intrin/strace.internal.h" #include "libc/intrin/weaken.h" +#include "libc/limits.h" +#include "libc/mem/alloca.h" +#include "libc/runtime/stack.h" +#include "libc/stdckdint.h" #include "libc/sysv/errfuns.h" +static size_t SumIovecBytes(const struct iovec *iov, int iovlen) { + size_t count = 0; + for (int i = 0; i < iovlen; ++i) + if (ckd_add(&count, count, iov[i].iov_len)) + count = SIZE_MAX; + return count; +} + static ssize_t Pwritev(int fd, const struct iovec *iov, int iovlen, int64_t off) { int i, e; size_t sent; ssize_t rc, toto; - if (fd < 0) { + if (fd < 0) return ebadf(); - } - - if (iovlen < 0) { + if (iovlen < 0) return einval(); - } - - if (IsAsan() && !__asan_is_valid_iov(iov, iovlen)) { + if (IsAsan() && !__asan_is_valid_iov(iov, iovlen)) return efault(); - } - - if (fd < g_fds.n && g_fds.p[fd].kind == kFdZip) { + if (fd < g_fds.n && g_fds.p[fd].kind == kFdZip) return ebadf(); + + // XNU and BSDs will EINVAL if requested bytes exceeds INT_MAX + // this is inconsistent with Linux which ignores huge requests + if (!IsLinux()) { + size_t sum, remain = 0x7ffff000; + if ((sum = SumIovecBytes(iov, iovlen)) > remain) { + struct iovec *iov2; +#pragma GCC push_options +#pragma GCC diagnostic ignored "-Walloca-larger-than=" + iov2 = alloca(iovlen * sizeof(struct iovec)); + CheckLargeStackAllocation(iov2, iovlen * sizeof(struct iovec)); +#pragma GCC pop_options + for (int i = 0; i < iovlen; ++i) { + iov2[i] = iov[i]; + if (remain >= iov2[i].iov_len) { + remain -= iov2[i].iov_len; + } else { + iov2[i].iov_len = remain; + remain = 0; + } + } + iov = iov2; + } } if (IsWindows()) { @@ -116,6 +145,11 @@ static ssize_t Pwritev(int fd, const struct iovec *iov, int iovlen, * been committed. It can also happen if we need to polyfill this system * call using pwrite(). * + * It's possible for file write request to be partially completed. For + * example, if the sum of `iov` lengths exceeds 0x7ffff000 then bytes + * beyond that will be ignored. This is a Linux behavior that Cosmo + * polyfills across platforms. + * * @return number of bytes actually sent, or -1 w/ errno * @cancelationpoint * @asyncsignalsafe diff --git a/libc/calls/read.c b/libc/calls/read.c index 966da4b1e..389a3eead 100644 --- a/libc/calls/read.c +++ b/libc/calls/read.c @@ -29,6 +29,7 @@ #include "libc/runtime/zipos.internal.h" #include "libc/sock/internal.h" #include "libc/sock/sock.h" +#include "libc/stdio/sysparam.h" #include "libc/sysv/errfuns.h" /** @@ -41,7 +42,7 @@ * * @param fd is something open()'d earlier * @param buf is copied into, cf. copy_file_range(), sendfile(), etc. - * @param size in range [1..0x7ffff000] is reasonable + * @param size is always saturated to 0x7ffff000 automatically * @return [1..size] bytes on success, 0 on EOF, or -1 w/ errno; with * exception of size==0, in which case return zero means no error * @raise EBADF if `fd` is negative or not an open file descriptor @@ -67,6 +68,10 @@ ssize_t read(int fd, void *buf, size_t size) { ssize_t rc; BEGIN_CANCELATION_POINT; + // XNU and BSDs will EINVAL if requested bytes exceeds INT_MAX + // this is inconsistent with Linux which ignores huge requests + size = MIN(size, 0x7ffff000); + if (fd < 0) { rc = ebadf(); } else if ((!buf && size) || (IsAsan() && !__asan_is_valid(buf, size))) { diff --git a/libc/calls/readv.c b/libc/calls/readv.c index f142673e6..994242d74 100644 --- a/libc/calls/readv.c +++ b/libc/calls/readv.c @@ -28,10 +28,74 @@ #include "libc/intrin/likely.h" #include "libc/intrin/strace.internal.h" #include "libc/intrin/weaken.h" +#include "libc/limits.h" +#include "libc/mem/alloca.h" +#include "libc/runtime/stack.h" #include "libc/runtime/zipos.internal.h" #include "libc/sock/internal.h" +#include "libc/stdckdint.h" #include "libc/sysv/errfuns.h" +static size_t SumIovecBytes(const struct iovec *iov, int iovlen) { + size_t count = 0; + for (int i = 0; i < iovlen; ++i) + if (ckd_add(&count, count, iov[i].iov_len)) + count = SIZE_MAX; + return count; +} + +static ssize_t readv_impl(int fd, const struct iovec *iov, int iovlen) { + if (fd < 0) + return ebadf(); + if (iovlen < 0) + return einval(); + if (IsAsan() && !__asan_is_valid_iov(iov, iovlen)) + return efault(); + + // XNU and BSDs will EINVAL if requested bytes exceeds INT_MAX + // this is inconsistent with Linux which ignores huge requests + if (!IsLinux()) { + size_t sum, remain = 0x7ffff000; + if ((sum = SumIovecBytes(iov, iovlen)) > remain) { + struct iovec *iov2; +#pragma GCC push_options +#pragma GCC diagnostic ignored "-Walloca-larger-than=" + iov2 = alloca(iovlen * sizeof(struct iovec)); + CheckLargeStackAllocation(iov2, iovlen * sizeof(struct iovec)); +#pragma GCC pop_options + for (int i = 0; i < iovlen; ++i) { + iov2[i] = iov[i]; + if (remain >= iov2[i].iov_len) { + remain -= iov2[i].iov_len; + } else { + iov2[i].iov_len = remain; + remain = 0; + } + } + iov = iov2; + } + } + + if (fd < g_fds.n && g_fds.p[fd].kind == kFdZip) { + return _weaken(__zipos_read)( + (struct ZiposHandle *)(intptr_t)g_fds.p[fd].handle, iov, iovlen, -1); + } else if (IsLinux() || IsXnu() || IsFreebsd() || IsOpenbsd() || IsNetbsd()) { + if (iovlen == 1) { + return sys_read(fd, iov[0].iov_base, iov[0].iov_len); + } else { + return sys_readv(fd, iov, iovlen); + } + } else if (fd >= g_fds.n) { + return ebadf(); + } else if (IsMetal()) { + return sys_readv_metal(fd, iov, iovlen); + } else if (IsWindows()) { + return sys_readv_nt(fd, iov, iovlen); + } else { + return enosys(); + } +} + /** * Reads data to multiple buffers. * @@ -42,6 +106,11 @@ * be passed to the kernel as read() instead. This yields a 100 cycle * performance boost in the case of a single small iovec. * + * It's possible for file write request to be partially completed. For + * example, if the sum of `iov` lengths exceeds 0x7ffff000 then bytes + * beyond that will be ignored. This is a Linux behavior that Cosmo + * polyfills across platforms. + * * @return number of bytes actually read, or -1 w/ errno * @cancelationpoint * @restartable @@ -49,32 +118,7 @@ ssize_t readv(int fd, const struct iovec *iov, int iovlen) { ssize_t rc; BEGIN_CANCELATION_POINT; - - if (fd < 0) { - rc = ebadf(); - } else if (iovlen < 0) { - rc = einval(); - } else if (IsAsan() && !__asan_is_valid_iov(iov, iovlen)) { - rc = efault(); - } else if (fd < g_fds.n && g_fds.p[fd].kind == kFdZip) { - rc = _weaken(__zipos_read)( - (struct ZiposHandle *)(intptr_t)g_fds.p[fd].handle, iov, iovlen, -1); - } else if (IsLinux() || IsXnu() || IsFreebsd() || IsOpenbsd() || IsNetbsd()) { - if (iovlen == 1) { - rc = sys_read(fd, iov[0].iov_base, iov[0].iov_len); - } else { - rc = sys_readv(fd, iov, iovlen); - } - } else if (fd >= g_fds.n) { - rc = ebadf(); - } else if (IsMetal()) { - rc = sys_readv_metal(fd, iov, iovlen); - } else if (IsWindows()) { - rc = sys_readv_nt(fd, iov, iovlen); - } else { - rc = enosys(); - } - + rc = readv_impl(fd, iov, iovlen); END_CANCELATION_POINT; STRACE("readv(%d, [%s], %d) → %'ld% m", fd, DescribeIovec(rc, iov, iovlen), iovlen, rc); diff --git a/libc/calls/write.c b/libc/calls/write.c index 776c97d61..9ed622ebe 100644 --- a/libc/calls/write.c +++ b/libc/calls/write.c @@ -27,6 +27,7 @@ #include "libc/intrin/weaken.h" #include "libc/runtime/zipos.internal.h" #include "libc/sock/sock.h" +#include "libc/stdio/sysparam.h" #include "libc/sysv/errfuns.h" /** @@ -39,6 +40,7 @@ * * @param fd is open file descriptor * @param buf is copied from, cf. copy_file_range(), sendfile(), etc. + * @param size is always saturated to 0x7ffff000 automatically * @return [1..size] bytes on success, or -1 w/ errno; noting zero is * impossible unless size was passed as zero to do an error check * @raise EBADF if `fd` is negative or not an open file descriptor @@ -68,6 +70,10 @@ ssize_t write(int fd, const void *buf, size_t size) { ssize_t rc; BEGIN_CANCELATION_POINT; + // XNU and BSDs will EINVAL if requested bytes exceeds INT_MAX + // this is inconsistent with Linux which ignores huge requests + size = MIN(size, 0x7ffff000); + if (fd < 0) { rc = ebadf(); } else if (IsAsan() && !__asan_is_valid(buf, size)) { diff --git a/libc/calls/writev.c b/libc/calls/writev.c index 9e624c702..b21105315 100644 --- a/libc/calls/writev.c +++ b/libc/calls/writev.c @@ -27,8 +27,71 @@ #include "libc/intrin/likely.h" #include "libc/intrin/strace.internal.h" #include "libc/intrin/weaken.h" +#include "libc/limits.h" +#include "libc/runtime/stack.h" #include "libc/sock/internal.h" +#include "libc/stdckdint.h" #include "libc/sysv/errfuns.h" +#include "libc/vga/vga.internal.h" + +static size_t SumIovecBytes(const struct iovec *iov, int iovlen) { + size_t count = 0; + for (int i = 0; i < iovlen; ++i) + if (ckd_add(&count, count, iov[i].iov_len)) + count = SIZE_MAX; + return count; +} + +static ssize_t writev_impl(int fd, const struct iovec *iov, int iovlen) { + if (fd < 0) + return ebadf(); + if (iovlen < 0) + return einval(); + if (IsAsan() && !__asan_is_valid_iov(iov, iovlen)) + return efault(); + if (fd < g_fds.n && g_fds.p[fd].kind == kFdZip) + return ebadf(); // posix specifies this when not open()'d for writing + + // XNU and BSDs will EINVAL if requested bytes exceeds INT_MAX + // this is inconsistent with Linux which ignores huge requests + if (!IsLinux()) { + size_t sum, remain = 0x7ffff000; + if ((sum = SumIovecBytes(iov, iovlen)) > remain) { + struct iovec *iov2; +#pragma GCC push_options +#pragma GCC diagnostic ignored "-Walloca-larger-than=" + iov2 = alloca(iovlen * sizeof(struct iovec)); + CheckLargeStackAllocation(iov2, iovlen * sizeof(struct iovec)); +#pragma GCC pop_options + for (int i = 0; i < iovlen; ++i) { + iov2[i] = iov[i]; + if (remain >= iov2[i].iov_len) { + remain -= iov2[i].iov_len; + } else { + iov2[i].iov_len = remain; + remain = 0; + } + } + iov = iov2; + } + } + + if (IsLinux() || IsXnu() || IsFreebsd() || IsOpenbsd() || IsNetbsd()) { + if (iovlen == 1) { + return sys_write(fd, iov[0].iov_base, iov[0].iov_len); + } else { + return sys_writev(fd, iov, iovlen); + } + } else if (fd >= g_fds.n) { + return ebadf(); + } else if (IsMetal()) { + return sys_writev_metal(g_fds.p + fd, iov, iovlen); + } else if (IsWindows()) { + return sys_writev_nt(fd, iov, iovlen); + } else { + return enosys(); + } +} /** * Writes data from multiple buffers. @@ -45,6 +108,11 @@ * been committed. It can also happen if we need to polyfill this system * call using write(). * + * It's possible for file write request to be partially completed. For + * example, if the sum of `iov` lengths exceeds 0x7ffff000 then bytes + * beyond that will be ignored. This is a Linux behavior that Cosmo + * polyfills across platforms. + * * @return number of bytes actually handed off, or -1 w/ errno * @cancelationpoint * @restartable @@ -52,31 +120,7 @@ ssize_t writev(int fd, const struct iovec *iov, int iovlen) { ssize_t rc; BEGIN_CANCELATION_POINT; - - if (fd < 0) { - rc = ebadf(); - } else if (iovlen < 0) { - rc = einval(); - } else if (IsAsan() && !__asan_is_valid_iov(iov, iovlen)) { - rc = efault(); - } else if (fd < g_fds.n && g_fds.p[fd].kind == kFdZip) { - rc = ebadf(); // posix specifies this when not open()'d for writing - } else if (IsLinux() || IsXnu() || IsFreebsd() || IsOpenbsd() || IsNetbsd()) { - if (iovlen == 1) { - rc = sys_write(fd, iov[0].iov_base, iov[0].iov_len); - } else { - rc = sys_writev(fd, iov, iovlen); - } - } else if (fd >= g_fds.n) { - rc = ebadf(); - } else if (IsMetal()) { - rc = sys_writev_metal(g_fds.p + fd, iov, iovlen); - } else if (IsWindows()) { - rc = sys_writev_nt(fd, iov, iovlen); - } else { - rc = enosys(); - } - + rc = writev_impl(fd, iov, iovlen); END_CANCELATION_POINT; STRACE("writev(%d, %s, %d) → %'ld% m", fd, DescribeIovec(rc != -1 ? rc : -2, iov, iovlen), iovlen, rc); diff --git a/test/posix/fread3gb_test.c b/test/posix/fread3gb_test.c index 1167ceb1d..000ab5498 100644 --- a/test/posix/fread3gb_test.c +++ b/test/posix/fread3gb_test.c @@ -30,10 +30,14 @@ int setup(void) { return 2; if (pwrite(fd, "a", 1, 0) != 1) return 3; - if (pwrite(fd, "z", 1, SIZE - 1) != 1) + if (pwrite(fd, "b", 1, 0x7fffefff) != 1) return 4; - if (close(fd)) + if (pwrite(fd, "c", 1, 0x7ffff000) != 1) return 5; + if (pwrite(fd, "z", 1, SIZE - 1) != 1) + return 6; + if (close(fd)) + return 7; return 0; } @@ -42,17 +46,21 @@ int test(void) { char *buf; size_t rc; if (!(f = fopen(path, "r"))) - return 6; - if (!(buf = malloc(SIZE))) - return 7; - if ((rc = fread(buf, SIZE, 1, f)) != 1) return 8; - if (buf[0] != 'a') + if (!(buf = malloc(SIZE))) return 9; - if (buf[SIZE - 1] != 'z') + if ((rc = fread(buf, SIZE, 1, f)) != 1) return 10; - if (fclose(f)) + if (buf[0] != 'a') return 11; + if (buf[0x7fffefff] != 'b') + return 12; + if (buf[0x7ffff000] != 'c') + return 13; + if (buf[SIZE - 1] != 'z') + return 14; + if (fclose(f)) + return 15; return 0; }