Linux: Add cachestat, fchmodat2 syscalls (#958)

This commit is contained in:
Stephen Gregoratto 2023-11-20 14:01:20 +11:00 committed by GitHub
parent 69faf1b403
commit cc5c5319bf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 287 additions and 1 deletions

47
libc/calls/cachestat.c Normal file
View file

@ -0,0 +1,47 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/cachestat.h"
#include "libc/intrin/strace.internal.h"
int sys_cachestat(int, struct cachestat_range *, struct cachestat *, uint32_t);
/**
 * Queries the page cache statistics of a file.
 *
 * @param fd is the open file descriptor to retrieve statistics from
 * @param cstat_range is the byte range in `fd` to query; when `len > 0`
 *     the range is `[off..off + len]`, and when `len == 0` the range
 *     runs from `off` to the end of `fd`
 * @param cstat is the structure where page cache statistics are stored
 * @param flags is currently unused and must be set to `0`
 * @return 0 on success, or -1 w/ errno
 * @raise EFAULT if `cstat_range` or `cstat` points to invalid memory
 * @raise EINVAL if `flags` is nonzero
 * @raise EBADF if `fd` is negative or not open
 * @raise EOPNOTSUPP if `fd` refers to a hugetlbfs file
 * @raise ENOSYS if not Linux 6.5+
 */
int cachestat(int fd, struct cachestat_range *cstat_range,
              struct cachestat *cstat, uint32_t flags) {
  int rc;
  rc = sys_cachestat(fd, cstat_range, cstat, flags);
  // `rc` must be passed explicitly: it feeds the `%d` after the arrow
  STRACE("cachestat(%d, %p, %p, %#x) → %d% m", fd, cstat_range, cstat, flags,
         rc);
  return rc;
}

34
libc/calls/cachestat.h Normal file
View file

@ -0,0 +1,34 @@
#ifndef COSMOPOLITAN_LIBC_CALLS_CACHESTAT_H_
#define COSMOPOLITAN_LIBC_CALLS_CACHESTAT_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/**
 * Byte range of a file, passed to cachestat() to select what to query.
 */
struct cachestat_range {
/** Offset in bytes at which the queried range starts. */
uint64_t off;
/** Length in bytes of the range; 0 means from `off` to the end of file. */
uint64_t len;
};
/**
 * Page cache statistics stored by cachestat() for the queried range.
 */
struct cachestat {
/** Number of cached pages. */
uint64_t nr_cache;
/** Number of dirty pages. */
uint64_t nr_dirty;
/** Number of pages marked for writeback. */
uint64_t nr_writeback;
/** Number of pages evicted from the cache. */
uint64_t nr_evicted;
/**
* Number of recently evicted pages.
*
* A page is recently evicted if its last eviction was recent enough that its
* reentry to the cache would indicate that it is actively being used by the
* system, and that there is memory pressure on the system.
*/
uint64_t nr_recently_evicted;
};
int cachestat(int, struct cachestat_range *, struct cachestat *, uint32_t);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_CALLS_CACHESTAT_H_ */

View file

@ -20,6 +20,7 @@
#include "libc/calls/syscall-nt.internal.h" #include "libc/calls/syscall-nt.internal.h"
#include "libc/calls/syscall-sysv.internal.h" #include "libc/calls/syscall-sysv.internal.h"
#include "libc/dce.h" #include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/asan.internal.h" #include "libc/intrin/asan.internal.h"
#include "libc/intrin/describeflags.internal.h" #include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/strace.internal.h" #include "libc/intrin/strace.internal.h"
@ -28,6 +29,7 @@
#include "libc/sysv/errfuns.h" #include "libc/sysv/errfuns.h"
int sys_fchmodat_linux(int, const char *, unsigned, int); int sys_fchmodat_linux(int, const char *, unsigned, int);
int sys_fchmodat2(int, const char *, unsigned, int);
/** /**
* Changes permissions on file, e.g.: * Changes permissions on file, e.g.:
@ -40,6 +42,9 @@ int sys_fchmodat_linux(int, const char *, unsigned, int);
* @param mode contains octal flags (base 8) * @param mode contains octal flags (base 8)
* @param flags can have `AT_SYMLINK_NOFOLLOW` * @param flags can have `AT_SYMLINK_NOFOLLOW`
* @raise EROFS if `dirfd` or `path` use zip file system * @raise EROFS if `dirfd` or `path` use zip file system
* @raise EOPNOTSUPP on Linux if `path` is a symbolic link, `AT_SYMLINK_NOFOLLOW`
* is set in `flags`, and filesystem does not support setting the mode of
* symbolic links.
* @errors ENOENT, ENOTDIR, ENOSYS * @errors ENOENT, ENOTDIR, ENOSYS
* @asyncsignalsafe * @asyncsignalsafe
* @see fchmod() * @see fchmod()
@ -53,7 +58,12 @@ int fchmodat(int dirfd, const char *path, uint32_t mode, int flags) {
rc = erofs(); rc = erofs();
} else if (!IsWindows()) { } else if (!IsWindows()) {
if (IsLinux() && flags) { if (IsLinux() && flags) {
rc = sys_fchmodat_linux(dirfd, path, mode, flags); int serrno = errno;
rc = sys_fchmodat2(dirfd, path, mode, flags);
if (rc == -1 && errno == ENOSYS) {
errno = serrno;
rc = sys_fchmodat_linux(dirfd, path, mode, flags);
}
} else { } else {
rc = sys_fchmodat(dirfd, path, mode, flags); rc = sys_fchmodat(dirfd, path, mode, flags);
} }

View file

@ -0,0 +1,2 @@
#include "libc/sysv/macros.internal.h"
// Declares sys_cachestat through the .scall macro; 451 is 0x1c3 in decimal.
// NOTE(review): argument meanings come from macros.internal.h — confirm there.
.scall sys_cachestat,0xfffffffffffff1c3,451,4095,globl

View file

@ -0,0 +1,2 @@
#include "libc/sysv/macros.internal.h"
// Declares sys_fchmodat2 through the .scall macro; 452 is 0x1c4 in decimal.
// NOTE(review): argument meanings come from macros.internal.h — confirm there.
.scall sys_fchmodat2,0xfffffffffffff1c4,452,4095,globl

View file

@ -383,6 +383,8 @@ scall sys_memfd_secret 0xfffffffffffff1bf 0xfff globl # no wrapper
scall sys_process_mrelease 0xfffffffffffff1c0 0xfff globl # no wrapper scall sys_process_mrelease 0xfffffffffffff1c0 0xfff globl # no wrapper
scall sys_futex_waitv 0xfffffffffffff1c1 0xfff globl # no wrapper scall sys_futex_waitv 0xfffffffffffff1c1 0xfff globl # no wrapper
scall sys_set_mempolicy_home_node 0xfffffffffffff1c2 0xfff globl # no wrapper scall sys_set_mempolicy_home_node 0xfffffffffffff1c2 0xfff globl # no wrapper
scall sys_cachestat 0xfffffffffffff1c3 0x1c3 globl # Linux 6.5+
scall sys_fchmodat2 0xfffffffffffff1c4 0x1c4 globl # no wrapper Linux 6.6+
# The Fifth Bell System Interface, Community Edition # The Fifth Bell System Interface, Community Edition
# » besiyata dishmaya # » besiyata dishmaya

View file

@ -0,0 +1,121 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2023 Nhat Pham <nphamcs@gmail.com>
Copyright 2023 Stephen Gregoratto <dev@sgregoratto.me>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/calls/cachestat.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/statfs.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/kprintf.h"
#include "libc/macros.internal.h"
#include "libc/mem/gc.internal.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/sysconf.h"
#include "libc/stdio/rand.h"
#include "libc/sysv/consts/auxv.h"
#include "libc/sysv/consts/o.h"
#include "libc/testlib/testlib.h"
#include "libc/x/x.h"
static size_t pagesize;
/**
 * Probes whether cachestat() is usable on the running system.
 *
 * The probe calls cachestat() with an invalid descriptor and null
 * pointers, and reports support only when that call fails with EBADF.
 *
 * @return true if on Linux and the probe failed with EBADF
 */
bool HasCachestatSupport(void) {
  if (!IsLinux()) return false;
  if (cachestat(-1, 0, 0, 0) != -1) return false;
  return errno == EBADF;
}
/**
 * Performs one-time setup for the cachestat test suite.
 *
 * When cachestat() is not supported, a warning is printed and the
 * process exits with status 0 so the suite is skipped rather than
 * failed. Otherwise the testlib tmp setup/teardown is enabled and the
 * page size is read from the auxiliary vector into `pagesize`.
 */
void SetUpOnce(void) {
  if (!HasCachestatSupport()) {
    kprintf("warning: cachestat not supported on this system: %m\n");
    exit(0);
  }
  testlib_enable_tmp_setup_teardown();
  pagesize = (size_t)getauxval(AT_PAGESZ);
  // ASSERT_SYS(0, 0, pledge("stdio rpath wpath cpath", 0));
}
TEST(cachestat, testCachestatOnDevices) {
const char *const files[] = {
"/dev/zero", "/dev/null", "/dev/urandom", "/proc/version", "/proc",
};
struct cachestat_range range = {0, 4 * pagesize};
struct cachestat cs;
for (size_t i = 0; i < ARRAYLEN(files); i++) {
ASSERT_SYS(0, 3, open(files[i], O_RDONLY));
ASSERT_SYS(0, 0, cachestat(3, &range, &cs, 0));
ASSERT_SYS(0, 0, close(3));
}
}
TEST(cachestat, testCachestatAfterWrite) {
size_t size = 4 * pagesize;
char *data = gc(xmalloc(size));
ASSERT_SYS(0, size, getrandom(data, size, 0));
// TODO: handle EINTR like xbarf
ASSERT_SYS(0, 3, open("tmpfilecachestat", O_CREAT | O_RDWR, 0600));
ASSERT_SYS(0, size, write(3, data, size));
struct cachestat_range range = {0, size};
struct cachestat cs;
ASSERT_SYS(0, 0, cachestat(3, &range, &cs, 0));
ASSERT_EQ(4, cs.nr_cache + cs.nr_evicted,
"total number of evicted pages is off.");
ASSERT_SYS(0, 0, close(3));
}
#define TMPFS_MAGIC 0x01021994
TEST(cachestat, testCachestatSyncNoDirty) {
size_t size = 4 * pagesize;
char *data = gc(xmalloc(size));
ASSERT_SYS(0, size, getrandom(data, size, 0));
// TODO: handle EINTR like xbarf
ASSERT_SYS(0, 3, open("tmpfilecachestat", O_CREAT | O_RDWR, 0600));
ASSERT_SYS(0, size, write(3, data, size));
struct cachestat_range range = {0, size};
struct cachestat cs;
ASSERT_SYS(0, 0, cachestat(3, &range, &cs, 0));
ASSERT_EQ(4, cs.nr_cache + cs.nr_evicted,
"total number of evicted pages is off.");
struct statfs statfs;
ASSERT_SYS(0, 0, fstatfs(3, &statfs));
if (statfs.f_type == TMPFS_MAGIC) goto done;
ASSERT_SYS(0, 0, fsync(3));
ASSERT_SYS(0, 0, cachestat(3, &range, &cs, 0));
EXPECT_EQ(0, cs.nr_dirty,
"dirty pages should be zero after fsync, got %llu\n", cs.nr_dirty);
done:
ASSERT_SYS(0, 0, close(3));
}
TEST(cachestat, testCachestatShmem) {
size_t filesize = 512 * 2 * pagesize; // 2 2MB huge pages.
size_t compute_len = 512 * pagesize;
unsigned long num_pages = compute_len / pagesize;
char *data = gc(xmalloc(filesize));
ASSERT_SYS(0, filesize, getrandom(data, filesize, 0));
ASSERT_SYS(0, 3, shm_open("tmpshmcstat", O_CREAT | O_RDWR, 0600));
ASSERT_SYS(0, 0, ftruncate(3, filesize));
ASSERT_SYS(0, filesize, write(3, data, filesize));
struct cachestat_range range = {pagesize, compute_len};
struct cachestat cs;
ASSERT_SYS(0, 0, cachestat(3, &range, &cs, 0));
ASSERT_EQ(num_pages, cs.nr_cache + cs.nr_evicted,
"total number of cached and evicted pages is off.\n");
ASSERT_SYS(0, 0, shm_unlink("tmpshmcstat"));
ASSERT_SYS(0, 0, close(3));
}

View file

@ -0,0 +1,68 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Stephen Gregoratto
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/calls.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/macros.internal.h"
#include "libc/sysv/consts/s.h"
// #include "libc/mem/gc.internal.h"
#include "libc/calls/struct/stat.h"
#include "libc/runtime/runtime.h"
#include "libc/sysv/consts/at.h"
#include "libc/sysv/consts/o.h"
#include "libc/testlib/testlib.h"
#include "libc/x/x.h"
/**
 * Performs one-time setup for the fchmodat test suite by enabling the
 * testlib tmp setup/teardown.
 */
void SetUpOnce(void) {
  testlib_enable_tmp_setup_teardown();
  // pledge() sandboxing is currently disabled:
  // ASSERT_SYS(0, 0, pledge("stdio rpath wpath cpath", 0));
}
/**
 * Asserts that the permission bits of `filename` equal `mode`.
 *
 * The file is examined with `AT_SYMLINK_NOFOLLOW`, so for a symbolic
 * link the link itself is inspected rather than its target.
 *
 * @param filename is a path relative to the current directory
 * @param mode is the expected value of `st_mode & 0777`
 */
static void ExpectMode(const char *filename, uint32_t mode) {
  struct stat st;
  ASSERT_SYS(0, 0, fstatat(AT_FDCWD, filename, &st, AT_SYMLINK_NOFOLLOW));
  // ASSERT_EQ reports both wanted and actual modes when they differ
  ASSERT_EQ(mode, st.st_mode & 0777);
}
TEST(fchmodat, testFchmodat) {
ASSERT_SYS(0, 3,
open("regfile", O_WRONLY | O_CREAT | O_EXCL | O_TRUNC, 0644));
ASSERT_SYS(0, 0, close(3));
ASSERT_SYS(0, 0, symlink("regfile", "symlink"));
ExpectMode("regfile", 0644);
struct stat st;
ASSERT_SYS(0, 0, fstatat(AT_FDCWD, "symlink", &st, AT_SYMLINK_NOFOLLOW));
uint32_t sym_mode = st.st_mode & 0777;
ASSERT_SYS(0, 0, fchmodat(AT_FDCWD, "regfile", 0640, 0));
ExpectMode("regfile", 0640);
ASSERT_SYS(0, 0, fchmodat(AT_FDCWD, "regfile", 0600, AT_SYMLINK_NOFOLLOW));
ExpectMode("regfile", 0600);
ASSERT_SYS(0, 0, fchmodat(AT_FDCWD, "symlink", 0640, 0));
ExpectMode("regfile", 0640);
ExpectMode("symlink", sym_mode);
int rc = fchmodat(AT_FDCWD, "symlink", 0600, AT_SYMLINK_NOFOLLOW);
if (rc == -1) {
ASSERT_TRUE(errno == ENOTSUP);
errno = 0;
} else {
ExpectMode("symlink", 0600);
}
ExpectMode("regfile", 0640);
}