Improve dlopen() on Apple Silicon

- Introduce MAP_JIT which is zero on other platforms
- Invent __jit_begin() and __jit_end() which wrap Apple's APIs
- Runtime dispatch to sys_icache_invalidate() in __clear_cache()
This commit is contained in:
Justine Tunney 2023-11-17 02:33:14 -08:00
parent 7a9e176ecf
commit 529cb4817c
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
20 changed files with 120 additions and 117 deletions

View file

@ -36,7 +36,7 @@
#define pagesz 16384
#define SYSLIB_MAGIC ('s' | 'l' << 8 | 'i' << 16 | 'b' << 24)
#define SYSLIB_VERSION 7
#define SYSLIB_VERSION 8
struct Syslib {
int magic;
@ -891,6 +891,7 @@ int main(int argc, char **argv, char **envp) {
M->lib.pthread_jit_write_protect_supported_np =
pthread_jit_write_protect_supported_np;
M->lib.pthread_jit_write_protect_np = pthread_jit_write_protect_np_workaround;
M->lib.sys_icache_invalidate = sys_icache_invalidate;
M->lib.pthread_create = pthread_create;
M->lib.pthread_exit = pthread_exit;
M->lib.pthread_kill = pthread_kill;

View file

@ -21,6 +21,7 @@
#include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/directmap.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/runtime/syslib.internal.h"
/**
* Unmaps memory directly with system.
@ -32,10 +33,12 @@
*/
int sys_munmap(void *p, size_t n) {
int rc;
if (!IsMetal()) {
rc = __sys_munmap(p, n);
} else {
if (IsXnuSilicon()) {
rc = _sysret(__syslib->__munmap(p, n));
} else if (IsMetal()) {
rc = sys_munmap_metal(p, n);
} else {
rc = __sys_munmap(p, n);
}
KERNTRACE("sys_munmap(%p /* %s */, %'zu) → %d", p,
DescribeFrame((intptr_t)p >> 16), n, rc);

View file

@ -20,6 +20,7 @@ i32 __sys_fcntl_cp(i32, i32, ...);
i32 __sys_fstat(i32, void *);
i32 __sys_fstatat(i32, const char *, void *, i32);
i32 __sys_gettid(i64 *);
i32 __sys_mprotect(void *, u64, i32);
i32 __sys_munmap(void *, u64);
i32 __sys_openat(i32, const char *, i32, u32);
i32 __sys_openat_nc(i32, const char *, i32, u32);

View file

@ -424,12 +424,12 @@ static char *dlerror_set(const char *str) {
return dlerror_buf;
}
static char *foreign_alloc_block(void) {
static dontinline char *foreign_alloc_block(void) {
char *p = 0;
size_t sz = 65536;
if (!IsWindows()) {
p = __sys_mmap(0, sz, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0, 0);
MAP_PRIVATE | MAP_ANONYMOUS | MAP_JIT, -1, 0, 0);
if (p == MAP_FAILED) {
p = 0;
}
@ -448,7 +448,7 @@ static char *foreign_alloc_block(void) {
return p;
}
static void *foreign_alloc(size_t n) {
static dontinline void *foreign_alloc(size_t n) {
void *res;
static char *block;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
@ -520,11 +520,14 @@ static void *foreign_thunk_sysv(void *func) {
*p++ = 0xff;
*p++ = 0xe2;
#elif defined(__aarch64__)
if (!(p = code = foreign_alloc(36))) return 0; // 16 + 16 + 4 = 36
__jit_begin();
if ((p = code = foreign_alloc(36))) {
p = movimm(p, 5, (uintptr_t)func);
p = movimm(p, 10, (uintptr_t)foreign_tramp);
*(uint32_t *)p = 0xd61f0140; // br x10
__clear_cache(code, p + 4);
}
__jit_end();
#else
#error "unsupported architecture"
#endif

View file

@ -33,8 +33,7 @@ LIBC_DLOPEN_A_DIRECTDEPS = \
LIBC_RUNTIME \
LIBC_SYSV \
LIBC_SYSV_CALLS \
LIBC_STR \
THIRD_PARTY_COMPILER_RT
LIBC_STR
LIBC_DLOPEN_A_DEPS := \
$(call uniq,$(foreach x,$(LIBC_DLOPEN_A_DIRECTDEPS),$($(x))))

View file

@ -17,12 +17,14 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/directmap.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/nt/runtime.h"
#include "libc/runtime/memtrack.internal.h"
#include "libc/runtime/syslib.internal.h"
/**
* Obtains memory mapping directly from system.
@ -37,7 +39,11 @@
struct DirectMap sys_mmap(void *addr, size_t size, int prot, int flags, int fd,
int64_t off) {
struct DirectMap d;
if (!IsWindows() && !IsMetal()) {
if (IsXnuSilicon()) {
long p = _sysret(__syslib->__mmap(addr, size, prot, flags, fd, off));
d.maphandle = kNtInvalidHandleValue;
d.addr = (void *)p;
} else if (!IsWindows() && !IsMetal()) {
d.addr = __sys_mmap(addr, size, prot, flags, fd, off, off);
d.maphandle = kNtInvalidHandleValue;
} else if (IsMetal()) {

View file

@ -0,0 +1,29 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2023 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/dce.h"
#include "libc/runtime/syslib.internal.h"
int sys_mprotect(void *data, size_t size, int prot) {
if (IsXnuSilicon()) {
return _sysret(__syslib->__mprotect(data, size, prot));
} else {
return __sys_mprotect(data, size, prot);
}
}

37
libc/runtime/jit.c Normal file
View file

@ -0,0 +1,37 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2023 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/dce.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/syslib.internal.h"
void __jit_begin(void) {
if (IsXnuSilicon()) {
if (__syslib->__pthread_jit_write_protect_supported_np()) {
__syslib->__pthread_jit_write_protect_np(false);
}
}
}
void __jit_end(void) {
if (IsXnuSilicon()) {
if (__syslib->__pthread_jit_write_protect_supported_np()) {
__syslib->__pthread_jit_write_protect_np(true);
}
}
}

View file

@ -115,6 +115,8 @@ bool32 _isheap(void *);
/* code morphing */
void __morph_begin(void);
void __morph_end(void);
void __jit_begin(void);
void __jit_end(void);
void __clear_cache(void *, void *);
/* portability */
int NtGetVersion(void) pureconst;

View file

@ -45,6 +45,7 @@ LIBC_RUNTIME_A_DIRECTDEPS = \
LIBC_STR \
LIBC_SYSV \
LIBC_SYSV_CALLS \
THIRD_PARTY_COMPILER_RT \
THIRD_PARTY_NSYNC \
THIRD_PARTY_PUFF \
THIRD_PARTY_XED

View file

@ -12,7 +12,7 @@ COSMOPOLITAN_C_START_
*/
#define SYSLIB_MAGIC ('s' | 'l' << 8 | 'i' << 16 | 'b' << 24)
#define SYSLIB_VERSION 6
#define SYSLIB_VERSION 8
typedef uint64_t dispatch_time_t;
typedef uint64_t dispatch_semaphore_t;

View file

@ -0,0 +1,2 @@
#include "libc/sysv/macros.internal.h"
.scall __sys_mprotect,0x04a04a04a204a00a,226,74,globl,hidden

View file

@ -1,2 +0,0 @@
#include "libc/sysv/macros.internal.h"
.scall sys_mprotect,0x04a04a04a204a00a,226,74,globl,hidden

View file

@ -234,6 +234,7 @@ syscon mmap MAP_INHERIT -1 -1 -1 -1 -1 -1 0x00000080 -1 # make
syscon mmap MAP_HASSEMAPHORE 0 0 0x00000200 0x00000200 0x00000200 0 0x00000200 0 # does it matter on x86?
syscon mmap MAP_NOSYNC 0 0 0 0 0x00000800 0 0 0 # flush to physical media only when necessary rather than gratuitously; be sure to use write() rather than ftruncate() with this!
syscon mmap MAP_CONCEAL 0 0 0 0 0x00020000 0x00008000 0x00008000 0 # omit from core dumps; MAP_NOCORE on FreeBSD
syscon mmap MAP_JIT 0 0 0 0x00000800 0 0 0 0 # omit from core dumps; MAP_NOCORE on FreeBSD
syscon compat MAP_NOCORE 0 0 0 0 0x00020000 0x00008000 0x00008000 0 # use MAP_CONCEAL
syscon compat MAP_ANON 0x00000020 0x00000020 0x00001000 0x00001000 0x00001000 0x00001000 0x00001000 0x00000020 # bsd consensus; faked nt
syscon compat MAP_EXECUTABLE 0x00001000 0x00001000 0 0 0 0 0 0 # ignored

View file

@ -0,0 +1,2 @@
#include "libc/sysv/consts/syscon.internal.h"
.syscon mmap,MAP_JIT,0,0,0,0x00000800,0,0,0,0

View file

@ -1,2 +1,2 @@
#include "libc/sysv/consts/syscon.internal.h"
.syscon rlimit,RLIMIT_AS,9,9,5,5,10,2,10,0
.syscon rlimit,RLIMIT_AS,9 ,9,5,5,10,2,10,0

View file

@ -14,6 +14,7 @@ extern const int MAP_FIXED;
extern const int MAP_FIXED_NOREPLACE;
extern const int MAP_HASSEMAPHORE;
extern const int MAP_INHERIT;
extern const int MAP_JIT;
extern const int MAP_LOCKED;
extern const int MAP_NONBLOCK;
extern const int MAP_NORESERVE;
@ -43,5 +44,4 @@ COSMOPOLITAN_C_END_
#define MAP_ANON MAP_ANONYMOUS
#define MAP_NOCORE MAP_CONCEAL
#endif /* COSMOPOLITAN_LIBC_SYSV_CONSTS_MAP_H_ */

View file

@ -45,7 +45,7 @@ scall sys_ppoll 0xfff86da21ffff90f 0x849 globl hidden # consider INTON/INTOFF t
scall sys_lseek 0x0c70a61de20c7008 0x03e globl hidden # netbsd:evilpad, OpenBSD 7.3+
scall __sys_mmap 0x0c50311dd20c5009 0x0de globl hidden # netbsd:pad, OpenBSD 7.3+
scall sys_msync 0x915900841284181a 0x8e3 globl hidden
scall sys_mprotect 0x04a04a04a204a00a 0x0e2 globl hidden
scall __sys_mprotect 0x04a04a04a204a00a 0x0e2 globl hidden
scall __sys_munmap 0x049049049204900b 0x0d7 globl hidden
scall sys_sigaction 0x15402e1a0202e00d 0x086 globl hidden # rt_sigaction on Lunix; __sigaction_sigtramp() on NetBSD
scall __sys_sigprocmask 0x125030154214900e 0x087 globl hidden # a.k.a. rt_sigprocmask, openbsd:byvalue, a.k.a. pthread_sigmask

View file

@ -6,45 +6,9 @@
//
//===----------------------------------------------------------------------===//
#include <assert.h>
#include <stddef.h>
#include "int_lib.h"
#if __APPLE__
#include <libkern/OSCacheControl.h>
#endif
#if defined(_WIN32)
// Forward declare Win32 APIs since the GCC mode driver does not handle the
// newer SDKs as well as needed.
uint32_t FlushInstructionCache(uintptr_t hProcess, void *lpBaseAddress,
uintptr_t dwSize);
uintptr_t GetCurrentProcess(void);
#endif
#if defined(__FreeBSD__) && defined(__arm__)
// clang-format off
#include <sys/types.h>
#include <machine/sysarch.h>
// clang-format on
#endif
#if defined(__NetBSD__) && defined(__arm__)
#include <machine/sysarch.h>
#endif
#if defined(__OpenBSD__) && defined(__mips__)
// clang-format off
#include <sys/types.h>
#include <machine/sysarch.h>
// clang-format on
#endif
#if defined(__linux__) && defined(__mips__)
#include <sys/cachectl.h>
#include <sys/syscall.h>
#include <unistd.h>
#endif
#include "libc/dce.h"
#include "libc/runtime/syslib.internal.h"
// The compiler generates calls to __clear_cache() when creating
// trampoline functions on the stack for use with nested functions.
@ -52,53 +16,19 @@ uintptr_t GetCurrentProcess(void);
// specified range.
void __clear_cache(void *start, void *end) {
#if __i386__ || __x86_64__ || defined(_M_IX86) || defined(_M_X64)
// Intel processors have a unified instruction and data cache
// so there is nothing to do
#elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__))
FlushInstructionCache(GetCurrentProcess(), start, end - start);
#elif defined(__arm__) && !defined(__APPLE__)
#if defined(__FreeBSD__) || defined(__NetBSD__)
struct arm_sync_icache_args arg;
arg.addr = (uintptr_t)start;
arg.len = (uintptr_t)end - (uintptr_t)start;
sysarch(ARM_SYNC_ICACHE, &arg);
#elif defined(__linux__)
// We used to include asm/unistd.h for the __ARM_NR_cacheflush define, but
// it also brought many other unused defines, as well as a dependency on
// kernel headers to be installed.
//
// This value is stable at least since Linux 3.13 and should remain so for
// compatibility reasons, warranting it's re-definition here.
#define __ARM_NR_cacheflush 0x0f0002
register int start_reg __asm("r0") = (int)(intptr_t)start;
const register int end_reg __asm("r1") = (int)(intptr_t)end;
const register int flags __asm("r2") = 0;
const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush;
__asm __volatile("svc 0x0"
: "=r"(start_reg)
: "r"(syscall_nr), "r"(start_reg), "r"(end_reg), "r"(flags));
assert(start_reg == 0 && "Cache flush syscall failed.");
#else
compilerrt_abort();
#endif
#elif defined(__linux__) && defined(__mips__)
const uintptr_t start_int = (uintptr_t)start;
const uintptr_t end_int = (uintptr_t)end;
syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE);
#elif defined(__mips__) && defined(__OpenBSD__)
cacheflush(start, (uintptr_t)end - (uintptr_t)start, BCACHE);
#elif defined(__aarch64__) && !defined(__APPLE__)
#ifdef __aarch64__
if (IsXnu()) {
__syslib->__sys_icache_invalidate((char *)start,
(char *)end - (char *)start);
return;
}
uint64_t xstart = (uint64_t)(uintptr_t)start;
uint64_t xend = (uint64_t)(uintptr_t)end;
uint64_t addr;
// Get Cache Type Info
uint64_t ctr_el0;
__asm __volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
// dc & ic instructions must use 64bit registers so we don't use
// uintptr_t in case this runs in an IPL32 environment.
const size_t dcache_line_size = 4 << ((ctr_el0 >> 16) & 15);
@ -106,12 +36,12 @@ void __clear_cache(void *start, void *end) {
addr += dcache_line_size)
__asm __volatile("dc cvau, %0" ::"r"(addr));
__asm __volatile("dsb ish");
const size_t icache_line_size = 4 << ((ctr_el0 >> 0) & 15);
for (addr = xstart & ~(icache_line_size - 1); addr < xend;
addr += icache_line_size)
__asm __volatile("ic ivau, %0" ::"r"(addr));
__asm __volatile("isb sy");
#elif defined(__powerpc64__)
const size_t line_size = 32;
const size_t len = (uintptr_t)end - (uintptr_t)start;
@ -120,29 +50,16 @@ void __clear_cache(void *start, void *end) {
const uintptr_t start_line = ((uintptr_t)start) & mask;
const uintptr_t end_line = ((uintptr_t)start + len + line_size - 1) & mask;
for (uintptr_t line = start_line; line < end_line; line += line_size)
for (uintptr_t line = start_line; line < end_line; line += line_size) {
__asm__ volatile("dcbf 0, %0" : : "r"(line));
}
__asm__ volatile("sync");
for (uintptr_t line = start_line; line < end_line; line += line_size)
for (uintptr_t line = start_line; line < end_line; line += line_size) {
__asm__ volatile("icbi 0, %0" : : "r"(line));
}
__asm__ volatile("isync");
#elif defined(__sparc__)
const size_t dword_size = 8;
const size_t len = (uintptr_t)end - (uintptr_t)start;
const uintptr_t mask = ~(dword_size - 1);
const uintptr_t start_dword = ((uintptr_t)start) & mask;
const uintptr_t end_dword = ((uintptr_t)start + len + dword_size - 1) & mask;
for (uintptr_t dword = start_dword; dword < end_dword; dword += dword_size)
__asm__ volatile("flush %0" : : "r"(dword));
#else
#if __APPLE__
// On Darwin, sys_icache_invalidate() provides this functionality
sys_icache_invalidate(start, end - start);
#else
compilerrt_abort();
#endif
#endif
}

View file

@ -29,6 +29,7 @@ THIRD_PARTY_COMPILER_RT_A_CHECKS = \
THIRD_PARTY_COMPILER_RT_A_DIRECTDEPS = \
LIBC_INTRIN \
LIBC_NEXGEN32E \
LIBC_SYSV
THIRD_PARTY_COMPILER_RT_A_DEPS := \
$(call uniq,$(foreach x,$(THIRD_PARTY_COMPILER_RT_A_DIRECTDEPS),$($(x))))