mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-04-30 10:44:55 +00:00
Get llama.com working on aarch64
This commit is contained in:
parent
4c093155a3
commit
a0237a017c
19 changed files with 321 additions and 157 deletions
|
@ -1361,7 +1361,6 @@ static privileged void AllowMmapExec(struct Filter *f) {
|
||||||
// The flags parameter must not have:
|
// The flags parameter must not have:
|
||||||
//
|
//
|
||||||
// - MAP_LOCKED (0x02000)
|
// - MAP_LOCKED (0x02000)
|
||||||
// - MAP_POPULATE (0x08000)
|
|
||||||
// - MAP_NONBLOCK (0x10000)
|
// - MAP_NONBLOCK (0x10000)
|
||||||
// - MAP_HUGETLB (0x40000)
|
// - MAP_HUGETLB (0x40000)
|
||||||
//
|
//
|
||||||
|
|
|
@ -57,6 +57,49 @@
|
||||||
#include "libc/thread/tls2.h"
|
#include "libc/thread/tls2.h"
|
||||||
#include "libc/vga/vga.internal.h"
|
#include "libc/vga/vga.internal.h"
|
||||||
|
|
||||||
|
// Fetches the next integer argument from the va_list `va` and stores
// it in `x`.
//
// `t` selects the width class of the argument being read:
//
//   -3  reads an int and normalizes it to 0 or 1
//   -2  reads an int and narrows it to (signed|unsigned) char
//   -1  reads an int and narrows it to (signed|unsigned) short
//    0  reads an int or unsigned int (also used for unlisted values)
//    1  reads a long or unsigned long
//    2  reads a long long or unsigned long long
//
// `s` picks the signed variant when true and the unsigned variant
// otherwise; the -3 case does not consult it.
//
// Each case calls va_arg() with the type that matches the selected
// width class. The expansion is a bare switch statement, so the macro
// must be used as a statement rather than as an expression.
#define KGETINT(x, va, t, s) \
|
||||||
|
switch (t) { \
|
||||||
|
case -3: \
|
||||||
|
x = !!va_arg(va, int); \
|
||||||
|
break; \
|
||||||
|
case -2: \
|
||||||
|
if (s) { \
|
||||||
|
x = (signed char)va_arg(va, int); \
|
||||||
|
} else { \
|
||||||
|
x = (unsigned char)va_arg(va, int); \
|
||||||
|
} \
|
||||||
|
break; \
|
||||||
|
case -1: \
|
||||||
|
if (s) { \
|
||||||
|
x = (signed short)va_arg(va, int); \
|
||||||
|
} else { \
|
||||||
|
x = (unsigned short)va_arg(va, int); \
|
||||||
|
} \
|
||||||
|
break; \
|
||||||
|
case 0: \
|
||||||
|
default: \
|
||||||
|
if (s) { \
|
||||||
|
x = va_arg(va, int); \
|
||||||
|
} else { \
|
||||||
|
x = va_arg(va, unsigned int); \
|
||||||
|
} \
|
||||||
|
break; \
|
||||||
|
case 1: \
|
||||||
|
if (s) { \
|
||||||
|
x = va_arg(va, long); \
|
||||||
|
} else { \
|
||||||
|
x = va_arg(va, unsigned long); \
|
||||||
|
} \
|
||||||
|
break; \
|
||||||
|
case 2: \
|
||||||
|
if (s) { \
|
||||||
|
x = va_arg(va, long long); \
|
||||||
|
} else { \
|
||||||
|
x = va_arg(va, unsigned long long); \
|
||||||
|
} \
|
||||||
|
break; \
|
||||||
|
}
|
||||||
|
|
||||||
extern _Hide struct SymbolTable *__symtab;
|
extern _Hide struct SymbolTable *__symtab;
|
||||||
|
|
||||||
privileged static inline char *kadvance(char *p, char *e, long n) {
|
privileged static inline char *kadvance(char *p, char *e, long n) {
|
||||||
|
@ -80,23 +123,6 @@ privileged static char *kemitquote(char *p, char *e, signed char t,
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reads the next variadic argument and returns it as an integer of the
// width class `t`.
//
// The argument is always fetched with va_arg(va, unsigned long),
// whatever `t` is. When `t` is zero or negative the value is then cut
// down to its low `32 >> MIN(5, -t)` bits by shifting left and back
// right; the right shift is arithmetic (sign-extending) when `s` is
// true and logical (zero-extending) otherwise. Positive `t` returns
// the fetched value unchanged.
//
// NOTE(review): the shift arithmetic presumably assumes long is 64
// bits wide; confirm for every supported target.
privileged static unsigned long long kgetint(va_list va, signed char t,
|
|
||||||
bool s) {
|
|
||||||
int bits;
|
|
||||||
unsigned long long x;
|
|
||||||
x = va_arg(va, unsigned long);
|
|
||||||
if (t <= 0) {
|
|
||||||
// number of high bits to discard for this width class
bits = 64 - (32 >> MIN(5, -t));
|
|
||||||
x <<= bits;
|
|
||||||
if (s) {
|
|
||||||
x = (signed long)x >> bits;
|
|
||||||
} else {
|
|
||||||
x >>= bits;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
privileged static inline bool kiskernelpointer(const void *p) {
|
privileged static inline bool kiskernelpointer(const void *p) {
|
||||||
return 0x7f0000000000 <= (intptr_t)p && (intptr_t)p < 0x800000000000;
|
return 0x7f0000000000 <= (intptr_t)p && (intptr_t)p < 0x800000000000;
|
||||||
}
|
}
|
||||||
|
@ -363,7 +389,7 @@ privileged static size_t kformat(char *b, size_t n, const char *fmt,
|
||||||
s = va_arg(va, int) ? "true" : "false";
|
s = va_arg(va, int) ? "true" : "false";
|
||||||
goto FormatString;
|
goto FormatString;
|
||||||
}
|
}
|
||||||
x = kgetint(va, type, c == 'd');
|
KGETINT(x, va, type, c == 'd');
|
||||||
FormatDecimal:
|
FormatDecimal:
|
||||||
if ((long long)x < 0 && c != 'u') {
|
if ((long long)x < 0 && c != 'u') {
|
||||||
x = -x;
|
x = -x;
|
||||||
|
@ -426,7 +452,7 @@ privileged static size_t kformat(char *b, size_t n, const char *fmt,
|
||||||
base = 1;
|
base = 1;
|
||||||
if (hash) hash = '0' | 'b' << 8;
|
if (hash) hash = '0' | 'b' << 8;
|
||||||
BinaryNumber:
|
BinaryNumber:
|
||||||
x = kgetint(va, type, false);
|
KGETINT(x, va, type, false);
|
||||||
FormatNumber:
|
FormatNumber:
|
||||||
i = 0;
|
i = 0;
|
||||||
m = (1 << base) - 1;
|
m = (1 << base) - 1;
|
||||||
|
|
|
@ -20,14 +20,9 @@
|
||||||
#include "libc/runtime/memtrack.internal.h"
|
#include "libc/runtime/memtrack.internal.h"
|
||||||
#include "libc/thread/thread.h"
|
#include "libc/thread/thread.h"
|
||||||
|
|
||||||
|
#ifdef __x86_64__
|
||||||
|
STATIC_YOINK("_init__mmi");
|
||||||
|
#endif
|
||||||
|
|
||||||
struct MemoryIntervals _mmi;
|
struct MemoryIntervals _mmi;
|
||||||
pthread_mutex_t __mmi_lock_obj; // recursive :'(
|
pthread_mutex_t __mmi_lock_obj; // recursive :'(
|
||||||
|
|
||||||
// Initializes the global memory interval table and its lock.
//
// Declared with the constructor attribute and guarded by the static
// `once` flag, so any call after the first returns immediately.
// Points _mmi.p at the built-in array _mmi.s, sets _mmi.n to that
// array's element count, and sets the lock's type to
// PTHREAD_MUTEX_RECURSIVE.
__attribute__((__constructor__)) void __mmi_init(void) {
|
|
||||||
static bool once;
|
|
||||||
if (once) return;
|
|
||||||
_mmi.n = ARRAYLEN(_mmi.s);
|
|
||||||
_mmi.p = _mmi.s;
|
|
||||||
__mmi_lock_obj._type = PTHREAD_MUTEX_RECURSIVE;
|
|
||||||
once = true;
|
|
||||||
}
|
|
||||||
|
|
26
libc/intrin/mmi.init.S
Normal file
26
libc/intrin/mmi.init.S
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||||
|
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||||
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||||
|
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||||
|
│ │
|
||||||
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||||
|
│ any purpose with or without fee is hereby granted, provided that the │
|
||||||
|
│ above copyright notice and this permission notice appear in all copies. │
|
||||||
|
│ │
|
||||||
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||||
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||||
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||||
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||||
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||||
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||||
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||||
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||||
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
|
#include "libc/thread/thread.h"
|
||||||
|
#include "libc/macros.internal.h"
|
||||||
|
|
||||||
|
//	Initializes _mmi and __mmi_lock_obj from the init section.
//
//	Stores the byte OPEN_MAX at _mmi+8, the 32-bit address of
//	_mmi+24 at _mmi+16, and the byte PTHREAD_MUTEX_RECURSIVE at
//	__mmi_lock_obj+4.
//
//	NOTE(review): these offsets presumably correspond to _mmi.n,
//	_mmi.p, _mmi.s and __mmi_lock_obj._type; confirm against the
//	struct definitions. The first two stores use absolute
//	addressing while the third is %rip-relative.
.init.start 200,_init__mmi
|
||||||
|
movb $OPEN_MAX,_mmi+8
|
||||||
|
movl $_mmi+24,_mmi+16
|
||||||
|
movb $PTHREAD_MUTEX_RECURSIVE,__mmi_lock_obj+4(%rip)
|
||||||
|
.init.end 200,_init__mmi
|
|
@ -56,8 +56,6 @@ sys_clone_linux:
|
||||||
syscall
|
syscall
|
||||||
1: hlt // ctid was corrupted by program!
|
1: hlt // ctid was corrupted by program!
|
||||||
#elif defined(__aarch64__)
|
#elif defined(__aarch64__)
|
||||||
and x1,x1,#-16 // align stack
|
|
||||||
stp x5,x6,[x1,#-16]! // save func and arg
|
|
||||||
mov x8,x3 // swap x3 and x4
|
mov x8,x3 // swap x3 and x4
|
||||||
mov x3,x4 // swap x3 and x4
|
mov x3,x4 // swap x3 and x4
|
||||||
mov x4,x8 // swap x3 and x4
|
mov x4,x8 // swap x3 and x4
|
||||||
|
@ -65,8 +63,8 @@ sys_clone_linux:
|
||||||
svc #0
|
svc #0
|
||||||
cbz x0,2f
|
cbz x0,2f
|
||||||
ret
|
ret
|
||||||
2: ldp x1,x0,[sp],#16 // child thread
|
2: mov x0,x6 // child thread
|
||||||
blr x1
|
blr x5
|
||||||
mov x8,#93 // __NR_exit
|
mov x8,#93 // __NR_exit
|
||||||
svc #0
|
svc #0
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -25,6 +25,7 @@
|
||||||
#include "libc/errno.h"
|
#include "libc/errno.h"
|
||||||
#include "libc/intrin/asan.internal.h"
|
#include "libc/intrin/asan.internal.h"
|
||||||
#include "libc/intrin/describeflags.internal.h"
|
#include "libc/intrin/describeflags.internal.h"
|
||||||
|
#include "libc/intrin/kprintf.h"
|
||||||
#include "libc/intrin/strace.internal.h"
|
#include "libc/intrin/strace.internal.h"
|
||||||
#include "libc/limits.h"
|
#include "libc/limits.h"
|
||||||
#include "libc/macros.internal.h"
|
#include "libc/macros.internal.h"
|
||||||
|
@ -452,7 +453,12 @@ static int CloneLinux(int (*func)(void *arg, int rc), char *stk, size_t stksz,
|
||||||
ctid = (int *)sp;
|
ctid = (int *)sp;
|
||||||
sp -= 8; // experiment
|
sp -= 8; // experiment
|
||||||
}
|
}
|
||||||
sp = sp & -16; // align the stack
|
// align the stack
|
||||||
|
#ifdef __aarch64__
|
||||||
|
sp = sp & -128; // for kernel 4.6 and earlier
|
||||||
|
#else
|
||||||
|
sp = sp & -16;
|
||||||
|
#endif
|
||||||
if ((rc = sys_clone_linux(flags, sp, ptid, ctid, tls, func, arg)) >= 0) {
|
if ((rc = sys_clone_linux(flags, sp, ptid, ctid, tls, func, arg)) >= 0) {
|
||||||
// clone() is documented as setting ptid before return
|
// clone() is documented as setting ptid before return
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -577,6 +583,10 @@ errno_t clone(void *func, void *stk, size_t stksz, int flags, void *arg,
|
||||||
__enable_threads();
|
__enable_threads();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
STRACE("clone(func=%t, stk=%p, stksz=%'zu, flags=%#x, arg=%p, ptid=%p, "
|
||||||
|
"tls=%p, ctid=%p)",
|
||||||
|
func, stk, stksz, flags, arg, ptid, tls, ctid);
|
||||||
|
|
||||||
if (!func) {
|
if (!func) {
|
||||||
rc = EINVAL;
|
rc = EINVAL;
|
||||||
} else if (!IsTiny() &&
|
} else if (!IsTiny() &&
|
||||||
|
|
|
@ -83,19 +83,19 @@ cosmo: push %rbp
|
||||||
call _init
|
call _init
|
||||||
|
|
||||||
// call constructors
|
// call constructors
|
||||||
ezlea __init_array_start,ax // static ctors in forward order
|
ezlea __init_array_end,ax // static ctors in forward order
|
||||||
.weak __init_array_start // could be called multiple times
|
.weak __init_array_end // could be called multiple times
|
||||||
ezlea __init_array_end,cx // idempotency recommended
|
ezlea __init_array_start,cx // idempotency recommended
|
||||||
.weak __init_array_end // @see ape/ape.lds
|
.weak __init_array_start // @see ape/ape.lds
|
||||||
1: cmp %rax,%rcx
|
1: cmp %rax,%rcx
|
||||||
je 2f
|
je 2f
|
||||||
|
sub $8,%rax
|
||||||
push %rax
|
push %rax
|
||||||
push %rcx
|
push %rcx
|
||||||
call .Largs
|
call .Largs
|
||||||
call *(%rax)
|
call *(%rax)
|
||||||
pop %rcx
|
pop %rcx
|
||||||
pop %rax
|
pop %rax
|
||||||
add $8,%rax
|
|
||||||
jmp 1b
|
jmp 1b
|
||||||
|
|
||||||
// call main()
|
// call main()
|
||||||
|
@ -141,7 +141,6 @@ cosmo: push %rbp
|
||||||
push %rsi
|
push %rsi
|
||||||
|
|
||||||
// allocate stack
|
// allocate stack
|
||||||
call __mmi_init
|
|
||||||
movabs $ape_stack_vaddr,%rdi
|
movabs $ape_stack_vaddr,%rdi
|
||||||
mov $ape_stack_memsz,%esi
|
mov $ape_stack_memsz,%esi
|
||||||
mov $ape_stack_prot,%edx
|
mov $ape_stack_prot,%edx
|
||||||
|
|
|
@ -16,8 +16,15 @@
|
||||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
|
#include "libc/intrin/kprintf.h"
|
||||||
|
#include "libc/intrin/strace.internal.h"
|
||||||
|
#include "libc/macros.internal.h"
|
||||||
|
#include "libc/nexgen32e/rdtsc.h"
|
||||||
#include "libc/runtime/internal.h"
|
#include "libc/runtime/internal.h"
|
||||||
|
#include "libc/runtime/memtrack.internal.h"
|
||||||
#include "libc/runtime/runtime.h"
|
#include "libc/runtime/runtime.h"
|
||||||
|
#include "libc/thread/thread.h"
|
||||||
|
#include "libc/thread/tls.h"
|
||||||
#ifndef __x86_64__
|
#ifndef __x86_64__
|
||||||
|
|
||||||
int main(int, char **, char **) __attribute__((__weak__));
|
int main(int, char **, char **) __attribute__((__weak__));
|
||||||
|
@ -40,35 +47,65 @@ typedef int init_f(int argc, char **argv, char **envp, unsigned long *auxv);
|
||||||
extern init_f __strace_init;
|
extern init_f __strace_init;
|
||||||
extern init_f *__init_array_start[] __attribute__((__weak__));
|
extern init_f *__init_array_start[] __attribute__((__weak__));
|
||||||
extern init_f *__init_array_end[] __attribute__((__weak__));
|
extern init_f *__init_array_end[] __attribute__((__weak__));
|
||||||
|
extern uintptr_t ape_idata_iat[] __attribute__((__weak__));
|
||||||
|
extern uintptr_t ape_idata_iatend[] __attribute__((__weak__));
|
||||||
|
extern pthread_mutex_t __mmi_lock_obj;
|
||||||
|
|
||||||
|
struct CosmoTib *tib;
|
||||||
|
|
||||||
void cosmo(long *sp) {
|
void cosmo(long *sp) {
|
||||||
int argc;
|
int argc;
|
||||||
init_f **fp;
|
init_f **fp;
|
||||||
|
uintptr_t *pp;
|
||||||
char **argv, **envp;
|
char **argv, **envp;
|
||||||
unsigned long *auxv;
|
unsigned long *auxv;
|
||||||
|
|
||||||
|
// get startup timestamp as early as possible
|
||||||
|
// it's used by --strace and also kprintf() %T
|
||||||
|
kStartTsc = rdtsc();
|
||||||
|
|
||||||
|
// extracts arguments from old sysv stack abi
|
||||||
argc = *sp;
|
argc = *sp;
|
||||||
argv = (char **)(sp + 1);
|
argv = (char **)(sp + 1);
|
||||||
envp = (char **)(sp + 1 + argc + 1);
|
envp = (char **)(sp + 1 + argc + 1);
|
||||||
auxv = (unsigned long *)(sp + 1 + argc + 1);
|
auxv = (unsigned long *)(sp + 1 + argc + 1);
|
||||||
for (;;) {
|
while (*auxv++) donothing;
|
||||||
if (!*auxv++) {
|
|
||||||
break;
|
// needed by kisdangerous()
|
||||||
}
|
__oldstack = (intptr_t)sp;
|
||||||
|
|
||||||
|
// make win32 imps noop
|
||||||
|
for (pp = ape_idata_iat; pp < ape_idata_iatend; ++pp) {
|
||||||
|
*pp = (uintptr_t)_missingno;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// initialize mmap() manager extremely early
|
||||||
|
_mmi.n = ARRAYLEN(_mmi.s);
|
||||||
|
_mmi.p = _mmi.s;
|
||||||
|
__mmi_lock_obj._type = PTHREAD_MUTEX_RECURSIVE;
|
||||||
|
|
||||||
#ifdef SYSDEBUG
|
#ifdef SYSDEBUG
|
||||||
|
// initialize --strace functionality
|
||||||
argc = __strace_init(argc, argv, envp, auxv);
|
argc = __strace_init(argc, argv, envp, auxv);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// set helpful globals
|
||||||
__argc = argc;
|
__argc = argc;
|
||||||
__argv = argv;
|
__argv = argv;
|
||||||
__envp = envp;
|
__envp = envp;
|
||||||
__auxv = auxv;
|
__auxv = auxv;
|
||||||
environ = envp;
|
environ = envp;
|
||||||
if (argc) program_invocation_name = argv[0];
|
if (argc) program_invocation_name = argv[0];
|
||||||
|
|
||||||
|
// run initialization callbacks
|
||||||
_init();
|
_init();
|
||||||
for (fp = __init_array_start; fp < __init_array_end; ++fp) {
|
__enable_tls();
|
||||||
|
for (fp = __init_array_end; fp-- > __init_array_start;) {
|
||||||
(*fp)(argc, argv, envp, auxv);
|
(*fp)(argc, argv, envp, auxv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// run program
|
||||||
exit(main(argc, argv, envp));
|
exit(main(argc, argv, envp));
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* __aarch64__ */
|
#endif /* __x86_64__ */
|
||||||
|
|
|
@ -62,7 +62,9 @@ static privileged dontinline void FixupLockNops(void) {
|
||||||
|
|
||||||
void __enable_threads(void) {
|
void __enable_threads(void) {
|
||||||
if (__threaded) return;
|
if (__threaded) return;
|
||||||
|
#ifdef __x86_64__
|
||||||
STRACE("__enable_threads()");
|
STRACE("__enable_threads()");
|
||||||
FixupLockNops();
|
FixupLockNops();
|
||||||
|
#endif
|
||||||
__threaded = sys_gettid();
|
__threaded = sys_gettid();
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,17 +23,15 @@
|
||||||
#include "libc/intrin/asancodes.h"
|
#include "libc/intrin/asancodes.h"
|
||||||
#include "libc/intrin/atomic.h"
|
#include "libc/intrin/atomic.h"
|
||||||
#include "libc/intrin/weaken.h"
|
#include "libc/intrin/weaken.h"
|
||||||
#include "libc/log/libfatal.internal.h"
|
|
||||||
#include "libc/macros.internal.h"
|
#include "libc/macros.internal.h"
|
||||||
#include "libc/runtime/internal.h"
|
#include "libc/runtime/internal.h"
|
||||||
#include "libc/runtime/runtime.h"
|
#include "libc/runtime/runtime.h"
|
||||||
|
#include "libc/str/str.h"
|
||||||
#include "libc/thread/posixthread.internal.h"
|
#include "libc/thread/posixthread.internal.h"
|
||||||
#include "libc/thread/thread.h"
|
#include "libc/thread/thread.h"
|
||||||
#include "libc/thread/tls.h"
|
#include "libc/thread/tls.h"
|
||||||
|
|
||||||
#define _TLSZ ((intptr_t)_tls_size)
|
#define I(x) ((uintptr_t)x)
|
||||||
#define _TLDZ ((intptr_t)_tdata_size)
|
|
||||||
#define _TIBZ sizeof(struct CosmoTib)
|
|
||||||
|
|
||||||
extern unsigned char __tls_mov_nt_rax[];
|
extern unsigned char __tls_mov_nt_rax[];
|
||||||
extern unsigned char __tls_add_nt_rax[];
|
extern unsigned char __tls_add_nt_rax[];
|
||||||
|
@ -41,20 +39,34 @@ extern unsigned char __tls_add_nt_rax[];
|
||||||
nsync_dll_list_ _pthread_list;
|
nsync_dll_list_ _pthread_list;
|
||||||
pthread_spinlock_t _pthread_lock;
|
pthread_spinlock_t _pthread_lock;
|
||||||
static struct PosixThread _pthread_main;
|
static struct PosixThread _pthread_main;
|
||||||
_Alignas(TLS_ALIGNMENT) static char __static_tls[5008];
|
_Alignas(TLS_ALIGNMENT) static char __static_tls[6016];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Enables thread local storage for main process.
|
* Enables thread local storage for main process.
|
||||||
*
|
*
|
||||||
* %fs Linux/BSDs
|
* Here's the TLS memory layout on x86_64:
|
||||||
|
*
|
||||||
|
* __get_tls()
|
||||||
* │
|
* │
|
||||||
* _Thread_local │ __get_tls()
|
* %fs Linux/BSDs
|
||||||
|
* _Thread_local │
|
||||||
* ┌───┬──────────┬──────────┼───┐
|
* ┌───┬──────────┬──────────┼───┐
|
||||||
* │pad│ .tdata │ .tbss │tib│
|
* │pad│ .tdata │ .tbss │tib│
|
||||||
* └───┴──────────┴──────────┼───┘
|
* └───┴──────────┴──────────┼───┘
|
||||||
* │
|
* │
|
||||||
* Windows/Mac %gs
|
* Windows/Mac %gs
|
||||||
*
|
*
|
||||||
|
* Here's the TLS memory layout on aarch64:
|
||||||
|
*
|
||||||
|
* %tpidr_el0
|
||||||
|
* │
|
||||||
|
* │ _Thread_local
|
||||||
|
* ┌───┼───┬──────────┬──────────┐
|
||||||
|
* │tib│dtv│ .tdata │ .tbss │
|
||||||
|
* ├───┴───┴──────────┴──────────┘
|
||||||
|
* │
|
||||||
|
* __get_tls()
|
||||||
|
*
|
||||||
* This function is always called by the core runtime to guarantee TLS
|
* This function is always called by the core runtime to guarantee TLS
|
||||||
* is always available to your program. You must build your code using
|
* is always available to your program. You must build your code using
|
||||||
* -mno-tls-direct-seg-refs if you want to use _Thread_local.
|
* -mno-tls-direct-seg-refs if you want to use _Thread_local.
|
||||||
|
@ -81,10 +93,31 @@ _Alignas(TLS_ALIGNMENT) static char __static_tls[5008];
|
||||||
void __enable_tls(void) {
|
void __enable_tls(void) {
|
||||||
int tid;
|
int tid;
|
||||||
size_t siz;
|
size_t siz;
|
||||||
struct CosmoTib *tib;
|
|
||||||
char *mem, *tls;
|
char *mem, *tls;
|
||||||
|
struct CosmoTib *tib;
|
||||||
|
|
||||||
siz = ROUNDUP(_TLSZ + _TIBZ, _Alignof(__static_tls));
|
// Here's the layout we're currently using:
|
||||||
|
//
|
||||||
|
// .align PAGESIZE
|
||||||
|
// _tdata_start:
|
||||||
|
// .tdata
|
||||||
|
// _tdata_size = . - _tdata_start
|
||||||
|
// .align PAGESIZE
|
||||||
|
// _tbss_start:
|
||||||
|
// _tdata_start + _tbss_offset:
|
||||||
|
// .tbss
|
||||||
|
// .align TLS_ALIGNMENT
|
||||||
|
// _tbss_size = . - _tbss_start
|
||||||
|
// _tbss_end:
|
||||||
|
// _tbss_start + _tbss_size:
|
||||||
|
// _tdata_start + _tls_size:
|
||||||
|
//
|
||||||
|
_unassert(_tbss_start == _tdata_start + I(_tbss_offset));
|
||||||
|
_unassert(_tbss_start + I(_tbss_size) == _tdata_start + I(_tls_size));
|
||||||
|
|
||||||
|
#ifdef __x86_64__
|
||||||
|
|
||||||
|
siz = ROUNDUP(I(_tls_size) + sizeof(*tib), _Alignof(__static_tls));
|
||||||
if (siz <= sizeof(__static_tls)) {
|
if (siz <= sizeof(__static_tls)) {
|
||||||
// if tls requirement is small then use the static tls block
|
// if tls requirement is small then use the static tls block
|
||||||
// which helps avoid a system call for appes with little tls
|
// which helps avoid a system call for appes with little tls
|
||||||
|
@ -103,14 +136,52 @@ void __enable_tls(void) {
|
||||||
|
|
||||||
if (IsAsan()) {
|
if (IsAsan()) {
|
||||||
// poison the space between .tdata and .tbss
|
// poison the space between .tdata and .tbss
|
||||||
__asan_poison(mem + (intptr_t)_tdata_size,
|
__asan_poison(mem + I(_tdata_size), I(_tbss_offset) - I(_tdata_size),
|
||||||
(intptr_t)_tbss_offset - (intptr_t)_tdata_size,
|
|
||||||
kAsanProtected);
|
kAsanProtected);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tib = (struct CosmoTib *)(mem + siz - sizeof(*tib));
|
||||||
|
tls = mem + siz - sizeof(*tib) - I(_tls_size);
|
||||||
|
|
||||||
|
#elif defined(__aarch64__)
|
||||||
|
|
||||||
|
siz = ROUNDUP(sizeof(*tib) + 2 * sizeof(void *) + I(_tls_size),
|
||||||
|
_Alignof(__static_tls));
|
||||||
|
if (siz <= sizeof(__static_tls)) {
|
||||||
|
mem = __static_tls;
|
||||||
|
} else {
|
||||||
|
_npassert(_weaken(_mapanon));
|
||||||
|
siz = ROUNDUP(siz, FRAMESIZE);
|
||||||
|
mem = _weaken(_mapanon)(siz);
|
||||||
|
_npassert(mem);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (IsAsan()) {
|
||||||
|
// there's a roundup(pagesize) gap between .tdata and .tbss
|
||||||
|
// poison that empty space
|
||||||
|
__asan_poison(mem + sizeof(*tib) + 2 * sizeof(void *) + I(_tdata_size),
|
||||||
|
I(_tbss_offset) - I(_tdata_size), kAsanProtected);
|
||||||
|
}
|
||||||
|
|
||||||
|
tib = (struct CosmoTib *)mem;
|
||||||
|
tls = mem + sizeof(*tib) + 2 * sizeof(void *);
|
||||||
|
|
||||||
|
// Set the DTV.
|
||||||
|
//
|
||||||
|
// We don't support dynamic shared objects at the moment. The APE
|
||||||
|
// linker script will only produce a single PT_TLS program header
|
||||||
|
// therefore our job is relatively simple.
|
||||||
|
//
|
||||||
|
// @see musl/src/env/__init_tls.c
|
||||||
|
// @see https://chao-tic.github.io/blog/2018/12/25/tls
|
||||||
|
((uintptr_t *)tls)[-2] = 1;
|
||||||
|
((void **)tls)[-1] = tls;
|
||||||
|
|
||||||
|
#else
|
||||||
|
#error "unsupported architecture"
|
||||||
|
#endif /* __x86_64__ */
|
||||||
|
|
||||||
// initialize main thread tls memory
|
// initialize main thread tls memory
|
||||||
tib = (struct CosmoTib *)(mem + siz - _TIBZ);
|
|
||||||
tls = mem + siz - _TIBZ - _TLSZ;
|
|
||||||
tib->tib_self = tib;
|
tib->tib_self = tib;
|
||||||
tib->tib_self2 = tib;
|
tib->tib_self2 = tib;
|
||||||
tib->tib_errno = __errno;
|
tib->tib_errno = __errno;
|
||||||
|
@ -135,7 +206,9 @@ void __enable_tls(void) {
|
||||||
atomic_store_explicit(&_pthread_main.ptid, tid, memory_order_relaxed);
|
atomic_store_explicit(&_pthread_main.ptid, tid, memory_order_relaxed);
|
||||||
|
|
||||||
// copy in initialized data section
|
// copy in initialized data section
|
||||||
__repmovsb(tls, _tdata_start, _TLDZ);
|
if (I(_tdata_size)) {
|
||||||
|
memcpy(tls, _tdata_start, I(_tdata_size));
|
||||||
|
}
|
||||||
|
|
||||||
// ask the operating system to change the x86 segment register
|
// ask the operating system to change the x86 segment register
|
||||||
__set_tls(tib);
|
__set_tls(tib);
|
||||||
|
|
|
@ -27,6 +27,7 @@
|
||||||
int sys_set_tls();
|
int sys_set_tls();
|
||||||
|
|
||||||
void __set_tls(struct CosmoTib *tib) {
|
void __set_tls(struct CosmoTib *tib) {
|
||||||
|
tib = __adj_tls(tib);
|
||||||
#ifdef __x86_64__
|
#ifdef __x86_64__
|
||||||
// ask the operating system to change the x86 segment register
|
// ask the operating system to change the x86 segment register
|
||||||
int ax, dx;
|
int ax, dx;
|
||||||
|
@ -58,6 +59,6 @@ void __set_tls(struct CosmoTib *tib) {
|
||||||
"d"((uint32_t)(val >> 32)));
|
"d"((uint32_t)(val >> 32)));
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
asm volatile("msr\ttpidr_el0,%0" : /* no outputs */ : "r"(tib + 1));
|
asm volatile("msr\ttpidr_el0,%0" : /* no outputs */ : "r"(tib));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,18 +28,30 @@
|
||||||
#include "libc/thread/spawn.h"
|
#include "libc/thread/spawn.h"
|
||||||
#include "libc/thread/tls.h"
|
#include "libc/thread/tls.h"
|
||||||
|
|
||||||
#define I(x) ((intptr_t)x)
|
#define I(x) ((uintptr_t)x)
|
||||||
|
|
||||||
void Bzero(void *, size_t) asm("bzero"); // gcc bug
|
void Bzero(void *, size_t) asm("bzero"); // gcc bug
|
||||||
|
|
||||||
/**
|
static char *_mktls_finish(struct CosmoTib **out_tib, char *mem,
|
||||||
* Allocates thread-local storage memory for new thread.
|
struct CosmoTib *tib) {
|
||||||
* @return buffer that must be released with free()
|
struct CosmoTib *old;
|
||||||
*/
|
old = __get_tls();
|
||||||
char *_mktls(struct CosmoTib **out_tib) {
|
Bzero(tib, sizeof(*tib));
|
||||||
|
tib->tib_self = tib;
|
||||||
|
tib->tib_self2 = tib;
|
||||||
|
tib->tib_ftrace = old->tib_ftrace;
|
||||||
|
tib->tib_strace = old->tib_strace;
|
||||||
|
tib->tib_sigmask = old->tib_sigmask;
|
||||||
|
atomic_store_explicit(&tib->tib_tid, -1, memory_order_relaxed);
|
||||||
|
if (out_tib) {
|
||||||
|
*out_tib = tib;
|
||||||
|
}
|
||||||
|
return mem;
|
||||||
|
}
|
||||||
|
|
||||||
|
static char *_mktls_below(struct CosmoTib **out_tib) {
|
||||||
char *tls;
|
char *tls;
|
||||||
struct CosmoTib *neu, *old;
|
struct CosmoTib *neu;
|
||||||
__require_tls();
|
|
||||||
|
|
||||||
// allocate memory for tdata, tbss, and tib
|
// allocate memory for tdata, tbss, and tib
|
||||||
tls = memalign(TLS_ALIGNMENT, I(_tls_size) + sizeof(struct CosmoTib));
|
tls = memalign(TLS_ALIGNMENT, I(_tls_size) + sizeof(struct CosmoTib));
|
||||||
|
@ -51,22 +63,67 @@ char *_mktls(struct CosmoTib **out_tib) {
|
||||||
kAsanProtected);
|
kAsanProtected);
|
||||||
}
|
}
|
||||||
|
|
||||||
// initialize tdata and clear tbss
|
// initialize .tdata
|
||||||
|
if (I(_tdata_size)) {
|
||||||
memmove(tls, _tdata_start, I(_tdata_size));
|
memmove(tls, _tdata_start, I(_tdata_size));
|
||||||
Bzero(tls + I(_tbss_offset), I(_tbss_size) + sizeof(struct CosmoTib));
|
}
|
||||||
|
|
||||||
|
// clear .tbss
|
||||||
|
Bzero(tls + I(_tbss_offset), I(_tbss_size));
|
||||||
|
|
||||||
// set up thread information block
|
// set up thread information block
|
||||||
old = __get_tls();
|
return _mktls_finish(out_tib, tls, (struct CosmoTib *)(tls + I(_tls_size)));
|
||||||
neu = (struct CosmoTib *)(tls + I(_tls_size));
|
}
|
||||||
neu->tib_self = neu;
|
|
||||||
neu->tib_self2 = neu;
|
static char *_mktls_above(struct CosmoTib **out_tib) {
|
||||||
neu->tib_ftrace = old->tib_ftrace;
|
size_t siz;
|
||||||
neu->tib_strace = old->tib_strace;
|
char *mem, *dtv, *tls;
|
||||||
neu->tib_sigmask = old->tib_sigmask;
|
struct CosmoTib *tib, *old;
|
||||||
atomic_store_explicit(&neu->tib_tid, -1, memory_order_relaxed);
|
|
||||||
|
// allocate memory for tdata, tbss, and tib
|
||||||
if (out_tib) {
|
siz = ROUNDUP(sizeof(struct CosmoTib) + 2 * sizeof(void *) + I(_tls_size),
|
||||||
*out_tib = neu;
|
TLS_ALIGNMENT);
|
||||||
}
|
mem = memalign(TLS_ALIGNMENT, siz);
|
||||||
return tls;
|
if (!mem) return 0;
|
||||||
|
|
||||||
|
// poison memory between tdata and tbss
|
||||||
|
if (IsAsan()) {
|
||||||
|
__asan_poison(
|
||||||
|
mem + sizeof(struct CosmoTib) + 2 * sizeof(void *) + I(_tdata_size),
|
||||||
|
I(_tbss_offset) - I(_tdata_size), kAsanProtected);
|
||||||
|
}
|
||||||
|
|
||||||
|
tib = (struct CosmoTib *)mem;
|
||||||
|
dtv = mem + sizeof(*tib);
|
||||||
|
tls = dtv + 2 * sizeof(void *);
|
||||||
|
|
||||||
|
// set dtv
|
||||||
|
((uintptr_t *)dtv)[0] = 1;
|
||||||
|
((void **)dtv)[1] = tls;
|
||||||
|
|
||||||
|
// initialize .tdata
|
||||||
|
if (I(_tdata_size)) {
|
||||||
|
memmove(tls, _tdata_start, I(_tdata_size));
|
||||||
|
}
|
||||||
|
|
||||||
|
// clear .tbss
|
||||||
|
if (I(_tbss_size)) {
|
||||||
|
Bzero(tls + I(_tbss_offset), I(_tbss_size));
|
||||||
|
}
|
||||||
|
|
||||||
|
// set up thread information block
|
||||||
|
return _mktls_finish(out_tib, mem, tib);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocates thread-local storage memory for new thread.
|
||||||
|
* @return buffer that must be released with free()
|
||||||
|
*/
|
||||||
|
char *_mktls(struct CosmoTib **out_tib) {
|
||||||
|
__require_tls();
|
||||||
|
#ifdef __x86_64__
|
||||||
|
return _mktls_below(out_tib);
|
||||||
|
#else
|
||||||
|
return _mktls_above(out_tib);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
@ -255,9 +255,10 @@ static errno_t pthread_create_impl(pthread_t *thread,
|
||||||
if ((rc = clone(PosixThread, pt->attr.__stackaddr,
|
if ((rc = clone(PosixThread, pt->attr.__stackaddr,
|
||||||
pt->attr.__stacksize - (IsOpenbsd() ? 16 : 0),
|
pt->attr.__stacksize - (IsOpenbsd() ? 16 : 0),
|
||||||
CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES |
|
CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES |
|
||||||
CLONE_SIGHAND | CLONE_SETTLS | CLONE_PARENT_SETTID |
|
CLONE_SIGHAND | CLONE_SYSVSEM | CLONE_SETTLS |
|
||||||
CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID,
|
CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
|
||||||
pt, &pt->ptid, pt->tib, &pt->tib->tib_tid))) {
|
CLONE_CHILD_CLEARTID,
|
||||||
|
pt, &pt->ptid, __adj_tls(pt->tib), &pt->tib->tib_tid))) {
|
||||||
pthread_spin_lock(&_pthread_lock);
|
pthread_spin_lock(&_pthread_lock);
|
||||||
_pthread_list = nsync_dll_remove_(_pthread_list, &pt->list);
|
_pthread_list = nsync_dll_remove_(_pthread_list, &pt->list);
|
||||||
pthread_spin_unlock(&_pthread_lock);
|
pthread_spin_unlock(&_pthread_lock);
|
||||||
|
|
|
@ -129,7 +129,7 @@ int _spawn(int fun(void *, int), void *arg, struct spawn *opt_out_thread) {
|
||||||
CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
|
CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
|
||||||
CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
|
CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
|
||||||
CLONE_CHILD_CLEARTID,
|
CLONE_CHILD_CLEARTID,
|
||||||
spawner, &th->ptid, th->tib, &th->tib->tib_tid);
|
spawner, &th->ptid, __adj_tls(th->tib), &th->tib->tib_tid);
|
||||||
if (rc) {
|
if (rc) {
|
||||||
errno = rc;
|
errno = rc;
|
||||||
_freestack(th->stk);
|
_freestack(th->stk);
|
||||||
|
|
|
@ -57,8 +57,10 @@ void __set_tls(struct CosmoTib *);
|
||||||
asm("mov\t%%fs:0,%0" : "=r"(_t) : /* no inputs */ : "memory"); \
|
asm("mov\t%%fs:0,%0" : "=r"(_t) : /* no inputs */ : "memory"); \
|
||||||
_t; \
|
_t; \
|
||||||
})
|
})
|
||||||
#else
|
#define __adj_tls(tib) (tib)
|
||||||
#define __get_tls() ((struct CosmoTib *)__builtin_thread_pointer())
|
#elif defined(__aarch64__)
|
||||||
|
#define __get_tls() ((struct CosmoTib *)__builtin_thread_pointer() - 1)
|
||||||
|
#define __adj_tls(tib) ((struct CosmoTib *)(tib) + 1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
COSMOPOLITAN_C_END_
|
COSMOPOLITAN_C_END_
|
||||||
|
|
|
@ -37,7 +37,7 @@ static noasan inline void __set_tls_win32(void *tls) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif defined(__aarch64__)
|
#elif defined(__aarch64__)
|
||||||
#define __get_tls_privileged() ((struct CosmoTib *)__builtin_thread_pointer())
|
#define __get_tls_privileged() __get_tls()
|
||||||
#define __get_tls_win32() ((struct CosmoTib *)0)
|
#define __get_tls_win32() ((struct CosmoTib *)0)
|
||||||
#define __set_tls_win32(tls) (void)0
|
#define __set_tls_win32(tls) (void)0
|
||||||
#endif /* GNU x86-64 */
|
#endif /* GNU x86-64 */
|
||||||
|
|
|
@ -58,7 +58,7 @@ static inline uint64_t mul64(uint64_t a, uint64_t b)
|
||||||
*/
|
*/
|
||||||
double sqrt(double x)
|
double sqrt(double x)
|
||||||
{
|
{
|
||||||
#ifdef __SSE2__
|
#if defined(__x86_64__) && defined(__SSE2__)
|
||||||
|
|
||||||
asm("sqrtsd\t%1,%0" : "=x"(x) : "x"(x));
|
asm("sqrtsd\t%1,%0" : "=x"(x) : "x"(x));
|
||||||
return x;
|
return x;
|
||||||
|
@ -218,5 +218,5 @@ double sqrt(double x)
|
||||||
}
|
}
|
||||||
return y;
|
return y;
|
||||||
|
|
||||||
#endif /* __SSE2__ */
|
#endif /* __x86_64__ */
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,12 +16,12 @@
|
||||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
|
#include "libc/intrin/kprintf.h"
|
||||||
#include "libc/calls/calls.h"
|
#include "libc/calls/calls.h"
|
||||||
#include "libc/dce.h"
|
#include "libc/dce.h"
|
||||||
#include "libc/errno.h"
|
#include "libc/errno.h"
|
||||||
#include "libc/fmt/fmt.h"
|
#include "libc/fmt/fmt.h"
|
||||||
#include "libc/intrin/bits.h"
|
#include "libc/intrin/bits.h"
|
||||||
#include "libc/intrin/kprintf.h"
|
|
||||||
#include "libc/limits.h"
|
#include "libc/limits.h"
|
||||||
#include "libc/log/libfatal.internal.h"
|
#include "libc/log/libfatal.internal.h"
|
||||||
#include "libc/macros.internal.h"
|
#include "libc/macros.internal.h"
|
||||||
|
|
62
third_party/ggml/ggml.c
vendored
62
third_party/ggml/ggml.c
vendored
|
@ -47,69 +47,7 @@ GGML (MIT License)\\n\
|
||||||
Copyright (c) 2023 Georgi Gerganov\"");
|
Copyright (c) 2023 Georgi Gerganov\"");
|
||||||
asm(".include \"libc/disclaimer.inc\"");
|
asm(".include \"libc/disclaimer.inc\"");
|
||||||
// clang-format off
|
// clang-format off
|
||||||
|
|
||||||
#if defined(_WIN32)
|
|
||||||
|
|
||||||
typedef volatile LONG atomic_int;
|
|
||||||
typedef atomic_int atomic_bool;
|
|
||||||
|
|
||||||
static void atomic_store(atomic_int* ptr, LONG val) {
|
|
||||||
InterlockedExchange(ptr, val);
|
|
||||||
}
|
|
||||||
static LONG atomic_load(atomic_int* ptr) {
|
|
||||||
return InterlockedCompareExchange(ptr, 0, 0);
|
|
||||||
}
|
|
||||||
static LONG atomic_fetch_add(atomic_int* ptr, LONG inc) {
|
|
||||||
return InterlockedExchangeAdd(ptr, inc);
|
|
||||||
}
|
|
||||||
static LONG atomic_fetch_sub(atomic_int* ptr, LONG dec) {
|
|
||||||
return atomic_fetch_add(ptr, -(dec));
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef HANDLE pthread_t;
|
|
||||||
|
|
||||||
typedef DWORD thread_ret_t;
|
|
||||||
static int pthread_create(pthread_t* out, void* unused, thread_ret_t(*func)(void*), void* arg) {
|
|
||||||
(void) unused;
|
|
||||||
HANDLE handle = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, arg, 0, NULL);
|
|
||||||
if (handle == NULL)
|
|
||||||
{
|
|
||||||
return EAGAIN;
|
|
||||||
}
|
|
||||||
|
|
||||||
*out = handle;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int pthread_join(pthread_t thread, void* unused) {
|
|
||||||
(void) unused;
|
|
||||||
return (int) WaitForSingleObject(thread, INFINITE);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int sched_yield (void) {
|
|
||||||
Sleep (0);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
typedef void* thread_ret_t;
|
typedef void* thread_ret_t;
|
||||||
#endif
|
|
||||||
|
|
||||||
// __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
|
|
||||||
#if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))
|
|
||||||
#ifndef __FMA__
|
|
||||||
#define __FMA__
|
|
||||||
#endif
|
|
||||||
#ifndef __F16C__
|
|
||||||
#define __F16C__
|
|
||||||
#endif
|
|
||||||
#ifndef __SSE3__
|
|
||||||
#define __SSE3__
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __HAIKU__
|
|
||||||
#define static_assert(cond, msg) _Static_assert(cond, msg)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*#define GGML_PERF*/
|
/*#define GGML_PERF*/
|
||||||
#define GGML_DEBUG 0
|
#define GGML_DEBUG 0
|
||||||
|
|
Loading…
Add table
Reference in a new issue