mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-06-27 14:58:30 +00:00
Get llama.com working on aarch64
This commit is contained in:
parent
4c093155a3
commit
a0237a017c
19 changed files with 321 additions and 157 deletions
|
@ -56,8 +56,6 @@ sys_clone_linux:
|
|||
syscall
|
||||
1: hlt // ctid was corrupted by program!
|
||||
#elif defined(__aarch64__)
|
||||
and x1,x1,#-16 // align stack
|
||||
stp x5,x6,[x1,#-16]! // save func and arg
|
||||
mov x8,x3 // swap x3 and x4
|
||||
mov x3,x4 // swap x3 and x4
|
||||
mov x4,x8 // swap x3 and x4
|
||||
|
@ -65,8 +63,8 @@ sys_clone_linux:
|
|||
svc #0
|
||||
cbz x0,2f
|
||||
ret
|
||||
2: ldp x1,x0,[sp],#16 // child thread
|
||||
blr x1
|
||||
2: mov x0,x6 // child thread
|
||||
blr x5
|
||||
mov x8,#93 // __NR_exit
|
||||
svc #0
|
||||
#else
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "libc/errno.h"
|
||||
#include "libc/intrin/asan.internal.h"
|
||||
#include "libc/intrin/describeflags.internal.h"
|
||||
#include "libc/intrin/kprintf.h"
|
||||
#include "libc/intrin/strace.internal.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.internal.h"
|
||||
|
@ -452,7 +453,12 @@ static int CloneLinux(int (*func)(void *arg, int rc), char *stk, size_t stksz,
|
|||
ctid = (int *)sp;
|
||||
sp -= 8; // experiment
|
||||
}
|
||||
sp = sp & -16; // align the stack
|
||||
// align the stack
|
||||
#ifdef __aarch64__
|
||||
sp = sp & -128; // for kernel 4.6 and earlier
|
||||
#else
|
||||
sp = sp & -16;
|
||||
#endif
|
||||
if ((rc = sys_clone_linux(flags, sp, ptid, ctid, tls, func, arg)) >= 0) {
|
||||
// clone() is documented as setting ptid before return
|
||||
return 0;
|
||||
|
@ -577,6 +583,10 @@ errno_t clone(void *func, void *stk, size_t stksz, int flags, void *arg,
|
|||
__enable_threads();
|
||||
}
|
||||
|
||||
STRACE("clone(func=%t, stk=%p, stksz=%'zu, flags=%#x, arg=%p, ptid=%p, "
|
||||
"tls=%p, ctid=%p)",
|
||||
func, stk, stksz, flags, arg, ptid, tls, ctid);
|
||||
|
||||
if (!func) {
|
||||
rc = EINVAL;
|
||||
} else if (!IsTiny() &&
|
||||
|
|
|
@ -83,19 +83,19 @@ cosmo: push %rbp
|
|||
call _init
|
||||
|
||||
// call constructors
|
||||
ezlea __init_array_start,ax // static ctors in forward order
|
||||
.weak __init_array_start // could be called multiple times
|
||||
ezlea __init_array_end,cx // idempotency recommended
|
||||
.weak __init_array_end // @see ape/ape.lds
|
||||
ezlea __init_array_end,ax // static ctors in forward order
|
||||
.weak __init_array_end // could be called multiple times
|
||||
ezlea __init_array_start,cx // idempotency recommended
|
||||
.weak __init_array_start // @see ape/ape.lds
|
||||
1: cmp %rax,%rcx
|
||||
je 2f
|
||||
sub $8,%rax
|
||||
push %rax
|
||||
push %rcx
|
||||
call .Largs
|
||||
call *(%rax)
|
||||
pop %rcx
|
||||
pop %rax
|
||||
add $8,%rax
|
||||
jmp 1b
|
||||
|
||||
// call main()
|
||||
|
@ -141,7 +141,6 @@ cosmo: push %rbp
|
|||
push %rsi
|
||||
|
||||
// allocate stack
|
||||
call __mmi_init
|
||||
movabs $ape_stack_vaddr,%rdi
|
||||
mov $ape_stack_memsz,%esi
|
||||
mov $ape_stack_prot,%edx
|
||||
|
|
|
@ -16,8 +16,15 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/kprintf.h"
|
||||
#include "libc/intrin/strace.internal.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/nexgen32e/rdtsc.h"
|
||||
#include "libc/runtime/internal.h"
|
||||
#include "libc/runtime/memtrack.internal.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/thread/thread.h"
|
||||
#include "libc/thread/tls.h"
|
||||
#ifndef __x86_64__
|
||||
|
||||
int main(int, char **, char **) __attribute__((__weak__));
|
||||
|
@ -40,35 +47,65 @@ typedef int init_f(int argc, char **argv, char **envp, unsigned long *auxv);
|
|||
extern init_f __strace_init;
|
||||
extern init_f *__init_array_start[] __attribute__((__weak__));
|
||||
extern init_f *__init_array_end[] __attribute__((__weak__));
|
||||
extern uintptr_t ape_idata_iat[] __attribute__((__weak__));
|
||||
extern uintptr_t ape_idata_iatend[] __attribute__((__weak__));
|
||||
extern pthread_mutex_t __mmi_lock_obj;
|
||||
|
||||
struct CosmoTib *tib;
|
||||
|
||||
void cosmo(long *sp) {
|
||||
int argc;
|
||||
init_f **fp;
|
||||
uintptr_t *pp;
|
||||
char **argv, **envp;
|
||||
unsigned long *auxv;
|
||||
|
||||
// get startup timestamp as early as possible
|
||||
// it's used by --strace and also kprintf() %T
|
||||
kStartTsc = rdtsc();
|
||||
|
||||
// extracts arguments from old sysv stack abi
|
||||
argc = *sp;
|
||||
argv = (char **)(sp + 1);
|
||||
envp = (char **)(sp + 1 + argc + 1);
|
||||
auxv = (unsigned long *)(sp + 1 + argc + 1);
|
||||
for (;;) {
|
||||
if (!*auxv++) {
|
||||
break;
|
||||
}
|
||||
while (*auxv++) donothing;
|
||||
|
||||
// needed by kisdangerous()
|
||||
__oldstack = (intptr_t)sp;
|
||||
|
||||
// make win32 imps noop
|
||||
for (pp = ape_idata_iat; pp < ape_idata_iatend; ++pp) {
|
||||
*pp = (uintptr_t)_missingno;
|
||||
}
|
||||
|
||||
// initialize mmap() manager extremely early
|
||||
_mmi.n = ARRAYLEN(_mmi.s);
|
||||
_mmi.p = _mmi.s;
|
||||
__mmi_lock_obj._type = PTHREAD_MUTEX_RECURSIVE;
|
||||
|
||||
#ifdef SYSDEBUG
|
||||
// initialize --strace functionality
|
||||
argc = __strace_init(argc, argv, envp, auxv);
|
||||
#endif
|
||||
|
||||
// set helpful globals
|
||||
__argc = argc;
|
||||
__argv = argv;
|
||||
__envp = envp;
|
||||
__auxv = auxv;
|
||||
environ = envp;
|
||||
if (argc) program_invocation_name = argv[0];
|
||||
|
||||
// run initialization callbacks
|
||||
_init();
|
||||
for (fp = __init_array_start; fp < __init_array_end; ++fp) {
|
||||
__enable_tls();
|
||||
for (fp = __init_array_end; fp-- > __init_array_start;) {
|
||||
(*fp)(argc, argv, envp, auxv);
|
||||
}
|
||||
|
||||
// run program
|
||||
exit(main(argc, argv, envp));
|
||||
}
|
||||
|
||||
#endif /* __aarch64__ */
|
||||
#endif /* __x86_64__ */
|
||||
|
|
|
@ -62,7 +62,9 @@ static privileged dontinline void FixupLockNops(void) {
|
|||
|
||||
void __enable_threads(void) {
|
||||
if (__threaded) return;
|
||||
#ifdef __x86_64__
|
||||
STRACE("__enable_threads()");
|
||||
FixupLockNops();
|
||||
#endif
|
||||
__threaded = sys_gettid();
|
||||
}
|
||||
|
|
|
@ -23,17 +23,15 @@
|
|||
#include "libc/intrin/asancodes.h"
|
||||
#include "libc/intrin/atomic.h"
|
||||
#include "libc/intrin/weaken.h"
|
||||
#include "libc/log/libfatal.internal.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/runtime/internal.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/thread/posixthread.internal.h"
|
||||
#include "libc/thread/thread.h"
|
||||
#include "libc/thread/tls.h"
|
||||
|
||||
#define _TLSZ ((intptr_t)_tls_size)
|
||||
#define _TLDZ ((intptr_t)_tdata_size)
|
||||
#define _TIBZ sizeof(struct CosmoTib)
|
||||
#define I(x) ((uintptr_t)x)
|
||||
|
||||
extern unsigned char __tls_mov_nt_rax[];
|
||||
extern unsigned char __tls_add_nt_rax[];
|
||||
|
@ -41,20 +39,34 @@ extern unsigned char __tls_add_nt_rax[];
|
|||
nsync_dll_list_ _pthread_list;
|
||||
pthread_spinlock_t _pthread_lock;
|
||||
static struct PosixThread _pthread_main;
|
||||
_Alignas(TLS_ALIGNMENT) static char __static_tls[5008];
|
||||
_Alignas(TLS_ALIGNMENT) static char __static_tls[6016];
|
||||
|
||||
/**
|
||||
* Enables thread local storage for main process.
|
||||
*
|
||||
* %fs Linux/BSDs
|
||||
* Here's the TLS memory layout on x86_64:
|
||||
*
|
||||
* __get_tls()
|
||||
* │
|
||||
* _Thread_local │ __get_tls()
|
||||
* %fs Linux/BSDs
|
||||
* _Thread_local │
|
||||
* ┌───┬──────────┬──────────┼───┐
|
||||
* │pad│ .tdata │ .tbss │tib│
|
||||
* └───┴──────────┴──────────┼───┘
|
||||
* │
|
||||
* Windows/Mac %gs
|
||||
*
|
||||
* Here's the TLS memory layout on aarch64:
|
||||
*
|
||||
* %tpidr_el0
|
||||
* │
|
||||
* │ _Thread_local
|
||||
* ┌───┼───┬──────────┬──────────┐
|
||||
* │tib│dtv│ .tdata │ .tbss │
|
||||
* ├───┴───┴──────────┴──────────┘
|
||||
* │
|
||||
* __get_tls()
|
||||
*
|
||||
* This function is always called by the core runtime to guarantee TLS
|
||||
* is always available to your program. You must build your code using
|
||||
* -mno-tls-direct-seg-refs if you want to use _Thread_local.
|
||||
|
@ -81,10 +93,31 @@ _Alignas(TLS_ALIGNMENT) static char __static_tls[5008];
|
|||
void __enable_tls(void) {
|
||||
int tid;
|
||||
size_t siz;
|
||||
struct CosmoTib *tib;
|
||||
char *mem, *tls;
|
||||
struct CosmoTib *tib;
|
||||
|
||||
siz = ROUNDUP(_TLSZ + _TIBZ, _Alignof(__static_tls));
|
||||
// Here's the layout we're currently using:
|
||||
//
|
||||
// .align PAGESIZE
|
||||
// _tdata_start:
|
||||
// .tdata
|
||||
// _tdata_size = . - _tdata_start
|
||||
// .align PAGESIZE
|
||||
// _tbss_start:
|
||||
// _tdata_start + _tbss_offset:
|
||||
// .tbss
|
||||
// .align TLS_ALIGNMENT
|
||||
// _tbss_size = . - _tbss_start
|
||||
// _tbss_end:
|
||||
// _tbss_start + _tbss_size:
|
||||
// _tdata_start + _tls_size:
|
||||
//
|
||||
_unassert(_tbss_start == _tdata_start + I(_tbss_offset));
|
||||
_unassert(_tbss_start + I(_tbss_size) == _tdata_start + I(_tls_size));
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
siz = ROUNDUP(I(_tls_size) + sizeof(*tib), _Alignof(__static_tls));
|
||||
if (siz <= sizeof(__static_tls)) {
|
||||
// if tls requirement is small then use the static tls block
|
||||
// which helps avoid a system call for apps with little tls
|
||||
|
@ -103,14 +136,52 @@ void __enable_tls(void) {
|
|||
|
||||
if (IsAsan()) {
|
||||
// poison the space between .tdata and .tbss
|
||||
__asan_poison(mem + (intptr_t)_tdata_size,
|
||||
(intptr_t)_tbss_offset - (intptr_t)_tdata_size,
|
||||
__asan_poison(mem + I(_tdata_size), I(_tbss_offset) - I(_tdata_size),
|
||||
kAsanProtected);
|
||||
}
|
||||
|
||||
tib = (struct CosmoTib *)(mem + siz - sizeof(*tib));
|
||||
tls = mem + siz - sizeof(*tib) - I(_tls_size);
|
||||
|
||||
#elif defined(__aarch64__)
|
||||
|
||||
siz = ROUNDUP(sizeof(*tib) + 2 * sizeof(void *) + I(_tls_size),
|
||||
_Alignof(__static_tls));
|
||||
if (siz <= sizeof(__static_tls)) {
|
||||
mem = __static_tls;
|
||||
} else {
|
||||
_npassert(_weaken(_mapanon));
|
||||
siz = ROUNDUP(siz, FRAMESIZE);
|
||||
mem = _weaken(_mapanon)(siz);
|
||||
_npassert(mem);
|
||||
}
|
||||
|
||||
if (IsAsan()) {
|
||||
// there's a roundup(pagesize) gap between .tdata and .tbss
|
||||
// poison that empty space
|
||||
__asan_poison(mem + sizeof(*tib) + 2 * sizeof(void *) + I(_tdata_size),
|
||||
I(_tbss_offset) - I(_tdata_size), kAsanProtected);
|
||||
}
|
||||
|
||||
tib = (struct CosmoTib *)mem;
|
||||
tls = mem + sizeof(*tib) + 2 * sizeof(void *);
|
||||
|
||||
// Set the DTV.
|
||||
//
|
||||
// We don't support dynamic shared objects at the moment. The APE
|
||||
// linker script will only produce a single PT_TLS program header
|
||||
// therefore our job is relatively simple.
|
||||
//
|
||||
// @see musl/src/env/__init_tls.c
|
||||
// @see https://chao-tic.github.io/blog/2018/12/25/tls
|
||||
((uintptr_t *)tls)[-2] = 1;
|
||||
((void **)tls)[-1] = tls;
|
||||
|
||||
#else
|
||||
#error "unsupported architecture"
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
// initialize main thread tls memory
|
||||
tib = (struct CosmoTib *)(mem + siz - _TIBZ);
|
||||
tls = mem + siz - _TIBZ - _TLSZ;
|
||||
tib->tib_self = tib;
|
||||
tib->tib_self2 = tib;
|
||||
tib->tib_errno = __errno;
|
||||
|
@ -135,7 +206,9 @@ void __enable_tls(void) {
|
|||
atomic_store_explicit(&_pthread_main.ptid, tid, memory_order_relaxed);
|
||||
|
||||
// copy in initialized data section
|
||||
__repmovsb(tls, _tdata_start, _TLDZ);
|
||||
if (I(_tdata_size)) {
|
||||
memcpy(tls, _tdata_start, I(_tdata_size));
|
||||
}
|
||||
|
||||
// ask the operating system to change the x86 segment register
|
||||
__set_tls(tib);
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
int sys_set_tls();
|
||||
|
||||
void __set_tls(struct CosmoTib *tib) {
|
||||
tib = __adj_tls(tib);
|
||||
#ifdef __x86_64__
|
||||
// ask the operating system to change the x86 segment register
|
||||
int ax, dx;
|
||||
|
@ -58,6 +59,6 @@ void __set_tls(struct CosmoTib *tib) {
|
|||
"d"((uint32_t)(val >> 32)));
|
||||
}
|
||||
#else
|
||||
asm volatile("msr\ttpidr_el0,%0" : /* no outputs */ : "r"(tib + 1));
|
||||
asm volatile("msr\ttpidr_el0,%0" : /* no outputs */ : "r"(tib));
|
||||
#endif
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue