mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-03-03 07:29:23 +00:00
Get POSIX threads working on Apple Silicon
It's now possible to run a working ape-m1 o/aarch64/third_party/ggml/llama.com on Apple M1 hardware running XNU!
This commit is contained in:
parent
8fdb31681a
commit
b5eab2b0b7
3 changed files with 78 additions and 16 deletions
|
@ -89,7 +89,13 @@ sched_yield:
|
||||||
ret
|
ret
|
||||||
|
|
||||||
#elif defined(__aarch64__)
|
#elif defined(__aarch64__)
|
||||||
mov x8,#0x7c
|
|
||||||
|
mov x0,#0
|
||||||
|
mov x1,#0
|
||||||
|
mov x2,#0
|
||||||
|
mov x3,#0
|
||||||
|
mov x8,#0x7c // sched_yield() for linux
|
||||||
|
mov x16,#0x85d // select(0,0,0,0) for xnu
|
||||||
svc 0
|
svc 0
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
|
@ -18,12 +18,14 @@
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/sysv/consts/clone.h"
|
#include "libc/sysv/consts/clone.h"
|
||||||
#include "libc/assert.h"
|
#include "libc/assert.h"
|
||||||
|
#include "libc/atomic.h"
|
||||||
#include "libc/calls/calls.h"
|
#include "libc/calls/calls.h"
|
||||||
#include "libc/calls/struct/ucontext-netbsd.internal.h"
|
#include "libc/calls/struct/ucontext-netbsd.internal.h"
|
||||||
#include "libc/calls/syscall-sysv.internal.h"
|
#include "libc/calls/syscall-sysv.internal.h"
|
||||||
#include "libc/dce.h"
|
#include "libc/dce.h"
|
||||||
#include "libc/errno.h"
|
#include "libc/errno.h"
|
||||||
#include "libc/intrin/asan.internal.h"
|
#include "libc/intrin/asan.internal.h"
|
||||||
|
#include "libc/intrin/atomic.h"
|
||||||
#include "libc/intrin/describeflags.internal.h"
|
#include "libc/intrin/describeflags.internal.h"
|
||||||
#include "libc/intrin/kprintf.h"
|
#include "libc/intrin/kprintf.h"
|
||||||
#include "libc/intrin/strace.internal.h"
|
#include "libc/intrin/strace.internal.h"
|
||||||
|
@ -36,6 +38,7 @@
|
||||||
#include "libc/runtime/clone.internal.h"
|
#include "libc/runtime/clone.internal.h"
|
||||||
#include "libc/runtime/internal.h"
|
#include "libc/runtime/internal.h"
|
||||||
#include "libc/runtime/runtime.h"
|
#include "libc/runtime/runtime.h"
|
||||||
|
#include "libc/runtime/syslib.internal.h"
|
||||||
#include "libc/sock/internal.h"
|
#include "libc/sock/internal.h"
|
||||||
#include "libc/stdalign.internal.h"
|
#include "libc/stdalign.internal.h"
|
||||||
#include "libc/str/str.h"
|
#include "libc/str/str.h"
|
||||||
|
@ -50,8 +53,6 @@
|
||||||
#include "libc/thread/tls2.h"
|
#include "libc/thread/tls2.h"
|
||||||
#include "libc/thread/xnu.internal.h"
|
#include "libc/thread/xnu.internal.h"
|
||||||
|
|
||||||
#ifdef __x86_64__
|
|
||||||
|
|
||||||
#define __NR_thr_new 455
|
#define __NR_thr_new 455
|
||||||
#define __NR_clone_linux 56
|
#define __NR_clone_linux 56
|
||||||
#define __NR__lwp_create 309
|
#define __NR__lwp_create 309
|
||||||
|
@ -62,10 +63,6 @@
|
||||||
#define LWP_DETACHED 0x00000040
|
#define LWP_DETACHED 0x00000040
|
||||||
#define LWP_SUSPENDED 0x00000080
|
#define LWP_SUSPENDED 0x00000080
|
||||||
|
|
||||||
__msabi extern typeof(TlsSetValue) *const __imp_TlsSetValue;
|
|
||||||
__msabi extern typeof(ExitThread) *const __imp_ExitThread;
|
|
||||||
__msabi extern typeof(WakeByAddressAll) *const __imp_WakeByAddressAll;
|
|
||||||
|
|
||||||
struct CloneArgs {
|
struct CloneArgs {
|
||||||
union {
|
union {
|
||||||
int tid;
|
int tid;
|
||||||
|
@ -80,9 +77,15 @@ struct CloneArgs {
|
||||||
void *arg;
|
void *arg;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef __x86_64__
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// THE NEW TECHNOLOGY
|
// THE NEW TECHNOLOGY
|
||||||
|
|
||||||
|
__msabi extern typeof(TlsSetValue) *const __imp_TlsSetValue;
|
||||||
|
__msabi extern typeof(ExitThread) *const __imp_ExitThread;
|
||||||
|
__msabi extern typeof(WakeByAddressAll) *const __imp_WakeByAddressAll;
|
||||||
|
|
||||||
int WinThreadLaunch(void *arg, // rdi
|
int WinThreadLaunch(void *arg, // rdi
|
||||||
int tid, // rsi
|
int tid, // rsi
|
||||||
int (*func)(void *, int), // rdx
|
int (*func)(void *, int), // rdx
|
||||||
|
@ -143,12 +146,12 @@ static textwindows errno_t CloneWindows(int (*func)(void *, int), char *stk,
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// XNU'S NOT UNIX
|
// XNU'S NOT UNIX
|
||||||
|
|
||||||
void XnuThreadThunk(void *pthread, // rdi
|
void XnuThreadThunk(void *pthread, // rdi x0
|
||||||
int machport, // rsi
|
int machport, // rsi x1
|
||||||
void *(*func)(void *), // rdx
|
void *(*func)(void *), // rdx x2
|
||||||
void *arg, // rcx
|
void *arg, // rcx x3
|
||||||
intptr_t *stack, // r8
|
intptr_t *stack, // r8 x4
|
||||||
unsigned xnuflags); // r9
|
unsigned xnuflags); // r9 x5
|
||||||
asm("XnuThreadThunk:\n\t"
|
asm("XnuThreadThunk:\n\t"
|
||||||
"xor\t%ebp,%ebp\n\t"
|
"xor\t%ebp,%ebp\n\t"
|
||||||
"mov\t%r8,%rsp\n\t"
|
"mov\t%r8,%rsp\n\t"
|
||||||
|
@ -189,8 +192,7 @@ XnuThreadMain(void *pthread, // rdi
|
||||||
// %r10 = uint32_t sem);
|
// %r10 = uint32_t sem);
|
||||||
asm volatile("movl\t$0,%0\n\t" // *wt->ztid = 0
|
asm volatile("movl\t$0,%0\n\t" // *wt->ztid = 0
|
||||||
"xor\t%%r10d,%%r10d\n\t" // sem = 0
|
"xor\t%%r10d,%%r10d\n\t" // sem = 0
|
||||||
"syscall\n\t" // __bsdthread_terminate()
|
"syscall" // __bsdthread_terminate()
|
||||||
"ud2"
|
|
||||||
: "=m"(*wt->ztid)
|
: "=m"(*wt->ztid)
|
||||||
: "a"(0x2000000 | 361), "D"(0), "S"(0), "d"(0L)
|
: "a"(0x2000000 | 361), "D"(0), "S"(0), "d"(0L)
|
||||||
: "rcx", "r10", "r11", "memory");
|
: "rcx", "r10", "r11", "memory");
|
||||||
|
@ -430,6 +432,52 @@ static int CloneNetbsd(int (*func)(void *, int), char *stk, size_t stksz,
|
||||||
|
|
||||||
#endif /* __x86_64__ */
|
#endif /* __x86_64__ */
|
||||||
|
|
||||||
|
#ifdef __aarch64__
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// APPLE SILICON
|
||||||
|
|
||||||
|
static void *SiliconThreadMain(void *arg) {
|
||||||
|
register struct CloneArgs *wt asm("x21") = arg;
|
||||||
|
asm volatile("ldr\tx28,%0" : /* no outputs */ : "m"(wt->tls));
|
||||||
|
int tid = sys_gettid();
|
||||||
|
*wt->ctid = tid;
|
||||||
|
*wt->ptid = tid;
|
||||||
|
register long x0 asm("x0") = (long)wt->arg;
|
||||||
|
register long x1 asm("x1") = (long)tid;
|
||||||
|
asm volatile("mov\tx19,x29\n\t" // save frame pointer
|
||||||
|
"mov\tx20,sp\n\t" // save stack pointer
|
||||||
|
"mov\tx29,#0\n\t" // reset backtrace
|
||||||
|
"mov\tsp,x21\n\t" // switch stack
|
||||||
|
"blr\t%2\n\t" // wt->func(wt->arg, tid)
|
||||||
|
"mov\tx29,x19\n\t" // restore frame pointer
|
||||||
|
"mov\tsp,x20" // restore stack pointer
|
||||||
|
: "+r"(x0)
|
||||||
|
: "r"(x1), "r"(wt->func)
|
||||||
|
: "x19", "x20", "memory");
|
||||||
|
*wt->ztid = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Spawns a thread on Apple Silicon through the host pthread_create().
 *
 * A `struct CloneArgs` record is carved out of the top of the supplied
 * stack region (aligned down to at least 16 bytes), filled in from the
 * clone() style arguments, and handed to SiliconThreadMain() as the new
 * thread's argument.
 *
 * @param fn is the callback the new thread runs as `fn(arg, tid)`
 * @param stk is the base of the caller-supplied stack memory
 * @param stksz is the byte size of `stk`
 * @param flags selects which of `tls`, `ptid`, and `ctid` are honored via
 *     CLONE_SETTLS, CLONE_PARENT_SETTID, CLONE_CHILD_SETTID, and
 *     CLONE_CHILD_CLEARTID
 * @param arg is passed through to `fn`
 * @param tls is the thread-local storage pointer used with CLONE_SETTLS
 * @param ptid receives the thread id when CLONE_PARENT_SETTID is set
 * @param ctid receives the thread id when CLONE_CHILD_SETTID is set and
 *     is zeroed at thread exit when CLONE_CHILD_CLEARTID is set
 * @return whatever the host pthread_create() returns
 */
static errno_t CloneSilicon(int (*fn)(void *, int), char *stk, size_t stksz,
                            int flags, void *arg, void *tls, int *ptid,
                            int *ctid) {
  pthread_t th;
  struct CloneArgs *wt;
  size_t align = MAX(16, alignof(struct CloneArgs));
  intptr_t top = (intptr_t)(stk + stksz);

  // place the record at the highest suitably aligned address in the stack
  wt = (struct CloneArgs *)((top - sizeof(struct CloneArgs)) & -align);

  // any tid slot the caller did not ask for is pointed at the record itself
  wt->ctid = &wt->tid;
  if (flags & CLONE_CHILD_SETTID) {
    wt->ctid = ctid;
  }
  wt->ptid = &wt->tid;
  if (flags & CLONE_PARENT_SETTID) {
    wt->ptid = ptid;
  }
  wt->ztid = &wt->tid;
  if (flags & CLONE_CHILD_CLEARTID) {
    wt->ztid = ctid;
  }

  if (flags & CLONE_SETTLS) {
    wt->tls = tls;
  } else {
    wt->tls = 0;
  }
  wt->func = fn;
  wt->arg = arg;

  return __syslib->pthread_create(&th, 0, SiliconThreadMain, wt);
}
|
||||||
|
|
||||||
|
#endif /* __aarch64__ */
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// GNU/SYSTEMD
|
// GNU/SYSTEMD
|
||||||
|
|
||||||
|
@ -605,9 +653,15 @@ errno_t clone(void *func, void *stk, size_t stksz, int flags, void *arg,
|
||||||
CLONE_SIGHAND | CLONE_SYSVSEM)) {
|
CLONE_SIGHAND | CLONE_SYSVSEM)) {
|
||||||
STRACE("cosmo clone() is picky about flags, see clone.c");
|
STRACE("cosmo clone() is picky about flags, see clone.c");
|
||||||
rc = EINVAL;
|
rc = EINVAL;
|
||||||
#ifdef __x86_64__
|
|
||||||
} else if (IsXnu()) {
|
} else if (IsXnu()) {
|
||||||
|
#ifdef __x86_64__
|
||||||
rc = CloneXnu(func, stk, stksz, flags, arg, tls, ptid, ctid);
|
rc = CloneXnu(func, stk, stksz, flags, arg, tls, ptid, ctid);
|
||||||
|
#elif defined(__aarch64__)
|
||||||
|
rc = CloneSilicon(func, stk, stksz, flags, arg, tls, ptid, ctid);
|
||||||
|
#else
|
||||||
|
#error "unsupported architecture"
|
||||||
|
#endif
|
||||||
|
#ifdef __x86_64__
|
||||||
} else if (IsFreebsd()) {
|
} else if (IsFreebsd()) {
|
||||||
rc = CloneFreebsd(func, stk, stksz, flags, arg, tls, ptid, ctid);
|
rc = CloneFreebsd(func, stk, stksz, flags, arg, tls, ptid, ctid);
|
||||||
} else if (IsNetbsd()) {
|
} else if (IsNetbsd()) {
|
||||||
|
|
2
third_party/ggml/llama.cc
vendored
2
third_party/ggml/llama.cc
vendored
|
@ -1128,8 +1128,10 @@ static void llama_model_load_internal(
|
||||||
const size_t mem_required_state =
|
const size_t mem_required_state =
|
||||||
scale*MEM_REQ_KV_SELF().at(model.type);
|
scale*MEM_REQ_KV_SELF().at(model.type);
|
||||||
|
|
||||||
|
if (verbose > 0) {
|
||||||
fprintf(stderr, "%s: mem required = %7.2f MB (+ %7.2f MB per state)\n", __func__,
|
fprintf(stderr, "%s: mem required = %7.2f MB (+ %7.2f MB per state)\n", __func__,
|
||||||
mem_required / 1024.0 / 1024.0, mem_required_state / 1024.0 / 1024.0);
|
mem_required / 1024.0 / 1024.0, mem_required_state / 1024.0 / 1024.0);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef GGML_USE_CUBLAS
|
#ifdef GGML_USE_CUBLAS
|
||||||
const int n_gpu = std::min(n_gpu_layers, int(hparams.n_layer));
|
const int n_gpu = std::min(n_gpu_layers, int(hparams.n_layer));
|
||||||
|
|
Loading…
Add table
Reference in a new issue