Fix some more issues with aarch64 and llama.cpp

This commit is contained in:
Justine Tunney 2023-05-10 07:32:15 -07:00
parent 64aca4dc4f
commit 290a49952e
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
11 changed files with 168 additions and 65 deletions

View file

@ -328,9 +328,15 @@ _Hide int __fmt(void *fn, void *arg, const char *format, va_list va) {
flags |= FLAGS_ISSIGNED;
// fallthrough
case 'u': {
uint128_t value;
flags &= ~FLAGS_HASH; // no hash for dec format
FormatNumber:
if (__fmt_ntoa(out, arg, va, signbit, log2base, prec, width, flags,
if (signbit > 63) {
value = va_arg(va, uint128_t);
} else {
value = va_arg(va, uint64_t);
}
if (__fmt_ntoa(out, arg, value, signbit, log2base, prec, width, flags,
alphabet) == -1) {
return -1;
}
@ -386,19 +392,32 @@ _Hide int __fmt(void *fn, void *arg, const char *format, va_list va) {
case 'e':
case 'E':
case 'a':
case 'A':
case 'A': {
int rc;
if (!_weaken(__fmt_dtoa)) {
p = "?";
prec = 0;
flags &= ~(FLAGS_PRECISION | FLAGS_PLUS | FLAGS_SPACE);
goto FormatString;
}
if (_weaken(__fmt_dtoa)(out, arg, d, flags, prec, sign, width,
longdouble, qchar, signbit, alphabet,
va) == -1) {
return -1;
rc = _weaken(__fmt_dtoa)(out, arg, d, flags, prec, sign, width,
longdouble, qchar, signbit, alphabet, va);
if (rc == -1) return -1;
#ifdef __aarch64__
// huge kludge
switch (rc) {
case __FMT_CONSUMED_DOUBLE:
va_arg(va, double);
break;
case __FMT_CONSUMED_LONG_DOUBLE:
va_arg(va, long double);
break;
default:
unreachable;
}
#endif /* __aarch64__ */
break;
}
case '%':
__FMT_PUT('%');
break;

View file

@ -3,6 +3,9 @@
#define PRINTF_NTOA_BUFFER_SIZE 144
#define __FMT_CONSUMED_DOUBLE 1
#define __FMT_CONSUMED_LONG_DOUBLE 2
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
@ -19,7 +22,7 @@ int __fmt_pad(int (*)(const char *, void *, size_t), void *,
int __fmt_stoa(int (*)(const char *, void *, size_t), void *, void *,
unsigned long, unsigned long, unsigned long, unsigned char,
unsigned char) _Hide;
int __fmt_ntoa(int (*)(const char *, void *, size_t), void *, va_list,
int __fmt_ntoa(int (*)(const char *, void *, size_t), void *, uint128_t,
unsigned char, unsigned long, unsigned long, unsigned long,
unsigned char, const char *) _Hide;
int __fmt_dtoa(int (*)(const char *, void *, size_t), void *, int, int, int,

View file

@ -158,12 +158,12 @@ int __fmt_ntoa2(int out(const char *, void *, size_t), void *arg,
flags, alphabet);
}
int __fmt_ntoa(int out(const char *, void *, size_t), void *arg, va_list va,
unsigned char signbit, unsigned long log2base,
int __fmt_ntoa(int out(const char *, void *, size_t), void *arg,
uint128_t value, unsigned char signbit, unsigned long log2base,
unsigned long prec, unsigned long width, unsigned char flags,
const char *lang) {
bool neg;
uint128_t value, sign;
uint128_t sign;
/* ignore '0' flag when prec or minus flag is given */
if (flags & (FLAGS_PRECISION | FLAGS_LEFT)) {
@ -175,12 +175,6 @@ int __fmt_ntoa(int out(const char *, void *, size_t), void *arg, va_list va,
flags &= ~(FLAGS_PLUS | FLAGS_SPACE);
}
if (signbit > 63) {
value = va_arg(va, uint128_t);
} else {
value = va_arg(va, uint64_t);
}
neg = 0;
sign = 1;
sign <<= signbit;

View file

@ -19,6 +19,7 @@
#include "libc/intrin/atomic.h"
#include "libc/intrin/kmalloc.h"
#include "libc/stdio/internal.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/thread/thread.h"
@ -30,9 +31,11 @@ FILE *__stdio_alloc(void) {
while (f) {
if (atomic_compare_exchange_weak_explicit(
&__stdio_freelist, &f,
atomic_load_explicit(&f->next, memory_order_acquire),
atomic_load_explicit((_Atomic(struct FILE *) *)&f->next,
memory_order_acquire),
memory_order_release, memory_order_relaxed)) {
atomic_store_explicit(&f->next, 0, memory_order_release);
atomic_store_explicit((_Atomic(struct FILE *) *)&f->next, 0,
memory_order_release);
break;
}
}
@ -50,7 +53,8 @@ void __stdio_free(FILE *f) {
bzero(f, sizeof(*f));
g = atomic_load_explicit(&__stdio_freelist, memory_order_acquire);
for (;;) {
atomic_store_explicit(&f->next, g, memory_order_release);
atomic_store_explicit((_Atomic(struct FILE *) *)&f->next, g,
memory_order_release);
if (atomic_compare_exchange_weak_explicit(&__stdio_freelist, &g, f,
memory_order_release,
memory_order_relaxed)) {

View file

@ -237,7 +237,7 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d,
union U u;
struct FPBits fpb;
char *s, *q, *se, *s0, special[8];
int c, k, i1, ui, bw, bex, sgn, prec1, decpt;
int c, k, i1, ui, bw, rc, bex, sgn, prec1, decpt, consumed;
x = 0;
switch (d) {
case 'F':
@ -245,6 +245,7 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d,
if (!(flags & FLAGS_PRECISION)) prec = 6;
if (!longdouble) {
x = va_arg(va, double);
consumed = __FMT_CONSUMED_DOUBLE;
s = s0 = dtoa(x, 3, prec, &decpt, &fpb.sign, &se);
if (decpt == 9999) {
if (s && s[0] == 'N') {
@ -255,6 +256,7 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d,
}
} else {
u.ld = va_arg(va, long double);
consumed = __FMT_CONSUMED_LONG_DOUBLE;
xfpbits(&u, &fpb);
s = s0 =
gdtoa(fpb.fpi, fpb.ex, fpb.bits, &fpb.kind, 3, prec, &decpt, &se);
@ -274,7 +276,9 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d,
memcpy(q, kSpecialFloats[fpb.kind == STRTOG_NaN][d >= 'a'], 4);
flags &= ~(FLAGS_PRECISION | FLAGS_PLUS | FLAGS_HASH | FLAGS_SPACE);
prec = 0;
return __fmt_stoa(out, arg, s, flags, prec, width, signbit, qchar);
rc = __fmt_stoa(out, arg, s, flags, prec, width, signbit, qchar);
if (rc == -1) return -1;
return consumed;
}
FormatReal:
if (fpb.sign /* && (x || sign) */) sign = '-';
@ -338,6 +342,7 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d,
if (prec < 1) prec = 1;
if (!longdouble) {
x = va_arg(va, double);
consumed = __FMT_CONSUMED_DOUBLE;
s = s0 = dtoa(x, 2, prec, &decpt, &fpb.sign, &se);
if (decpt == 9999) {
if (s && s[0] == 'N') {
@ -348,6 +353,7 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d,
}
} else {
u.ld = va_arg(va, long double);
consumed = __FMT_CONSUMED_LONG_DOUBLE;
xfpbits(&u, &fpb);
s = s0 = gdtoa(fpb.fpi, fpb.ex, fpb.bits, &fpb.kind, prec ? 2 : 0, prec,
&decpt, &se);
@ -379,6 +385,7 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d,
if (prec < 0) prec = 0;
if (!longdouble) {
x = va_arg(va, double);
consumed = __FMT_CONSUMED_DOUBLE;
s = s0 = dtoa(x, 2, prec + 1, &decpt, &fpb.sign, &se);
if (decpt == 9999) {
if (s && s[0] == 'N') {
@ -389,6 +396,7 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d,
}
} else {
u.ld = va_arg(va, long double);
consumed = __FMT_CONSUMED_LONG_DOUBLE;
xfpbits(&u, &fpb);
s = s0 = gdtoa(fpb.fpi, fpb.ex, fpb.bits, &fpb.kind, prec ? 2 : 0, prec,
&decpt, &se);
@ -451,9 +459,11 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d,
FormatBinary:
if (longdouble) {
u.ld = va_arg(va, long double);
consumed = __FMT_CONSUMED_LONG_DOUBLE;
xfpbits(&u, &fpb);
} else {
u.d = va_arg(va, double);
consumed = __FMT_CONSUMED_DOUBLE;
dfpbits(&u, &fpb);
}
if (fpb.kind == STRTOG_Infinite || fpb.kind == STRTOG_NaN) {
@ -530,5 +540,5 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d,
default:
unreachable;
}
return 0;
return consumed;
}

View file

@ -15,21 +15,21 @@ COSMOPOLITAN_C_START_
*/
typedef struct FILE {
uint8_t bufmode; /* 0x00 _IOFBF, etc. (ignored if fd=-1) */
bool noclose; /* 0x01 for fake dup() todo delete! */
uint32_t iomode; /* 0x04 O_RDONLY, etc. (ignored if fd=-1) */
int32_t state; /* 0x08 0=OK, -1=EOF, >0=errno */
int fd; /* 0x0c ≥0=fd, -1=closed|buffer */
uint32_t beg; /* 0x10 */
uint32_t end; /* 0x14 */
char *buf; /* 0x18 */
uint32_t size; /* 0x20 */
uint32_t nofree; /* 0x24 */
int pid; /* 0x28 */
char *getln; /* 0x30 */
char lock[16]; /* 0x38 */
_Atomic(struct FILE *) next; /* 0x48 */
char mem[BUFSIZ]; /* 0x50 */
uint8_t bufmode; /* 0x00 _IOFBF, etc. (ignored if fd=-1) */
bool noclose; /* 0x01 for fake dup() todo delete! */
uint32_t iomode; /* 0x04 O_RDONLY, etc. (ignored if fd=-1) */
int32_t state; /* 0x08 0=OK, -1=EOF, >0=errno */
int fd; /* 0x0c ≥0=fd, -1=closed|buffer */
uint32_t beg; /* 0x10 */
uint32_t end; /* 0x14 */
char *buf; /* 0x18 */
uint32_t size; /* 0x20 */
uint32_t nofree; /* 0x24 */
int pid; /* 0x28 */
char *getln; /* 0x30 */
char lock[16]; /* 0x38 */
struct FILE *next; /* 0x48 */
char mem[BUFSIZ]; /* 0x50 */
} FILE;
extern FILE *stdin;

View file

@ -22,6 +22,18 @@
/**
* Sets signal mask on thread attributes object.
*
* For example, to spawn a thread that won't interfere with signals:
*
* pthread_t id;
* sigset_t mask;
* pthread_attr_t attr;
* sigfillset(&mask);
* pthread_attr_init(&attr);
* pthread_attr_setsigmask_np(&attr, &mask);
* pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
* pthread_create(&id, &attr, Worker, 0);
* pthread_attr_destroy(&attr);
*
* @param attr is the thread attributes object
* @param sigmask will be copied into attributes, or if it's null, then
* any signal mask already present on the object will be cleared

View file

@ -0,0 +1,51 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2023 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/struct/sigset.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/sig.h"
#include "libc/testlib/testlib.h"
#include "libc/thread/thread.h"
#include "libc/thread/thread2.h"
sigset_t actual;
sigset_t golden;
sigset_t original;
/**
 * Thread entry point for the test below.
 *
 * Captures the signal mask this thread is running with into `actual`
 * and asserts that it is byte-for-byte equal to `golden`.
 *
 * @param arg is ignored
 * @return always null
 */
static void *MyThread(void *arg) {
  (void)arg;
  // a null `set` leaves the mask untouched and only reports it
  ASSERT_EQ(0, sigprocmask(SIG_BLOCK, NULL, &actual));
  ASSERT_EQ(0, memcmp(&golden, &actual, sizeof(golden)));
  return NULL;
}
/*
 * Checks that a signal mask stored on a thread attributes object is the
 * mask the spawned thread observes, and that the spawning thread's own
 * mask is the same after the join as it was before.
 */
TEST(pthread_attr_setsigmask_np, getsAppliedToThread) {
  pthread_t worker;
  pthread_attr_t attrs;

  // mask we expect the spawned thread to run with
  sigemptyset(&golden);
  sigaddset(&golden, SIGSYS);
  sigaddset(&golden, SIGUSR1);

  // record our own mask; it has to differ from golden, otherwise the
  // comparison made inside the thread would pass trivially
  ASSERT_EQ(0, sigprocmask(SIG_BLOCK, NULL, &original));
  ASSERT_NE(0, memcmp(&golden, &original, sizeof(golden)));

  // spawn the thread with golden attached to its attributes
  ASSERT_EQ(0, pthread_attr_init(&attrs));
  ASSERT_EQ(0, pthread_attr_setsigmask_np(&attrs, &golden));
  ASSERT_EQ(0, pthread_create(&worker, &attrs, MyThread, NULL));
  ASSERT_EQ(0, pthread_attr_destroy(&attrs));
  ASSERT_EQ(0, pthread_join(worker, NULL));

  // our own mask must be what it was before the thread was spawned
  ASSERT_EQ(0, sigprocmask(SIG_BLOCK, NULL, &actual));
  ASSERT_EQ(0, memcmp(&actual, &original, sizeof(actual)));
}

View file

@ -25,7 +25,7 @@ struct gpt_params {
int32_t n_predict = 128; // new tokens to predict
int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
int32_t n_ctx = 512; // context size
int32_t n_batch = 32; // batch size for prompt processing (must be >=32 to use BLAS)
int32_t n_batch = 64; // batch size for prompt processing (must be >=32 to use BLAS)
int32_t n_keep = 0; // number of tokens to keep from initial prompt
// sampling parameters

View file

@ -42,6 +42,7 @@
#include "third_party/ggml/common.h"
#include "third_party/ggml/llama.h"
#include "third_party/ggml/llama_util.h"
#include "third_party/libcxx/atomic"
#include "third_party/libcxx/iostream"
#include "third_party/libcxx/string"
#include "third_party/libcxx/vector"
@ -52,26 +53,20 @@ Copyright (c) 2023 Georgi Gerganov\"");
asm(".include \"libc/disclaimer.inc\"");
// clang-format off
static console_state con_st;
static llama_context ** g_ctx;
static int g_verbose;
static bool is_interacting = false;
static std::atomic<bool> is_interacting;
static std::atomic<bool> is_terminated;
#define EPHEMERAL(fmt) "\r\e[K\033[1;35m" fmt " \033[0m"
void sigint_handler(int signo) {
if (signo == SIGINT) {
if (!is_interacting) {
is_interacting=true;
} else {
console_cleanup(con_st);
printf("\n");
if (g_verbose) {
llama_print_timings(*g_ctx);
}
_exit(128 + signo);
}
// SIGINT handler installed when not running interactively: it only
// raises the is_terminated flag and does no other work itself.
static void sigint_handler_batch(int signo) {
    (void)signo;
    is_terminated.store(true);
}
// SIGINT handler installed for interactive runs: a ctrl-c received while
// not interacting switches is_interacting on; a ctrl-c received while
// already interacting raises is_terminated instead.
static void sigint_handler_interactive(int signo) {
    (void)signo;
    if (is_interacting.load()) {
        is_terminated.store(true);
        return;
    }
    is_interacting.store(true);
}
@ -116,9 +111,9 @@ int main(int argc, char ** argv) {
// save choice to use color for later
// (note for later: this is a slightly awkward choice)
static console_state con_st;
con_st.use_color = params.use_color;
g_verbose = params.verbose;
con_st.multiline_input = params.multiline_input;
console_init(con_st);
atexit([]() { console_cleanup(con_st); });
@ -162,7 +157,6 @@ int main(int argc, char ** argv) {
llama_context * ctx;
struct stat model_stat;
g_ctx = &ctx;
// load the model and apply lora adapter, if any
ctx = llama_init_from_gpt_params(params);
@ -280,13 +274,18 @@ int main(int argc, char ** argv) {
fprintf(stderr, "\n");
}
// setup ctrl-c handler
struct sigaction sa;
sa.sa_flags = 0;
sigemptyset(&sa.sa_mask);
if (params.interactive) {
struct sigaction sigint_action;
sigint_action.sa_handler = sigint_handler;
sigemptyset (&sigint_action.sa_mask);
sigint_action.sa_flags = 0;
sigaction(SIGINT, &sigint_action, NULL);
sa.sa_handler = sigint_handler_interactive;
} else {
sa.sa_handler = sigint_handler_batch;
}
sigaction(SIGINT, &sa, NULL);
if (params.interactive) {
if (params.verbose) {
fprintf(stderr, "%s: interactive mode on.\n", __func__);
}
@ -483,7 +482,7 @@ int main(int argc, char ** argv) {
fprintf(stderr, EPHEMERAL("loading weights..."));
}
while (n_remain != 0 || params.interactive) {
while ((n_remain != 0 || params.interactive) && !is_terminated) {
// perform evaluation
if (embd.size() > 0) {
@ -872,6 +871,17 @@ int main(int argc, char ** argv) {
}
}
if (is_terminated) {
if (params.interactive) {
console_cleanup(con_st);
printf("\n");
}
if (params.verbose) {
llama_print_timings(ctx);
}
_exit(128 + SIGINT);
}
if (params.verbose) {
llama_print_timings(ctx);
}

View file

@ -731,7 +731,7 @@ void __cxx_atomic_store(__cxx_atomic_base_impl<_Tp>* __a, _Tp __val,
}
template <typename _Tp>
_LIBCPP_INLINE_VISIBILITY
_LIBCPP_INLINE_VISIBILITY inline
_Tp __cxx_atomic_load(const volatile __cxx_atomic_base_impl<_Tp>* __a,
memory_order __order) {
_Tp __ret;
@ -741,7 +741,7 @@ _Tp __cxx_atomic_load(const volatile __cxx_atomic_base_impl<_Tp>* __a,
}
template <typename _Tp>
_LIBCPP_INLINE_VISIBILITY
_LIBCPP_INLINE_VISIBILITY inline
_Tp __cxx_atomic_load(const __cxx_atomic_base_impl<_Tp>* __a, memory_order __order) {
_Tp __ret;
__atomic_load(&__a->__a_value, &__ret,