Fix some more issues with aarch64 and llama.cpp

This commit is contained in:
Justine Tunney 2023-05-10 07:32:15 -07:00
parent 64aca4dc4f
commit 290a49952e
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
11 changed files with 168 additions and 65 deletions

View file

@ -328,9 +328,15 @@ _Hide int __fmt(void *fn, void *arg, const char *format, va_list va) {
flags |= FLAGS_ISSIGNED;
// fallthrough
case 'u': {
uint128_t value;
flags &= ~FLAGS_HASH; // no hash for dec format
FormatNumber:
if (__fmt_ntoa(out, arg, va, signbit, log2base, prec, width, flags,
if (signbit > 63) {
value = va_arg(va, uint128_t);
} else {
value = va_arg(va, uint64_t);
}
if (__fmt_ntoa(out, arg, value, signbit, log2base, prec, width, flags,
alphabet) == -1) {
return -1;
}
@ -386,19 +392,32 @@ _Hide int __fmt(void *fn, void *arg, const char *format, va_list va) {
case 'e':
case 'E':
case 'a':
case 'A':
case 'A': {
int rc;
if (!_weaken(__fmt_dtoa)) {
p = "?";
prec = 0;
flags &= ~(FLAGS_PRECISION | FLAGS_PLUS | FLAGS_SPACE);
goto FormatString;
}
if (_weaken(__fmt_dtoa)(out, arg, d, flags, prec, sign, width,
longdouble, qchar, signbit, alphabet,
va) == -1) {
return -1;
rc = _weaken(__fmt_dtoa)(out, arg, d, flags, prec, sign, width,
longdouble, qchar, signbit, alphabet, va);
if (rc == -1) return -1;
#ifdef __aarch64__
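// huge kludge: on aarch64 the va_arg() performed inside __fmt_dtoa
// does not appear to advance our own `va` (presumably because va_list
// is passed by value on this target), so we consume the argument that
// the callee reported reading in order to stay in sync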
switch (rc) {
case __FMT_CONSUMED_DOUBLE:
va_arg(va, double);
break;
case __FMT_CONSUMED_LONG_DOUBLE:
va_arg(va, long double);
break;
default:
unreachable;
}
#endif /* __aarch64__ */
break;
}
case '%':
__FMT_PUT('%');
break;

View file

@ -3,6 +3,9 @@
#define PRINTF_NTOA_BUFFER_SIZE 144
#define __FMT_CONSUMED_DOUBLE 1
#define __FMT_CONSUMED_LONG_DOUBLE 2
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
@ -19,7 +22,7 @@ int __fmt_pad(int (*)(const char *, void *, size_t), void *,
int __fmt_stoa(int (*)(const char *, void *, size_t), void *, void *,
unsigned long, unsigned long, unsigned long, unsigned char,
unsigned char) _Hide;
int __fmt_ntoa(int (*)(const char *, void *, size_t), void *, va_list,
int __fmt_ntoa(int (*)(const char *, void *, size_t), void *, uint128_t,
unsigned char, unsigned long, unsigned long, unsigned long,
unsigned char, const char *) _Hide;
int __fmt_dtoa(int (*)(const char *, void *, size_t), void *, int, int, int,

View file

@ -158,12 +158,12 @@ int __fmt_ntoa2(int out(const char *, void *, size_t), void *arg,
flags, alphabet);
}
int __fmt_ntoa(int out(const char *, void *, size_t), void *arg, va_list va,
unsigned char signbit, unsigned long log2base,
int __fmt_ntoa(int out(const char *, void *, size_t), void *arg,
uint128_t value, unsigned char signbit, unsigned long log2base,
unsigned long prec, unsigned long width, unsigned char flags,
const char *lang) {
bool neg;
uint128_t value, sign;
uint128_t sign;
/* ignore '0' flag when prec or minus flag is given */
if (flags & (FLAGS_PRECISION | FLAGS_LEFT)) {
@ -175,12 +175,6 @@ int __fmt_ntoa(int out(const char *, void *, size_t), void *arg, va_list va,
flags &= ~(FLAGS_PLUS | FLAGS_SPACE);
}
if (signbit > 63) {
value = va_arg(va, uint128_t);
} else {
value = va_arg(va, uint64_t);
}
neg = 0;
sign = 1;
sign <<= signbit;

View file

@ -19,6 +19,7 @@
#include "libc/intrin/atomic.h"
#include "libc/intrin/kmalloc.h"
#include "libc/stdio/internal.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/thread/thread.h"
@ -30,9 +31,11 @@ FILE *__stdio_alloc(void) {
while (f) {
if (atomic_compare_exchange_weak_explicit(
&__stdio_freelist, &f,
atomic_load_explicit(&f->next, memory_order_acquire),
atomic_load_explicit((_Atomic(struct FILE *) *)&f->next,
memory_order_acquire),
memory_order_release, memory_order_relaxed)) {
atomic_store_explicit(&f->next, 0, memory_order_release);
atomic_store_explicit((_Atomic(struct FILE *) *)&f->next, 0,
memory_order_release);
break;
}
}
@ -50,7 +53,8 @@ void __stdio_free(FILE *f) {
bzero(f, sizeof(*f));
g = atomic_load_explicit(&__stdio_freelist, memory_order_acquire);
for (;;) {
atomic_store_explicit(&f->next, g, memory_order_release);
atomic_store_explicit((_Atomic(struct FILE *) *)&f->next, g,
memory_order_release);
if (atomic_compare_exchange_weak_explicit(&__stdio_freelist, &g, f,
memory_order_release,
memory_order_relaxed)) {

View file

@ -237,7 +237,7 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d,
union U u;
struct FPBits fpb;
char *s, *q, *se, *s0, special[8];
int c, k, i1, ui, bw, bex, sgn, prec1, decpt;
int c, k, i1, ui, bw, rc, bex, sgn, prec1, decpt, consumed;
x = 0;
switch (d) {
case 'F':
@ -245,6 +245,7 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d,
if (!(flags & FLAGS_PRECISION)) prec = 6;
if (!longdouble) {
x = va_arg(va, double);
consumed = __FMT_CONSUMED_DOUBLE;
s = s0 = dtoa(x, 3, prec, &decpt, &fpb.sign, &se);
if (decpt == 9999) {
if (s && s[0] == 'N') {
@ -255,6 +256,7 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d,
}
} else {
u.ld = va_arg(va, long double);
consumed = __FMT_CONSUMED_LONG_DOUBLE;
xfpbits(&u, &fpb);
s = s0 =
gdtoa(fpb.fpi, fpb.ex, fpb.bits, &fpb.kind, 3, prec, &decpt, &se);
@ -274,7 +276,9 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d,
memcpy(q, kSpecialFloats[fpb.kind == STRTOG_NaN][d >= 'a'], 4);
flags &= ~(FLAGS_PRECISION | FLAGS_PLUS | FLAGS_HASH | FLAGS_SPACE);
prec = 0;
return __fmt_stoa(out, arg, s, flags, prec, width, signbit, qchar);
rc = __fmt_stoa(out, arg, s, flags, prec, width, signbit, qchar);
if (rc == -1) return -1;
return consumed;
}
FormatReal:
if (fpb.sign /* && (x || sign) */) sign = '-';
@ -338,6 +342,7 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d,
if (prec < 1) prec = 1;
if (!longdouble) {
x = va_arg(va, double);
consumed = __FMT_CONSUMED_DOUBLE;
s = s0 = dtoa(x, 2, prec, &decpt, &fpb.sign, &se);
if (decpt == 9999) {
if (s && s[0] == 'N') {
@ -348,6 +353,7 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d,
}
} else {
u.ld = va_arg(va, long double);
consumed = __FMT_CONSUMED_LONG_DOUBLE;
xfpbits(&u, &fpb);
s = s0 = gdtoa(fpb.fpi, fpb.ex, fpb.bits, &fpb.kind, prec ? 2 : 0, prec,
&decpt, &se);
@ -379,6 +385,7 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d,
if (prec < 0) prec = 0;
if (!longdouble) {
x = va_arg(va, double);
consumed = __FMT_CONSUMED_DOUBLE;
s = s0 = dtoa(x, 2, prec + 1, &decpt, &fpb.sign, &se);
if (decpt == 9999) {
if (s && s[0] == 'N') {
@ -389,6 +396,7 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d,
}
} else {
u.ld = va_arg(va, long double);
consumed = __FMT_CONSUMED_LONG_DOUBLE;
xfpbits(&u, &fpb);
s = s0 = gdtoa(fpb.fpi, fpb.ex, fpb.bits, &fpb.kind, prec ? 2 : 0, prec,
&decpt, &se);
@ -451,9 +459,11 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d,
FormatBinary:
if (longdouble) {
u.ld = va_arg(va, long double);
consumed = __FMT_CONSUMED_LONG_DOUBLE;
xfpbits(&u, &fpb);
} else {
u.d = va_arg(va, double);
consumed = __FMT_CONSUMED_DOUBLE;
dfpbits(&u, &fpb);
}
if (fpb.kind == STRTOG_Infinite || fpb.kind == STRTOG_NaN) {
@ -530,5 +540,5 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d,
default:
unreachable;
}
return 0;
return consumed;
}

View file

@ -15,21 +15,21 @@ COSMOPOLITAN_C_START_
*/
typedef struct FILE {
uint8_t bufmode; /* 0x00 _IOFBF, etc. (ignored if fd=-1) */
bool noclose; /* 0x01 for fake dup() todo delete! */
uint32_t iomode; /* 0x04 O_RDONLY, etc. (ignored if fd=-1) */
int32_t state; /* 0x08 0=OK, -1=EOF, >0=errno */
int fd; /* 0x0c ≥0=fd, -1=closed|buffer */
uint32_t beg; /* 0x10 */
uint32_t end; /* 0x14 */
char *buf; /* 0x18 */
uint32_t size; /* 0x20 */
uint32_t nofree; /* 0x24 */
int pid; /* 0x28 */
char *getln; /* 0x30 */
char lock[16]; /* 0x38 */
_Atomic(struct FILE *) next; /* 0x48 */
char mem[BUFSIZ]; /* 0x50 */
uint8_t bufmode; /* 0x00 _IOFBF, etc. (ignored if fd=-1) */
bool noclose; /* 0x01 for fake dup() todo delete! */
uint32_t iomode; /* 0x04 O_RDONLY, etc. (ignored if fd=-1) */
int32_t state; /* 0x08 0=OK, -1=EOF, >0=errno */
int fd; /* 0x0c ≥0=fd, -1=closed|buffer */
uint32_t beg; /* 0x10 */
uint32_t end; /* 0x14 */
char *buf; /* 0x18 */
uint32_t size; /* 0x20 */
uint32_t nofree; /* 0x24 */
int pid; /* 0x28 */
char *getln; /* 0x30 */
char lock[16]; /* 0x38 */
struct FILE *next; /* 0x48 */
char mem[BUFSIZ]; /* 0x50 */
} FILE;
extern FILE *stdin;

View file

@ -22,6 +22,18 @@
/**
* Sets signal mask on thread attributes object.
*
* For example, to spawn a thread that won't interfere with signals:
*
* pthread_t id;
* sigset_t mask;
* pthread_attr_t attr;
* sigfillset(&mask);
* pthread_attr_init(&attr);
* pthread_attr_setsigmask_np(&attr, &mask);
* pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
* pthread_create(&id, &attr, Worker, 0);
* pthread_attr_destroy(&attr);
*
* @param attr is the thread attributes object
* @param sigmask will be copied into attributes, or if it's null, then
* any signal mask previously set on the object will be cleared