Make more ML improvements

- Fix UX issues with llama.com
- Do housekeeping on libm code
- Add more vectorization to GGML
- Get GGJT quantizer programs working well
- Have the quantizer keep the output layer as f16c
- Prefetching improves performance 15% if you use fewer threads
This commit is contained in:
Justine Tunney 2023-05-16 08:07:23 -07:00
parent 80db9de173
commit e7eb0b3070
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
46 changed files with 340 additions and 289 deletions

View file

@ -88,7 +88,6 @@ typedef double double_t;
#define isnan(x) __builtin_isnan(x)
#define isfinite(x) __builtin_isfinite(x)
#define isnormal(x) __builtin_isnormal(x)
#define signbit(x) __builtin_signbit(x)
#define isgreater(x, y) __builtin_isgreater(x, y)
#define isgreaterequal(x, y) __builtin_isgreaterequal(x, y)
#define isless(x, y) __builtin_isless(x, y)
@ -99,6 +98,11 @@ typedef double double_t;
#define fpclassify(x) \
__builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, x)
#define signbit(x) \
(sizeof(x) == sizeof(double) ? __builtin_signbit(x) \
: sizeof(x) == sizeof(float) ? __builtin_signbitf(x) \
: __builtin_signbitl(x))
extern int signgam;
double acos(double);
@ -305,7 +309,7 @@ void sincos(double, double *, double *);
void sincosf(float, float *, float *);
void sincosl(long double, long double *, long double *);
float fsumf(const float *, size_t);
double fsumf(const float *, size_t);
double fsum(const double *, size_t);
double j0(double);