Make more ML improvements

- Fix UX issues with llama.com
- Do housekeeping on libm code
- Add more vectorization to GGML
- Get GGJT quantizer programs working well
- Have the quantizer keep the output layer as f16c
- Prefetching improves performance 15% if you use fewer threads
This commit is contained in:
Justine Tunney 2023-05-16 08:07:23 -07:00
parent 80db9de173
commit e7eb0b3070
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
46 changed files with 340 additions and 289 deletions

View file

@ -27,6 +27,18 @@
#include "libc/testlib/testlib.h"
#include "third_party/vqsort/vqsort.h"
/**
 * Sorts the first `n` ints of `A` into ascending order, in place.
 *
 * Straightforward insertion sort, used here as a simple baseline for
 * the other integer sorting routines exercised by this test file.
 *
 * @param A is the array to reorder
 * @param n is the number of elements; nothing happens when `n <= 1`
 */
void InsertionSort(int *A, int n) {
  for (int next = 1; next < n; ++next) {
    int held = A[next];
    int slot = next;
    // slide every larger element of the sorted prefix one step right
    // until we reach the position where `held` belongs
    for (; slot > 0 && A[slot - 1] > held; --slot) {
      A[slot] = A[slot - 1];
    }
    A[slot] = held;
  }
}
int CompareLong(const void *a, const void *b) {
const long *x = a;
const long *y = b;
@ -145,14 +157,14 @@ int CompareInt(const void *a, const void *b) {
return 0;
}
TEST(_intsort, test) {
TEST(InsertionSort, test) {
size_t n = 5000;
int *a = gc(calloc(n, sizeof(int)));
int *b = gc(calloc(n, sizeof(int)));
rngset(a, n * sizeof(int), 0, 0);
memcpy(b, a, n * sizeof(int));
qsort(a, n, sizeof(int), CompareInt);
_intsort(b, n);
InsertionSort(b, n);
ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
}
@ -218,13 +230,14 @@ TEST(radix_sort_int32, test) {
ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
}
BENCH(_intsort, bench) {
BENCH(InsertionSort, bench) {
printf("\n");
size_t n = 10000;
int *p1 = gc(malloc(n * sizeof(int)));
int *p2 = gc(malloc(n * sizeof(int)));
rngset(p1, n * sizeof(int), 0, 0);
EZBENCH2("_intsort", memcpy(p2, p1, n * sizeof(int)), _intsort(p2, n));
EZBENCH2("InsertionSort", memcpy(p2, p1, n * sizeof(int)),
InsertionSort(p2, n));
#ifdef __x86_64__
if (X86_HAVE(AVX2)) {
EZBENCH2("vqsort_int32_avx2", memcpy(p2, p1, n * sizeof(int)),

View file

@ -24,7 +24,6 @@
#include "libc/runtime/runtime.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/testlib.h"
#include "libc/tinymath/tinymath.h"
#include "libc/x/x.h"
float remainderf2(float, float);