cosmopolitan/test/libc/str/longsort_test.c
Justine Tunney e7eb0b3070
Make more ML improvements
- Fix UX issues with llama.com
- Do housekeeping on libm code
- Add more vectorization to GGML
- Get GGJT quantizer programs working well
- Have the quantizer keep the output layer as f16c
- Prefetching improves performance 15% if you use fewer threads
2023-05-16 08:07:23 -07:00

262 lines
8.7 KiB
C

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2021 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/mem/alg.h"
#include "libc/mem/gc.internal.h"
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/rand.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/testlib.h"
#include "third_party/vqsort/vqsort.h"
/**
 * Sorts the first `n` ints of `A` into ascending order, in place.
 *
 * Plain insertion sort: each element is pulled out and the larger
 * elements before it are shifted one slot right until its position is
 * found. Equal elements keep their relative order since only strictly
 * greater values are shifted. Nothing happens when `n` is 0 or 1.
 *
 * @param A is the array to sort
 * @param n is the number of elements in `A`
 */
void InsertionSort(int *A, int n) {
  int pos, slot, pending;
  for (pos = 1; pos < n; ++pos) {
    pending = A[pos];
    // walk the hole leftwards past every element greater than pending
    for (slot = pos; slot > 0 && A[slot - 1] > pending; --slot) {
      A[slot] = A[slot - 1];
    }
    A[slot] = pending;
  }
}
/**
 * Three-way comparator for `long` values, usable with qsort().
 *
 * @param a points to the left-hand `long`
 * @param b points to the right-hand `long`
 * @return -1 if `*a < *b`, +1 if `*a > *b`, otherwise 0
 */
int CompareLong(const void *a, const void *b) {
  long lhs = *(const long *)a;
  long rhs = *(const long *)b;
  // difference of two 0/1 comparisons; avoids overflow of lhs - rhs
  return (lhs > rhs) - (lhs < rhs);
}
/**
 * Checks that _longsort() orders 5000 longs filled by rngset() exactly
 * the way qsort() with CompareLong does.
 */
TEST(_longsort, test) {
  const size_t n = 5000;
  const size_t nbytes = n * sizeof(long);
  long *a, *b;
  a = gc(calloc(n, sizeof(*a)));  // reference copy, ordered by qsort()
  b = gc(calloc(n, sizeof(*b)));  // copy ordered by the routine under test
  rngset(a, nbytes, 0, 0);
  memcpy(b, a, nbytes);  // both sorts start from the same bytes
  qsort(a, n, sizeof(*a), CompareLong);
  _longsort(b, n);
  ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
}
#ifdef __x86_64__
/**
 * Cross-checks vqsort_int64_avx2() against qsort() on identical input.
 *
 * Returns early, asserting nothing, when X86_HAVE(AVX2) is false.
 */
TEST(vqsort_int64_avx2, test) {
  if (!X86_HAVE(AVX2)) {
    return;
  }
  const size_t n = 5000;
  const size_t nbytes = n * sizeof(long);
  long *a = gc(calloc(n, sizeof(*a)));  // reference, ordered by qsort()
  long *b = gc(calloc(n, sizeof(*b)));  // ordered by the routine under test
  rngset(a, nbytes, 0, 0);
  memcpy(b, a, nbytes);  // give both sorts the same starting bytes
  qsort(a, n, sizeof(*a), CompareLong);
  vqsort_int64_avx2(b, n);
  ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
}
/**
 * Cross-checks vqsort_int64_sse4() against qsort() on identical input.
 *
 * Returns early, asserting nothing, when X86_HAVE(SSE4_2) is false.
 */
TEST(vqsort_int64_sse4, test) {
  if (!X86_HAVE(SSE4_2)) {
    return;
  }
  const size_t n = 5000;
  const size_t nbytes = n * sizeof(long);
  long *a = gc(calloc(n, sizeof(*a)));  // reference, ordered by qsort()
  long *b = gc(calloc(n, sizeof(*b)));  // ordered by the routine under test
  rngset(a, nbytes, 0, 0);
  memcpy(b, a, nbytes);  // give both sorts the same starting bytes
  qsort(a, n, sizeof(*a), CompareLong);
  vqsort_int64_sse4(b, n);
  ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
}
/**
 * Cross-checks vqsort_int64_ssse3() against qsort() on identical input.
 *
 * Returns early, asserting nothing, when X86_HAVE(SSSE3) is false.
 */
TEST(vqsort_int64_ssse3, test) {
  if (!X86_HAVE(SSSE3)) {
    return;
  }
  const size_t n = 5000;
  const size_t nbytes = n * sizeof(long);
  long *a = gc(calloc(n, sizeof(*a)));  // reference, ordered by qsort()
  long *b = gc(calloc(n, sizeof(*b)));  // ordered by the routine under test
  rngset(a, nbytes, 0, 0);
  memcpy(b, a, nbytes);  // give both sorts the same starting bytes
  qsort(a, n, sizeof(*a), CompareLong);
  vqsort_int64_ssse3(b, n);
  ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
}
/**
 * Cross-checks vqsort_int64_sse2() against qsort() on identical input.
 *
 * Unlike the sibling vqsort tests there is no runtime feature check
 * here; the test only exists when __x86_64__ is defined.
 */
TEST(vqsort_int64_sse2, test) {
  const size_t n = 5000;
  const size_t nbytes = n * sizeof(long);
  long *a, *b;
  a = gc(calloc(n, sizeof(*a)));  // reference, ordered by qsort()
  b = gc(calloc(n, sizeof(*b)));  // ordered by the routine under test
  rngset(a, nbytes, 0, 0);
  memcpy(b, a, nbytes);  // give both sorts the same starting bytes
  qsort(a, n, sizeof(*a), CompareLong);
  vqsort_int64_sse2(b, n);
  ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
}
#endif /* __x86_64__ */
/**
 * Checks that radix_sort_int64() produces the same ordering as qsort()
 * with CompareLong on 5000 longs filled by rngset().
 */
TEST(radix_sort_int64, test) {
  const size_t n = 5000;
  const size_t nbytes = n * sizeof(long);
  long *a, *b;
  a = gc(calloc(n, sizeof(*a)));  // reference, ordered by qsort()
  b = gc(calloc(n, sizeof(*b)));  // ordered by the routine under test
  rngset(a, nbytes, 0, 0);
  memcpy(b, a, nbytes);  // give both sorts the same starting bytes
  qsort(a, n, sizeof(*a), CompareLong);
  radix_sort_int64(b, n);
  ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
}
/**
 * Benchmarks the `long` sorting routines against one another.
 *
 * Every EZBENCH2() entry sorts the scratch buffer `p2` (5000 longs),
 * which its second argument refills from the rngset()-filled buffer
 * `p1`. The vqsort variants are only compiled on x86-64, and all but
 * the SSE2 one are additionally gated on a runtime X86_HAVE() check.
 */
BENCH(_longsort, bench) {
// NOTE(review): presumably separates the report from earlier output
printf("\n");
size_t n = 5000;
// p1 holds the pristine input; p2 is the buffer that actually gets sorted
long *p1 = gc(malloc(n * sizeof(long)));
long *p2 = gc(malloc(n * sizeof(long)));
rngset(p1, n * sizeof(long), 0, 0);
// NOTE(review): the memcpy argument looks like untimed per-run setup that
// restores the unsorted data -- confirm in libc/testlib/ezbench.h
EZBENCH2("_longsort", memcpy(p2, p1, n * sizeof(long)), _longsort(p2, n));
#ifdef __x86_64__
if (X86_HAVE(AVX2)) {
EZBENCH2("vqsort_int64_avx2", memcpy(p2, p1, n * sizeof(long)),
vqsort_int64_avx2(p2, n));
}
if (X86_HAVE(SSE4_2)) {
EZBENCH2("vqsort_int64_sse4", memcpy(p2, p1, n * sizeof(long)),
vqsort_int64_sse4(p2, n));
}
if (X86_HAVE(SSSE3)) {
EZBENCH2("vqsort_int64_ssse3", memcpy(p2, p1, n * sizeof(long)),
vqsort_int64_ssse3(p2, n));
}
// the SSE2 variant runs without a runtime feature check
EZBENCH2("vqsort_int64_sse2", memcpy(p2, p1, n * sizeof(long)),
vqsort_int64_sse2(p2, n));
#endif /* __x86_64__ */
EZBENCH2("radix_sort_int64", memcpy(p2, p1, n * sizeof(long)),
radix_sort_int64(p2, n));
// libc qsort() with the CompareLong callback, for comparison
EZBENCH2("qsort(long)", memcpy(p2, p1, n * sizeof(long)),
qsort(p2, n, sizeof(long), CompareLong));
}
/**
 * Three-way comparator for `int` values, usable with qsort().
 *
 * @param a points to the left-hand `int`
 * @param b points to the right-hand `int`
 * @return -1 if `*a < *b`, +1 if `*a > *b`, otherwise 0
 */
int CompareInt(const void *a, const void *b) {
  const int lhs = *(const int *)a;
  const int rhs = *(const int *)b;
  // explicit comparisons rather than lhs - rhs, which could overflow
  return lhs < rhs ? -1 : (lhs > rhs ? +1 : 0);
}
/**
 * Checks that InsertionSort() orders 5000 ints filled by rngset()
 * exactly the way qsort() with CompareInt does.
 */
TEST(InsertionSort, test) {
  const size_t n = 5000;
  const size_t nbytes = n * sizeof(int);
  int *a, *b;
  a = gc(calloc(n, sizeof(*a)));  // reference copy, ordered by qsort()
  b = gc(calloc(n, sizeof(*b)));  // copy ordered by the routine under test
  rngset(a, nbytes, 0, 0);
  memcpy(b, a, nbytes);  // both sorts start from the same bytes
  qsort(a, n, sizeof(*a), CompareInt);
  InsertionSort(b, n);  // n converts from size_t to the int parameter
  ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
}
#ifdef __x86_64__
/**
 * Cross-checks vqsort_int32_avx2() against qsort() on identical input.
 *
 * Returns early, asserting nothing, when X86_HAVE(AVX2) is false.
 */
TEST(vqsort_int32_avx2, test) {
  if (!X86_HAVE(AVX2)) {
    return;
  }
  const size_t n = 5000;
  const size_t nbytes = n * sizeof(int);
  int *a = gc(calloc(n, sizeof(*a)));  // reference, ordered by qsort()
  int *b = gc(calloc(n, sizeof(*b)));  // ordered by the routine under test
  rngset(a, nbytes, 0, 0);
  memcpy(b, a, nbytes);  // give both sorts the same starting bytes
  qsort(a, n, sizeof(*a), CompareInt);
  vqsort_int32_avx2(b, n);
  ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
}
/**
 * Cross-checks vqsort_int32_sse4() against qsort() on identical input.
 *
 * Returns early, asserting nothing, when X86_HAVE(SSE4_2) is false.
 */
TEST(vqsort_int32_sse4, test) {
  if (!X86_HAVE(SSE4_2)) {
    return;
  }
  const size_t n = 5000;
  const size_t nbytes = n * sizeof(int);
  int *a = gc(calloc(n, sizeof(*a)));  // reference, ordered by qsort()
  int *b = gc(calloc(n, sizeof(*b)));  // ordered by the routine under test
  rngset(a, nbytes, 0, 0);
  memcpy(b, a, nbytes);  // give both sorts the same starting bytes
  qsort(a, n, sizeof(*a), CompareInt);
  vqsort_int32_sse4(b, n);
  ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
}
/**
 * Cross-checks vqsort_int32_ssse3() against qsort() on identical input.
 *
 * Returns early, asserting nothing, when X86_HAVE(SSSE3) is false.
 */
TEST(vqsort_int32_ssse3, test) {
  if (!X86_HAVE(SSSE3)) {
    return;
  }
  const size_t n = 5000;
  const size_t nbytes = n * sizeof(int);
  int *a = gc(calloc(n, sizeof(*a)));  // reference, ordered by qsort()
  int *b = gc(calloc(n, sizeof(*b)));  // ordered by the routine under test
  rngset(a, nbytes, 0, 0);
  memcpy(b, a, nbytes);  // give both sorts the same starting bytes
  qsort(a, n, sizeof(*a), CompareInt);
  vqsort_int32_ssse3(b, n);
  ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
}
/**
 * Cross-checks vqsort_int32_sse2() against qsort() on identical input.
 *
 * Unlike the sibling vqsort tests there is no runtime feature check
 * here; the test only exists when __x86_64__ is defined.
 */
TEST(vqsort_int32_sse2, test) {
  const size_t n = 5000;
  const size_t nbytes = n * sizeof(int);
  int *a, *b;
  a = gc(calloc(n, sizeof(*a)));  // reference, ordered by qsort()
  b = gc(calloc(n, sizeof(*b)));  // ordered by the routine under test
  rngset(a, nbytes, 0, 0);
  memcpy(b, a, nbytes);  // give both sorts the same starting bytes
  qsort(a, n, sizeof(*a), CompareInt);
  vqsort_int32_sse2(b, n);
  ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
}
#endif /* __x86_64__ */
/**
 * Checks that radix_sort_int32() produces the same ordering as qsort()
 * with CompareInt on 5000 ints filled by rngset().
 */
TEST(radix_sort_int32, test) {
  const size_t n = 5000;
  const size_t nbytes = n * sizeof(int);
  int *a, *b;
  a = gc(calloc(n, sizeof(*a)));  // reference, ordered by qsort()
  b = gc(calloc(n, sizeof(*b)));  // ordered by the routine under test
  rngset(a, nbytes, 0, 0);
  memcpy(b, a, nbytes);  // give both sorts the same starting bytes
  qsort(a, n, sizeof(*a), CompareInt);
  radix_sort_int32(b, n);
  ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
}
/**
 * Benchmarks the `int` sorting routines against one another.
 *
 * Every EZBENCH2() entry sorts the scratch buffer `p2` (10000 ints),
 * which its second argument refills from the rngset()-filled buffer
 * `p1`. Note the element count is 10000 here, whereas the TEST() cases
 * in this file use 5000. The vqsort variants are only compiled on
 * x86-64, and all but the SSE2 one are additionally gated on a runtime
 * X86_HAVE() check.
 */
BENCH(InsertionSort, bench) {
// NOTE(review): presumably separates the report from earlier output
printf("\n");
size_t n = 10000;
// p1 holds the pristine input; p2 is the buffer that actually gets sorted
int *p1 = gc(malloc(n * sizeof(int)));
int *p2 = gc(malloc(n * sizeof(int)));
rngset(p1, n * sizeof(int), 0, 0);
// NOTE(review): the memcpy argument looks like untimed per-run setup that
// restores the unsorted data -- confirm in libc/testlib/ezbench.h
EZBENCH2("InsertionSort", memcpy(p2, p1, n * sizeof(int)),
InsertionSort(p2, n));
#ifdef __x86_64__
if (X86_HAVE(AVX2)) {
EZBENCH2("vqsort_int32_avx2", memcpy(p2, p1, n * sizeof(int)),
vqsort_int32_avx2(p2, n));
}
if (X86_HAVE(SSE4_2)) {
EZBENCH2("vqsort_int32_sse4", memcpy(p2, p1, n * sizeof(int)),
vqsort_int32_sse4(p2, n));
}
if (X86_HAVE(SSSE3)) {
EZBENCH2("vqsort_int32_ssse3", memcpy(p2, p1, n * sizeof(int)),
vqsort_int32_ssse3(p2, n));
}
// the SSE2 variant runs without a runtime feature check
EZBENCH2("vqsort_int32_sse2", memcpy(p2, p1, n * sizeof(int)),
vqsort_int32_sse2(p2, n));
#endif /* __x86_64__ */
// djbsort is benchmarked here but has no TEST() case in the visible source
EZBENCH2("djbsort", memcpy(p2, p1, n * sizeof(int)), djbsort(p2, n));
EZBENCH2("radix_sort_int32", memcpy(p2, p1, n * sizeof(int)),
radix_sort_int32(p2, n));
// libc qsort() with the CompareInt callback, for comparison
EZBENCH2("qsort(int)", memcpy(p2, p1, n * sizeof(int)),
qsort(p2, n, sizeof(int), CompareInt));
}