mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 03:27:39 +00:00
Add more sorting algorithms
This commit is contained in:
parent
b7bf052a4b
commit
7c9ef924bf
17 changed files with 49004 additions and 5 deletions
1
Makefile
1
Makefile
|
@ -143,6 +143,7 @@ include libc/time/time.mk # │
|
|||
include libc/stdio/stdio.mk # │
|
||||
include third_party/libcxx/libcxx.mk # │
|
||||
include net/net.mk # │
|
||||
include third_party/vqsort/vqsort.mk # │
|
||||
include libc/log/log.mk # │
|
||||
include third_party/bzip2/bzip2.mk # │
|
||||
include dsp/core/core.mk # │
|
||||
|
|
|
@ -89,6 +89,7 @@ EXAMPLES_DIRECTDEPS = \
|
|||
THIRD_PARTY_SED \
|
||||
THIRD_PARTY_STB \
|
||||
THIRD_PARTY_TR \
|
||||
THIRD_PARTY_VQSORT \
|
||||
THIRD_PARTY_XED \
|
||||
THIRD_PARTY_ZLIB \
|
||||
TOOL_BUILD_LIB \
|
||||
|
|
25
examples/vqsort.c
Normal file
25
examples/vqsort.c
Normal file
|
@ -0,0 +1,25 @@
|
|||
#if 0
|
||||
/*─────────────────────────────────────────────────────────────────╗
|
||||
│ To the extent possible under law, Justine Tunney has waived │
|
||||
│ all copyright and related or neighboring rights to this file, │
|
||||
│ as it is written in the following disclaimers: │
|
||||
│ • http://unlicense.org/ │
|
||||
│ • http://creativecommons.org/publicdomain/zero/1.0/ │
|
||||
╚─────────────────────────────────────────────────────────────────*/
|
||||
#endif
|
||||
#include "third_party/vqsort/vqsort.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/stdio/stdio.h"
|
||||
#include "third_party/vqsort/vqsort.h"
|
||||
|
||||
// how to sort one gigabyte of 64-bit integers per second
|
||||
|
||||
/**
 * Sorts a small hard-coded array with vqsort_int64() and prints the
 * result on a single line, with one space between values.
 */
int main(int argc, char *argv[]) {
  int64_t A[] = {9, 3, -3, 5, 23, 7};
  const size_t count = ARRAYLEN(A);
  size_t k;
  vqsort_int64(A, count);
  for (k = 0; k != count; ++k) {
    // separator goes before every element except the first
    if (k != 0) {
      printf(" ");
    }
    printf("%ld", A[k]);
  }
  printf("\n");
}
|
|
@ -34,6 +34,9 @@ int _tarjan(int, const int (*)[2], int, int[], int[], int *)
|
|||
char *_replacestr(const char *, const char *, const char *)
|
||||
paramsnonnull() __algalloc;
|
||||
|
||||
bool radix_sort_int32(int32_t *, size_t);
|
||||
bool radix_sort_int64(int64_t *, size_t);
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_ALG_ALG_H_ */
|
||||
|
|
101
libc/mem/radix_sort_int32.c
Normal file
101
libc/mem/radix_sort_int32.c
Normal file
|
@ -0,0 +1,101 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2023 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/mem/alg.h"
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
// Credit: Andrew Schein. 2009. Open-source C++ implementation of Radix
|
||||
// Sort for double-precision floating points. (2009).
|
||||
|
||||
#define HIST_SIZE (size_t)2048
|
||||
#define get_byte_0(v) ((v)&0x7FF)
|
||||
#define get_byte_1(v) (((v) >> 11) & 0x7FF)
|
||||
#define get_byte_2_flip_sign(v) (((unsigned)(v) >> 22) ^ 0x200)
|
||||
|
||||
bool radix_sort_int32(int32_t *A, size_t n) {
|
||||
int32_t *T, *reader, *writer;
|
||||
size_t i, pos, sum0, sum1, sum2, tsum, *b0, *b1, *b2;
|
||||
|
||||
if (n < HIST_SIZE) {
|
||||
_intsort(A, n);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!(T = (int32_t *)malloc(n * sizeof(int32_t)))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!(b0 = (size_t *)calloc(HIST_SIZE * 3, sizeof(size_t)))) {
|
||||
free(T);
|
||||
return false;
|
||||
}
|
||||
|
||||
b1 = b0 + HIST_SIZE;
|
||||
b2 = b1 + HIST_SIZE;
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
b0[get_byte_0(A[i])]++;
|
||||
b1[get_byte_1(A[i])]++;
|
||||
b2[get_byte_2_flip_sign(A[i])]++;
|
||||
}
|
||||
|
||||
sum0 = sum1 = sum2 = tsum = 0;
|
||||
|
||||
for (i = 0; i < HIST_SIZE; i++) {
|
||||
tsum = b0[i] + sum0;
|
||||
b0[i] = sum0 - 1;
|
||||
sum0 = tsum;
|
||||
|
||||
tsum = b1[i] + sum1;
|
||||
b1[i] = sum1 - 1;
|
||||
sum1 = tsum;
|
||||
|
||||
tsum = b2[i] + sum2;
|
||||
b2[i] = sum2 - 1;
|
||||
sum2 = tsum;
|
||||
}
|
||||
|
||||
writer = T;
|
||||
reader = A;
|
||||
for (i = 0; i < n; i++) {
|
||||
pos = get_byte_0(reader[i]);
|
||||
writer[++b0[pos]] = reader[i];
|
||||
}
|
||||
|
||||
writer = A;
|
||||
reader = T;
|
||||
for (i = 0; i < n; i++) {
|
||||
pos = get_byte_1(reader[i]);
|
||||
writer[++b1[pos]] = reader[i];
|
||||
}
|
||||
|
||||
writer = T;
|
||||
reader = A;
|
||||
for (i = 0; i < n; i++) {
|
||||
pos = get_byte_2_flip_sign(reader[i]);
|
||||
writer[++b2[pos]] = reader[i];
|
||||
}
|
||||
|
||||
memcpy(A, T, n * sizeof(int));
|
||||
|
||||
free(b0);
|
||||
free(T);
|
||||
return true;
|
||||
}
|
144
libc/mem/radix_sort_int64.c
Normal file
144
libc/mem/radix_sort_int64.c
Normal file
|
@ -0,0 +1,144 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2023 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/mem/alg.h"
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
// Credit: Andrew Schein. 2009. Open-source C++ implementation of Radix
|
||||
// Sort for double-precision floating points. (2009).
|
||||
|
||||
#define HIST_SIZE (size_t)2048
|
||||
#define get_byte_0(v) ((v)&0x7FF)
|
||||
#define get_byte_1(v) (((v) >> 11) & 0x7FF)
|
||||
#define get_byte_2(v) (((v) >> 22) & 0x7FF)
|
||||
#define get_byte_3(v) (((v) >> 33) & 0x7FF)
|
||||
#define get_byte_4(v) (((v) >> 44) & 0x7FF)
|
||||
#define get_byte_5(v) (((v) >> 55) & 0x7FF)
|
||||
#define get_byte_2_flip_sign(v) (((unsigned)(v) >> 22) ^ 0x200)
|
||||
#define get_byte_5_flip_sign(v) ((((v) >> 55) & 0x7FF) ^ 0x400)
|
||||
|
||||
bool radix_sort_int64(int64_t *A, size_t n) {
|
||||
int64_t *T, *reader, *writer;
|
||||
size_t *b0, *b1, *b2, *b3, *b4, *b5;
|
||||
size_t i, pos, sum0, sum1, sum2, sum3, sum4, sum5, tsum;
|
||||
|
||||
if (n < HIST_SIZE) {
|
||||
_longsort(A, n);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!(T = (int64_t *)malloc(n * sizeof(int64_t)))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!(b0 = (size_t *)calloc(HIST_SIZE * 6, sizeof(size_t)))) {
|
||||
free(T);
|
||||
return false;
|
||||
}
|
||||
|
||||
b1 = b0 + HIST_SIZE;
|
||||
b2 = b1 + HIST_SIZE;
|
||||
b3 = b2 + HIST_SIZE;
|
||||
b4 = b3 + HIST_SIZE;
|
||||
b5 = b4 + HIST_SIZE;
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
b0[get_byte_0(A[i])]++;
|
||||
b1[get_byte_1(A[i])]++;
|
||||
b2[get_byte_2(A[i])]++;
|
||||
b3[get_byte_3(A[i])]++;
|
||||
b4[get_byte_4(A[i])]++;
|
||||
b5[get_byte_5_flip_sign(A[i])]++;
|
||||
}
|
||||
|
||||
sum0 = sum1 = sum2 = sum3 = sum4 = sum5 = tsum = 0;
|
||||
|
||||
for (i = 0; i < HIST_SIZE; i++) {
|
||||
tsum = b0[i] + sum0;
|
||||
b0[i] = sum0 - 1;
|
||||
sum0 = tsum;
|
||||
|
||||
tsum = b1[i] + sum1;
|
||||
b1[i] = sum1 - 1;
|
||||
sum1 = tsum;
|
||||
|
||||
tsum = b2[i] + sum2;
|
||||
b2[i] = sum2 - 1;
|
||||
sum2 = tsum;
|
||||
|
||||
tsum = b3[i] + sum3;
|
||||
b3[i] = sum3 - 1;
|
||||
sum3 = tsum;
|
||||
|
||||
tsum = b4[i] + sum4;
|
||||
b4[i] = sum4 - 1;
|
||||
sum4 = tsum;
|
||||
|
||||
tsum = b5[i] + sum5;
|
||||
b5[i] = sum5 - 1;
|
||||
sum5 = tsum;
|
||||
}
|
||||
|
||||
writer = T;
|
||||
reader = A;
|
||||
for (i = 0; i < n; i++) {
|
||||
pos = get_byte_0(reader[i]);
|
||||
writer[++b0[pos]] = reader[i];
|
||||
}
|
||||
|
||||
writer = A;
|
||||
reader = T;
|
||||
for (i = 0; i < n; i++) {
|
||||
pos = get_byte_1(reader[i]);
|
||||
writer[++b1[pos]] = reader[i];
|
||||
}
|
||||
|
||||
writer = T;
|
||||
reader = A;
|
||||
for (i = 0; i < n; i++) {
|
||||
pos = get_byte_2(reader[i]);
|
||||
writer[++b2[pos]] = reader[i];
|
||||
}
|
||||
|
||||
writer = A;
|
||||
reader = T;
|
||||
for (i = 0; i < n; i++) {
|
||||
pos = get_byte_3(reader[i]);
|
||||
writer[++b3[pos]] = reader[i];
|
||||
}
|
||||
|
||||
writer = T;
|
||||
reader = A;
|
||||
for (i = 0; i < n; i++) {
|
||||
pos = get_byte_4(reader[i]);
|
||||
writer[++b4[pos]] = reader[i];
|
||||
}
|
||||
|
||||
writer = A;
|
||||
reader = T;
|
||||
for (i = 0; i < n; i++) {
|
||||
pos = get_byte_5_flip_sign(reader[i]);
|
||||
writer[++b5[pos]] = reader[i];
|
||||
}
|
||||
|
||||
free(b0);
|
||||
free(T);
|
||||
return true;
|
||||
}
|
|
@ -21,9 +21,11 @@
|
|||
#include "libc/mem/mem.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/stdio/rand.h"
|
||||
#include "libc/stdio/stdio.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/testlib/ezbench.h"
|
||||
#include "libc/testlib/testlib.h"
|
||||
#include "third_party/vqsort/vqsort.h"
|
||||
|
||||
int CompareLong(const void *a, const void *b) {
|
||||
const long *x = a;
|
||||
|
@ -44,13 +46,88 @@ TEST(_longsort, test) {
|
|||
ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
|
||||
}
|
||||
|
||||
TEST(vqsort_int64_avx2, test) {
|
||||
if (!X86_HAVE(AVX2)) return;
|
||||
size_t n = 5000;
|
||||
long *a = gc(calloc(n, sizeof(long)));
|
||||
long *b = gc(calloc(n, sizeof(long)));
|
||||
rngset(a, n * sizeof(long), 0, 0);
|
||||
memcpy(b, a, n * sizeof(long));
|
||||
qsort(a, n, sizeof(long), CompareLong);
|
||||
vqsort_int64_avx2(b, n);
|
||||
ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
|
||||
}
|
||||
|
||||
TEST(vqsort_int64_sse4, test) {
|
||||
if (!X86_HAVE(SSE4_2)) return;
|
||||
size_t n = 5000;
|
||||
long *a = gc(calloc(n, sizeof(long)));
|
||||
long *b = gc(calloc(n, sizeof(long)));
|
||||
rngset(a, n * sizeof(long), 0, 0);
|
||||
memcpy(b, a, n * sizeof(long));
|
||||
qsort(a, n, sizeof(long), CompareLong);
|
||||
vqsort_int64_sse4(b, n);
|
||||
ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
|
||||
}
|
||||
|
||||
TEST(vqsort_int64_ssse3, test) {
|
||||
if (!X86_HAVE(SSSE3)) return;
|
||||
size_t n = 5000;
|
||||
long *a = gc(calloc(n, sizeof(long)));
|
||||
long *b = gc(calloc(n, sizeof(long)));
|
||||
rngset(a, n * sizeof(long), 0, 0);
|
||||
memcpy(b, a, n * sizeof(long));
|
||||
qsort(a, n, sizeof(long), CompareLong);
|
||||
vqsort_int64_ssse3(b, n);
|
||||
ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
|
||||
}
|
||||
|
||||
TEST(vqsort_int64_sse2, test) {
|
||||
size_t n = 5000;
|
||||
long *a = gc(calloc(n, sizeof(long)));
|
||||
long *b = gc(calloc(n, sizeof(long)));
|
||||
rngset(a, n * sizeof(long), 0, 0);
|
||||
memcpy(b, a, n * sizeof(long));
|
||||
qsort(a, n, sizeof(long), CompareLong);
|
||||
vqsort_int64_sse2(b, n);
|
||||
ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
|
||||
}
|
||||
|
||||
TEST(radix_sort_int64, test) {
|
||||
size_t n = 5000;
|
||||
long *a = gc(calloc(n, sizeof(long)));
|
||||
long *b = gc(calloc(n, sizeof(long)));
|
||||
rngset(a, n * sizeof(long), 0, 0);
|
||||
memcpy(b, a, n * sizeof(long));
|
||||
qsort(a, n, sizeof(long), CompareLong);
|
||||
radix_sort_int64(b, n);
|
||||
ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
|
||||
}
|
||||
|
||||
BENCH(_longsort, bench) {
|
||||
size_t n = 1000;
|
||||
printf("\n");
|
||||
size_t n = 5000;
|
||||
long *p1 = gc(malloc(n * sizeof(long)));
|
||||
long *p2 = gc(malloc(n * sizeof(long)));
|
||||
rngset(p1, n * sizeof(long), 0, 0);
|
||||
EZBENCH2("_longsort", memcpy(p2, p1, n * sizeof(long)), _longsort(p2, n));
|
||||
EZBENCH2("qsort", memcpy(p2, p1, n * sizeof(long)),
|
||||
if (X86_HAVE(AVX2)) {
|
||||
EZBENCH2("vqsort_int64_avx2", memcpy(p2, p1, n * sizeof(long)),
|
||||
vqsort_int64_avx2(p2, n));
|
||||
}
|
||||
if (X86_HAVE(SSE4_2)) {
|
||||
EZBENCH2("vqsort_int64_sse4", memcpy(p2, p1, n * sizeof(long)),
|
||||
vqsort_int64_sse4(p2, n));
|
||||
}
|
||||
if (X86_HAVE(SSSE3)) {
|
||||
EZBENCH2("vqsort_int64_ssse3", memcpy(p2, p1, n * sizeof(long)),
|
||||
vqsort_int64_ssse3(p2, n));
|
||||
}
|
||||
EZBENCH2("vqsort_int64_sse2", memcpy(p2, p1, n * sizeof(long)),
|
||||
vqsort_int64_sse2(p2, n));
|
||||
EZBENCH2("radix_sort_int64", memcpy(p2, p1, n * sizeof(long)),
|
||||
radix_sort_int64(p2, n));
|
||||
EZBENCH2("qsort(long)", memcpy(p2, p1, n * sizeof(long)),
|
||||
qsort(p2, n, sizeof(long), CompareLong));
|
||||
}
|
||||
|
||||
|
@ -73,12 +150,88 @@ TEST(_intsort, test) {
|
|||
ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
|
||||
}
|
||||
|
||||
TEST(vqsort_int32_avx2, test) {
|
||||
if (!X86_HAVE(AVX2)) return;
|
||||
size_t n = 5000;
|
||||
int *a = gc(calloc(n, sizeof(int)));
|
||||
int *b = gc(calloc(n, sizeof(int)));
|
||||
rngset(a, n * sizeof(int), 0, 0);
|
||||
memcpy(b, a, n * sizeof(int));
|
||||
qsort(a, n, sizeof(int), CompareInt);
|
||||
vqsort_int32_avx2(b, n);
|
||||
ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
|
||||
}
|
||||
|
||||
TEST(vqsort_int32_sse4, test) {
|
||||
if (!X86_HAVE(SSE4_2)) return;
|
||||
size_t n = 5000;
|
||||
int *a = gc(calloc(n, sizeof(int)));
|
||||
int *b = gc(calloc(n, sizeof(int)));
|
||||
rngset(a, n * sizeof(int), 0, 0);
|
||||
memcpy(b, a, n * sizeof(int));
|
||||
qsort(a, n, sizeof(int), CompareInt);
|
||||
vqsort_int32_sse4(b, n);
|
||||
ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
|
||||
}
|
||||
|
||||
TEST(vqsort_int32_ssse3, test) {
|
||||
if (!X86_HAVE(SSSE3)) return;
|
||||
size_t n = 5000;
|
||||
int *a = gc(calloc(n, sizeof(int)));
|
||||
int *b = gc(calloc(n, sizeof(int)));
|
||||
rngset(a, n * sizeof(int), 0, 0);
|
||||
memcpy(b, a, n * sizeof(int));
|
||||
qsort(a, n, sizeof(int), CompareInt);
|
||||
vqsort_int32_ssse3(b, n);
|
||||
ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
|
||||
}
|
||||
|
||||
TEST(vqsort_int32_sse2, test) {
|
||||
size_t n = 5000;
|
||||
int *a = gc(calloc(n, sizeof(int)));
|
||||
int *b = gc(calloc(n, sizeof(int)));
|
||||
rngset(a, n * sizeof(int), 0, 0);
|
||||
memcpy(b, a, n * sizeof(int));
|
||||
qsort(a, n, sizeof(int), CompareInt);
|
||||
vqsort_int32_sse2(b, n);
|
||||
ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
|
||||
}
|
||||
|
||||
TEST(radix_sort_int32, test) {
|
||||
size_t n = 5000;
|
||||
int *a = gc(calloc(n, sizeof(int)));
|
||||
int *b = gc(calloc(n, sizeof(int)));
|
||||
rngset(a, n * sizeof(int), 0, 0);
|
||||
memcpy(b, a, n * sizeof(int));
|
||||
qsort(a, n, sizeof(int), CompareInt);
|
||||
radix_sort_int32(b, n);
|
||||
ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
|
||||
}
|
||||
|
||||
BENCH(_intsort, bench) {
|
||||
size_t n = 1000;
|
||||
printf("\n");
|
||||
size_t n = 10000;
|
||||
int *p1 = gc(malloc(n * sizeof(int)));
|
||||
int *p2 = gc(malloc(n * sizeof(int)));
|
||||
rngset(p1, n * sizeof(int), 0, 0);
|
||||
EZBENCH2("_intsort", memcpy(p2, p1, n * sizeof(int)), _intsort(p2, n));
|
||||
EZBENCH2("qsort", memcpy(p2, p1, n * sizeof(int)),
|
||||
if (X86_HAVE(AVX2)) {
|
||||
EZBENCH2("vqsort_int32_avx2", memcpy(p2, p1, n * sizeof(int)),
|
||||
vqsort_int32_avx2(p2, n));
|
||||
}
|
||||
if (X86_HAVE(SSE4_2)) {
|
||||
EZBENCH2("vqsort_int32_sse4", memcpy(p2, p1, n * sizeof(int)),
|
||||
vqsort_int32_sse4(p2, n));
|
||||
}
|
||||
if (X86_HAVE(SSSE3)) {
|
||||
EZBENCH2("vqsort_int32_ssse3", memcpy(p2, p1, n * sizeof(int)),
|
||||
vqsort_int32_ssse3(p2, n));
|
||||
}
|
||||
EZBENCH2("vqsort_int32_sse2", memcpy(p2, p1, n * sizeof(int)),
|
||||
vqsort_int32_sse2(p2, n));
|
||||
EZBENCH2("djbsort", memcpy(p2, p1, n * sizeof(int)), djbsort(p2, n));
|
||||
EZBENCH2("radix_sort_int32", memcpy(p2, p1, n * sizeof(int)),
|
||||
radix_sort_int32(p2, n));
|
||||
EZBENCH2("qsort(int)", memcpy(p2, p1, n * sizeof(int)),
|
||||
qsort(p2, n, sizeof(int), CompareInt));
|
||||
}
|
||||
|
|
|
@ -51,7 +51,8 @@ TEST_LIBC_STR_DIRECTDEPS = \
|
|||
THIRD_PARTY_REGEX \
|
||||
THIRD_PARTY_ZLIB \
|
||||
THIRD_PARTY_LIBCXX \
|
||||
THIRD_PARTY_SMALLZ4
|
||||
THIRD_PARTY_SMALLZ4 \
|
||||
THIRD_PARTY_VQSORT
|
||||
|
||||
TEST_LIBC_STR_DEPS := \
|
||||
$(call uniq,$(foreach x,$(TEST_LIBC_STR_DIRECTDEPS),$($(x))))
|
||||
|
|
5
third_party/compiler_rt/popcountdi2.c
vendored
5
third_party/compiler_rt/popcountdi2.c
vendored
|
@ -22,6 +22,10 @@ STATIC_YOINK("huge_compiler_rt_license");
|
|||
COMPILER_RT_ABI si_int
|
||||
__popcountdi2(di_int a)
|
||||
{
|
||||
#ifdef __POPCNT__
|
||||
asm("popcnt\t%1,%0" : "=r"(a) : "r"(a) : "cc");
|
||||
return a;
|
||||
#else
|
||||
du_int x2 = (du_int)a;
|
||||
x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL);
|
||||
/* Every 2 bits holds the sum of every pair of bits (32) */
|
||||
|
@ -36,4 +40,5 @@ __popcountdi2(di_int a)
|
|||
/* The lower 16 bits hold two 32 bit sums (6 significant bits). */
|
||||
/* Upper 16 bits are garbage */
|
||||
return (x + (x >> 8)) & 0x0000007F; /* (7 significant bits) */
|
||||
#endif
|
||||
}
|
||||
|
|
1
third_party/third_party.mk
vendored
1
third_party/third_party.mk
vendored
|
@ -34,6 +34,7 @@ o/$(MODE)/third_party: \
|
|||
o/$(MODE)/third_party/tidy \
|
||||
o/$(MODE)/third_party/tr \
|
||||
o/$(MODE)/third_party/unzip \
|
||||
o/$(MODE)/third_party/vqsort \
|
||||
o/$(MODE)/third_party/xed \
|
||||
o/$(MODE)/third_party/zip \
|
||||
o/$(MODE)/third_party/zlib
|
||||
|
|
23
third_party/vqsort/README.cosmo
vendored
Normal file
23
third_party/vqsort/README.cosmo
vendored
Normal file
|
@ -0,0 +1,23 @@
|
|||
DESCRIPTION
|
||||
|
||||
vqsort implements vectorized quicksort using avx2. this is the fastest
|
||||
way to sort integers. this goes as fast as djbsort for 32-bit integers
|
||||
except it supports 64-bit integers too, which go just as fast: about a
|
||||
gigabyte of memory sorted per second. It's 3x faster than simple radix
|
||||
sort. It's 5x faster than simple quicksort. It's 10x faster than qsort
|
||||
|
||||
LICENSE
|
||||
|
||||
Apache 2.0
|
||||
|
||||
ORIGIN
|
||||
|
||||
https://github.com/google/highway/
|
||||
commit 50331e0523bbf5f6c94b94263a91680f118e0986
|
||||
Author: Jan Wassenberg <janwas@google.com>
|
||||
Date: Wed Apr 26 11:20:33 2023 -0700
|
||||
Faster vqsort for small arrays (7x speedup! for N=100)
|
||||
|
||||
LOCAL CHANGES
|
||||
|
||||
Precompiled because upstream codebase is slow, gigantic, and hairy.
|
20
third_party/vqsort/vqsort.h
vendored
Normal file
20
third_party/vqsort/vqsort.h
vendored
Normal file
|
@ -0,0 +1,20 @@
|
|||
#ifndef COSMOPOLITAN_THIRD_PARTY_VQSORT_H_
|
||||
#define COSMOPOLITAN_THIRD_PARTY_VQSORT_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
void vqsort_int64(int64_t *, size_t);
|
||||
void vqsort_int64_avx2(int64_t *, size_t);
|
||||
void vqsort_int64_sse4(int64_t *, size_t);
|
||||
void vqsort_int64_ssse3(int64_t *, size_t);
|
||||
void vqsort_int64_sse2(int64_t *, size_t);
|
||||
|
||||
void vqsort_int32(int32_t *, size_t);
|
||||
void vqsort_int32_avx2(int32_t *, size_t);
|
||||
void vqsort_int32_sse4(int32_t *, size_t);
|
||||
void vqsort_int32_ssse3(int32_t *, size_t);
|
||||
void vqsort_int32_sse2(int32_t *, size_t);
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_THIRD_PARTY_VQSORT_H_ */
|
52
third_party/vqsort/vqsort.mk
vendored
Normal file
52
third_party/vqsort/vqsort.mk
vendored
Normal file
|
@ -0,0 +1,52 @@
|
|||
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
|
||||
#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
|
||||
|
||||
PKGS += THIRD_PARTY_VQSORT
|
||||
|
||||
THIRD_PARTY_VQSORT_ARTIFACTS += THIRD_PARTY_VQSORT_A
|
||||
THIRD_PARTY_VQSORT = $(THIRD_PARTY_VQSORT_A_DEPS) $(THIRD_PARTY_VQSORT_A)
|
||||
THIRD_PARTY_VQSORT_A = o/$(MODE)/third_party/vqsort/vqsort.a
|
||||
THIRD_PARTY_VQSORT_A_FILES := $(wildcard third_party/vqsort/*)
|
||||
THIRD_PARTY_VQSORT_A_HDRS = $(filter %.h,$(THIRD_PARTY_VQSORT_A_FILES))
|
||||
THIRD_PARTY_VQSORT_A_SRCS_C = $(filter %.c,$(THIRD_PARTY_VQSORT_A_FILES))
|
||||
THIRD_PARTY_VQSORT_A_SRCS_S = $(filter %.S,$(THIRD_PARTY_VQSORT_A_FILES))
|
||||
THIRD_PARTY_VQSORT_A_SRCS = $(THIRD_PARTY_VQSORT_A_SRCS_C) $(THIRD_PARTY_VQSORT_A_SRCS_S)
|
||||
THIRD_PARTY_VQSORT_A_OBJS_C = $(THIRD_PARTY_VQSORT_A_SRCS_C:%.c=o/$(MODE)/%.o)
|
||||
THIRD_PARTY_VQSORT_A_OBJS_S = $(THIRD_PARTY_VQSORT_A_SRCS_S:%.S=o/$(MODE)/%.o)
|
||||
THIRD_PARTY_VQSORT_A_OBJS = $(THIRD_PARTY_VQSORT_A_OBJS_C) $(THIRD_PARTY_VQSORT_A_OBJS_S)
|
||||
|
||||
THIRD_PARTY_VQSORT_A_CHECKS = \
|
||||
$(THIRD_PARTY_VQSORT_A).pkg \
|
||||
$(THIRD_PARTY_VQSORT_A_HDRS:%=o/$(MODE)/%.ok)
|
||||
|
||||
THIRD_PARTY_VQSORT_A_DIRECTDEPS = \
|
||||
LIBC_INTRIN \
|
||||
LIBC_MEM \
|
||||
LIBC_NEXGEN32E \
|
||||
LIBC_RUNTIME \
|
||||
LIBC_STDIO \
|
||||
LIBC_STR \
|
||||
LIBC_STUBS \
|
||||
THIRD_PARTY_COMPILER_RT
|
||||
|
||||
THIRD_PARTY_VQSORT_A_DEPS := \
|
||||
$(call uniq,$(foreach x,$(THIRD_PARTY_VQSORT_A_DIRECTDEPS),$($(x))))
|
||||
|
||||
$(THIRD_PARTY_VQSORT_A): \
|
||||
third_party/vqsort/ \
|
||||
$(THIRD_PARTY_VQSORT_A).pkg \
|
||||
$(THIRD_PARTY_VQSORT_A_OBJS)
|
||||
|
||||
$(THIRD_PARTY_VQSORT_A).pkg: \
|
||||
$(THIRD_PARTY_VQSORT_A_OBJS) \
|
||||
$(foreach x,$(THIRD_PARTY_VQSORT_A_DIRECTDEPS),$($(x)_A).pkg)
|
||||
|
||||
THIRD_PARTY_VQSORT_LIBS = $(foreach x,$(THIRD_PARTY_VQSORT_ARTIFACTS),$($(x)))
|
||||
THIRD_PARTY_VQSORT_SRCS = $(foreach x,$(THIRD_PARTY_VQSORT_ARTIFACTS),$($(x)_SRCS))
|
||||
THIRD_PARTY_VQSORT_HDRS = $(foreach x,$(THIRD_PARTY_VQSORT_ARTIFACTS),$($(x)_HDRS))
|
||||
THIRD_PARTY_VQSORT_CHECKS = $(foreach x,$(THIRD_PARTY_VQSORT_ARTIFACTS),$($(x)_CHECKS))
|
||||
THIRD_PARTY_VQSORT_OBJS = $(foreach x,$(THIRD_PARTY_VQSORT_ARTIFACTS),$($(x)_OBJS))
|
||||
$(THIRD_PARTY_VQSORT_OBJS): $(BUILD_FILES) third_party/vqsort/vqsort.mk
|
||||
|
||||
.PHONY: o/$(MODE)/third_party/vqsort
|
||||
o/$(MODE)/third_party/vqsort: $(THIRD_PARTY_VQSORT_CHECKS)
|
24732
third_party/vqsort/vqsort_i32a.S
vendored
Normal file
24732
third_party/vqsort/vqsort_i32a.S
vendored
Normal file
File diff suppressed because it is too large
Load diff
23679
third_party/vqsort/vqsort_i64a.S
vendored
Normal file
23679
third_party/vqsort/vqsort_i64a.S
vendored
Normal file
File diff suppressed because it is too large
Load diff
29
third_party/vqsort/vqsort_int32.c
vendored
Normal file
29
third_party/vqsort/vqsort_int32.c
vendored
Normal file
|
@ -0,0 +1,29 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2023 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/mem/alg.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "third_party/vqsort/vqsort.h"
|
||||
|
||||
void vqsort_int32(int32_t *A, size_t n) {
|
||||
if (X86_HAVE(AVX2)) {
|
||||
vqsort_int32_avx2(A, n);
|
||||
} else {
|
||||
radix_sort_int32(A, n);
|
||||
}
|
||||
}
|
29
third_party/vqsort/vqsort_int64.c
vendored
Normal file
29
third_party/vqsort/vqsort_int64.c
vendored
Normal file
|
@ -0,0 +1,29 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2023 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/mem/alg.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "third_party/vqsort/vqsort.h"
|
||||
|
||||
void vqsort_int64(int64_t *A, size_t n) {
|
||||
if (X86_HAVE(AVX2)) {
|
||||
vqsort_int64_avx2(A, n);
|
||||
} else {
|
||||
radix_sort_int64(A, n);
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue