mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-02-07 06:53:33 +00:00
Add more sorting algorithms
This commit is contained in:
parent
b7bf052a4b
commit
7c9ef924bf
17 changed files with 49004 additions and 5 deletions
1
Makefile
1
Makefile
|
@ -143,6 +143,7 @@ include libc/time/time.mk # │
|
||||||
include libc/stdio/stdio.mk # │
|
include libc/stdio/stdio.mk # │
|
||||||
include third_party/libcxx/libcxx.mk # │
|
include third_party/libcxx/libcxx.mk # │
|
||||||
include net/net.mk # │
|
include net/net.mk # │
|
||||||
|
include third_party/vqsort/vqsort.mk # │
|
||||||
include libc/log/log.mk # │
|
include libc/log/log.mk # │
|
||||||
include third_party/bzip2/bzip2.mk # │
|
include third_party/bzip2/bzip2.mk # │
|
||||||
include dsp/core/core.mk # │
|
include dsp/core/core.mk # │
|
||||||
|
|
|
@ -89,6 +89,7 @@ EXAMPLES_DIRECTDEPS = \
|
||||||
THIRD_PARTY_SED \
|
THIRD_PARTY_SED \
|
||||||
THIRD_PARTY_STB \
|
THIRD_PARTY_STB \
|
||||||
THIRD_PARTY_TR \
|
THIRD_PARTY_TR \
|
||||||
|
THIRD_PARTY_VQSORT \
|
||||||
THIRD_PARTY_XED \
|
THIRD_PARTY_XED \
|
||||||
THIRD_PARTY_ZLIB \
|
THIRD_PARTY_ZLIB \
|
||||||
TOOL_BUILD_LIB \
|
TOOL_BUILD_LIB \
|
||||||
|
|
25
examples/vqsort.c
Normal file
25
examples/vqsort.c
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
#if 0
|
||||||
|
/*─────────────────────────────────────────────────────────────────╗
|
||||||
|
│ To the extent possible under law, Justine Tunney has waived │
|
||||||
|
│ all copyright and related or neighboring rights to this file, │
|
||||||
|
│ as it is written in the following disclaimers: │
|
||||||
|
│ • http://unlicense.org/ │
|
||||||
|
│ • http://creativecommons.org/publicdomain/zero/1.0/ │
|
||||||
|
╚─────────────────────────────────────────────────────────────────*/
|
||||||
|
#endif
|
||||||
|
#include "third_party/vqsort/vqsort.h"
|
||||||
|
#include "libc/macros.internal.h"
|
||||||
|
#include "libc/stdio/stdio.h"
|
||||||
|
#include "third_party/vqsort/vqsort.h"
|
||||||
|
|
||||||
|
// how to sort one gigabyte of 64-bit integers per second
|
||||||
|
|
||||||
|
// Sorts a small fixed array with vqsort_int64() and prints the
// elements on one line, separated by single spaces.
int main(int argc, char *argv[]) {
  int64_t nums[] = {9, 3, -3, 5, 23, 7};
  int k = 0;

  vqsort_int64(nums, ARRAYLEN(nums));

  while (k < ARRAYLEN(nums)) {
    // separator goes before every element except the first
    if (k != 0) {
      printf(" ");
    }
    printf("%ld", nums[k]);
    ++k;
  }
  printf("\n");
  return 0;
}
|
|
@ -34,6 +34,9 @@ int _tarjan(int, const int (*)[2], int, int[], int[], int *)
|
||||||
char *_replacestr(const char *, const char *, const char *)
|
char *_replacestr(const char *, const char *, const char *)
|
||||||
paramsnonnull() __algalloc;
|
paramsnonnull() __algalloc;
|
||||||
|
|
||||||
|
bool radix_sort_int32(int32_t *, size_t);
|
||||||
|
bool radix_sort_int64(int64_t *, size_t);
|
||||||
|
|
||||||
COSMOPOLITAN_C_END_
|
COSMOPOLITAN_C_END_
|
||||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||||
#endif /* COSMOPOLITAN_LIBC_ALG_ALG_H_ */
|
#endif /* COSMOPOLITAN_LIBC_ALG_ALG_H_ */
|
||||||
|
|
101
libc/mem/radix_sort_int32.c
Normal file
101
libc/mem/radix_sort_int32.c
Normal file
|
@ -0,0 +1,101 @@
|
||||||
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||||
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||||
|
│ Copyright 2023 Justine Alexandra Roberts Tunney │
|
||||||
|
│ │
|
||||||
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||||
|
│ any purpose with or without fee is hereby granted, provided that the │
|
||||||
|
│ above copyright notice and this permission notice appear in all copies. │
|
||||||
|
│ │
|
||||||
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||||
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||||
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||||
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||||
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||||
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||||
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||||
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||||
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
|
#include "libc/mem/alg.h"
|
||||||
|
#include "libc/mem/mem.h"
|
||||||
|
#include "libc/runtime/runtime.h"
|
||||||
|
#include "libc/str/str.h"
|
||||||
|
|
||||||
|
// Credit: Andrew Schein. 2009. Open-source C++ implementation of Radix
|
||||||
|
// Sort for double-precision floating points. (2009).
|
||||||
|
|
||||||
|
#define HIST_SIZE (size_t)2048
|
||||||
|
#define get_byte_0(v) ((v)&0x7FF)
|
||||||
|
#define get_byte_1(v) (((v) >> 11) & 0x7FF)
|
||||||
|
#define get_byte_2_flip_sign(v) (((unsigned)(v) >> 22) ^ 0x200)
|
||||||
|
|
||||||
|
bool radix_sort_int32(int32_t *A, size_t n) {
|
||||||
|
int32_t *T, *reader, *writer;
|
||||||
|
size_t i, pos, sum0, sum1, sum2, tsum, *b0, *b1, *b2;
|
||||||
|
|
||||||
|
if (n < HIST_SIZE) {
|
||||||
|
_intsort(A, n);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(T = (int32_t *)malloc(n * sizeof(int32_t)))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(b0 = (size_t *)calloc(HIST_SIZE * 3, sizeof(size_t)))) {
|
||||||
|
free(T);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
b1 = b0 + HIST_SIZE;
|
||||||
|
b2 = b1 + HIST_SIZE;
|
||||||
|
|
||||||
|
for (i = 0; i < n; i++) {
|
||||||
|
b0[get_byte_0(A[i])]++;
|
||||||
|
b1[get_byte_1(A[i])]++;
|
||||||
|
b2[get_byte_2_flip_sign(A[i])]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
sum0 = sum1 = sum2 = tsum = 0;
|
||||||
|
|
||||||
|
for (i = 0; i < HIST_SIZE; i++) {
|
||||||
|
tsum = b0[i] + sum0;
|
||||||
|
b0[i] = sum0 - 1;
|
||||||
|
sum0 = tsum;
|
||||||
|
|
||||||
|
tsum = b1[i] + sum1;
|
||||||
|
b1[i] = sum1 - 1;
|
||||||
|
sum1 = tsum;
|
||||||
|
|
||||||
|
tsum = b2[i] + sum2;
|
||||||
|
b2[i] = sum2 - 1;
|
||||||
|
sum2 = tsum;
|
||||||
|
}
|
||||||
|
|
||||||
|
writer = T;
|
||||||
|
reader = A;
|
||||||
|
for (i = 0; i < n; i++) {
|
||||||
|
pos = get_byte_0(reader[i]);
|
||||||
|
writer[++b0[pos]] = reader[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
writer = A;
|
||||||
|
reader = T;
|
||||||
|
for (i = 0; i < n; i++) {
|
||||||
|
pos = get_byte_1(reader[i]);
|
||||||
|
writer[++b1[pos]] = reader[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
writer = T;
|
||||||
|
reader = A;
|
||||||
|
for (i = 0; i < n; i++) {
|
||||||
|
pos = get_byte_2_flip_sign(reader[i]);
|
||||||
|
writer[++b2[pos]] = reader[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy(A, T, n * sizeof(int));
|
||||||
|
|
||||||
|
free(b0);
|
||||||
|
free(T);
|
||||||
|
return true;
|
||||||
|
}
|
144
libc/mem/radix_sort_int64.c
Normal file
144
libc/mem/radix_sort_int64.c
Normal file
|
@ -0,0 +1,144 @@
|
||||||
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||||
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||||
|
│ Copyright 2023 Justine Alexandra Roberts Tunney │
|
||||||
|
│ │
|
||||||
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||||
|
│ any purpose with or without fee is hereby granted, provided that the │
|
||||||
|
│ above copyright notice and this permission notice appear in all copies. │
|
||||||
|
│ │
|
||||||
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||||
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||||
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||||
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||||
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||||
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||||
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||||
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||||
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
|
#include "libc/mem/alg.h"
|
||||||
|
#include "libc/mem/mem.h"
|
||||||
|
#include "libc/runtime/runtime.h"
|
||||||
|
#include "libc/str/str.h"
|
||||||
|
|
||||||
|
// Credit: Andrew Schein. 2009. Open-source C++ implementation of Radix
|
||||||
|
// Sort for double-precision floating points. (2009).
|
||||||
|
|
||||||
|
#define HIST_SIZE (size_t)2048
#define get_byte_0(v) ((v)&0x7FF)
#define get_byte_1(v) (((v) >> 11) & 0x7FF)
#define get_byte_2(v) (((v) >> 22) & 0x7FF)
#define get_byte_3(v) (((v) >> 33) & 0x7FF)
#define get_byte_4(v) (((v) >> 44) & 0x7FF)
#define get_byte_5(v) (((v) >> 55) & 0x7FF)
// the top digit only has 9 bits (55..63). it's extracted from the
// unsigned bit pattern so the shift can't sign-extend, and its highest
// bit (the sign bit) is flipped so negative keys get the lower buckets
#define get_byte_5_flip_sign(v) (((uint64_t)(v) >> 55) ^ 0x100)

/**
 * Sorts signed 64-bit integers in ascending order using a stable
 * six-pass least-significant-digit radix sort.
 *
 * The key is split into five 11-bit digits plus a 9-bit top digit
 * whose sign bit is flipped. Arrays shorter than HIST_SIZE are handed
 * to _longsort() instead, which needs no extra memory.
 *
 * @param A is the array, which is sorted in place
 * @param n is the number of elements in A
 * @return true on success, or false if scratch memory could not be
 *     allocated, in which case A has not been modified
 */
bool radix_sort_int64(int64_t *A, size_t n) {
  int64_t *T, *reader, *writer;
  size_t *b0, *b1, *b2, *b3, *b4, *b5, *hist;
  size_t i, k, sum, count;
  uint64_t u;

  if (n < HIST_SIZE) {
    _longsort(A, n);
    return true;
  }

  // refuse element counts whose byte size would wrap around
  if (n > (size_t)-1 / sizeof(*T)) {
    return false;
  }

  if (!(T = malloc(n * sizeof(*T)))) {
    return false;
  }

  // one zeroed allocation holds all six histograms back to back
  if (!(b0 = calloc(HIST_SIZE * 6, sizeof(*b0)))) {
    free(T);
    return false;
  }
  b1 = b0 + HIST_SIZE;
  b2 = b1 + HIST_SIZE;
  b3 = b2 + HIST_SIZE;
  b4 = b3 + HIST_SIZE;
  b5 = b4 + HIST_SIZE;

  // count digit occurrences for all six digits in a single sweep
  for (i = 0; i < n; i++) {
    u = (uint64_t)A[i];
    b0[get_byte_0(u)]++;
    b1[get_byte_1(u)]++;
    b2[get_byte_2(u)]++;
    b3[get_byte_3(u)]++;
    b4[get_byte_4(u)]++;
    b5[get_byte_5_flip_sign(u)]++;
  }

  // turn each histogram into exclusive prefix sums, i.e. the index at
  // which the first element of each bucket will be written
  for (k = 0; k < 6; k++) {
    hist = b0 + k * HIST_SIZE;
    sum = 0;
    for (i = 0; i < HIST_SIZE; i++) {
      count = hist[i];
      hist[i] = sum;
      sum += count;
    }
  }

  // pass 1: A -> T on bits 0..10
  reader = A;
  writer = T;
  for (i = 0; i < n; i++) {
    u = (uint64_t)reader[i];
    writer[b0[get_byte_0(u)]++] = reader[i];
  }

  // pass 2: T -> A on bits 11..21
  reader = T;
  writer = A;
  for (i = 0; i < n; i++) {
    u = (uint64_t)reader[i];
    writer[b1[get_byte_1(u)]++] = reader[i];
  }

  // pass 3: A -> T on bits 22..32
  reader = A;
  writer = T;
  for (i = 0; i < n; i++) {
    u = (uint64_t)reader[i];
    writer[b2[get_byte_2(u)]++] = reader[i];
  }

  // pass 4: T -> A on bits 33..43
  reader = T;
  writer = A;
  for (i = 0; i < n; i++) {
    u = (uint64_t)reader[i];
    writer[b3[get_byte_3(u)]++] = reader[i];
  }

  // pass 5: A -> T on bits 44..54
  reader = A;
  writer = T;
  for (i = 0; i < n; i++) {
    u = (uint64_t)reader[i];
    writer[b4[get_byte_4(u)]++] = reader[i];
  }

  // pass 6: T -> A on bits 55..63 with the sign bit flipped; an even
  // number of passes means the result ends up in A with no final copy
  reader = T;
  writer = A;
  for (i = 0; i < n; i++) {
    u = (uint64_t)reader[i];
    writer[b5[get_byte_5_flip_sign(u)]++] = reader[i];
  }

  free(b0);
  free(T);
  return true;
}
|
|
@ -21,9 +21,11 @@
|
||||||
#include "libc/mem/mem.h"
|
#include "libc/mem/mem.h"
|
||||||
#include "libc/runtime/runtime.h"
|
#include "libc/runtime/runtime.h"
|
||||||
#include "libc/stdio/rand.h"
|
#include "libc/stdio/rand.h"
|
||||||
|
#include "libc/stdio/stdio.h"
|
||||||
#include "libc/str/str.h"
|
#include "libc/str/str.h"
|
||||||
#include "libc/testlib/ezbench.h"
|
#include "libc/testlib/ezbench.h"
|
||||||
#include "libc/testlib/testlib.h"
|
#include "libc/testlib/testlib.h"
|
||||||
|
#include "third_party/vqsort/vqsort.h"
|
||||||
|
|
||||||
int CompareLong(const void *a, const void *b) {
|
int CompareLong(const void *a, const void *b) {
|
||||||
const long *x = a;
|
const long *x = a;
|
||||||
|
@ -44,13 +46,88 @@ TEST(_longsort, test) {
|
||||||
ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
|
ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(vqsort_int64_avx2, test) {
|
||||||
|
if (!X86_HAVE(AVX2)) return;
|
||||||
|
size_t n = 5000;
|
||||||
|
long *a = gc(calloc(n, sizeof(long)));
|
||||||
|
long *b = gc(calloc(n, sizeof(long)));
|
||||||
|
rngset(a, n * sizeof(long), 0, 0);
|
||||||
|
memcpy(b, a, n * sizeof(long));
|
||||||
|
qsort(a, n, sizeof(long), CompareLong);
|
||||||
|
vqsort_int64_avx2(b, n);
|
||||||
|
ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(vqsort_int64_sse4, test) {
|
||||||
|
if (!X86_HAVE(SSE4_2)) return;
|
||||||
|
size_t n = 5000;
|
||||||
|
long *a = gc(calloc(n, sizeof(long)));
|
||||||
|
long *b = gc(calloc(n, sizeof(long)));
|
||||||
|
rngset(a, n * sizeof(long), 0, 0);
|
||||||
|
memcpy(b, a, n * sizeof(long));
|
||||||
|
qsort(a, n, sizeof(long), CompareLong);
|
||||||
|
vqsort_int64_sse4(b, n);
|
||||||
|
ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(vqsort_int64_ssse3, test) {
|
||||||
|
if (!X86_HAVE(SSSE3)) return;
|
||||||
|
size_t n = 5000;
|
||||||
|
long *a = gc(calloc(n, sizeof(long)));
|
||||||
|
long *b = gc(calloc(n, sizeof(long)));
|
||||||
|
rngset(a, n * sizeof(long), 0, 0);
|
||||||
|
memcpy(b, a, n * sizeof(long));
|
||||||
|
qsort(a, n, sizeof(long), CompareLong);
|
||||||
|
vqsort_int64_ssse3(b, n);
|
||||||
|
ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(vqsort_int64_sse2, test) {
|
||||||
|
size_t n = 5000;
|
||||||
|
long *a = gc(calloc(n, sizeof(long)));
|
||||||
|
long *b = gc(calloc(n, sizeof(long)));
|
||||||
|
rngset(a, n * sizeof(long), 0, 0);
|
||||||
|
memcpy(b, a, n * sizeof(long));
|
||||||
|
qsort(a, n, sizeof(long), CompareLong);
|
||||||
|
vqsort_int64_sse2(b, n);
|
||||||
|
ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(radix_sort_int64, test) {
|
||||||
|
size_t n = 5000;
|
||||||
|
long *a = gc(calloc(n, sizeof(long)));
|
||||||
|
long *b = gc(calloc(n, sizeof(long)));
|
||||||
|
rngset(a, n * sizeof(long), 0, 0);
|
||||||
|
memcpy(b, a, n * sizeof(long));
|
||||||
|
qsort(a, n, sizeof(long), CompareLong);
|
||||||
|
radix_sort_int64(b, n);
|
||||||
|
ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
|
||||||
|
}
|
||||||
|
|
||||||
BENCH(_longsort, bench) {
|
BENCH(_longsort, bench) {
|
||||||
size_t n = 1000;
|
printf("\n");
|
||||||
|
size_t n = 5000;
|
||||||
long *p1 = gc(malloc(n * sizeof(long)));
|
long *p1 = gc(malloc(n * sizeof(long)));
|
||||||
long *p2 = gc(malloc(n * sizeof(long)));
|
long *p2 = gc(malloc(n * sizeof(long)));
|
||||||
rngset(p1, n * sizeof(long), 0, 0);
|
rngset(p1, n * sizeof(long), 0, 0);
|
||||||
EZBENCH2("_longsort", memcpy(p2, p1, n * sizeof(long)), _longsort(p2, n));
|
EZBENCH2("_longsort", memcpy(p2, p1, n * sizeof(long)), _longsort(p2, n));
|
||||||
EZBENCH2("qsort", memcpy(p2, p1, n * sizeof(long)),
|
if (X86_HAVE(AVX2)) {
|
||||||
|
EZBENCH2("vqsort_int64_avx2", memcpy(p2, p1, n * sizeof(long)),
|
||||||
|
vqsort_int64_avx2(p2, n));
|
||||||
|
}
|
||||||
|
if (X86_HAVE(SSE4_2)) {
|
||||||
|
EZBENCH2("vqsort_int64_sse4", memcpy(p2, p1, n * sizeof(long)),
|
||||||
|
vqsort_int64_sse4(p2, n));
|
||||||
|
}
|
||||||
|
if (X86_HAVE(SSSE3)) {
|
||||||
|
EZBENCH2("vqsort_int64_ssse3", memcpy(p2, p1, n * sizeof(long)),
|
||||||
|
vqsort_int64_ssse3(p2, n));
|
||||||
|
}
|
||||||
|
EZBENCH2("vqsort_int64_sse2", memcpy(p2, p1, n * sizeof(long)),
|
||||||
|
vqsort_int64_sse2(p2, n));
|
||||||
|
EZBENCH2("radix_sort_int64", memcpy(p2, p1, n * sizeof(long)),
|
||||||
|
radix_sort_int64(p2, n));
|
||||||
|
EZBENCH2("qsort(long)", memcpy(p2, p1, n * sizeof(long)),
|
||||||
qsort(p2, n, sizeof(long), CompareLong));
|
qsort(p2, n, sizeof(long), CompareLong));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -73,12 +150,88 @@ TEST(_intsort, test) {
|
||||||
ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
|
ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(vqsort_int32_avx2, test) {
|
||||||
|
if (!X86_HAVE(AVX2)) return;
|
||||||
|
size_t n = 5000;
|
||||||
|
int *a = gc(calloc(n, sizeof(int)));
|
||||||
|
int *b = gc(calloc(n, sizeof(int)));
|
||||||
|
rngset(a, n * sizeof(int), 0, 0);
|
||||||
|
memcpy(b, a, n * sizeof(int));
|
||||||
|
qsort(a, n, sizeof(int), CompareInt);
|
||||||
|
vqsort_int32_avx2(b, n);
|
||||||
|
ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(vqsort_int32_sse4, test) {
|
||||||
|
if (!X86_HAVE(SSE4_2)) return;
|
||||||
|
size_t n = 5000;
|
||||||
|
int *a = gc(calloc(n, sizeof(int)));
|
||||||
|
int *b = gc(calloc(n, sizeof(int)));
|
||||||
|
rngset(a, n * sizeof(int), 0, 0);
|
||||||
|
memcpy(b, a, n * sizeof(int));
|
||||||
|
qsort(a, n, sizeof(int), CompareInt);
|
||||||
|
vqsort_int32_sse4(b, n);
|
||||||
|
ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(vqsort_int32_ssse3, test) {
|
||||||
|
if (!X86_HAVE(SSSE3)) return;
|
||||||
|
size_t n = 5000;
|
||||||
|
int *a = gc(calloc(n, sizeof(int)));
|
||||||
|
int *b = gc(calloc(n, sizeof(int)));
|
||||||
|
rngset(a, n * sizeof(int), 0, 0);
|
||||||
|
memcpy(b, a, n * sizeof(int));
|
||||||
|
qsort(a, n, sizeof(int), CompareInt);
|
||||||
|
vqsort_int32_ssse3(b, n);
|
||||||
|
ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(vqsort_int32_sse2, test) {
|
||||||
|
size_t n = 5000;
|
||||||
|
int *a = gc(calloc(n, sizeof(int)));
|
||||||
|
int *b = gc(calloc(n, sizeof(int)));
|
||||||
|
rngset(a, n * sizeof(int), 0, 0);
|
||||||
|
memcpy(b, a, n * sizeof(int));
|
||||||
|
qsort(a, n, sizeof(int), CompareInt);
|
||||||
|
vqsort_int32_sse2(b, n);
|
||||||
|
ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(radix_sort_int32, test) {
|
||||||
|
size_t n = 5000;
|
||||||
|
int *a = gc(calloc(n, sizeof(int)));
|
||||||
|
int *b = gc(calloc(n, sizeof(int)));
|
||||||
|
rngset(a, n * sizeof(int), 0, 0);
|
||||||
|
memcpy(b, a, n * sizeof(int));
|
||||||
|
qsort(a, n, sizeof(int), CompareInt);
|
||||||
|
radix_sort_int32(b, n);
|
||||||
|
ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
|
||||||
|
}
|
||||||
|
|
||||||
BENCH(_intsort, bench) {
|
BENCH(_intsort, bench) {
|
||||||
size_t n = 1000;
|
printf("\n");
|
||||||
|
size_t n = 10000;
|
||||||
int *p1 = gc(malloc(n * sizeof(int)));
|
int *p1 = gc(malloc(n * sizeof(int)));
|
||||||
int *p2 = gc(malloc(n * sizeof(int)));
|
int *p2 = gc(malloc(n * sizeof(int)));
|
||||||
rngset(p1, n * sizeof(int), 0, 0);
|
rngset(p1, n * sizeof(int), 0, 0);
|
||||||
EZBENCH2("_intsort", memcpy(p2, p1, n * sizeof(int)), _intsort(p2, n));
|
EZBENCH2("_intsort", memcpy(p2, p1, n * sizeof(int)), _intsort(p2, n));
|
||||||
EZBENCH2("qsort", memcpy(p2, p1, n * sizeof(int)),
|
if (X86_HAVE(AVX2)) {
|
||||||
|
EZBENCH2("vqsort_int32_avx2", memcpy(p2, p1, n * sizeof(int)),
|
||||||
|
vqsort_int32_avx2(p2, n));
|
||||||
|
}
|
||||||
|
if (X86_HAVE(SSE4_2)) {
|
||||||
|
EZBENCH2("vqsort_int32_sse4", memcpy(p2, p1, n * sizeof(int)),
|
||||||
|
vqsort_int32_sse4(p2, n));
|
||||||
|
}
|
||||||
|
if (X86_HAVE(SSSE3)) {
|
||||||
|
EZBENCH2("vqsort_int32_ssse3", memcpy(p2, p1, n * sizeof(int)),
|
||||||
|
vqsort_int32_ssse3(p2, n));
|
||||||
|
}
|
||||||
|
EZBENCH2("vqsort_int32_sse2", memcpy(p2, p1, n * sizeof(int)),
|
||||||
|
vqsort_int32_sse2(p2, n));
|
||||||
|
EZBENCH2("djbsort", memcpy(p2, p1, n * sizeof(int)), djbsort(p2, n));
|
||||||
|
EZBENCH2("radix_sort_int32", memcpy(p2, p1, n * sizeof(int)),
|
||||||
|
radix_sort_int32(p2, n));
|
||||||
|
EZBENCH2("qsort(int)", memcpy(p2, p1, n * sizeof(int)),
|
||||||
qsort(p2, n, sizeof(int), CompareInt));
|
qsort(p2, n, sizeof(int), CompareInt));
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,7 +51,8 @@ TEST_LIBC_STR_DIRECTDEPS = \
|
||||||
THIRD_PARTY_REGEX \
|
THIRD_PARTY_REGEX \
|
||||||
THIRD_PARTY_ZLIB \
|
THIRD_PARTY_ZLIB \
|
||||||
THIRD_PARTY_LIBCXX \
|
THIRD_PARTY_LIBCXX \
|
||||||
THIRD_PARTY_SMALLZ4
|
THIRD_PARTY_SMALLZ4 \
|
||||||
|
THIRD_PARTY_VQSORT
|
||||||
|
|
||||||
TEST_LIBC_STR_DEPS := \
|
TEST_LIBC_STR_DEPS := \
|
||||||
$(call uniq,$(foreach x,$(TEST_LIBC_STR_DIRECTDEPS),$($(x))))
|
$(call uniq,$(foreach x,$(TEST_LIBC_STR_DIRECTDEPS),$($(x))))
|
||||||
|
|
5
third_party/compiler_rt/popcountdi2.c
vendored
5
third_party/compiler_rt/popcountdi2.c
vendored
|
@ -22,6 +22,10 @@ STATIC_YOINK("huge_compiler_rt_license");
|
||||||
COMPILER_RT_ABI si_int
|
COMPILER_RT_ABI si_int
|
||||||
__popcountdi2(di_int a)
|
__popcountdi2(di_int a)
|
||||||
{
|
{
|
||||||
|
#ifdef __POPCNT__
|
||||||
|
asm("popcnt\t%1,%0" : "=r"(a) : "r"(a) : "cc");
|
||||||
|
return a;
|
||||||
|
#else
|
||||||
du_int x2 = (du_int)a;
|
du_int x2 = (du_int)a;
|
||||||
x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL);
|
x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL);
|
||||||
/* Every 2 bits holds the sum of every pair of bits (32) */
|
/* Every 2 bits holds the sum of every pair of bits (32) */
|
||||||
|
@ -36,4 +40,5 @@ __popcountdi2(di_int a)
|
||||||
/* The lower 16 bits hold two 32 bit sums (6 significant bits). */
|
/* The lower 16 bits hold two 32 bit sums (6 significant bits). */
|
||||||
/* Upper 16 bits are garbage */
|
/* Upper 16 bits are garbage */
|
||||||
return (x + (x >> 8)) & 0x0000007F; /* (7 significant bits) */
|
return (x + (x >> 8)) & 0x0000007F; /* (7 significant bits) */
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
1
third_party/third_party.mk
vendored
1
third_party/third_party.mk
vendored
|
@ -34,6 +34,7 @@ o/$(MODE)/third_party: \
|
||||||
o/$(MODE)/third_party/tidy \
|
o/$(MODE)/third_party/tidy \
|
||||||
o/$(MODE)/third_party/tr \
|
o/$(MODE)/third_party/tr \
|
||||||
o/$(MODE)/third_party/unzip \
|
o/$(MODE)/third_party/unzip \
|
||||||
|
o/$(MODE)/third_party/vqsort \
|
||||||
o/$(MODE)/third_party/xed \
|
o/$(MODE)/third_party/xed \
|
||||||
o/$(MODE)/third_party/zip \
|
o/$(MODE)/third_party/zip \
|
||||||
o/$(MODE)/third_party/zlib
|
o/$(MODE)/third_party/zlib
|
||||||
|
|
23
third_party/vqsort/README.cosmo
vendored
Normal file
23
third_party/vqsort/README.cosmo
vendored
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
DESCRIPTION
|
||||||
|
|
||||||
|
vqsort implements vectorized quicksort using avx2. this is the fastest
|
||||||
|
way to sort integers. this goes as fast as djbsort for 32-bit integers
|
||||||
|
except it supports 64-bit integers too, which go just as fast: about a
|
||||||
|
gigabyte of memory sorted per second. It's 3x faster than simple radix
|
||||||
|
sort. It's 5x faster than simple quicksort. It's 10x faster than qsort
|
||||||
|
|
||||||
|
LICENSE
|
||||||
|
|
||||||
|
Apache 2.0
|
||||||
|
|
||||||
|
ORIGIN
|
||||||
|
|
||||||
|
https://github.com/google/highway/
|
||||||
|
commit 50331e0523bbf5f6c94b94263a91680f118e0986
|
||||||
|
Author: Jan Wassenberg <janwas@google.com>
|
||||||
|
Date: Wed Apr 26 11:20:33 2023 -0700
|
||||||
|
Faster vqsort for small arrays (7x speedup! for N=100)
|
||||||
|
|
||||||
|
LOCAL CHANGES
|
||||||
|
|
||||||
|
Precompiled because upstream codebase is slow, gigantic, and hairy.
|
20
third_party/vqsort/vqsort.h
vendored
Normal file
20
third_party/vqsort/vqsort.h
vendored
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
#ifndef COSMOPOLITAN_THIRD_PARTY_VQSORT_H_
|
||||||
|
#define COSMOPOLITAN_THIRD_PARTY_VQSORT_H_
|
||||||
|
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||||
|
COSMOPOLITAN_C_START_
|
||||||
|
|
||||||
|
void vqsort_int64(int64_t *, size_t);
|
||||||
|
void vqsort_int64_avx2(int64_t *, size_t);
|
||||||
|
void vqsort_int64_sse4(int64_t *, size_t);
|
||||||
|
void vqsort_int64_ssse3(int64_t *, size_t);
|
||||||
|
void vqsort_int64_sse2(int64_t *, size_t);
|
||||||
|
|
||||||
|
void vqsort_int32(int32_t *, size_t);
|
||||||
|
void vqsort_int32_avx2(int32_t *, size_t);
|
||||||
|
void vqsort_int32_sse4(int32_t *, size_t);
|
||||||
|
void vqsort_int32_ssse3(int32_t *, size_t);
|
||||||
|
void vqsort_int32_sse2(int32_t *, size_t);
|
||||||
|
|
||||||
|
COSMOPOLITAN_C_END_
|
||||||
|
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||||
|
#endif /* COSMOPOLITAN_THIRD_PARTY_VQSORT_H_ */
|
52
third_party/vqsort/vqsort.mk
vendored
Normal file
52
third_party/vqsort/vqsort.mk
vendored
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
|
||||||
|
#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
|
||||||
|
|
||||||
|
PKGS += THIRD_PARTY_VQSORT
|
||||||
|
|
||||||
|
THIRD_PARTY_VQSORT_ARTIFACTS += THIRD_PARTY_VQSORT_A
|
||||||
|
THIRD_PARTY_VQSORT = $(THIRD_PARTY_VQSORT_A_DEPS) $(THIRD_PARTY_VQSORT_A)
|
||||||
|
THIRD_PARTY_VQSORT_A = o/$(MODE)/third_party/vqsort/vqsort.a
|
||||||
|
THIRD_PARTY_VQSORT_A_FILES := $(wildcard third_party/vqsort/*)
|
||||||
|
THIRD_PARTY_VQSORT_A_HDRS = $(filter %.h,$(THIRD_PARTY_VQSORT_A_FILES))
|
||||||
|
THIRD_PARTY_VQSORT_A_SRCS_C = $(filter %.c,$(THIRD_PARTY_VQSORT_A_FILES))
|
||||||
|
THIRD_PARTY_VQSORT_A_SRCS_S = $(filter %.S,$(THIRD_PARTY_VQSORT_A_FILES))
|
||||||
|
THIRD_PARTY_VQSORT_A_SRCS = $(THIRD_PARTY_VQSORT_A_SRCS_C) $(THIRD_PARTY_VQSORT_A_SRCS_S)
|
||||||
|
THIRD_PARTY_VQSORT_A_OBJS_C = $(THIRD_PARTY_VQSORT_A_SRCS_C:%.c=o/$(MODE)/%.o)
|
||||||
|
THIRD_PARTY_VQSORT_A_OBJS_S = $(THIRD_PARTY_VQSORT_A_SRCS_S:%.S=o/$(MODE)/%.o)
|
||||||
|
THIRD_PARTY_VQSORT_A_OBJS = $(THIRD_PARTY_VQSORT_A_OBJS_C) $(THIRD_PARTY_VQSORT_A_OBJS_S)
|
||||||
|
|
||||||
|
THIRD_PARTY_VQSORT_A_CHECKS = \
|
||||||
|
$(THIRD_PARTY_VQSORT_A).pkg \
|
||||||
|
$(THIRD_PARTY_VQSORT_A_HDRS:%=o/$(MODE)/%.ok)
|
||||||
|
|
||||||
|
THIRD_PARTY_VQSORT_A_DIRECTDEPS = \
|
||||||
|
LIBC_INTRIN \
|
||||||
|
LIBC_MEM \
|
||||||
|
LIBC_NEXGEN32E \
|
||||||
|
LIBC_RUNTIME \
|
||||||
|
LIBC_STDIO \
|
||||||
|
LIBC_STR \
|
||||||
|
LIBC_STUBS \
|
||||||
|
THIRD_PARTY_COMPILER_RT
|
||||||
|
|
||||||
|
THIRD_PARTY_VQSORT_A_DEPS := \
|
||||||
|
$(call uniq,$(foreach x,$(THIRD_PARTY_VQSORT_A_DIRECTDEPS),$($(x))))
|
||||||
|
|
||||||
|
$(THIRD_PARTY_VQSORT_A): \
|
||||||
|
third_party/vqsort/ \
|
||||||
|
$(THIRD_PARTY_VQSORT_A).pkg \
|
||||||
|
$(THIRD_PARTY_VQSORT_A_OBJS)
|
||||||
|
|
||||||
|
$(THIRD_PARTY_VQSORT_A).pkg: \
|
||||||
|
$(THIRD_PARTY_VQSORT_A_OBJS) \
|
||||||
|
$(foreach x,$(THIRD_PARTY_VQSORT_A_DIRECTDEPS),$($(x)_A).pkg)
|
||||||
|
|
||||||
|
THIRD_PARTY_VQSORT_LIBS = $(foreach x,$(THIRD_PARTY_VQSORT_ARTIFACTS),$($(x)))
|
||||||
|
THIRD_PARTY_VQSORT_SRCS = $(foreach x,$(THIRD_PARTY_VQSORT_ARTIFACTS),$($(x)_SRCS))
|
||||||
|
THIRD_PARTY_VQSORT_HDRS = $(foreach x,$(THIRD_PARTY_VQSORT_ARTIFACTS),$($(x)_HDRS))
|
||||||
|
THIRD_PARTY_VQSORT_CHECKS = $(foreach x,$(THIRD_PARTY_VQSORT_ARTIFACTS),$($(x)_CHECKS))
|
||||||
|
THIRD_PARTY_VQSORT_OBJS = $(foreach x,$(THIRD_PARTY_VQSORT_ARTIFACTS),$($(x)_OBJS))
|
||||||
|
$(THIRD_PARTY_VQSORT_OBJS): $(BUILD_FILES) third_party/vqsort/vqsort.mk
|
||||||
|
|
||||||
|
.PHONY: o/$(MODE)/third_party/vqsort
|
||||||
|
o/$(MODE)/third_party/vqsort: $(THIRD_PARTY_VQSORT_CHECKS)
|
24732
third_party/vqsort/vqsort_i32a.S
vendored
Normal file
24732
third_party/vqsort/vqsort_i32a.S
vendored
Normal file
File diff suppressed because it is too large
Load diff
23679
third_party/vqsort/vqsort_i64a.S
vendored
Normal file
23679
third_party/vqsort/vqsort_i64a.S
vendored
Normal file
File diff suppressed because it is too large
Load diff
29
third_party/vqsort/vqsort_int32.c
vendored
Normal file
29
third_party/vqsort/vqsort_int32.c
vendored
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||||
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||||
|
│ Copyright 2023 Justine Alexandra Roberts Tunney │
|
||||||
|
│ │
|
||||||
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||||
|
│ any purpose with or without fee is hereby granted, provided that the │
|
||||||
|
│ above copyright notice and this permission notice appear in all copies. │
|
||||||
|
│ │
|
||||||
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||||
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||||
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||||
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||||
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||||
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||||
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||||
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||||
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
|
#include "libc/mem/alg.h"
|
||||||
|
#include "libc/nexgen32e/x86feature.h"
|
||||||
|
#include "third_party/vqsort/vqsort.h"
|
||||||
|
|
||||||
|
void vqsort_int32(int32_t *A, size_t n) {
|
||||||
|
if (X86_HAVE(AVX2)) {
|
||||||
|
vqsort_int32_avx2(A, n);
|
||||||
|
} else {
|
||||||
|
radix_sort_int32(A, n);
|
||||||
|
}
|
||||||
|
}
|
29
third_party/vqsort/vqsort_int64.c
vendored
Normal file
29
third_party/vqsort/vqsort_int64.c
vendored
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||||
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||||
|
│ Copyright 2023 Justine Alexandra Roberts Tunney │
|
||||||
|
│ │
|
||||||
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||||
|
│ any purpose with or without fee is hereby granted, provided that the │
|
||||||
|
│ above copyright notice and this permission notice appear in all copies. │
|
||||||
|
│ │
|
||||||
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||||
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||||
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||||
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||||
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||||
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||||
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||||
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||||
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
|
#include "libc/mem/alg.h"
|
||||||
|
#include "libc/nexgen32e/x86feature.h"
|
||||||
|
#include "third_party/vqsort/vqsort.h"
|
||||||
|
|
||||||
|
void vqsort_int64(int64_t *A, size_t n) {
|
||||||
|
if (X86_HAVE(AVX2)) {
|
||||||
|
vqsort_int64_avx2(A, n);
|
||||||
|
} else {
|
||||||
|
radix_sort_int64(A, n);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in a new issue