Add more sorting algorithms

This commit is contained in:
Justine Tunney 2023-04-27 05:42:10 -07:00
parent b7bf052a4b
commit 7c9ef924bf
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
17 changed files with 49004 additions and 5 deletions

View file

@ -143,6 +143,7 @@ include libc/time/time.mk # │
include libc/stdio/stdio.mk # │
include third_party/libcxx/libcxx.mk # │
include net/net.mk # │
include third_party/vqsort/vqsort.mk # │
include libc/log/log.mk # │
include third_party/bzip2/bzip2.mk # │
include dsp/core/core.mk # │

View file

@ -89,6 +89,7 @@ EXAMPLES_DIRECTDEPS = \
THIRD_PARTY_SED \
THIRD_PARTY_STB \
THIRD_PARTY_TR \
THIRD_PARTY_VQSORT \
THIRD_PARTY_XED \
THIRD_PARTY_ZLIB \
TOOL_BUILD_LIB \

25
examples/vqsort.c Normal file
View file

@ -0,0 +1,25 @@
#if 0
/*─────────────────────────────────────────────────────────────────╗
To the extent possible under law, Justine Tunney has waived
all copyright and related or neighboring rights to this file,
as it is written in the following disclaimers:
http://unlicense.org/ │
http://creativecommons.org/publicdomain/zero/1.0/ │
*/
#endif
#include "third_party/vqsort/vqsort.h"
#include "libc/macros.internal.h"
#include "libc/stdio/stdio.h"
#include "third_party/vqsort/vqsort.h"
// how to sort one gigabyte of 64-bit integers per second
/**
 * Sorts a small array of 64-bit integers with vqsort_int64() and prints
 * the sorted values on a single line, separated by spaces.
 */
int main(int argc, char *argv[]) {
  int64_t A[] = {9, 3, -3, 5, 23, 7};
  vqsort_int64(A, ARRAYLEN(A));
  // unsigned index: avoids comparing a signed int against the array length
  for (size_t i = 0; i < ARRAYLEN(A); ++i) {
    if (i) printf(" ");
    // explicit cast keeps the argument type in agreement with %ld
    printf("%ld", (long)A[i]);
  }
  printf("\n");
  return 0;
}

View file

@ -34,6 +34,9 @@ int _tarjan(int, const int (*)[2], int, int[], int[], int *)
char *_replacestr(const char *, const char *, const char *)
paramsnonnull() __algalloc;
bool radix_sort_int32(int32_t *, size_t);
bool radix_sort_int64(int64_t *, size_t);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_ALG_ALG_H_ */

101
libc/mem/radix_sort_int32.c Normal file
View file

@ -0,0 +1,101 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2023 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/mem/alg.h"
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
#include "libc/str/str.h"
// Credit: Andrew Schein. 2009. Open-source C++ implementation of Radix
// Sort for double-precision floating points. (2009).
#define HIST_SIZE (size_t)2048
#define get_byte_0(v) ((v)&0x7FF)
#define get_byte_1(v) (((v) >> 11) & 0x7FF)
#define get_byte_2_flip_sign(v) (((unsigned)(v) >> 22) ^ 0x200)
/**
 * Sorts signed 32-bit integers in ascending order using LSD radix sort.
 *
 * Keys are distributed in three passes (11, 11, and 10 bits), least
 * significant digit first. The top digit has its sign bit flipped so
 * that negative values land before non-negative ones. Arrays holding
 * fewer than HIST_SIZE elements are handed to _intsort() instead.
 *
 * @param A is the array, which is sorted in place
 * @param n is the number of elements in A
 * @return true on success, or false if scratch memory couldn't be
 *     allocated, in which case A hasn't been modified
 */
bool radix_sort_int32(int32_t *A, size_t n) {
  int32_t *T, *reader, *writer;
  size_t i, pos, sum0, sum1, sum2, tsum, *b0, *b1, *b2;
  if (n < HIST_SIZE) {
    _intsort(A, n);
    return true;
  }
  // scratch array the passes ping-pong with
  if (!(T = malloc(n * sizeof(*T)))) {
    return false;
  }
  // one zeroed allocation holds all three histograms
  if (!(b0 = calloc(HIST_SIZE * 3, sizeof(*b0)))) {
    free(T);
    return false;
  }
  b1 = b0 + HIST_SIZE;
  b2 = b1 + HIST_SIZE;
  // count how often each digit value occurs, for all passes at once
  for (i = 0; i < n; i++) {
    b0[get_byte_0(A[i])]++;
    b1[get_byte_1(A[i])]++;
    b2[get_byte_2_flip_sign(A[i])]++;
  }
  // replace counts with (start offset - 1); the unsigned wraparound of
  // the first bucket is undone by the pre-increment in the passes below
  sum0 = sum1 = sum2 = tsum = 0;
  for (i = 0; i < HIST_SIZE; i++) {
    tsum = b0[i] + sum0;
    b0[i] = sum0 - 1;
    sum0 = tsum;
    tsum = b1[i] + sum1;
    b1[i] = sum1 - 1;
    sum1 = tsum;
    tsum = b2[i] + sum2;
    b2[i] = sum2 - 1;
    sum2 = tsum;
  }
  // pass 1: low 11 bits, A -> T
  writer = T;
  reader = A;
  for (i = 0; i < n; i++) {
    pos = get_byte_0(reader[i]);
    writer[++b0[pos]] = reader[i];
  }
  // pass 2: middle 11 bits, T -> A
  writer = A;
  reader = T;
  for (i = 0; i < n; i++) {
    pos = get_byte_1(reader[i]);
    writer[++b1[pos]] = reader[i];
  }
  // pass 3: top 10 bits with the sign flipped, A -> T
  writer = T;
  reader = A;
  for (i = 0; i < n; i++) {
    pos = get_byte_2_flip_sign(reader[i]);
    writer[++b2[pos]] = reader[i];
  }
  // an odd number of passes leaves the result in T; size by element type
  memcpy(A, T, n * sizeof(*A));
  free(b0);
  free(T);
  return true;
}

144
libc/mem/radix_sort_int64.c Normal file
View file

@ -0,0 +1,144 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2023 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/mem/alg.h"
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
#include "libc/str/str.h"
// Credit: Andrew Schein. 2009. Open-source C++ implementation of Radix
// Sort for double-precision floating points. (2009).
#define HIST_SIZE (size_t)2048
#define get_byte_0(v) ((v)&0x7FF)
#define get_byte_1(v) (((v) >> 11) & 0x7FF)
#define get_byte_2(v) (((v) >> 22) & 0x7FF)
#define get_byte_3(v) (((v) >> 33) & 0x7FF)
#define get_byte_4(v) (((v) >> 44) & 0x7FF)
#define get_byte_5(v) (((v) >> 55) & 0x7FF)
#define get_byte_2_flip_sign(v) (((unsigned)(v) >> 22) ^ 0x200)
#define get_byte_5_flip_sign(v) ((((v) >> 55) & 0x7FF) ^ 0x400)
/**
 * Sorts signed 64-bit integers in ascending order using LSD radix sort.
 *
 * Six distribution passes are made over 11-bit digits, starting with
 * the least significant one. The final pass uses the sign-flipping
 * digit extractor so negative values are placed ahead of non-negative
 * ones. Arrays holding fewer than HIST_SIZE elements are handed to
 * _longsort() instead.
 *
 * @param A is the array, which is sorted in place
 * @param n is the number of elements in A
 * @return true on success, or false if scratch memory couldn't be
 *     allocated, in which case A hasn't been modified
 */
bool radix_sort_int64(int64_t *A, size_t n) {
  int64_t *scratch;
  int64_t v;
  size_t *hist[6];
  size_t k, j, digit, count, offset;

  if (n < HIST_SIZE) {
    _longsort(A, n);
    return true;
  }

  scratch = (int64_t *)malloc(n * sizeof(int64_t));
  if (!scratch) {
    return false;
  }

  // a single zeroed allocation backs all six histograms
  hist[0] = (size_t *)calloc(HIST_SIZE * 6, sizeof(size_t));
  if (!hist[0]) {
    free(scratch);
    return false;
  }
  for (k = 1; k < 6; k++) {
    hist[k] = hist[k - 1] + HIST_SIZE;
  }

  // tally every digit of every key in one sweep
  for (j = 0; j < n; j++) {
    v = A[j];
    hist[0][get_byte_0(v)]++;
    hist[1][get_byte_1(v)]++;
    hist[2][get_byte_2(v)]++;
    hist[3][get_byte_3(v)]++;
    hist[4][get_byte_4(v)]++;
    hist[5][get_byte_5_flip_sign(v)]++;
  }

  // convert each histogram into the starting slot of each bucket
  for (k = 0; k < 6; k++) {
    offset = 0;
    for (digit = 0; digit < HIST_SIZE; digit++) {
      count = hist[k][digit];
      hist[k][digit] = offset;
      offset += count;
    }
  }

  // digit 0: A -> scratch
  for (j = 0; j < n; j++) {
    v = A[j];
    scratch[hist[0][get_byte_0(v)]++] = v;
  }
  // digit 1: scratch -> A
  for (j = 0; j < n; j++) {
    v = scratch[j];
    A[hist[1][get_byte_1(v)]++] = v;
  }
  // digit 2: A -> scratch
  for (j = 0; j < n; j++) {
    v = A[j];
    scratch[hist[2][get_byte_2(v)]++] = v;
  }
  // digit 3: scratch -> A
  for (j = 0; j < n; j++) {
    v = scratch[j];
    A[hist[3][get_byte_3(v)]++] = v;
  }
  // digit 4: A -> scratch
  for (j = 0; j < n; j++) {
    v = A[j];
    scratch[hist[4][get_byte_4(v)]++] = v;
  }
  // digit 5 (sign flipped): scratch -> A, so the result ends up in A
  for (j = 0; j < n; j++) {
    v = scratch[j];
    A[hist[5][get_byte_5_flip_sign(v)]++] = v;
  }

  free(hist[0]);
  free(scratch);
  return true;
}

View file

@ -21,9 +21,11 @@
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/rand.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/testlib.h"
#include "third_party/vqsort/vqsort.h"
int CompareLong(const void *a, const void *b) {
const long *x = a;
@ -44,13 +46,88 @@ TEST(_longsort, test) {
ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
}
// Verifies that vqsort_int64_avx2() yields the same result as qsort() with
// CompareLong on a 5000-element buffer filled by rngset().
TEST(vqsort_int64_avx2, test) {
if (!X86_HAVE(AVX2)) return;  // nothing is checked unless X86_HAVE(AVX2) holds
size_t n = 5000;
long *a = gc(calloc(n, sizeof(long)));
long *b = gc(calloc(n, sizeof(long)));
rngset(a, n * sizeof(long), 0, 0);
memcpy(b, a, n * sizeof(long));  // both sorts start from identical input
qsort(a, n, sizeof(long), CompareLong);  // reference result
vqsort_int64_avx2(b, n);
ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
}
// Verifies that vqsort_int64_sse4() yields the same result as qsort() with
// CompareLong on a 5000-element buffer filled by rngset().
TEST(vqsort_int64_sse4, test) {
if (!X86_HAVE(SSE4_2)) return;  // nothing is checked unless X86_HAVE(SSE4_2) holds
size_t n = 5000;
long *a = gc(calloc(n, sizeof(long)));
long *b = gc(calloc(n, sizeof(long)));
rngset(a, n * sizeof(long), 0, 0);
memcpy(b, a, n * sizeof(long));  // both sorts start from identical input
qsort(a, n, sizeof(long), CompareLong);  // reference result
vqsort_int64_sse4(b, n);
ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
}
// Verifies that vqsort_int64_ssse3() yields the same result as qsort() with
// CompareLong on a 5000-element buffer filled by rngset().
TEST(vqsort_int64_ssse3, test) {
if (!X86_HAVE(SSSE3)) return;  // nothing is checked unless X86_HAVE(SSSE3) holds
size_t n = 5000;
long *a = gc(calloc(n, sizeof(long)));
long *b = gc(calloc(n, sizeof(long)));
rngset(a, n * sizeof(long), 0, 0);
memcpy(b, a, n * sizeof(long));  // both sorts start from identical input
qsort(a, n, sizeof(long), CompareLong);  // reference result
vqsort_int64_ssse3(b, n);
ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
}
// Verifies that vqsort_int64_sse2() yields the same result as qsort() with
// CompareLong on a 5000-element buffer filled by rngset(). Unlike the other
// vqsort_int64_* tests, this one has no X86_HAVE() guard and always runs.
TEST(vqsort_int64_sse2, test) {
size_t n = 5000;
long *a = gc(calloc(n, sizeof(long)));
long *b = gc(calloc(n, sizeof(long)));
rngset(a, n * sizeof(long), 0, 0);
memcpy(b, a, n * sizeof(long));  // both sorts start from identical input
qsort(a, n, sizeof(long), CompareLong);  // reference result
vqsort_int64_sse2(b, n);
ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
}
// Verifies that radix_sort_int64() yields the same result as qsort() with
// CompareLong on a 5000-element buffer filled by rngset(). The element
// count is above the sort's small-input cutoff, so the radix passes
// themselves are exercised.
TEST(radix_sort_int64, test) {
  size_t n = 5000;
  long *a = gc(calloc(n, sizeof(long)));
  long *b = gc(calloc(n, sizeof(long)));
  rngset(a, n * sizeof(long), 0, 0);
  memcpy(b, a, n * sizeof(long));
  qsort(a, n, sizeof(long), CompareLong);
  // the sort reports allocation failure through its return value; check
  // it so such a failure isn't misreported as a wrong ordering below
  ASSERT_EQ(true, radix_sort_int64(b, n));
  ASSERT_EQ(0, memcmp(b, a, n * sizeof(long)));
}
BENCH(_longsort, bench) {
size_t n = 1000;
printf("\n");
size_t n = 5000;
long *p1 = gc(malloc(n * sizeof(long)));
long *p2 = gc(malloc(n * sizeof(long)));
rngset(p1, n * sizeof(long), 0, 0);
EZBENCH2("_longsort", memcpy(p2, p1, n * sizeof(long)), _longsort(p2, n));
EZBENCH2("qsort", memcpy(p2, p1, n * sizeof(long)),
if (X86_HAVE(AVX2)) {
EZBENCH2("vqsort_int64_avx2", memcpy(p2, p1, n * sizeof(long)),
vqsort_int64_avx2(p2, n));
}
if (X86_HAVE(SSE4_2)) {
EZBENCH2("vqsort_int64_sse4", memcpy(p2, p1, n * sizeof(long)),
vqsort_int64_sse4(p2, n));
}
if (X86_HAVE(SSSE3)) {
EZBENCH2("vqsort_int64_ssse3", memcpy(p2, p1, n * sizeof(long)),
vqsort_int64_ssse3(p2, n));
}
EZBENCH2("vqsort_int64_sse2", memcpy(p2, p1, n * sizeof(long)),
vqsort_int64_sse2(p2, n));
EZBENCH2("radix_sort_int64", memcpy(p2, p1, n * sizeof(long)),
radix_sort_int64(p2, n));
EZBENCH2("qsort(long)", memcpy(p2, p1, n * sizeof(long)),
qsort(p2, n, sizeof(long), CompareLong));
}
@ -73,12 +150,88 @@ TEST(_intsort, test) {
ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
}
// Verifies that vqsort_int32_avx2() yields the same result as qsort() with
// CompareInt on a 5000-element buffer filled by rngset().
TEST(vqsort_int32_avx2, test) {
if (!X86_HAVE(AVX2)) return;  // nothing is checked unless X86_HAVE(AVX2) holds
size_t n = 5000;
int *a = gc(calloc(n, sizeof(int)));
int *b = gc(calloc(n, sizeof(int)));
rngset(a, n * sizeof(int), 0, 0);
memcpy(b, a, n * sizeof(int));  // both sorts start from identical input
qsort(a, n, sizeof(int), CompareInt);  // reference result
vqsort_int32_avx2(b, n);
ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
}
// Verifies that vqsort_int32_sse4() yields the same result as qsort() with
// CompareInt on a 5000-element buffer filled by rngset().
TEST(vqsort_int32_sse4, test) {
if (!X86_HAVE(SSE4_2)) return;  // nothing is checked unless X86_HAVE(SSE4_2) holds
size_t n = 5000;
int *a = gc(calloc(n, sizeof(int)));
int *b = gc(calloc(n, sizeof(int)));
rngset(a, n * sizeof(int), 0, 0);
memcpy(b, a, n * sizeof(int));  // both sorts start from identical input
qsort(a, n, sizeof(int), CompareInt);  // reference result
vqsort_int32_sse4(b, n);
ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
}
// Verifies that vqsort_int32_ssse3() yields the same result as qsort() with
// CompareInt on a 5000-element buffer filled by rngset().
TEST(vqsort_int32_ssse3, test) {
if (!X86_HAVE(SSSE3)) return;  // nothing is checked unless X86_HAVE(SSSE3) holds
size_t n = 5000;
int *a = gc(calloc(n, sizeof(int)));
int *b = gc(calloc(n, sizeof(int)));
rngset(a, n * sizeof(int), 0, 0);
memcpy(b, a, n * sizeof(int));  // both sorts start from identical input
qsort(a, n, sizeof(int), CompareInt);  // reference result
vqsort_int32_ssse3(b, n);
ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
}
// Verifies that vqsort_int32_sse2() yields the same result as qsort() with
// CompareInt on a 5000-element buffer filled by rngset(). Unlike the other
// vqsort_int32_* tests, this one has no X86_HAVE() guard and always runs.
TEST(vqsort_int32_sse2, test) {
size_t n = 5000;
int *a = gc(calloc(n, sizeof(int)));
int *b = gc(calloc(n, sizeof(int)));
rngset(a, n * sizeof(int), 0, 0);
memcpy(b, a, n * sizeof(int));  // both sorts start from identical input
qsort(a, n, sizeof(int), CompareInt);  // reference result
vqsort_int32_sse2(b, n);
ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
}
// Verifies that radix_sort_int32() yields the same result as qsort() with
// CompareInt on a 5000-element buffer filled by rngset(). The element
// count is above the sort's small-input cutoff, so the radix passes
// themselves are exercised.
TEST(radix_sort_int32, test) {
  size_t n = 5000;
  int *a = gc(calloc(n, sizeof(int)));
  int *b = gc(calloc(n, sizeof(int)));
  rngset(a, n * sizeof(int), 0, 0);
  memcpy(b, a, n * sizeof(int));
  qsort(a, n, sizeof(int), CompareInt);
  // the sort reports allocation failure through its return value; check
  // it so such a failure isn't misreported as a wrong ordering below
  ASSERT_EQ(true, radix_sort_int32(b, n));
  ASSERT_EQ(0, memcmp(b, a, n * sizeof(int)));
}
BENCH(_intsort, bench) {
size_t n = 1000;
printf("\n");
size_t n = 10000;
int *p1 = gc(malloc(n * sizeof(int)));
int *p2 = gc(malloc(n * sizeof(int)));
rngset(p1, n * sizeof(int), 0, 0);
EZBENCH2("_intsort", memcpy(p2, p1, n * sizeof(int)), _intsort(p2, n));
EZBENCH2("qsort", memcpy(p2, p1, n * sizeof(int)),
if (X86_HAVE(AVX2)) {
EZBENCH2("vqsort_int32_avx2", memcpy(p2, p1, n * sizeof(int)),
vqsort_int32_avx2(p2, n));
}
if (X86_HAVE(SSE4_2)) {
EZBENCH2("vqsort_int32_sse4", memcpy(p2, p1, n * sizeof(int)),
vqsort_int32_sse4(p2, n));
}
if (X86_HAVE(SSSE3)) {
EZBENCH2("vqsort_int32_ssse3", memcpy(p2, p1, n * sizeof(int)),
vqsort_int32_ssse3(p2, n));
}
EZBENCH2("vqsort_int32_sse2", memcpy(p2, p1, n * sizeof(int)),
vqsort_int32_sse2(p2, n));
EZBENCH2("djbsort", memcpy(p2, p1, n * sizeof(int)), djbsort(p2, n));
EZBENCH2("radix_sort_int32", memcpy(p2, p1, n * sizeof(int)),
radix_sort_int32(p2, n));
EZBENCH2("qsort(int)", memcpy(p2, p1, n * sizeof(int)),
qsort(p2, n, sizeof(int), CompareInt));
}

View file

@ -51,7 +51,8 @@ TEST_LIBC_STR_DIRECTDEPS = \
THIRD_PARTY_REGEX \
THIRD_PARTY_ZLIB \
THIRD_PARTY_LIBCXX \
THIRD_PARTY_SMALLZ4
THIRD_PARTY_SMALLZ4 \
THIRD_PARTY_VQSORT
TEST_LIBC_STR_DEPS := \
$(call uniq,$(foreach x,$(TEST_LIBC_STR_DIRECTDEPS),$($(x))))

View file

@ -22,6 +22,10 @@ STATIC_YOINK("huge_compiler_rt_license");
COMPILER_RT_ABI si_int
__popcountdi2(di_int a)
{
#ifdef __POPCNT__
asm("popcnt\t%1,%0" : "=r"(a) : "r"(a) : "cc");
return a;
#else
du_int x2 = (du_int)a;
x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL);
/* Every 2 bits holds the sum of every pair of bits (32) */
@ -36,4 +40,5 @@ __popcountdi2(di_int a)
/* The lower 16 bits hold two 32 bit sums (6 significant bits). */
/* Upper 16 bits are garbage */
return (x + (x >> 8)) & 0x0000007F; /* (7 significant bits) */
#endif
}

View file

@ -34,6 +34,7 @@ o/$(MODE)/third_party: \
o/$(MODE)/third_party/tidy \
o/$(MODE)/third_party/tr \
o/$(MODE)/third_party/unzip \
o/$(MODE)/third_party/vqsort \
o/$(MODE)/third_party/xed \
o/$(MODE)/third_party/zip \
o/$(MODE)/third_party/zlib

23
third_party/vqsort/README.cosmo vendored Normal file
View file

@ -0,0 +1,23 @@
DESCRIPTION
vqsort implements vectorized quicksort using AVX2. This is the fastest
way to sort integers. It goes as fast as djbsort for 32-bit integers,
except it supports 64-bit integers too, which go just as fast: about a
gigabyte of memory sorted per second. It's 3x faster than simple radix
sort. It's 5x faster than simple quicksort. It's 10x faster than qsort.
LICENSE
Apache 2.0
ORIGIN
https://github.com/google/highway/
commit 50331e0523bbf5f6c94b94263a91680f118e0986
Author: Jan Wassenberg <janwas@google.com>
Date: Wed Apr 26 11:20:33 2023 -0700
Faster vqsort for small arrays (7x speedup! for N=100)
LOCAL CHANGES
Precompiled because the upstream codebase is slow, gigantic, and hairy.

20
third_party/vqsort/vqsort.h vendored Normal file
View file

@ -0,0 +1,20 @@
#ifndef COSMOPOLITAN_THIRD_PARTY_VQSORT_H_
#define COSMOPOLITAN_THIRD_PARTY_VQSORT_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/*
 * Integer sorting entry points. Each function takes the array to sort
 * in place and its element count.
 *
 * vqsort_int64() and vqsort_int32() choose an implementation at runtime.
 * The suffixed variants name a specific instruction set; callers of the
 * avx2, sse4 and ssse3 variants elsewhere in this codebase first check
 * X86_HAVE(AVX2), X86_HAVE(SSE4_2) and X86_HAVE(SSSE3) respectively.
 * NOTE(review): the suffixed variants are presumably provided by the
 * precompiled vqsort_i64a.S / vqsort_i32a.S sources -- confirm.
 */
/* 64-bit signed integers */
void vqsort_int64(int64_t *, size_t);
void vqsort_int64_avx2(int64_t *, size_t);
void vqsort_int64_sse4(int64_t *, size_t);
void vqsort_int64_ssse3(int64_t *, size_t);
void vqsort_int64_sse2(int64_t *, size_t);
/* 32-bit signed integers */
void vqsort_int32(int32_t *, size_t);
void vqsort_int32_avx2(int32_t *, size_t);
void vqsort_int32_sse4(int32_t *, size_t);
void vqsort_int32_ssse3(int32_t *, size_t);
void vqsort_int32_sse2(int32_t *, size_t);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_THIRD_PARTY_VQSORT_H_ */

52
third_party/vqsort/vqsort.mk vendored Normal file
View file

@ -0,0 +1,52 @@
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
# Build rules for the vendored vqsort package. The .c and .S files found
# in third_party/vqsort/ are compiled and archived into
# o/$(MODE)/third_party/vqsort/vqsort.a.
PKGS += THIRD_PARTY_VQSORT
THIRD_PARTY_VQSORT_ARTIFACTS += THIRD_PARTY_VQSORT_A
THIRD_PARTY_VQSORT = $(THIRD_PARTY_VQSORT_A_DEPS) $(THIRD_PARTY_VQSORT_A)
THIRD_PARTY_VQSORT_A = o/$(MODE)/third_party/vqsort/vqsort.a
# Source discovery: every file in the directory, split by extension.
THIRD_PARTY_VQSORT_A_FILES := $(wildcard third_party/vqsort/*)
THIRD_PARTY_VQSORT_A_HDRS = $(filter %.h,$(THIRD_PARTY_VQSORT_A_FILES))
THIRD_PARTY_VQSORT_A_SRCS_C = $(filter %.c,$(THIRD_PARTY_VQSORT_A_FILES))
THIRD_PARTY_VQSORT_A_SRCS_S = $(filter %.S,$(THIRD_PARTY_VQSORT_A_FILES))
THIRD_PARTY_VQSORT_A_SRCS = $(THIRD_PARTY_VQSORT_A_SRCS_C) $(THIRD_PARTY_VQSORT_A_SRCS_S)
# Each source maps to an object of the same name under o/$(MODE)/.
THIRD_PARTY_VQSORT_A_OBJS_C = $(THIRD_PARTY_VQSORT_A_SRCS_C:%.c=o/$(MODE)/%.o)
THIRD_PARTY_VQSORT_A_OBJS_S = $(THIRD_PARTY_VQSORT_A_SRCS_S:%.S=o/$(MODE)/%.o)
THIRD_PARTY_VQSORT_A_OBJS = $(THIRD_PARTY_VQSORT_A_OBJS_C) $(THIRD_PARTY_VQSORT_A_OBJS_S)
# Check targets: the package file plus one .ok target per header.
THIRD_PARTY_VQSORT_A_CHECKS = \
$(THIRD_PARTY_VQSORT_A).pkg \
$(THIRD_PARTY_VQSORT_A_HDRS:%=o/$(MODE)/%.ok)
# Packages this archive depends on directly.
THIRD_PARTY_VQSORT_A_DIRECTDEPS = \
LIBC_INTRIN \
LIBC_MEM \
LIBC_NEXGEN32E \
LIBC_RUNTIME \
LIBC_STDIO \
LIBC_STR \
LIBC_STUBS \
THIRD_PARTY_COMPILER_RT
# Expansion of each direct dependency's package variable, de-duplicated.
THIRD_PARTY_VQSORT_A_DEPS := \
$(call uniq,$(foreach x,$(THIRD_PARTY_VQSORT_A_DIRECTDEPS),$($(x))))
$(THIRD_PARTY_VQSORT_A): \
third_party/vqsort/ \
$(THIRD_PARTY_VQSORT_A).pkg \
$(THIRD_PARTY_VQSORT_A_OBJS)
$(THIRD_PARTY_VQSORT_A).pkg: \
$(THIRD_PARTY_VQSORT_A_OBJS) \
$(foreach x,$(THIRD_PARTY_VQSORT_A_DIRECTDEPS),$($(x)_A).pkg)
# Package-level aggregates collected over all registered artifacts.
THIRD_PARTY_VQSORT_LIBS = $(foreach x,$(THIRD_PARTY_VQSORT_ARTIFACTS),$($(x)))
THIRD_PARTY_VQSORT_SRCS = $(foreach x,$(THIRD_PARTY_VQSORT_ARTIFACTS),$($(x)_SRCS))
THIRD_PARTY_VQSORT_HDRS = $(foreach x,$(THIRD_PARTY_VQSORT_ARTIFACTS),$($(x)_HDRS))
THIRD_PARTY_VQSORT_CHECKS = $(foreach x,$(THIRD_PARTY_VQSORT_ARTIFACTS),$($(x)_CHECKS))
THIRD_PARTY_VQSORT_OBJS = $(foreach x,$(THIRD_PARTY_VQSORT_ARTIFACTS),$($(x)_OBJS))
# Objects are rebuilt whenever the build files or this makefile change.
$(THIRD_PARTY_VQSORT_OBJS): $(BUILD_FILES) third_party/vqsort/vqsort.mk
.PHONY: o/$(MODE)/third_party/vqsort
o/$(MODE)/third_party/vqsort: $(THIRD_PARTY_VQSORT_CHECKS)

24732
third_party/vqsort/vqsort_i32a.S vendored Normal file

File diff suppressed because it is too large Load diff

23679
third_party/vqsort/vqsort_i64a.S vendored Normal file

File diff suppressed because it is too large Load diff

29
third_party/vqsort/vqsort_int32.c vendored Normal file
View file

@ -0,0 +1,29 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2023 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/mem/alg.h"
#include "libc/nexgen32e/x86feature.h"
#include "third_party/vqsort/vqsort.h"
/**
 * Sorts an array of signed 32-bit integers in place.
 *
 * The AVX2 implementation is used when X86_HAVE(AVX2) holds. Otherwise
 * radix_sort_int32() is tried, and if it can't allocate its scratch
 * memory the array is sorted with _intsort() instead, so the array is
 * never left unsorted on return.
 *
 * @param A is the array to sort
 * @param n is the number of elements in A
 */
void vqsort_int32(int32_t *A, size_t n) {
  if (X86_HAVE(AVX2)) {
    vqsort_int32_avx2(A, n);
  } else if (!radix_sort_int32(A, n)) {
    // radix_sort_int32() returns false before modifying A when out of
    // memory; this function can't report errors, so sort in place
    _intsort(A, n);
  }
}

29
third_party/vqsort/vqsort_int64.c vendored Normal file
View file

@ -0,0 +1,29 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2023 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/mem/alg.h"
#include "libc/nexgen32e/x86feature.h"
#include "third_party/vqsort/vqsort.h"
/**
 * Sorts an array of signed 64-bit integers in place.
 *
 * The AVX2 implementation is used when X86_HAVE(AVX2) holds. Otherwise
 * radix_sort_int64() is tried, and if it can't allocate its scratch
 * memory the array is sorted with _longsort() instead, so the array is
 * never left unsorted on return.
 *
 * @param A is the array to sort
 * @param n is the number of elements in A
 */
void vqsort_int64(int64_t *A, size_t n) {
  if (X86_HAVE(AVX2)) {
    vqsort_int64_avx2(A, n);
  } else if (!radix_sort_int64(A, n)) {
    // radix_sort_int64() returns false before modifying A when out of
    // memory; this function can't report errors, so sort in place
    _longsort(A, n);
  }
}