mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-06-28 15:28:30 +00:00
Apply fixes and speedups
This commit is contained in:
parent
7521bf9e73
commit
725f4d79f6
36 changed files with 682 additions and 334 deletions
43
libc/bits/bextra.c
Normal file
43
libc/bits/bextra.c
Normal file
|
@ -0,0 +1,43 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
|
||||
/**
 * Extracts bit field from array.
 *
 * The array is read as a densely packed sequence of `b`-bit fields,
 * where field `i` begins at bit `i * b` counting from the least
 * significant bit of `p[0]`. A field may straddle two adjacent words,
 * in which case `p[i * b / w + 1]` is read too.
 *
 * @param p points to the packed words
 * @param i is the field index, in units of `b` bits
 * @param b is the field width in bits; zero yields zero; a nonzero
 *     width that is a multiple of the word size selects word `i`
 *     whole; any other width is reduced modulo the word size
 * @return field value, zero-extended
 */
unsigned bextra(const unsigned *p, size_t i, char b) {
  const unsigned w = sizeof(unsigned) * CHAR_BIT;
  unsigned n, k, r;
  if (!b) return 0;
  n = b & (w - 1);
  /* masking turns a full-word width into zero; handle it up front so
     the index isn't multiplied away and no shift count goes negative */
  if (!n) return p[i];
  i *= n;
  k = i & (w - 1); /* bit offset of the field inside its first word */
  i /= w;          /* index of the word holding the field's low bits */
  if (k <= w - n) {
    /* field lies entirely within one word */
    return (p[i] >> k) & (~0u >> (w - n));
  }
  /* field straddles a word boundary: splice low and high parts */
  r = p[i] >> k;
  r |= p[i + 1] << (w - k);
  return r & (~0u >> (w - n));
}
|
|
@ -26,6 +26,7 @@ bool cmpxchg(void *, intptr_t, intptr_t, size_t);
|
|||
bool lockcmpxchg(void *, intptr_t, intptr_t, size_t);
|
||||
intptr_t atomic_load(void *, size_t);
|
||||
intptr_t atomic_store(void *, intptr_t, size_t);
|
||||
unsigned bextra(const unsigned *, size_t, char);
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § bits » no assembly required ─╬─│┼
|
||||
|
|
|
@ -97,7 +97,29 @@ void *memmove(void *dst, const void *src, size_t n) {
|
|||
d = dst;
|
||||
s = src;
|
||||
if (IsTiny()) {
|
||||
if (d <= s) {
|
||||
uint16_t w1, w2;
|
||||
uint32_t l1, l2;
|
||||
uint64_t q1, q2;
|
||||
if (n <= 16) {
|
||||
if (n >= 8) {
|
||||
__builtin_memcpy(&q1, s, 8);
|
||||
__builtin_memcpy(&q2, s + n - 8, 8);
|
||||
__builtin_memcpy(d, &q1, 8);
|
||||
__builtin_memcpy(d + n - 8, &q2, 8);
|
||||
} else if (n >= 4) {
|
||||
__builtin_memcpy(&l1, s, 4);
|
||||
__builtin_memcpy(&l2, s + n - 4, 4);
|
||||
__builtin_memcpy(d, &l1, 4);
|
||||
__builtin_memcpy(d + n - 4, &l2, 4);
|
||||
} else if (n >= 2) {
|
||||
__builtin_memcpy(&w1, s, 2);
|
||||
__builtin_memcpy(&w2, s + n - 2, 2);
|
||||
__builtin_memcpy(d, &w1, 2);
|
||||
__builtin_memcpy(d + n - 2, &w2, 2);
|
||||
} else if (n) {
|
||||
*d = *s;
|
||||
}
|
||||
} else if (d <= s) {
|
||||
asm("rep movsb"
|
||||
: "+D"(d), "+S"(s), "+c"(n), "=m"(*(char(*)[n])dst)
|
||||
: "m"(*(char(*)[n])src));
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
relegated wontreturn void __assert_fail(const char *expr, const char *file,
|
||||
int line) {
|
||||
static bool noreentry;
|
||||
__printf("%s:%d: assert(%s) failed\r\n", file, line, expr);
|
||||
__printf("\r\n%s:%d: assert(%s) failed\r\n", file, line, expr);
|
||||
if (cmpxchg(&noreentry, false, true)) {
|
||||
if (weaken(__die)) {
|
||||
weaken(__die)();
|
||||
|
|
74
libc/runtime/longsort.c
Normal file
74
libc/runtime/longsort.c
Normal file
|
@ -0,0 +1,74 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/dce.h"
|
||||
#include "libc/intrin/asan.internal.h"
|
||||
#include "libc/nexgen32e/bsr.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
|
||||
/**
 * Sorts longs ascending, in place, with a fixed compare-exchange
 * schedule: which slots get compared depends only on `n` and `t`,
 * never on the values stored in the array.
 *
 * @param x is the array to sort
 * @param n is the number of elements in `x`
 * @param t is the starting stride; `n - t` is evaluated in unsigned
 *     arithmetic, so `t` is expected to be smaller than `n`
 */
forceinline void longsorter(long *x, size_t n, size_t t) {
  long lo, hi, tmp, step, span, k;
  step = t;
  while (step > 0) {
    /* pair every slot whose `step` bit is clear with its partner
       `step` positions further on */
    for (k = 0; k < n - step; ++k) {
      if (k & step) continue;
      lo = x[k];
      hi = x[k + step];
      if (lo > hi) {
        tmp = lo;
        lo = hi;
        hi = tmp;
      }
      x[k] = lo;
      x[k + step] = hi;
    }
    /* merge passes over the wider strides, widest first */
    for (span = t; span > step; span >>= 1) {
      for (k = 0; k < n - span; ++k) {
        if (k & step) continue;
        lo = x[k + step];
        hi = x[k + span];
        if (lo > hi) {
          tmp = lo;
          lo = hi;
          hi = tmp;
        }
        x[k + step] = lo;
        x[k + span] = hi;
      }
    }
    step >>= 1;
  }
}
|
||||
|
||||
/*
 * Instance of longsorter() built with this function's own attribute
 * macros. NOTE(review): microarchitecture("avx2") presumably enables
 * AVX2 code generation for this one function -- confirm against the
 * macro's definition.
 */
static microarchitecture("avx2") optimizespeed noasan void longsort_avx2(
    long *x, size_t n, size_t t) {
  longsorter(x, n, t);
}
|
||||
|
||||
/*
 * Instance of longsorter() carrying no microarchitecture attribute;
 * longsort() falls back to it when X86_HAVE(AVX2) is false.
 */
static optimizesize noasan void longsort_pure(
    long *x, size_t n, size_t t) {
  longsorter(x, n, t);
}
|
||||
|
||||
/**
|
||||
* Sorting algorithm for longs that doesn't take long.
|
||||
*/
|
||||
void longsort(long *x, size_t n) {
|
||||
size_t t, m;
|
||||
if (IsAsan()) {
|
||||
if (__builtin_mul_overflow(n, sizeof(long), &m)) m = -1;
|
||||
__asan_check(x, m);
|
||||
}
|
||||
if (n > 1) {
|
||||
t = 1ul << bsrl(n - 1);
|
||||
if (X86_HAVE(AVX2)) return longsort_avx2(x, n, t);
|
||||
return longsort_pure(x, n, t);
|
||||
}
|
||||
}
|
|
@ -17,15 +17,14 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/alg/alg.h"
|
||||
#include "libc/assert.h"
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/elf/def.h"
|
||||
#include "libc/elf/elf.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/intrin/asan.internal.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/log/libfatal.internal.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/runtime/internal.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
|
@ -44,14 +43,15 @@
|
|||
struct SymbolTable *OpenSymbolTable(const char *filename) {
|
||||
int fd;
|
||||
void *map;
|
||||
long *stp;
|
||||
struct stat st;
|
||||
size_t n, m, tsz;
|
||||
unsigned i, j, k, x;
|
||||
unsigned i, j, x;
|
||||
const Elf64_Ehdr *elf;
|
||||
const char *name_base;
|
||||
struct SymbolTable *t;
|
||||
const Elf64_Sym *symtab, *sym;
|
||||
ptrdiff_t names_offset, name_base_offset, extra_offset;
|
||||
ptrdiff_t names_offset, name_base_offset, stp_offset;
|
||||
map = MAP_FAILED;
|
||||
if ((fd = open(filename, O_RDONLY)) == -1) return 0;
|
||||
if (fstat(fd, &st) == -1) goto SystemError;
|
||||
|
@ -69,21 +69,20 @@ struct SymbolTable *OpenSymbolTable(const char *filename) {
|
|||
tsz += sizeof(unsigned) * n;
|
||||
name_base_offset = tsz;
|
||||
tsz += m;
|
||||
extra_offset = tsz;
|
||||
tsz = ROUNDUP(tsz, FRAMESIZE);
|
||||
stp_offset = tsz;
|
||||
tsz += sizeof(const Elf64_Sym *) * n;
|
||||
tsz = ROUNDUP(tsz, FRAMESIZE);
|
||||
t = mmap(0, tsz, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
|
||||
if (t == MAP_FAILED) goto SystemError;
|
||||
if (IsAsan()) {
|
||||
__asan_poison((intptr_t)((char *)t + extra_offset), tsz - extra_offset,
|
||||
kAsanHeapOverrun);
|
||||
}
|
||||
t->mapsize = tsz;
|
||||
t->names = (const unsigned *)((const char *)t + names_offset);
|
||||
t->name_base = (const char *)((const char *)t + name_base_offset);
|
||||
t->names = (unsigned *)((char *)t + names_offset);
|
||||
t->name_base = (char *)((char *)t + name_base_offset);
|
||||
GetElfVirtualAddressRange(elf, st.st_size, &t->addr_base, &t->addr_end);
|
||||
memcpy(t->name_base, name_base, m);
|
||||
--t->addr_end;
|
||||
for (j = i = 0; i < n; ++i) {
|
||||
stp = (long *)((char *)t + stp_offset);
|
||||
for (m = i = 0; i < n; ++i) {
|
||||
sym = symtab + i;
|
||||
if (!(sym->st_size > 0 && (ELF64_ST_TYPE(sym->st_info) == STT_FUNC ||
|
||||
ELF64_ST_TYPE(sym->st_info) == STT_OBJECT))) {
|
||||
|
@ -92,23 +91,25 @@ struct SymbolTable *OpenSymbolTable(const char *filename) {
|
|||
if (sym->st_value > t->addr_end) continue;
|
||||
if (sym->st_value < t->addr_base) continue;
|
||||
x = sym->st_value - t->addr_base;
|
||||
for (k = j; k && x <= t->symbols[k - 1].x; --k) {
|
||||
t->symbols[k] = t->symbols[k - 1];
|
||||
t->names[k] = t->names[k - 1];
|
||||
}
|
||||
if (k && t->symbols[k - 1].y >= x) {
|
||||
t->symbols[k - 1].y = x - 1;
|
||||
}
|
||||
t->names[k] = sym->st_name;
|
||||
t->symbols[k].x = x;
|
||||
stp[m++] = (unsigned long)x << 32 | i;
|
||||
}
|
||||
longsort(stp, m);
|
||||
for (j = i = 0; i < m; ++i) {
|
||||
sym = symtab + (stp[i] & 0x7fffffff);
|
||||
x = stp[i] >> 32;
|
||||
if (j && x == t->symbols[j - 1].x) --j;
|
||||
if (j && t->symbols[j - 1].y >= x) t->symbols[j - 1].y = x - 1;
|
||||
t->names[j] = sym->st_name;
|
||||
t->symbols[j].x = x;
|
||||
if (sym->st_size) {
|
||||
t->symbols[k].y = x + sym->st_size - 1;
|
||||
t->symbols[j].y = x + sym->st_size - 1;
|
||||
} else {
|
||||
t->symbols[k].y = t->addr_end - t->addr_base;
|
||||
t->symbols[j].y = t->addr_end - t->addr_base;
|
||||
}
|
||||
j++;
|
||||
++j;
|
||||
}
|
||||
t->count = j;
|
||||
munmap(stp, ROUNDUP(sizeof(const Elf64_Sym *) * n, FRAMESIZE));
|
||||
munmap(map, st.st_size);
|
||||
close(fd);
|
||||
return t;
|
||||
|
|
|
@ -34,185 +34,177 @@ asm(".include \"libc/disclaimer.inc\"");
|
|||
|
||||
typedef int (*cmpfun)(const void *, const void *, void *);
|
||||
|
||||
forceinline unsigned bsfz0(unsigned x) {
|
||||
if (x) {
|
||||
return bsf(x);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
struct SmoothSort {
|
||||
size_t lp[12 * sizeof(size_t)];
|
||||
unsigned char *ar[14 * sizeof(size_t) + 1];
|
||||
unsigned char tmp[256];
|
||||
};
|
||||
|
||||
/**
 * Counts trailing zero bits.
 *
 * @param x is the word to scan
 * @return index of the lowest set bit, or 0 if `x` is zero
 */
static inline int ntz(unsigned long x) {
  /* __builtin_ctzl() is undefined for zero, but pntz() passes
     p[0] - 1 and relies on getting 0 back in that case */
  return x ? __builtin_ctzl(x) : 0;
}
|
||||
|
||||
forceinline unsigned pntz(unsigned p[2]) {
|
||||
unsigned r;
|
||||
assert(p[0] != 0);
|
||||
r = bsfz0(p[0] - 1);
|
||||
if (r != 0 ||
|
||||
(r = 8 * sizeof(unsigned) + bsfz0(p[1])) != 8 * sizeof(unsigned)) {
|
||||
static inline int pntz(size_t p[2]) {
|
||||
int r = ntz(p[0] - 1);
|
||||
if (r != 0 || (r = CHAR_BIT * sizeof(size_t) + ntz(p[1])) !=
|
||||
CHAR_BIT * sizeof(size_t)) {
|
||||
return r;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void cycle(size_t width, unsigned char *ar[], size_t n) {
|
||||
unsigned i, l;
|
||||
unsigned char tmp[256];
|
||||
if (n < 2) return;
|
||||
ar[n] = tmp;
|
||||
/**
 * Shifts a two-word bitset left.
 *
 * @param p is the bitset, where p[0] is the low word and p[1] the high
 * @param n is the shift count, from zero up to but excluding twice
 *     the bit width of size_t
 */
static inline void smoothsort_shl(size_t p[2], int n) {
  if (n >= CHAR_BIT * sizeof(size_t)) {
    /* a whole-word shift is just moving the low word up */
    n -= CHAR_BIT * sizeof(size_t);
    p[1] = p[0];
    p[0] = 0;
  }
  /* with nothing left to shift, the carry expression below would
     shift by the full word width, which C leaves undefined */
  if (!n) return;
  p[1] <<= n;
  p[1] |= p[0] >> (sizeof(size_t) * CHAR_BIT - n);
  p[0] <<= n;
}
|
||||
|
||||
/**
 * Shifts a two-word bitset right.
 *
 * @param p is the bitset, where p[0] is the low word and p[1] the high
 * @param n is the shift count, from zero up to but excluding twice
 *     the bit width of size_t
 */
static inline void smoothsort_shr(size_t p[2], int n) {
  if (n >= CHAR_BIT * sizeof(size_t)) {
    /* a whole-word shift is just moving the high word down */
    n -= CHAR_BIT * sizeof(size_t);
    p[0] = p[1];
    p[1] = 0;
  }
  /* with nothing left to shift, the carry expression below would
     shift by the full word width, which C leaves undefined */
  if (!n) return;
  p[0] >>= n;
  p[0] |= p[1] << (sizeof(size_t) * CHAR_BIT - n);
  p[1] >>= n;
}
|
||||
|
||||
static void smoothsort_cycle(struct SmoothSort *s, size_t width, int n) {
|
||||
size_t l;
|
||||
int i;
|
||||
if (n < 2) {
|
||||
return;
|
||||
}
|
||||
s->ar[n] = s->tmp;
|
||||
while (width) {
|
||||
l = sizeof(tmp) < width ? sizeof(tmp) : width;
|
||||
memcpy(ar[n], ar[0], l);
|
||||
l = sizeof(s->tmp) < width ? sizeof(s->tmp) : width;
|
||||
memcpy(s->ar[n], s->ar[0], l);
|
||||
for (i = 0; i < n; i++) {
|
||||
memcpy(ar[i], ar[i + 1], l);
|
||||
ar[i] += l;
|
||||
memcpy(s->ar[i], s->ar[i + 1], l);
|
||||
s->ar[i] += l;
|
||||
}
|
||||
width -= l;
|
||||
}
|
||||
}
|
||||
|
||||
forceinline void shl(unsigned p[2], size_t n) {
|
||||
assert(n > 0);
|
||||
if (n >= CHAR_BIT * sizeof(unsigned)) {
|
||||
n -= CHAR_BIT * sizeof(unsigned);
|
||||
p[1] = p[0];
|
||||
p[0] = 0;
|
||||
}
|
||||
p[1] <<= n;
|
||||
p[1] |= p[0] >> (sizeof(unsigned) * CHAR_BIT - n);
|
||||
p[0] <<= n;
|
||||
}
|
||||
|
||||
forceinline void shr(unsigned p[2], size_t n) {
|
||||
assert(n > 0);
|
||||
if (n >= CHAR_BIT * sizeof(unsigned)) {
|
||||
n -= CHAR_BIT * sizeof(unsigned);
|
||||
p[0] = p[1];
|
||||
p[1] = 0;
|
||||
}
|
||||
p[0] >>= n;
|
||||
p[0] |= p[1] << (sizeof(unsigned) * CHAR_BIT - n);
|
||||
p[1] >>= n;
|
||||
}
|
||||
|
||||
static void sift(unsigned char *head, cmpfun cmp, void *arg, int pshift,
|
||||
unsigned char *ar[hasatleast 14 * sizeof(unsigned) + 1],
|
||||
unsigned lp[hasatleast 12 * sizeof(unsigned)], size_t width) {
|
||||
unsigned i;
|
||||
static void smoothsort_sift(struct SmoothSort *s, unsigned char *head,
|
||||
size_t width, cmpfun cmp, void *arg, int pshift) {
|
||||
unsigned char *rt, *lf;
|
||||
i = 1;
|
||||
ar[0] = head;
|
||||
int i = 1;
|
||||
s->ar[0] = head;
|
||||
while (pshift > 1) {
|
||||
rt = head - width;
|
||||
lf = head - width - lp[pshift - 2];
|
||||
if ((*cmp)(ar[0], lf, arg) >= 0 && (*cmp)(ar[0], rt, arg) >= 0) {
|
||||
lf = head - width - s->lp[pshift - 2];
|
||||
if (cmp(s->ar[0], lf, arg) >= 0 && cmp(s->ar[0], rt, arg) >= 0) {
|
||||
break;
|
||||
}
|
||||
if ((*cmp)(lf, rt, arg) >= 0) {
|
||||
ar[i++] = lf;
|
||||
if (cmp(lf, rt, arg) >= 0) {
|
||||
s->ar[i++] = lf;
|
||||
head = lf;
|
||||
pshift -= 1;
|
||||
} else {
|
||||
ar[i++] = rt;
|
||||
s->ar[i++] = rt;
|
||||
head = rt;
|
||||
pshift -= 2;
|
||||
}
|
||||
}
|
||||
cycle(width, ar, i);
|
||||
smoothsort_cycle(s, width, i);
|
||||
}
|
||||
|
||||
static void trinkle(unsigned char *head, cmpfun cmp, void *arg, unsigned pp[2],
|
||||
unsigned char *ar[hasatleast 14 * sizeof(unsigned) + 1],
|
||||
unsigned lp[hasatleast 12 * sizeof(unsigned)], size_t width,
|
||||
int pshift, int trusty) {
|
||||
unsigned p[2];
|
||||
unsigned i, trail;
|
||||
static void smoothsort_trinkle(struct SmoothSort *s, unsigned char *head,
|
||||
size_t width, cmpfun cmp, void *arg,
|
||||
size_t pp[2], int pshift, int trusty) {
|
||||
unsigned char *stepson, *rt, *lf;
|
||||
i = 1;
|
||||
size_t p[2];
|
||||
int i = 1;
|
||||
int trail;
|
||||
p[0] = pp[0];
|
||||
p[1] = pp[1];
|
||||
ar[0] = head;
|
||||
s->ar[0] = head;
|
||||
while (p[0] != 1 || p[1] != 0) {
|
||||
stepson = head - lp[pshift];
|
||||
if ((*cmp)(stepson, ar[0], arg) <= 0) {
|
||||
stepson = head - s->lp[pshift];
|
||||
if (cmp(stepson, s->ar[0], arg) <= 0) {
|
||||
break;
|
||||
}
|
||||
if (!trusty && pshift > 1) {
|
||||
rt = head - width;
|
||||
lf = head - width - lp[pshift - 2];
|
||||
if ((*cmp)(rt, stepson, arg) >= 0 || (*cmp)(lf, stepson, arg) >= 0) {
|
||||
lf = head - width - s->lp[pshift - 2];
|
||||
if (cmp(rt, stepson, arg) >= 0 || cmp(lf, stepson, arg) >= 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
ar[i++] = stepson;
|
||||
s->ar[i++] = stepson;
|
||||
head = stepson;
|
||||
trail = pntz(p);
|
||||
shr(p, trail);
|
||||
smoothsort_shr(p, trail);
|
||||
pshift += trail;
|
||||
trusty = 0;
|
||||
}
|
||||
if (!trusty) {
|
||||
cycle(width, ar, i);
|
||||
sift(head, cmp, arg, pshift, ar, lp, width);
|
||||
smoothsort_cycle(s, width, i);
|
||||
smoothsort_sift(s, head, width, cmp, arg, pshift);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Smoothsort is an adaptive linearithmic sorting algorithm that's
|
||||
* nearly linear on mostly-sorted data, and consumes constant memory.
|
||||
*/
|
||||
static noinline void smoothsort(
|
||||
void *base, size_t count, size_t width, cmpfun cmp, void *arg,
|
||||
unsigned lp[hasatleast 12 * sizeof(unsigned)],
|
||||
unsigned char *ar[hasatleast 14 * sizeof(unsigned) + 1]) {
|
||||
unsigned i, size = width * count;
|
||||
static void smoothsort(struct SmoothSort *s, void *base, size_t nel,
|
||||
size_t width, cmpfun cmp, void *arg) {
|
||||
size_t i, size = width * nel;
|
||||
unsigned char *head, *high;
|
||||
unsigned p[2] = {1, 0};
|
||||
unsigned pshift = 1;
|
||||
unsigned trail;
|
||||
size_t p[2] = {1, 0};
|
||||
int pshift = 1;
|
||||
int trail;
|
||||
if (!size) return;
|
||||
head = (unsigned char *)base;
|
||||
head = base;
|
||||
high = head + size - width;
|
||||
/* Precompute Leonardo numbers, scaled by element width */
|
||||
for (lp[0] = lp[1] = width, i = 2;
|
||||
(lp[i] = lp[i - 2] + lp[i - 1] + width) < size; i++) {
|
||||
for (s->lp[0] = s->lp[1] = width, i = 2;
|
||||
(s->lp[i] = s->lp[i - 2] + s->lp[i - 1] + width) < size; i++) {
|
||||
}
|
||||
while (head < high) {
|
||||
if ((p[0] & 3) == 3) {
|
||||
sift(head, cmp, arg, pshift, ar, lp, width);
|
||||
shr(p, 2);
|
||||
smoothsort_sift(s, head, width, cmp, arg, pshift);
|
||||
smoothsort_shr(p, 2);
|
||||
pshift += 2;
|
||||
} else {
|
||||
if (lp[pshift - 1] >= high - head) {
|
||||
trinkle(head, cmp, arg, p, ar, lp, width, pshift, 0);
|
||||
if (s->lp[pshift - 1] >= high - head) {
|
||||
smoothsort_trinkle(s, head, width, cmp, arg, p, pshift, 0);
|
||||
} else {
|
||||
sift(head, cmp, arg, pshift, ar, lp, width);
|
||||
smoothsort_sift(s, head, width, cmp, arg, pshift);
|
||||
}
|
||||
if (pshift == 1) {
|
||||
shl(p, 1);
|
||||
smoothsort_shl(p, 1);
|
||||
pshift = 0;
|
||||
} else {
|
||||
shl(p, pshift - 1);
|
||||
smoothsort_shl(p, pshift - 1);
|
||||
pshift = 1;
|
||||
}
|
||||
}
|
||||
p[0] |= 1;
|
||||
head += width;
|
||||
}
|
||||
trinkle(head, cmp, arg, p, ar, lp, width, pshift, 0);
|
||||
smoothsort_trinkle(s, head, width, cmp, arg, p, pshift, 0);
|
||||
while (pshift != 1 || p[0] != 1 || p[1] != 0) {
|
||||
if (pshift <= 1) {
|
||||
trail = pntz(p);
|
||||
shr(p, trail);
|
||||
smoothsort_shr(p, trail);
|
||||
pshift += trail;
|
||||
} else {
|
||||
shl(p, 2);
|
||||
smoothsort_shl(p, 2);
|
||||
pshift -= 2;
|
||||
p[0] ^= 7;
|
||||
shr(p, 1);
|
||||
trinkle(head - lp[pshift] - width, cmp, arg, p, ar, lp, width, pshift + 1,
|
||||
1);
|
||||
shl(p, 1);
|
||||
smoothsort_shr(p, 1);
|
||||
smoothsort_trinkle(s, head - s->lp[pshift] - width, width, cmp, arg, p,
|
||||
pshift + 1, 1);
|
||||
smoothsort_shl(p, 1);
|
||||
p[0] |= 1;
|
||||
trinkle(head - width, cmp, arg, p, ar, lp, width, pshift, 1);
|
||||
smoothsort_trinkle(s, head - width, width, cmp, arg, p, pshift, 1);
|
||||
}
|
||||
head -= width;
|
||||
}
|
||||
|
@ -229,9 +221,8 @@ static noinline void smoothsort(
|
|||
* @see qsort()
|
||||
*/
|
||||
void qsort_r(void *base, size_t count, size_t width, cmpfun cmp, void *arg) {
|
||||
unsigned lp[12 * sizeof(unsigned)];
|
||||
unsigned char *ar[14 * sizeof(unsigned) + 1];
|
||||
smoothsort(base, count, width, (cmpfun)cmp, arg, lp, ar);
|
||||
struct SmoothSort s;
|
||||
smoothsort(&s, base, count, width, cmp, arg);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -241,9 +232,10 @@ void qsort_r(void *base, size_t count, size_t width, cmpfun cmp, void *arg) {
|
|||
* @param count is the item count
|
||||
* @param width is the size of each item
|
||||
* @param cmp is a callback returning <0, 0, or >0
|
||||
* @see qsort_r()
|
||||
* @see longsort(), djbsort()
|
||||
*/
|
||||
void qsort(void *base, size_t count, size_t width,
|
||||
int cmp(const void *, const void *)) {
|
||||
qsort_r(base, count, width, (cmpfun)cmp, NULL);
|
||||
struct SmoothSort s;
|
||||
smoothsort(&s, base, count, width, (cmpfun)cmp, 0);
|
||||
}
|
|
@ -77,6 +77,7 @@ int vhangup(void);
|
|||
int getdtablesize(void);
|
||||
int sethostname(const char *, size_t);
|
||||
int acct(const char *);
|
||||
void longsort(long *, size_t);
|
||||
|
||||
bool _isheap(void *);
|
||||
int NtGetVersion(void);
|
||||
|
|
|
@ -88,6 +88,10 @@ o/$(MODE)/libc/runtime/mman.greg.o: \
|
|||
-ffreestanding \
|
||||
-mgeneral-regs-only
|
||||
|
||||
o/$(MODE)/libc/runtime/qsort.o: \
|
||||
OVERRIDE_CFLAGS += \
|
||||
-Og
|
||||
|
||||
o/$(MODE)/libc/runtime/ftrace.greg.o: \
|
||||
OVERRIDE_CFLAGS += \
|
||||
-mgeneral-regs-only
|
||||
|
|
|
@ -24,7 +24,7 @@ struct SymbolTable {
|
|||
struct SymbolTable *GetSymbolTable(void);
|
||||
const char *FindComBinary(void);
|
||||
const char *FindDebugBinary(void);
|
||||
struct SymbolTable *OpenSymbolTable(const char *) nodiscard;
|
||||
struct SymbolTable *OpenSymbolTable(const char *);
|
||||
int CloseSymbolTable(struct SymbolTable **);
|
||||
void __hook(void *, struct SymbolTable *);
|
||||
|
||||
|
|
|
@ -16,8 +16,13 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/zip.h"
|
||||
|
||||
typedef char v16qi __attribute__((__vector_size__(16)));
|
||||
typedef short v8hi __attribute__((__vector_size__(16)));
|
||||
typedef long long v2di __attribute__((__vector_size__(16), __aligned__(1)));
|
||||
|
||||
/**
|
||||
* Locates End Of Central Directory record in ZIP file.
|
||||
*
|
||||
|
@ -32,9 +37,23 @@
|
|||
* @return pointer to EOCD64 or EOCD, or NULL if not found
|
||||
*/
|
||||
void *GetZipCdir(const uint8_t *p, size_t n) {
|
||||
v2di x;
|
||||
size_t i, j;
|
||||
v8hi pk = {READ16LE("PK"), READ16LE("PK"), READ16LE("PK"), READ16LE("PK"),
|
||||
READ16LE("PK"), READ16LE("PK"), READ16LE("PK"), READ16LE("PK")};
|
||||
i = n - 4;
|
||||
asm("" : "+x"(pk));
|
||||
do {
|
||||
if (i >= 14) {
|
||||
x = *(const v2di *)(p + i - 14);
|
||||
if (!(__builtin_ia32_pmovmskb128(
|
||||
(v16qi)__builtin_ia32_pcmpeqw128((v8hi)x, pk)) |
|
||||
__builtin_ia32_pmovmskb128((v16qi)__builtin_ia32_pcmpeqw128(
|
||||
(v8hi)__builtin_ia32_psrldqi128(x, 8), pk)))) {
|
||||
i -= 13;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (READ32LE(p + i) == kZipCdir64LocatorMagic &&
|
||||
i + kZipCdir64LocatorSize <= n &&
|
||||
IsZipCdir64(p, n, ZIP_LOCATE64_OFFSET(p + i))) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue