Apply fixes and speedups

This commit is contained in:
Justine Tunney 2021-10-04 03:23:31 -07:00
parent 7521bf9e73
commit 725f4d79f6
36 changed files with 682 additions and 334 deletions

View file

@ -28,7 +28,7 @@
relegated wontreturn void __assert_fail(const char *expr, const char *file,
int line) {
static bool noreentry;
__printf("%s:%d: assert(%s) failed\r\n", file, line, expr);
__printf("\r\n%s:%d: assert(%s) failed\r\n", file, line, expr);
if (cmpxchg(&noreentry, false, true)) {
if (weaken(__die)) {
weaken(__die)();

74
libc/runtime/longsort.c Normal file
View file

@ -0,0 +1,74 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/nexgen32e/bsr.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/runtime/runtime.h"
/**
 * Sorts x[0..n) ascending using a fixed schedule of compare-exchanges.
 *
 * Which pairs get compared depends only on n and t, never on the data;
 * every visited pair is loaded, ordered, and stored back unconditionally.
 *
 * @param x is the array to sort in place
 * @param n is the number of elements; the caller passes n > 1
 * @param t is the starting stride; the caller passes a power of two
 *     that is less than n, so `n - stride` below never wraps
 */
forceinline void longsorter(long *x, size_t n, size_t t) {
  long lo, hi, tmp, near, far, k;
  for (near = t; near > 0; near >>= 1) {
    /* order each x[k], x[k+near] pair whose k has the `near` bit clear */
    for (k = 0; k < n - near; ++k) {
      if (k & near) continue;
      lo = x[k];
      hi = x[k + near];
      if (hi < lo) {
        tmp = lo;
        lo = hi;
        hi = tmp;
      }
      x[k] = lo;
      x[k + near] = hi;
    }
    /* then order x[k+near], x[k+far] for each wider stride, widest first */
    for (far = t; far > near; far >>= 1) {
      for (k = 0; k < n - far; ++k) {
        if (k & near) continue;
        lo = x[k + near];
        hi = x[k + far];
        if (hi < lo) {
          tmp = lo;
          lo = hi;
          hi = tmp;
        }
        x[k + near] = lo;
        x[k + far] = hi;
      }
    }
  }
}
/*
 * Instance of longsorter() selected by longsort() when X86_HAVE(AVX2)
 * is true. The attribute macros come from project headers; presumably
 * they build this copy for the AVX2 target, favor speed, and disable
 * ASAN instrumentation -- TODO confirm against their definitions.
 */
static microarchitecture("avx2") optimizespeed noasan
void longsort_avx2(long *x, size_t n, size_t t) {
longsorter(x, n, t);
}
/*
 * Instance of longsorter() that longsort() falls back to when the AVX2
 * check fails. `optimizesize` and `noasan` are project attribute macros;
 * presumably size-optimized and not ASAN-instrumented -- TODO confirm.
 */
static optimizesize noasan void longsort_pure(long *x, size_t n, size_t t) {
longsorter(x, n, t);
}
/**
 * Sorts an array of longs in place, in ascending order.
 *
 * Under ASAN the whole `n * sizeof(long)` byte range is checked up
 * front (saturating to the maximum size if that product overflows),
 * because the worker functions are declared `noasan`.
 *
 * @param x points to the array
 * @param n is the number of elements; 0 and 1 need no sorting
 */
void longsort(long *x, size_t n) {
  size_t bytes, top;
  if (IsAsan()) {
    if (__builtin_mul_overflow(n, sizeof(long), &bytes)) {
      bytes = -1;
    }
    __asan_check(x, bytes);
  }
  if (n < 2) return;
  /* starting stride: presumably the highest power of two <= n - 1,
     assuming bsrl() yields the index of the top set bit */
  top = 1ul << bsrl(n - 1);
  if (X86_HAVE(AVX2)) {
    longsort_avx2(x, n, top);
  } else {
    longsort_pure(x, n, top);
  }
}

View file

@ -17,15 +17,14 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/alg/alg.h"
#include "libc/assert.h"
#include "libc/bits/bits.h"
#include "libc/calls/calls.h"
#include "libc/dce.h"
#include "libc/elf/def.h"
#include "libc/elf/elf.h"
#include "libc/errno.h"
#include "libc/intrin/asan.internal.h"
#include "libc/limits.h"
#include "libc/log/libfatal.internal.h"
#include "libc/macros.internal.h"
#include "libc/runtime/internal.h"
#include "libc/runtime/runtime.h"
@ -44,14 +43,15 @@
struct SymbolTable *OpenSymbolTable(const char *filename) {
int fd;
void *map;
long *stp;
struct stat st;
size_t n, m, tsz;
unsigned i, j, k, x;
unsigned i, j, x;
const Elf64_Ehdr *elf;
const char *name_base;
struct SymbolTable *t;
const Elf64_Sym *symtab, *sym;
ptrdiff_t names_offset, name_base_offset, extra_offset;
ptrdiff_t names_offset, name_base_offset, stp_offset;
map = MAP_FAILED;
if ((fd = open(filename, O_RDONLY)) == -1) return 0;
if (fstat(fd, &st) == -1) goto SystemError;
@ -69,21 +69,20 @@ struct SymbolTable *OpenSymbolTable(const char *filename) {
tsz += sizeof(unsigned) * n;
name_base_offset = tsz;
tsz += m;
extra_offset = tsz;
tsz = ROUNDUP(tsz, FRAMESIZE);
stp_offset = tsz;
tsz += sizeof(const Elf64_Sym *) * n;
tsz = ROUNDUP(tsz, FRAMESIZE);
t = mmap(0, tsz, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (t == MAP_FAILED) goto SystemError;
if (IsAsan()) {
__asan_poison((intptr_t)((char *)t + extra_offset), tsz - extra_offset,
kAsanHeapOverrun);
}
t->mapsize = tsz;
t->names = (const unsigned *)((const char *)t + names_offset);
t->name_base = (const char *)((const char *)t + name_base_offset);
t->names = (unsigned *)((char *)t + names_offset);
t->name_base = (char *)((char *)t + name_base_offset);
GetElfVirtualAddressRange(elf, st.st_size, &t->addr_base, &t->addr_end);
memcpy(t->name_base, name_base, m);
--t->addr_end;
for (j = i = 0; i < n; ++i) {
stp = (long *)((char *)t + stp_offset);
for (m = i = 0; i < n; ++i) {
sym = symtab + i;
if (!(sym->st_size > 0 && (ELF64_ST_TYPE(sym->st_info) == STT_FUNC ||
ELF64_ST_TYPE(sym->st_info) == STT_OBJECT))) {
@ -92,23 +91,25 @@ struct SymbolTable *OpenSymbolTable(const char *filename) {
if (sym->st_value > t->addr_end) continue;
if (sym->st_value < t->addr_base) continue;
x = sym->st_value - t->addr_base;
for (k = j; k && x <= t->symbols[k - 1].x; --k) {
t->symbols[k] = t->symbols[k - 1];
t->names[k] = t->names[k - 1];
}
if (k && t->symbols[k - 1].y >= x) {
t->symbols[k - 1].y = x - 1;
}
t->names[k] = sym->st_name;
t->symbols[k].x = x;
stp[m++] = (unsigned long)x << 32 | i;
}
longsort(stp, m);
for (j = i = 0; i < m; ++i) {
sym = symtab + (stp[i] & 0x7fffffff);
x = stp[i] >> 32;
if (j && x == t->symbols[j - 1].x) --j;
if (j && t->symbols[j - 1].y >= x) t->symbols[j - 1].y = x - 1;
t->names[j] = sym->st_name;
t->symbols[j].x = x;
if (sym->st_size) {
t->symbols[k].y = x + sym->st_size - 1;
t->symbols[j].y = x + sym->st_size - 1;
} else {
t->symbols[k].y = t->addr_end - t->addr_base;
t->symbols[j].y = t->addr_end - t->addr_base;
}
j++;
++j;
}
t->count = j;
munmap(stp, ROUNDUP(sizeof(const Elf64_Sym *) * n, FRAMESIZE));
munmap(map, st.st_size);
close(fd);
return t;

241
libc/runtime/qsort.c Normal file
View file

@ -0,0 +1,241 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright (C) 2011 by Valentin Ochs
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
*/
#include "libc/alg/alg.h"
#include "libc/assert.h"
#include "libc/nexgen32e/bsf.h"
#include "libc/str/str.h"
/* Emits the Smoothsort license/attribution text through an assembler
   .ident directive. */
asm(".ident\t\"\\n\\n\
Smoothsort (MIT License)\\n\
Copyright 2011 Valentin Ochs\\n\
Discovered by Edsger Dijkstra\"");
/* Pulls libc/disclaimer.inc into the assembly for this translation unit. */
asm(".include \"libc/disclaimer.inc\"");
/* Three-argument comparator used internally: (left, right, user arg).
   qsort_r() passes its callback through as-is; qsort() casts its
   two-argument callback to this type. */
typedef int (*cmpfun)(const void *, const void *, void *);
/* Scratch state shared by the smoothsort helpers in this file; qsort()
   and qsort_r() each keep one instance on the stack. */
struct SmoothSort {
/* byte offsets filled in by smoothsort() before sorting and indexed by
   pshift in smoothsort_sift() / smoothsort_trinkle() */
size_t lp[12 * sizeof(size_t)];
/* chain of element pointers collected by sift/trinkle and rotated by
   smoothsort_cycle(), which also stores a pointer to tmp at ar[n] */
unsigned char *ar[14 * sizeof(size_t) + 1];
/* staging buffer: smoothsort_cycle() moves elements through it in
   pieces of at most sizeof(tmp) bytes */
unsigned char tmp[256];
};
/**
 * Counts the trailing zero bits of x.
 *
 * @param x is the word to examine
 * @return index of the lowest set bit, or 0 when x is zero
 */
static inline int ntz(unsigned long x) {
  /* __builtin_ctzl(0) has an undefined result, yet pntz() passes zero
     words here and needs 0 back to fall through to the next word. */
  return x ? __builtin_ctzl(x) : 0;
}
/**
 * Counts trailing zero bits of the two-word value p with its lowest
 * bit masked off (p[0] is the low word, p[1] the high word).
 *
 * The low word is examined as `p[0] - 1`; only if that yields zero is
 * the high word consulted, in which case the word width is added.
 *
 * @return the bit count, or 0 if neither word contributes a nonzero count
 */
static inline int pntz(size_t p[2]) {
  int low, high;
  low = ntz(p[0] - 1);
  if (low != 0) {
    return low;
  }
  high = ntz(p[1]);
  if (high != 0) {
    return CHAR_BIT * sizeof(size_t) + high;
  }
  return 0;
}
/**
 * Shifts the two-word value p left by n bits in place.
 *
 * p[0] is the low word and p[1] the high word. Callers of
 * smoothsort_shl() and smoothsort_shr() in this file are expected to
 * pass n > 0; here n == 0 is additionally tolerated as a no-op.
 *
 * @param p is the two-word value to shift
 * @param n is the shift count, less than twice the word width
 */
static inline void smoothsort_shl(size_t p[2], int n) {
  const int bits = CHAR_BIT * sizeof(size_t);
  if (n >= bits) {
    /* a whole-word shift is a move of the low word into the high word */
    n -= bits;
    p[1] = p[0];
    p[0] = 0;
  }
  /* Skip when nothing remains: `p[0] >> (bits - 0)` would shift by the
     full word width, which is undefined behavior in C. */
  if (n) {
    p[1] <<= n;
    p[1] |= p[0] >> (bits - n);
    p[0] <<= n;
  }
}
/**
 * Shifts the two-word value p right by n bits in place.
 *
 * p[0] is the low word and p[1] the high word. Callers in this file
 * are expected to pass n > 0; n == 0 is tolerated as a no-op.
 *
 * @param p is the two-word value to shift
 * @param n is the shift count, less than twice the word width
 */
static inline void smoothsort_shr(size_t p[2], int n) {
  const int bits = CHAR_BIT * sizeof(size_t);
  if (n >= bits) {
    /* a whole-word shift is a move of the high word into the low word */
    n -= bits;
    p[0] = p[1];
    p[1] = 0;
  }
  /* Skip when nothing remains: `p[1] << (bits - 0)` would shift by the
     full word width, which is undefined behavior in C. */
  if (n) {
    p[0] >>= n;
    p[0] |= p[1] << (bits - n);
    p[1] >>= n;
  }
}
/**
 * Rotates the elements referenced by s->ar[0..n) by one position.
 *
 * The element at ar[0] is saved into s->tmp, each ar[k] receives the
 * contents of ar[k+1], and the last one receives the saved copy. Items
 * wider than s->tmp are moved in pieces; the ar[0..n) pointers are
 * advanced past each piece, so they are left pointing beyond the
 * elements on return.
 *
 * @param s holds the pointer chain and the staging buffer
 * @param width is the element size in bytes
 * @param n is the chain length; fewer than two entries is a no-op
 */
static void smoothsort_cycle(struct SmoothSort *s, size_t width, int n) {
  size_t chunk;
  int k;
  if (n < 2) return;
  /* the staging buffer closes the cycle as the (n+1)th slot */
  s->ar[n] = s->tmp;
  for (; width; width -= chunk) {
    chunk = width > sizeof(s->tmp) ? sizeof(s->tmp) : width;
    memcpy(s->ar[n], s->ar[0], chunk);
    for (k = 0; k < n; ++k) {
      memcpy(s->ar[k], s->ar[k + 1], chunk);
      s->ar[k] += chunk;
    }
  }
}
/**
 * Sifts the element at head down through the tree rooted there.
 *
 * At each level the two children are located relative to the current
 * position (one directly before it, the other a further
 * s->lp[pshift - 2] bytes back). Descent stops once the original root
 * compares >= both children; otherwise it continues into the larger
 * child. The visited positions are recorded in s->ar and rotated at
 * the end by smoothsort_cycle().
 *
 * @param s is the shared scratch state
 * @param head points at the root element
 * @param width is the element size in bytes
 * @param cmp is the three-argument comparator
 * @param arg is forwarded to cmp
 * @param pshift is the order of the tree rooted at head
 */
static void smoothsort_sift(struct SmoothSort *s, unsigned char *head,
                            size_t width, cmpfun cmp, void *arg, int pshift) {
  unsigned char *right, *left;
  int depth = 1;
  s->ar[0] = head;
  for (; pshift > 1;) {
    right = head - width;
    left = right - s->lp[pshift - 2];
    if (cmp(s->ar[0], left, arg) >= 0 && cmp(s->ar[0], right, arg) >= 0) {
      break;
    }
    if (cmp(left, right, arg) >= 0) {
      head = left;
      pshift -= 1;
    } else {
      head = right;
      pshift -= 2;
    }
    s->ar[depth++] = head;
  }
  smoothsort_cycle(s, width, depth);
}
/**
 * Moves the element at head leftward across preceding tree roots until
 * it is in order, then sifts it down inside the tree where it lands.
 *
 * Works on a private copy of the two-word bitmap pp, so the caller's
 * copy is left untouched. Each step looks at the root s->lp[pshift]
 * bytes before the current position and stops when that root compares
 * <= the element being placed, or (when the current tree is not known
 * to be ordered) when one of the current tree's children compares >=
 * that root.
 *
 * @param s is the shared scratch state
 * @param head points at the root being placed
 * @param width is the element size in bytes
 * @param cmp is the three-argument comparator
 * @param arg is forwarded to cmp
 * @param pp is the two-word bitmap (read only here)
 * @param pshift is the order of the tree rooted at head
 * @param trusty is nonzero when the tree at head needs no child checks;
 *     if it is still nonzero after the loop, nothing is moved
 */
static void smoothsort_trinkle(struct SmoothSort *s, unsigned char *head,
                               size_t width, cmpfun cmp, void *arg,
                               size_t pp[2], int pshift, int trusty) {
  size_t bits[2] = {pp[0], pp[1]};
  unsigned char *prev, *right, *left;
  int hops = 1;
  int skip;
  s->ar[0] = head;
  while (!(bits[0] == 1 && bits[1] == 0)) {
    prev = head - s->lp[pshift];
    if (cmp(prev, s->ar[0], arg) <= 0) {
      break;
    }
    if (pshift > 1 && !trusty) {
      right = head - width;
      left = right - s->lp[pshift - 2];
      if (cmp(right, prev, arg) >= 0) break;
      if (cmp(left, prev, arg) >= 0) break;
    }
    s->ar[hops++] = prev;
    head = prev;
    skip = pntz(bits);
    smoothsort_shr(bits, skip);
    pshift += skip;
    trusty = 0;
  }
  if (trusty) return;
  smoothsort_cycle(s, width, hops);
  smoothsort_sift(s, head, width, cmp, arg, pshift);
}
/*
 * Sorts nel elements of width bytes starting at base, in place, using
 * cmp(a, b, arg) for ordering and s as scratch space.
 *
 * The work happens in two passes over the array: a forward pass that
 * advances head one element at a time up to the last element, calling
 * sift/trinkle as it goes, and a backward pass that walks head back
 * down while repeatedly calling trinkle. The pair (p, pshift) is the
 * bookkeeping carried between steps; p looks like a two-word bitmap of
 * tree orders currently present and pshift the order of the rightmost
 * tree -- NOTE(review): inferred from usage, confirm against the
 * upstream smoothsort description.
 */
static void smoothsort(struct SmoothSort *s, void *base, size_t nel,
size_t width, cmpfun cmp, void *arg) {
size_t i, size = width * nel;
unsigned char *head, *high;
size_t p[2] = {1, 0};
int pshift = 1;
int trail;
/* nothing to do for zero elements or zero-width elements */
if (!size) return;
head = base;
/* high is the address of the last element */
high = head + size - width;
/* Precompute Leonardo numbers, scaled by element width */
for (s->lp[0] = s->lp[1] = width, i = 2;
(s->lp[i] = s->lp[i - 2] + s->lp[i - 1] + width) < size; i++) {
}
/* forward pass: runs once per element before the last */
while (head < high) {
if ((p[0] & 3) == 3) {
/* two lowest bits set: sift at head, then drop two bits and raise
   pshift by two */
smoothsort_sift(s, head, width, cmp, arg, pshift);
smoothsort_shr(p, 2);
pshift += 2;
} else {
/* trinkle when lp[pshift - 1] covers at least the bytes remaining
   before high; otherwise a plain sift is used */
if (s->lp[pshift - 1] >= high - head) {
smoothsort_trinkle(s, head, width, cmp, arg, p, pshift, 0);
} else {
smoothsort_sift(s, head, width, cmp, arg, pshift);
}
if (pshift == 1) {
smoothsort_shl(p, 1);
pshift = 0;
} else {
smoothsort_shl(p, pshift - 1);
pshift = 1;
}
}
/* the low bit is set again before every advance */
p[0] |= 1;
head += width;
}
/* head == high here: place the final element */
smoothsort_trinkle(s, head, width, cmp, arg, p, pshift, 0);
/* backward pass: ends once pshift is 1 and p is back to {1, 0} */
while (pshift != 1 || p[0] != 1 || p[1] != 0) {
if (pshift <= 1) {
/* skip to the next set bit of p and adjust pshift to match */
trail = pntz(p);
smoothsort_shr(p, trail);
pshift += trail;
} else {
/* pshift drops by two and two trinkle calls follow with trusty = 1:
   first at head - lp[pshift] - width with order pshift + 1, then at
   head - width with order pshift */
smoothsort_shl(p, 2);
pshift -= 2;
p[0] ^= 7;
smoothsort_shr(p, 1);
smoothsort_trinkle(s, head - s->lp[pshift] - width, width, cmp, arg, p,
pshift + 1, 1);
smoothsort_shl(p, 1);
p[0] |= 1;
smoothsort_trinkle(s, head - width, width, cmp, arg, p, pshift, 1);
}
head -= width;
}
}
/**
 * Sorts array in place, passing a user argument to the comparator.
 *
 * Scratch state lives on the caller's stack; no heap memory is
 * requested by this function itself.
 *
 * @param base points to an array to sort in-place
 * @param count is the item count
 * @param width is the size of each item
 * @param cmp is a callback returning <0, 0, or >0
 * @param arg will optionally be passed as the third argument to cmp
 * @see qsort()
 */
void qsort_r(void *base, size_t count, size_t width, cmpfun cmp, void *arg) {
  struct SmoothSort state;
  smoothsort(&state, base, count, width, cmp, arg);
}
/**
* Sorts array.
*
* @param base points to an array to sort in-place
* @param count is the item count
* @param width is the size of each item
* @param cmp is a callback returning <0, 0, or >0
* @see longsort(), djbsort()
*/
void qsort(void *base, size_t count, size_t width,
int cmp(const void *, const void *)) {
struct SmoothSort s;
smoothsort(&s, base, count, width, (cmpfun)cmp, 0);
}

View file

@ -77,6 +77,7 @@ int vhangup(void);
int getdtablesize(void);
int sethostname(const char *, size_t);
int acct(const char *);
void longsort(long *, size_t);
bool _isheap(void *);
int NtGetVersion(void);

View file

@ -88,6 +88,10 @@ o/$(MODE)/libc/runtime/mman.greg.o: \
-ffreestanding \
-mgeneral-regs-only
o/$(MODE)/libc/runtime/qsort.o: \
OVERRIDE_CFLAGS += \
-Og
o/$(MODE)/libc/runtime/ftrace.greg.o: \
OVERRIDE_CFLAGS += \
-mgeneral-regs-only

View file

@ -24,7 +24,7 @@ struct SymbolTable {
struct SymbolTable *GetSymbolTable(void);
const char *FindComBinary(void);
const char *FindDebugBinary(void);
struct SymbolTable *OpenSymbolTable(const char *) nodiscard;
struct SymbolTable *OpenSymbolTable(const char *);
int CloseSymbolTable(struct SymbolTable **);
void __hook(void *, struct SymbolTable *);