mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-03 09:48:29 +00:00
Apply fixes and speedups
This commit is contained in:
parent
7521bf9e73
commit
725f4d79f6
36 changed files with 682 additions and 334 deletions
|
@ -28,7 +28,7 @@
|
|||
relegated wontreturn void __assert_fail(const char *expr, const char *file,
|
||||
int line) {
|
||||
static bool noreentry;
|
||||
__printf("%s:%d: assert(%s) failed\r\n", file, line, expr);
|
||||
__printf("\r\n%s:%d: assert(%s) failed\r\n", file, line, expr);
|
||||
if (cmpxchg(&noreentry, false, true)) {
|
||||
if (weaken(__die)) {
|
||||
weaken(__die)();
|
||||
|
|
74
libc/runtime/longsort.c
Normal file
74
libc/runtime/longsort.c
Normal file
|
@ -0,0 +1,74 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/dce.h"
|
||||
#include "libc/intrin/asan.internal.h"
|
||||
#include "libc/nexgen32e/bsr.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
|
||||
/**
 * Sorts x[0..n) ascending using a fixed schedule of compare-exchanges.
 *
 * Which pairs get compared depends only on n and t, never on the
 * values stored in x. Each compare-exchange unconditionally writes
 * both slots back.
 *
 * @param x is the array that gets sorted in place
 * @param n is the number of elements in x
 * @param t is the largest stride to use; it has to be smaller than n
 *     because the loop bounds are computed as n minus the stride
 */
forceinline void longsorter(long *x, size_t n, size_t t) {
  long lo, hi, swap, stride, far, k;
  for (stride = t; stride > 0; stride >>= 1) {
    /* order each pair of elements that sit `stride` slots apart */
    for (k = 0; k < n - stride; ++k) {
      if (k & stride) continue;
      lo = x[k];
      hi = x[k + stride];
      if (lo > hi) {
        swap = lo;
        lo = hi;
        hi = swap;
      }
      x[k] = lo;
      x[k + stride] = hi;
    }
    /* then order slot k+stride against the farther slots k+far */
    for (far = t; far > stride; far >>= 1) {
      for (k = 0; k < n - far; ++k) {
        if (k & stride) continue;
        lo = x[k + stride];
        hi = x[k + far];
        if (lo > hi) {
          swap = lo;
          lo = hi;
          hi = swap;
        }
        x[k + stride] = lo;
        x[k + far] = hi;
      }
    }
  }
}
|
||||
|
||||
/**
 * Instantiates longsorter() under the "avx2" microarchitecture setting.
 *
 * The body is the same algorithm as longsort_pure(); only the function
 * attributes differ (presumably so the compiler may emit AVX2 code).
 */
static microarchitecture("avx2") optimizespeed noasan
void longsort_avx2(long *x, size_t n, size_t t) {
  longsorter(x, n, t);
  return;
}
|
||||
|
||||
/**
 * Instantiates longsorter() without any microarchitecture requirement.
 *
 * longsort() falls back to this when the AVX2 feature test fails.
 */
static optimizesize noasan void longsort_pure(long *x, size_t n, size_t t) {
  longsorter(x, n, t);
  return;
}
|
||||
|
||||
/**
|
||||
* Sorting algorithm for longs that doesn't take long.
|
||||
*/
|
||||
void longsort(long *x, size_t n) {
|
||||
size_t t, m;
|
||||
if (IsAsan()) {
|
||||
if (__builtin_mul_overflow(n, sizeof(long), &m)) m = -1;
|
||||
__asan_check(x, m);
|
||||
}
|
||||
if (n > 1) {
|
||||
t = 1ul << bsrl(n - 1);
|
||||
if (X86_HAVE(AVX2)) return longsort_avx2(x, n, t);
|
||||
return longsort_pure(x, n, t);
|
||||
}
|
||||
}
|
|
@ -17,15 +17,14 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/alg/alg.h"
|
||||
#include "libc/assert.h"
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/elf/def.h"
|
||||
#include "libc/elf/elf.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/intrin/asan.internal.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/log/libfatal.internal.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/runtime/internal.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
|
@ -44,14 +43,15 @@
|
|||
struct SymbolTable *OpenSymbolTable(const char *filename) {
|
||||
int fd;
|
||||
void *map;
|
||||
long *stp;
|
||||
struct stat st;
|
||||
size_t n, m, tsz;
|
||||
unsigned i, j, k, x;
|
||||
unsigned i, j, x;
|
||||
const Elf64_Ehdr *elf;
|
||||
const char *name_base;
|
||||
struct SymbolTable *t;
|
||||
const Elf64_Sym *symtab, *sym;
|
||||
ptrdiff_t names_offset, name_base_offset, extra_offset;
|
||||
ptrdiff_t names_offset, name_base_offset, stp_offset;
|
||||
map = MAP_FAILED;
|
||||
if ((fd = open(filename, O_RDONLY)) == -1) return 0;
|
||||
if (fstat(fd, &st) == -1) goto SystemError;
|
||||
|
@ -69,21 +69,20 @@ struct SymbolTable *OpenSymbolTable(const char *filename) {
|
|||
tsz += sizeof(unsigned) * n;
|
||||
name_base_offset = tsz;
|
||||
tsz += m;
|
||||
extra_offset = tsz;
|
||||
tsz = ROUNDUP(tsz, FRAMESIZE);
|
||||
stp_offset = tsz;
|
||||
tsz += sizeof(const Elf64_Sym *) * n;
|
||||
tsz = ROUNDUP(tsz, FRAMESIZE);
|
||||
t = mmap(0, tsz, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
|
||||
if (t == MAP_FAILED) goto SystemError;
|
||||
if (IsAsan()) {
|
||||
__asan_poison((intptr_t)((char *)t + extra_offset), tsz - extra_offset,
|
||||
kAsanHeapOverrun);
|
||||
}
|
||||
t->mapsize = tsz;
|
||||
t->names = (const unsigned *)((const char *)t + names_offset);
|
||||
t->name_base = (const char *)((const char *)t + name_base_offset);
|
||||
t->names = (unsigned *)((char *)t + names_offset);
|
||||
t->name_base = (char *)((char *)t + name_base_offset);
|
||||
GetElfVirtualAddressRange(elf, st.st_size, &t->addr_base, &t->addr_end);
|
||||
memcpy(t->name_base, name_base, m);
|
||||
--t->addr_end;
|
||||
for (j = i = 0; i < n; ++i) {
|
||||
stp = (long *)((char *)t + stp_offset);
|
||||
for (m = i = 0; i < n; ++i) {
|
||||
sym = symtab + i;
|
||||
if (!(sym->st_size > 0 && (ELF64_ST_TYPE(sym->st_info) == STT_FUNC ||
|
||||
ELF64_ST_TYPE(sym->st_info) == STT_OBJECT))) {
|
||||
|
@ -92,23 +91,25 @@ struct SymbolTable *OpenSymbolTable(const char *filename) {
|
|||
if (sym->st_value > t->addr_end) continue;
|
||||
if (sym->st_value < t->addr_base) continue;
|
||||
x = sym->st_value - t->addr_base;
|
||||
for (k = j; k && x <= t->symbols[k - 1].x; --k) {
|
||||
t->symbols[k] = t->symbols[k - 1];
|
||||
t->names[k] = t->names[k - 1];
|
||||
}
|
||||
if (k && t->symbols[k - 1].y >= x) {
|
||||
t->symbols[k - 1].y = x - 1;
|
||||
}
|
||||
t->names[k] = sym->st_name;
|
||||
t->symbols[k].x = x;
|
||||
stp[m++] = (unsigned long)x << 32 | i;
|
||||
}
|
||||
longsort(stp, m);
|
||||
for (j = i = 0; i < m; ++i) {
|
||||
sym = symtab + (stp[i] & 0x7fffffff);
|
||||
x = stp[i] >> 32;
|
||||
if (j && x == t->symbols[j - 1].x) --j;
|
||||
if (j && t->symbols[j - 1].y >= x) t->symbols[j - 1].y = x - 1;
|
||||
t->names[j] = sym->st_name;
|
||||
t->symbols[j].x = x;
|
||||
if (sym->st_size) {
|
||||
t->symbols[k].y = x + sym->st_size - 1;
|
||||
t->symbols[j].y = x + sym->st_size - 1;
|
||||
} else {
|
||||
t->symbols[k].y = t->addr_end - t->addr_base;
|
||||
t->symbols[j].y = t->addr_end - t->addr_base;
|
||||
}
|
||||
j++;
|
||||
++j;
|
||||
}
|
||||
t->count = j;
|
||||
munmap(stp, ROUNDUP(sizeof(const Elf64_Sym *) * n, FRAMESIZE));
|
||||
munmap(map, st.st_size);
|
||||
close(fd);
|
||||
return t;
|
||||
|
|
241
libc/runtime/qsort.c
Normal file
241
libc/runtime/qsort.c
Normal file
|
@ -0,0 +1,241 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||
│ Copyright (C) 2011 by Valentin Ochs │
|
||||
│ │
|
||||
│ Permission is hereby granted, free of charge, to any person obtaining a copy │
|
||||
│ of this software and associated documentation files (the "Software"), to │
|
||||
│ deal in the Software without restriction, including without limitation the │
|
||||
│ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or │
|
||||
│ sell copies of the Software, and to permit persons to whom the Software is │
|
||||
│ furnished to do so, subject to the following conditions: │
|
||||
│ │
|
||||
│ The above copyright notice and this permission notice shall be included in │
|
||||
│ all copies or substantial portions of the Software. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR │
|
||||
│ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, │
|
||||
│ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE │
|
||||
│ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER │
|
||||
│ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING │
|
||||
│ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS │
|
||||
│ IN THE SOFTWARE. │
|
||||
└─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/alg/alg.h"
|
||||
#include "libc/assert.h"
|
||||
#include "libc/nexgen32e/bsf.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
asm(".ident\t\"\\n\\n\
|
||||
Smoothsort (MIT License)\\n\
|
||||
Copyright 2011 Valentin Ochs\\n\
|
||||
Discovered by Edsger Dijkstra\"");
|
||||
asm(".include \"libc/disclaimer.inc\"");
|
||||
|
||||
/* Three-argument comparator: (left, right, caller-supplied context). */
typedef int (*cmpfun)(const void *, const void *, void *);

/**
 * Scratch state shared by the smoothsort helpers.
 */
struct SmoothSort {
  /* table filled by smoothsort() with width-scaled Leonardo numbers */
  size_t lp[sizeof(size_t) * 12];
  /* element pointers collected by sift/trinkle, rotated by cycle */
  unsigned char *ar[sizeof(size_t) * 14 + 1];
  /* bounce buffer used by smoothsort_cycle() while rotating elements */
  unsigned char tmp[256];
};
|
||||
|
||||
/**
 * Counts the trailing zero bits of x.
 *
 * @param x is the word to scan
 * @return index of the lowest set bit, or 0 when x is zero
 */
static inline int ntz(unsigned long x) {
  /* __builtin_ctzl() is undefined for zero, yet pntz() passes
     p[0] - 1 and p[1], either of which can legitimately be zero */
  return x ? __builtin_ctzl(x) : 0;
}
|
||||
|
||||
/**
 * Finds the next set bit above bit zero in the two-word bitmap p.
 *
 * The bitmap is p[0] (low word) followed by p[1] (high word). Bit zero
 * of p[0] is removed by subtracting one, which is what the callers in
 * this file expect since they always pass an odd p[0].
 *
 * @param p is the two-word bitmap, which is not modified
 * @return position of the lowest remaining set bit, or 0 if none is
 *     found
 */
static inline int pntz(size_t p[2]) {
  const int wordbits = (int)(CHAR_BIT * sizeof(size_t));
  size_t low = p[0] - 1;
  /* zero is tested explicitly because __builtin_ctzl(0) is undefined */
  int r = low ? __builtin_ctzl(low) : 0;
  if (r) return r;
  /* testing p[1] itself (rather than its trailing zero count) keeps an
     odd high word from being mistaken for an empty one */
  if (p[1]) return wordbits + __builtin_ctzl(p[1]);
  return 0;
}
|
||||
|
||||
/* smoothsort_shl() and smoothsort_shr() need n > 0 */
|
||||
/**
 * Shifts the two-word bitmap p left by n bits.
 *
 * p[0] is the low word and p[1] the high word. Bits shifted out of the
 * high word are discarded.
 *
 * @param p is the bitmap, updated in place
 * @param n is the shift distance, from 0 up to (but excluding) twice
 *     the bit width of size_t
 */
static inline void smoothsort_shl(size_t p[2], int n) {
  const int wordbits = (int)(CHAR_BIT * sizeof(size_t));
  if (n >= wordbits) {
    n -= wordbits;
    p[1] = p[0];
    p[0] = 0;
  }
  /* nothing left to move; also keeps the carry shift below from using
     a count equal to the word width, which would be undefined */
  if (!n) return;
  p[1] <<= n;
  p[1] |= p[0] >> (wordbits - n);
  p[0] <<= n;
}
|
||||
|
||||
/**
 * Shifts the two-word bitmap p right by n bits.
 *
 * p[0] is the low word and p[1] the high word. Bits shifted out of the
 * low word are discarded.
 *
 * @param p is the bitmap, updated in place
 * @param n is the shift distance, from 0 up to (but excluding) twice
 *     the bit width of size_t
 */
static inline void smoothsort_shr(size_t p[2], int n) {
  const int wordbits = (int)(CHAR_BIT * sizeof(size_t));
  if (n >= wordbits) {
    n -= wordbits;
    p[0] = p[1];
    p[1] = 0;
  }
  /* nothing left to move; also keeps the carry shift below from using
     a count equal to the word width, which would be undefined */
  if (!n) return;
  p[0] >>= n;
  p[0] |= p[1] << (wordbits - n);
  p[1] >>= n;
}
|
||||
|
||||
static void smoothsort_cycle(struct SmoothSort *s, size_t width, int n) {
|
||||
size_t l;
|
||||
int i;
|
||||
if (n < 2) {
|
||||
return;
|
||||
}
|
||||
s->ar[n] = s->tmp;
|
||||
while (width) {
|
||||
l = sizeof(s->tmp) < width ? sizeof(s->tmp) : width;
|
||||
memcpy(s->ar[n], s->ar[0], l);
|
||||
for (i = 0; i < n; i++) {
|
||||
memcpy(s->ar[i], s->ar[i + 1], l);
|
||||
s->ar[i] += l;
|
||||
}
|
||||
width -= l;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Sifts the root of one Leonardo tree down to restore heap order.
 *
 * Starting at head, the larger child is followed for as long as the
 * root compares below either child. The visited positions are recorded
 * in s->ar and finally rotated by smoothsort_cycle().
 *
 * @param s is the scratch state (s->lp must already be populated)
 * @param head points at the root element of the tree
 * @param width is the size of one element in bytes
 * @param cmp is the three-argument comparator
 * @param arg is passed through to cmp
 * @param pshift is the order of the tree rooted at head
 */
static void smoothsort_sift(struct SmoothSort *s, unsigned char *head,
                            size_t width, cmpfun cmp, void *arg, int pshift) {
  unsigned char *root = head;
  int depth = 1;
  s->ar[0] = root;
  for (; pshift > 1;) {
    unsigned char *right = head - width;
    unsigned char *left = right - s->lp[pshift - 2];
    /* done once the root is no smaller than both children */
    if (cmp(root, left, arg) >= 0 && cmp(root, right, arg) >= 0) {
      break;
    }
    if (cmp(left, right, arg) >= 0) {
      head = left;
      pshift -= 1;
    } else {
      head = right;
      pshift -= 2;
    }
    s->ar[depth++] = head;
  }
  smoothsort_cycle(s, width, depth);
}
|
||||
|
||||
/**
 * Moves a new root leftwards across the tree roots, then sifts it down.
 *
 * The walk visits the root of each preceding tree (the "stepson") while
 * that stepson compares greater than the element being placed. When
 * trusty is zero and the current tree has children, the walk also stops
 * as soon as either child is not smaller than the stepson. The visited
 * positions are rotated with smoothsort_cycle() and the final position
 * is handed to smoothsort_sift(), unless trusty is still set on exit.
 *
 * @param s is the scratch state (s->lp must already be populated)
 * @param head points at the root being placed
 * @param width is the size of one element in bytes
 * @param cmp is the three-argument comparator
 * @param arg is passed through to cmp
 * @param pp is the two-word tree bitmap; it is copied, not modified
 * @param pshift is the order of the tree rooted at head
 * @param trusty is nonzero when the tree at head is already heap-ordered
 */
static void smoothsort_trinkle(struct SmoothSort *s, unsigned char *head,
                               size_t width, cmpfun cmp, void *arg,
                               size_t pp[2], int pshift, int trusty) {
  size_t trees[2];
  unsigned char *root = head;
  int depth = 1;
  trees[0] = pp[0];
  trees[1] = pp[1];
  s->ar[0] = root;
  for (;;) {
    unsigned char *stepson, *right, *left;
    int skip;
    /* only one tree left: there is no stepson to visit */
    if (trees[0] == 1 && trees[1] == 0) break;
    stepson = head - s->lp[pshift];
    if (cmp(stepson, root, arg) <= 0) break;
    if (!trusty && pshift > 1) {
      right = head - width;
      left = right - s->lp[pshift - 2];
      if (cmp(right, stepson, arg) >= 0) break;
      if (cmp(left, stepson, arg) >= 0) break;
    }
    s->ar[depth++] = stepson;
    head = stepson;
    /* advance the bitmap to the tree whose root is the stepson */
    skip = pntz(trees);
    smoothsort_shr(trees, skip);
    pshift += skip;
    trusty = 0;
  }
  if (trusty) return;
  smoothsort_cycle(s, width, depth);
  smoothsort_sift(s, head, width, cmp, arg, pshift);
}
|
||||
|
||||
/**
 * Sorts nel elements of width bytes each, in place, using smoothsort.
 *
 * The first loop grows a forest of heap-ordered trees over the array
 * one element at a time; the second loop takes the forest apart from
 * the right-hand end, re-establishing order among the exposed roots.
 *
 * @param s is caller-provided scratch state; its contents on entry are
 *     not read
 * @param base points at the first element
 * @param nel is the number of elements
 * @param width is the size of one element in bytes
 * @param cmp is the three-argument comparator
 * @param arg is passed through to cmp
 */
static void smoothsort(struct SmoothSort *s, void *base, size_t nel,
                       size_t width, cmpfun cmp, void *arg) {
  size_t k;
  size_t total = width * nel;
  size_t trees[2] = {1, 0};
  unsigned char *cur, *last;
  int order = 1;
  int skip;

  if (total == 0) return;
  cur = base;
  last = cur + total - width;

  /* Precompute Leonardo numbers, scaled by element width */
  s->lp[0] = width;
  s->lp[1] = width;
  for (k = 2;; ++k) {
    s->lp[k] = s->lp[k - 2] + s->lp[k - 1] + width;
    if (s->lp[k] >= total) break;
  }

  /* build phase: absorb one element per iteration */
  while (cur < last) {
    if ((trees[0] & 3) == 3) {
      /* the two rightmost trees merge under the new element */
      smoothsort_sift(s, cur, width, cmp, arg, order);
      smoothsort_shr(trees, 2);
      order += 2;
    } else {
      if (s->lp[order - 1] >= last - cur) {
        smoothsort_trinkle(s, cur, width, cmp, arg, trees, order, 0);
      } else {
        smoothsort_sift(s, cur, width, cmp, arg, order);
      }
      if (order == 1) {
        smoothsort_shl(trees, 1);
        order = 0;
      } else {
        smoothsort_shl(trees, order - 1);
        order = 1;
      }
    }
    trees[0] |= 1;
    cur += width;
  }

  smoothsort_trinkle(s, cur, width, cmp, arg, trees, order, 0);

  /* teardown phase: runs until a single order-1 tree remains */
  while (!(order == 1 && trees[0] == 1 && trees[1] == 0)) {
    if (order <= 1) {
      skip = pntz(trees);
      smoothsort_shr(trees, skip);
      order += skip;
    } else {
      /* split the rightmost tree and fix up both exposed child roots */
      smoothsort_shl(trees, 2);
      order -= 2;
      trees[0] ^= 7;
      smoothsort_shr(trees, 1);
      smoothsort_trinkle(s, cur - s->lp[order] - width, width, cmp, arg,
                         trees, order + 1, 1);
      smoothsort_shl(trees, 1);
      trees[0] |= 1;
      smoothsort_trinkle(s, cur - width, width, cmp, arg, trees, order, 1);
    }
    cur -= width;
  }
}
|
||||
|
||||
/**
|
||||
* Sorts array.
|
||||
*
|
||||
* @param base points to an array to sort in-place
|
||||
* @param count is the item count
|
||||
* @param width is the size of each item
|
||||
* @param cmp is a callback returning <0, 0, or >0
|
||||
* @param arg will optionally be passed as the third argument to cmp
|
||||
* @see qsort()
|
||||
*/
|
||||
void qsort_r(void *base, size_t count, size_t width, cmpfun cmp, void *arg) {
|
||||
struct SmoothSort s;
|
||||
smoothsort(&s, base, count, width, cmp, arg);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts array.
|
||||
*
|
||||
* @param base points to an array to sort in-place
|
||||
* @param count is the item count
|
||||
* @param width is the size of each item
|
||||
* @param cmp is a callback returning <0, 0, or >0
|
||||
* @see longsort(), djbsort()
|
||||
*/
|
||||
void qsort(void *base, size_t count, size_t width,
|
||||
int cmp(const void *, const void *)) {
|
||||
struct SmoothSort s;
|
||||
smoothsort(&s, base, count, width, (cmpfun)cmp, 0);
|
||||
}
|
|
@ -77,6 +77,7 @@ int vhangup(void);
|
|||
int getdtablesize(void);
|
||||
int sethostname(const char *, size_t);
|
||||
int acct(const char *);
|
||||
void longsort(long *, size_t);
|
||||
|
||||
bool _isheap(void *);
|
||||
int NtGetVersion(void);
|
||||
|
|
|
@ -88,6 +88,10 @@ o/$(MODE)/libc/runtime/mman.greg.o: \
|
|||
-ffreestanding \
|
||||
-mgeneral-regs-only
|
||||
|
||||
o/$(MODE)/libc/runtime/qsort.o: \
|
||||
OVERRIDE_CFLAGS += \
|
||||
-Og
|
||||
|
||||
o/$(MODE)/libc/runtime/ftrace.greg.o: \
|
||||
OVERRIDE_CFLAGS += \
|
||||
-mgeneral-regs-only
|
||||
|
|
|
@ -24,7 +24,7 @@ struct SymbolTable {
|
|||
struct SymbolTable *GetSymbolTable(void);
|
||||
const char *FindComBinary(void);
|
||||
const char *FindDebugBinary(void);
|
||||
struct SymbolTable *OpenSymbolTable(const char *) nodiscard;
|
||||
struct SymbolTable *OpenSymbolTable(const char *);
|
||||
int CloseSymbolTable(struct SymbolTable **);
|
||||
void __hook(void *, struct SymbolTable *);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue