Make fixes, improvements, and chibicc python bindings

- python now mixes audio 10x faster
- python octal notation is restored
- chibicc now builds code 3x faster
- chibicc now has help documentation
- chibicc can now generate basic python bindings
- linenoise now supports some paredit-like features

See #141
This commit is contained in:
Justine Tunney 2021-10-08 08:11:51 -07:00
parent 28997f3acb
commit 7061c79c22
121 changed files with 5272 additions and 1928 deletions

View file

@ -17,18 +17,55 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/alg/alg.h"
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/nexgen32e/bsr.h"
#include "libc/nexgen32e/nexgen32e.h"
#include "libc/nexgen32e/x86feature.h"
void djbsort_avx2(int32_t *, long);
/**
 * Sorts array of ints in place using a fixed compare-exchange schedule.
 *
 * Which index pairs get compared depends only on `n` and `t`, never on
 * the values held in `x`.
 *
 * @param x is the array to reorder
 * @param n is the number of elements in x
 * @param t is the widest stride to use; djbsort() passes
 *     `1ul << bsrl(n - 1)`, and the `n - stride` loop bounds below
 *     assume t < n
 */
static noinline void intsort(int *x, size_t n, size_t t) {
  int lo, hi;
  size_t k, gap, wide;
  for (gap = t; gap; gap >>= 1) {
    /* pair each slot whose `gap` bit is clear with its partner */
    for (k = 0; k < n - gap; ++k) {
      if (k & gap) continue;
      lo = x[k];
      hi = x[k + gap];
      if (lo > hi) {
        x[k] = hi;
        x[k + gap] = lo;
      }
    }
    /* then order those partners against slots at each wider stride */
    for (wide = t; wide > gap; wide >>= 1) {
      for (k = 0; k < n - wide; ++k) {
        if (k & gap) continue;
        lo = x[k + gap];
        hi = x[k + wide];
        if (lo > hi) {
          x[k + gap] = hi;
          x[k + wide] = lo;
        }
      }
    }
  }
}
/**
* D.J. Bernstein's outrageously fast integer sorting algorithm.
*/
void djbsort(int32_t *a, size_t n) {
if (X86_HAVE(AVX2)) {
djbsort_avx2(a, n);
} else {
insertionsort(a, n);
size_t m;
if (IsAsan()) {
if (__builtin_mul_overflow(n, 4, &m)) m = -1;
__asan_check(a, m);
}
if (n > 1) {
if (X86_HAVE(AVX2)) {
djbsort_avx2(a, n);
} else {
intsort(a, n, 1ul << bsrl(n - 1));
}
}
}

View file

@ -390,14 +390,14 @@ static const unsigned kAstralCodes[][2] = {
* other things like blocks and emoji (So).
*/
int iswseparator(wint_t c) {
int m, l, r;
int m, l, r, n;
if (c < 0200) {
return !(('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') ||
('a' <= c && c <= 'z'));
}
if (c <= 0xffff) {
l = 0;
r = sizeof(kCodes) / sizeof(kCodes[0]);
r = n = sizeof(kCodes) / sizeof(kCodes[0]);
while (l < r) {
m = (l + r) >> 1;
if (kCodes[m][1] < c) {
@ -406,10 +406,10 @@ int iswseparator(wint_t c) {
r = m;
}
}
return !(kCodes[l][0] <= c && c <= kCodes[l][1]);
return !(l < n && kCodes[l][0] <= c && c <= kCodes[l][1]);
} else {
l = 0;
r = sizeof(kAstralCodes) / sizeof(kAstralCodes[0]);
r = n = sizeof(kAstralCodes) / sizeof(kAstralCodes[0]);
while (l < r) {
m = (l + r) >> 1;
if (kAstralCodes[m][1] < c) {
@ -418,6 +418,6 @@ int iswseparator(wint_t c) {
r = m;
}
}
return !(kAstralCodes[l][0] <= c && c <= kAstralCodes[l][1]);
return !(l < n && kAstralCodes[l][0] <= c && c <= kAstralCodes[l][1]);
}
}

38
libc/str/khextoint.c Normal file
View file

@ -0,0 +1,38 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
/**
 * Maps each byte value to the hexadecimal digit it spells.
 *
 * Entries for '0'..'9' hold 0..9 and entries for 'A'..'F' and 'a'..'f'
 * hold 10..15. Every other entry holds -1. The comment ending each row
 * is the index of that row's first entry.
 */
const signed char kHexToInt[256] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x00
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x10
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x20
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 0x30
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x40
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x50
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x60
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x70
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x80
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x90
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xa0
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xb0
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xc0
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xd0
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xe0
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xf0
};

79
libc/str/longsort.c Normal file
View file

@ -0,0 +1,79 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/macros.internal.h"
#include "libc/nexgen32e/bsr.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/runtime/runtime.h"
/**
 * Sorts array of longs in place using a fixed compare-exchange schedule.
 *
 * Which index pairs get compared depends only on `n` and `t`, never on
 * the values held in `x`.
 *
 * @param x is the array to reorder
 * @param n is the number of elements in x
 * @param t is the widest stride to use; longsort() passes
 *     `1ul << bsrl(n - 1)`, and the `n - stride` loop bounds below
 *     assume t < n
 */
forceinline void longsorter(long *x, size_t n, size_t t) {
  long lo, hi;
  size_t k, gap, wide;
  for (gap = t; gap; gap >>= 1) {
    /* pair each slot whose `gap` bit is clear with its partner */
    for (k = 0; k < n - gap; ++k) {
      if (k & gap) continue;
      lo = x[k];
      hi = x[k + gap];
      if (lo > hi) {
        x[k] = hi;
        x[k + gap] = lo;
      }
    }
    /* then order those partners against slots at each wider stride */
    for (wide = t; wide > gap; wide >>= 1) {
      for (k = 0; k < n - wide; ++k) {
        if (k & gap) continue;
        lo = x[k + gap];
        hi = x[k + wide];
        if (lo > hi) {
          x[k + gap] = hi;
          x[k + wide] = lo;
        }
      }
    }
  }
}
/* Instance of longsorter() built under the microarchitecture("avx2")
   and optimizespeed attributes; longsort() calls it when
   X86_HAVE(AVX2) holds. */
static microarchitecture("avx2") optimizespeed noasan
void longsort_avx2(long *x, size_t n, size_t t) {
longsorter(x, n, t);
}
/* Instance of longsorter() built under the optimizesize attribute;
   longsort() calls it when X86_HAVE(AVX2) does not hold. */
static optimizesize noasan void longsort_pure(long *x, size_t n, size_t t) {
longsorter(x, n, t);
}
/**
 * Sorts array of longs in place, smallest first.
 *
 * Arrays with fewer than two elements are left untouched. Under ASAN
 * the whole `n * sizeof(long)` byte range is checked up front, since
 * the workers are compiled with noasan.
 *
 * @param x is the array to sort
 * @param n is the number of elements in x
 */
void longsort(long *x, size_t n) {
  size_t bytes;
  if (IsAsan()) {
    /* saturate the byte count rather than letting it wrap */
    if (__builtin_mul_overflow(n, sizeof(long), &bytes)) bytes = -1;
    __asan_check(x, bytes);
  }
  if (n <= 1) return;
  if (X86_HAVE(AVX2)) {
    longsort_avx2(x, n, 1ul << bsrl(n - 1));
  } else {
    longsort_pure(x, n, 1ul << bsrl(n - 1));
  }
}

View file

@ -54,38 +54,12 @@ static inline noasan uint64_t UncheckedAlignedRead64(unsigned char *p) {
*/
void *memccpy(void *dst, const void *src, int c, size_t n) {
size_t i;
uint64_t v, w;
unsigned char *d, *q;
unsigned char *d;
const unsigned char *s;
i = 0;
d = dst;
s = src;
c &= 255;
v = 0x0101010101010101ul * c;
for (; (uintptr_t)(s + i) & 7; ++i) {
if (i == n) return NULL;
if ((d[i] = s[i]) == c) return d + i + 1;
}
for (; i + 8 <= n; i += 8) {
w = UncheckedAlignedRead64(s + i);
if (~(w ^ v) & ((w ^ v) - 0x0101010101010101) & 0x8080808080808080) {
break;
} else {
q = d + i;
q[0] = (w & 0x00000000000000ff) >> 000;
q[1] = (w & 0x000000000000ff00) >> 010;
q[2] = (w & 0x0000000000ff0000) >> 020;
q[3] = (w & 0x00000000ff000000) >> 030;
q[4] = (w & 0x000000ff00000000) >> 040;
q[5] = (w & 0x0000ff0000000000) >> 050;
q[6] = (w & 0x00ff000000000000) >> 060;
q[7] = (w & 0xff00000000000000) >> 070;
}
}
for (; i < n; ++i) {
if ((d[i] = s[i]) == c) {
for (d = dst, s = src, i = 0; i < n; ++i) {
if ((d[i] = s[i]) == (c & 255)) {
return d + i + 1;
}
}
return NULL;
return 0;
}

View file

@ -16,35 +16,64 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/str/str.h"
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
/* Byte-at-a-time search of the first n bytes of s for c.
   Returns the address of the first match, or 0 if there is none. */
static inline const unsigned char *memchr_pure(const unsigned char *s,
                                               unsigned char c, size_t n) {
  while (n) {
    if (*s == c) return s;
    ++s;
    --n;
  }
  return 0;
}
/* Searches the first n bytes of s for c, sixteen bytes per step.
   Whole 16-byte groups are compared with one vector equality test;
   the remaining tail of fewer than 16 bytes is scanned one byte at a
   time. Returns the address of the first match, or 0 if none. */
noasan static inline const unsigned char *memchr_sse(const unsigned char *s,
                                                     unsigned char c,
                                                     size_t n) {
  size_t i;
  unsigned m;
  xmm_t v;
  xmm_t t = {c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c};
  for (; n >= 16; n -= 16, s += 16) {
    v = *(const xmm_t *)s;
    /* bit j of m is set when byte j of this group equals c */
    m = __builtin_ia32_pmovmskb128(v == t);
    if (m) {
      return s + __builtin_ctz(m);
    }
  }
  for (i = 0; i < n; ++i) {
    if (s[i] == c) {
      return s + i;
    }
  }
  return 0;
}
/**
* Returns pointer to first instance of character.
*
* @param m is memory to search
* @param s is memory to search
* @param c is search byte which is masked with 255
* @param n is byte length of s
* @return is pointer to first instance of c or NULL if not found
* @asyncsignalsafe
*/
void *memchr(const void *m, int c, size_t n) {
uint64_t v, w;
const char *p, *pe;
c &= 255;
v = 0x0101010101010101ul * c;
for (p = m, pe = p + n; p + 8 <= pe; p += 8) {
w = (uint64_t)(255 & p[7]) << 070 | (uint64_t)(255 & p[6]) << 060 |
(uint64_t)(255 & p[5]) << 050 | (uint64_t)(255 & p[4]) << 040 |
(uint64_t)(255 & p[3]) << 030 | (uint64_t)(255 & p[2]) << 020 |
(uint64_t)(255 & p[1]) << 010 | (uint64_t)(255 & p[0]) << 000;
if ((w = ~(w ^ v) & ((w ^ v) - 0x0101010101010101) & 0x8080808080808080)) {
return p + ((unsigned)__builtin_ctzll(w) >> 3);
}
void *memchr(const void *s, int c, size_t n) {
const void *r;
if (X86_HAVE(SSE)) {
if (IsAsan()) __asan_check(s, n);
r = memchr_sse(s, c, n);
} else {
r = memchr_pure(s, c, n);
}
for (; p < pe; ++p) {
if ((*p & 255) == c) {
return p;
}
}
return NULL;
return (void *)r;
}

241
libc/str/qsort.c Normal file
View file

@ -0,0 +1,241 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright (C) 2011 by Valentin Ochs
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
*/
#include "libc/alg/alg.h"
#include "libc/assert.h"
#include "libc/nexgen32e/bsf.h"
#include "libc/str/str.h"
asm(".ident\t\"\\n\\n\
Smoothsort (MIT License)\\n\
Copyright 2011 Valentin Ochs\\n\
Discovered by Edsger Dijkstra\"");
asm(".include \"libc/disclaimer.inc\"");
/* Comparator taking two element pointers plus an opaque third argument
   (the `arg` handed to qsort_r()). */
typedef int (*cmpfun)(const void *, const void *, void *);
/* Scratch state shared by the smoothsort helpers during one sort. */
struct SmoothSort {
/* filled by smoothsort() with Leonardo numbers scaled by element width */
size_t lp[12 * sizeof(size_t)];
/* element addresses gathered by sift/trinkle; smoothsort_cycle() points
   the slot after the last one at tmp */
unsigned char *ar[14 * sizeof(size_t) + 1];
/* staging buffer smoothsort_cycle() uses to move elements in pieces of
   at most sizeof(tmp) bytes */
unsigned char tmp[256];
};
/* Counts trailing zero bits of x. The result of __builtin_ctzl() is
   undefined for zero, so callers must pass a nonzero value. */
static inline int ntz(unsigned long x) {
  return __builtin_ctzl(x);
}

/**
 * Counts trailing zero bits of the two-word value `p` minus one.
 *
 * p[0] is the low word and p[1] the high word. Every caller in this
 * file passes a value whose low bit is set, so `p - 1` only borrows
 * from bit zero and the high word is unchanged by the subtraction.
 *
 * Each word is tested for zero before it is handed to ntz(), which
 * keeps the count defined when p[0] is 1 and yields the full word
 * width plus ntz(p[1]) in that case.
 *
 * @return number of trailing zeros of p-1, or 0 when p-1 is zero
 */
static inline int pntz(size_t p[2]) {
  size_t lo = p[0] - 1;
  if (lo) return ntz(lo);
  if (p[1]) return CHAR_BIT * sizeof(size_t) + ntz(p[1]);
  return 0;
}
/*
 * smoothsort_shl() and smoothsort_shr() shift the two-word value `p`
 * (p[0] low word, p[1] high word) by n bits, where n must lie in
 * [0, 2 * CHAR_BIT * sizeof(size_t)).
 *
 * A count of at least one word is first reduced by moving a whole
 * word. The bit-level step is skipped when the remaining count is
 * zero, because it would otherwise shift a word by its full width,
 * which C leaves undefined.
 */
static inline void smoothsort_shl(size_t p[2], int n) {
  if (n >= CHAR_BIT * sizeof(size_t)) {
    n -= CHAR_BIT * sizeof(size_t);
    p[1] = p[0];
    p[0] = 0;
  }
  if (n) {
    p[1] <<= n;
    p[1] |= p[0] >> (sizeof(size_t) * CHAR_BIT - n);
    p[0] <<= n;
  }
}
static inline void smoothsort_shr(size_t p[2], int n) {
  if (n >= CHAR_BIT * sizeof(size_t)) {
    n -= CHAR_BIT * sizeof(size_t);
    p[0] = p[1];
    p[1] = 0;
  }
  if (n) {
    p[0] >>= n;
    p[0] |= p[1] << (sizeof(size_t) * CHAR_BIT - n);
    p[1] >>= n;
  }
}
/**
 * Rotates the contents of the elements listed in s->ar[0..n-1].
 *
 * The element at ar[0] is saved to s->tmp, every ar[k] receives the
 * contents of ar[k + 1], and the last listed element receives the
 * saved copy. Elements wider than s->tmp are handled in successive
 * pieces, and each s->ar[k] is advanced past the bytes already moved,
 * so the pointers in s->ar are consumed by this call.
 *
 * @param width is the element size in bytes
 * @param n is the number of listed elements; fewer than two is a no-op
 */
static void smoothsort_cycle(struct SmoothSort *s, size_t width, int n) {
  size_t chunk;
  int k;
  if (n < 2) return;
  s->ar[n] = s->tmp;
  for (; width; width -= chunk) {
    chunk = width > sizeof(s->tmp) ? sizeof(s->tmp) : width;
    memcpy(s->ar[n], s->ar[0], chunk);
    for (k = 0; k < n; ++k) {
      memcpy(s->ar[k], s->ar[k + 1], chunk);
      s->ar[k] += chunk;
    }
  }
}
/**
 * Sinks the value at `head` down through the tree rooted there.
 *
 * While `pshift` exceeds 1 the two children of the current position
 * are examined: `rt` is the element just before it and `lf` lies a
 * further s->lp[pshift - 2] bytes back. Descent stops once the value
 * originally at `head` compares >= both children; otherwise it moves
 * into whichever child compares larger. The positions visited are
 * collected in s->ar and then rotated by smoothsort_cycle().
 *
 * @param head is the root element of the tree
 * @param width is the element size in bytes
 * @param cmp is the comparator, called with `arg` as third argument
 * @param pshift is the index into s->lp for the tree at head
 */
static void smoothsort_sift(struct SmoothSort *s, unsigned char *head,
size_t width, cmpfun cmp, void *arg, int pshift) {
unsigned char *rt, *lf;
int i = 1;
s->ar[0] = head;
while (pshift > 1) {
rt = head - width;
lf = head - width - s->lp[pshift - 2];
/* ar[0] still addresses the original root value being placed */
if (cmp(s->ar[0], lf, arg) >= 0 && cmp(s->ar[0], rt, arg) >= 0) {
break;
}
/* follow the larger child; left lowers pshift by 1, right by 2 */
if (cmp(lf, rt, arg) >= 0) {
s->ar[i++] = lf;
head = lf;
pshift -= 1;
} else {
s->ar[i++] = rt;
head = rt;
pshift -= 2;
}
}
smoothsort_cycle(s, width, i);
}
/**
 * Carries the value at `head` back across the roots of earlier trees
 * and then sifts it into the tree where it stops.
 *
 * Each step looks at the "stepson", the element s->lp[pshift] bytes
 * before `head`. The walk ends when the stepson compares <= the value
 * being placed, when the bitmap copy reaches {1, 0}, or, for an
 * untrusted position with pshift > 1, when either child of `head`
 * compares >= the stepson.
 *
 * @param head is the root element to start from
 * @param width is the element size in bytes
 * @param cmp is the comparator, called with `arg` as third argument
 * @param pp is the caller's two-word bitmap; it is copied, not modified
 * @param pshift is the index into s->lp for the tree at head
 * @param trusty if nonzero, skips the child comparison on the first
 *     step, and skips the final cycle and sift when no step is taken
 */
static void smoothsort_trinkle(struct SmoothSort *s, unsigned char *head,
size_t width, cmpfun cmp, void *arg,
size_t pp[2], int pshift, int trusty) {
unsigned char *stepson, *rt, *lf;
size_t p[2];
int i = 1;
int trail;
p[0] = pp[0];
p[1] = pp[1];
s->ar[0] = head;
while (p[0] != 1 || p[1] != 0) {
stepson = head - s->lp[pshift];
if (cmp(stepson, s->ar[0], arg) <= 0) {
break;
}
if (!trusty && pshift > 1) {
rt = head - width;
lf = head - width - s->lp[pshift - 2];
if (cmp(rt, stepson, arg) >= 0 || cmp(lf, stepson, arg) >= 0) {
break;
}
}
s->ar[i++] = stepson;
head = stepson;
/* drop the tree just left behind from the bitmap copy and raise
   pshift by the same number of bits */
trail = pntz(p);
smoothsort_shr(p, trail);
pshift += trail;
trusty = 0;
}
if (!trusty) {
smoothsort_cycle(s, width, i);
smoothsort_sift(s, head, width, cmp, arg, pshift);
}
}
/**
 * Sorts `nel` elements of `width` bytes each, starting at `base`.
 *
 * The first loop walks `head` forward over the array while updating
 * the two-word bitmap `p` and `pshift`; the second loop walks `head`
 * back to the start while taking that state apart again.
 *
 * @param s is caller-provided scratch state; lp is filled in here
 * @param cmp is the comparator, called with `arg` as third argument
 */
static void smoothsort(struct SmoothSort *s, void *base, size_t nel,
size_t width, cmpfun cmp, void *arg) {
/* NOTE(review): width * nel is not checked for overflow */
size_t i, size = width * nel;
unsigned char *head, *high;
size_t p[2] = {1, 0};
int pshift = 1;
int trail;
/* nothing to do when either nel or width is zero */
if (!size) return;
head = base;
/* high addresses the last element */
high = head + size - width;
/* Precompute Leonardo numbers, scaled by element width */
for (s->lp[0] = s->lp[1] = width, i = 2;
(s->lp[i] = s->lp[i - 2] + s->lp[i - 1] + width) < size; i++) {
}
while (head < high) {
if ((p[0] & 3) == 3) {
/* two lowest bitmap bits set: sift, then drop two bits and raise
   pshift by two */
smoothsort_sift(s, head, width, cmp, arg, pshift);
smoothsort_shr(p, 2);
pshift += 2;
} else {
/* trinkle when the remaining bytes do not exceed lp[pshift - 1],
   otherwise a plain sift is used */
if (s->lp[pshift - 1] >= high - head) {
smoothsort_trinkle(s, head, width, cmp, arg, p, pshift, 0);
} else {
smoothsort_sift(s, head, width, cmp, arg, pshift);
}
if (pshift == 1) {
smoothsort_shl(p, 1);
pshift = 0;
} else {
smoothsort_shl(p, pshift - 1);
pshift = 1;
}
}
/* the low bit is set again before moving to the next element */
p[0] |= 1;
head += width;
}
smoothsort_trinkle(s, head, width, cmp, arg, p, pshift, 0);
/* walk head back down until the state returns to its initial
   pshift == 1, p == {1, 0} */
while (pshift != 1 || p[0] != 1 || p[1] != 0) {
if (pshift <= 1) {
trail = pntz(p);
smoothsort_shr(p, trail);
pshift += trail;
} else {
/* pshift drops by two and both sub-roots are trinkled with
   trusty set */
smoothsort_shl(p, 2);
pshift -= 2;
p[0] ^= 7;
smoothsort_shr(p, 1);
smoothsort_trinkle(s, head - s->lp[pshift] - width, width, cmp, arg, p,
pshift + 1, 1);
smoothsort_shl(p, 1);
p[0] |= 1;
smoothsort_trinkle(s, head - width, width, cmp, arg, p, pshift, 1);
}
head -= width;
}
}
/**
 * Sorts array, passing a caller-supplied argument to the comparator.
 *
 * The scratch state for the sort is a local struct SmoothSort, so no
 * heap allocation is made by this function itself.
 *
 * @param base points to an array to sort in-place
 * @param count is the item count
 * @param width is the size of each item
 * @param cmp is a callback returning <0, 0, or >0
 * @param arg will optionally be passed as the third argument to cmp
 * @see qsort()
 */
void qsort_r(void *base, size_t count, size_t width, cmpfun cmp, void *arg) {
struct SmoothSort s;
smoothsort(&s, base, count, width, cmp, arg);
}
/**
 * Sorts array.
 *
 * The scratch state for the sort is a local struct SmoothSort, so no
 * heap allocation is made by this function itself.
 *
 * @param base points to an array to sort in-place
 * @param count is the item count
 * @param width is the size of each item
 * @param cmp is a callback returning <0, 0, or >0
 * @see longsort(), djbsort()
 */
void qsort(void *base, size_t count, size_t width,
int cmp(const void *, const void *)) {
struct SmoothSort s;
/* NOTE(review): cmp is cast to the three-argument cmpfun type and is
   then called with an extra trailing argument (0); this depends on the
   calling convention tolerating the surplus argument, so confirm it
   for every supported target */
smoothsort(&s, base, count, width, (cmpfun)cmp, 0);
}

View file

@ -17,12 +17,39 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/str/str.h"
static inline noasan uint64_t UncheckedAlignedRead64(unsigned char *p) {
return (uint64_t)p[7] << 070 | (uint64_t)p[6] << 060 | (uint64_t)p[5] << 050 |
(uint64_t)p[4] << 040 | (uint64_t)p[3] << 030 | (uint64_t)p[2] << 020 |
(uint64_t)p[1] << 010 | (uint64_t)p[0] << 000;
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
/* Byte-at-a-time unbounded search for c starting at s. There is no
   length limit, so the caller must guarantee c occurs. */
static inline const unsigned char *rawmemchr_pure(const unsigned char *s,
                                                  unsigned char c) {
  while (*s != c) {
    ++s;
  }
  return s;
}
/* Unbounded search for c starting at s, sixteen bytes per step.
   Loads are done from 16-byte aligned addresses, so the first load
   begins at or before s; matches that precede s are cleared from the
   mask before it is examined. There is no length limit, so the caller
   must guarantee c occurs. */
noasan static inline const char *rawmemchr_sse(const char *s, unsigned char c) {
  unsigned k;
  unsigned m;
  xmm_t v;
  const xmm_t *p;
  xmm_t n = {c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c};
  k = (uintptr_t)s & 15;
  p = (const xmm_t *)((uintptr_t)s & -16);
  v = *p;
  m = __builtin_ia32_pmovmskb128(v == n);
  /* clear the k low bits, which belong to bytes before s */
  m >>= k;
  m <<= k;
  while (!m) {
    v = *++p;
    m = __builtin_ia32_pmovmskb128(v == n);
  }
  return (const char *)p + __builtin_ctz(m);
}
/**
@ -32,22 +59,13 @@ static inline noasan uint64_t UncheckedAlignedRead64(unsigned char *p) {
* @param c is search byte which is masked with 255
* @return is pointer to first instance of c
*/
void *rawmemchr(const void *m, int c) {
uint64_t v, w;
const unsigned char *s;
s = m;
c &= 255;
v = 0x0101010101010101ul * c;
for (; (uintptr_t)s & 7; ++s) {
if (*s == c) return s;
void *rawmemchr(const void *s, int c) {
const void *r;
if (X86_HAVE(SSE)) {
if (IsAsan()) __asan_check(s, 1);
r = rawmemchr_sse(s, c);
} else {
r = rawmemchr_pure(s, c);
}
for (;; s += 8) {
w = UncheckedAlignedRead64(s);
if ((w = ~(w ^ v) & ((w ^ v) - 0x0101010101010101) & 0x8080808080808080)) {
s += (unsigned)__builtin_ctzll(w) >> 3;
break;
}
}
assert(*s == c);
return s;
return (void *)r;
}

View file

@ -7,6 +7,7 @@ COSMOPOLITAN_C_START_
fourth age telecommunications */
extern const int8_t kHexToInt[256];
extern const uint8_t gperf_downcase[256];
extern const uint8_t kToLower[256];
extern const uint8_t kToUpper[256];

View file

@ -17,37 +17,42 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/str/str.h"
static noasan inline const char *strchr_x64(const char *p, uint64_t c) {
unsigned a, b;
uint64_t w, x, y;
for (c *= 0x0101010101010101;; p += 8) {
w = (uint64_t)(255 & p[7]) << 070 | (uint64_t)(255 & p[6]) << 060 |
(uint64_t)(255 & p[5]) << 050 | (uint64_t)(255 & p[4]) << 040 |
(uint64_t)(255 & p[3]) << 030 | (uint64_t)(255 & p[2]) << 020 |
(uint64_t)(255 & p[1]) << 010 | (uint64_t)(255 & p[0]) << 000;
if ((x = ~(w ^ c) & ((w ^ c) - 0x0101010101010101) & 0x8080808080808080) |
(y = ~w & (w - 0x0101010101010101) & 0x8080808080808080)) {
if (x) {
a = __builtin_ctzll(x);
if (y) {
b = __builtin_ctzll(y);
if (a <= b) {
return p + (a >> 3);
} else {
return 0;
}
} else {
return p + (a >> 3);
}
} else {
return 0;
}
}
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
/* Byte-at-a-time search of NUL-terminated s for the low 8 bits of c.
   Returns the first match (the terminator itself when c is NUL), or 0
   when the terminator is reached first. */
static inline const char *strchr_pure(const char *s, int c) {
  int want = c & 255;
  int got;
  do {
    got = *s & 255;
    if (got == want) return s;
    ++s;
  } while (got);
  return 0;
}
/* Searches NUL-terminated s for c, sixteen bytes per step.
   Loads are done from 16-byte aligned addresses, so the first load
   begins at or before s; hits that precede s are cleared from the mask
   before it is examined. The mask flags both NUL bytes and bytes equal
   to c, so the first set bit is whichever comes first. Returns 0 when
   that byte is the terminator and c is not NUL. */
noasan static inline const char *strchr_sse(const char *s, unsigned char c) {
  unsigned k;
  unsigned m;
  xmm_t v;
  const xmm_t *p;
  xmm_t z = {0};
  xmm_t n = {c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c};
  k = (uintptr_t)s & 15;
  p = (const xmm_t *)((uintptr_t)s & -16);
  v = *p;
  m = __builtin_ia32_pmovmskb128((v == z) | (v == n));
  /* clear the k low bits, which belong to bytes before s */
  m >>= k;
  m <<= k;
  while (!m) {
    v = *++p;
    m = __builtin_ia32_pmovmskb128((v == z) | (v == n));
  }
  s = (const char *)p + __builtin_ctz(m);
  /* stopping on the terminator while looking for a non-NUL byte means
     c does not occur */
  if (c && !*s) s = 0;
  return s;
}
/**
* Returns pointer to first instance of character.
*
@ -58,12 +63,13 @@ static noasan inline const char *strchr_x64(const char *p, uint64_t c) {
* @asyncsignalsafe
*/
char *strchr(const char *s, int c) {
char *r;
for (c &= 255; (uintptr_t)s & 7; ++s) {
if ((*s & 255) == c) return s;
if (!*s) return NULL;
const char *r;
if (X86_HAVE(SSE)) {
if (IsAsan()) __asan_check(s, 1);
r = strchr_sse(s, c);
} else {
r = strchr_pure(s, c);
}
r = strchr_x64(s, c);
assert(!r || *r || !c);
return r;
assert(!r || *r || !(c & 255));
return (char *)r;
}

View file

@ -17,38 +17,39 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/str/str.h"
noasan static const char *strchrnul_x64(const char *p, uint64_t c) {
unsigned a, b;
uint64_t w, x, y;
for (c *= 0x0101010101010101;; p += 8) {
w = (uint64_t)(255 & p[7]) << 070 | (uint64_t)(255 & p[6]) << 060 |
(uint64_t)(255 & p[5]) << 050 | (uint64_t)(255 & p[4]) << 040 |
(uint64_t)(255 & p[3]) << 030 | (uint64_t)(255 & p[2]) << 020 |
(uint64_t)(255 & p[1]) << 010 | (uint64_t)(255 & p[0]) << 000;
if ((x = ~(w ^ c) & ((w ^ c) - 0x0101010101010101) & 0x8080808080808080) |
(y = ~w & (w - 0x0101010101010101) & 0x8080808080808080)) {
if (x) {
a = __builtin_ctzll(x);
if (y) {
b = __builtin_ctzll(y);
if (a <= b) {
return p + (a >> 3);
} else {
return p + (b >> 3);
}
} else {
return p + (a >> 3);
}
} else {
b = __builtin_ctzll(y);
return p + (b >> 3);
}
}
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
/* Byte-at-a-time search of NUL-terminated s for the low 8 bits of c.
   Returns the first match, or the terminator if c does not occur. */
static inline const char *strchrnul_pure(const char *s, int c) {
  int want = c & 255;
  while (*s && (*s & 255) != want) {
    ++s;
  }
  return s;
}
/* Searches NUL-terminated s for c, sixteen bytes per step.
   Loads are done from 16-byte aligned addresses, so the first load
   begins at or before s; hits that precede s are cleared from the mask
   before it is examined. The mask flags both NUL bytes and bytes equal
   to c, and the address of whichever comes first is returned. */
noasan static inline const char *strchrnul_sse(const char *s, unsigned char c) {
  unsigned k;
  unsigned m;
  xmm_t v;
  const xmm_t *p;
  xmm_t z = {0};
  xmm_t n = {c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c};
  k = (uintptr_t)s & 15;
  p = (const xmm_t *)((uintptr_t)s & -16);
  v = *p;
  m = __builtin_ia32_pmovmskb128((v == z) | (v == n));
  /* clear the k low bits, which belong to bytes before s */
  m >>= k;
  m <<= k;
  while (!m) {
    v = *++p;
    m = __builtin_ia32_pmovmskb128((v == z) | (v == n));
  }
  return (const char *)p + __builtin_ctz(m);
}
/**
* Returns pointer to first instance of character.
*
@ -58,12 +59,13 @@ noasan static const char *strchrnul_x64(const char *p, uint64_t c) {
* NUL terminator if c is not found
*/
char *strchrnul(const char *s, int c) {
char *r;
for (c &= 255; (uintptr_t)s & 7; ++s) {
if ((*s & 0xff) == c) return s;
if (!*s) return s;
const char *r;
if (X86_HAVE(SSE)) {
if (IsAsan()) __asan_check(s, 1);
r = strchrnul_sse(s, c);
} else {
r = strchrnul_pure(s, c);
}
r = strchrnul_x64(s, c);
assert((*r & 255) == c || !*r);
return r;
assert((*r & 255) == (c & 255) || !*r);
return (char *)r;
}

View file

@ -34,9 +34,11 @@ static inline noasan uint64_t UncheckedAlignedRead64(const char *p) {
* @asyncsignalsafe
*/
int strcmp(const char *a, const char *b) {
int c;
size_t i = 0;
uint64_t v, w, d;
if (a == b) return 0;
if ((c = (*a & 255) - (*b & 255))) return c;
if (((uintptr_t)a & 7) == ((uintptr_t)b & 7)) {
for (; (uintptr_t)(a + i) & 7; ++i) {
if (a[i] != b[i] || !b[i]) {

View file

@ -16,7 +16,6 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/nexgen32e/hascharacter.internal.h"
#include "libc/str/str.h"
/**
@ -24,17 +23,21 @@
* @asyncsignalsafe
*/
char *strpbrk(const char *s, const char *accept) {
size_t i;
bool lut[256];
if (accept[0]) {
if (!accept[1]) {
return strchr(s, accept[0]);
} else {
for (i = 0; s[i]; ++i) {
if (HasCharacter(s[i], accept)) {
return (/*unconst*/ char *)&s[i];
memset(lut, 0, sizeof(lut));
while (*accept) {
lut[*accept++ & 255] = true;
}
for (; *s; ++s) {
if (lut[*s & 255]) {
return (/*unconst*/ char *)s;
}
}
}
}
return NULL;
return 0;
}

View file

@ -49,14 +49,12 @@ noasan char *strstr(const char *haystack, const char *needle) {
k = (uintptr_t)haystack & 15;
p = (const xmm_t *)((uintptr_t)haystack & -16);
v = *p;
m = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(v, z) |
__builtin_ia32_pcmpeqb128(v, n));
m = __builtin_ia32_pmovmskb128((v == z) | (v == n));
m >>= k;
m <<= k;
while (!m) {
v = *++p;
m = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(v, z) |
__builtin_ia32_pcmpeqb128(v, n));
m = __builtin_ia32_pmovmskb128((v == z) | (v == n));
}
haystack = (const char *)p + __builtin_ctzl(m);
for (i = 0;; ++i) {

View file

@ -18,14 +18,15 @@
*/
#include "libc/bits/likely.h"
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/str/str.h"
typedef uint64_t xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
static noinline antiquity unsigned timingsafe_bcmp_sse(const char *p,
const char *q,
size_t n) {
noasan static noinline antiquity unsigned timingsafe_bcmp_sse(const char *p,
const char *q,
size_t n) {
uint64_t w;
xmm_t a = {0};
while (n > 16 + 16) {
@ -40,9 +41,9 @@ static noinline antiquity unsigned timingsafe_bcmp_sse(const char *p,
return w | w >> 32;
}
microarchitecture("avx") static int timingsafe_bcmp_avx(const char *p,
const char *q,
size_t n) {
noasan static microarchitecture("avx") int timingsafe_bcmp_avx(const char *p,
const char *q,
size_t n) {
uint64_t w;
xmm_t a = {0};
if (n > 32) {
@ -134,10 +135,16 @@ int timingsafe_bcmp(const void *a, const void *b, size_t n) {
__builtin_memcpy(&w3, q + n - 8, 8);
w = (w0 ^ w1) | (w2 ^ w3);
return w | w >> 32;
} else if (X86_HAVE(AVX)) {
return timingsafe_bcmp_avx(p, q, n);
} else {
return timingsafe_bcmp_sse(p, q, n);
if (IsAsan()) {
__asan_check(a, n);
__asan_check(b, n);
}
if (X86_HAVE(AVX)) {
return timingsafe_bcmp_avx(p, q, n);
} else {
return timingsafe_bcmp_sse(p, q, n);
}
}
} else if (n >= 4) {
__builtin_memcpy(&u0, p, 4);

View file

@ -177,7 +177,7 @@ static const int kAstralLower[][3] = {
* Converts wide character to lower case.
*/
wint_t towlower(wint_t c) {
int m, l, r;
int m, l, r, n;
if (c < 0200) {
if ('A' <= c && c <= 'Z') {
return c + 32;
@ -199,7 +199,7 @@ wint_t towlower(wint_t c) {
return c + 38864; /* 80x ..Ꮿ → ꭰ ..ꮿ Cherokee */
} else {
l = 0;
r = sizeof(kLower) / sizeof(kLower[0]);
r = n = sizeof(kLower) / sizeof(kLower[0]);
while (l < r) {
m = (l + r) >> 1;
if (kLower[m].y < c) {
@ -208,7 +208,7 @@ wint_t towlower(wint_t c) {
r = m;
}
}
if (kLower[l].x <= c && c <= kLower[l].y) {
if (l < n && kLower[l].x <= c && c <= kLower[l].y) {
return c + kLower[l].d;
} else {
return c;
@ -216,7 +216,7 @@ wint_t towlower(wint_t c) {
}
} else {
l = 0;
r = sizeof(kAstralLower) / sizeof(kAstralLower[0]);
r = n = sizeof(kAstralLower) / sizeof(kAstralLower[0]);
while (l < r) {
m = (l + r) >> 1;
if (kAstralLower[m][1] < c) {
@ -225,7 +225,7 @@ wint_t towlower(wint_t c) {
r = m;
}
}
if (kAstralLower[l][0] <= c && c <= kAstralLower[l][1]) {
if (l < n && kAstralLower[l][0] <= c && c <= kAstralLower[l][1]) {
return c + kAstralLower[l][2];
} else {
return c;

View file

@ -140,7 +140,7 @@ static const int kAstralUpper[][3] = {
* Converts wide character to upper case.
*/
wint_t towupper(wint_t c) {
int m, l, r;
int m, l, r, n;
if (c < 0200) {
if ('a' <= c && c <= 'z') {
return c - 32;
@ -162,7 +162,7 @@ wint_t towupper(wint_t c) {
return c - 38864; /* 80x ꭰ ..ꮿ → ..Ꮿ Cherokee Supplement */
} else {
l = 0;
r = sizeof(kUpper) / sizeof(kUpper[0]);
r = n = sizeof(kUpper) / sizeof(kUpper[0]);
while (l < r) {
m = (l + r) >> 1;
if (kUpper[m].y < c) {
@ -171,7 +171,7 @@ wint_t towupper(wint_t c) {
r = m;
}
}
if (kUpper[l].x <= c && c <= kUpper[l].y) {
if (l < n && kUpper[l].x <= c && c <= kUpper[l].y) {
return c + kUpper[l].d;
} else {
return c;
@ -179,7 +179,7 @@ wint_t towupper(wint_t c) {
}
} else {
l = 0;
r = sizeof(kAstralUpper) / sizeof(kAstralUpper[0]);
r = n = sizeof(kAstralUpper) / sizeof(kAstralUpper[0]);
while (l < r) {
m = (l + r) >> 1;
if (kAstralUpper[m][1] < c) {
@ -188,7 +188,7 @@ wint_t towupper(wint_t c) {
r = m;
}
}
if (kAstralUpper[l][0] <= c && c <= kAstralUpper[l][1]) {
if (l < n && kAstralUpper[l][0] <= c && c <= kAstralUpper[l][1]) {
return c + kAstralUpper[l][2];
} else {
return c;