Make numerous improvements

- Python static hello world now 1.8mb
- Python static fully loaded now 10mb
- Python HTTPS client now uses MbedTLS
- Python REPL now completes import stmts
- Increase stack size for Python for now
- Begin synthesizing posixpath and ntpath
- Restore Python \N{UNICODE NAME} support
- Restore Python NFKD symbol normalization
- Add optimized code path for Intel SHA-NI
- Get more Python unit tests passing faster
- Get Python help() pagination working on NT
- Python hashlib now supports MbedTLS PBKDF2
- Make memcpy/memmove/memcmp/bcmp/etc. faster
- Add Mersenne Twister and Vigna to LIBC_RAND
- Provide privileged __printf() for error code
- Fix zipos opendir() so that it reports ENOTDIR
- Add basic chmod() implementation for Windows NT
- Add Cosmo's best functions to Python cosmo module
- Pin function trace indent depth to that of caller
- Show memory diagram on invalid access in MODE=dbg
- Differentiate stack overflow on crash in MODE=dbg
- Add stb_truetype and tools for analyzing font files
- Upgrade to UNICODE 13 and reduce its binary footprint
- COMPILE.COM now logs resource usage of build commands
- Start implementing basic poll() support on bare metal
- Set getauxval(AT_EXECFN) to GetModuleFileName() on NT
- Add descriptions to strerror() in non-TINY build modes
- Add COUNTBRANCH() macro to help with micro-optimizations
- Make error / backtrace / asan / memory code more unbreakable
- Add fast perfect C implementation of μ-Law and a-Law audio codecs
- Make strtol() functions consistent with other libc implementations
- Improve Linenoise implementation (see also github.com/jart/bestline)
- COMPILE.COM now suppresses stdout/stderr of successful build commands
This commit is contained in:
Justine Tunney 2021-09-27 22:58:51 -07:00
parent fa7b4f5bd1
commit 39bf41f4eb
806 changed files with 77494 additions and 63859 deletions

View file

@ -24,6 +24,7 @@
#include "libc/calls/struct/iovec.h"
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/log/libfatal.internal.h"
#include "libc/log/log.h"
#include "libc/macros.internal.h"
#include "libc/mem/hook/hook.internal.h"
@ -32,12 +33,16 @@
#include "libc/runtime/directmap.internal.h"
#include "libc/runtime/memtrack.internal.h"
#include "libc/runtime/runtime.h"
#include "libc/str/str.h"
#include "libc/str/tpenc.h"
#include "libc/sysv/consts/auxv.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/nr.h"
#include "libc/sysv/consts/prot.h"
#include "third_party/dlmalloc/dlmalloc.internal.h"
#define COOKIE 21578
STATIC_YOINK("_init_asan");
/**
@ -85,6 +90,8 @@ STATIC_YOINK("_init_asan");
} \
} while (0)
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
struct AsanSourceLocation {
const char *filename;
int line;
@ -129,6 +136,28 @@ static uint64_t __asan_rounddown2pow(uint64_t x) {
return x ? 1ull << __asan_bsrl(x) : 0;
}
/**
 * Encodes a code point as UTF-8, packed into an integer.
 *
 * Values below 0200 are returned unchanged. Otherwise the lead byte is
 * placed in the least significant byte of the result and each
 * continuation byte follows in the next more significant byte, so the
 * result can be emitted by repeatedly storing the low byte and shifting
 * right by eight.
 *
 * @param c is the code point to encode
 * @return packed UTF-8 bytes, lead byte lowest
 */
static uint64_t __asan_encodeutf8(unsigned c) {
  /* indexed by (highest set bit of c) - 7; low byte is the number of
     continuation bytes, high byte is the lead byte marker */
  static const unsigned short kTpEnc[32 - 7] = {
      1 | 0300 << 8, 1 | 0300 << 8, 1 | 0300 << 8, 1 | 0300 << 8,
      2 | 0340 << 8, 2 | 0340 << 8, 2 | 0340 << 8, 2 | 0340 << 8,
      2 | 0340 << 8, 3 | 0360 << 8, 3 | 0360 << 8, 3 | 0360 << 8,
      3 | 0360 << 8, 3 | 0360 << 8, 4 | 0370 << 8, 4 | 0370 << 8,
      4 | 0370 << 8, 4 | 0370 << 8, 4 | 0370 << 8, 5 | 0374 << 8,
      5 | 0374 << 8, 5 | 0374 << 8, 5 | 0374 << 8, 5 | 0374 << 8,
      5 | 0374 << 8,
  };
  int entry, remaining;
  unsigned long long packed;
  if (c < 0200) return c;
  entry = kTpEnc[__asan_bsrl(c) - 7];
  packed = 0;
  /* every table entry has a count of at least one */
  for (remaining = entry & 0xff; remaining; --remaining) {
    packed |= 0200 | (c & 077);
    packed <<= 8;
    c >>= 6;
  }
  return c | packed | entry >> 8;
}
static size_t __asan_strlen(const char *s) {
size_t n = 0;
while (*s++) ++n;
@ -299,20 +328,68 @@ static char *__asan_hexcpy(char *p, uint64_t x, uint8_t k) {
return p;
}
static char *__asan_uint2str(char *p, uint64_t i) {
int j = 0;
static char *__asan_uintcpy(char p[hasatleast 21], uint64_t x) {
char t;
size_t i, a, b;
i = 0;
do {
p[j++] = i % 10 + '0';
i /= 10;
} while (i > 0);
reverse(p, j);
return p + j;
p[i++] = x % 10 + '0';
x = x / 10;
} while (x > 0);
p[i] = '\0';
if (i) {
for (a = 0, b = i - 1; a < b; ++a, --b) {
t = p[a];
p[a] = p[b];
p[b] = t;
}
}
return p + i;
}
static char *__asan_intcpy(char *p, int64_t i) {
if (i >= 0) return __asan_uint2str(p, i);
*p++ = '-';
return __asan_uint2str(p, -i);
static char *__asan_intcpy(char p[hasatleast 21], int64_t x) {
if (x < 0) *p++ = '-', x = -(uint64_t)x;
return __asan_uintcpy(p, x);
}
/**
 * Terminates the process immediately with the given exit status.
 *
 * On non-Windows hosts this issues the exit_group system call with an
 * inline `syscall` instruction; on Windows it calls ExitProcess().
 *
 * @param rc is the exit status handed to the operating system
 */
privileged noinline wontreturn void __asan_exit(int rc) {
if (!IsWindows()) {
/* rax = system call number, rdi = first argument */
asm volatile("syscall"
: /* no outputs */
: "a"(__NR_exit_group), "D"(rc)
: "memory");
unreachable;
} else {
ExitProcess(rc);
}
}
/**
 * Writes bytes to standard error.
 *
 * On non-Windows hosts this issues the write system call on file
 * descriptor 2 with an inline `syscall` instruction and returns the
 * raw result in rax. On Windows it calls WriteFile() on the handle
 * returned by GetStdHandle(kNtStdErrorHandle).
 *
 * @param data points to the bytes to write
 * @param size is the number of bytes to write
 * @return on Windows, bytes written or -1 if WriteFile() failed;
 *     elsewhere, whatever the system call returned
 */
privileged noinline ssize_t __asan_write(const void *data, size_t size) {
ssize_t rc;
uint32_t wrote;
if (!IsWindows()) {
/* rdi = 2 (fd), rsi = data, rdx = size; the syscall instruction
   overwrites rcx and r11, hence the clobbers */
asm volatile("syscall"
: "=a"(rc)
: "0"(__NR_write), "D"(2), "S"(data), "d"(size)
: "rcx", "r11", "memory");
return rc;
} else {
if (WriteFile(GetStdHandle(kNtStdErrorHandle), data, size, &wrote, 0)) {
return wrote;
} else {
return -1;
}
}
}
/**
 * Writes a NUL-terminated string to standard error.
 *
 * @param s is the string to write; the terminator is not written
 * @return result of __asan_write()
 */
static ssize_t __asan_write_string(const char *s) {
  size_t len;
  len = __asan_strlen(s);
  return __asan_write(s, len);
}
/**
 * Prints a message to standard error and terminates the process.
 *
 * If __die() was linked into the program it is called first;
 * otherwise, or if it returns, the process exits with status 134.
 *
 * @param msg is the NUL-terminated message to print
 */
wontreturn void __asan_die(const char *msg) {
  __asan_write_string(msg);
  if (weaken(__die)) {
    weaken(__die)();
  }
  __asan_exit(134);
}
void __asan_poison(uintptr_t p, size_t n, int t) {
@ -320,13 +397,16 @@ void __asan_poison(uintptr_t p, size_t n, int t) {
k = p & 7;
s = (signed char *)((p >> 3) + 0x7fff8000);
if (UNLIKELY(k)) {
if (n && (!*s || *s > k) && 8 - k >= n) *s = k;
++s, n -= MIN(8 - k, n);
if (k < *s && *s <= k + n) *s = k;
n -= MIN(8 - k, n);
s += 1;
}
__asan_memset(s, t, n >> 3);
if ((k = n & 7)) {
s += n >> 3;
if (*s < 0 || 0 < *s && *s <= k) *s = t;
if (*s >= 0) {
*s = kAsanHeapOverrun;
}
}
}
@ -335,54 +415,126 @@ void __asan_unpoison(uintptr_t p, size_t n) {
k = p & 7;
s = (signed char *)((p >> 3) + 0x7fff8000);
if (UNLIKELY(k)) {
if (n) *s = 0;
++s, n -= MIN(8 - k, n);
if (!n) return;
if (k + n < 8) {
*s = MAX(*s, k + n);
return;
} else {
*s = 0;
}
n -= MIN(8 - k, n);
s += 1;
}
__asan_memset(s, 0, n >> 3);
if ((k = n & 7)) {
s += n >> 3;
if (*s && *s < k) *s = k;
if (*s < 0) {
*s = k;
} else if (*s > 0) {
*s = MAX(*s, k);
}
}
}
bool __asan_is_valid(const void *p, size_t n) {
signed char k, *s, *e;
if (n) {
if (p) {
k = (uintptr_t)p & 7;
s = (signed char *)(((uintptr_t)p >> 3) + 0x7fff8000);
if (UNLIKELY(k)) {
if (n && !(!*s || *s >= k + n)) return false;
++s, n -= MIN(8 - k, n);
}
e = s;
k = n & 7;
e += n >> 3;
for (; s + 8 <= e; s += 8) {
if ((uint64_t)(255 & s[0]) << 000 | (uint64_t)(255 & s[1]) << 010 |
(uint64_t)(255 & s[2]) << 020 | (uint64_t)(255 & s[3]) << 030 |
(uint64_t)(255 & s[4]) << 040 | (uint64_t)(255 & s[5]) << 050 |
(uint64_t)(255 & s[6]) << 060 | (uint64_t)(255 & s[7]) << 070) {
return false;
}
}
while (s < e) {
if (*s++) {
return false;
}
}
if (k) {
if (!(!*s || *s >= k)) {
return false;
}
}
return true;
} else {
return false;
}
static inline bool __asan_is_mapped(int x) {
int i;
struct MemoryIntervals *m;
m = weaken(_mmi);
i = FindMemoryInterval(m, x);
return i < m->i && m->p[i].x <= x && x <= m->p[i].y;
}
/**
 * Tells whether p points inside the program image, [_base,_end).
 */
static inline bool __asan_is_image(const unsigned char *p) {
  if (!(_base <= p)) return false;
  return p < _end;
}
/**
 * Tells whether x is inside the program image or a tracked mapping.
 */
static inline bool __asan_exists(const void *x) {
  if (__asan_is_image(x)) return true;
  return __asan_is_mapped((intptr_t)x >> 16);
}
static struct AsanFault __asan_fault(signed char *s, char dflt) {
struct AsanFault r;
if (s[0] < 0) {
r.kind = s[0];
} else if (((uintptr_t)(s + 1) & (PAGESIZE - 1)) && s[1] < 0) {
r.kind = s[1];
} else {
return true;
r.kind = dflt;
}
r.shadow = s;
return r;
}
/**
 * Checks the shadow memory for an access to [p,p+n).
 *
 * Each shadow byte describes eight application bytes: zero means all
 * eight may be accessed, 1..7 means only that many leading bytes may
 * be accessed, and any other value is treated as a fault.
 *
 * The shadow address is verified to be mapped before the first shadow
 * byte is read, and again whenever the 8-bytes-at-a-time scan reaches
 * a shadow address that is a multiple of 64kb.
 *
 * @param p is the first byte of the access
 * @param n is the byte length of the access; zero always succeeds
 * @return fault whose kind is zero if the access is fine; otherwise
 *     kind is a kAsan code and shadow is the offending shadow byte
 */
struct AsanFault __asan_check(const void *p, size_t n) {
  intptr_t a;
  uint64_t w;
  signed char k, *s, *e;
  if (!n) return (struct AsanFault){0};
  k = (intptr_t)p & 7;
  a = ((intptr_t)p >> 3) + 0x7fff8000;
  s = (signed char *)a;
  if (!__asan_is_mapped(a >> 16)) {
    return (struct AsanFault){kAsanUnmapped, s};
  }
  /* access starts in the middle of an 8-byte granule */
  if (UNLIKELY(k)) {
    if (!*s) {
      n -= MIN(8 - k, n);
      s += 1;
    } else if (*s > 0 && k + n < 8 && *s >= k + n) {
      /* whole access fits in the valid prefix of this granule */
      return (struct AsanFault){0};
    } else {
      return __asan_fault(s, kAsanHeapOverrun);
    }
  }
  /* [s,e) are the fully covered granules; k is the trailing remainder */
  e = s;
  k = n & 7;
  e += n >> 3;
  /* byte at a time until the shadow pointer is 8-byte aligned */
  while (s < e && ((intptr_t)s & 7)) {
    if (*s++) {
      return __asan_fault(s - 1, kAsanHeapOverrun);
    }
  }
  /* eight shadow bytes at a time */
  for (; s + 8 <= e; s += 8) {
    if (UNLIKELY(!((a = (intptr_t)s) & 0xffff))) {
      if (!__asan_is_mapped(a >> 16)) {
        return (struct AsanFault){kAsanUnmapped, s};
      }
    }
    if ((w = ((uint64_t)(255 & s[0]) << 000 | (uint64_t)(255 & s[1]) << 010 |
              (uint64_t)(255 & s[2]) << 020 | (uint64_t)(255 & s[3]) << 030 |
              (uint64_t)(255 & s[4]) << 040 | (uint64_t)(255 & s[5]) << 050 |
              (uint64_t)(255 & s[6]) << 060 | (uint64_t)(255 & s[7]) << 070))) {
      /* lowest set bit locates the first nonzero shadow byte */
      s += (unsigned)__builtin_ctzll(w) >> 3;
      return __asan_fault(s, kAsanHeapOverrun);
    }
  }
  while (s < e) {
    if (*s++) {
      return __asan_fault(s - 1, kAsanHeapOverrun);
    }
  }
  /* trailing partial granule needs at least k valid leading bytes */
  if (!k || !*s || k <= *s) {
    return (struct AsanFault){0};
  } else {
    return __asan_fault(s, kAsanHeapOverrun);
  }
}
void __asan_verify(const void *p, size_t n) {
const char *q;
struct AsanFault f;
if (!(f = __asan_check(p, n)).kind) return;
q = UNSHADOW(f.shadow);
if ((uintptr_t)q != ((uintptr_t)p & -8) && (uintptr_t)q - (uintptr_t)p < n) {
n -= (uintptr_t)q - (uintptr_t)p;
p = q;
}
__asan_report(p, n, "verify", f.kind);
}
bool __asan_is_valid(const void *p, size_t n) {
return !__asan_check(p, n).kind;
}
bool __asan_is_valid_iov(const struct iovec *iov, int iovlen) {
@ -423,10 +575,8 @@ static const char *__asan_dscribe_heap_poison(long c) {
}
}
static const char *__asan_describe_access_poison(signed char *p) {
int c = p[0];
if (1 <= c && c <= 7) c = p[1];
switch (c) {
static const char *__asan_describe_access_poison(char kind) {
switch (kind) {
case kAsanHeapFree:
return "heap use after free";
case kAsanStackFree:
@ -453,89 +603,163 @@ static const char *__asan_describe_access_poison(signed char *p) {
return "unscoped";
case kAsanUnmapped:
return "unmapped";
case kAsanProtected:
return "protected";
case kAsanStackGuard:
return "stack overflow";
default:
return "poisoned";
}
}
/**
 * Terminates the process immediately with the given exit status.
 *
 * On non-Windows hosts this issues the exit_group system call with an
 * inline `syscall` instruction; on Windows it calls ExitProcess().
 *
 * @param rc is the exit status handed to the operating system
 */
static privileged noinline wontreturn void __asan_exit(int rc) {
if (!IsWindows()) {
/* rax = system call number, rdi = first argument */
asm volatile("syscall"
: /* no outputs */
: "a"(__NR_exit_group), "D"(rc)
: "memory");
unreachable;
} else {
ExitProcess(rc);
}
}
/**
 * Writes bytes to standard error.
 *
 * On non-Windows hosts this issues the write system call on file
 * descriptor 2 and returns the raw result in rax. On Windows it calls
 * WriteFile() on the handle from GetStdHandle(kNtStdErrorHandle).
 *
 * @param data points to the bytes to write
 * @param size is the number of bytes to write
 * @return on Windows, bytes written or -1 if WriteFile() failed;
 *     elsewhere, whatever the system call returned
 */
static privileged noinline ssize_t __asan_write(const void *data, size_t size) {
ssize_t rc;
uint32_t wrote;
if (!IsWindows()) {
/* the syscall instruction overwrites rcx and r11, hence the clobbers */
asm volatile("syscall"
: "=a"(rc)
: "0"(__NR_write), "D"(2), "S"(data), "d"(size)
: "rcx", "r11", "memory");
return rc;
} else {
if (WriteFile(GetStdHandle(kNtStdErrorHandle), data, size, &wrote, 0)) {
return wrote;
} else {
return -1;
}
}
}
/**
 * Writes a NUL-terminated string to standard error.
 *
 * @param s is the string to write; the terminator is not written
 * @return result of __asan_write()
 */
static ssize_t __asan_write_string(const char *s) {
  size_t len;
  len = __asan_strlen(s);
  return __asan_write(s, len);
}
/**
 * Prints a message to standard error and terminates the process.
 *
 * If __die() was linked into the program it is called first;
 * otherwise, or if it returns, the process exits with status 134.
 *
 * @param msg is the NUL-terminated message to print
 */
static wontreturn void __asan_die(const char *msg) {
  __asan_write_string(msg);
  if (weaken(__die)) {
    weaken(__die)();
  }
  __asan_exit(134);
}
static char *__asan_report_start(char *p) {
bool ansi;
static bool __asan_ansi(void) {
const char *term;
term = weaken(getenv) ? weaken(getenv)("TERM") : NULL;
ansi = !term || __asan_strcmp(term, "dumb") != 0;
return !term || __asan_strcmp(term, "dumb") != 0;
}
static char *__asan_report_start(char *p, bool ansi) {
if (ansi) p = __asan_stpcpy(p, "\r\e[J\e[1;91m");
p = __asan_stpcpy(p, "asan error");
if (ansi) p = __asan_stpcpy(p, "\e[0m");
return __asan_stpcpy(p, ": ");
}
static wontreturn void __asan_report_heap_fault(void *addr, long c) {
char *p, ibuf[21], buf[256];
p = __asan_report_start(buf);
p = __asan_stpcpy(p, __asan_dscribe_heap_poison(c));
p = __asan_stpcpy(p, " at 0x");
static wontreturn void __asan_report_invalid_pointer(void *addr) {
char *p;
p = __asan_report_start(__fatalbuf, __asan_ansi());
p = __asan_stpcpy(p, "invalid pointer 0x");
p = __asan_hexcpy(p, (intptr_t)addr, 48);
p = __asan_stpcpy(p, " shadow 0x");
p = __asan_hexcpy(p, (intptr_t)SHADOW(addr), 48);
p = __asan_stpcpy(p, "\r\n");
__asan_die(buf);
__asan_die(__fatalbuf);
}
static wontreturn void __asan_report_memory_fault(uint8_t *addr, int size,
const char *kind) {
char *p, ibuf[21], buf[256];
p = __asan_report_start(buf);
p = __asan_stpcpy(p, __asan_describe_access_poison(SHADOW(addr)));
/**
 * Formats "A-B" where both bounds are printed by __asan_hexcpy() with
 * a width argument of 48.
 *
 * @param p is the output cursor
 * @param a is the first bound
 * @param b is the second bound
 * @return pointer past the last byte written
 */
static char *__asan_format_interval(char *p, intptr_t a, intptr_t b) {
  char *out;
  out = __asan_hexcpy(p, a, 48);
  *out = '-';
  return __asan_hexcpy(out + 1, b, 48);
}
/**
 * Formats one line describing an image section.
 *
 * Nothing is written unless p1 is strictly below p2. The line holds
 * the interval, a space, the section name, a marker if addr lies
 * within [p1,p2] (both bounds inclusive), and a CRLF.
 *
 * @param p is the output cursor
 * @param p1 is the start of the section
 * @param p2 is the end of the section
 * @param name is the label to print
 * @param addr is the address being reported
 * @return pointer past the last byte written
 */
static char *__asan_format_section(char *p, void *p1, void *p2,
                                   const char *name, void *addr) {
  intptr_t lo, hi, at;
  lo = (intptr_t)p1;
  hi = (intptr_t)p2;
  if (!(lo < hi)) return p;
  p = __asan_format_interval(p, lo, hi);
  *p++ = ' ';
  p = __asan_stpcpy(p, name);
  at = (intptr_t)addr;
  if (!(at < lo || hi < at)) {
    p = __asan_stpcpy(p, " ←address");
  }
  *p++ = '\r';
  *p++ = '\n';
  return p;
}
wontreturn void __asan_report(void *addr, int size, const char *message,
char kind) {
bool a;
wint_t c;
int i, cc;
signed char t;
uint64_t x, y, z;
char *p, *q, *base;
struct MemoryIntervals *m;
a = __asan_ansi();
p = __asan_report_start(__fatalbuf, a);
p = __asan_stpcpy(p, __asan_describe_access_poison(kind));
p = __asan_stpcpy(p, " ");
p = __asan_intcpy(p, size);
p = __asan_stpcpy(p, "-byte ");
p = __asan_stpcpy(p, kind);
p = __asan_stpcpy(p, message);
p = __asan_stpcpy(p, " at 0x");
p = __asan_hexcpy(p, (uintptr_t)addr, 48);
p = __asan_stpcpy(p, " shadow 0x");
p = __asan_hexcpy(p, (uintptr_t)SHADOW(addr), 48);
p = __asan_stpcpy(p, "\r\n");
__asan_die(buf);
*p++ = '\r', *p++ = '\n';
if (0 < size && size < 80) {
base = (char *)addr - ((80 >> 1) - (size >> 1));
for (i = 0; i < 80; ++i) {
if ((char *)addr <= base + i && base + i < (char *)addr + size) {
if (__asan_is_valid(base + i, 1)) {
*p++ = '*';
} else {
*p++ = 'x';
}
} else {
*p++ = ' ';
}
}
*p++ = '\r', *p++ = '\n';
for (c = i = 0; i < 80; ++i) {
if (!(t = __asan_check(base + i, 1).kind)) {
if (a && c != 32) {
p = __asan_stpcpy(p, "\e[32m");
c = 32;
}
*p++ = '.';
} else {
if (a && c != 31) {
p = __asan_stpcpy(p, "\e[31m");
c = 31;
}
*p++ = "FFRRUOOGUOUOSMP~"[-t & 15];
}
}
if (a) p = __asan_stpcpy(p, "\e[39m");
*p++ = '\r', *p++ = '\n';
for (i = 0; (intptr_t)(base + i) & 7; ++i) *p++ = ' ';
for (; i + 8 <= 80; i += 8) {
q = p + 8;
*p++ = '|';
z = ((intptr_t)(base + i) >> 3) + 0x7fff8000;
if (__asan_is_mapped(z >> 16)) {
p = __asan_intcpy(p, *(signed char *)z);
} else {
*p++ = '!';
}
while (p < q) {
*p++ = ' ';
}
}
for (; i < 80; ++i) *p++ = ' ';
*p++ = '\r', *p++ = '\n';
for (i = 0; i < 80; ++i) {
if (__asan_exists(base + i)) {
c = kCp437[((unsigned char *)base)[i]];
} else {
c = u'';
}
z = __asan_encodeutf8(c);
do {
*p++ = z;
} while ((z >>= 8));
}
*p++ = '\r', *p++ = '\n';
}
p = __asan_format_section(p, _base, _etext, ".text", addr);
p = __asan_format_section(p, _etext, _edata, ".data", addr);
p = __asan_format_section(p, _end, _edata, ".bss", addr);
for (m = weaken(_mmi), i = 0; i < m->i; ++i) {
x = m->p[i].x;
y = m->p[i].y;
p = __asan_format_interval(p, x << 16, (y << 16) + (FRAMESIZE - 1));
z = (intptr_t)addr >> 16;
if (x <= z && z <= y) p = __asan_stpcpy(p, " ←address");
z = (((intptr_t)addr >> 3) + 0x7fff8000) >> 16;
if (x <= z && z <= y) p = __asan_stpcpy(p, " ←shadow");
*p++ = '\r', *p++ = '\n';
}
*p = 0;
__asan_die(__fatalbuf);
}
wontreturn void __asan_report_memory_fault(void *addr, int size,
const char *message) {
__asan_report(addr, size, message, __asan_fault(SHADOW(addr), -128).kind);
}
const void *__asan_morgue_add(void *p) {
@ -566,75 +790,108 @@ static void __asan_morgue_flush(void) {
}
static size_t __asan_heap_size(size_t n) {
if (n < -8) {
if (n <= 0x7fffffffffff) {
return __asan_roundup2pow(ROUNDUP(n, 8) + 8);
} else {
return -1;
}
}
/**
 * Returns the requested size, treating a zero-byte request as one byte.
 *
 * @param n is the size the caller asked for
 * @return n, or 1 if n is zero
 */
static size_t __asan_user_size(size_t n) {
  return n ? n : 1;
}
/**
 * Returns the stack size.
 *
 * The address of the weak symbol ape_stack_memsz is used as the value
 * when that symbol is defined; otherwise STACKSIZE is returned.
 */
static size_t __asan_stack_size(void) {
  extern char ape_stack_memsz[] __attribute__((__weak__));
  if (!ape_stack_memsz) return STACKSIZE;
  return (uintptr_t)ape_stack_memsz;
}
/**
 * Stores the low 48 bits of x as a 64-bit big-endian word whose top 16
 * bits hold COOKIE xor'd with the low 16 bits of x.
 *
 * @param p is where the eight bytes are written
 * @param x is the value to store
 */
forceinline void __asan_write48(char *p, uint64_t x) {
  uint64_t tag, word;
  tag = COOKIE;
  tag ^= x & 0xffff;
  word = tag << 48 | (x & 0xffffffffffff);
  WRITE64BE(p, word);
}
/**
 * Loads a value stored by __asan_write48().
 *
 * The low 48 bits are sign-extended into *x whether or not the check
 * succeeds.
 *
 * @param p points to the eight stored bytes
 * @param x receives the decoded value
 * @return true if the top 16 bits xor'd with the low 16 bits equal
 *     COOKIE
 */
forceinline bool __asan_read48(const char *p, uint64_t *x) {
  uint64_t word, tag;
  word = READ64BE(p);
  tag = word >> 48;
  tag ^= word & 0xffff;
  *x = (int64_t)(word << 16) >> 16;
  return tag == COOKIE;
}
static void *__asan_allocate(size_t a, size_t n, int underrun, int overrun) {
char *p;
size_t c;
char *p, *f;
if ((p = weaken(dlmemalign)(a, __asan_heap_size(n)))) {
n = __asan_user_size(n);
c = weaken(dlmalloc_usable_size)(p);
__asan_unpoison((uintptr_t)p, n);
__asan_poison((uintptr_t)p - 16, 16, underrun); /* see dlmalloc design */
__asan_poison((uintptr_t)p + n, c - n, overrun);
__asan_memset(p, 0xF9, n);
f = p + c - 8;
WRITE64BE(f, n);
__asan_write48(p + c - 8, n);
}
return p;
}
static size_t __asan_malloc_usable_size(const void *p) {
size_t c, n;
if ((c = weaken(dlmalloc_usable_size)(p)) >= 8) {
if ((n = READ64BE((char *)p + c - 8)) <= c) {
return n;
} else {
__asan_report_heap_fault(p, n);
struct AsanFault f;
if (!(f = __asan_check(p, 1)).kind) {
if ((c = weaken(dlmalloc_usable_size)(p)) >= 8) {
if (__asan_read48((char *)p + c - 8, &n) && n <= c) {
return n;
}
}
__asan_report_invalid_pointer(p);
} else {
__asan_report_heap_fault(p, 0);
__asan_report(p, 1, "heaping", f.kind);
}
}
static void __asan_deallocate(char *p, long kind) {
size_t c, n;
if ((c = weaken(dlmalloc_usable_size)(p)) >= 8) {
if ((n = READ64BE(p + c - 8)) <= c) {
WRITE64BE(p + c - 8, kind);
__asan_poison((uintptr_t)p, c - 8, kind);
if (weaken(dlfree)) {
if (c <= FRAMESIZE) {
p = __asan_morgue_add(p);
}
weaken(dlfree)(p);
if (__asan_read48(p + c - 8, &n) && n <= c) {
__asan_poison((uintptr_t)p, c, kind);
if (c <= FRAMESIZE) {
p = __asan_morgue_add(p);
}
weaken(dlfree)(p);
} else {
__asan_report_heap_fault(p, n);
__asan_report_invalid_pointer(p);
}
} else {
__asan_report_heap_fault(p, 0);
__asan_report_invalid_pointer(p);
}
}
static void __asan_free(void *p) {
void __asan_free(void *p) {
if (!p) return;
__asan_deallocate(p, kAsanHeapFree);
}
static void *__asan_memalign(size_t align, size_t size) {
void *__asan_memalign(size_t align, size_t size) {
return __asan_allocate(align, size, kAsanHeapUnderrun, kAsanHeapOverrun);
}
static void *__asan_malloc(size_t size) {
void *__asan_malloc(size_t size) {
return __asan_memalign(__BIGGEST_ALIGNMENT__, size);
}
static void *__asan_calloc(size_t n, size_t m) {
void *__asan_calloc(size_t n, size_t m) {
char *p;
if (__builtin_mul_overflow(n, m, &n)) n = -1;
if ((p = __asan_malloc(n))) __asan_memset(p, 0, n);
@ -660,27 +917,27 @@ static void *__asan_realloc_impl(void *p, size_t n,
size_t c, m;
if ((c = weaken(dlmalloc_usable_size)(p)) >= 8) {
f = (char *)p + c - 8;
if ((m = READ64BE(f)) <= c) {
if (__asan_read48(f, &m) && m <= c) {
if (n <= m) { /* shrink */
__asan_poison((uintptr_t)p + n, m - n, kAsanHeapOverrun);
WRITE64BE(f, n);
__asan_write48(f, n);
return p;
} else if (n <= c - 8) { /* small growth */
__asan_unpoison((uintptr_t)p + m, n - m);
WRITE64BE(f, n);
__asan_write48(f, n);
return p;
} else { /* exponential growth */
return grow(p, n, m);
}
} else {
__asan_report_heap_fault(p, m);
__asan_report_invalid_pointer(p);
}
} else {
__asan_report_heap_fault(p, 0);
__asan_report_invalid_pointer(p);
}
}
static void *__asan_realloc(void *p, size_t n) {
void *__asan_realloc(void *p, size_t n) {
if (p) {
if (n) {
return __asan_realloc_impl(p, n, __asan_realloc_grow);
@ -693,7 +950,7 @@ static void *__asan_realloc(void *p, size_t n) {
}
}
static void *__asan_realloc_in_place(void *p, size_t n) {
void *__asan_realloc_in_place(void *p, size_t n) {
if (p) {
return __asan_realloc_impl(p, n, __asan_realloc_nogrow);
} else {
@ -701,15 +958,15 @@ static void *__asan_realloc_in_place(void *p, size_t n) {
}
}
static void *__asan_valloc(size_t n) {
void *__asan_valloc(size_t n) {
return __asan_memalign(PAGESIZE, n);
}
static void *__asan_pvalloc(size_t n) {
void *__asan_pvalloc(size_t n) {
return __asan_valloc(ROUNDUP(n, PAGESIZE));
}
static int __asan_malloc_trim(size_t pad) {
int __asan_malloc_trim(size_t pad) {
__asan_morgue_flush();
if (weaken(dlmalloc_trim)) {
return weaken(dlmalloc_trim)(pad);
@ -727,9 +984,10 @@ void __asan_stack_free(char *p, size_t size, int classid) {
}
void __asan_handle_no_return(void) {
uintptr_t rsp;
rsp = (uintptr_t)__builtin_frame_address(0);
__asan_unpoison(rsp, ROUNDUP(rsp, STACKSIZE) - rsp);
uintptr_t stk, ssz;
stk = (uintptr_t)__builtin_frame_address(0);
ssz = __asan_stack_size();
__asan_unpoison(stk, ROUNDUP(stk, ssz) - stk);
}
void __asan_register_globals(struct AsanGlobal g[], int n) {
@ -797,14 +1055,6 @@ void __asan_install_malloc_hooks(void) {
HOOK(hook_malloc_usable_size, __asan_malloc_usable_size);
}
static bool __asan_is_mapped(int x) {
int i;
struct MemoryIntervals *m;
m = weaken(_mmi);
i = weaken(FindMemoryInterval)(m, x);
return i < m->i && x >= m->p[i].x && x <= m->p[i].y;
}
void __asan_map_shadow(uintptr_t p, size_t n) {
int i, x, a, b;
struct DirectMap sm;
@ -862,17 +1112,22 @@ static textstartup void __asan_shadow_string_list(char **list) {
static textstartup void __asan_shadow_existing_mappings(void) {
size_t i;
uintptr_t rsp, stk, ssz;
struct MemoryIntervals m;
__asan_memcpy(&m, weaken(_mmi), sizeof(m));
for (i = 0; i < m.i; ++i) {
__asan_map_shadow((uintptr_t)m.p[i].x << 16,
(uintptr_t)(m.p[i].y - m.p[i].x + 1) << 16);
}
rsp = (uintptr_t)__builtin_frame_address(0);
ssz = __asan_stack_size();
stk = ROUNDDOWN(rsp, ssz);
__asan_poison(stk, PAGESIZE, kAsanStackGuard);
}
static textstartup bool IsMemoryManagementRuntimeLinked(void) {
return weaken(_mmi) && weaken(sys_mmap) && weaken(MAP_ANONYMOUS) &&
weaken(FindMemoryInterval) && weaken(TrackMemoryInterval);
weaken(TrackMemoryInterval);
}
textstartup void __asan_init(int argc, char **argv, char **envp,
@ -886,7 +1141,6 @@ textstartup void __asan_init(int argc, char **argv, char **envp,
REQUIRE(_mmi);
REQUIRE(sys_mmap);
REQUIRE(MAP_ANONYMOUS);
REQUIRE(FindMemoryInterval);
REQUIRE(TrackMemoryInterval);
if (weaken(hook_malloc) || weaken(hook_calloc) || weaken(hook_realloc) ||
weaken(hook_realloc_in_place) || weaken(hook_pvalloc) ||

View file

@ -4,27 +4,49 @@
#define kAsanScale 3
#define kAsanMagic 0x7fff8000
#define kAsanHeapFree -1
#define kAsanStackFree -2
#define kAsanRelocated -3
#define kAsanHeapUnderrun -4
#define kAsanHeapOverrun -5
#define kAsanGlobalOverrun -6
#define kAsanGlobalUnregistered -7
#define kAsanStackUnderrun -8
#define kAsanStackOverrun -9
#define kAsanAllocaUnderrun -10
#define kAsanAllocaOverrun -11
#define kAsanUnscoped -12
#define kAsanUnmapped -13
#define kAsanHeapFree -1 /* F */
#define kAsanStackFree -2 /* F */
#define kAsanRelocated -3 /* R */
#define kAsanHeapUnderrun -4 /* U */
#define kAsanHeapOverrun -5 /* O */
#define kAsanGlobalOverrun -6 /* O */
#define kAsanGlobalUnregistered -7 /* G */
#define kAsanStackUnderrun -8 /* U */
#define kAsanStackOverrun -9 /* O */
#define kAsanAllocaUnderrun -10 /* U */
#define kAsanAllocaOverrun -11 /* O */
#define kAsanUnscoped -12 /* S */
#define kAsanUnmapped -13 /* M */
#define kAsanProtected -14 /* P */
#define kAsanStackGuard -15 /* _ */
#define SHADOW(x) ((signed char *)(((uintptr_t)(x) >> kAsanScale) + kAsanMagic))
#define SHADOW(x) ((signed char *)(((uintptr_t)(x) >> kAsanScale) + kAsanMagic))
/* Maps a shadow byte address back to the first of the eight bytes it
   describes. SHADOW() computes (addr >> 3) + 0x7fff8000, so the magic
   has to be subtracted (not added) before shifting back. */
#define UNSHADOW(x) ((void *)(((uintptr_t)(x) - 0x7fff8000) << 3))
/* Result of a shadow memory check such as __asan_check(). */
struct AsanFault {
/* zero when the access is fine, otherwise one of the kAsan codes;
   NOTE(review): those codes are negative and this is plain char, so
   this relies on char being signed on the target -- confirm */
char kind;
/* address of the shadow byte the result refers to */
signed char *shadow;
};
void __asan_unpoison(uintptr_t, size_t);
void __asan_verify(const void *, size_t);
void __asan_map_shadow(uintptr_t, size_t);
void __asan_poison(uintptr_t, size_t, int);
void __asan_unpoison(uintptr_t, size_t);
bool __asan_is_valid(const void *, size_t);
bool __asan_is_valid_iov(const struct iovec *, int);
bool __asan_is_valid_strlist(char *const *);
bool __asan_is_valid_iov(const struct iovec *, int);
struct AsanFault __asan_check(const void *, size_t);
void __asan_report_memory_fault(void *, int, const char *) wontreturn;
void __asan_report(void *, int, const char *, char) wontreturn;
void *__asan_memalign(size_t, size_t);
void __asan_free(void *);
void *__asan_malloc(size_t);
void *__asan_calloc(size_t, size_t);
void *__asan_realloc(void *, size_t);
void *__asan_realloc_in_place(void *, size_t);
void *__asan_valloc(size_t);
void *__asan_pvalloc(size_t);
int __asan_malloc_trim(size_t);
void __asan_die(const char *) wontreturn;
#endif /* COSMOPOLITAN_LIBC_INTRIN_ASAN_H_ */

157
libc/intrin/bzero.c Normal file
View file

@ -0,0 +1,157 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/dce.h"
#include "libc/nexgen32e/nexgen32e.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/str/str.h"
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
typedef long long xmm_a __attribute__((__vector_size__(16), __aligned__(16)));
/**
 * Zeroes memory with unaligned 16-byte vector stores.
 *
 * Sixteen bytes are always stored at p and at p+n-16 (or beyond), so n
 * must be at least 16; bzero() only dispatches here when n > 16.
 * Stores are allowed to overlap one another.
 *
 * @param p is memory address
 * @param n is byte length
 */
static noinline antiquity void bzero_sse(char *p, size_t n) {
  xmm_t zero = {0};
  char *tail;
  if (n > 32) {
    /* clear 32 bytes at a time from the end toward the start */
    for (;;) {
      n -= 32;
      tail = p + n;
      *(xmm_t *)tail = zero;
      *(xmm_t *)(tail + 16) = zero;
      if (n <= 32) break;
    }
    /* the first 32 bytes cover whatever is left */
    *(xmm_t *)(p + 16) = zero;
  } else {
    *(xmm_t *)(p + n - 16) = zero;
  }
  *(xmm_t *)p = zero;
}
/**
 * Zeroes memory; variant compiled with microarchitecture("avx").
 *
 * The stores themselves use 16-byte xmm_t vectors. Sixteen bytes are
 * always stored at p, so n must be at least 16; bzero() only
 * dispatches here when n > 16.
 *
 * @param p is memory address
 * @param n is byte length
 */
microarchitecture("avx") static void bzero_avx(char *p, size_t n) {
xmm_t v = {0};
if (n <= 32) {
/* two possibly overlapping stores cover 16..32 bytes */
*(xmm_t *)(p + n - 16) = v;
*(xmm_t *)p = v;
} else if (!IsAsan() && n >= 1024 && X86_HAVE(ERMS)) {
/* large sizes on cpus reporting ERMS: a single rep stosb */
asm("rep stosb" : "+D"(p), "+c"(n), "=m"(*(char(*)[n])p) : "a"(0));
} else {
/* NOTE(review): kHalfCache3 is presumably half the L3 cache size,
   with zero meaning unknown -- confirm */
if (n < kHalfCache3 || !kHalfCache3) {
/* ordinary stores, 32 bytes at a time from the end */
do {
n -= 32;
*(xmm_t *)(p + n) = v;
*(xmm_t *)(p + n + 16) = v;
} while (n > 32);
} else {
/* trim bytes until p+n is 16-byte aligned, as movntdq is given
   xmm_a (16-byte aligned) pointers */
while ((uintptr_t)(p + n) & 15) {
p[--n] = 0;
}
/* non-temporal stores, 32 bytes at a time from the end */
do {
n -= 32;
__builtin_ia32_movntdq((xmm_a *)(p + n), (xmm_a)v);
__builtin_ia32_movntdq((xmm_a *)(p + n + 16), (xmm_a)v);
} while (n > 32);
/* store fence after the non-temporal stores */
asm("sfence");
}
/* the first 32 bytes cover whatever is left */
*(xmm_t *)(p + 16) = v;
*(xmm_t *)p = v;
}
}
/**
* Sets memory to zero.
*
* bzero n=0 661 picoseconds
* bzero n=1 661 ps/byte 1,476 mb/s
* bzero n=2 330 ps/byte 2,952 mb/s
* bzero n=3 220 ps/byte 4,428 mb/s
* bzero n=4 165 ps/byte 5,904 mb/s
* bzero n=7 94 ps/byte 10,333 mb/s
* bzero n=8 41 ps/byte 23,618 mb/s
* bzero n=15 44 ps/byte 22,142 mb/s
* bzero n=16 20 ps/byte 47,236 mb/s
* bzero n=31 21 ps/byte 45,760 mb/s
* bzero n=32 20 ps/byte 47,236 mb/s
* bzero n=63 10 ps/byte 92,997 mb/s
* bzero n=64 15 ps/byte 62,982 mb/s
* bzero n=127 15 ps/byte 62,490 mb/s
* bzero n=128 10 ps/byte 94,473 mb/s
* bzero n=255 14 ps/byte 68,439 mb/s
* bzero n=256 9 ps/byte 105 gb/s
* bzero n=511 15 ps/byte 62,859 mb/s
* bzero n=512 11 ps/byte 83,976 mb/s
* bzero n=1023 15 ps/byte 61,636 mb/s
* bzero n=1024 10 ps/byte 88,916 mb/s
* bzero n=2047 9 ps/byte 105 gb/s
* bzero n=2048 8 ps/byte 109 gb/s
* bzero n=4095 8 ps/byte 115 gb/s
* bzero n=4096 8 ps/byte 118 gb/s
* bzero n=8191 7 ps/byte 129 gb/s
* bzero n=8192 7 ps/byte 130 gb/s
* bzero n=16383 6 ps/byte 136 gb/s
* bzero n=16384 6 ps/byte 137 gb/s
* bzero n=32767 6 ps/byte 140 gb/s
* bzero n=32768 6 ps/byte 141 gb/s
* bzero n=65535 15 ps/byte 64,257 mb/s
* bzero n=65536 15 ps/byte 64,279 mb/s
* bzero n=131071 15 ps/byte 63,166 mb/s
* bzero n=131072 15 ps/byte 63,115 mb/s
* bzero n=262143 15 ps/byte 62,052 mb/s
* bzero n=262144 15 ps/byte 62,097 mb/s
* bzero n=524287 15 ps/byte 61,699 mb/s
* bzero n=524288 15 ps/byte 61,674 mb/s
* bzero n=1048575 16 ps/byte 60,179 mb/s
* bzero n=1048576 15 ps/byte 61,330 mb/s
* bzero n=2097151 15 ps/byte 61,071 mb/s
* bzero n=2097152 15 ps/byte 61,065 mb/s
* bzero n=4194303 16 ps/byte 60,942 mb/s
* bzero n=4194304 16 ps/byte 60,947 mb/s
* bzero n=8388607 16 ps/byte 60,872 mb/s
* bzero n=8388608 16 ps/byte 60,879 mb/s
*
* @param p is memory address
* @param n is byte length
* @asyncsignalsafe
*/
void(bzero)(void *p, size_t n) {
char *b;
uint64_t x;
b = p;
if (IsTiny()) {
/* tiny builds clear every size with a single rep stosb */
asm("rep stosb" : "+D"(b), "+c"(n), "=m"(*(char(*)[n])b) : "0"(p), "a"(0));
return;
}
/* x = 0, produced by inline asm rather than a C constant;
   NOTE(review): presumably keeps the compiler from recognizing the
   stores below as a memset/bzero idiom -- confirm */
asm("xorl\t%k0,%k0" : "=r"(x));
if (n <= 16) {
if (n >= 8) {
/* 8..16 bytes: two 8-byte stores that may overlap */
__builtin_memcpy(b, &x, 8);
__builtin_memcpy(b + n - 8, &x, 8);
} else if (n >= 4) {
/* 4..7 bytes: two 4-byte stores that may overlap */
__builtin_memcpy(b, &x, 4);
__builtin_memcpy(b + n - 4, &x, 4);
} else if (n) {
/* 1..3 bytes: one byte at a time, with a compiler barrier on
   each iteration */
do {
asm volatile("" ::: "memory");
b[--n] = x;
} while (n);
}
} else if (X86_HAVE(AVX)) {
/* more than 16 bytes: vector implementations */
bzero_avx(b, n);
} else {
bzero_sse(b, n);
}
}

21
libc/intrin/fatalbuf.c Normal file
View file

@ -0,0 +1,21 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/log/libfatal.internal.h"
/* Global FRAMESIZE-byte buffer in which fatal error reports are
   assembled before being written out. */
char __fatalbuf[FRAMESIZE];

1
libc/intrin/ho.s Normal file
View file

@ -0,0 +1 @@
# bit scan reverse: rax = index of the highest set bit in rdi
# (the result is undefined when rdi is zero)
bsr %rdi,%rax

View file

@ -46,8 +46,27 @@ o/$(MODE)/libc/intrin/ubsan.o: \
OVERRIDE_CFLAGS += \
-fno-sanitize=all \
-fno-stack-protector \
-mgeneral-regs-only \
-O2
-O3
# memcmp is optimized for size in every build mode
o/$(MODE)/libc/intrin/memcmp.o: \
OVERRIDE_CFLAGS += \
-Os
# keep memmove's top-level definitions in source order
# (o// is the object directory when MODE is empty)
o//libc/intrin/memmove.o: \
OVERRIDE_CFLAGS += \
-fno-toplevel-reorder
# the memory-fill and copy routines get -O3 when MODE is empty
o//libc/intrin/bzero.o \
o//libc/intrin/memset.o \
o//libc/intrin/memmove.o: \
OVERRIDE_CFLAGS += \
-O3
# tiny mode builds these as position-independent code
o/tiny/libc/intrin/memcmp.o \
o/tiny/libc/intrin/memmove.o \
o/tiny/libc/intrin/memmove-gcc.asm: \
OVERRIDE_CFLAGS += \
-fpie
LIBC_INTRIN_LIBS = $(foreach x,$(LIBC_INTRIN_ARTIFACTS),$($(x)))
LIBC_INTRIN_HDRS = $(foreach x,$(LIBC_INTRIN_ARTIFACTS),$($(x)_HDRS))

210
libc/intrin/memcmp.c Normal file
View file

@ -0,0 +1,210 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/likely.h"
#include "libc/dce.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/str/str.h"
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
/**
 * Compares memory using 16-byte vector comparisons.
 *
 * Sixteen bytes are consumed per iteration while more than 32 bytes
 * remain; what's left is then decided by two possibly overlapping
 * 16-byte comparisons of its head and its tail.
 *
 * pmovmskb yields 0xffff when all sixteen bytes are equal, so
 * subtracting 0xffff gives zero on a match; otherwise the lowest set
 * bit of the difference is the index of the first differing byte.
 *
 * NOTE(review): sixteen bytes are loaded at p and at p+n-16
 * unconditionally, so this presumably expects n >= 16 -- confirm
 * against the caller.
 *
 * @param p is the first memory region
 * @param q is the second memory region
 * @param n is the byte length of both regions
 * @return difference of the first pair of bytes that differ, compared
 *     as unsigned char, or zero if the regions are equal
 */
static noinline antiquity int memcmp_sse(const unsigned char *p,
                                         const unsigned char *q, size_t n) {
  unsigned u;
  while (n > 16 + 16) {
    if (!(u = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(
                  *(const xmm_t *)p, *(const xmm_t *)q)) -
              0xffff)) {
      n -= 16;
      p += 16;
      q += 16;
    } else {
      u = __builtin_ctzl(u);
      return p[u] - q[u];
    }
  }
  if (!(u = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(
                *(const xmm_t *)p, *(const xmm_t *)q)) -
            0xffff)) {
    if (!(u = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(
                  *(const xmm_t *)(p + n - 16), *(const xmm_t *)(q + n - 16))) -
              0xffff)) {
      return 0;
    } else {
      u = __builtin_ctzl(u);
      return p[n - 16 + u] - q[n - 16 + u];
    }
  } else {
    u = __builtin_ctzl(u);
    return p[u] - q[u];
  }
}
/**
 * Compares memory using 16-byte vector comparisons, unrolled to 64
 * bytes per iteration for large inputs.
 *
 * The unrolled loop packs four 16-bit equality masks into one 64-bit
 * word; any clear bit marks a mismatching byte and the lowest clear
 * bit is the first one. The rest mirrors memcmp_sse(): a mask minus
 * 0xffff is zero on equality and otherwise has its lowest set bit at
 * the first mismatch.
 *
 * Sixteen bytes are loaded unconditionally from both buffers, so this
 * helper needs n >= 16; memcmp() only calls it when n > 16.
 *
 * @return unsigned char subtraction at first differing byte, or 0
 */
microarchitecture("avx") static int memcmp_avx(const unsigned char *p,
                                               const unsigned char *q,
                                               size_t n) {
  uint64_t eq;
  unsigned miss, m0, m1, m2, m3;
  /* 64 bytes per step while at least 80 bytes remain */
  for (; n >= 16 + 64; n -= 64, p += 64, q += 64) {
    m0 = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(
        ((const xmm_t *)p)[0], ((const xmm_t *)q)[0]));
    m1 = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(
        ((const xmm_t *)p)[1], ((const xmm_t *)q)[1]));
    m2 = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(
        ((const xmm_t *)p)[2], ((const xmm_t *)q)[2]));
    m3 = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(
        ((const xmm_t *)p)[3], ((const xmm_t *)q)[3]));
    eq = (uint64_t)m0 | (uint64_t)m1 << 16 | (uint64_t)m2 << 32 |
         (uint64_t)m3 << 48;
    if (~eq) {
      eq = __builtin_ctzll(~eq);
      return p[eq] - q[eq];
    }
  }
  /* 16 bytes per step until at most 32 bytes remain */
  for (; n > 16 + 16; n -= 16, p += 16, q += 16) {
    miss = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(
               *(const xmm_t *)p, *(const xmm_t *)q)) -
           0xffff;
    if (miss) {
      miss = __builtin_ctzl(miss);
      return p[miss] - q[miss];
    }
  }
  /* first and last sixteen bytes of the remainder (may overlap) */
  miss = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(
             *(const xmm_t *)p, *(const xmm_t *)q)) -
         0xffff;
  if (miss) {
    miss = __builtin_ctzl(miss);
    return p[miss] - q[miss];
  }
  miss = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(
             *(const xmm_t *)(p + n - 16), *(const xmm_t *)(q + n - 16))) -
         0xffff;
  if (miss) {
    miss = __builtin_ctzl(miss);
    return p[n - 16 + miss] - q[n - 16 + miss];
  }
  return 0;
}
/**
* Compares memory byte by byte.
*
* memcmp n=0 992 picoseconds
* memcmp n=1 1 ns/byte 738 mb/s
* memcmp n=2 661 ps/byte 1,476 mb/s
* memcmp n=3 551 ps/byte 1,771 mb/s
* memcmp n=4 248 ps/byte 3,936 mb/s
* memcmp n=5 198 ps/byte 4,920 mb/s
* memcmp n=6 165 ps/byte 5,904 mb/s
* memcmp n=7 141 ps/byte 6,889 mb/s
* memcmp n=8 124 ps/byte 7,873 mb/s
* memcmp n=9 110 ps/byte 8,857 mb/s
* memcmp n=15 44 ps/byte 22,143 mb/s
* memcmp n=16 41 ps/byte 23,619 mb/s
* memcmp n=17 77 ps/byte 12,547 mb/s
* memcmp n=31 42 ps/byte 22,881 mb/s
* memcmp n=32 41 ps/byte 23,619 mb/s
* memcmp n=33 60 ps/byte 16,238 mb/s
* memcmp n=80 53 ps/byte 18,169 mb/s
* memcmp n=128 38 ps/byte 25,194 mb/s
* memcmp n=256 32 ps/byte 30,233 mb/s
* memcmp n=16384 27 ps/byte 35,885 mb/s
* memcmp n=32768 29 ps/byte 32,851 mb/s
* memcmp n=131072 33 ps/byte 28,983 mb/s
*
* @return unsigned char subtraction at stop index
* @asyncsignalsafe
*/
int memcmp(const void *a, const void *b, size_t n) {
  int c;
  unsigned sh;
  uint32_t dd, da, db;
  uint64_t wd, wa, wb;
  const unsigned char *p = a, *q = b;
  if (p == q) return 0;
  if (!IsTiny()) {
    /* more than sixteen bytes: hand off to the vectorized helpers */
    if (n > 16) {
      if (LIKELY(X86_HAVE(AVX))) {
        return memcmp_avx(p, q, n);
      } else {
        return memcmp_sse(p, q, n);
      }
    }
    if (n >= 8) {
      /* compare the first and the last eight bytes (they may overlap)
         as little-endian words, so the lowest differing bit falls
         inside the first differing byte */
      wa = (uint64_t)p[0] | (uint64_t)p[1] << 8 | (uint64_t)p[2] << 16 |
           (uint64_t)p[3] << 24 | (uint64_t)p[4] << 32 |
           (uint64_t)p[5] << 40 | (uint64_t)p[6] << 48 |
           (uint64_t)p[7] << 56;
      wb = (uint64_t)q[0] | (uint64_t)q[1] << 8 | (uint64_t)q[2] << 16 |
           (uint64_t)q[3] << 24 | (uint64_t)q[4] << 32 |
           (uint64_t)q[5] << 40 | (uint64_t)q[6] << 48 |
           (uint64_t)q[7] << 56;
      wd = wa ^ wb;
      if (!wd) {
        p += n - 8;
        q += n - 8;
        wa = (uint64_t)p[0] | (uint64_t)p[1] << 8 | (uint64_t)p[2] << 16 |
             (uint64_t)p[3] << 24 | (uint64_t)p[4] << 32 |
             (uint64_t)p[5] << 40 | (uint64_t)p[6] << 48 |
             (uint64_t)p[7] << 56;
        wb = (uint64_t)q[0] | (uint64_t)q[1] << 8 | (uint64_t)q[2] << 16 |
             (uint64_t)q[3] << 24 | (uint64_t)q[4] << 32 |
             (uint64_t)q[5] << 40 | (uint64_t)q[6] << 48 |
             (uint64_t)q[7] << 56;
        wd = wa ^ wb;
        if (!wd) return 0;
      }
      /* round the differing bit down to its byte boundary */
      sh = __builtin_ctzll(wd);
      sh &= -8;
      return ((wa >> sh) & 255) - ((wb >> sh) & 255);
    }
    if (n >= 4) {
      /* same scheme with four-byte words */
      da = (uint32_t)p[0] | (uint32_t)p[1] << 8 | (uint32_t)p[2] << 16 |
           (uint32_t)p[3] << 24;
      db = (uint32_t)q[0] | (uint32_t)q[1] << 8 | (uint32_t)q[2] << 16 |
           (uint32_t)q[3] << 24;
      dd = da ^ db;
      if (!dd) {
        p += n - 4;
        q += n - 4;
        da = (uint32_t)p[0] | (uint32_t)p[1] << 8 | (uint32_t)p[2] << 16 |
             (uint32_t)p[3] << 24;
        db = (uint32_t)q[0] | (uint32_t)q[1] << 8 | (uint32_t)q[2] << 16 |
             (uint32_t)q[3] << 24;
        dd = da ^ db;
        if (!dd) return 0;
      }
      sh = __builtin_ctzl(dd);
      sh &= -8;
      return ((da >> sh) & 255) - ((db >> sh) & 255);
    }
  }
  /* tiny builds, and inputs shorter than four bytes: plain byte loop */
  while (n--) {
    if ((c = *p++ - *q++)) {
      return c;
    }
  }
  return 0;
}

309
libc/intrin/memmove.c Normal file
View file

@ -0,0 +1,309 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/nexgen32e/nexgen32e.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/str/str.h"
/* 16-byte vector with 1-byte alignment: used for loads and stores at
   arbitrary addresses. */
typedef long long xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
/* 16-byte vector with 16-byte alignment: the pointer type handed to the
   non-temporal movntdq stores in memmove(). */
typedef long long xmm_a __attribute__((__vector_size__(16), __aligned__(16)));
/* Defines memcpy as a global symbol equal to memmove, so both names
   resolve to the same function. */
asm("memcpy = memmove\n\t"
".globl\tmemcpy");
/**
* Copies memory.
*
* memmove n=0 661 picoseconds
* memmove n=1 661 ps/byte 1,476 mb/s
* memmove n=2 330 ps/byte 2,952 mb/s
* memmove n=3 330 ps/byte 2,952 mb/s
* memmove n=4 165 ps/byte 5,904 mb/s
* memmove n=7 141 ps/byte 6,888 mb/s
* memmove n=8 82 ps/byte 11 GB/s
* memmove n=15 44 ps/byte 21 GB/s
* memmove n=16 41 ps/byte 23 GB/s
* memmove n=31 32 ps/byte 29 GB/s
* memmove n=32 31 ps/byte 30 GB/s
* memmove n=63 21 ps/byte 45 GB/s
* memmove n=64 15 ps/byte 61 GB/s
* memmove n=127 13 ps/byte 73 GB/s
* memmove n=128 31 ps/byte 30 GB/s
* memmove n=255 20 ps/byte 45 GB/s
* memmove n=256 19 ps/byte 49 GB/s
* memmove n=511 16 ps/byte 56 GB/s
* memmove n=512 17 ps/byte 54 GB/s
* memmove n=1023 18 ps/byte 52 GB/s
* memmove n=1024 13 ps/byte 72 GB/s
* memmove n=2047 9 ps/byte 96 GB/s
* memmove n=2048 9 ps/byte 98 GB/s
* memmove n=4095 8 ps/byte 112 GB/s
* memmove n=4096 8 ps/byte 109 GB/s
* memmove n=8191 7 ps/byte 124 GB/s
* memmove n=8192 7 ps/byte 125 GB/s
* memmove n=16383 7 ps/byte 134 GB/s
* memmove n=16384 7 ps/byte 134 GB/s
* memmove n=32767 13 ps/byte 72 GB/s
* memmove n=32768 13 ps/byte 72 GB/s
* memmove n=65535 13 ps/byte 68 GB/s
* memmove n=65536 14 ps/byte 67 GB/s
* memmove n=131071 14 ps/byte 65 GB/s
* memmove n=131072 14 ps/byte 64 GB/s
* memmove n=262143 15 ps/byte 63 GB/s
* memmove n=262144 15 ps/byte 63 GB/s
* memmove n=524287 15 ps/byte 61 GB/s
* memmove n=524288 15 ps/byte 61 GB/s
* memmove n=1048575 15 ps/byte 61 GB/s
* memmove n=1048576 15 ps/byte 61 GB/s
* memmove n=2097151 19 ps/byte 48 GB/s
* memmove n=2097152 27 ps/byte 35 GB/s
* memmove n=4194303 28 ps/byte 33 GB/s
* memmove n=4194304 28 ps/byte 33 GB/s
* memmove n=8388607 28 ps/byte 33 GB/s
* memmove n=8388608 28 ps/byte 33 GB/s
*
* DST and SRC may overlap.
*
* @param dst is destination
* @param src is memory to copy
* @param n is number of bytes to copy
* @return dst
* @asyncsignalsafe
*/
void *memmove(void *dst, const void *src, size_t n) {
size_t i;
char *d, *r;
const char *s;
uint64_t a, b;
xmm_t v, w, x, y, V, W, X, Y, wut;
d = dst;
s = src;
/* Tiny builds: a single `rep movsb`. When dst lies above src the copy
   runs from the last byte downward (direction flag set, then cleared
   again) so that an overlapping source is read before it is
   overwritten. */
if (IsTiny()) {
if (d <= s) {
asm("rep movsb"
: "+D"(d), "+S"(s), "+c"(n), "=m"(*(char(*)[n])dst)
: "m"(*(char(*)[n])src));
} else {
d += n - 1;
s += n - 1;
asm("std\n\t"
"rep movsb\n\t"
"cld"
: "+D"(d), "+S"(s), "+c"(n), "=m"(*(char(*)[n])dst)
: "m"(*(char(*)[n])src));
}
return dst;
}
/* Sizes 0..127: straight-line code. Every case performs all of its
   loads before its first store, which is what keeps overlapping
   buffers correct. Ranges are covered by one copy anchored at the
   start and one anchored at the end; the two may overlap each other. */
switch (n) {
case 0:
return d;
case 1:
*d = *s;
return d;
case 2:
__builtin_memcpy(&a, s, 2);
__builtin_memcpy(d, &a, 2);
return d;
case 3:
__builtin_memcpy(&a, s, 2);
__builtin_memcpy(&b, s + 1, 2);
__builtin_memcpy(d, &a, 2);
__builtin_memcpy(d + 1, &b, 2);
return d;
case 4:
__builtin_memcpy(&a, s, 4);
__builtin_memcpy(d, &a, 4);
return d;
case 5 ... 7:
__builtin_memcpy(&a, s, 4);
__builtin_memcpy(&b, s + n - 4, 4);
__builtin_memcpy(d, &a, 4);
__builtin_memcpy(d + n - 4, &b, 4);
return d;
case 8:
__builtin_memcpy(&a, s, 8);
__builtin_memcpy(d, &a, 8);
return d;
case 9 ... 15:
__builtin_memcpy(&a, s, 8);
__builtin_memcpy(&b, s + n - 8, 8);
__builtin_memcpy(d, &a, 8);
__builtin_memcpy(d + n - 8, &b, 8);
return d;
case 16:
*(xmm_t *)d = *(xmm_t *)s;
return d;
case 17 ... 32:
v = *(xmm_t *)s;
w = *(xmm_t *)(s + n - 16);
*(xmm_t *)d = v;
*(xmm_t *)(d + n - 16) = w;
return d;
case 33 ... 64:
v = *(xmm_t *)s;
w = *(xmm_t *)(s + 16);
x = *(xmm_t *)(s + n - 32);
y = *(xmm_t *)(s + n - 16);
*(xmm_t *)d = v;
*(xmm_t *)(d + 16) = w;
*(xmm_t *)(d + n - 32) = x;
*(xmm_t *)(d + n - 16) = y;
return d;
case 65 ... 127:
v = *(xmm_t *)s;
w = *(xmm_t *)(s + 16);
x = *(xmm_t *)(s + 32);
y = *(xmm_t *)(s + 48);
V = *(xmm_t *)(s + n - 64);
W = *(xmm_t *)(s + n - 48);
X = *(xmm_t *)(s + n - 32);
Y = *(xmm_t *)(s + n - 16);
*(xmm_t *)d = v;
*(xmm_t *)(d + 16) = w;
*(xmm_t *)(d + 32) = x;
*(xmm_t *)(d + 48) = y;
*(xmm_t *)(d + n - 64) = V;
*(xmm_t *)(d + n - 48) = W;
*(xmm_t *)(d + n - 32) = X;
*(xmm_t *)(d + n - 16) = Y;
return d;
default:
/* n >= 128 from here on. r keeps the return value because d, s and n
   are adjusted by the bulk loops below. */
r = d;
if (d == s) return d;
/* Ordinary stores when n is below kHalfCache3 (or kHalfCache3 is zero);
   otherwise non-temporal stores.
   NOTE(review): kHalfCache3 is presumably half the L3 cache size --
   confirm against its definition. */
if (n < kHalfCache3 || !kHalfCache3) {
if (d > s) {
/* dst above src: copy from the end toward the start, 32 bytes per step.
   On exit the first n (< 32) bytes are still uncopied and d and s are
   unchanged, so the tail switch below finishes the job. */
if (IsAsan() || n < 1024 || !X86_HAVE(ERMS)) {
do {
n -= 32;
v = *(const xmm_t *)(s + n);
w = *(const xmm_t *)(s + n + 16);
*(xmm_t *)(d + n) = v;
*(xmm_t *)(d + n + 16) = w;
} while (n >= 32);
} else {
/* ERMS available and n >= 1024: backward `rep movsb` copies everything */
asm("std\n\t"
"rep movsb\n\t"
"cld"
: "=D"(d), "=S"(s), "+c"(n), "=m"(*(char(*)[n])d)
: "0"(d + n - 1), "1"(s + n - 1), "m"(*(char(*)[n])s));
return r;
}
} else {
/* dst below src: copy from the start toward the end, 32 bytes per step,
   then advance d and s past the copied part and leave the remainder
   (< 32 bytes) in n for the tail switch. */
if (IsAsan() || n < 1024 || !X86_HAVE(ERMS)) {
i = 0;
do {
v = *(const xmm_t *)(s + i);
w = *(const xmm_t *)(s + i + 16);
*(xmm_t *)(d + i) = v;
*(xmm_t *)(d + i + 16) = w;
} while ((i += 32) + 32 <= n);
d += i;
s += i;
n -= i;
} else {
/* ERMS available and n >= 1024: forward `rep movsb` copies everything */
asm("rep movsb"
: "+D"(d), "+S"(s), "+c"(n), "=m"(*(char(*)[n])d)
: "m"(*(char(*)[n])s));
return r;
}
}
} else {
/* Non-temporal path: movntdq is given a 16-byte aligned destination, so
   single bytes are copied first until the store address is aligned. */
if (d > s) {
/* backward: trim bytes off the end until d + n is 16-byte aligned */
while ((uintptr_t)(d + n) & 15) {
--n;
d[n] = s[n];
}
do {
n -= 32;
v = *(const xmm_t *)(s + n);
w = *(const xmm_t *)(s + n + 16);
__builtin_ia32_movntdq((xmm_a *)(d + n), v);
__builtin_ia32_movntdq((xmm_a *)(d + n + 16), w);
} while (n >= 32);
} else {
/* forward: copy leading bytes until d + i is 16-byte aligned */
i = 0;
while ((uintptr_t)(d + i) & 15) {
d[i] = s[i];
++i;
}
do {
v = *(const xmm_t *)(s + i);
w = *(const xmm_t *)(s + i + 16);
__builtin_ia32_movntdq((xmm_a *)(d + i), v);
__builtin_ia32_movntdq((xmm_a *)(d + i + 16), w);
} while ((i += 32) + 32 <= n);
d += i;
s += i;
n -= i;
}
/* store fence issued after the non-temporal stores */
asm("sfence");
}
/* Tail: at most 31 bytes remain at d/s. Same load-everything-then-store
   scheme as the small-size cases above; the cases cover 0..31
   exhaustively, hence the unreachable default. */
switch (n) {
case 0:
return r;
case 17 ... 31:
__builtin_memcpy(&v, s, 16);
__builtin_memcpy(&w, s + n - 16, 16);
__builtin_memcpy(d, &v, 16);
__builtin_memcpy(d + n - 16, &w, 16);
return r;
case 9 ... 15:
__builtin_memcpy(&a, s, 8);
__builtin_memcpy(&b, s + n - 8, 8);
__builtin_memcpy(d, &a, 8);
__builtin_memcpy(d + n - 8, &b, 8);
return r;
case 5 ... 7:
__builtin_memcpy(&a, s, 4);
__builtin_memcpy(&b, s + n - 4, 4);
__builtin_memcpy(d, &a, 4);
__builtin_memcpy(d + n - 4, &b, 4);
return r;
case 16:
__builtin_memcpy(&v, s, 16);
__builtin_memcpy(d, &v, 16);
return r;
case 8:
__builtin_memcpy(&a, s, 8);
__builtin_memcpy(d, &a, 8);
return r;
case 4:
__builtin_memcpy(&a, s, 4);
__builtin_memcpy(d, &a, 4);
return r;
case 1:
*d = *s;
return r;
case 2:
__builtin_memcpy(&a, s, 2);
__builtin_memcpy(d, &a, 2);
return r;
case 3:
__builtin_memcpy(&a, s, 2);
__builtin_memcpy(&b, s + 1, 2);
__builtin_memcpy(d, &a, 2);
__builtin_memcpy(d + 1, &b, 2);
return r;
default:
unreachable;
}
}
}

24
libc/intrin/mempcpy.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
/**
 * Copies memory and returns pointer to the byte after the copy.
 *
 * The copy is done with memmove(), so DST and SRC may overlap.
 *
 * @param dst is destination
 * @param src is memory to copy
 * @param n is number of bytes to copy
 * @return dst + n
 */
void *mempcpy(void *dst, const void *src, size_t n) {
  char *start = memmove(dst, src, n);
  return start + n;
}

164
libc/intrin/memset.c Normal file
View file

@ -0,0 +1,164 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/dce.h"
#include "libc/nexgen32e/nexgen32e.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/str/str.h"
/* Sixteen chars as one vector with 1-byte alignment, so it can be
   stored to any address. */
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
/* 16-byte vector with 16-byte alignment: the operand type used for the
   non-temporal movntdq stores in memset_avx(). */
typedef long long xmm_a __attribute__((__vector_size__(16), __aligned__(16)));
/**
 * Fills memory using 16-byte vector stores.
 *
 * Sixteen bytes are written unconditionally at the start and at the end
 * of the buffer, so this helper needs n >= 16; memset() only calls it
 * when n > 16.
 *
 * @return p
 */
static noinline antiquity void *memset_sse(char *p, char c, size_t n) {
  xmm_t fill = {c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c};
  if (n > 32) {
    /* fill from the end downward, 32 bytes per step, until at most 32
       bytes are left at the front */
    while (n > 32) {
      n -= 32;
      *(xmm_t *)(p + n) = fill;
      *(xmm_t *)(p + n + 16) = fill;
    }
    /* the front 32 bytes; overlapping what the loop wrote is harmless */
    *(xmm_t *)(p + 16) = fill;
    *(xmm_t *)p = fill;
    return p;
  }
  /* 17..32 bytes: last sixteen and first sixteen, possibly overlapping */
  *(xmm_t *)(p + n - 16) = fill;
  *(xmm_t *)p = fill;
  return p;
}
/**
 * Fills memory, picking a strategy by size.
 *
 * - up to 32 bytes: two possibly overlapping 16-byte stores
 * - at least 1024 bytes with ERMS and without ASAN: `rep stosb`
 * - below kHalfCache3, or kHalfCache3 is zero: 16-byte stores, walking
 *   down from the end
 * - otherwise: non-temporal 16-byte stores followed by sfence
 *
 * Sixteen bytes are written unconditionally at the start and at the end
 * of the buffer, so this helper needs n >= 16; memset() only calls it
 * when n > 16.
 *
 * @return p
 */
microarchitecture("avx") static void *memset_avx(char *p, char c, size_t n) {
  char *t;
  xmm_t fill = {c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c};
  if (n <= 32) {
    *(xmm_t *)(p + n - 16) = fill;
    *(xmm_t *)p = fill;
    return p;
  }
  if (!IsAsan() && n >= 1024 && X86_HAVE(ERMS)) {
    asm("rep stosb" : "=D"(t), "+c"(n), "=m"(*(char(*)[n])p) : "0"(p), "a"(c));
    return p;
  }
  if (kHalfCache3 && n >= kHalfCache3) {
    /* movntdq is given a 16-byte aligned address: trim single bytes off
       the end until p + n is aligned */
    while ((uintptr_t)(p + n) & 15) {
      p[--n] = c;
    }
    do {
      n -= 32;
      __builtin_ia32_movntdq((xmm_a *)(p + n), (xmm_a)fill);
      __builtin_ia32_movntdq((xmm_a *)(p + n + 16), (xmm_a)fill);
    } while (n > 32);
    asm("sfence");
  } else {
    while (n > 32) {
      n -= 32;
      *(xmm_t *)(p + n) = fill;
      *(xmm_t *)(p + n + 16) = fill;
    }
  }
  /* at most 32 bytes remain at the front */
  *(xmm_t *)(p + 16) = fill;
  *(xmm_t *)p = fill;
  return p;
}
/**
* Sets memory.
*
* memset n=0 992 picoseconds
* memset n=1 992 ps/byte 984 mb/s
* memset n=2 330 ps/byte 2,952 mb/s
* memset n=3 330 ps/byte 2,952 mb/s
* memset n=4 165 ps/byte 5,904 mb/s
* memset n=7 94 ps/byte 10,333 mb/s
* memset n=8 124 ps/byte 7,872 mb/s
* memset n=15 66 ps/byte 14,761 mb/s
* memset n=16 62 ps/byte 15,745 mb/s
* memset n=31 32 ps/byte 30,506 mb/s
* memset n=32 20 ps/byte 47,236 mb/s
* memset n=63 26 ps/byte 37,198 mb/s
* memset n=64 20 ps/byte 47,236 mb/s
* memset n=127 23 ps/byte 41,660 mb/s
* memset n=128 12 ps/byte 75,578 mb/s
* memset n=255 18 ps/byte 53,773 mb/s
* memset n=256 12 ps/byte 75,578 mb/s
* memset n=511 17 ps/byte 55,874 mb/s
* memset n=512 12 ps/byte 75,578 mb/s
* memset n=1023 16 ps/byte 58,080 mb/s
* memset n=1024 11 ps/byte 86,375 mb/s
* memset n=2047 9 ps/byte 101 gb/s
* memset n=2048 8 ps/byte 107 gb/s
* memset n=4095 8 ps/byte 113 gb/s
* memset n=4096 8 ps/byte 114 gb/s
* memset n=8191 7 ps/byte 126 gb/s
* memset n=8192 7 ps/byte 126 gb/s
* memset n=16383 7 ps/byte 133 gb/s
* memset n=16384 7 ps/byte 131 gb/s
* memset n=32767 14 ps/byte 69,246 mb/s
* memset n=32768 6 ps/byte 138 gb/s
* memset n=65535 15 ps/byte 62,756 mb/s
* memset n=65536 15 ps/byte 62,982 mb/s
* memset n=131071 18 ps/byte 52,834 mb/s
* memset n=131072 15 ps/byte 62,023 mb/s
* memset n=262143 15 ps/byte 61,169 mb/s
* memset n=262144 16 ps/byte 61,011 mb/s
* memset n=524287 16 ps/byte 60,633 mb/s
* memset n=524288 16 ps/byte 57,902 mb/s
* memset n=1048575 16 ps/byte 60,405 mb/s
* memset n=1048576 16 ps/byte 58,754 mb/s
* memset n=2097151 16 ps/byte 59,329 mb/s
* memset n=2097152 16 ps/byte 58,729 mb/s
* memset n=4194303 16 ps/byte 59,329 mb/s
* memset n=4194304 16 ps/byte 59,262 mb/s
* memset n=8388607 16 ps/byte 59,530 mb/s
* memset n=8388608 16 ps/byte 60,205 mb/s
*
* @param p is memory address
* @param c is masked with 255 and used as repeated byte
* @param n is byte length
* @return p
* @asyncsignalsafe
*/
void *memset(void *p, int c, size_t n) {
  char *b = p;
  uint32_t quad;
  uint64_t octa;
  /* tiny builds: a single `rep stosb` */
  if (IsTiny()) {
    asm("rep stosb" : "+D"(b), "+c"(n), "=m"(*(char(*)[n])b) : "0"(p), "a"(c));
    return p;
  }
  /* more than sixteen bytes: hand off to the vectorized helpers */
  if (n > 16) {
    if (X86_HAVE(AVX)) {
      return memset_avx(b, c, n);
    }
    return memset_sse(b, c, n);
  }
  if (n >= 8) {
    /* replicate the byte across a word, then store it at the start and
       at the end (the two stores may overlap) */
    octa = 0x0101010101010101ul * (c & 255);
    __builtin_memcpy(b, &octa, 8);
    __builtin_memcpy(b + n - 8, &octa, 8);
  } else if (n >= 4) {
    quad = 0x01010101u * (c & 255);
    __builtin_memcpy(b, &quad, 4);
    __builtin_memcpy(b + n - 4, &quad, 4);
  } else {
    /* one to three bytes, written last to first; the empty asm is a
       compiler barrier on every iteration */
    while (n) {
      asm volatile("" ::: "memory");
      b[--n] = c;
    }
  }
  return b;
}

185
libc/intrin/printf.c Normal file
View file

@ -0,0 +1,185 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/log/libfatal.internal.h"
#include "libc/nexgen32e/bsr.h"
#include "libc/nexgen32e/uart.internal.h"
#include "libc/nt/runtime.h"
#include "libc/runtime/runtime.h"
#include "libc/str/tpenc.h"
#include "libc/sysv/consts/nr.h"
/**
 * Low-level printf.
 *
 * This will work without any cosmopolitan runtime support once the
 * executable has been loaded into memory.
 *
 * Output is formatted into a 2048-byte stack buffer and then written in
 * one go: to the standard error handle on Windows, to the serial port
 * at 0x3F8 on bare metal, and to file descriptor 2 otherwise. Output
 * that does not fit in the buffer is dropped.
 *
 * Supported directives:
 *
 * - `%d` prints a `long` in decimal; with the `+` flag positive values
 *   get a leading plus and zero prints nothing
 * - `%x` prints an `unsigned long` in lowercase hex, padded to width
 * - `%p` is `%x` zero-padded to twelve digits
 * - `%s` prints a string, padded to width; NULL prints "NULL" and a
 *   pointer value below PAGESIZE is printed as a decimal number
 * - `%S` prints a NUL-terminated `char16_t` string as UTF-8; surrogate
 *   pairs are not combined (the low half is dropped and the high half
 *   is encoded on its own)
 *
 * Supported flags are `0` (pad with zeroes), `+`, and `*` (read the
 * width from an `int` argument). Unknown directives are skipped.
 *
 * @param fmt is the format string
 */
privileged noasan noinstrument void __printf(const char *fmt, ...) {
  long d, ax;
  va_list va;
  uint16_t dx;
  const char *s;
  uint32_t wrote;
  unsigned long x;
  unsigned char al;
  const char16_t *S;
  int n, t, w, plus;
  char c, f, *p, *e, b[2048];
  w = 0;
  p = b;
  e = p + sizeof(b);
  va_start(va, fmt);
  do {
    switch ((c = *fmt++)) {
      default:
        if (p < e) {
          *p++ = c;
        }
        break;
      case '\0':
        break;
      case '%':
        w = 0;
        f = ' ';
        plus = 0;
      NeedMoar:
        switch ((c = *fmt++)) {
          case '\0':
            break;
          case '0':
            f = c;
            goto NeedMoar;
          case '+':
            plus = c;
            goto NeedMoar;
          case '*':
            w = va_arg(va, int);
            goto NeedMoar;
          case 'd':
            d = va_arg(va, long);
          ApiAbuse:
            /* only emitted when at least 22 bytes of buffer remain */
            if (p + 22 <= e) {
              if (d || !plus) {
                if (d > 0 && plus) {
                  *p++ = plus;
                }
                p = __intcpy(p, d);
              }
            }
            break;
          case 'p':
            w = 12;
            f = '0';
            /* fallthrough */
          case 'x':
            x = va_arg(va, unsigned long);
            if (x) {
              /* index of highest set bit -> number of hex digits */
              n = __builtin_clzl(x) ^ (sizeof(long) * 8 - 1);
              n >>= 2;
              n += 1;
            } else {
              n = 1;
            }
            while (w-- > n) {
              if (p < e) {
                *p++ = f;
              }
            }
            while (n--) {
              if (p < e) {
                *p++ = "0123456789abcdef"[(x >> (n << 2)) & 15];
              }
            }
            break;
          case 's':
            s = va_arg(va, const char *);
            if (!s) {
            EmitNullString:
              s = "NULL";
            }
            /* a "string" pointing into the first page is treated as an
               integer passed by mistake and printed as a number */
            if ((uintptr_t)s < PAGESIZE) {
              d = (intptr_t)s;
              goto ApiAbuse;
            }
            for (n = 0; s[n];) ++n;
            while (w-- > n) {
              if (p < e) {
                *p++ = f;
              }
            }
            while ((t = *s++)) {
              if (p < e) {
                *p++ = t;
              }
            }
            break;
          case 'S':
            S = va_arg(va, const char16_t *);
            if (!S) goto EmitNullString;
            while ((t = *S++)) {
              /* drop low surrogates */
              if ((t & 0xfc00) == 0xdc00) continue;
              if (t <= 0x7f) {
                /* ASCII must be a single byte; a two-byte form would be
                   an overlong (invalid) UTF-8 sequence */
                if (p < e) {
                  *p++ = t;
                }
              } else if (p + 3 <= e) {
                if (t <= 0x7ff) {
                  p[0] = 0300 | t >> 6;
                  p[1] = 0200 | (t & 077);
                  p += 2;
                } else {
                  p[0] = 0340 | t >> 12;
                  p[1] = 0200 | ((t >> 6) & 077);
                  p[2] = 0200 | (t & 077);
                  p += 3;
                }
              }
            }
            break;
          default:
            break;
        }
        break;
    }
  } while (c);
  va_end(va);
  /* buffer exactly full: mark the output as truncated */
  if (p == e) {
    e[-4] = '.';
    e[-3] = '.';
    e[-2] = '.';
    e[-1] = '\n';
  }
  if (IsWindows()) {
    WriteFile(GetStdHandle(kNtStdErrorHandle), b, p - b, &wrote, 0);
  } else if (IsMetal()) {
    for (e = p, p = b; p < e; ++p) {
      /* spin until the line status register reports UART_TTYTXR; the
         port read must be volatile, otherwise the compiler is free to
         hoist it out of the loop and spin on a stale value */
      for (;;) {
        dx = 0x3F8 + UART_LSR;
        asm volatile("inb\t%1,%0" : "=a"(al) : "dN"(dx));
        if (al & UART_TTYTXR) break;
        asm("pause");
      }
      dx = 0x3F8;
      asm volatile("outb\t%0,%1"
                   : /* no outputs */
                   : "a"(*p), "dN"(dx));
    }
  } else {
    asm volatile("syscall"
                 : "=a"(ax)
                 : "0"(__NR_write), "D"(2), "S"(b), "d"(p - b)
                 : "rcx", "r11", "memory");
  }
}

43
libc/intrin/strlen.c Normal file
View file

@ -0,0 +1,43 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/str/str.h"
/* Sixteen chars as one vector with 16-byte alignment; strlen() only
   dereferences it through pointers rounded down to a 16-byte boundary. */
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
/**
 * Returns length of NUL-terminated string.
 *
 * The string is scanned sixteen bytes at a time through pointers
 * rounded down to a 16-byte boundary. Bytes of the first chunk that
 * precede `s` are masked out of the result.
 *
 * @param s is non-null NUL-terminated string pointer
 * @return number of bytes (excluding NUL)
 * @asyncsignalsafe
 */
noasan size_t strlen(const char *s) {
  size_t len;
  unsigned lead, hits;
  const xmm_t *chunk;
  xmm_t chars, zeros = {0};
  if (IsAsan()) __asan_verify(s, 1);
  lead = (uintptr_t)s & 15;
  chunk = (const xmm_t *)((uintptr_t)s & -16);
  chars = *chunk;
  hits = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(chars, zeros));
  /* ignore NUL bytes located before the start of the string */
  hits &= -1u << lead;
  while (!hits) {
    chars = *++chunk;
    hits = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(chars, zeros));
  }
  len = (const char *)chunk + __builtin_ctzl(hits) - s;
  if (IsAsan()) __asan_verify(s, len);
  return len;
}

24
libc/intrin/syscall.S Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Executes the SYSCALL instruction and returns to the caller.
//
// No registers are loaded here: the kernel sees whatever the caller
// placed in them before calling.
__syscall__:
syscall
ret
.endfn __syscall__,globl,hidden

66
libc/intrin/tpenc.S Normal file
View file

@ -0,0 +1,66 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
.source __FILE__
// Encodes Thompson-Pike varint.
//
// The encoded bytes are packed into one word with the leading byte in
// the lowest-order byte, followed by the continuation bytes.
//
// @param edi is int to encode
// @return rax is word-encoded byte buffer
// @note invented on a napkin in a new jersey diner
tpenc: .leafprologue
.profilable
// zero-extend the argument into rdi and clear the accumulator
mov %edi,%edi
xor %eax,%eax
// values up to 127 are returned unchanged
cmp $127,%edi
jbe 3f
// look up kTpenc by the index of the highest set bit (7..31): the load
// leaves the continuation byte count in cl and the lead byte marker in ch
bsr %edi,%ecx
mov kTpenc-7*(1+1)(,%rcx,2),%ecx
// each iteration moves the low six bits of edi into a 0b10xxxxxx
// continuation byte in al, then shifts the accumulator up one byte
1: mov %edi,%edx
shr $6,%edi
and $0b00111111,%dl
or $0b10000000,%al
or %dl,%al
shl $8,%rax
dec %cl
jnz 1b
// the low byte becomes the lead byte: marker bits plus the bits left in rdi
2: or %ch,%al
3: or %rdi,%rax
.leafepilogue
.endfn tpenc,globl
.rodata
.align 4
// Lookup table of (len,mark) byte pairs, one pair per possible index of
// the highest set bit from 7 through 31. len is the number of
// continuation bytes and mark is the lead byte prefix.
kTpenc: .rept 4 # MSB10 (0x7FF)
.byte 1,0b11000000 # len,mark
.endr
.rept 5 # MSB15 (0xFFFF)
.byte 2,0b11100000 # len,mark
.endr
.rept 5 # MSB20 (0x1FFFFF)
.byte 3,0b11110000 # len,mark
.endr
.rept 5 # MSB25 (0x3FFFFFF)
.byte 4,0b11111000 # len,mark
.endr
.rept 6 # MSB31 (0xffffffff)
.byte 5,0b11111100 # len,mark
.endr
// padding: tpenc reads each entry with a 4-byte load, which extends two
// bytes past the final pair
.zero 2
.endobj kTpenc