cosmopolitan/libc/intrin/bzero.c
Justine Tunney 226aaf3547 Improve memory safety
This commit makes numerous refinements to cosmopolitan memory handling.

The default stack size has been reduced from 2mb to 128kb. A new macro
is now provided so you can easily reconfigure the stack size to be any
value you want. Work around the breaking change by adding to your main:

    STATIC_STACK_SIZE(0x00200000);  // 2mb stack

If you're not sure how much stack you need, then you can use:

    STATIC_YOINK("stack_usage_logging");

After which you can `sort -nr o/$MODE/stack.log`. Based on the unit test
suite, nothing in the Cosmopolitan repository (except for Python) needs
a stack size greater than 30kb. There are also new macros for detecting
the size and address of the stack at runtime, e.g. GetStackAddr(). We
also now support sigaltstack() so if you want to see nice looking crash
reports whenever a stack overflow happens, you can put this in main():

    ShowCrashReports();

Under `make MODE=dbg` and `make MODE=asan` the unit testing framework
will now automatically print backtraces of memory allocations when
things like memory leaks happen. Bugs are now fixed in ASAN global
variable overrun detection. The memtrack and asan runtimes also handle
edge cases now. The new tools helped to identify a few memory leaks,
which are fixed by this change.

This change should fix an issue reported in #288 with ARG_MAX limits.
Fixing this doubled the performance of MKDEPS.COM and AR.COM yet again.
2021-10-13 17:27:13 -07:00

159 lines
7.4 KiB
C

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2021 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/assert.h"
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/nexgen32e/nexgen32e.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/str/str.h"
// 16-byte vector of char with alignment lowered to 1, so it can be
// stored through a pointer to any byte address.
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
// 16-byte vector of long long with 16-byte alignment; this is the type
// the __builtin_ia32_movntdq() calls in this file are given.
typedef long long xmm_a __attribute__((__vector_size__(16), __aligned__(16)));
/**
 * Zeroes n bytes at p using unaligned 16-byte vector stores.
 *
 * The short path stores at p + n - 16, so n has to be at least 16;
 * bzero() in this file only dispatches here once n > 16.
 *
 * @param p is destination address
 * @param n is byte length
 */
noasan static noinline antiquity void bzero_sse(char *p, size_t n) {
  char *q;
  xmm_t zero = {0};
  if (IsAsan()) __asan_verify(p, n);
  if (n > 32) {
    // sweep downward from the end, 32 bytes per step
    do {
      n -= 32;
      q = p + n;
      *(xmm_t *)q = zero;
      *(xmm_t *)(q + 16) = zero;
    } while (n > 32);
    // at most 32 bytes are left at the front; second half goes first
    *(xmm_t *)(p + 16) = zero;
  } else {
    // short buffer: last 16 bytes, possibly overlapping the first 16
    *(xmm_t *)(p + n - 16) = zero;
  }
  // both paths finish by clearing the first 16 bytes
  *(xmm_t *)p = zero;
}
/**
* Zeroes n bytes at p; variant declared with microarchitecture("avx").
*
* Picks one of four strategies based on n: two overlapping 16-byte
* stores, `rep stosb`, a loop of ordinary 16-byte stores, or a loop of
* movntdq stores followed by sfence.
*
* The n <= 32 path stores at p + n - 16, so n has to be at least 16;
* bzero() in this file only dispatches here once n > 16.
*
* @param p is destination address
* @param n is byte length
*/
noasan microarchitecture("avx") static void bzero_avx(char *p, size_t n) {
xmm_t v = {0};
// explicit range check when IsAsan() is true
// NOTE(review): presumably needed because `noasan` turns off the
// compiler's own instrumentation of this function — confirm
if (IsAsan()) __asan_verify(p, n);
if (n <= 32) {
// last 16 bytes, then first 16 bytes; the two stores may overlap
*(xmm_t *)(p + n - 16) = v;
*(xmm_t *)p = v;
} else if (n >= 1024 && X86_HAVE(ERMS)) {
// one string instruction: destination in the D register, count in
// the C register, zero in the A register; the "=m" operand tells the
// compiler that all n bytes at p are written
asm("rep stosb" : "+D"(p), "+c"(n), "=m"(*(char(*)[n])p) : "a"(0));
} else {
// NOTE(review): kHalfCache3 is defined elsewhere; the name suggests
// half of the L3 cache size — confirm. A value of zero always selects
// the ordinary-store loop.
if (n < kHalfCache3 || !kHalfCache3) {
// ordinary stores, walking down from the end 32 bytes at a time
do {
n -= 32;
*(xmm_t *)(p + n) = v;
*(xmm_t *)(p + n + 16) = v;
} while (n > 32);
} else {
// zero trailing bytes one by one until p + n is 16-byte aligned,
// because the stores below go through the 16-byte-aligned xmm_a type
while ((uintptr_t)(p + n) & 15) {
p[--n] = 0;
}
// movntdq stores, walking down from the aligned end; n drops by 32
// each step so p + n stays 16-byte aligned
do {
n -= 32;
__builtin_ia32_movntdq((xmm_a *)(p + n), (xmm_a)v);
__builtin_ia32_movntdq((xmm_a *)(p + n + 16), (xmm_a)v);
} while (n > 32);
// store fence issued after the movntdq loop
asm("sfence");
}
// either loop exits with n <= 32, leaving only bytes [0, n) unzeroed;
// these two unaligned stores cover bytes [0, 32)
*(xmm_t *)(p + 16) = v;
*(xmm_t *)p = v;
}
}
/**
* Sets memory to zero.
*
* bzero n=0 661 picoseconds
* bzero n=1 661 ps/byte 1,476 mb/s
* bzero n=2 330 ps/byte 2,952 mb/s
* bzero n=3 220 ps/byte 4,428 mb/s
* bzero n=4 165 ps/byte 5,904 mb/s
* bzero n=7 94 ps/byte 10,333 mb/s
* bzero n=8 41 ps/byte 23,618 mb/s
* bzero n=15 44 ps/byte 22,142 mb/s
* bzero n=16 20 ps/byte 47,236 mb/s
* bzero n=31 21 ps/byte 45,760 mb/s
* bzero n=32 20 ps/byte 47,236 mb/s
* bzero n=63 10 ps/byte 92,997 mb/s
* bzero n=64 15 ps/byte 62,982 mb/s
* bzero n=127 15 ps/byte 62,490 mb/s
* bzero n=128 10 ps/byte 94,473 mb/s
* bzero n=255 14 ps/byte 68,439 mb/s
* bzero n=256 9 ps/byte 105 gb/s
* bzero n=511 15 ps/byte 62,859 mb/s
* bzero n=512 11 ps/byte 83,976 mb/s
* bzero n=1023 15 ps/byte 61,636 mb/s
* bzero n=1024 10 ps/byte 88,916 mb/s
* bzero n=2047 9 ps/byte 105 gb/s
* bzero n=2048 8 ps/byte 109 gb/s
* bzero n=4095 8 ps/byte 115 gb/s
* bzero n=4096 8 ps/byte 118 gb/s
* bzero n=8191 7 ps/byte 129 gb/s
* bzero n=8192 7 ps/byte 130 gb/s
* bzero n=16383 6 ps/byte 136 gb/s
* bzero n=16384 6 ps/byte 137 gb/s
* bzero n=32767 6 ps/byte 140 gb/s
* bzero n=32768 6 ps/byte 141 gb/s
* bzero n=65535 15 ps/byte 64,257 mb/s
* bzero n=65536 15 ps/byte 64,279 mb/s
* bzero n=131071 15 ps/byte 63,166 mb/s
* bzero n=131072 15 ps/byte 63,115 mb/s
* bzero n=262143 15 ps/byte 62,052 mb/s
* bzero n=262144 15 ps/byte 62,097 mb/s
* bzero n=524287 15 ps/byte 61,699 mb/s
* bzero n=524288 15 ps/byte 61,674 mb/s
* bzero n=1048575 16 ps/byte 60,179 mb/s
* bzero n=1048576 15 ps/byte 61,330 mb/s
* bzero n=2097151 15 ps/byte 61,071 mb/s
* bzero n=2097152 15 ps/byte 61,065 mb/s
* bzero n=4194303 16 ps/byte 60,942 mb/s
* bzero n=4194304 16 ps/byte 60,947 mb/s
* bzero n=8388607 16 ps/byte 60,872 mb/s
* bzero n=8388608 16 ps/byte 60,879 mb/s
*
* @param p is memory address
* @param n is byte length
* @asyncsignalsafe
*/
// NOTE(review): the parentheses around the name keep a function-like
// macro called bzero, if one is in scope, from expanding here — confirm
// whether the included headers define one.
void(bzero)(void *p, size_t n) {
char *b;
uint64_t x;
b = p;
// x = 0, produced by inline asm so the compiler does not see the value
// as a constant
// NOTE(review): presumably this stops the small stores below from being
// folded back into a memset/bzero call — confirm
asm("xorl\t%k0,%k0" : "=r"(x));
if (n <= 16) {
if (n >= 8) {
// 8..16 bytes: one 8-byte store at the head and one at the tail,
// which overlap when n < 16
__builtin_memcpy(b, &x, 8);
__builtin_memcpy(b + n - 8, &x, 8);
} else if (n >= 4) {
// 4..7 bytes: same head/tail trick with 4-byte stores
__builtin_memcpy(b, &x, 4);
__builtin_memcpy(b + n - 4, &x, 4);
} else if (n) {
// 1..3 bytes: one byte at a time, from the end toward the start
do {
// compiler-level memory barrier on every iteration
// NOTE(review): looks intended to keep the loop from being
// recognized as a memset idiom — confirm
asm volatile("" ::: "memory");
b[--n] = x;
} while (n);
}
// n == 0 falls through all three cases and writes nothing
} else if (IsTiny()) {
// when IsTiny() is true, larger sizes use a single `rep stosb`:
// destination in the D register (also tied to p via the "0" matching
// constraint), count in the C register, zero in the A register
asm("rep stosb" : "+D"(b), "+c"(n), "=m"(*(char(*)[n])b) : "0"(p), "a"(0));
return;
} else if (X86_HAVE(AVX)) {
// n > 16 here, which bzero_avx's short path relies on
bzero_avx(b, n);
} else {
// n > 16 here, which bzero_sse's short path relies on
bzero_sse(b, n);
}
}