mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-23 05:42:29 +00:00
Make SSL handshakes much faster
This change boosts SSL handshake performance from 2,627 to ~10,000 per second which is the same level of performance as NGINX at establishing secure connections. That's impressive if we consider that redbean is a forking frontend application server. This was accomplished by: 1. Enabling either SSL session caching or SSL tickets. We choose to use tickets since they reduce network round trips too and that's a more important metric than wrk'ing localhost. 2. Fixing mbedtls_mpi_sub_abs() which is the most frequently called function. It's called about 12,000 times during an SSL handshake since it's the basis of most arithmetic operations like addition and for some strange reason it was designed to make two needless copies in addition to calling malloc and free. That's now fixed. 3. Improving TLS output buffering during the SSL handshake only, so that only a single write and read system call is needed until blocking on the ping pong. redbean will now do a better job wiping sensitive memory from a child process as soon as it's not needed. The nice thing about fork is it's much faster than reverse proxying so the goal is to use the different address spaces along with setuid() to minimize the risk that a server key will be compromised in the event that application code is hacked.
This commit is contained in:
parent
8c4cce043c
commit
f3e28aa192
103 changed files with 1310 additions and 1085 deletions
|
@ -20,6 +20,7 @@
|
|||
#include "libc/calls/internal.h"
|
||||
#include "libc/calls/struct/siginfo.h"
|
||||
#include "libc/calls/ucontext.h"
|
||||
#include "libc/intrin/repstosb.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
|
@ -401,7 +402,14 @@ noasan static void xnuthreadstate2linux(
|
|||
mc->fs = xnuss->__fs;
|
||||
mc->eflags = xnuss->__rflags;
|
||||
uc->uc_flags = xnuss->__rflags;
|
||||
memcpy(&mc->r8, &xnuss->__r8, 8 * sizeof(int64_t));
|
||||
mc->r8 = xnuss->__r8;
|
||||
mc->r9 = xnuss->__r9;
|
||||
mc->r10 = xnuss->__r10;
|
||||
mc->r11 = xnuss->__r11;
|
||||
mc->r12 = xnuss->__r12;
|
||||
mc->r13 = xnuss->__r13;
|
||||
mc->r14 = xnuss->__r14;
|
||||
mc->r15 = xnuss->__r15;
|
||||
}
|
||||
|
||||
noasan static void linuxthreadstate2xnu(
|
||||
|
@ -420,7 +428,21 @@ noasan static void linuxthreadstate2xnu(
|
|||
xnuss->__fs = mc->fs;
|
||||
xnuss->__rflags = mc->eflags;
|
||||
xnuss->__rflags = uc->uc_flags;
|
||||
memcpy(&xnuss->__r8, &mc->r8, 8 * sizeof(int64_t));
|
||||
xnuss->__r8 = mc->r8;
|
||||
xnuss->__r9 = mc->r9;
|
||||
xnuss->__r10 = mc->r10;
|
||||
xnuss->__r11 = mc->r11;
|
||||
xnuss->__r12 = mc->r12;
|
||||
xnuss->__r13 = mc->r13;
|
||||
xnuss->__r14 = mc->r14;
|
||||
xnuss->__r15 = mc->r15;
|
||||
}
|
||||
|
||||
/**
 * Copies (8 + 16) * 16 = 384 bytes from s to d, 16 bytes at a time.
 *
 * The callers visible in this file pass the start of the st/stmm0
 * register area and describe the region as st0-st7 followed by
 * xmm0-xmm15.
 *
 * NOTE(review): presumably written as fixed-size __builtin_memcpy
 * chunks so that no call to the library memcpy is emitted from this
 * noasan code path — confirm against the generated code.
 *
 * @param d is destination, accessed for 384 bytes
 * @param s is source, accessed for 384 bytes
 */
noasan static void CopyFpXmmRegs(void *d, const void *s) {
|
||||
size_t i;
|
||||
/* i is a byte offset, advancing one 16-byte slot per iteration */
for (i = 0; i < (8 + 16) * 16; i += 16) {
|
||||
__builtin_memcpy((char *)d + i, (const char *)s + i, 16);
|
||||
}
|
||||
}
|
||||
|
||||
noasan static void xnussefpustate2linux(
|
||||
|
@ -433,8 +455,7 @@ noasan static void xnussefpustate2linux(
|
|||
fs->rdp = xnufs->__fpu_dp;
|
||||
fs->mxcsr = xnufs->__fpu_mxcsr;
|
||||
fs->mxcr_mask = xnufs->__fpu_mxcsrmask;
|
||||
/* copy st0-st7 as well as xmm0-xmm15 */
|
||||
memcpy(fs->st, &xnufs->__fpu_stmm0, (8 + 16) * sizeof(uint128_t));
|
||||
CopyFpXmmRegs(fs->st, &xnufs->__fpu_stmm0);
|
||||
}
|
||||
|
||||
noasan static void linuxssefpustate2xnu(
|
||||
|
@ -447,8 +468,7 @@ noasan static void linuxssefpustate2xnu(
|
|||
xnufs->__fpu_dp = fs->rdp;
|
||||
xnufs->__fpu_mxcsr = fs->mxcsr;
|
||||
xnufs->__fpu_mxcsrmask = fs->mxcr_mask;
|
||||
/* copy st0-st7 as well as xmm0-xmm15 */
|
||||
memcpy(&xnufs->__fpu_stmm0, fs->st, (8 + 16) * sizeof(uint128_t));
|
||||
CopyFpXmmRegs(&xnufs->__fpu_stmm0, fs->st);
|
||||
}
|
||||
|
||||
noasan void __sigenter_xnu(void *fn, int infostyle, int sig,
|
||||
|
@ -462,10 +482,9 @@ noasan void __sigenter_xnu(void *fn, int infostyle, int sig,
|
|||
} g;
|
||||
rva = __sighandrvas[sig & (NSIG - 1)];
|
||||
if (rva >= kSigactionMinRva) {
|
||||
memset(&g, 0, sizeof(g));
|
||||
repstosb(&g, 0, sizeof(g));
|
||||
if (xnuctx) {
|
||||
memcpy(&g.uc.uc_sigmask, &xnuctx->uc_sigmask,
|
||||
MIN(sizeof(g.uc.uc_sigmask), sizeof(xnuctx->uc_sigmask)));
|
||||
g.uc.uc_sigmask.__bits[0] = xnuctx->uc_sigmask;
|
||||
g.uc.uc_stack.ss_sp = xnuctx->uc_stack.ss_sp;
|
||||
g.uc.uc_stack.ss_flags = xnuctx->uc_stack.ss_flags;
|
||||
g.uc.uc_stack.ss_size = xnuctx->uc_stack.ss_size;
|
||||
|
|
|
@ -40,5 +40,5 @@ void(mpsadbw)(uint16_t c[8], const uint8_t b[16], const uint8_t a[16],
|
|||
r[i] += ABS(b[(control & 4) + i + j] - a[(control & 3) * 4 + j]);
|
||||
}
|
||||
}
|
||||
memcpy(c, r, 16);
|
||||
__builtin_memcpy(c, r, 16);
|
||||
}
|
||||
|
|
|
@ -30,5 +30,5 @@ void(pabsb)(uint8_t a[16], const int8_t b[16]) {
|
|||
for (i = 0; i < 16; ++i) {
|
||||
r[i] = ABS(b[i]);
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -30,5 +30,5 @@ void(pabsd)(uint32_t a[4], const int32_t b[4]) {
|
|||
for (i = 0; i < 4; ++i) {
|
||||
r[i] = b[i] >= 0 ? b[i] : -(uint32_t)b[i];
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -30,5 +30,5 @@ void(pabsw)(uint16_t a[8], const int16_t b[8]) {
|
|||
for (i = 0; i < 8; ++i) {
|
||||
r[i] = ABS(b[i]);
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -34,5 +34,5 @@ void(packsswb)(int8_t a[16], const int16_t b[8], const int16_t c[8]) {
|
|||
int8_t r[16];
|
||||
for (i = 0; i < 8; ++i) r[i + 0] = MIN(INT8_MAX, MAX(INT8_MIN, b[i]));
|
||||
for (i = 0; i < 8; ++i) r[i + 8] = MIN(INT8_MAX, MAX(INT8_MIN, c[i]));
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -30,5 +30,5 @@ void(packusdw)(uint16_t a[8], const int32_t b[4], const int32_t c[4]) {
|
|||
uint16_t r[8];
|
||||
for (i = 0; i < 4; ++i) r[i + 0] = MIN(UINT16_MAX, MAX(UINT16_MIN, b[i]));
|
||||
for (i = 0; i < 4; ++i) r[i + 4] = MIN(UINT16_MAX, MAX(UINT16_MIN, c[i]));
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -34,5 +34,5 @@ void(packuswb)(uint8_t a[16], const int16_t b[8], const int16_t c[8]) {
|
|||
uint8_t r[16];
|
||||
for (i = 0; i < 8; ++i) r[i + 0] = MIN(UINT8_MAX, MAX(UINT8_MIN, b[i]));
|
||||
for (i = 0; i < 8; ++i) r[i + 8] = MIN(UINT8_MAX, MAX(UINT8_MIN, c[i]));
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -31,5 +31,5 @@ void(paddb)(int8_t a[16], const int8_t b[16], const int8_t c[16]) {
|
|||
unsigned i;
|
||||
int8_t r[16];
|
||||
for (i = 0; i < 16; ++i) r[i] = b[i] + c[i];
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -33,5 +33,5 @@ void(paddd)(uint32_t a[4], const uint32_t b[4], const uint32_t c[4]) {
|
|||
for (i = 0; i < 4; ++i) {
|
||||
r[i] = b[i] + c[i];
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -31,5 +31,5 @@ void(paddq)(uint64_t a[2], const uint64_t b[2], const uint64_t c[2]) {
|
|||
unsigned i;
|
||||
uint64_t r[2];
|
||||
for (i = 0; i < 2; ++i) r[i] = b[i] + c[i];
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -35,5 +35,5 @@ void(paddsb)(int8_t a[16], const int8_t b[16], const int8_t c[16]) {
|
|||
for (i = 0; i < 16; ++i) {
|
||||
r[i] = MIN(INT8_MAX, MAX(INT8_MIN, b[i] + c[i]));
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -35,5 +35,5 @@ void(paddsw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
|
|||
for (i = 0; i < 8; ++i) {
|
||||
r[i] = MIN(SHRT_MAX, MAX(SHRT_MIN, b[i] + c[i]));
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -35,5 +35,5 @@ void(paddusb)(uint8_t a[16], const uint8_t b[16], const uint8_t c[16]) {
|
|||
for (i = 0; i < 16; ++i) {
|
||||
r[i] = MIN(UINT8_MAX, b[i] + c[i]);
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -35,5 +35,5 @@ void(paddusw)(uint16_t a[8], const uint16_t b[8], const uint16_t c[8]) {
|
|||
for (i = 0; i < 8; ++i) {
|
||||
r[i] = MIN(UINT16_MAX, b[i] + c[i]);
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -35,5 +35,5 @@ void(paddw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
|
|||
for (i = 0; i < 8; ++i) {
|
||||
r[i] = b[i] + c[i];
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -36,8 +36,8 @@
|
|||
*/
|
||||
void(palignr)(void *c, const void *b, const void *a, unsigned long i) {
|
||||
char t[48];
|
||||
memcpy(t, a, 16);
|
||||
memcpy(t + 16, b, 16);
|
||||
memset(t + 32, 0, 16);
|
||||
memcpy(c, t + MIN(i, 32), 16);
|
||||
__builtin_memcpy(t, a, 16);
|
||||
__builtin_memcpy(t + 16, b, 16);
|
||||
__builtin_memset(t + 32, 0, 16);
|
||||
__builtin_memcpy(c, t + MIN(i, 32), 16);
|
||||
}
|
||||
|
|
|
@ -33,5 +33,5 @@ void(pavgb)(uint8_t a[16], const uint8_t b[16], const uint8_t c[16]) {
|
|||
for (i = 0; i < 16; ++i) {
|
||||
r[i] = (b[i] + c[i] + 1) >> 1;
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -33,5 +33,5 @@ void(pavgw)(uint16_t a[8], const uint16_t b[8], const uint16_t c[8]) {
|
|||
for (i = 0; i < 8; ++i) {
|
||||
r[i] = (b[i] + c[i] + 1) >> 1;
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -31,5 +31,5 @@ void(pcmpeqb)(uint8_t a[16], const uint8_t b[16], const uint8_t c[16]) {
|
|||
unsigned i;
|
||||
uint8_t r[16];
|
||||
for (i = 0; i < 16; ++i) r[i] = -(b[i] == c[i]);
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -31,5 +31,5 @@ void(pcmpeqd)(int32_t a[4], const int32_t b[4], const int32_t c[4]) {
|
|||
unsigned i;
|
||||
int32_t r[4];
|
||||
for (i = 0; i < 4; ++i) r[i] = -(b[i] == c[i]);
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -31,5 +31,5 @@ void(pcmpeqw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
|
|||
unsigned i;
|
||||
int16_t r[8];
|
||||
for (i = 0; i < 8; ++i) r[i] = -(b[i] == c[i]);
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -33,5 +33,5 @@ void(pcmpgtb)(int8_t a[16], const int8_t b[16], const int8_t c[16]) {
|
|||
unsigned i;
|
||||
int8_t r[16];
|
||||
for (i = 0; i < 16; ++i) r[i] = -(b[i] > c[i]);
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -31,5 +31,5 @@ void(pcmpgtd)(int32_t a[4], const int32_t b[4], const int32_t c[4]) {
|
|||
unsigned i;
|
||||
int32_t r[4];
|
||||
for (i = 0; i < 4; ++i) r[i] = -(b[i] > c[i]);
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -31,5 +31,5 @@ void(pcmpgtw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
|
|||
unsigned i;
|
||||
int16_t r[8];
|
||||
for (i = 0; i < 8; ++i) r[i] = -(b[i] > c[i]);
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -34,5 +34,5 @@ void(phaddd)(uint32_t a[4], const uint32_t b[4], const uint32_t c[4]) {
|
|||
t[1] = b[2] + b[3];
|
||||
t[2] = c[0] + c[1];
|
||||
t[3] = c[2] + c[3];
|
||||
memcpy(a, t, sizeof(t));
|
||||
__builtin_memcpy(a, t, sizeof(t));
|
||||
}
|
||||
|
|
|
@ -38,5 +38,5 @@ void(phaddw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
|
|||
t[5] = c[2] + c[3];
|
||||
t[6] = c[4] + c[5];
|
||||
t[7] = c[6] + c[7];
|
||||
memcpy(a, t, sizeof(t));
|
||||
__builtin_memcpy(a, t, sizeof(t));
|
||||
}
|
||||
|
|
|
@ -34,5 +34,5 @@ void(phsubd)(uint32_t a[4], const uint32_t b[4], const uint32_t c[4]) {
|
|||
t[1] = b[2] - b[3];
|
||||
t[2] = c[0] - c[1];
|
||||
t[3] = c[2] - c[3];
|
||||
memcpy(a, t, sizeof(t));
|
||||
__builtin_memcpy(a, t, sizeof(t));
|
||||
}
|
||||
|
|
|
@ -38,5 +38,5 @@ void(phsubw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
|
|||
t[5] = c[2] - c[3];
|
||||
t[6] = c[4] - c[5];
|
||||
t[7] = c[6] - c[7];
|
||||
memcpy(a, t, sizeof(t));
|
||||
__builtin_memcpy(a, t, sizeof(t));
|
||||
}
|
||||
|
|
|
@ -34,5 +34,5 @@ void(pmaxsw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
|
|||
for (i = 0; i < 8; ++i) {
|
||||
r[i] = MAX(b[i], c[i]);
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -34,5 +34,5 @@ void(pminsw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
|
|||
for (i = 0; i < 8; ++i) {
|
||||
r[i] = MIN(b[i], c[i]);
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -31,5 +31,5 @@ void(pmulhrsw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
|
|||
unsigned i;
|
||||
int16_t r[8];
|
||||
for (i = 0; i < 8; ++i) r[i] = (((b[i] * c[i]) >> 14) + 1) >> 1;
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -37,5 +37,5 @@ void(pmulhuw)(uint16_t a[8], const uint16_t b[8], const uint16_t c[8]) {
|
|||
x >>= 16;
|
||||
r[i] = x;
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -33,5 +33,5 @@ void(pmulhw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
|
|||
for (i = 0; i < 8; ++i) {
|
||||
r[i] = ((b[i] * c[i]) & 0xffff0000) >> 16;
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -34,5 +34,5 @@ void(pmulld)(uint32_t a[4], const uint32_t b[4], const uint32_t c[4]) {
|
|||
for (i = 0; i < 4; ++i) {
|
||||
r[i] = b[i] * c[i];
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -33,5 +33,5 @@ void(pmullw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
|
|||
for (i = 0; i < 8; ++i) {
|
||||
r[i] = b[i] * c[i];
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -33,5 +33,5 @@ void(pshufb)(uint8_t a[16], const uint8_t b[16], const uint8_t c[16]) {
|
|||
unsigned i;
|
||||
uint8_t r[16];
|
||||
for (i = 0; i < 16; ++i) r[i] = (c[i] & 0x80) ? 0 : b[c[i] & 0x0F];
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -30,5 +30,5 @@ void(pshufd)(int32_t b[4], const int32_t a[4], uint8_t m) {
|
|||
t[1] = a[(m & 0b00001100) >> 2];
|
||||
t[2] = a[(m & 0b00110000) >> 4];
|
||||
t[3] = a[(m & 0b11000000) >> 6];
|
||||
memcpy(b, t, 16);
|
||||
__builtin_memcpy(b, t, 16);
|
||||
}
|
||||
|
|
|
@ -32,6 +32,6 @@ void(pslld)(uint32_t a[4], const uint32_t b[4], unsigned char c) {
|
|||
a[i] = b[i] << c;
|
||||
}
|
||||
} else {
|
||||
memset(a, 0, 16);
|
||||
__builtin_memset(a, 0, 16);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,5 +32,5 @@ void(pslldq)(uint8_t b[16], const uint8_t a[16], unsigned long n) {
|
|||
if (n > 16) n = 16;
|
||||
for (i = 0; i < n; ++i) t[i] = 0;
|
||||
for (i = 0; i < 16 - n; ++i) t[n + i] = a[i];
|
||||
memcpy(b, t, 16);
|
||||
__builtin_memcpy(b, t, 16);
|
||||
}
|
||||
|
|
|
@ -30,6 +30,6 @@ void(pslldv)(uint32_t a[4], const uint32_t b[4], const uint64_t c[2]) {
|
|||
a[i] = b[i] << c[0];
|
||||
}
|
||||
} else {
|
||||
memset(a, 0, 16);
|
||||
__builtin_memset(a, 0, 16);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,6 +32,6 @@ void(psllq)(uint64_t a[2], const uint64_t b[2], unsigned char c) {
|
|||
a[i] = b[i] << c;
|
||||
}
|
||||
} else {
|
||||
memset(a, 0, 16);
|
||||
__builtin_memset(a, 0, 16);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,6 +30,6 @@ void(psllqv)(uint64_t a[2], const uint64_t b[2], const uint64_t c[2]) {
|
|||
a[i] = b[i] << c[0];
|
||||
}
|
||||
} else {
|
||||
memset(a, 0, 16);
|
||||
__builtin_memset(a, 0, 16);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,6 +32,6 @@ void(psllw)(uint16_t a[8], const uint16_t b[8], unsigned char c) {
|
|||
a[i] = b[i] << c;
|
||||
}
|
||||
} else {
|
||||
memset(a, 0, 16);
|
||||
__builtin_memset(a, 0, 16);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,6 +31,6 @@ void(psllwv)(uint16_t a[8], const uint16_t b[8], const uint64_t c[2]) {
|
|||
a[i] = b[i] << c[0];
|
||||
}
|
||||
} else {
|
||||
memset(a, 0, 16);
|
||||
__builtin_memset(a, 0, 16);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,6 +33,6 @@ void(psrld)(uint32_t a[4], const uint32_t b[4], unsigned char c) {
|
|||
a[i] = b[i] >> c;
|
||||
}
|
||||
} else {
|
||||
memset(a, 0, 16);
|
||||
__builtin_memset(a, 0, 16);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,6 +32,6 @@ void(psrldv)(uint32_t a[4], const uint32_t b[4], const uint64_t c[2]) {
|
|||
a[i] = b[i] >> c[0];
|
||||
}
|
||||
} else {
|
||||
memset(a, 0, 16);
|
||||
__builtin_memset(a, 0, 16);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,6 +33,6 @@ void(psrlq)(uint64_t a[2], const uint64_t b[2], unsigned char c) {
|
|||
a[i] = b[i] >> c;
|
||||
}
|
||||
} else {
|
||||
memset(a, 0, 16);
|
||||
__builtin_memset(a, 0, 16);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,6 +32,6 @@ void(psrlqv)(uint64_t a[2], const uint64_t b[2], const uint64_t c[2]) {
|
|||
a[i] = b[i] >> c[0];
|
||||
}
|
||||
} else {
|
||||
memset(a, 0, 16);
|
||||
__builtin_memset(a, 0, 16);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,6 +33,6 @@ void(psrlw)(uint16_t a[8], const uint16_t b[8], unsigned char c) {
|
|||
a[i] = b[i] >> c;
|
||||
}
|
||||
} else {
|
||||
memset(a, 0, 16);
|
||||
__builtin_memset(a, 0, 16);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,6 +32,6 @@ void(psrlwv)(uint16_t a[8], const uint16_t b[8], const uint64_t c[2]) {
|
|||
a[i] = b[i] >> c[0];
|
||||
}
|
||||
} else {
|
||||
memset(a, 0, 16);
|
||||
__builtin_memset(a, 0, 16);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,5 +33,5 @@ void(psubb)(uint8_t a[16], const uint8_t b[16], const uint8_t c[16]) {
|
|||
for (i = 0; i < 16; ++i) {
|
||||
r[i] = b[i] - c[i];
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -33,5 +33,5 @@ void(psubd)(uint32_t a[4], const uint32_t b[4], const uint32_t c[4]) {
|
|||
for (i = 0; i < 4; ++i) {
|
||||
r[i] = b[i] - c[i];
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -33,5 +33,5 @@ void(psubq)(uint64_t a[2], const uint64_t b[2], const uint64_t c[2]) {
|
|||
for (i = 0; i < 2; ++i) {
|
||||
r[i] = b[i] - c[i];
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -33,5 +33,5 @@ void(psubsb)(int8_t a[16], const int8_t b[16], const int8_t c[16]) {
|
|||
unsigned i;
|
||||
int8_t r[16];
|
||||
for (i = 0; i < 16; ++i) r[i] = MIN(INT8_MAX, MAX(INT8_MIN, b[i] - c[i]));
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -33,5 +33,5 @@ void(psubsw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
|
|||
unsigned i;
|
||||
int16_t r[8];
|
||||
for (i = 0; i < 8; ++i) r[i] = MIN(INT16_MAX, MAX(INT16_MIN, b[i] - c[i]));
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -35,5 +35,5 @@ void(psubusb)(uint8_t a[16], const uint8_t b[16], const uint8_t c[16]) {
|
|||
for (i = 0; i < 16; ++i) {
|
||||
r[i] = MIN(UINT8_MAX, MAX(UINT8_MIN, b[i] - c[i]));
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -35,5 +35,5 @@ void(psubusw)(uint16_t a[8], const uint16_t b[8], const uint16_t c[8]) {
|
|||
for (i = 0; i < 8; ++i) {
|
||||
r[i] = MIN(UINT16_MAX, MAX(UINT16_MIN, b[i] - c[i]));
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -33,5 +33,5 @@ void(psubw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
|
|||
for (i = 0; i < 8; ++i) {
|
||||
r[i] = b[i] - c[i];
|
||||
}
|
||||
memcpy(a, r, 16);
|
||||
__builtin_memcpy(a, r, 16);
|
||||
}
|
||||
|
|
|
@ -28,5 +28,5 @@ void(shufpd)(double c[2], const double b[2], const double a[2], uint8_t m) {
|
|||
double t[2];
|
||||
t[0] = a[(m & 0b0000001) >> 0];
|
||||
t[1] = b[(m & 0b0000010) >> 1];
|
||||
memcpy(c, t, 16);
|
||||
__builtin_memcpy(c, t, 16);
|
||||
}
|
||||
|
|
|
@ -30,5 +30,5 @@ void(shufps)(float c[4], const float b[4], const float a[4], uint8_t m) {
|
|||
t[1] = b[(m & 0b00001100) >> 2];
|
||||
t[2] = a[(m & 0b00110000) >> 4];
|
||||
t[3] = a[(m & 0b11000000) >> 6];
|
||||
memcpy(c, t, 16);
|
||||
__builtin_memcpy(c, t, 16);
|
||||
}
|
||||
|
|
|
@ -135,10 +135,12 @@ static int PrintBacktrace(int fd, const struct StackFrame *bp) {
|
|||
|
||||
void ShowBacktrace(int fd, const struct StackFrame *bp) {
|
||||
static bool noreentry;
|
||||
++ftrace;
|
||||
if (!bp) bp = __builtin_frame_address(0);
|
||||
if (!noreentry) {
|
||||
noreentry = true;
|
||||
PrintBacktrace(fd, bp);
|
||||
noreentry = 0;
|
||||
noreentry = false;
|
||||
}
|
||||
--ftrace;
|
||||
}
|
||||
|
|
|
@ -42,6 +42,7 @@
|
|||
*/
|
||||
int PrintBacktraceUsingSymbols(int fd, const struct StackFrame *bp,
|
||||
struct SymbolTable *st) {
|
||||
int rc;
|
||||
char *p;
|
||||
size_t gi;
|
||||
intptr_t addr;
|
||||
|
@ -50,10 +51,11 @@ int PrintBacktraceUsingSymbols(int fd, const struct StackFrame *bp,
|
|||
char buf[256], ibuf[21];
|
||||
const struct Symbol *symbol;
|
||||
const struct StackFrame *frame;
|
||||
++ftrace;
|
||||
if (!bp) bp = __builtin_frame_address(0);
|
||||
garbage = weaken(__garbage);
|
||||
gi = garbage ? garbage->i : 0;
|
||||
for (frame = bp; frame; frame = frame->next) {
|
||||
for (rc = 0, frame = bp; frame; frame = frame->next) {
|
||||
addr = frame->addr;
|
||||
if (addr == weakaddr("__gc")) {
|
||||
do {
|
||||
|
@ -80,8 +82,10 @@ int PrintBacktraceUsingSymbols(int fd, const struct StackFrame *bp,
|
|||
}
|
||||
*p++ = '\n';
|
||||
if (write(fd, buf, p - buf) == -1) {
|
||||
return -1;
|
||||
rc = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
--ftrace;
|
||||
return rc;
|
||||
}
|
||||
|
|
|
@ -30,10 +30,12 @@ struct SymbolTable *GetSymbolTable(void) {
|
|||
const char *debugbin;
|
||||
if (!once) {
|
||||
once = true;
|
||||
++ftrace;
|
||||
if ((debugbin = FindDebugBinary()) &&
|
||||
(singleton = OpenSymbolTable(debugbin))) {
|
||||
__cxa_atexit(CloseSymbolTable, &singleton, NULL);
|
||||
}
|
||||
--ftrace;
|
||||
}
|
||||
return singleton;
|
||||
}
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#define kLogInfo 3
|
||||
#define kLogVerbose 4
|
||||
#define kLogDebug 5
|
||||
#define kLogNoise 6
|
||||
|
||||
/**
|
||||
* Log level for compile-time DCE.
|
||||
|
@ -60,6 +61,19 @@ extern unsigned __log_level; /* log level for runtime check */
|
|||
((!__builtin_constant_p(LEVEL) || (LEVEL) <= LOGGABLELEVEL) && \
|
||||
(LEVEL) <= __log_level)
|
||||
|
||||
#define FATALF(FMT, ...) \
|
||||
do { \
|
||||
ffatalf(kLogFatal, __FILE__, __LINE__, NULL, FMT, ##__VA_ARGS__); \
|
||||
unreachable; \
|
||||
} while (0)
|
||||
|
||||
#define WARNF(FMT, ...) \
|
||||
do { \
|
||||
if (LOGGABLE(kLogWarn)) { \
|
||||
flogf(kLogWarn, __FILE__, __LINE__, NULL, FMT, ##__VA_ARGS__); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define LOGF(FMT, ...) \
|
||||
do { \
|
||||
if (LOGGABLE(kLogInfo)) { \
|
||||
|
@ -67,6 +81,27 @@ extern unsigned __log_level; /* log level for runtime check */
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
#define VERBOSEF(FMT, ...) \
|
||||
do { \
|
||||
if (LOGGABLE(kLogVerbose)) { \
|
||||
fverbosef(kLogVerbose, __FILE__, __LINE__, NULL, FMT, ##__VA_ARGS__); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define DEBUGF(FMT, ...) \
|
||||
do { \
|
||||
if (LOGGABLE(kLogDebug)) { \
|
||||
fdebugf(kLogDebug, __FILE__, __LINE__, NULL, FMT, ##__VA_ARGS__); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define NOISEF(FMT, ...) \
|
||||
do { \
|
||||
if (LOGGABLE(kLogNoise)) { \
|
||||
fnoisef(kLogNoise, __FILE__, __LINE__, NULL, FMT, ##__VA_ARGS__); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define VFLOG(FMT, VA) \
|
||||
do { \
|
||||
if (LOGGABLE(kLogInfo)) { \
|
||||
|
@ -88,13 +123,6 @@ extern unsigned __log_level; /* log level for runtime check */
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
#define WARNF(FMT, ...) \
|
||||
do { \
|
||||
if (LOGGABLE(kLogWarn)) { \
|
||||
flogf(kLogWarn, __FILE__, __LINE__, NULL, FMT, ##__VA_ARGS__); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define VWARNF(FMT, VA) \
|
||||
do { \
|
||||
if (LOGGABLE(kLogWarn)) { \
|
||||
|
@ -116,12 +144,6 @@ extern unsigned __log_level; /* log level for runtime check */
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
#define FATALF(FMT, ...) \
|
||||
do { \
|
||||
ffatalf(kLogFatal, __FILE__, __LINE__, NULL, FMT, ##__VA_ARGS__); \
|
||||
unreachable; \
|
||||
} while (0)
|
||||
|
||||
#define VFATALF(FMT, VA) \
|
||||
do { \
|
||||
vffatalf(kLogFatal, __FILE__, __LINE__, NULL, FMT, VA); \
|
||||
|
@ -140,20 +162,6 @@ extern unsigned __log_level; /* log level for runtime check */
|
|||
unreachable; \
|
||||
} while (0)
|
||||
|
||||
#define DEBUGF(FMT, ...) \
|
||||
do { \
|
||||
if (LOGGABLE(kLogDebug)) { \
|
||||
fdebugf(kLogDebug, __FILE__, __LINE__, NULL, FMT, ##__VA_ARGS__); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define VERBOSEF(FMT, ...) \
|
||||
do { \
|
||||
if (LOGGABLE(kLogVerbose)) { \
|
||||
fverbosef(kLogVerbose, __FILE__, __LINE__, NULL, FMT, ##__VA_ARGS__); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define VDEBUGF(FMT, VA) \
|
||||
do { \
|
||||
if (LOGGABLE(kLogDebug)) { \
|
||||
|
@ -182,6 +190,20 @@ extern unsigned __log_level; /* log level for runtime check */
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
#define VNOISEF(FMT, VA) \
|
||||
do { \
|
||||
if (LOGGABLE(kLogNoise)) { \
|
||||
vfnoisef(kLogNoise, __FILE__, __LINE__, NULL, FMT, VA); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define FNOISEF(F, FMT, ...) \
|
||||
do { \
|
||||
if (LOGGABLE(kLogNoise)) { \
|
||||
fnoisef(kLogNoise, __FILE__, __LINE__, F, FMT, ##__VA_ARGS__); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § liblog » on error resume next ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
@ -219,6 +241,8 @@ void fverbosef(ARGS, ...) asm("flogf") ATTR relegated libcesque;
|
|||
void vfverbosef(ARGS, va_list) asm("vflogf") ATTRV relegated libcesque;
|
||||
void fdebugf(ARGS, ...) asm("flogf") ATTR relegated libcesque;
|
||||
void vfdebugf(ARGS, va_list) asm("vflogf") ATTRV relegated libcesque;
|
||||
void fnoisef(ARGS, ...) asm("flogf") ATTR relegated libcesque;
|
||||
void vfnoisef(ARGS, va_list) asm("vflogf") ATTRV relegated libcesque;
|
||||
void ffatalf(ARGS, ...) asm("flogf") ATTR relegated wontreturn libcesque;
|
||||
void vffatalf(ARGS, va_list) asm("vflogf") ATTRV relegated wontreturn libcesque;
|
||||
#undef ARGS
|
||||
|
|
|
@ -39,10 +39,6 @@
|
|||
|
||||
static struct timespec vflogf_ts;
|
||||
|
||||
static int vflogf_loglevel2char(unsigned level) {
|
||||
return "FEWIVDYZ"[level & 7];
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes corrective action if logging is on the fritz.
|
||||
*/
|
||||
|
@ -87,6 +83,7 @@ void(vflogf)(unsigned level, const char *file, int line, FILE *f,
|
|||
int64_t secs, nsec, dots;
|
||||
if (!f) f = __log_file;
|
||||
if (!f) return;
|
||||
++ftrace;
|
||||
t2 = nowl();
|
||||
secs = t2;
|
||||
nsec = (t2 - secs) * 1e9L;
|
||||
|
@ -104,8 +101,8 @@ void(vflogf)(unsigned level, const char *file, int line, FILE *f,
|
|||
prog = basename(program_invocation_name);
|
||||
bufmode = f->bufmode;
|
||||
if (bufmode == _IOLBF) f->bufmode = _IOFBF;
|
||||
if ((fprintf)(f, "%c%s%06ld:%s:%d:%.*s:%d] ", vflogf_loglevel2char(level),
|
||||
buf32p, rem1000000int64(div1000int64(dots)), file, line,
|
||||
if ((fprintf)(f, "%c%s%06ld:%s:%d:%.*s:%d] ", "FEWIVDNT"[level & 7], buf32p,
|
||||
rem1000000int64(div1000int64(dots)), file, line,
|
||||
strchrnul(prog, '.') - prog, prog, getpid()) <= 0) {
|
||||
vflogf_onfail(f);
|
||||
}
|
||||
|
@ -124,4 +121,5 @@ void(vflogf)(unsigned level, const char *file, int line, FILE *f,
|
|||
__die();
|
||||
unreachable;
|
||||
}
|
||||
--ftrace;
|
||||
}
|
||||
|
|
39
libc/nexgen32e/adc.S
Normal file
39
libc/nexgen32e/adc.S
Normal file
|
@ -0,0 +1,39 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Computes C = A + B
|
||||
//
|
||||
// Adds two arrays of 64-bit words, word by word from index 0 upward,
// feeding the carry flag of each addition into the next one.
//
// NOTE(review): the zero check tests only %ecx while the loop
// instruction counts down %rcx, so this presumably expects the
// upper half of %rcx to be clear — confirm with callers.
//
// @param rdi is C
|
||||
// @param rsi is A
|
||||
// @param rdx is B
|
||||
// @param rcx is number of additions
|
||||
// @return al has carry
|
||||
adc: .leafprologue
|
||||
// nothing to add when the count is zero
test %ecx,%ecx
|
||||
jz 1f
|
||||
// r9 is the word index; the first adc below relies on the carry
// flag being clear here, and later ones on inc/loop leaving it
// alone (see the x86 instruction reference)
xor %r9d,%r9d
|
||||
// rax = A[r9] + B[r9] + carry, stored to C[r9]
0: mov (%rsi,%r9,8),%rax
|
||||
adc (%rdx,%r9,8),%rax
|
||||
mov %rax,(%rdi,%r9,8)
|
||||
inc %r9d
|
||||
loop 0b
|
||||
// al = carry flag as left by the last flag-setting instruction
1: setb %al
|
||||
.leafepilogue
|
||||
.endfn adc,globl
|
|
@ -22,6 +22,9 @@ int64_t rem10000int64(int64_t) libcesque pureconst;
|
|||
int64_t rem1000000int64(int64_t) libcesque pureconst;
|
||||
int64_t rem1000000000int64(int64_t) libcesque pureconst;
|
||||
|
||||
char sbb(uint64_t *, const uint64_t *, const uint64_t *, size_t);
|
||||
char adc(uint64_t *, const uint64_t *, const uint64_t *, size_t);
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_NEXGEN32E_NEXGEN32E_H_ */
|
||||
|
|
41
libc/nexgen32e/sub.S
Normal file
41
libc/nexgen32e/sub.S
Normal file
|
@ -0,0 +1,41 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Computes C = A - B
|
||||
//
|
||||
// Subtracts two arrays of 64-bit words, word by word from index 0
// upward, feeding the carry (borrow) flag of each subtraction into
// the next one.
//
// Aliasing such as sbb(A,A,B) or sbb(B,A,B) is OK.
|
||||
//
|
||||
// NOTE(review): the zero check tests only %ecx while the loop
// instruction counts down %rcx, so this presumably expects the
// upper half of %rcx to be clear — confirm with callers.
//
// @param rdi is C
|
||||
// @param rsi is A
|
||||
// @param rdx is B
|
||||
// @param rcx is number of subtracts
|
||||
// @return al is carry
|
||||
sbb: .leafprologue
|
||||
// nothing to subtract when the count is zero
test %ecx,%ecx
|
||||
jz 1f
|
||||
// r9 is the word index; the first sbb below relies on the carry
// flag being clear here, and later ones on inc/loop leaving it
// alone (see the x86 instruction reference)
xor %r9d,%r9d
|
||||
// rax = A[r9] - B[r9] - carry, stored to C[r9]; each word is
// loaded before the store to the same index, which is what makes
// the aliasing noted above work
0: mov (%rsi,%r9,8),%rax
|
||||
sbb (%rdx,%r9,8),%rax
|
||||
mov %rax,(%rdi,%r9,8)
|
||||
inc %r9d
|
||||
loop 0b
|
||||
// al = carry flag as left by the last flag-setting instruction
1: setb %al
|
||||
.leafepilogue
|
||||
.endfn sbb,globl
|
|
@ -20,7 +20,10 @@
|
|||
.privileged
|
||||
|
||||
ftrace_hook:
|
||||
push %rbp
|
||||
cmp $0,ftrace(%rip)
|
||||
je 1f
|
||||
ret
|
||||
1: push %rbp
|
||||
mov %rsp,%rbp
|
||||
and $-16,%rsp
|
||||
sub $0x80,%rsp
|
||||
|
@ -42,7 +45,7 @@ ftrace_hook:
|
|||
push %r9
|
||||
push %r10
|
||||
push %r11
|
||||
call ftrace
|
||||
call ftracer
|
||||
pop %r11
|
||||
pop %r10
|
||||
pop %r9
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
|
@ -16,109 +16,6 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/alg/bisectcarleft.internal.h"
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/bits/safemacros.internal.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/calls/internal.h"
|
||||
#include "libc/calls/struct/sigset.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/fmt/itoa.h"
|
||||
#include "libc/intrin/repmovsb.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/nexgen32e/rdtsc.h"
|
||||
#include "libc/nexgen32e/rdtscp.h"
|
||||
#include "libc/nexgen32e/stackframe.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/nt/files.h"
|
||||
#include "libc/nt/runtime.h"
|
||||
#include "libc/nt/thunk/msabi.h"
|
||||
#include "libc/runtime/internal.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/runtime/symbols.internal.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/sysv/consts/fileno.h"
|
||||
#include "libc/sysv/consts/nr.h"
|
||||
#include "libc/sysv/consts/prot.h"
|
||||
#include "libc/sysv/consts/sig.h"
|
||||
|
||||
/**
|
||||
* @fileoverview Plain-text function call logging.
|
||||
*
|
||||
* Able to log ~2 million function calls per second, which is mostly
|
||||
* bottlenecked by system call overhead. Log size is reasonable if piped
|
||||
* into gzip.
|
||||
*/
|
||||
|
||||
void ftrace_hook(void);
|
||||
|
||||
static int noreentry;
|
||||
static uint64_t laststamp;
|
||||
static char g_buf[512];
|
||||
static const char *g_lastsymbol;
|
||||
static struct SymbolTable *g_symbols;
|
||||
|
||||
static noasan int GetNestingLevel(struct StackFrame *frame) {
|
||||
int nesting = -2;
|
||||
while (frame) {
|
||||
++nesting;
|
||||
frame = frame->next;
|
||||
}
|
||||
return max(0, nesting);
|
||||
}
|
||||
|
||||
/**
|
||||
* Prints name of function being called.
|
||||
*
|
||||
* We insert CALL instructions that point to this function, in the
|
||||
* prologues of other functions. We assume those functions behave
|
||||
* according to the System Five NexGen32e ABI.
|
||||
*/
|
||||
privileged noasan void ftrace(void) {
|
||||
char *p;
|
||||
uint64_t stamp;
|
||||
const char *symbol;
|
||||
struct StackFrame *frame;
|
||||
size_t nesting, symbolsize;
|
||||
if (!cmpxchg(&noreentry, 0, 1)) return;
|
||||
if (g_symbols) {
|
||||
stamp = rdtsc();
|
||||
frame = __builtin_frame_address(0);
|
||||
frame = frame->next;
|
||||
symbol =
|
||||
&g_symbols->name_base[g_symbols
|
||||
->symbols[bisectcarleft(
|
||||
(const int32_t(*)[2])g_symbols->symbols,
|
||||
g_symbols->count,
|
||||
frame->addr - g_symbols->addr_base)]
|
||||
.name_rva];
|
||||
if (symbol != g_lastsymbol) {
|
||||
symbolsize = strlen(symbol);
|
||||
nesting = GetNestingLevel(frame);
|
||||
if (2 + nesting * 2 + symbolsize + 1 + 21 + 2 <= ARRAYLEN(g_buf)) {
|
||||
p = g_buf;
|
||||
*p++ = '+';
|
||||
*p++ = ' ';
|
||||
memset(p, ' ', nesting * 2);
|
||||
p += nesting * 2;
|
||||
p = mempcpy(p, symbol, symbolsize);
|
||||
*p++ = ' ';
|
||||
p += uint64toarray_radix10((stamp - laststamp) / 3.3, p);
|
||||
*p++ = '\r';
|
||||
*p++ = '\n';
|
||||
write(2, g_buf, p - g_buf);
|
||||
}
|
||||
}
|
||||
g_lastsymbol = symbol;
|
||||
laststamp = X86_HAVE(RDTSCP) ? rdtscp(0) : rdtsc();
|
||||
}
|
||||
noreentry = 0;
|
||||
}
|
||||
|
||||
textstartup void ftrace_install(void) {
|
||||
if ((g_symbols = OpenSymbolTable(FindDebugBinary()))) {
|
||||
__hook(ftrace_hook, g_symbols);
|
||||
} else {
|
||||
write(2, "error: --ftrace needs the concomitant .com.dbg binary\n", 54);
|
||||
}
|
||||
}
|
||||
int ftrace;
|
||||
|
|
124
libc/runtime/ftracer.c
Normal file
124
libc/runtime/ftracer.c
Normal file
|
@ -0,0 +1,124 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/alg/bisectcarleft.internal.h"
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/bits/safemacros.internal.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/calls/internal.h"
|
||||
#include "libc/calls/struct/sigset.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/fmt/itoa.h"
|
||||
#include "libc/intrin/repmovsb.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/nexgen32e/rdtsc.h"
|
||||
#include "libc/nexgen32e/rdtscp.h"
|
||||
#include "libc/nexgen32e/stackframe.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/nt/files.h"
|
||||
#include "libc/nt/runtime.h"
|
||||
#include "libc/nt/thunk/msabi.h"
|
||||
#include "libc/runtime/internal.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/runtime/symbols.internal.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/sysv/consts/fileno.h"
|
||||
#include "libc/sysv/consts/nr.h"
|
||||
#include "libc/sysv/consts/prot.h"
|
||||
#include "libc/sysv/consts/sig.h"
|
||||
|
||||
/**
|
||||
* @fileoverview Plain-text function call logging.
|
||||
*
|
||||
* Able to log ~2 million function calls per second, which is mostly
|
||||
* bottlenecked by system call overhead. Log size is reasonable if piped
|
||||
* into gzip.
|
||||
*/
|
||||
|
||||
void ftrace_hook(void);
|
||||
|
||||
static int noreentry;
|
||||
static uint64_t laststamp;
|
||||
static char g_buf[512];
|
||||
static const char *g_lastsymbol;
|
||||
static struct SymbolTable *g_symbols;
|
||||
|
||||
static noasan int GetNestingLevel(struct StackFrame *frame) {
|
||||
int nesting = -2;
|
||||
while (frame) {
|
||||
++nesting;
|
||||
frame = frame->next;
|
||||
}
|
||||
return max(0, nesting);
|
||||
}
|
||||
|
||||
/**
|
||||
* Prints name of function being called.
|
||||
*
|
||||
* We insert CALL instructions that point to this function, in the
|
||||
* prologues of other functions. We assume those functions behave
|
||||
* according to the System Five NexGen32e ABI.
|
||||
*/
|
||||
privileged noasan void ftracer(void) {
|
||||
char *p;
|
||||
uint64_t stamp;
|
||||
const char *symbol;
|
||||
struct StackFrame *frame;
|
||||
size_t nesting, symbolsize;
|
||||
if (!cmpxchg(&noreentry, 0, 1)) return;
|
||||
if (g_symbols) {
|
||||
stamp = rdtsc();
|
||||
frame = __builtin_frame_address(0);
|
||||
frame = frame->next;
|
||||
symbol =
|
||||
&g_symbols->name_base[g_symbols
|
||||
->symbols[bisectcarleft(
|
||||
(const int32_t(*)[2])g_symbols->symbols,
|
||||
g_symbols->count,
|
||||
frame->addr - g_symbols->addr_base)]
|
||||
.name_rva];
|
||||
if (symbol != g_lastsymbol) {
|
||||
symbolsize = strlen(symbol);
|
||||
nesting = GetNestingLevel(frame);
|
||||
if (2 + nesting * 2 + symbolsize + 1 + 21 + 2 <= ARRAYLEN(g_buf)) {
|
||||
p = g_buf;
|
||||
*p++ = '+';
|
||||
*p++ = ' ';
|
||||
memset(p, ' ', nesting * 2);
|
||||
p += nesting * 2;
|
||||
p = mempcpy(p, symbol, symbolsize);
|
||||
*p++ = ' ';
|
||||
p += uint64toarray_radix10((stamp - laststamp) / 3.3, p);
|
||||
*p++ = '\r';
|
||||
*p++ = '\n';
|
||||
write(2, g_buf, p - g_buf);
|
||||
}
|
||||
g_lastsymbol = symbol;
|
||||
laststamp = X86_HAVE(RDTSCP) ? rdtscp(0) : rdtsc();
|
||||
}
|
||||
}
|
||||
noreentry = 0;
|
||||
}
|
||||
|
||||
textstartup void ftrace_install(void) {
|
||||
if ((g_symbols = OpenSymbolTable(FindDebugBinary()))) {
|
||||
__hook(ftrace_hook, g_symbols);
|
||||
} else {
|
||||
write(2, "error: --ftrace needs the concomitant .com.dbg binary\n", 54);
|
||||
}
|
||||
}
|
|
@ -24,12 +24,146 @@
|
|||
#include "libc/str/str.h"
|
||||
#include "libc/sysv/errfuns.h"
|
||||
|
||||
typedef long long xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
|
||||
|
||||
static noasan void *MoveMemoryNoAsan(void *dst, const void *src, size_t n) {
|
||||
size_t i;
|
||||
xmm_t v, w;
|
||||
char *d, *r;
|
||||
const char *s;
|
||||
uint64_t a, b;
|
||||
d = dst;
|
||||
s = src;
|
||||
switch (n) {
|
||||
case 9 ... 15:
|
||||
__builtin_memcpy(&a, s, 8);
|
||||
__builtin_memcpy(&b, s + n - 8, 8);
|
||||
__builtin_memcpy(d, &a, 8);
|
||||
__builtin_memcpy(d + n - 8, &b, 8);
|
||||
return d;
|
||||
case 5 ... 7:
|
||||
__builtin_memcpy(&a, s, 4);
|
||||
__builtin_memcpy(&b, s + n - 4, 4);
|
||||
__builtin_memcpy(d, &a, 4);
|
||||
__builtin_memcpy(d + n - 4, &b, 4);
|
||||
return d;
|
||||
case 17 ... 32:
|
||||
__builtin_memcpy(&v, s, 16);
|
||||
__builtin_memcpy(&w, s + n - 16, 16);
|
||||
__builtin_memcpy(d, &v, 16);
|
||||
__builtin_memcpy(d + n - 16, &w, 16);
|
||||
return d;
|
||||
case 16:
|
||||
__builtin_memcpy(&v, s, 16);
|
||||
__builtin_memcpy(d, &v, 16);
|
||||
return d;
|
||||
case 0:
|
||||
return d;
|
||||
case 1:
|
||||
*d = *s;
|
||||
return d;
|
||||
case 8:
|
||||
__builtin_memcpy(&a, s, 8);
|
||||
__builtin_memcpy(d, &a, 8);
|
||||
return d;
|
||||
case 4:
|
||||
__builtin_memcpy(&a, s, 4);
|
||||
__builtin_memcpy(d, &a, 4);
|
||||
return d;
|
||||
case 2:
|
||||
__builtin_memcpy(&a, s, 2);
|
||||
__builtin_memcpy(d, &a, 2);
|
||||
return d;
|
||||
case 3:
|
||||
__builtin_memcpy(&a, s, 2);
|
||||
__builtin_memcpy(&b, s + 1, 2);
|
||||
__builtin_memcpy(d, &a, 2);
|
||||
__builtin_memcpy(d + 1, &b, 2);
|
||||
return d;
|
||||
default:
|
||||
r = d;
|
||||
if (d > s) {
|
||||
do {
|
||||
n -= 32;
|
||||
__builtin_memcpy(&v, s + n, 16);
|
||||
__builtin_memcpy(&w, s + n + 16, 16);
|
||||
__builtin_memcpy(d + n, &v, 16);
|
||||
__builtin_memcpy(d + n + 16, &w, 16);
|
||||
} while (n >= 32);
|
||||
} else {
|
||||
i = 0;
|
||||
do {
|
||||
__builtin_memcpy(&v, s + i, 16);
|
||||
__builtin_memcpy(&w, s + i + 16, 16);
|
||||
__builtin_memcpy(d + i, &v, 16);
|
||||
__builtin_memcpy(d + i + 16, &w, 16);
|
||||
} while ((i += 32) + 32 <= n);
|
||||
d += i;
|
||||
s += i;
|
||||
n -= i;
|
||||
}
|
||||
switch (n) {
|
||||
case 0:
|
||||
return r;
|
||||
case 17 ... 31:
|
||||
__builtin_memcpy(&v, s, 16);
|
||||
__builtin_memcpy(&w, s + n - 16, 16);
|
||||
__builtin_memcpy(d, &v, 16);
|
||||
__builtin_memcpy(d + n - 16, &w, 16);
|
||||
return r;
|
||||
case 9 ... 15:
|
||||
__builtin_memcpy(&a, s, 8);
|
||||
__builtin_memcpy(&b, s + n - 8, 8);
|
||||
__builtin_memcpy(d, &a, 8);
|
||||
__builtin_memcpy(d + n - 8, &b, 8);
|
||||
return r;
|
||||
case 5 ... 7:
|
||||
__builtin_memcpy(&a, s, 4);
|
||||
__builtin_memcpy(&b, s + n - 4, 4);
|
||||
__builtin_memcpy(d, &a, 4);
|
||||
__builtin_memcpy(d + n - 4, &b, 4);
|
||||
return r;
|
||||
case 16:
|
||||
__builtin_memcpy(&v, s, 16);
|
||||
__builtin_memcpy(d, &v, 16);
|
||||
return r;
|
||||
case 8:
|
||||
__builtin_memcpy(&a, s, 8);
|
||||
__builtin_memcpy(d, &a, 8);
|
||||
return r;
|
||||
case 4:
|
||||
__builtin_memcpy(&a, s, 4);
|
||||
__builtin_memcpy(d, &a, 4);
|
||||
return r;
|
||||
case 1:
|
||||
*d = *s;
|
||||
return r;
|
||||
case 2:
|
||||
__builtin_memcpy(&a, s, 2);
|
||||
__builtin_memcpy(d, &a, 2);
|
||||
return r;
|
||||
case 3:
|
||||
__builtin_memcpy(&a, s, 2);
|
||||
__builtin_memcpy(&b, s + 1, 2);
|
||||
__builtin_memcpy(d, &a, 2);
|
||||
__builtin_memcpy(d + 1, &b, 2);
|
||||
return r;
|
||||
default:
|
||||
unreachable;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef __FSANITIZE_ADDRESS__
|
||||
#define MoveMemoryNoAsan memmove
|
||||
#endif
|
||||
|
||||
static noasan void RemoveMemoryIntervals(struct MemoryIntervals *mm, int i,
|
||||
int n) {
|
||||
assert(i >= 0);
|
||||
assert(i + n <= mm->i);
|
||||
memcpy(mm->p + i, mm->p + i + n,
|
||||
(intptr_t)(mm->p + mm->i) - (intptr_t)(mm->p + i + n));
|
||||
MoveMemoryNoAsan(mm->p + i, mm->p + i + n,
|
||||
(intptr_t)(mm->p + mm->i) - (intptr_t)(mm->p + i + n));
|
||||
mm->i -= n;
|
||||
}
|
||||
|
||||
|
@ -37,8 +171,8 @@ static noasan void CreateMemoryInterval(struct MemoryIntervals *mm, int i) {
|
|||
assert(i >= 0);
|
||||
assert(i <= mm->i);
|
||||
assert(mm->i < ARRAYLEN(mm->p));
|
||||
memmove(mm->p + i + 1, mm->p + i,
|
||||
(intptr_t)(mm->p + mm->i) - (intptr_t)(mm->p + i));
|
||||
MoveMemoryNoAsan(mm->p + i + 1, mm->p + i,
|
||||
(intptr_t)(mm->p + mm->i) - (intptr_t)(mm->p + i));
|
||||
++mm->i;
|
||||
}
|
||||
|
||||
|
|
|
@ -14,6 +14,7 @@ extern char **environ; /* CRT */
|
|||
extern unsigned long *__auxv; /* CRT */
|
||||
extern char *program_invocation_name; /* RII */
|
||||
extern char *program_invocation_short_name; /* RII */
|
||||
extern int ftrace; /* CRT */
|
||||
extern uint64_t g_syscount; /* RII */
|
||||
extern const uint64_t kStartTsc; /* RII */
|
||||
extern const char kTmpPath[]; /* RII */
|
||||
|
|
24
libc/str/mempcpy-pure.c
Normal file
24
libc/str/mempcpy-pure.c
Normal file
|
@ -0,0 +1,24 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Copies n bytes from src to dst using memmove_pure().
 *
 * @param dst is the destination buffer
 * @param src is the source buffer
 * @param n is the number of bytes to copy
 * @return pointer to the byte just past the last one written, dst + n
 */
void *mempcpy_pure(void *dst, const void *src, size_t n) {
  char *end = (char *)dst + n;
  memmove_pure(dst, src, n);
  return end;
}
|
|
@ -23,11 +23,11 @@
|
|||
static inline noasan size_t stpcpy_sse2(char *d, const char *s, size_t i) {
|
||||
uint8_t v1[16], v2[16], vz[16];
|
||||
for (;;) {
|
||||
memset(vz, 0, 16);
|
||||
memcpy(v1, s + i, 16);
|
||||
__builtin_memset(vz, 0, 16);
|
||||
__builtin_memcpy(v1, s + i, 16);
|
||||
pcmpeqb(v2, v1, vz);
|
||||
if (!pmovmskb(v2)) {
|
||||
memcpy(d + i, v1, 16);
|
||||
__builtin_memcpy(d + i, v1, 16);
|
||||
i += 16;
|
||||
} else {
|
||||
break;
|
||||
|
|
|
@ -199,6 +199,12 @@ wchar_t *wchomp(wchar_t *);
|
|||
|
||||
bool escapedos(char16_t *, unsigned, const char16_t *, unsigned);
|
||||
|
||||
void *memset_pure(void *, int, size_t) memcpyesque;
|
||||
void *memmove_pure(void *, const void *, size_t) memcpyesque;
|
||||
void *mempcpy_pure(void *, const void *, size_t) memcpyesque;
|
||||
size_t strlen_pure(const char *) strlenesque;
|
||||
size_t strcspn_pure(const char *, const char *) strlenesque;
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § strings » multibyte ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
@ -374,41 +380,20 @@ char *strsignal(int) returnsnonnull libcesque;
|
|||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § strings » address sanitizer ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
void *memset_pure(void *, int, size_t) memcpyesque;
|
||||
void *memmove_pure(void *, const void *, size_t) memcpyesque;
|
||||
size_t strlen_pure(const char *) strlenesque;
|
||||
size_t strcspn_pure(const char *, const char *) strlenesque;
|
||||
#if defined(__FSANITIZE_ADDRESS__)
|
||||
|
||||
#define strcspn(STR, REJECT) strcspn_pure(STR, REJECT)
|
||||
|
||||
#undef strlen
|
||||
#define strlen(STR) \
|
||||
(__builtin_constant_p(STR) ? __builtin_strlen(STR) : strlen_pure(STR))
|
||||
|
||||
#undef memset
|
||||
#define memset(DST, CHAR, SIZE) \
|
||||
(__memcpy_isgoodsize(SIZE) ? __builtin_memset(DST, CHAR, SIZE) \
|
||||
: memset_pure(DST, CHAR, SIZE))
|
||||
|
||||
#undef memmove
|
||||
#define memmove(DST, SRC, SIZE) \
|
||||
(__memcpy_isgoodsize(SIZE) ? __builtin_memmove(DST, SRC, SIZE) \
|
||||
: memmove_pure(DST, SRC, SIZE))
|
||||
|
||||
#undef memcpy
|
||||
#define memcpy(DST, SRC, SIZE) \
|
||||
(__memcpy_isgoodsize(SIZE) ? __builtin_memcpy(DST, SRC, SIZE) \
|
||||
: memmove_pure(DST, SRC, SIZE))
|
||||
|
||||
#undef memmove
|
||||
#undef mempcpy
|
||||
#define mempcpy(DST, SRC, SIZE) \
|
||||
(__memcpy_isgoodsize(SIZE) ? __builtin_mempcpy(DST, SRC, SIZE) : ({ \
|
||||
void *DsT = (DST); \
|
||||
size_t SiZe = (SIZE); \
|
||||
memmove_pure(DsT, SRC, SiZe); \
|
||||
(void *)((char *)DsT + SiZe); \
|
||||
}))
|
||||
#undef memset
|
||||
#undef strlen
|
||||
|
||||
#define memcpy memmove_pure
|
||||
#define memmove memmove_pure
|
||||
#define mempcpy mempcpy_pure
|
||||
#define memset memset_pure
|
||||
#define strcspn strcspn_pure
|
||||
#define strlen strlen_pure
|
||||
|
||||
#endif /* __FSANITIZE_ADDRESS__ */
|
||||
#endif /* __GNUC__ && !__STRICT_ANSI__ */
|
||||
|
|
|
@ -23,11 +23,11 @@
|
|||
static noasan size_t strcpy_sse2(char *d, const char *s, size_t i) {
|
||||
uint8_t v1[16], v2[16], vz[16];
|
||||
for (;;) {
|
||||
memset(vz, 0, 16);
|
||||
memcpy(v1, s + i, 16);
|
||||
__builtin_memset(vz, 0, 16);
|
||||
__builtin_memcpy(v1, s + i, 16);
|
||||
pcmpeqb(v2, v1, vz);
|
||||
if (!pmovmskb(v2)) {
|
||||
memcpy(d + i, v1, 16);
|
||||
__builtin_memcpy(d + i, v1, 16);
|
||||
i += 16;
|
||||
} else {
|
||||
break;
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
#include "libc/bits/bits.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
static noasan size_t strlen_pure_x64(const char *s, size_t i) {
|
||||
static inline noasan size_t strlen_pure_x64(const char *s, size_t i) {
|
||||
uint64_t w;
|
||||
for (;; i += 8) {
|
||||
w = READ64LE(s + i);
|
||||
|
|
|
@ -31,15 +31,15 @@ static const int16_t kDel16[8] = {127, 127, 127, 127, 127, 127, 127, 127};
|
|||
static noasan axdx_t tprecode16to8_sse2(char *dst, size_t dstsize,
|
||||
const char16_t *src, axdx_t r) {
|
||||
int16_t v1[8], v2[8], v3[8], vz[8];
|
||||
memset(vz, 0, 16);
|
||||
__builtin_memset(vz, 0, 16);
|
||||
while (r.ax + 8 < dstsize) {
|
||||
memcpy(v1, src + r.dx, 16);
|
||||
__builtin_memcpy(v1, src + r.dx, 16);
|
||||
pcmpgtw(v2, v1, vz);
|
||||
pcmpgtw(v3, v1, kDel16);
|
||||
pandn((void *)v2, (void *)v3, (void *)v2);
|
||||
if (pmovmskb((void *)v2) != 0xFFFF) break;
|
||||
packsswb((void *)v1, v1, v1);
|
||||
memcpy(dst + r.ax, v1, 8);
|
||||
__builtin_memcpy(dst + r.ax, v1, 8);
|
||||
r.ax += 8;
|
||||
r.dx += 8;
|
||||
}
|
||||
|
|
|
@ -28,15 +28,15 @@
|
|||
static inline noasan axdx_t tprecode8to16_sse2(char16_t *dst, size_t dstsize,
|
||||
const char *src, axdx_t r) {
|
||||
uint8_t v1[16], v2[16], vz[16];
|
||||
memset(vz, 0, 16);
|
||||
__builtin_memset(vz, 0, 16);
|
||||
while (r.ax + 16 < dstsize) {
|
||||
memcpy(v1, src + r.dx, 16);
|
||||
__builtin_memcpy(v1, src + r.dx, 16);
|
||||
pcmpgtb((int8_t *)v2, (int8_t *)v1, (int8_t *)vz);
|
||||
if (pmovmskb(v2) != 0xFFFF) break;
|
||||
punpcklbw(v2, v1, vz);
|
||||
punpckhbw(v1, v1, vz);
|
||||
memcpy(dst + r.ax + 0, v2, 16);
|
||||
memcpy(dst + r.ax + 8, v1, 16);
|
||||
__builtin_memcpy(dst + r.ax + 0, v2, 16);
|
||||
__builtin_memcpy(dst + r.ax + 8, v1, 16);
|
||||
r.ax += 16;
|
||||
r.dx += 16;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue