mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-25 20:10:29 +00:00
Make numerous improvements
- Python static hello world now 1.8mb - Python static fully loaded now 10mb - Python HTTPS client now uses MbedTLS - Python REPL now completes import stmts - Increase stack size for Python for now - Begin synthesizing posixpath and ntpath - Restore Python \N{UNICODE NAME} support - Restore Python NFKD symbol normalization - Add optimized code path for Intel SHA-NI - Get more Python unit tests passing faster - Get Python help() pagination working on NT - Python hashlib now supports MbedTLS PBKDF2 - Make memcpy/memmove/memcmp/bcmp/etc. faster - Add Mersenne Twister and Vigna to LIBC_RAND - Provide privileged __printf() for error code - Fix zipos opendir() so that it reports ENOTDIR - Add basic chmod() implementation for Windows NT - Add Cosmo's best functions to Python cosmo module - Pin function trace indent depth to that of caller - Show memory diagram on invalid access in MODE=dbg - Differentiate stack overflow on crash in MODE=dbg - Add stb_truetype and tools for analyzing font files - Upgrade to UNICODE 13 and reduce its binary footprint - COMPILE.COM now logs resource usage of build commands - Start implementing basic poll() support on bare metal - Set getauxval(AT_EXECFN) to GetModuleFileName() on NT - Add descriptions to strerror() in non-TINY build modes - Add COUNTBRANCH() macro to help with micro-optimizations - Make error / backtrace / asan / memory code more unbreakable - Add fast perfect C implementation of μ-Law and a-Law audio codecs - Make strtol() functions consistent with other libc implementations - Improve Linenoise implementation (see also github.com/jart/bestline) - COMPILE.COM now suppresses stdout/stderr of successful build commands
This commit is contained in:
parent
fa7b4f5bd1
commit
39bf41f4eb
806 changed files with 77494 additions and 63859 deletions
115
libc/str/bcmp.c
115
libc/str/bcmp.c
|
@ -16,17 +16,122 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/likely.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
typedef uint64_t xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
|
||||
|
||||
static noinline antiquity int memcmp_sse(const char *p, const char *q,
|
||||
size_t n) {
|
||||
xmm_t a;
|
||||
while (n > 32) {
|
||||
a = *(const xmm_t *)p ^ *(const xmm_t *)q;
|
||||
if (a[0] | a[1]) return 1;
|
||||
p += 16;
|
||||
q += 16;
|
||||
n -= 16;
|
||||
}
|
||||
a = *(const xmm_t *)p ^ *(const xmm_t *)q |
|
||||
*(const xmm_t *)(p + n - 16) ^ *(const xmm_t *)(q + n - 16);
|
||||
return !!(a[0] | a[1]);
|
||||
}
|
||||
|
||||
/**
 * Tests whether 𝑛 bytes at 𝑝 and 𝑞 differ, using 64-byte rounds first.
 *
 * The closing step always loads the first 16 and the last 16 bytes of
 * whatever is left, so 𝑛 must be at least 16 on entry.
 *
 * @return 0 if both regions hold the same bytes, otherwise 1
 */
microarchitecture("avx") static int memcmp_avx(const char *p, const char *q,
                                               size_t n) {
  xmm_t w, x, y, z;
  const xmm_t *pv, *qv;
  /* 64 bytes per round, as long as 16 or more bytes stay behind */
  while (n >= 64 + 16) {
    pv = (const xmm_t *)p;
    qv = (const xmm_t *)q;
    w = pv[0] ^ qv[0];
    x = pv[1] ^ qv[1];
    y = pv[2] ^ qv[2];
    z = pv[3] ^ qv[3];
    w |= x | y | z;
    if (w[0] | w[1]) return 1;
    p += 64;
    q += 64;
    n -= 64;
  }
  /* then single vectors until no more than 32 bytes are left */
  while (n > 16 + 16) {
    w = *(const xmm_t *)p ^ *(const xmm_t *)q;
    if (w[0] | w[1]) return 1;
    p += 16;
    q += 16;
    n -= 16;
  }
  /* first and last vector of the remainder; they may overlap */
  w = *(const xmm_t *)p ^ *(const xmm_t *)q;
  x = *(const xmm_t *)(p + n - 16) ^ *(const xmm_t *)(q + n - 16);
  w |= x;
  return (w[0] | w[1]) != 0;
}
|
||||
|
||||
/**
|
||||
* Compares memory.
|
||||
* Tests inequality of first 𝑛 bytes of 𝑝 and 𝑞.
|
||||
*
|
||||
* This API was thought to be nearly extinct until recent versions
|
||||
* of Clang (c. 2019) started generating synthetic calls to it.
|
||||
* bcmp n=0 992 picoseconds
|
||||
* bcmp n=1 992 ps/byte 984 mb/s
|
||||
* bcmp n=2 661 ps/byte 1,476 mb/s
|
||||
* bcmp n=3 441 ps/byte 2,214 mb/s
|
||||
* bcmp n=4 330 ps/byte 2,952 mb/s
|
||||
* bcmp n=5 264 ps/byte 3,690 mb/s
|
||||
* bcmp n=6 165 ps/byte 5,905 mb/s
|
||||
* bcmp n=7 189 ps/byte 5,166 mb/s
|
||||
* bcmp n=8 124 ps/byte 7,873 mb/s
|
||||
* bcmp n=9 183 ps/byte 5,314 mb/s
|
||||
* bcmp n=15 110 ps/byte 8,857 mb/s
|
||||
* bcmp n=16 62 ps/byte 15,746 mb/s
|
||||
* bcmp n=17 175 ps/byte 5,577 mb/s
|
||||
* bcmp n=31 96 ps/byte 10,169 mb/s
|
||||
* bcmp n=32 93 ps/byte 10,497 mb/s
|
||||
* bcmp n=33 80 ps/byte 12,179 mb/s
|
||||
* bcmp n=80 37 ps/byte 26,244 mb/s
|
||||
* bcmp n=128 36 ps/byte 26,994 mb/s
|
||||
* bcmp n=256 27 ps/byte 35,992 mb/s
|
||||
* bcmp n=16384 19 ps/byte 49,411 mb/s
|
||||
* bcmp n=32768 27 ps/byte 34,914 mb/s
|
||||
* bcmp n=131072 30 ps/byte 32,303 mb/s
|
||||
*
|
||||
* @return 0 if a and b have equal contents, otherwise non-zero
|
||||
* @return 0 if a and b have equal contents, otherwise nonzero
|
||||
* @see timingsafe_bcmp()
|
||||
* @asyncsignalsafe
|
||||
*/
|
||||
int bcmp(const void *a, const void *b, size_t n) {
|
||||
return memcmp(a, b, n);
|
||||
int c;
|
||||
unsigned u;
|
||||
uint32_t i, j;
|
||||
uint64_t x, y;
|
||||
const char *p, *q;
|
||||
if ((p = a) == (q = b)) return 0;
|
||||
if (!IsTiny()) {
|
||||
if (n <= 16) {
|
||||
if (n >= 8) {
|
||||
__builtin_memcpy(&x, p, 8);
|
||||
__builtin_memcpy(&y, q, 8);
|
||||
if (x ^ y) return 1;
|
||||
__builtin_memcpy(&x, p + n - 8, 8);
|
||||
__builtin_memcpy(&y, q + n - 8, 8);
|
||||
return !!(x ^ y);
|
||||
} else if (n >= 4) {
|
||||
__builtin_memcpy(&i, p, 4);
|
||||
__builtin_memcpy(&j, q, 4);
|
||||
if (i ^ j) return 1;
|
||||
__builtin_memcpy(&i, p + n - 4, 4);
|
||||
__builtin_memcpy(&j, q + n - 4, 4);
|
||||
return !!(i ^ j);
|
||||
}
|
||||
} else if (LIKELY(X86_HAVE(AVX))) {
|
||||
return memcmp_avx(p, q, n);
|
||||
} else {
|
||||
return memcmp_sse(p, q, n);
|
||||
}
|
||||
}
|
||||
while (n--) {
|
||||
if ((c = p[n] ^ q[n])) {
|
||||
return c;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -153,6 +153,20 @@ int BLAKE2B256_Final(struct Blake2b *b2b,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes blake2b 256bit message digest.
|
||||
*
|
||||
* blake2b256 n=0 191 nanoseconds
|
||||
* blake2b256 n=8 23 ns/byte 40,719 kb/s
|
||||
* blake2b256 n=31 6 ns/byte 153 mb/s
|
||||
* blake2b256 n=32 6 ns/byte 158 mb/s
|
||||
* blake2b256 n=63 3 ns/byte 312 mb/s
|
||||
* blake2b256 n=64 3 ns/byte 317 mb/s
|
||||
* blake2b256 n=128 1 ns/byte 640 mb/s
|
||||
* blake2b256 n=256 1 ns/byte 662 mb/s
|
||||
* blake2b256 n=22851 1 ns/byte 683 mb/s
|
||||
*
|
||||
*/
|
||||
int BLAKE2B256(const void *data, size_t len,
|
||||
uint8_t out[BLAKE2B256_DIGEST_LENGTH]) {
|
||||
struct Blake2b ctx;
|
||||
|
|
|
@ -1,71 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/str/str.h"
|
||||
|
||||
typedef long long xmm_t __attribute__((__vector_size__(16), __aligned__(1)));

/**
 * Sets memory to zero.
 *
 * @param p is memory address
 * @param n is byte length
 */
void bzero(void *p, size_t n) {
  char *dst = p;
  uint64_t zero = 0;
  if (!n) return;
  if (n > 16) {
    /* clear 32 bytes per round from the end, then finish at the front */
    while (n > 32) {
      *(xmm_t *)(dst + n - 16) = (xmm_t){0};
      *(xmm_t *)(dst + n - 32) = (xmm_t){0};
      n -= 32;
    }
    if (n > 16) *(xmm_t *)(dst + n - 16) = (xmm_t){0};
    *(xmm_t *)dst = (xmm_t){0};
  } else if (n == 16) {
    *(xmm_t *)dst = (xmm_t){0};
  } else if (n >= 8) {
    /* two stores that overlap unless n is exactly twice the width */
    __builtin_memcpy(dst, &zero, 8);
    __builtin_memcpy(dst + n - 8, &zero, 8);
  } else if (n >= 4) {
    __builtin_memcpy(dst, &zero, 4);
    __builtin_memcpy(dst + n - 4, &zero, 4);
  } else if (n >= 2) {
    __builtin_memcpy(dst, &zero, 2);
    __builtin_memcpy(dst + n - 2, &zero, 2);
  } else {
    __builtin_memcpy(dst, &zero, 1);
  }
}
|
|
@ -23,7 +23,7 @@
|
|||
// @param edi is init crc32 value
|
||||
// @param rsi is nullable pointer to data
|
||||
// @param edx is int size per zlib interface
|
||||
crc32: movslq %edx,%rdx
|
||||
crc32: mov %edx,%edx
|
||||
jmp crc32_z
|
||||
.endfn crc32,globl
|
||||
.source __FILE__
|
||||
|
|
|
@ -16,11 +16,21 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/assert.h"
|
||||
#include "libc/bits/safemacros.internal.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/intrin/asan.internal.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/nexgen32e/crc32.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
static uint32_t kCrc32Tab[256];
|
||||
/**
 * Assembles a 64-bit word from the eight bytes at 𝑝, lowest address
 * in the least significant byte.
 */
static inline noasan uint64_t WildRead64(const signed char *p) {
  uint32_t lo, hi;
  lo = (uint32_t)(255 & p[0]) << 000 | (uint32_t)(255 & p[1]) << 010 |
       (uint32_t)(255 & p[2]) << 020 | (uint32_t)(255 & p[3]) << 030;
  hi = (uint32_t)(255 & p[4]) << 000 | (uint32_t)(255 & p[5]) << 010 |
       (uint32_t)(255 & p[6]) << 020 | (uint32_t)(255 & p[7]) << 030;
  return (uint64_t)hi << 32 | lo;
}
|
||||
|
||||
/**
|
||||
* Computes Phil Katz CRC-32 used by zip/zlib/gzip/etc.
|
||||
|
@ -34,28 +44,29 @@ static uint32_t kCrc32Tab[256];
|
|||
* @param h is initial value
|
||||
*/
|
||||
uint32_t crc32_z(uint32_t h, const void *data, size_t size) {
|
||||
const unsigned char *p, *pe;
|
||||
size_t n;
|
||||
static bool once;
|
||||
size_t skip;
|
||||
const unsigned char *p, *e;
|
||||
static uint32_t kCrc32Tab[256];
|
||||
if (!once) {
|
||||
crc32init(kCrc32Tab, 0xedb88320);
|
||||
once = true;
|
||||
once = 0;
|
||||
}
|
||||
if (data) {
|
||||
h ^= 0xffffffff;
|
||||
if (size >= 64 && X86_HAVE(PCLMUL)) {
|
||||
h = crc32_pclmul(h, data, size); /* 51x faster */
|
||||
skip = rounddown(size, 16);
|
||||
} else {
|
||||
skip = 0;
|
||||
}
|
||||
p = (const unsigned char *)data + skip;
|
||||
pe = (const unsigned char *)data + size;
|
||||
while (p < pe) {
|
||||
if (size == -1) {
|
||||
size = data ? strlen(data) : 0;
|
||||
}
|
||||
p = data;
|
||||
e = p + size;
|
||||
h ^= 0xffffffff;
|
||||
if (X86_HAVE(PCLMUL)) {
|
||||
while (((intptr_t)p & 15) && p < e)
|
||||
h = h >> 8 ^ kCrc32Tab[(h & 0xff) ^ *p++];
|
||||
if ((n = ROUNDDOWN(e - p, 16)) >= 64) {
|
||||
if (IsAsan()) __asan_verify(p, n);
|
||||
h = crc32_pclmul(h, p, n); /* 51x faster */
|
||||
p += n;
|
||||
}
|
||||
return h ^ 0xffffffff;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
while (p < e) h = h >> 8 ^ kCrc32Tab[(h & 0xff) ^ *p++];
|
||||
return h ^ 0xffffffff;
|
||||
}
|
||||
|
|
|
@ -152,8 +152,21 @@ static void ProcessAll(const uint8_t *data, size_t size, const uint64_t key[4],
|
|||
if ((size & 31) != 0) HighwayHashUpdateRemainder(data + i, size & 31, state);
|
||||
}
|
||||
|
||||
uint64_t HighwayHash64(const uint8_t *data, size_t size,
|
||||
const uint64_t key[4]) {
|
||||
/**
|
||||
* Computes Highway Hash.
|
||||
*
|
||||
* highwayhash64 n=0 121 nanoseconds
|
||||
* highwayhash64 n=8 16 ns/byte 59,865 kb/s
|
||||
* highwayhash64 n=31 4 ns/byte 222 mb/s
|
||||
* highwayhash64 n=32 3 ns/byte 248 mb/s
|
||||
* highwayhash64 n=63 2 ns/byte 387 mb/s
|
||||
* highwayhash64 n=64 2 ns/byte 422 mb/s
|
||||
* highwayhash64 n=128 1 ns/byte 644 mb/s
|
||||
* highwayhash64 n=256 1 ns/byte 875 mb/s
|
||||
* highwayhash64 n=22851 721 ps/byte 1,354 mb/s
|
||||
*
|
||||
*/
|
||||
uint64_t HighwayHash64(const void *data, size_t size, const uint64_t key[4]) {
|
||||
HighwayHashState state;
|
||||
ProcessAll(data, size, key, &state);
|
||||
return HighwayHashFinalize64(&state);
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
uint64_t HighwayHash64(const uint8_t *, size_t, const uint64_t[4]);
|
||||
uint64_t HighwayHash64(const void *, size_t, const uint64_t[4]);
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
|
|
|
@ -21,20 +21,51 @@
|
|||
|
||||
/**
|
||||
* Compares memory case-insensitively.
|
||||
*
|
||||
* memcasecmp n=0 992 picoseconds
|
||||
* memcasecmp n=1 1 ns/byte 590 mb/s
|
||||
* memcasecmp n=2 1 ns/byte 843 mb/s
|
||||
* memcasecmp n=3 1 ns/byte 885 mb/s
|
||||
* memcasecmp n=4 1 ns/byte 843 mb/s
|
||||
* memcasecmp n=5 1 ns/byte 820 mb/s
|
||||
* memcasecmp n=6 1 ns/byte 770 mb/s
|
||||
* memcasecmp n=7 1 ns/byte 765 mb/s
|
||||
* memcasecmp n=8 206 ps/byte 4,724 mb/s
|
||||
* memcasecmp n=9 220 ps/byte 4,428 mb/s
|
||||
* memcasecmp n=15 617 ps/byte 1,581 mb/s
|
||||
* memcasecmp n=16 124 ps/byte 7,873 mb/s
|
||||
* memcasecmp n=17 155 ps/byte 6,274 mb/s
|
||||
* memcasecmp n=31 341 ps/byte 2,860 mb/s
|
||||
* memcasecmp n=32 82 ps/byte 11,810 mb/s
|
||||
* memcasecmp n=33 100 ps/byte 9,743 mb/s
|
||||
* memcasecmp n=80 53 ps/byte 18,169 mb/s
|
||||
* memcasecmp n=128 49 ps/byte 19,890 mb/s
|
||||
* memcasecmp n=256 45 ps/byte 21,595 mb/s
|
||||
* memcasecmp n=16384 42 ps/byte 22,721 mb/s
|
||||
* memcasecmp n=32768 40 ps/byte 24,266 mb/s
|
||||
* memcasecmp n=131072 40 ps/byte 24,337 mb/s
|
||||
*
|
||||
* @return is <0, 0, or >0 based on uint8_t comparison
|
||||
*/
|
||||
int memcasecmp(const void *p, const void *q, size_t n) {
|
||||
int c;
|
||||
size_t i;
|
||||
unsigned u;
|
||||
uint64_t w;
|
||||
const unsigned char *a, *b;
|
||||
if ((a = p) != (b = q)) {
|
||||
for (i = 0; i < n; ++i) {
|
||||
while (i + 8 <= n) {
|
||||
w = READ64LE(a);
|
||||
w ^= READ64LE(b);
|
||||
if (w) {
|
||||
i += (unsigned)__builtin_ctzll(w) >> 3;
|
||||
if ((w = (((uint64_t)a[0] << 000 | (uint64_t)a[1] << 010 |
|
||||
(uint64_t)a[2] << 020 | (uint64_t)a[3] << 030 |
|
||||
(uint64_t)a[4] << 040 | (uint64_t)a[5] << 050 |
|
||||
(uint64_t)a[6] << 060 | (uint64_t)a[7] << 070) ^
|
||||
((uint64_t)b[0] << 000 | (uint64_t)b[1] << 010 |
|
||||
(uint64_t)b[2] << 020 | (uint64_t)b[3] << 030 |
|
||||
(uint64_t)b[4] << 040 | (uint64_t)b[5] << 050 |
|
||||
(uint64_t)b[6] << 060 | (uint64_t)b[7] << 070)))) {
|
||||
u = __builtin_ctzll(w);
|
||||
i += u >> 3;
|
||||
break;
|
||||
} else {
|
||||
i += 8;
|
||||
|
|
|
@ -1,146 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/intrin/pcmpeqb.h"
|
||||
#include "libc/intrin/pmovmskb.h"
|
||||
#include "libc/nexgen32e/bsf.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Compares memory.
|
||||
*
|
||||
* @return unsigned char subtraction at stop index
|
||||
* @asyncsignalsafe
|
||||
*/
|
||||
int memcmp(const void *a, const void *b, size_t n) {
  int d;
  unsigned k;
  uint64_t x;
  uint8_t A[16], B[16];
  const uint8_t *p, *q;
  if ((p = a) == (q = b)) return 0;
  if (IsTiny()) {
    /* compact variant: quadwords first, then single bytes */
    while (n >= 8) {
      if ((x = READ64LE(p) ^ READ64LE(q))) {
        k = bsfl(x) >> 3;
        return p[k] - q[k];
      }
      p += 8;
      q += 8;
      n -= 8;
    }
    while (n) {
      if ((d = *p - *q)) return d;
      ++p;
      ++q;
      --n;
    }
    return 0;
  }
  /* 16 bytes per round: the equality mask minus 0xffff is nonzero on a
     mismatch and its lowest set bit is the first differing byte */
  while (n >= 16) {
    memcpy(A, p, 16);
    memcpy(B, q, 16);
    pcmpeqb(A, A, B);
    if ((k = pmovmskb(A) - 0xffff)) {
      k = bsf(k);
      return p[k] - q[k];
    }
    p += 16;
    q += 16;
    n -= 16;
  }
  /* 9..15 bytes left: compare one quadword so that at most 7 remain */
  if (n > 8) {
    x = ((uint64_t)p[0] << 000 | (uint64_t)p[1] << 010 |
         (uint64_t)p[2] << 020 | (uint64_t)p[3] << 030 |
         (uint64_t)p[4] << 040 | (uint64_t)p[5] << 050 |
         (uint64_t)p[6] << 060 | (uint64_t)p[7] << 070) ^
        ((uint64_t)q[0] << 000 | (uint64_t)q[1] << 010 |
         (uint64_t)q[2] << 020 | (uint64_t)q[3] << 030 |
         (uint64_t)q[4] << 040 | (uint64_t)q[5] << 050 |
         (uint64_t)q[6] << 060 | (uint64_t)q[7] << 070);
    if (x) {
      k = bsfl(x) >> 3;
      return p[k] - q[k];
    }
    p += 8;
    q += 8;
    n -= 8;
  }
  /* 0..8 bytes left: gather the per-byte differences into one word,
     byte 0 in the low bits */
  x = 0;
  switch (n) {
    case 8:
      x |= (uint64_t)(p[7] ^ q[7]) << 070;
      /* fallthrough */
    case 7:
      x |= (uint64_t)(p[6] ^ q[6]) << 060;
      /* fallthrough */
    case 6:
      x |= (uint64_t)(p[5] ^ q[5]) << 050;
      /* fallthrough */
    case 5:
      x |= (uint64_t)(p[4] ^ q[4]) << 040;
      /* fallthrough */
    case 4:
      x |= (uint64_t)(p[3] ^ q[3]) << 030;
      /* fallthrough */
    case 3:
      x |= (uint64_t)(p[2] ^ q[2]) << 020;
      /* fallthrough */
    case 2:
      x |= (uint64_t)(p[1] ^ q[1]) << 010;
      /* fallthrough */
    case 1:
      x |= (uint64_t)(p[0] ^ q[0]) << 000;
      /* fallthrough */
    default:
      break;
  }
  if (!x) return 0;
  k = bsfl(x) >> 3;
  return p[k] - q[k];
}
|
|
@ -1,160 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/str/str.h"
|
||||
|
||||
typedef long long xmm_t __attribute__((__vector_size__(16), __aligned__(1)));

/**
 * Moves 𝑛 ≤ 32 bytes from 𝑠 to 𝑑.
 *
 * Both ends of the source are loaded before anything is stored, which
 * keeps the result right when the two regions overlap.
 */
static inline void memmove_pure_small(char *d, const char *s, size_t n) {
  xmm_t v, w;
  uint64_t a, b;
  if (n >= 16) {
    __builtin_memcpy(&v, s, 16);
    __builtin_memcpy(&w, s + n - 16, 16);
    __builtin_memcpy(d, &v, 16);
    __builtin_memcpy(d + n - 16, &w, 16);
  } else if (n >= 8) {
    __builtin_memcpy(&a, s, 8);
    __builtin_memcpy(&b, s + n - 8, 8);
    __builtin_memcpy(d, &a, 8);
    __builtin_memcpy(d + n - 8, &b, 8);
  } else if (n >= 4) {
    __builtin_memcpy(&a, s, 4);
    __builtin_memcpy(&b, s + n - 4, 4);
    __builtin_memcpy(d, &a, 4);
    __builtin_memcpy(d + n - 4, &b, 4);
  } else if (n >= 2) {
    __builtin_memcpy(&a, s, 2);
    __builtin_memcpy(&b, s + n - 2, 2);
    __builtin_memcpy(d, &a, 2);
    __builtin_memcpy(d + n - 2, &b, 2);
  } else if (n) {
    *d = *s;
  }
}

/**
 * Copies memory.
 *
 * DST and SRC may overlap.
 *
 * @param dst is destination
 * @param src is memory to copy
 * @param n is number of bytes to copy
 * @return dst
 * @asyncsignalsafe
 */
void *memmove_pure(void *dst, const void *src, size_t n) {
  size_t i;
  xmm_t v, w;
  char *d = dst;
  const char *s = src;
  if (n > 32) {
    if (d > s) {
      /* destination lies above the source: copy from the top down,
         which leaves the lowest n%32 bytes for the end */
      do {
        n -= 32;
        __builtin_memcpy(&v, s + n, 16);
        __builtin_memcpy(&w, s + n + 16, 16);
        __builtin_memcpy(d + n, &v, 16);
        __builtin_memcpy(d + n + 16, &w, 16);
      } while (n >= 32);
    } else {
      /* otherwise copy from the bottom up and step past what's done */
      i = 0;
      do {
        __builtin_memcpy(&v, s + i, 16);
        __builtin_memcpy(&w, s + i + 16, 16);
        __builtin_memcpy(d + i, &v, 16);
        __builtin_memcpy(d + i + 16, &w, 16);
      } while ((i += 32) + 32 <= n);
      d += i;
      s += i;
      n -= i;
    }
  }
  memmove_pure_small(d, s, n);
  return dst;
}
|
|
@ -1,100 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Stores the first 𝑛 ≤ 16 bytes of the repeated-byte pattern 𝑥 at 𝑏,
 * using at most two stores that may overlap.
 */
static inline void memset_pure_small(char *b, uint64_t x, size_t n) {
  if (n >= 8) {
    __builtin_memcpy(b, &x, 8);
    __builtin_memcpy(b + n - 8, &x, 8);
  } else if (n >= 4) {
    __builtin_memcpy(b, &x, 4);
    __builtin_memcpy(b + n - 4, &x, 4);
  } else if (n >= 2) {
    __builtin_memcpy(b, &x, 2);
    __builtin_memcpy(b + n - 2, &x, 2);
  } else if (n) {
    __builtin_memcpy(b, &x, 1);
  }
}

/**
 * Sets memory.
 *
 * @param p is memory address
 * @param c is masked with 255 and used as repeated byte
 * @param n is byte length
 * @return p
 * @asyncsignalsafe
 */
void *memset_pure(void *p, int c, size_t n) {
  char *b = p;
  /* replicate the low byte of c into all eight byte lanes */
  uint64_t x = 0x0101010101010101ul * (c & 0xff);
  if (n > 16) {
    /* fill 16 bytes per round from the end towards the start */
    do {
      n -= 16;
      __builtin_memcpy(b + n, &x, 8);
      __asm__ volatile("" ::: "memory");
      __builtin_memcpy(b + n + 8, &x, 8);
    } while (n >= 16);
  }
  /* the 0..16 bytes still unset are at the very start of the buffer */
  memset_pure_small(b, x, n);
  return p;
}
|
|
@ -16,9 +16,13 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/nexgen32e/kompressor.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
void *mempcpy_pure(void *dst, const void *src, size_t n) {
|
||||
memmove_pure(dst, src, n);
|
||||
return (char *)dst + n;
|
||||
void rldecode2(void *d, const struct RlDecode *r) {
|
||||
char *p;
|
||||
for (p = d; r->repititions; ++r) {
|
||||
memset(p, r->byte, r->repititions);
|
||||
p += r->repititions;
|
||||
}
|
||||
}
|
|
@ -36,7 +36,7 @@
|
|||
char *stpncpy(char *dest, const char *src, size_t stride) {
  char *p;
  /* memccpy() yields the address just past the copied NUL, or null when
     no NUL turned up within stride bytes */
  if ((p = memccpy(dest, src, '\0', stride))) {
    /* zero what's left of the stride-sized field, once */
    bzero(p, dest + stride - p);
  }
  return dest + stride;
}
|
||||
|
|
154
libc/str/str.h
154
libc/str/str.h
|
@ -81,15 +81,20 @@ wint_t towupper(wint_t);
|
|||
│ cosmopolitan § strings ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
void bzero(void *, size_t) libcesque;
|
||||
void bzero(void *, size_t) memcpyesque;
|
||||
void *memset(void *, int, size_t) memcpyesque;
|
||||
void *memmove(void *, const void *, size_t) memcpyesque;
|
||||
void *memcpy(void *restrict, const void *restrict, size_t) memcpyesque;
|
||||
void *mempcpy(void *restrict, const void *restrict, size_t) memcpyesque;
|
||||
void *memccpy(void *restrict, const void *restrict, int, size_t) memcpyesque;
|
||||
void *memmove(void *, const void *, size_t) memcpyesque;
|
||||
void *memeqmask(void *, const void *, const void *, size_t) memcpyesque;
|
||||
void explicit_bzero(void *, size_t);
|
||||
|
||||
int bcmp(const void *, const void *, size_t) strlenesque;
|
||||
int memcmp(const void *, const void *, size_t) strlenesque;
|
||||
int timingsafe_bcmp(const void *, const void *, size_t);
|
||||
int timingsafe_memcmp(const void *, const void *, size_t);
|
||||
|
||||
size_t strlen(const char *) strlenesque;
|
||||
size_t strnlen(const char *, size_t) strlenesque;
|
||||
size_t strnlen_s(const char *, size_t);
|
||||
|
@ -115,7 +120,6 @@ char *strstr(const char *, const char *) strlenesque;
|
|||
char16_t *strstr16(const char16_t *, const char16_t *) strlenesque;
|
||||
wchar_t *wcsstr(const wchar_t *, const wchar_t *) strlenesque;
|
||||
void *rawwmemchr(const void *, wchar_t) strlenesque returnsnonnull;
|
||||
int memcmp(const void *, const void *, size_t) strlenesque;
|
||||
int strcmp(const char *, const char *) strlenesque;
|
||||
int strncmp(const char *, const char *, size_t) strlenesque;
|
||||
int strcmp16(const char16_t *, const char16_t *) strlenesque;
|
||||
|
@ -182,10 +186,9 @@ const char *IndexDoubleNulString(const char *, unsigned) strlenesque;
|
|||
int strverscmp(const char *, const char *);
|
||||
wchar_t *wmemset(wchar_t *, wchar_t, size_t) memcpyesque;
|
||||
char16_t *memset16(char16_t *, char16_t, size_t) memcpyesque;
|
||||
compatfn wchar_t *wmemcpy(wchar_t *, const wchar_t *, size_t) memcpyesque;
|
||||
compatfn wchar_t *wmempcpy(wchar_t *, const wchar_t *, size_t) memcpyesque;
|
||||
compatfn wchar_t *wmemmove(wchar_t *, const wchar_t *, size_t) memcpyesque;
|
||||
int timingsafe_memcmp(const void *, const void *, size_t);
|
||||
wchar_t *wmemcpy(wchar_t *, const wchar_t *, size_t) memcpyesque;
|
||||
wchar_t *wmempcpy(wchar_t *, const wchar_t *, size_t) memcpyesque;
|
||||
wchar_t *wmemmove(wchar_t *, const wchar_t *, size_t) memcpyesque;
|
||||
void *tinymemccpy(void *, const void *, int, size_t) memcpyesque;
|
||||
void *memmem(const void *, size_t, const void *, size_t) libcesque nosideeffect;
|
||||
char *strerror(int) returnsnonnull nothrow nocallback;
|
||||
|
@ -203,11 +206,6 @@ bool IsText(const void *, size_t);
|
|||
bool IsUtf8(const void *, size_t);
|
||||
bool _isabspath(const char *) strlenesque;
|
||||
bool escapedos(char16_t *, unsigned, const char16_t *, unsigned);
|
||||
void *memset_pure(void *, int, size_t) memcpyesque;
|
||||
void *memmove_pure(void *, const void *, size_t) memcpyesque;
|
||||
void *mempcpy_pure(void *, const void *, size_t) memcpyesque;
|
||||
size_t strlen_pure(const char *) strlenesque;
|
||||
size_t strcspn_pure(const char *, const char *) strlenesque;
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § strings » multibyte ─╬─│┼
|
||||
|
@ -262,133 +260,11 @@ int iswctype(wint_t, wctype_t) pureconst;
|
|||
char *strsignal(int) returnsnonnull libcesque;
|
||||
|
||||
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § strings » optimizations ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
#define __memcpy_isgoodsize(SIZE) \
|
||||
(__builtin_constant_p(SIZE) && ((SIZE) <= __BIGGEST_ALIGNMENT__ && \
|
||||
__builtin_popcountl((unsigned)(SIZE)) == 1))
|
||||
|
||||
#define __memset_isgoodsize(SIZE) \
|
||||
(__builtin_constant_p(SIZE) && \
|
||||
(((SIZE) <= __BIGGEST_ALIGNMENT__ && \
|
||||
__builtin_popcountl((unsigned)(SIZE)) == 1) || \
|
||||
((SIZE) % __BIGGEST_ALIGNMENT__ == 0 && \
|
||||
(SIZE) / __BIGGEST_ALIGNMENT__ <= 3)))
|
||||
|
||||
#define memcpy(DEST, SRC, SIZE) \
|
||||
(__memcpy_isgoodsize(SIZE) ? __builtin_memcpy(DEST, SRC, SIZE) \
|
||||
: __memcpy("MemCpy", DEST, SRC, SIZE))
|
||||
|
||||
#define memset(DEST, BYTE, SIZE) \
|
||||
(__memset_isgoodsize(SIZE) ? __builtin_memset(DEST, BYTE, SIZE) \
|
||||
: __memset(DEST, BYTE, SIZE))
|
||||
|
||||
#if defined(__STDC_HOSTED__) && defined(__SSE2__)
|
||||
|
||||
#define strlen(STR) \
|
||||
(__builtin_constant_p(STR) ? __builtin_strlen(STR) : ({ \
|
||||
size_t LeN; \
|
||||
const char *StR = (STR); \
|
||||
asm("call\tstrlen" \
|
||||
: "=a"(LeN) \
|
||||
: "D"(StR), "m"(*(char(*)[0x7fffffff])StR) \
|
||||
: "rcx", "rdx", "xmm3", "xmm4", "cc"); \
|
||||
LeN; \
|
||||
}))
|
||||
|
||||
#define memmove(DEST, SRC, SIZE) __memcpy("MemMove", (DEST), (SRC), (SIZE))
|
||||
|
||||
#define mempcpy(DEST, SRC, SIZE) \
|
||||
({ \
|
||||
size_t SIze = (SIZE); \
|
||||
(void *)((char *)memcpy((DEST), (SRC), SIze) + SIze); \
|
||||
})
|
||||
|
||||
#define __memcpy(FN, DEST, SRC, SIZE) \
|
||||
({ \
|
||||
void *DeSt = (DEST); \
|
||||
const void *SrC = (SRC); \
|
||||
size_t SiZe = (SIZE); \
|
||||
asm("call\t" FN \
|
||||
: "=m"(*(char(*)[SiZe])(DeSt)) \
|
||||
: "D"(DeSt), "S"(SrC), "d"(SiZe), "m"(*(const char(*)[SiZe])(SrC)) \
|
||||
: "xmm3", "xmm4", "rcx", "cc"); \
|
||||
DeSt; \
|
||||
})
|
||||
|
||||
#define __memset(DEST, BYTE, SIZE) \
|
||||
({ \
|
||||
void *DeSt = (DEST); \
|
||||
size_t SiZe = (SIZE); \
|
||||
asm("call\tMemSet" \
|
||||
: "=m"(*(char(*)[SiZe])(DeSt)) \
|
||||
: "D"(DeSt), "S"(BYTE), "d"(SiZe) \
|
||||
: "xmm3", "xmm4", "rcx", "cc"); \
|
||||
DeSt; \
|
||||
})
|
||||
|
||||
#else /* hosted+sse2 */
|
||||
|
||||
#define mempcpy(DEST, SRC, SIZE) \
|
||||
({ \
|
||||
void *Rdi, *Dest = (DEST); \
|
||||
const void *Rsi, *Src = (SRC); \
|
||||
size_t SiZe = (SIZE); \
|
||||
size_t Rcx; \
|
||||
asm("rep movsb" \
|
||||
: "=D"(Rdi), "=S"(Rsi), "=c"(Rcx), "=m"(*(char(*)[SiZe])(Dest)) \
|
||||
: "0"(Dest), "1"(Src), "2"(SiZe), "m"(*(const char(*)[SiZe])(Src)) \
|
||||
: "cc"); \
|
||||
Rdi; \
|
||||
})
|
||||
|
||||
#define __memcpy(FN, DEST, SRC, SIZE) \
|
||||
({ \
|
||||
void *Rdi, *Dest = (DEST); \
|
||||
const void *Rsi, *Src = (SRC); \
|
||||
size_t SiZe = (SIZE); \
|
||||
size_t Rcx; \
|
||||
asm("rep movsb" \
|
||||
: "=D"(Rdi), "=S"(Rsi), "=c"(Rcx), "=m"(*(char(*)[SiZe])(Dest)) \
|
||||
: "0"(Dest), "1"(Src), "2"(SiZe), "m"(*(const char(*)[SiZe])(Src)) \
|
||||
: "cc"); \
|
||||
Dest; \
|
||||
})
|
||||
|
||||
#define __memset(DEST, BYTE, SIZE) \
|
||||
({ \
|
||||
void *Rdi, *Dest = (DEST); \
|
||||
size_t SiZe = (SIZE); \
|
||||
size_t Rcx; \
|
||||
asm("rep stosb" \
|
||||
: "=D"(Rdi), "=c"(Rcx), "=m"(*(char(*)[SiZe])(Dest)) \
|
||||
: "0"(Dest), "1"(SiZe), "a"(BYTE) \
|
||||
: "cc"); \
|
||||
Dest; \
|
||||
})
|
||||
|
||||
#endif /* hosted/sse2/unbloat */
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § strings » address sanitizer ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
#if defined(__FSANITIZE_ADDRESS__)
|
||||
|
||||
#undef memcpy
|
||||
#undef memmove
|
||||
#undef mempcpy
|
||||
#undef memset
|
||||
#undef strlen
|
||||
|
||||
#define memcpy memmove_pure
|
||||
#define memmove memmove_pure
|
||||
#define mempcpy mempcpy_pure
|
||||
#define memset memset_pure
|
||||
#define strcspn strcspn_pure
|
||||
#define strlen strlen_pure
|
||||
|
||||
#endif /* __FSANITIZE_ADDRESS__ */
|
||||
/* gcc rewrites to memset otherwise :'( */
|
||||
void __bzero(void *, size_t) asm("bzero") memcpyesque;
|
||||
#define bzero(DEST, SIZE) \
|
||||
((void)((__builtin_constant_p(SIZE)) ? memset(DEST, 0, SIZE) \
|
||||
: __bzero(DEST, SIZE)))
|
||||
#endif /* __GNUC__ && !__STRICT_ANSI__ */
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
|
|
|
@ -31,7 +31,8 @@ LIBC_STR_A_DIRECTDEPS = \
|
|||
LIBC_INTRIN \
|
||||
LIBC_STUBS \
|
||||
LIBC_SYSV \
|
||||
LIBC_NEXGEN32E
|
||||
LIBC_NEXGEN32E \
|
||||
THIRD_PARTY_COMPILER_RT
|
||||
|
||||
LIBC_STR_A_DEPS := \
|
||||
$(call uniq,$(foreach x,$(LIBC_STR_A_DIRECTDEPS),$($(x))))
|
||||
|
@ -48,15 +49,10 @@ o/$(MODE)/libc/str/memmem.o: \
|
|||
OVERRIDE_CPPFLAGS += \
|
||||
-DSTACK_FRAME_UNLIMITED
|
||||
|
||||
o//libc/str/bzero.o: \
|
||||
OVERRIDE_CFLAGS += \
|
||||
-O2
|
||||
|
||||
o/$(MODE)/libc/str/dosdatetimetounix.o: \
|
||||
OVERRIDE_CFLAGS += \
|
||||
-O3
|
||||
|
||||
o//libc/str/memcmp.o \
|
||||
o/$(MODE)/libc/str/getzipcdir.o \
|
||||
o/$(MODE)/libc/str/getzipcdircomment.o \
|
||||
o/$(MODE)/libc/str/getzipcdircommentsize.o \
|
||||
|
@ -88,11 +84,6 @@ o/$(MODE)/libc/str/windowstimetotimespec.o: \
|
|||
OVERRIDE_CFLAGS += \
|
||||
-O3
|
||||
|
||||
o/$(MODE)/libc/str/hey-gcc.asm \
|
||||
o/$(MODE)/libc/str/hey.o: \
|
||||
OVERRIDE_CFLAGS += \
|
||||
-fsanitize=undefined
|
||||
|
||||
LIBC_STR_LIBS = $(foreach x,$(LIBC_STR_ARTIFACTS),$($(x)))
|
||||
LIBC_STR_SRCS = $(foreach x,$(LIBC_STR_ARTIFACTS),$($(x)_SRCS))
|
||||
LIBC_STR_HDRS = $(foreach x,$(LIBC_STR_ARTIFACTS),$($(x)_HDRS))
|
||||
|
@ -101,7 +92,7 @@ LIBC_STR_BINS = $(foreach x,$(LIBC_STR_ARTIFACTS),$($(x)_BINS))
|
|||
LIBC_STR_CHECKS = $(foreach x,$(LIBC_STR_ARTIFACTS),$($(x)_CHECKS))
|
||||
LIBC_STR_OBJS = $(foreach x,$(LIBC_STR_ARTIFACTS),$($(x)_OBJS))
|
||||
LIBC_STR_TESTS = $(foreach x,$(LIBC_STR_ARTIFACTS),$($(x)_TESTS))
|
||||
$(LIBC_STR_OBJS): $(BUILD_FILES) libc/str/str.mk
|
||||
# $(LIBC_STR_OBJS): $(BUILD_FILES) libc/str/str.mk
|
||||
|
||||
.PHONY: o/$(MODE)/libc/str
|
||||
o/$(MODE)/libc/str: $(LIBC_STR_CHECKS)
|
||||
|
|
|
@ -36,13 +36,13 @@ static noasan inline const char *strchr_x64(const char *p, uint64_t c) {
|
|||
if (a <= b) {
|
||||
return p + (a >> 3);
|
||||
} else {
|
||||
return NULL;
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
return p + (a >> 3);
|
||||
}
|
||||
} else {
|
||||
return NULL;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,12 +34,12 @@
|
|||
* @see strspn(), strtok_r()
|
||||
* @asyncsignalsafe
|
||||
*/
|
||||
size_t strcspn_pure(const char *s, const char *reject) {
|
||||
size_t strcspn(const char *s, const char *reject) {
|
||||
size_t i, n;
|
||||
unsigned m;
|
||||
char cv[16], sv[16];
|
||||
if ((n = strlen(reject)) < 16) {
|
||||
memset(sv, 0, 16);
|
||||
bzero(sv, 16);
|
||||
memcpy(sv, reject, n);
|
||||
for (i = 0;; ++i) {
|
||||
cv[0] = s[i];
|
|
@ -1,45 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/assert.h"
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/*
 * Scans s eight bytes at a time, starting at offset i, and returns the
 * offset of the first NUL byte found. Each step loads a whole 8-byte
 * word, so bytes following the terminator inside that word are read as
 * well; presumably that is why the function is marked noasan (confirm).
 */
static inline noasan size_t strlen_pure_x64(const char *s, size_t i) {
|
||||
uint64_t w;
|
||||
for (;; i += 8) {
|
||||
w = READ64LE(s + i);
|
||||
/* zero-byte test: leaves 0x80 set in the lowest byte of w that was zero
   (higher bytes may be flagged spuriously; only the lowest bit is used) */
if ((w = ~w & (w - 0x0101010101010101) & 0x8080808080808080)) {
|
||||
/* bit index of the lowest flag divided by 8 is the byte index in the word */
return i + ((unsigned)__builtin_ctzll(w) >> 3);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns length of NUL-terminated string.
|
||||
*
* Bytes are checked one at a time until the address is 8-byte aligned,
* after which strlen_pure_x64() continues the scan a word at a time.
*
* @param s is a NUL-terminated string
* @return number of bytes before the terminating NUL
*/
|
||||
size_t strlen_pure(const char *s) {
|
||||
size_t i;
|
||||
/* byte-wise prologue: stop as soon as s + i is a multiple of 8 */
for (i = 0; (uintptr_t)(s + i) & 7; ++i) {
|
||||
if (!s[i]) return i;
|
||||
}
|
||||
i = strlen_pure_x64(s, i);
|
||||
/* sanity checks: a nonzero length implies a nonzero first byte, and
   s[i] must be the terminator */
assert(!i || s[0]);
|
||||
assert(!s[i]);
|
||||
return i;
|
||||
}
|
|
@ -66,6 +66,6 @@ char *strncpy(char *dest, const char *src, size_t stride) {
|
|||
for (i = 0; i < stride; ++i) {
|
||||
if (!(dest[i] = src[i])) break;
|
||||
}
|
||||
memset(dest + i, 0, stride - i);
|
||||
bzero(dest + i, stride - i);
|
||||
return dest;
|
||||
}
|
||||
|
|
154
libc/str/timingsafe_bcmp.c
Normal file
154
libc/str/timingsafe_bcmp.c
Normal file
|
@ -0,0 +1,154 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/likely.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
typedef uint64_t xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
|
||||
|
||||
/*
 * Fallback used by timingsafe_bcmp() when AVX is not available; the
 * dispatcher in this file only calls it with n > 16. The XOR of every
 * 16-byte chunk of p and q is OR'd into one accumulator, so the result
 * is nonzero iff some byte differs. There is no early exit on mismatch.
 */
static noinline antiquity unsigned timingsafe_bcmp_sse(const char *p,
|
||||
const char *q,
|
||||
size_t n) {
|
||||
uint64_t w;
|
||||
xmm_t a = {0};
|
||||
/* consume 16 bytes per step until at most 32 bytes remain */
while (n > 16 + 16) {
|
||||
a |= *(const xmm_t *)p ^ *(const xmm_t *)q;
|
||||
p += 16;
|
||||
q += 16;
|
||||
n -= 16;
|
||||
}
|
||||
/* the remaining 17..32 bytes are covered by two loads that may overlap:
   the first 16 bytes and the last 16 bytes */
a |= *(const xmm_t *)p ^ *(const xmm_t *)q;
|
||||
a |= *(const xmm_t *)(p + n - 16) ^ *(const xmm_t *)(q + n - 16);
|
||||
w = a[0] | a[1];
|
||||
/* fold the high half into the low half so that narrowing to the 32-bit
   return type cannot drop a difference */
return w | w >> 32;
|
||||
}
|
||||
|
||||
/*
 * Variant of the 16-byte-vector comparison that timingsafe_bcmp() selects
 * when X86_HAVE(AVX) is true; the dispatcher only calls it with n > 16.
 * Adds a 4x unrolled loop (64 bytes per iteration) for larger inputs.
 * Returns nonzero iff some byte of p and q differs; no early exit.
 */
microarchitecture("avx") static int timingsafe_bcmp_avx(const char *p,
|
||||
const char *q,
|
||||
size_t n) {
|
||||
uint64_t w;
|
||||
xmm_t a = {0};
|
||||
if (n > 32) {
|
||||
if (n >= 16 + 64) {
|
||||
/* four independent accumulators, merged after the loop */
xmm_t b = {0};
|
||||
xmm_t c = {0};
|
||||
xmm_t d = {0};
|
||||
do {
|
||||
a |= ((const xmm_t *)p)[0] ^ ((const xmm_t *)q)[0];
|
||||
b |= ((const xmm_t *)p)[1] ^ ((const xmm_t *)q)[1];
|
||||
c |= ((const xmm_t *)p)[2] ^ ((const xmm_t *)q)[2];
|
||||
d |= ((const xmm_t *)p)[3] ^ ((const xmm_t *)q)[3];
|
||||
p += 64;
|
||||
q += 64;
|
||||
n -= 64;
|
||||
/* looping only while n >= 80 leaves at least 16 bytes for the tail loads */
} while (n >= 16 + 64);
|
||||
a = a | b | c | d;
|
||||
}
|
||||
/* 16 bytes per step until at most 32 bytes remain */
while (n > 32) {
|
||||
a |= *(const xmm_t *)p ^ *(const xmm_t *)q;
|
||||
p += 16;
|
||||
q += 16;
|
||||
n -= 16;
|
||||
}
|
||||
}
|
||||
/* remaining 16..32 bytes: first 16 and last 16, possibly overlapping */
a |= *(const xmm_t *)p ^ *(const xmm_t *)q;
|
||||
a |= *(const xmm_t *)(p + n - 16) ^ *(const xmm_t *)(q + n - 16);
|
||||
w = a[0] | a[1];
|
||||
/* fold the high half into the low half before narrowing to int */
return w | w >> 32;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests inequality of first 𝑛 bytes of 𝑝 and 𝑞.
|
||||
*
|
||||
* The following expression:
|
||||
*
|
||||
* !!timingsafe_bcmp(p, q, n)
|
||||
*
|
||||
* Is functionally equivalent to:
|
||||
*
|
||||
* !!memcmp(p, q, n)
|
||||
*
|
||||
* This function is faster than memcmp() and bcmp() when byte sequences
|
||||
* are assumed to always be the same; that makes it best for assertions
|
||||
* or hash table lookups, assuming 𝑛 is variable (since no gcc builtin)
|
||||
*
|
||||
* timingsafe_bcmp n=0 992 picoseconds
|
||||
* timingsafe_bcmp n=1 1 ns/byte 738 mb/s
|
||||
* timingsafe_bcmp n=2 826 ps/byte 1,181 mb/s
|
||||
* timingsafe_bcmp n=3 661 ps/byte 1,476 mb/s
|
||||
* timingsafe_bcmp n=4 330 ps/byte 2,952 mb/s
|
||||
* timingsafe_bcmp n=5 264 ps/byte 3,690 mb/s
|
||||
* timingsafe_bcmp n=6 220 ps/byte 4,428 mb/s
|
||||
* timingsafe_bcmp n=7 189 ps/byte 5,166 mb/s
|
||||
* timingsafe_bcmp n=8 124 ps/byte 7,873 mb/s
|
||||
* timingsafe_bcmp n=9 147 ps/byte 6,643 mb/s
|
||||
* timingsafe_bcmp n=15 88 ps/byte 11,072 mb/s
|
||||
* timingsafe_bcmp n=16 62 ps/byte 15,746 mb/s
|
||||
* timingsafe_bcmp n=17 136 ps/byte 7,170 mb/s
|
||||
* timingsafe_bcmp n=31 74 ps/byte 13,075 mb/s
|
||||
* timingsafe_bcmp n=32 72 ps/byte 13,497 mb/s
|
||||
* timingsafe_bcmp n=33 80 ps/byte 12,179 mb/s
|
||||
* timingsafe_bcmp n=80 57 ps/byte 16,871 mb/s
|
||||
* timingsafe_bcmp n=128 49 ps/byte 19,890 mb/s
|
||||
* timingsafe_bcmp n=256 31 ps/byte 31,493 mb/s
|
||||
* timingsafe_bcmp n=16384 14 ps/byte 67,941 mb/s
|
||||
* timingsafe_bcmp n=32768 29 ps/byte 33,121 mb/s
|
||||
* timingsafe_bcmp n=131072 29 ps/byte 32,949 mb/s
|
||||
*
|
||||
* Running time is independent of the byte sequences compared, making
|
||||
* this safe to use for comparing secret values such as cryptographic
|
||||
* MACs. In contrast, memcmp() may short-circuit after finding the first
|
||||
* differing byte.
|
||||
*
|
||||
* @return nonzero if unequal, otherwise zero
|
||||
* @see timingsafe_memcmp()
|
||||
* @asyncsignalsafe
|
||||
*/
|
||||
int timingsafe_bcmp(const void *a, const void *b, size_t n) {
|
||||
const char *p = a, *q = b;
|
||||
uint32_t u, u0, u1, u2, u3;
|
||||
uint64_t w, w0, w1, w2, w3;
|
||||
/* the word/vector fast paths are skipped when IsTiny() is true
   (presumably the size-optimized build mode; confirm in libc/dce.h) */
if (!IsTiny()) {
|
||||
if (n >= 8) {
|
||||
if (n <= 16) {
|
||||
/* 8..16 bytes: two 8-byte loads per buffer (head and tail, which may
   overlap); __builtin_memcpy is used so the loads need no alignment */
__builtin_memcpy(&w0, p, 8);
|
||||
__builtin_memcpy(&w1, q, 8);
|
||||
__builtin_memcpy(&w2, p + n - 8, 8);
|
||||
__builtin_memcpy(&w3, q + n - 8, 8);
|
||||
w = (w0 ^ w1) | (w2 ^ w3);
|
||||
/* fold the high half into the low half before narrowing to int */
return w | w >> 32;
|
||||
} else if (X86_HAVE(AVX)) {
|
||||
/* more than 16 bytes: hand off to the 16-byte-vector helpers */
return timingsafe_bcmp_avx(p, q, n);
|
||||
} else {
|
||||
return timingsafe_bcmp_sse(p, q, n);
|
||||
}
|
||||
} else if (n >= 4) {
|
||||
/* 4..7 bytes: same head/tail trick with 4-byte loads */
__builtin_memcpy(&u0, p, 4);
|
||||
__builtin_memcpy(&u1, q, 4);
|
||||
__builtin_memcpy(&u2, p + n - 4, 4);
|
||||
__builtin_memcpy(&u3, q + n - 4, 4);
|
||||
return (u0 ^ u1) | (u2 ^ u3);
|
||||
}
|
||||
}
|
||||
/* byte loop: reached for n < 4, or for any n when IsTiny() is true;
   walks from the last byte to the first and accumulates every XOR */
for (u = 0; n--;) {
|
||||
u |= p[n] ^ q[n];
|
||||
}
|
||||
return u;
|
||||
}
|
|
@ -24,20 +24,62 @@ Copyright 2014 Google Inc.\"");
|
|||
asm(".include \"libc/disclaimer.inc\"");
|
||||
|
||||
/**
|
||||
* Lexicographically compares the first len bytes in b1 and b2.
|
||||
* Lexicographically compares the first 𝑛 bytes in 𝑝 and 𝑞.
|
||||
*
|
||||
* The following expression:
|
||||
*
|
||||
* timingsafe_memcmp(p, q, n)
|
||||
*
|
||||
* Is functionally equivalent to:
|
||||
*
|
||||
* MAX(-1, MIN(1, memcmp(p, q, n)))
|
||||
*
|
||||
* Running time is independent of the byte sequences compared, making
|
||||
* this safe to use for comparing secret values such as cryptographic
|
||||
* MACs. In contrast, memcmp() may short-circuit after finding the first
|
||||
* differing byte.
|
||||
*
|
||||
* timingsafe_memcmp n=0 661 picoseconds
|
||||
* timingsafe_memcmp n=1 1 ns/byte 590 mb/s
|
||||
* timingsafe_memcmp n=2 1 ns/byte 738 mb/s
|
||||
* timingsafe_memcmp n=3 1 ns/byte 805 mb/s
|
||||
* timingsafe_memcmp n=4 1 ns/byte 843 mb/s
|
||||
* timingsafe_memcmp n=5 1 ns/byte 922 mb/s
|
||||
* timingsafe_memcmp n=6 1 ns/byte 932 mb/s
|
||||
* timingsafe_memcmp n=7 1 ns/byte 939 mb/s
|
||||
* timingsafe_memcmp n=8 992 ps/byte 984 mb/s
|
||||
* timingsafe_memcmp n=9 992 ps/byte 984 mb/s
|
||||
* timingsafe_memcmp n=15 926 ps/byte 1,054 mb/s
|
||||
* timingsafe_memcmp n=16 950 ps/byte 1,026 mb/s
|
||||
* timingsafe_memcmp n=17 933 ps/byte 1,045 mb/s
|
||||
* timingsafe_memcmp n=31 896 ps/byte 1,089 mb/s
|
||||
* timingsafe_memcmp n=32 888 ps/byte 1,098 mb/s
|
||||
* timingsafe_memcmp n=33 972 ps/byte 1,004 mb/s
|
||||
* timingsafe_memcmp n=80 913 ps/byte 1,068 mb/s
|
||||
* timingsafe_memcmp n=128 891 ps/byte 1,095 mb/s
|
||||
* timingsafe_memcmp n=256 873 ps/byte 1,118 mb/s
|
||||
* timingsafe_memcmp n=16384 858 ps/byte 1,138 mb/s
|
||||
* timingsafe_memcmp n=32768 856 ps/byte 1,140 mb/s
|
||||
* timingsafe_memcmp n=131072 857 ps/byte 1,138 mb/s
|
||||
* bcmp ne n=256 3 ps/byte 246 gb/s
|
||||
* bcmp eq n=256 32 ps/byte 30,233 mb/s
|
||||
* memcmp ne n=256 3 ps/byte 246 gb/s
|
||||
* memcmp eq n=256 31 ps/byte 31,493 mb/s
|
||||
* timingsafe_bcmp ne n=256 27 ps/byte 35,992 mb/s
|
||||
* timingsafe_bcmp eq n=256 27 ps/byte 35,992 mb/s
|
||||
* timingsafe_memcmp ne n=256 877 ps/byte 1,113 mb/s
|
||||
* timingsafe_memcmp eq n=256 883 ps/byte 1,105 mb/s
|
||||
*
|
||||
* @note each byte is interpreted as unsigned char
|
||||
* @return -1, 0, or 1 based on comparison
|
||||
* @see timingsafe_bcmp() it's 100x faster
|
||||
* @asyncsignalsafe
|
||||
*/
|
||||
int timingsafe_memcmp(const void *b1, const void *b2, size_t len) {
|
||||
const unsigned char *p1 = b1, *p2 = b2;
|
||||
int timingsafe_memcmp(const void *p, const void *q, size_t n) {
|
||||
const unsigned char *p1 = p, *p2 = q;
|
||||
size_t i;
|
||||
int res = 0, done = 0;
|
||||
for (i = 0; i < len; i++) {
|
||||
for (i = 0; i < n; i++) {
|
||||
/* lt is -1 if p1[i] < p2[i]; else 0. */
|
||||
int lt = (p1[i] - p2[i]) >> CHAR_BIT;
|
||||
/* gt is -1 if p1[i] > p2[i]; else 0. */
|
||||
|
|
|
@ -1,66 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
.source __FILE__
|
||||
|
||||
// Encodes Thompson-Pike varint.
|
||||
//
|
||||
// @param edi is int to encode
|
||||
// @return rax is word-encoded byte buffer
|
||||
// @note invented on a napkin in a new jersey diner
|
||||
tpenc: .leafprologue
|
||||
.profilable
|
||||
// zero-extend the argument into %rdi and clear the result
mov %edi,%edi
|
||||
xor %eax,%eax
|
||||
// values 0..127 encode as themselves: skip straight to the final OR
cmp $127,%edi
|
||||
jbe 3f
|
||||
// %ecx = index of highest set bit (at least 7 here), used to look up
// the table entry: %cl = number of continuation bytes, %ch = lead mark
bsr %edi,%ecx
|
||||
mov kTpenc-7*(1+1)(,%rcx,2),%ecx
|
||||
// each iteration emits one continuation byte (0b10xxxxxx) built from the
// low six bits of %edi, then shifts the result left to make room
1: mov %edi,%edx
|
||||
shr $6,%edi
|
||||
and $0b00111111,%dl
|
||||
or $0b10000000,%al
|
||||
or %dl,%al
|
||||
shl $8,%rax
|
||||
dec %cl
|
||||
jnz 1b
|
||||
// low byte of the result becomes the lead byte: mark bits from the
// table OR'd with whatever bits are left in %rdi
2: or %ch,%al
|
||||
3: or %rdi,%rax
|
||||
.leafepilogue
|
||||
.endfn tpenc,globl
|
||||
|
||||
// Lookup table for tpenc, one two-byte entry (len,mark) per possible
// highest-set-bit index from 7 through 31; tpenc addresses it as
// kTpenc + 2*(msb - 7) and loads 32 bits from that address.
.rodata
|
||||
.align 4
|
||||
kTpenc: .rept 4 # MSB≤10 (0x7FF)
|
||||
.byte 1,0b11000000 # len,mark
|
||||
.endr
|
||||
.rept 5 # MSB≤15 (0xFFFF)
|
||||
.byte 2,0b11100000 # len,mark
|
||||
.endr
|
||||
.rept 5 # MSB≤20 (0x1FFFFF)
|
||||
.byte 3,0b11110000 # len,mark
|
||||
.endr
|
||||
.rept 5 # MSB≤25 (0x3FFFFFF)
|
||||
.byte 4,0b11111000 # len,mark
|
||||
.endr
|
||||
.rept 6 # MSB≤31 (0xffffffff)
|
||||
.byte 5,0b11111100 # len,mark
|
||||
.endr
|
||||
// two bytes of padding: apparently so that the 32-bit load of the last
// entry (offset 48 of the 50 bytes above) stays inside the object
.zero 2
|
||||
.endobj kTpenc
|
|
@ -74,7 +74,7 @@ static uint32_t undeflatetree(struct DeflateState *ds, uint32_t *tree,
|
|||
size_t i, len;
|
||||
uint32_t code, slot;
|
||||
uint16_t codes[16], first[16], counts[16];
|
||||
memset(counts, 0, sizeof(counts));
|
||||
bzero(counts, sizeof(counts));
|
||||
for (i = 0; i < symcount; i++) {
|
||||
counts[lens[i]]++;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue