Mirror of https://github.com/jart/cosmopolitan.git (synced 2025-06-27 23:08:31 +00:00)
Make improvements
- Invent openatemp() API
- Invent O_UNLINK open flag
- Introduce getenv_secure() API
- Remove `git pull` from cosmocc
- Fix utimes() when path is NULL
- Fix mktemp() to never return NULL
- Fix utimensat() UTIME_OMIT on XNU
- Improve utimensat() code for RHEL5
- Turn `argv[0]` C:/ to /C/ on Windows
- Introduce tmpnam() and tmpnam_r() APIs
- Fix more const issues with internal APIs
- Permit utimes() on WIN32 in O_RDONLY mode
- Fix fdopendir() to check fd is a directory
- Fix recent crash regression in landlock make
- Fix futimens(AT_FDCWD, NULL) to return EBADF
- Use workaround so `make -j` doesn't fork bomb
- Rename dontdiscard to __wur (just like glibc)
- Fix st_size for WIN32 symlinks containing UTF-8
- Introduce stdio ext APIs needed by GNU coreutils
- Fix lstat() on WIN32 for symlinks to directories
- Move some constants from normalize.inc to limits.h
- Fix segv with memchr() and memcmp() overlapping page
- Implement POSIX fflush() behavior for reader streams
- Implement AT_SYMLINK_NOFOLLOW for utimensat() on WIN32
- Don't change read-only status of existing files on WIN32
- Correctly handle `0x[^[:xdigit:]]` case in strtol() functions
This commit is contained in:
parent
8596e83cce
commit
f531acc8f9
297 changed files with 1920 additions and 1681 deletions
115
libc/str/bcmp.c
115
libc/str/bcmp.c
|
@ -16,128 +16,15 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/dce.h"
|
||||
#include "libc/intrin/likely.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
typedef uint64_t xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
|
||||
|
||||
#if !defined(__chibicc__)
|
||||
static int bcmp_sse(const char *p, const char *q, size_t n) {
|
||||
xmm_t a;
|
||||
while (n > 32) {
|
||||
a = *(const xmm_t *)p ^ *(const xmm_t *)q;
|
||||
if (a[0] | a[1]) return 1;
|
||||
p += 16;
|
||||
q += 16;
|
||||
n -= 16;
|
||||
}
|
||||
a = (*(const xmm_t *)p ^ *(const xmm_t *)q) |
|
||||
(*(const xmm_t *)(p + n - 16) ^ *(const xmm_t *)(q + n - 16));
|
||||
return !!(a[0] | a[1]);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) && !defined(__chibicc__)
|
||||
_Microarchitecture("avx") static int bcmp_avx(const char *p, const char *q,
|
||||
size_t n) {
|
||||
xmm_t a, b, c, d;
|
||||
if (n > 32) {
|
||||
if (n >= 16 + 64) {
|
||||
do {
|
||||
a = ((const xmm_t *)p)[0] ^ ((const xmm_t *)q)[0];
|
||||
b = ((const xmm_t *)p)[1] ^ ((const xmm_t *)q)[1];
|
||||
c = ((const xmm_t *)p)[2] ^ ((const xmm_t *)q)[2];
|
||||
d = ((const xmm_t *)p)[3] ^ ((const xmm_t *)q)[3];
|
||||
a = a | b | c | d;
|
||||
if (a[0] | a[1]) return 1;
|
||||
p += 64;
|
||||
q += 64;
|
||||
n -= 64;
|
||||
} while (n >= 16 + 64);
|
||||
}
|
||||
while (n > 16 + 16) {
|
||||
a = *(const xmm_t *)p ^ *(const xmm_t *)q;
|
||||
if (a[0] | a[1]) return 1;
|
||||
p += 16;
|
||||
q += 16;
|
||||
n -= 16;
|
||||
}
|
||||
}
|
||||
a = (*(const xmm_t *)p ^ *(const xmm_t *)q) |
|
||||
(*(const xmm_t *)(p + n - 16) ^ *(const xmm_t *)(q + n - 16));
|
||||
return !!(a[0] | a[1]);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Tests inequality of first 𝑛 bytes of 𝑝 and 𝑞.
|
||||
*
|
||||
* bcmp n=0 992 picoseconds
|
||||
* bcmp n=1 992 ps/byte 984 mb/s
|
||||
* bcmp n=2 661 ps/byte 1,476 mb/s
|
||||
* bcmp n=3 441 ps/byte 2,214 mb/s
|
||||
* bcmp n=4 330 ps/byte 2,952 mb/s
|
||||
* bcmp n=5 264 ps/byte 3,690 mb/s
|
||||
* bcmp n=6 165 ps/byte 5,905 mb/s
|
||||
* bcmp n=7 189 ps/byte 5,166 mb/s
|
||||
* bcmp n=8 124 ps/byte 7,873 mb/s
|
||||
* bcmp n=9 183 ps/byte 5,314 mb/s
|
||||
* bcmp n=15 110 ps/byte 8,857 mb/s
|
||||
* bcmp n=16 62 ps/byte 15,746 mb/s
|
||||
* bcmp n=17 175 ps/byte 5,577 mb/s
|
||||
* bcmp n=31 96 ps/byte 10,169 mb/s
|
||||
* bcmp n=32 93 ps/byte 10,497 mb/s
|
||||
* bcmp n=33 80 ps/byte 12,179 mb/s
|
||||
* bcmp n=80 37 ps/byte 26,244 mb/s
|
||||
* bcmp n=128 36 ps/byte 26,994 mb/s
|
||||
* bcmp n=256 27 ps/byte 35,992 mb/s
|
||||
* bcmp n=16384 19 ps/byte 49,411 mb/s
|
||||
* bcmp n=32768 27 ps/byte 34,914 mb/s
|
||||
* bcmp n=131072 30 ps/byte 32,303 mb/s
|
||||
*
|
||||
* @return 0 if a and b have equal contents, otherwise nonzero
|
||||
* @see timingsafe_bcmp()
|
||||
* @asyncsignalsafe
|
||||
*/
|
||||
int bcmp(const void *a, const void *b, size_t n) {
|
||||
int c;
|
||||
uint32_t i, j;
|
||||
uint64_t x, y;
|
||||
const char *p, *q;
|
||||
if ((p = a) == (q = b)) return 0;
|
||||
if (!IsTiny()) {
|
||||
if (n <= 16) {
|
||||
if (n >= 8) {
|
||||
__builtin_memcpy(&x, p, 8);
|
||||
__builtin_memcpy(&y, q, 8);
|
||||
if (x ^ y) return 1;
|
||||
__builtin_memcpy(&x, p + n - 8, 8);
|
||||
__builtin_memcpy(&y, q + n - 8, 8);
|
||||
return !!(x ^ y);
|
||||
} else if (n >= 4) {
|
||||
__builtin_memcpy(&i, p, 4);
|
||||
__builtin_memcpy(&j, q, 4);
|
||||
if (i ^ j) return 1;
|
||||
__builtin_memcpy(&i, p + n - 4, 4);
|
||||
__builtin_memcpy(&j, q + n - 4, 4);
|
||||
return !!(i ^ j);
|
||||
}
|
||||
#ifndef __chibicc__
|
||||
#ifdef __x86_64__
|
||||
} else if (LIKELY(X86_HAVE(AVX))) {
|
||||
return bcmp_avx(p, q, n);
|
||||
#endif
|
||||
} else {
|
||||
return bcmp_sse(p, q, n);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
while (n--) {
|
||||
if ((c = p[n] ^ q[n])) {
|
||||
return c;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
return memcmp(a, b, n);
|
||||
}
|
||||
|
|
|
@ -25,8 +25,8 @@
|
|||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||
│ │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/stdio/stdio.h"
|
||||
#include "libc/str/mb.internal.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/dce.h"
|
||||
#include "libc/intrin/asan.internal.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
└─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/assert.h"
|
||||
#include "libc/intrin/bsf.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/mem/alg.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
|
|
|
@ -191,7 +191,7 @@ bool startswithi(const char *, const char *) strlenesque;
|
|||
bool endswith(const char *, const char *) strlenesque;
|
||||
bool istext(const void *, size_t) libcesque;
|
||||
bool isutf8(const void *, size_t) libcesque;
|
||||
char *strsignal_r(int, char[21]) returnsnonnull libcesque dontdiscard;
|
||||
char *strsignal_r(int, char[21]) returnsnonnull libcesque __wur;
|
||||
int strerror_wr(int, uint32_t, char *, size_t)
|
||||
dontthrow nocallback;
|
||||
char16_t *chomp16(char16_t *) libcesque;
|
||||
|
|
|
@ -16,151 +16,15 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/dce.h"
|
||||
#include "libc/intrin/asan.internal.h"
|
||||
#include "libc/intrin/likely.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
#ifndef __chibicc__
|
||||
typedef uint64_t xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
|
||||
dontasan static unsigned timingsafe_bcmp_sse(const char *p, const char *q,
|
||||
size_t n) {
|
||||
uint64_t w;
|
||||
xmm_t a = {0};
|
||||
while (n > 16 + 16) {
|
||||
a |= *(const xmm_t *)p ^ *(const xmm_t *)q;
|
||||
p += 16;
|
||||
q += 16;
|
||||
n -= 16;
|
||||
}
|
||||
a |= *(const xmm_t *)p ^ *(const xmm_t *)q;
|
||||
a |= *(const xmm_t *)(p + n - 16) ^ *(const xmm_t *)(q + n - 16);
|
||||
w = a[0] | a[1];
|
||||
return w | w >> 32;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) && !defined(__chibicc__)
|
||||
dontasan static _Microarchitecture("avx") int timingsafe_bcmp_avx(const char *p,
|
||||
const char *q,
|
||||
size_t n) {
|
||||
uint64_t w;
|
||||
xmm_t a = {0};
|
||||
if (n > 32) {
|
||||
if (n >= 16 + 64) {
|
||||
xmm_t b = {0};
|
||||
xmm_t c = {0};
|
||||
xmm_t d = {0};
|
||||
do {
|
||||
a |= ((const xmm_t *)p)[0] ^ ((const xmm_t *)q)[0];
|
||||
b |= ((const xmm_t *)p)[1] ^ ((const xmm_t *)q)[1];
|
||||
c |= ((const xmm_t *)p)[2] ^ ((const xmm_t *)q)[2];
|
||||
d |= ((const xmm_t *)p)[3] ^ ((const xmm_t *)q)[3];
|
||||
p += 64;
|
||||
q += 64;
|
||||
n -= 64;
|
||||
} while (n >= 16 + 64);
|
||||
a = a | b | c | d;
|
||||
}
|
||||
while (n > 32) {
|
||||
a |= *(const xmm_t *)p ^ *(const xmm_t *)q;
|
||||
p += 16;
|
||||
q += 16;
|
||||
n -= 16;
|
||||
}
|
||||
}
|
||||
a |= *(const xmm_t *)p ^ *(const xmm_t *)q;
|
||||
a |= *(const xmm_t *)(p + n - 16) ^ *(const xmm_t *)(q + n - 16);
|
||||
w = a[0] | a[1];
|
||||
return w | w >> 32;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Tests inequality of first 𝑛 bytes of 𝑝 and 𝑞.
|
||||
*
|
||||
* The following expression:
|
||||
*
|
||||
* !!timingsafe_bcmp(p, q, n)
|
||||
*
|
||||
* Is functionally equivalent to:
|
||||
*
|
||||
* !!memcmp(p, q, n)
|
||||
*
|
||||
* This function is faster than memcmp() and bcmp() when byte sequences
|
||||
* are assumed to always be the same; that makes it best for assertions
|
||||
* or hash table lookups, assuming 𝑛 is variable (since no gcc builtin)
|
||||
*
|
||||
* timingsafe_bcmp n=0 992 picoseconds
|
||||
* timingsafe_bcmp n=1 1 ns/byte 738 mb/s
|
||||
* timingsafe_bcmp n=2 826 ps/byte 1,181 mb/s
|
||||
* timingsafe_bcmp n=3 661 ps/byte 1,476 mb/s
|
||||
* timingsafe_bcmp n=4 330 ps/byte 2,952 mb/s
|
||||
* timingsafe_bcmp n=5 264 ps/byte 3,690 mb/s
|
||||
* timingsafe_bcmp n=6 220 ps/byte 4,428 mb/s
|
||||
* timingsafe_bcmp n=7 189 ps/byte 5,166 mb/s
|
||||
* timingsafe_bcmp n=8 124 ps/byte 7,873 mb/s
|
||||
* timingsafe_bcmp n=9 147 ps/byte 6,643 mb/s
|
||||
* timingsafe_bcmp n=15 88 ps/byte 11,072 mb/s
|
||||
* timingsafe_bcmp n=16 62 ps/byte 15,746 mb/s
|
||||
* timingsafe_bcmp n=17 136 ps/byte 7,170 mb/s
|
||||
* timingsafe_bcmp n=31 74 ps/byte 13,075 mb/s
|
||||
* timingsafe_bcmp n=32 72 ps/byte 13,497 mb/s
|
||||
* timingsafe_bcmp n=33 80 ps/byte 12,179 mb/s
|
||||
* timingsafe_bcmp n=80 57 ps/byte 16,871 mb/s
|
||||
* timingsafe_bcmp n=128 49 ps/byte 19,890 mb/s
|
||||
* timingsafe_bcmp n=256 31 ps/byte 31,493 mb/s
|
||||
* timingsafe_bcmp n=16384 14 ps/byte 67,941 mb/s
|
||||
* timingsafe_bcmp n=32768 29 ps/byte 33,121 mb/s
|
||||
* timingsafe_bcmp n=131072 29 ps/byte 32,949 mb/s
|
||||
*
|
||||
* Running time is independent of the byte sequences compared, making
|
||||
* this safe to use for comparing secret values such as cryptographic
|
||||
* MACs. In contrast, memcmp() may short-circuit after finding the first
|
||||
* differing byte.
|
||||
*
|
||||
* @return nonzero if unequal, otherwise zero
|
||||
* @see timingsafe_memcmp()
|
||||
* @asyncsignalsafe
|
||||
*/
|
||||
int timingsafe_bcmp(const void *a, const void *b, size_t n) {
|
||||
const char *p = a, *q = b;
|
||||
uint32_t u, u0, u1, u2, u3;
|
||||
uint64_t w, w0, w1, w2, w3;
|
||||
if (!IsTiny()) {
|
||||
if (n >= 8) {
|
||||
if (n <= 16) {
|
||||
__builtin_memcpy(&w0, p, 8);
|
||||
__builtin_memcpy(&w1, q, 8);
|
||||
__builtin_memcpy(&w2, p + n - 8, 8);
|
||||
__builtin_memcpy(&w3, q + n - 8, 8);
|
||||
w = (w0 ^ w1) | (w2 ^ w3);
|
||||
return w | w >> 32;
|
||||
} else {
|
||||
if (IsAsan()) {
|
||||
__asan_verify(a, n);
|
||||
__asan_verify(b, n);
|
||||
}
|
||||
#ifndef __chibicc__
|
||||
#ifdef __x86_64__
|
||||
if (X86_HAVE(AVX)) {
|
||||
return timingsafe_bcmp_avx(p, q, n);
|
||||
}
|
||||
#endif
|
||||
return timingsafe_bcmp_sse(p, q, n);
|
||||
#endif
|
||||
}
|
||||
} else if (n >= 4) {
|
||||
__builtin_memcpy(&u0, p, 4);
|
||||
__builtin_memcpy(&u1, q, 4);
|
||||
__builtin_memcpy(&u2, p + n - 4, 4);
|
||||
__builtin_memcpy(&u3, q + n - 4, 4);
|
||||
return (u0 ^ u1) | (u2 ^ u3);
|
||||
}
|
||||
}
|
||||
for (u = 0; n--;) {
|
||||
u |= p[n] ^ q[n];
|
||||
}
|
||||
return u;
|
||||
return timingsafe_memcmp(a, b, n);
|
||||
}
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/limits.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
asm(".ident\t\"\\n\\n\
|
||||
|
|
|
@ -25,8 +25,8 @@
|
|||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||
│ │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/stdio/stdio.h"
|
||||
#include "libc/str/mb.internal.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/dce.h"
|
||||
#include "libc/intrin/asan.internal.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/stdckdint.h"
|
||||
#include "libc/str/str.h"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue