/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ ╞══════════════════════════════════════════════════════════════════════════════╡ │ Copyright 2021 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ │ above copyright notice and this permission notice appear in all copies. │ │ │ │ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ │ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ │ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ │ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ │ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ │ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/intrin/likely.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/str/str.h"

/* Two 64-bit lanes (16 bytes) declared with 1-byte alignment, so the
   pointer casts below may load from addresses that aren't 16-aligned. */
typedef uint64_t xmm_t __attribute__((__vector_size__(16), __aligned__(1)));

/**
 * Folds the XOR of two buffers into one word, 16 bytes per step.
 *
 * Every byte is visited regardless of content: there is no early exit
 * on a mismatch. The final two loads cover the first 16 and the last
 * 16 remaining bytes and may overlap each other, which is harmless
 * because differences are only ever OR'd together.
 *
 * @param p is first buffer
 * @param q is second buffer
 * @param n is byte length; the tail load reads from `p + n - 16`, so
 *     this must be at least 16 (the dispatcher in this file only
 *     calls it with n > 16)
 * @return nonzero if any compared byte differed, otherwise zero
 */
noasan static unsigned timingsafe_bcmp_sse(const char *p, const char *q,
                                           size_t n) {
  xmm_t diff = {0};
  uint64_t folded;
  /* consume whole 16-byte chunks while more than 32 bytes remain */
  for (; n > 16 + 16; p += 16, q += 16, n -= 16) {
    diff |= ((const xmm_t *)p)[0] ^ ((const xmm_t *)q)[0];
  }
  /* 17..32 bytes left: head chunk plus (possibly overlapping) tail chunk */
  diff |= ((const xmm_t *)p)[0] ^ ((const xmm_t *)q)[0];
  diff |= ((const xmm_t *)(p + n - 16))[0] ^ ((const xmm_t *)(q + n - 16))[0];
  /* collapse 128 bits -> 64 bits -> low 32 bits of the result */
  folded = diff[0] | diff[1];
  return folded | folded >> 32;
}

#ifdef __x86_64__
/**
 * Same contract as timingsafe_bcmp_sse(), with a 64-byte unrolled loop.
 *
 * Four independent accumulators are used for the unrolled portion and
 * merged afterwards; leftovers are handled 16 bytes at a time followed
 * by the same overlapping head/tail loads as the SSE variant.
 *
 * @param p is first buffer
 * @param q is second buffer
 * @param n is byte length, which must be at least 16
 * @return nonzero if any compared byte differed, otherwise zero
 */
noasan static microarchitecture("avx") int timingsafe_bcmp_avx(const char *p,
                                                               const char *q,
                                                               size_t n) {
  xmm_t acc0 = {0};
  uint64_t folded;
  if (n >= 16 + 64) {
    xmm_t acc1 = {0};
    xmm_t acc2 = {0};
    xmm_t acc3 = {0};
    do {
      const xmm_t *x = (const xmm_t *)p;
      const xmm_t *y = (const xmm_t *)q;
      acc0 |= x[0] ^ y[0];
      acc1 |= x[1] ^ y[1];
      acc2 |= x[2] ^ y[2];
      acc3 |= x[3] ^ y[3];
      p += 64;
      q += 64;
      n -= 64;
    } while (n >= 16 + 64);
    acc0 |= acc1 | acc2 | acc3;
  }
  /* fewer than 80 bytes remain; step by 16 until at most 32 are left */
  for (; n > 32; p += 16, q += 16, n -= 16) {
    acc0 |= ((const xmm_t *)p)[0] ^ ((const xmm_t *)q)[0];
  }
  /* head chunk plus (possibly overlapping) tail chunk */
  acc0 |= ((const xmm_t *)p)[0] ^ ((const xmm_t *)q)[0];
  acc0 |= ((const xmm_t *)(p + n - 16))[0] ^ ((const xmm_t *)(q + n - 16))[0];
  folded = acc0[0] | acc0[1];
  return folded | folded >> 32;
}
#endif

/**
 * Tests inequality of first 𝑛 bytes of 𝑝 and 𝑞.
* * The following expression: * * !!timingsafe_bcmp(p, q, n) * * Is functionally equivalent to: * * !!memcmp(p, q, n) * * This function is faster than memcmp() and bcmp() when byte sequences * are assumed to always be the same; that makes it best for assertions * or hash table lookups, assuming 𝑛 is variable (since no gcc builtin) * * timingsafe_bcmp n=0 992 picoseconds * timingsafe_bcmp n=1 1 ns/byte 738 mb/s * timingsafe_bcmp n=2 826 ps/byte 1,181 mb/s * timingsafe_bcmp n=3 661 ps/byte 1,476 mb/s * timingsafe_bcmp n=4 330 ps/byte 2,952 mb/s * timingsafe_bcmp n=5 264 ps/byte 3,690 mb/s * timingsafe_bcmp n=6 220 ps/byte 4,428 mb/s * timingsafe_bcmp n=7 189 ps/byte 5,166 mb/s * timingsafe_bcmp n=8 124 ps/byte 7,873 mb/s * timingsafe_bcmp n=9 147 ps/byte 6,643 mb/s * timingsafe_bcmp n=15 88 ps/byte 11,072 mb/s * timingsafe_bcmp n=16 62 ps/byte 15,746 mb/s * timingsafe_bcmp n=17 136 ps/byte 7,170 mb/s * timingsafe_bcmp n=31 74 ps/byte 13,075 mb/s * timingsafe_bcmp n=32 72 ps/byte 13,497 mb/s * timingsafe_bcmp n=33 80 ps/byte 12,179 mb/s * timingsafe_bcmp n=80 57 ps/byte 16,871 mb/s * timingsafe_bcmp n=128 49 ps/byte 19,890 mb/s * timingsafe_bcmp n=256 31 ps/byte 31,493 mb/s * timingsafe_bcmp n=16384 14 ps/byte 67,941 mb/s * timingsafe_bcmp n=32768 29 ps/byte 33,121 mb/s * timingsafe_bcmp n=131072 29 ps/byte 32,949 mb/s * * Running time is independent of the byte sequences compared, making * this safe to use for comparing secret values such as cryptographic * MACs. In contrast, memcmp() may short-circuit after finding the first * differing byte. 
* * @return nonzero if unequal, otherwise zero * @see timingsafe_memcmp() * @asyncsignalsafe */ int timingsafe_bcmp(const void *a, const void *b, size_t n) { const char *p = a, *q = b; uint32_t u, u0, u1, u2, u3; uint64_t w, w0, w1, w2, w3; if (!IsTiny()) { if (n >= 8) { if (n <= 16) { __builtin_memcpy(&w0, p, 8); __builtin_memcpy(&w1, q, 8); __builtin_memcpy(&w2, p + n - 8, 8); __builtin_memcpy(&w3, q + n - 8, 8); w = (w0 ^ w1) | (w2 ^ w3); return w | w >> 32; } else { if (IsAsan()) { __asan_verify(a, n); __asan_verify(b, n); } #ifdef __x86_64__ if (X86_HAVE(AVX)) { return timingsafe_bcmp_avx(p, q, n); } #endif return timingsafe_bcmp_sse(p, q, n); } } else if (n >= 4) { __builtin_memcpy(&u0, p, 4); __builtin_memcpy(&u1, q, 4); __builtin_memcpy(&u2, p + n - 4, 4); __builtin_memcpy(&u3, q + n - 4, 4); return (u0 ^ u1) | (u2 ^ u3); } } for (u = 0; n--;) { u |= p[n] ^ q[n]; } return u; }