2021-09-28 05:58:51 +00:00
|
|
|
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
|
|
|
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
|
|
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
|
|
|
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
|
|
|
|
│ │
|
|
|
|
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
|
|
|
|
│ any purpose with or without fee is hereby granted, provided that the │
|
|
|
|
|
│ above copyright notice and this permission notice appear in all copies. │
|
|
|
|
|
│ │
|
|
|
|
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
|
|
|
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
|
|
|
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
|
|
|
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
|
|
|
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
|
|
|
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
|
|
|
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
|
|
|
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
|
|
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
2022-08-11 19:13:18 +00:00
|
|
|
|
#include "libc/intrin/likely.h"
|
2021-09-28 05:58:51 +00:00
|
|
|
|
#include "libc/dce.h"
|
2021-10-08 15:11:51 +00:00
|
|
|
|
#include "libc/intrin/asan.internal.h"
|
2021-09-28 05:58:51 +00:00
|
|
|
|
#include "libc/nexgen32e/x86feature.h"
|
|
|
|
|
#include "libc/str/str.h"
|
|
|
|
|
|
|
|
|
|
/* 16-byte vector holding two uint64_t lanes. It is declared with 1-byte
   alignment so that dereferencing a pointer cast to this type does not
   assume the address is 16-byte aligned. */
typedef uint64_t xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
|
|
|
|
|
|
2021-11-12 23:12:18 +00:00
|
|
|
|
noasan static dontinline antiquity unsigned timingsafe_bcmp_sse(const char *p,
|
2021-10-08 15:11:51 +00:00
|
|
|
|
const char *q,
|
|
|
|
|
size_t n) {
|
2021-09-28 05:58:51 +00:00
|
|
|
|
uint64_t w;
|
|
|
|
|
xmm_t a = {0};
|
|
|
|
|
while (n > 16 + 16) {
|
|
|
|
|
a |= *(const xmm_t *)p ^ *(const xmm_t *)q;
|
|
|
|
|
p += 16;
|
|
|
|
|
q += 16;
|
|
|
|
|
n -= 16;
|
|
|
|
|
}
|
|
|
|
|
a |= *(const xmm_t *)p ^ *(const xmm_t *)q;
|
|
|
|
|
a |= *(const xmm_t *)(p + n - 16) ^ *(const xmm_t *)(q + n - 16);
|
|
|
|
|
w = a[0] | a[1];
|
|
|
|
|
return w | w >> 32;
|
|
|
|
|
}
|
|
|
|
|
|
2021-10-08 15:11:51 +00:00
|
|
|
|
/**
 * Compares two byte ranges using 16-byte vector loads, unrolled 4x.
 *
 * Large inputs are consumed 64 bytes per iteration into four separate
 * accumulators, which are merged afterwards. Whatever remains is then
 * handled 16 bytes at a time, finishing with a head load and a tail
 * load that may overlap. There is no branch on the data being compared.
 *
 * Both ranges are read at offset 0 and at offset 𝑛-16 for 16 bytes, so
 * 𝑛 must be at least 16. The caller in this file only passes 𝑛 > 16.
 *
 * @param p is first byte range
 * @param q is second byte range
 * @param n is number of bytes to compare
 * @return nonzero if the ranges differ, otherwise zero
 */
noasan static microarchitecture("avx") int timingsafe_bcmp_avx(const char *p,
                                                               const char *q,
                                                               size_t n) {
  xmm_t acc = {0};
  uint64_t folded;
  /* the threshold is 64 + 16 so at least 16 bytes are always left over
     for the head and tail loads below */
  if (n >= 16 + 64) {
    xmm_t acc1 = {0};
    xmm_t acc2 = {0};
    xmm_t acc3 = {0};
    do {
      const xmm_t *x = (const xmm_t *)p;
      const xmm_t *y = (const xmm_t *)q;
      acc |= x[0] ^ y[0];
      acc1 |= x[1] ^ y[1];
      acc2 |= x[2] ^ y[2];
      acc3 |= x[3] ^ y[3];
      n -= 64;
      p += 64;
      q += 64;
    } while (n >= 16 + 64);
    acc |= acc1 | acc2 | acc3;
  }
  /* consume whole chunks until between 17 and 32 bytes are left */
  for (; n > 32; n -= 16, p += 16, q += 16) {
    acc |= *(const xmm_t *)p ^ *(const xmm_t *)q;
  }
  /* the head and tail loads may overlap; together they cover the rest */
  acc |= *(const xmm_t *)p ^ *(const xmm_t *)q;
  acc |= *(const xmm_t *)(p + n - 16) ^ *(const xmm_t *)(q + n - 16);
  /* merge both lanes, then fold the high half into the low half so a
     difference is still visible once the result is narrowed to int */
  folded = acc[0] | acc[1];
  return folded | folded >> 32;
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Tests inequality of first 𝑛 bytes of 𝑝 and 𝑞.
|
|
|
|
|
*
|
|
|
|
|
* The following expression:
|
|
|
|
|
*
|
|
|
|
|
* !!timingsafe_bcmp(p, q, n)
|
|
|
|
|
*
|
|
|
|
|
* Is functionally equivalent to:
|
|
|
|
|
*
|
|
|
|
|
* !!memcmp(p, q, n)
|
|
|
|
|
*
|
|
|
|
|
* This function is faster than memcmp() and bcmp() when byte sequences
|
|
|
|
|
* are assumed to always be the same; that makes it best for assertions
|
|
|
|
|
* or hash table lookups, assuming 𝑛 is variable (since no gcc builtin)
|
|
|
|
|
*
|
|
|
|
|
* timingsafe_bcmp n=0 992 picoseconds
|
|
|
|
|
* timingsafe_bcmp n=1 1 ns/byte 738 mb/s
|
|
|
|
|
* timingsafe_bcmp n=2 826 ps/byte 1,181 mb/s
|
|
|
|
|
* timingsafe_bcmp n=3 661 ps/byte 1,476 mb/s
|
|
|
|
|
* timingsafe_bcmp n=4 330 ps/byte 2,952 mb/s
|
|
|
|
|
* timingsafe_bcmp n=5 264 ps/byte 3,690 mb/s
|
|
|
|
|
* timingsafe_bcmp n=6 220 ps/byte 4,428 mb/s
|
|
|
|
|
* timingsafe_bcmp n=7 189 ps/byte 5,166 mb/s
|
|
|
|
|
* timingsafe_bcmp n=8 124 ps/byte 7,873 mb/s
|
|
|
|
|
* timingsafe_bcmp n=9 147 ps/byte 6,643 mb/s
|
|
|
|
|
* timingsafe_bcmp n=15 88 ps/byte 11,072 mb/s
|
|
|
|
|
* timingsafe_bcmp n=16 62 ps/byte 15,746 mb/s
|
|
|
|
|
* timingsafe_bcmp n=17 136 ps/byte 7,170 mb/s
|
|
|
|
|
* timingsafe_bcmp n=31 74 ps/byte 13,075 mb/s
|
|
|
|
|
* timingsafe_bcmp n=32 72 ps/byte 13,497 mb/s
|
|
|
|
|
* timingsafe_bcmp n=33 80 ps/byte 12,179 mb/s
|
|
|
|
|
* timingsafe_bcmp n=80 57 ps/byte 16,871 mb/s
|
|
|
|
|
* timingsafe_bcmp n=128 49 ps/byte 19,890 mb/s
|
|
|
|
|
* timingsafe_bcmp n=256 31 ps/byte 31,493 mb/s
|
|
|
|
|
* timingsafe_bcmp n=16384 14 ps/byte 67,941 mb/s
|
|
|
|
|
* timingsafe_bcmp n=32768 29 ps/byte 33,121 mb/s
|
|
|
|
|
* timingsafe_bcmp n=131072 29 ps/byte 32,949 mb/s
|
|
|
|
|
*
|
|
|
|
|
* Running time is independent of the byte sequences compared, making
|
|
|
|
|
* this safe to use for comparing secret values such as cryptographic
|
|
|
|
|
* MACs. In contrast, memcmp() may short-circuit after finding the first
|
|
|
|
|
* differing byte.
|
|
|
|
|
*
|
|
|
|
|
* @return nonzero if unequal, otherwise zero
|
|
|
|
|
* @see timingsafe_memcmp()
|
|
|
|
|
* @asyncsignalsafe
|
|
|
|
|
*/
|
|
|
|
|
int timingsafe_bcmp(const void *a, const void *b, size_t n) {
|
|
|
|
|
const char *p = a, *q = b;
|
|
|
|
|
uint32_t u, u0, u1, u2, u3;
|
|
|
|
|
uint64_t w, w0, w1, w2, w3;
|
|
|
|
|
if (!IsTiny()) {
|
|
|
|
|
if (n >= 8) {
|
|
|
|
|
if (n <= 16) {
|
|
|
|
|
__builtin_memcpy(&w0, p, 8);
|
|
|
|
|
__builtin_memcpy(&w1, q, 8);
|
|
|
|
|
__builtin_memcpy(&w2, p + n - 8, 8);
|
|
|
|
|
__builtin_memcpy(&w3, q + n - 8, 8);
|
|
|
|
|
w = (w0 ^ w1) | (w2 ^ w3);
|
|
|
|
|
return w | w >> 32;
|
|
|
|
|
} else {
|
2021-10-08 15:11:51 +00:00
|
|
|
|
if (IsAsan()) {
|
2021-10-14 00:27:13 +00:00
|
|
|
|
__asan_verify(a, n);
|
|
|
|
|
__asan_verify(b, n);
|
2021-10-08 15:11:51 +00:00
|
|
|
|
}
|
|
|
|
|
if (X86_HAVE(AVX)) {
|
|
|
|
|
return timingsafe_bcmp_avx(p, q, n);
|
|
|
|
|
} else {
|
|
|
|
|
return timingsafe_bcmp_sse(p, q, n);
|
|
|
|
|
}
|
2021-09-28 05:58:51 +00:00
|
|
|
|
}
|
|
|
|
|
} else if (n >= 4) {
|
|
|
|
|
__builtin_memcpy(&u0, p, 4);
|
|
|
|
|
__builtin_memcpy(&u1, q, 4);
|
|
|
|
|
__builtin_memcpy(&u2, p + n - 4, 4);
|
|
|
|
|
__builtin_memcpy(&u3, q + n - 4, 4);
|
|
|
|
|
return (u0 ^ u1) | (u2 ^ u3);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
for (u = 0; n--;) {
|
|
|
|
|
u |= p[n] ^ q[n];
|
|
|
|
|
}
|
|
|
|
|
return u;
|
|
|
|
|
}
|