cosmopolitan/libc/str/timingsafe_bcmp.c
Gautham 6f658f058b
Change noinline to dontinline (#312)
We defined `noinline` as an abbreviation for the longer version
`__attribute__((__noinline__))` which caused name clashes since
third party codebases often write it as `__attribute__((noinline))`.
2021-11-12 15:12:18 -08:00

161 lines
6.8 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2021 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/bits/likely.h"
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/str/str.h"
typedef uint64_t xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
noasan static dontinline antiquity unsigned timingsafe_bcmp_sse(const char *p,
const char *q,
size_t n) {
uint64_t w;
xmm_t a = {0};
while (n > 16 + 16) {
a |= *(const xmm_t *)p ^ *(const xmm_t *)q;
p += 16;
q += 16;
n -= 16;
}
a |= *(const xmm_t *)p ^ *(const xmm_t *)q;
a |= *(const xmm_t *)(p + n - 16) ^ *(const xmm_t *)(q + n - 16);
w = a[0] | a[1];
return w | w >> 32;
}
noasan static microarchitecture("avx") int timingsafe_bcmp_avx(const char *p,
const char *q,
size_t n) {
uint64_t w;
xmm_t a = {0};
if (n > 32) {
if (n >= 16 + 64) {
xmm_t b = {0};
xmm_t c = {0};
xmm_t d = {0};
do {
a |= ((const xmm_t *)p)[0] ^ ((const xmm_t *)q)[0];
b |= ((const xmm_t *)p)[1] ^ ((const xmm_t *)q)[1];
c |= ((const xmm_t *)p)[2] ^ ((const xmm_t *)q)[2];
d |= ((const xmm_t *)p)[3] ^ ((const xmm_t *)q)[3];
p += 64;
q += 64;
n -= 64;
} while (n >= 16 + 64);
a = a | b | c | d;
}
while (n > 32) {
a |= *(const xmm_t *)p ^ *(const xmm_t *)q;
p += 16;
q += 16;
n -= 16;
}
}
a |= *(const xmm_t *)p ^ *(const xmm_t *)q;
a |= *(const xmm_t *)(p + n - 16) ^ *(const xmm_t *)(q + n - 16);
w = a[0] | a[1];
return w | w >> 32;
}
/**
* Tests inequality of first 𝑛 bytes of 𝑝 and 𝑞.
*
* The following expression:
*
* !!timingsafe_bcmp(p, q, n)
*
* Is functionally equivalent to:
*
* !!memcmp(p, q, n)
*
* This function is faster than memcmp() and bcmp() when byte sequences
* are assumed to always be the same; that makes it best for assertions
* or hash table lookups, assuming 𝑛 is variable (since no gcc builtin)
*
* timingsafe_bcmp n=0 992 picoseconds
* timingsafe_bcmp n=1 1 ns/byte 738 mb/s
* timingsafe_bcmp n=2 826 ps/byte 1,181 mb/s
* timingsafe_bcmp n=3 661 ps/byte 1,476 mb/s
* timingsafe_bcmp n=4 330 ps/byte 2,952 mb/s
* timingsafe_bcmp n=5 264 ps/byte 3,690 mb/s
* timingsafe_bcmp n=6 220 ps/byte 4,428 mb/s
* timingsafe_bcmp n=7 189 ps/byte 5,166 mb/s
* timingsafe_bcmp n=8 124 ps/byte 7,873 mb/s
* timingsafe_bcmp n=9 147 ps/byte 6,643 mb/s
* timingsafe_bcmp n=15 88 ps/byte 11,072 mb/s
* timingsafe_bcmp n=16 62 ps/byte 15,746 mb/s
* timingsafe_bcmp n=17 136 ps/byte 7,170 mb/s
* timingsafe_bcmp n=31 74 ps/byte 13,075 mb/s
* timingsafe_bcmp n=32 72 ps/byte 13,497 mb/s
* timingsafe_bcmp n=33 80 ps/byte 12,179 mb/s
* timingsafe_bcmp n=80 57 ps/byte 16,871 mb/s
* timingsafe_bcmp n=128 49 ps/byte 19,890 mb/s
* timingsafe_bcmp n=256 31 ps/byte 31,493 mb/s
* timingsafe_bcmp n=16384 14 ps/byte 67,941 mb/s
* timingsafe_bcmp n=32768 29 ps/byte 33,121 mb/s
* timingsafe_bcmp n=131072 29 ps/byte 32,949 mb/s
*
* Running time is independent of the byte sequences compared, making
* this safe to use for comparing secret values such as cryptographic
* MACs. In contrast, memcmp() may short-circuit after finding the first
* differing byte.
*
* @return nonzero if unequal, otherwise zero
* @see timingsafe_memcmp()
* @asyncsignalsafe
*/
int timingsafe_bcmp(const void *a, const void *b, size_t n) {
const char *p = a, *q = b;
uint32_t u, u0, u1, u2, u3;
uint64_t w, w0, w1, w2, w3;
if (!IsTiny()) {
if (n >= 8) {
if (n <= 16) {
__builtin_memcpy(&w0, p, 8);
__builtin_memcpy(&w1, q, 8);
__builtin_memcpy(&w2, p + n - 8, 8);
__builtin_memcpy(&w3, q + n - 8, 8);
w = (w0 ^ w1) | (w2 ^ w3);
return w | w >> 32;
} else {
if (IsAsan()) {
__asan_verify(a, n);
__asan_verify(b, n);
}
if (X86_HAVE(AVX)) {
return timingsafe_bcmp_avx(p, q, n);
} else {
return timingsafe_bcmp_sse(p, q, n);
}
}
} else if (n >= 4) {
__builtin_memcpy(&u0, p, 4);
__builtin_memcpy(&u1, q, 4);
__builtin_memcpy(&u2, p + n - 4, 4);
__builtin_memcpy(&u3, q + n - 4, 4);
return (u0 ^ u1) | (u2 ^ u3);
}
}
for (u = 0; n--;) {
u |= p[n] ^ q[n];
}
return u;
}