Avoid legacy instruction penalties on x86

This commit is contained in:
Justine Tunney 2024-07-31 01:02:24 -07:00
parent 1fba310e22
commit 8d8aecb6d9
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
16 changed files with 199 additions and 158 deletions

View file

@ -18,12 +18,13 @@
*/
#include "libc/str/blake2.h"
#include "libc/assert.h"
#include "libc/calls/struct/timespec.h"
#include "libc/mem/mem.h"
#include "libc/stdio/rand.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/str/tab.internal.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/benchmark.h"
#include "libc/testlib/hyperion.h"
#include "libc/testlib/testlib.h"
@ -90,17 +91,18 @@ TEST(BLAKE2B256Test, vectors) {
free(line);
}
/*
 * Benchmark for EZBLAKE2B256 over a range of input lengths: the empty
 * input, short string literals, prefixes of the 256-byte `fun` buffer
 * (31, 32, 63, 64, 128 and 256 bytes), and the whole kHyperion corpus
 * (kHyperionSize bytes).
 *
 * NOTE(review): this span is a diff hunk whose +/- markers are not
 * visible, so two generations of the same benchmark appear together:
 * the EZBENCH_N(...) form and the BENCHMARK(...) form, as well as two
 * spellings of the BENCH(...) header line with a single closing brace.
 * Presumably the EZBENCH_N lines are the removed side and the
 * BENCHMARK lines the added side -- confirm against the real file
 * before editing.
 *
 * NOTE(review): BENCHMARK is not defined in view; its arguments look
 * like (iterations, work per run, code) -- TODO confirm in
 * libc/testlib/benchmark.h.
 */
BENCH(blake2, bench) {
BENCH(blake2, benchmark) {
char fun[256];
/* presumably fills all 256 bytes of `fun` using _rand64 -- confirm rngset semantics */
rngset(fun, 256, _rand64, -1);
EZBENCH_N("blake2b256", 0, EZBLAKE2B256(0, 0));
EZBENCH_N("blake2b256", 8, EZBLAKE2B256("helloooo", 8));
EZBENCH_N("blake2b256", 31, EZBLAKE2B256(fun, 31));
EZBENCH_N("blake2b256", 32, EZBLAKE2B256(fun, 32));
EZBENCH_N("blake2b256", 63, EZBLAKE2B256(fun, 63));
EZBENCH_N("blake2b256", 64, EZBLAKE2B256(fun, 64));
EZBENCH_N("blake2b256", 128, EZBLAKE2B256(fun, 128));
EZBENCH_N("blake2b256", 256, EZBLAKE2B256(fun, 256));
EZBENCH_N("blake2b256", kHyperionSize,
EZBLAKE2B256(kHyperion, kHyperionSize));
/* The BENCHMARK form adds a 1-byte case and wraps every call in __expropriate(). */
BENCHMARK(100, 0, __expropriate(EZBLAKE2B256(0, 0)));
BENCHMARK(100, 1, __expropriate(EZBLAKE2B256("h", 1)));
BENCHMARK(100, 8, __expropriate(EZBLAKE2B256("helloooo", 8)));
BENCHMARK(100, 31, __expropriate(EZBLAKE2B256(fun, 31)));
BENCHMARK(100, 32, __expropriate(EZBLAKE2B256(fun, 32)));
BENCHMARK(100, 63, __expropriate(EZBLAKE2B256(fun, 63)));
BENCHMARK(100, 64, __expropriate(EZBLAKE2B256(fun, 64)));
BENCHMARK(100, 128, __expropriate(EZBLAKE2B256(fun, 128)));
BENCHMARK(100, 256, __expropriate(EZBLAKE2B256(fun, 256)));
BENCHMARK(100, kHyperionSize,
__expropriate(EZBLAKE2B256(kHyperion, kHyperionSize)));
}

View file

@ -16,13 +16,14 @@
limitations under the License.
*/
#include "libc/str/highwayhash64.h"
#include "libc/calls/struct/timespec.h"
#include "libc/inttypes.h"
#include "libc/nexgen32e/crc32.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/rand.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/benchmark.h"
#include "libc/testlib/hyperion.h"
#include "libc/testlib/testlib.h"
#include "third_party/zlib/zlib.h"
@ -100,33 +101,31 @@ TEST(highwayhash64, test) {
/*
 * Benchmark for HighwayHash64 keyed with kTestKey1 over a range of
 * input lengths: the empty input, an 8-byte literal, prefixes of the
 * 256-byte `fun` buffer, and the whole kHyperion corpus.
 *
 * NOTE(review): this span is a diff hunk whose +/- markers are not
 * visible. The EZBENCH_N(...) lines and the BENCHMARK(...) lines are
 * two generations of the same measurements; the lone
 * `EZBENCH_N("highwayhash64", kHyperionSize,` line and the
 * `BENCHMARK(10, kHyperionSize,` line both lead into the single
 * HighwayHash64(kHyperion, ...) continuation line. Presumably the
 * EZBENCH_N side is the removed one -- confirm against the real file.
 *
 * NOTE(review): BENCHMARK is not defined in view; its arguments look
 * like (iterations, work per run, code) -- TODO confirm in
 * libc/testlib/benchmark.h.
 */
BENCH(highwayhash64, newbench) {
char fun[256];
/* presumably fills all 256 bytes of `fun` using _rand64 -- confirm rngset semantics */
rngset(fun, 256, _rand64, -1);
EZBENCH_N("highwayhash64", 0, HighwayHash64(0, 0, kTestKey1));
EZBENCH_N("highwayhash64", 8, HighwayHash64("helloooo", 8, kTestKey1));
EZBENCH_N("highwayhash64", 31, HighwayHash64(fun, 31, kTestKey1));
EZBENCH_N("highwayhash64", 32, HighwayHash64(fun, 32, kTestKey1));
EZBENCH_N("highwayhash64", 63, HighwayHash64(fun, 63, kTestKey1));
EZBENCH_N("highwayhash64", 64, HighwayHash64(fun, 64, kTestKey1));
EZBENCH_N("highwayhash64", 128, HighwayHash64(fun, 128, kTestKey1));
EZBENCH_N("highwayhash64", 256, HighwayHash64(fun, 256, kTestKey1));
EZBENCH_N("highwayhash64", kHyperionSize,
BENCHMARK(10, 0, HighwayHash64(0, 0, kTestKey1));
BENCHMARK(10, 8, HighwayHash64("helloooo", 8, kTestKey1));
BENCHMARK(10, 31, HighwayHash64(fun, 31, kTestKey1));
BENCHMARK(10, 32, HighwayHash64(fun, 32, kTestKey1));
BENCHMARK(10, 63, HighwayHash64(fun, 63, kTestKey1));
BENCHMARK(10, 64, HighwayHash64(fun, 64, kTestKey1));
BENCHMARK(10, 128, HighwayHash64(fun, 128, kTestKey1));
BENCHMARK(10, 256, HighwayHash64(fun, 256, kTestKey1));
BENCHMARK(10, kHyperionSize,
HighwayHash64(kHyperion, kHyperionSize, kTestKey1));
}
/*
 * Side-by-side benchmark of four hash/checksum routines --
 * KnuthMultiplicativeHash32, crc32c, crc32_z and HighwayHash64 -- on a
 * small input (the 5-byte literal "hello") and on a big input (the
 * kHyperion corpus, kHyperionSize bytes).
 *
 * The Knuth and CRC calls are wrapped in __expropriate(), and some
 * inputs are passed through __veil("r", ...); the HighwayHash64 calls
 * are not wrapped. NOTE(review): __expropriate/__veil are presumably
 * optimization barriers that keep the compiler from folding or
 * discarding the call -- confirm against their definitions.
 *
 * NOTE(review): this span is a diff hunk whose +/- markers are not
 * visible. The EZBENCH2(label, donothing, code) lines and the
 * BENCHMARK(10, size, code) lines are two generations of the same
 * measurements; presumably the EZBENCH2 side is the removed one --
 * confirm against the real file before editing.
 */
BENCH(highwayhash64, bench) {
EZBENCH2("knuth small", donothing,
__expropriate(KnuthMultiplicativeHash32(__veil("r", "hello"), 5)));
EZBENCH2("crc32c small", donothing, __expropriate(crc32c(0, "hello", 5)));
EZBENCH2("crc32 small", donothing,
__expropriate(crc32_z(0, __veil("r", "hello"), 5)));
EZBENCH2("highwayhash64 small", donothing,
HighwayHash64((void *)"hello", 5, kTestKey1));
EZBENCH2("crc32 big", donothing,
__expropriate(crc32_z(0, kHyperion, kHyperionSize)));
EZBENCH2("crc32c big", donothing,
__expropriate(crc32c(0, kHyperion, kHyperionSize)));
EZBENCH2("highwayhash64 big", donothing,
HighwayHash64((void *)kHyperion, kHyperionSize, kTestKey1));
EZBENCH2("knuth big", donothing,
__expropriate(KnuthMultiplicativeHash32(__veil("r", kHyperion),
kHyperionSize)));
/* Small-input cases (5 bytes each). */
BENCHMARK(10, 5,
__expropriate(KnuthMultiplicativeHash32(__veil("r", "hello"), 5)));
BENCHMARK(10, 5, __expropriate(crc32c(0, "hello", 5)));
BENCHMARK(10, 5, __expropriate(crc32_z(0, __veil("r", "hello"), 5)));
BENCHMARK(10, 5, HighwayHash64((void *)"hello", 5, kTestKey1));
/* Big-input cases (kHyperionSize bytes each). */
BENCHMARK(10, kHyperionSize,
__expropriate(crc32_z(0, kHyperion, kHyperionSize)));
BENCHMARK(10, kHyperionSize,
__expropriate(crc32c(0, kHyperion, kHyperionSize)));
BENCHMARK(10, kHyperionSize,
HighwayHash64((void *)kHyperion, kHyperionSize, kTestKey1));
BENCHMARK(10, kHyperionSize,
__expropriate(KnuthMultiplicativeHash32(__veil("r", kHyperion),
kHyperionSize)));
}

View file

@ -8,6 +8,7 @@
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/stdio.h"
#include "libc/testlib/benchmark.h"
#include "libc/x/xasprintf.h"
#define EXPENSIVE_TESTS 0
@ -237,20 +238,6 @@ float nothing(float x) {
float (*barrier)(float) = nothing;
/**
 * Times CODE and prints its average cost per unit of work.
 *
 * CODE is executed ITERATIONS times back to back, with a compiler
 * memory barrier before each run. The time elapsed between two
 * timespec_real() readings is divided by WORK_PER_RUN * ITERATIONS,
 * rounded up to a whole nanosecond, and printed together with the
 * iteration count and the source text of CODE.
 *
 * Macro arguments are parenthesized wherever they are used as
 * expressions, the work product is widened to long long before it is
 * multiplied, and a work total below one is clamped to one so the
 * division is always defined. Locals carry a `bench_` prefix and `_`
 * suffix so they cannot capture identifiers that CODE refers to.
 *
 * @param ITERATIONS   int expression; how many times CODE is run
 * @param WORK_PER_RUN units of work performed by one run of CODE
 * @param CODE         expression or statement to measure
 */
#define BENCH(ITERATIONS, WORK_PER_RUN, CODE)                               \
  do {                                                                      \
    struct timespec bench_start_ = timespec_real();                         \
    for (int bench_i_ = 0; bench_i_ < (ITERATIONS); ++bench_i_) {           \
      asm volatile("" ::: "memory");                                        \
      CODE;                                                                 \
    }                                                                       \
    long long bench_elapsed_ =                                              \
        timespec_tonanos(timespec_sub(timespec_real(), bench_start_));      \
    long long bench_work_ = (long long)(WORK_PER_RUN) * (ITERATIONS);       \
    if (bench_work_ < 1)                                                    \
      bench_work_ = 1;                                                      \
    long bench_nanos_ = (bench_elapsed_ + bench_work_ - 1) / bench_work_;   \
    printf("%8ld ns %2dx %s\n", bench_nanos_, (ITERATIONS), #CODE);         \
  } while (0)
int main() {
ShowCrashReports();
@ -270,12 +257,12 @@ int main() {
test_fdotf_naive();
test_fdotf_hefty();
test_fdotf_ruler();
BENCH(20, 1, (kahan = barrier(fdotf_kahan(A, B, n))));
BENCH(20, 1, (dubble = barrier(fdotf_dubble(A, B, n))));
BENCH(20, 1, (naive = barrier(fdotf_naive(A, B, n))));
BENCH(20, 1, (recursive = barrier(fdotf_recursive(A, B, n))));
BENCH(20, 1, (ruler = barrier(fdotf_ruler(A, B, n))));
BENCH(20, 1, (hefty = barrier(fdotf_hefty(A, B, n))));
BENCHMARK(20, 1, (kahan = barrier(fdotf_kahan(A, B, n))));
BENCHMARK(20, 1, (dubble = barrier(fdotf_dubble(A, B, n))));
BENCHMARK(20, 1, (naive = barrier(fdotf_naive(A, B, n))));
BENCHMARK(20, 1, (recursive = barrier(fdotf_recursive(A, B, n))));
BENCHMARK(20, 1, (ruler = barrier(fdotf_ruler(A, B, n))));
BENCHMARK(20, 1, (hefty = barrier(fdotf_hefty(A, B, n))));
printf("dubble = %f (%g)\n", dubble, fabs(dubble - dubble));
printf("kahan = %f (%g)\n", kahan, fabs(kahan - dubble));
printf("naive = %f (%g)\n", naive, fabs(naive - dubble));

View file

@ -8,6 +8,7 @@
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/stdio.h"
#include "libc/testlib/benchmark.h"
#include "libc/x/xasprintf.h"
#define EXPENSIVE_TESTS 0
@ -225,20 +226,6 @@ float nothing(float x) {
float (*barrier)(float) = nothing;
/**
 * Times CODE and prints its average cost per unit of work.
 *
 * CODE is executed ITERATIONS times back to back, with a compiler
 * memory barrier before each run. The time elapsed between two
 * timespec_real() readings is divided by WORK_PER_RUN * ITERATIONS,
 * rounded up to a whole nanosecond, and printed together with the
 * iteration count and the source text of CODE.
 *
 * Macro arguments are parenthesized wherever they are used as
 * expressions, the work product is widened to long long before it is
 * multiplied, and a work total below one is clamped to one so the
 * division is always defined. Locals carry a `bench_` prefix and `_`
 * suffix so they cannot capture identifiers that CODE refers to.
 *
 * @param ITERATIONS   int expression; how many times CODE is run
 * @param WORK_PER_RUN units of work performed by one run of CODE
 * @param CODE         expression or statement to measure
 */
#define BENCH(ITERATIONS, WORK_PER_RUN, CODE)                               \
  do {                                                                      \
    struct timespec bench_start_ = timespec_real();                         \
    for (int bench_i_ = 0; bench_i_ < (ITERATIONS); ++bench_i_) {           \
      asm volatile("" ::: "memory");                                        \
      CODE;                                                                 \
    }                                                                       \
    long long bench_elapsed_ =                                              \
        timespec_tonanos(timespec_sub(timespec_real(), bench_start_));      \
    long long bench_work_ = (long long)(WORK_PER_RUN) * (ITERATIONS);       \
    if (bench_work_ < 1)                                                    \
      bench_work_ = 1;                                                      \
    long bench_nanos_ = (bench_elapsed_ + bench_work_ - 1) / bench_work_;   \
    printf("%8ld ns %2dx %s\n", bench_nanos_, (ITERATIONS), #CODE);         \
  } while (0)
int main() {
ShowCrashReports();
@ -255,12 +242,12 @@ int main() {
test_fsumf_naive();
test_fsumf_hefty();
test_fsumf_ruler();
BENCH(20, 1, (kahan = barrier(fsumf_kahan(p, n))));
BENCH(20, 1, (dubble = barrier(fsumf_dubble(p, n))));
BENCH(20, 1, (naive = barrier(fsumf_naive(p, n))));
BENCH(20, 1, (recursive = barrier(fsumf_recursive(p, n))));
BENCH(20, 1, (ruler = barrier(fsumf_ruler(p, n))));
BENCH(20, 1, (hefty = barrier(fsumf_hefty(p, n))));
BENCHMARK(20, 1, (kahan = barrier(fsumf_kahan(p, n))));
BENCHMARK(20, 1, (dubble = barrier(fsumf_dubble(p, n))));
BENCHMARK(20, 1, (naive = barrier(fsumf_naive(p, n))));
BENCHMARK(20, 1, (recursive = barrier(fsumf_recursive(p, n))));
BENCHMARK(20, 1, (ruler = barrier(fsumf_ruler(p, n))));
BENCHMARK(20, 1, (hefty = barrier(fsumf_hefty(p, n))));
printf("dubble = %f (%g)\n", dubble, fabs(dubble - dubble));
printf("kahan = %f (%g)\n", kahan, fabs(kahan - dubble));
printf("naive = %f (%g)\n", naive, fabs(naive - dubble));