mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-02-07 06:53:33 +00:00
Avoid legacy instruction penalties on x86
This commit is contained in:
parent
1fba310e22
commit
8d8aecb6d9
16 changed files with 199 additions and 158 deletions
2
Makefile
2
Makefile
|
@ -540,7 +540,7 @@ COSMOCC_HDRS = \
|
||||||
$(foreach x,$(COSMOCC_PKGS),$($(x)_HDRS)) \
|
$(foreach x,$(COSMOCC_PKGS),$($(x)_HDRS)) \
|
||||||
$(foreach x,$(COSMOCC_PKGS),$($(x)_INCS))
|
$(foreach x,$(COSMOCC_PKGS),$($(x)_INCS))
|
||||||
|
|
||||||
o/cosmocc.h.txt: Makefile
|
o/cosmocc.h.txt: Makefile libc $(MAKEFILES) $(call uniq,$(foreach x,$(HDRS) $(INCS),$(dir $(x)))) $(HDRS) $(INCS)
|
||||||
$(file >$@, $(call uniq,$(COSMOCC_HDRS)))
|
$(file >$@, $(call uniq,$(COSMOCC_HDRS)))
|
||||||
|
|
||||||
COSMOPOLITAN_H_ROOT_HDRS = \
|
COSMOPOLITAN_H_ROOT_HDRS = \
|
||||||
|
|
|
@ -154,6 +154,66 @@ o/$(MODE)/libc/calls/sigcrashsig.o: private \
|
||||||
CFLAGS += \
|
CFLAGS += \
|
||||||
-Os
|
-Os
|
||||||
|
|
||||||
|
# avoid the legacy sse decoding penalty on avx systems
|
||||||
|
o//libc/calls/cfmakeraw.o \
|
||||||
|
o//libc/calls/clock_gettime-xnu.o \
|
||||||
|
o//libc/calls/CPU_AND.o \
|
||||||
|
o//libc/calls/CPU_OR.o \
|
||||||
|
o//libc/calls/CPU_XOR.o \
|
||||||
|
o//libc/calls/dl_iterate_phdr.o \
|
||||||
|
o//libc/calls/dup-nt.o \
|
||||||
|
o//libc/calls/fcntl-nt.o \
|
||||||
|
o//libc/calls/flock-nt.o \
|
||||||
|
o//libc/calls/fstatfs-nt.o \
|
||||||
|
o//libc/calls/fstat-nt.o \
|
||||||
|
o//libc/calls/futimesat.o \
|
||||||
|
o//libc/calls/futimes.o \
|
||||||
|
o//libc/calls/getrlimit.o \
|
||||||
|
o//libc/calls/gettimeofday.o \
|
||||||
|
o//libc/calls/ioctl.o \
|
||||||
|
o//libc/calls/lutimes.o \
|
||||||
|
o//libc/calls/metaflock.o \
|
||||||
|
o//libc/calls/ntaccesscheck.o \
|
||||||
|
o//libc/calls/ntspawn.o \
|
||||||
|
o//libc/calls/open-nt.o \
|
||||||
|
o//libc/calls/pledge-linux.o \
|
||||||
|
o//libc/calls/ppoll.o \
|
||||||
|
o//libc/calls/preadv.o \
|
||||||
|
o//libc/calls/pselect.o \
|
||||||
|
o//libc/calls/pwritev.o \
|
||||||
|
o//libc/calls/read-nt.o \
|
||||||
|
o//libc/calls/readv.o \
|
||||||
|
o//libc/calls/readwrite-nt.o \
|
||||||
|
o//libc/calls/releasefd.o \
|
||||||
|
o//libc/calls/select.o \
|
||||||
|
o//libc/calls/sigaction.o \
|
||||||
|
o//libc/calls/sigenter-freebsd.o \
|
||||||
|
o//libc/calls/sigenter-netbsd.o \
|
||||||
|
o//libc/calls/sigenter-openbsd.o \
|
||||||
|
o//libc/calls/sigenter-xnu.o \
|
||||||
|
o//libc/calls/sigignore.o \
|
||||||
|
o//libc/calls/siginfo2cosmo.o \
|
||||||
|
o//libc/calls/signal.o \
|
||||||
|
o//libc/calls/sig.o \
|
||||||
|
o//libc/calls/sigtimedwait.o \
|
||||||
|
o//libc/calls/stat2cosmo.o \
|
||||||
|
o//libc/calls/statfs2cosmo.o \
|
||||||
|
o//libc/calls/statfs2statvfs.o \
|
||||||
|
o//libc/calls/tcgetattr-nt.o \
|
||||||
|
o//libc/calls/tcgetattr.o \
|
||||||
|
o//libc/calls/tcgetwinsize-nt.o \
|
||||||
|
o//libc/calls/tcsetattr-nt.o \
|
||||||
|
o//libc/calls/tcsetwinsize-nt.o \
|
||||||
|
o//libc/calls/termios2host.o \
|
||||||
|
o//libc/calls/timespec_sleep.o \
|
||||||
|
o//libc/calls/uname.o \
|
||||||
|
o//libc/calls/utimensat-old.o \
|
||||||
|
o//libc/calls/utimes.o \
|
||||||
|
o//libc/calls/winexec.o \
|
||||||
|
o//libc/calls/writev.o: private \
|
||||||
|
COPTS += \
|
||||||
|
-mgeneral-regs-only
|
||||||
|
|
||||||
# these assembly files are safe to build on aarch64
|
# these assembly files are safe to build on aarch64
|
||||||
o/$(MODE)/libc/calls/getcontext.o: libc/calls/getcontext.S
|
o/$(MODE)/libc/calls/getcontext.o: libc/calls/getcontext.S
|
||||||
@$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $<
|
@$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $<
|
||||||
|
|
|
@ -97,6 +97,14 @@ o/$(MODE)/libc/intrin/x86.o: private \
|
||||||
-fpatchable-function-entry=0 \
|
-fpatchable-function-entry=0 \
|
||||||
-Os
|
-Os
|
||||||
|
|
||||||
|
# avoid the legacy sse decoding penalty on avx systems
|
||||||
|
o//libc/intrin/dll.o \
|
||||||
|
o//libc/intrin/fds.o \
|
||||||
|
o//libc/intrin/mmap.o \
|
||||||
|
o//libc/intrin/demangle.o: private \
|
||||||
|
CFLAGS += \
|
||||||
|
-mgeneral-regs-only
|
||||||
|
|
||||||
# these assembly files are safe to build on aarch64
|
# these assembly files are safe to build on aarch64
|
||||||
o/$(MODE)/libc/intrin/aarch64/%.o: libc/intrin/aarch64/%.S
|
o/$(MODE)/libc/intrin/aarch64/%.o: libc/intrin/aarch64/%.S
|
||||||
@$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $<
|
@$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $<
|
||||||
|
|
|
@ -1,25 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
|
||||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
||||||
│ Copyright 2022 Justine Alexandra Roberts Tunney │
|
|
||||||
│ │
|
|
||||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
|
||||||
│ any purpose with or without fee is hereby granted, provided that the │
|
|
||||||
│ above copyright notice and this permission notice appear in all copies. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
|
||||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
|
||||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
|
||||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
|
||||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
|
||||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
|
||||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
|
||||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/str/locale.h"
|
|
||||||
#include "libc/sysv/errfuns.h"
|
|
||||||
|
|
||||||
locale_t uselocale(locale_t l) {
|
|
||||||
// TODO: implement me!
|
|
||||||
return 0;
|
|
||||||
}
|
|
|
@ -22,6 +22,7 @@ LIBC_TESTLIB_A_ASSETS = \
|
||||||
LIBC_TESTLIB_A_HDRS = \
|
LIBC_TESTLIB_A_HDRS = \
|
||||||
libc/testlib/aspect.internal.h \
|
libc/testlib/aspect.internal.h \
|
||||||
libc/testlib/bench.h \
|
libc/testlib/bench.h \
|
||||||
|
libc/testlib/benchmark.h \
|
||||||
libc/testlib/blocktronics.h \
|
libc/testlib/blocktronics.h \
|
||||||
libc/testlib/ezbench.h \
|
libc/testlib/ezbench.h \
|
||||||
libc/testlib/fastrandomstring.h \
|
libc/testlib/fastrandomstring.h \
|
||||||
|
|
26
libc/testlib/benchmark.h
Normal file
26
libc/testlib/benchmark.h
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
#ifndef COSMOPOLITAN_LIBC_TESTLIB_BENCHMARK_H_
|
||||||
|
#define COSMOPOLITAN_LIBC_TESTLIB_BENCHMARK_H_
|
||||||
|
#include "libc/calls/struct/timespec.h"
|
||||||
|
#include "libc/stdio/stdio.h"
|
||||||
|
COSMOPOLITAN_C_START_
|
||||||
|
|
||||||
|
/**
 * Times CODE and prints the average nanoseconds per unit of work.
 *
 * CODE is run ITERATIONS times between two timespec_real() readings.
 * The elapsed time is divided by ITERATIONS times WORK_PER_RUN (a
 * WORK_PER_RUN of zero counts as one) and printed together with the
 * iteration count and the stringified CODE. Figures below 1000 ns are
 * printed with a fractional part; larger ones are truncated to whole
 * nanoseconds.
 *
 * @param ITERATIONS is number of times to run CODE; it is evaluated
 *     once and must fit in an int
 * @param WORK_PER_RUN is units of work done by one run of CODE, e.g.
 *     bytes processed, so the printed figure is per-unit
 * @param CODE is statement or expression to measure; the loop counter
 *     is visible to it as `__i`
 */
#define BENCHMARK(ITERATIONS, WORK_PER_RUN, CODE)                             \
  do {                                                                        \
    /* locals are underscore-prefixed so CODE can't capture them */           \
    const int __iters = (ITERATIONS);                                         \
    struct timespec __start = timespec_real();                                \
    for (int __i = 0; __i < __iters; ++__i) {                                 \
      /* compiler barrier: keeps memory accesses from moving across runs */   \
      asm volatile("" ::: "memory");                                          \
      CODE;                                                                   \
    }                                                                         \
    /* stop the clock before doing any of the bookkeeping below */            \
    struct timespec __took = timespec_sub(timespec_real(), __start);          \
    /* multiply as double so large int operands can't overflow; a zero */     \
    /* iteration count is treated as one to avoid dividing by zero */         \
    double __work = (double)((WORK_PER_RUN) ? (WORK_PER_RUN) : 1) *           \
                    (__iters > 0 ? __iters : 1);                              \
    /* plain division: no round-up bias is added to the average */            \
    double __nanos = timespec_tonanos(__took) / __work;                       \
    if (__nanos < 1000) {                                                     \
      printf("%10g ns %2dx %s\n", __nanos, __iters, #CODE);                   \
    } else {                                                                  \
      printf("%10lld ns %2dx %s\n", (long long)__nanos, __iters, #CODE);      \
    }                                                                         \
  } while (0)
|
||||||
|
|
||||||
|
COSMOPOLITAN_C_END_
|
||||||
|
#endif /* COSMOPOLITAN_LIBC_TESTLIB_BENCHMARK_H_ */
|
|
@ -22,26 +22,12 @@
|
||||||
#include "libc/mem/leaks.h"
|
#include "libc/mem/leaks.h"
|
||||||
#include "libc/stdio/stdio.h"
|
#include "libc/stdio/stdio.h"
|
||||||
#include "libc/sysv/consts/rusage.h"
|
#include "libc/sysv/consts/rusage.h"
|
||||||
|
#include "libc/testlib/benchmark.h"
|
||||||
|
|
||||||
// #include <set>
|
// #include <set>
|
||||||
// #define ctl std
|
// #define ctl std
|
||||||
// #define check() size()
|
// #define check() size()
|
||||||
|
|
||||||
#define BENCH(ITERATIONS, WORK_PER_RUN, CODE) \
|
|
||||||
do { \
|
|
||||||
struct timespec start = timespec_real(); \
|
|
||||||
for (int __i = 0; __i < ITERATIONS; ++__i) { \
|
|
||||||
asm volatile("" ::: "memory"); \
|
|
||||||
CODE; \
|
|
||||||
} \
|
|
||||||
long long work = (WORK_PER_RUN) * (ITERATIONS); \
|
|
||||||
double nanos = \
|
|
||||||
(timespec_tonanos(timespec_sub(timespec_real(), start)) + work - \
|
|
||||||
1) / \
|
|
||||||
(double)work; \
|
|
||||||
printf("%10g ns %2dx %s\n", nanos, (ITERATIONS), #CODE); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
int
|
int
|
||||||
rand32(void)
|
rand32(void)
|
||||||
{
|
{
|
||||||
|
@ -68,19 +54,19 @@ main()
|
||||||
{
|
{
|
||||||
long x = 0;
|
long x = 0;
|
||||||
ctl::set<long> s;
|
ctl::set<long> s;
|
||||||
BENCH(1000000, 1, s.insert(rand32() % 1000000));
|
BENCHMARK(1000000, 1, s.insert(rand32() % 1000000));
|
||||||
// s.check();
|
// s.check();
|
||||||
BENCH(1000000, 1, {
|
BENCHMARK(1000000, 1, {
|
||||||
auto i = s.find(rand32() % 1000000);
|
auto i = s.find(rand32() % 1000000);
|
||||||
if (i != s.end())
|
if (i != s.end())
|
||||||
x += *i;
|
x += *i;
|
||||||
});
|
});
|
||||||
BENCH(1000000, 1, {
|
BENCHMARK(1000000, 1, {
|
||||||
auto i = s.lower_bound(rand32() % 1000000);
|
auto i = s.lower_bound(rand32() % 1000000);
|
||||||
if (i != s.end())
|
if (i != s.end())
|
||||||
x += *i;
|
x += *i;
|
||||||
});
|
});
|
||||||
BENCH(1000000, 1, s.erase(rand32() % 1000000));
|
BENCHMARK(1000000, 1, s.erase(rand32() % 1000000));
|
||||||
eat(x);
|
eat(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,27 +20,13 @@
|
||||||
#include "ctl/utility.h"
|
#include "ctl/utility.h"
|
||||||
#include "libc/dce.h"
|
#include "libc/dce.h"
|
||||||
#include "libc/mem/leaks.h"
|
#include "libc/mem/leaks.h"
|
||||||
|
#include "libc/testlib/benchmark.h"
|
||||||
|
|
||||||
#include "libc/calls/struct/timespec.h"
|
#include "libc/calls/struct/timespec.h"
|
||||||
#include "libc/runtime/runtime.h"
|
#include "libc/runtime/runtime.h"
|
||||||
#include "libc/stdio/stdio.h"
|
#include "libc/stdio/stdio.h"
|
||||||
#include "libc/str/str.h"
|
#include "libc/str/str.h"
|
||||||
|
|
||||||
#define BENCH(ITERATIONS, WORK_PER_RUN, CODE) \
|
|
||||||
do { \
|
|
||||||
struct timespec start = timespec_real(); \
|
|
||||||
for (int __i = 0; __i < ITERATIONS; ++__i) { \
|
|
||||||
asm volatile("" ::: "memory"); \
|
|
||||||
CODE; \
|
|
||||||
} \
|
|
||||||
long long work = (WORK_PER_RUN) * (ITERATIONS); \
|
|
||||||
double nanos = \
|
|
||||||
(timespec_tonanos(timespec_sub(timespec_real(), start)) + work - \
|
|
||||||
1) / \
|
|
||||||
(double)work; \
|
|
||||||
printf("%10g ns %2dx %s\n", nanos, (ITERATIONS), #CODE); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
const char* big_c = "aaaaaaaaaaaaaaaaaaaaaaaa";
|
const char* big_c = "aaaaaaaaaaaaaaaaaaaaaaaa";
|
||||||
const char* small_c = "aaaaaaaaaaaaaaaaaaaaaaa";
|
const char* small_c = "aaaaaaaaaaaaaaaaaaaaaaa";
|
||||||
|
|
||||||
|
@ -55,98 +41,98 @@ main()
|
||||||
{
|
{
|
||||||
const ctl::string_view big(big_c), small(small_c);
|
const ctl::string_view big(big_c), small(small_c);
|
||||||
|
|
||||||
BENCH(ITERATIONS * 10, 1, {
|
BENCHMARK(ITERATIONS * 10, 1, {
|
||||||
ctl::string s;
|
ctl::string s;
|
||||||
s.append("hello ");
|
s.append("hello ");
|
||||||
s.append("world");
|
s.append("world");
|
||||||
});
|
});
|
||||||
|
|
||||||
BENCH(ITERATIONS, 8, {
|
BENCHMARK(ITERATIONS, 8, {
|
||||||
ctl::string s;
|
ctl::string s;
|
||||||
for (int i = 0; i < 8; ++i) {
|
for (int i = 0; i < 8; ++i) {
|
||||||
s.append('a');
|
s.append('a');
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
BENCH(ITERATIONS, 16, {
|
BENCHMARK(ITERATIONS, 16, {
|
||||||
ctl::string s;
|
ctl::string s;
|
||||||
for (int i = 0; i < 16; ++i) {
|
for (int i = 0; i < 16; ++i) {
|
||||||
s.append('a');
|
s.append('a');
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
BENCH(ITERATIONS, 23, {
|
BENCHMARK(ITERATIONS, 23, {
|
||||||
ctl::string s;
|
ctl::string s;
|
||||||
for (int i = 0; i < 23; ++i) {
|
for (int i = 0; i < 23; ++i) {
|
||||||
s.append('a');
|
s.append('a');
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
BENCH(ITERATIONS, 24, {
|
BENCHMARK(ITERATIONS, 24, {
|
||||||
ctl::string s;
|
ctl::string s;
|
||||||
for (int i = 0; i < 24; ++i) {
|
for (int i = 0; i < 24; ++i) {
|
||||||
s.append('a');
|
s.append('a');
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
BENCH(ITERATIONS, 32, {
|
BENCHMARK(ITERATIONS, 32, {
|
||||||
ctl::string s;
|
ctl::string s;
|
||||||
for (int i = 0; i < 32; ++i) {
|
for (int i = 0; i < 32; ++i) {
|
||||||
s.append('a');
|
s.append('a');
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
BENCH(ITERATIONS, 1, { ctl::string s(small_c); });
|
BENCHMARK(ITERATIONS, 1, { ctl::string s(small_c); });
|
||||||
|
|
||||||
BENCH(ITERATIONS, 1, { ctl::string s(small); });
|
BENCHMARK(ITERATIONS, 1, { ctl::string s(small); });
|
||||||
|
|
||||||
{
|
{
|
||||||
ctl::string small_copy("hello world");
|
ctl::string small_copy("hello world");
|
||||||
BENCH(ITERATIONS, 1, { ctl::string s2(small_copy); });
|
BENCHMARK(ITERATIONS, 1, { ctl::string s2(small_copy); });
|
||||||
}
|
}
|
||||||
|
|
||||||
BENCH(ITERATIONS, 1, {
|
BENCHMARK(ITERATIONS, 1, {
|
||||||
ctl::string s(small);
|
ctl::string s(small);
|
||||||
ctl::string s2(ctl::move(s));
|
ctl::string s2(ctl::move(s));
|
||||||
});
|
});
|
||||||
|
|
||||||
BENCH(ITERATIONS, 1, {
|
BENCHMARK(ITERATIONS, 1, {
|
||||||
ctl::string s(small);
|
ctl::string s(small);
|
||||||
ctl::string s2(s);
|
ctl::string s2(s);
|
||||||
});
|
});
|
||||||
|
|
||||||
BENCH(ITERATIONS, 1, { ctl::string s(big_c); });
|
BENCHMARK(ITERATIONS, 1, { ctl::string s(big_c); });
|
||||||
|
|
||||||
BENCH(ITERATIONS, 1, { ctl::string s(big); });
|
BENCHMARK(ITERATIONS, 1, { ctl::string s(big); });
|
||||||
|
|
||||||
{
|
{
|
||||||
ctl::string big_copy(big);
|
ctl::string big_copy(big);
|
||||||
BENCH(ITERATIONS, 1, { ctl::string s2(big_copy); });
|
BENCHMARK(ITERATIONS, 1, { ctl::string s2(big_copy); });
|
||||||
}
|
}
|
||||||
|
|
||||||
BENCH(ITERATIONS, 1, {
|
BENCHMARK(ITERATIONS, 1, {
|
||||||
ctl::string s(big);
|
ctl::string s(big);
|
||||||
ctl::string s2(ctl::move(s));
|
ctl::string s2(ctl::move(s));
|
||||||
});
|
});
|
||||||
|
|
||||||
BENCH(ITERATIONS, 1, {
|
BENCHMARK(ITERATIONS, 1, {
|
||||||
ctl::string s(big);
|
ctl::string s(big);
|
||||||
ctl::string s2(s);
|
ctl::string s2(s);
|
||||||
});
|
});
|
||||||
|
|
||||||
BENCH(ITERATIONS, 1, { ctl::string s(23, 'a'); });
|
BENCHMARK(ITERATIONS, 1, { ctl::string s(23, 'a'); });
|
||||||
|
|
||||||
BENCH(ITERATIONS, 1, { ctl::string s(24, 'a'); });
|
BENCHMARK(ITERATIONS, 1, { ctl::string s(24, 'a'); });
|
||||||
|
|
||||||
{
|
{
|
||||||
ctl::string s(5, 'a');
|
ctl::string s(5, 'a');
|
||||||
BENCH(ITERATIONS, 1, { ctl::string_view s2(s); });
|
BENCHMARK(ITERATIONS, 1, { ctl::string_view s2(s); });
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
ctl::string big_trunc(48, 'a');
|
ctl::string big_trunc(48, 'a');
|
||||||
big_trunc.resize(4);
|
big_trunc.resize(4);
|
||||||
BENCH(ITERATIONS, 1, { ctl::string s(big_trunc); });
|
BENCHMARK(ITERATIONS, 1, { ctl::string s(big_trunc); });
|
||||||
}
|
}
|
||||||
|
|
||||||
CheckForMemoryLeaks();
|
CheckForMemoryLeaks();
|
||||||
|
|
|
@ -18,12 +18,13 @@
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/str/blake2.h"
|
#include "libc/str/blake2.h"
|
||||||
#include "libc/assert.h"
|
#include "libc/assert.h"
|
||||||
|
#include "libc/calls/struct/timespec.h"
|
||||||
#include "libc/mem/mem.h"
|
#include "libc/mem/mem.h"
|
||||||
#include "libc/stdio/rand.h"
|
#include "libc/stdio/rand.h"
|
||||||
#include "libc/stdio/stdio.h"
|
#include "libc/stdio/stdio.h"
|
||||||
#include "libc/str/str.h"
|
#include "libc/str/str.h"
|
||||||
#include "libc/str/tab.internal.h"
|
#include "libc/str/tab.internal.h"
|
||||||
#include "libc/testlib/ezbench.h"
|
#include "libc/testlib/benchmark.h"
|
||||||
#include "libc/testlib/hyperion.h"
|
#include "libc/testlib/hyperion.h"
|
||||||
#include "libc/testlib/testlib.h"
|
#include "libc/testlib/testlib.h"
|
||||||
|
|
||||||
|
@ -90,17 +91,18 @@ TEST(BLAKE2B256Test, vectors) {
|
||||||
free(line);
|
free(line);
|
||||||
}
|
}
|
||||||
|
|
||||||
BENCH(blake2, bench) {
|
BENCH(blake2, benchmark) {
|
||||||
char fun[256];
|
char fun[256];
|
||||||
rngset(fun, 256, _rand64, -1);
|
rngset(fun, 256, _rand64, -1);
|
||||||
EZBENCH_N("blake2b256", 0, EZBLAKE2B256(0, 0));
|
BENCHMARK(100, 0, __expropriate(EZBLAKE2B256(0, 0)));
|
||||||
EZBENCH_N("blake2b256", 8, EZBLAKE2B256("helloooo", 8));
|
BENCHMARK(100, 1, __expropriate(EZBLAKE2B256("h", 1)));
|
||||||
EZBENCH_N("blake2b256", 31, EZBLAKE2B256(fun, 31));
|
BENCHMARK(100, 8, __expropriate(EZBLAKE2B256("helloooo", 8)));
|
||||||
EZBENCH_N("blake2b256", 32, EZBLAKE2B256(fun, 32));
|
BENCHMARK(100, 31, __expropriate(EZBLAKE2B256(fun, 31)));
|
||||||
EZBENCH_N("blake2b256", 63, EZBLAKE2B256(fun, 63));
|
BENCHMARK(100, 32, __expropriate(EZBLAKE2B256(fun, 32)));
|
||||||
EZBENCH_N("blake2b256", 64, EZBLAKE2B256(fun, 64));
|
BENCHMARK(100, 63, __expropriate(EZBLAKE2B256(fun, 63)));
|
||||||
EZBENCH_N("blake2b256", 128, EZBLAKE2B256(fun, 128));
|
BENCHMARK(100, 64, __expropriate(EZBLAKE2B256(fun, 64)));
|
||||||
EZBENCH_N("blake2b256", 256, EZBLAKE2B256(fun, 256));
|
BENCHMARK(100, 128, __expropriate(EZBLAKE2B256(fun, 128)));
|
||||||
EZBENCH_N("blake2b256", kHyperionSize,
|
BENCHMARK(100, 256, __expropriate(EZBLAKE2B256(fun, 256)));
|
||||||
EZBLAKE2B256(kHyperion, kHyperionSize));
|
BENCHMARK(100, kHyperionSize,
|
||||||
|
__expropriate(EZBLAKE2B256(kHyperion, kHyperionSize)));
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,13 +16,14 @@
|
||||||
│ limitations under the License. │
|
│ limitations under the License. │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/str/highwayhash64.h"
|
#include "libc/str/highwayhash64.h"
|
||||||
|
#include "libc/calls/struct/timespec.h"
|
||||||
#include "libc/inttypes.h"
|
#include "libc/inttypes.h"
|
||||||
#include "libc/nexgen32e/crc32.h"
|
#include "libc/nexgen32e/crc32.h"
|
||||||
#include "libc/runtime/runtime.h"
|
#include "libc/runtime/runtime.h"
|
||||||
#include "libc/stdio/rand.h"
|
#include "libc/stdio/rand.h"
|
||||||
#include "libc/stdio/stdio.h"
|
#include "libc/stdio/stdio.h"
|
||||||
#include "libc/str/str.h"
|
#include "libc/str/str.h"
|
||||||
#include "libc/testlib/ezbench.h"
|
#include "libc/testlib/benchmark.h"
|
||||||
#include "libc/testlib/hyperion.h"
|
#include "libc/testlib/hyperion.h"
|
||||||
#include "libc/testlib/testlib.h"
|
#include "libc/testlib/testlib.h"
|
||||||
#include "third_party/zlib/zlib.h"
|
#include "third_party/zlib/zlib.h"
|
||||||
|
@ -100,33 +101,31 @@ TEST(highwayhash64, test) {
|
||||||
BENCH(highwayhash64, newbench) {
|
BENCH(highwayhash64, newbench) {
|
||||||
char fun[256];
|
char fun[256];
|
||||||
rngset(fun, 256, _rand64, -1);
|
rngset(fun, 256, _rand64, -1);
|
||||||
EZBENCH_N("highwayhash64", 0, HighwayHash64(0, 0, kTestKey1));
|
BENCHMARK(10, 0, HighwayHash64(0, 0, kTestKey1));
|
||||||
EZBENCH_N("highwayhash64", 8, HighwayHash64("helloooo", 8, kTestKey1));
|
BENCHMARK(10, 8, HighwayHash64("helloooo", 8, kTestKey1));
|
||||||
EZBENCH_N("highwayhash64", 31, HighwayHash64(fun, 31, kTestKey1));
|
BENCHMARK(10, 31, HighwayHash64(fun, 31, kTestKey1));
|
||||||
EZBENCH_N("highwayhash64", 32, HighwayHash64(fun, 32, kTestKey1));
|
BENCHMARK(10, 32, HighwayHash64(fun, 32, kTestKey1));
|
||||||
EZBENCH_N("highwayhash64", 63, HighwayHash64(fun, 63, kTestKey1));
|
BENCHMARK(10, 63, HighwayHash64(fun, 63, kTestKey1));
|
||||||
EZBENCH_N("highwayhash64", 64, HighwayHash64(fun, 64, kTestKey1));
|
BENCHMARK(10, 64, HighwayHash64(fun, 64, kTestKey1));
|
||||||
EZBENCH_N("highwayhash64", 128, HighwayHash64(fun, 128, kTestKey1));
|
BENCHMARK(10, 128, HighwayHash64(fun, 128, kTestKey1));
|
||||||
EZBENCH_N("highwayhash64", 256, HighwayHash64(fun, 256, kTestKey1));
|
BENCHMARK(10, 256, HighwayHash64(fun, 256, kTestKey1));
|
||||||
EZBENCH_N("highwayhash64", kHyperionSize,
|
BENCHMARK(10, kHyperionSize,
|
||||||
HighwayHash64(kHyperion, kHyperionSize, kTestKey1));
|
HighwayHash64(kHyperion, kHyperionSize, kTestKey1));
|
||||||
}
|
}
|
||||||
|
|
||||||
BENCH(highwayhash64, bench) {
|
BENCH(highwayhash64, bench) {
|
||||||
EZBENCH2("knuth small", donothing,
|
BENCHMARK(10, 5,
|
||||||
__expropriate(KnuthMultiplicativeHash32(__veil("r", "hello"), 5)));
|
__expropriate(KnuthMultiplicativeHash32(__veil("r", "hello"), 5)));
|
||||||
EZBENCH2("crc32c small", donothing, __expropriate(crc32c(0, "hello", 5)));
|
BENCHMARK(10, 5, __expropriate(crc32c(0, "hello", 5)));
|
||||||
EZBENCH2("crc32 small", donothing,
|
BENCHMARK(10, 5, __expropriate(crc32_z(0, __veil("r", "hello"), 5)));
|
||||||
__expropriate(crc32_z(0, __veil("r", "hello"), 5)));
|
BENCHMARK(10, 5, HighwayHash64((void *)"hello", 5, kTestKey1));
|
||||||
EZBENCH2("highwayhash64 small", donothing,
|
BENCHMARK(10, kHyperionSize,
|
||||||
HighwayHash64((void *)"hello", 5, kTestKey1));
|
__expropriate(crc32_z(0, kHyperion, kHyperionSize)));
|
||||||
EZBENCH2("crc32 big", donothing,
|
BENCHMARK(10, kHyperionSize,
|
||||||
__expropriate(crc32_z(0, kHyperion, kHyperionSize)));
|
__expropriate(crc32c(0, kHyperion, kHyperionSize)));
|
||||||
EZBENCH2("crc32c big", donothing,
|
BENCHMARK(10, kHyperionSize,
|
||||||
__expropriate(crc32c(0, kHyperion, kHyperionSize)));
|
HighwayHash64((void *)kHyperion, kHyperionSize, kTestKey1));
|
||||||
EZBENCH2("highwayhash64 big", donothing,
|
BENCHMARK(10, kHyperionSize,
|
||||||
HighwayHash64((void *)kHyperion, kHyperionSize, kTestKey1));
|
__expropriate(KnuthMultiplicativeHash32(__veil("r", kHyperion),
|
||||||
EZBENCH2("knuth big", donothing,
|
kHyperionSize)));
|
||||||
__expropriate(KnuthMultiplicativeHash32(__veil("r", kHyperion),
|
|
||||||
kHyperionSize)));
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#include "libc/mem/mem.h"
|
#include "libc/mem/mem.h"
|
||||||
#include "libc/runtime/runtime.h"
|
#include "libc/runtime/runtime.h"
|
||||||
#include "libc/stdio/stdio.h"
|
#include "libc/stdio/stdio.h"
|
||||||
|
#include "libc/testlib/benchmark.h"
|
||||||
#include "libc/x/xasprintf.h"
|
#include "libc/x/xasprintf.h"
|
||||||
|
|
||||||
#define EXPENSIVE_TESTS 0
|
#define EXPENSIVE_TESTS 0
|
||||||
|
@ -237,20 +238,6 @@ float nothing(float x) {
|
||||||
|
|
||||||
float (*barrier)(float) = nothing;
|
float (*barrier)(float) = nothing;
|
||||||
|
|
||||||
#define BENCH(ITERATIONS, WORK_PER_RUN, CODE) \
|
|
||||||
do { \
|
|
||||||
struct timespec start = timespec_real(); \
|
|
||||||
for (int __i = 0; __i < ITERATIONS; ++__i) { \
|
|
||||||
asm volatile("" ::: "memory"); \
|
|
||||||
CODE; \
|
|
||||||
} \
|
|
||||||
long long work = (WORK_PER_RUN) * (ITERATIONS); \
|
|
||||||
long nanos = \
|
|
||||||
(timespec_tonanos(timespec_sub(timespec_real(), start)) + work - 1) / \
|
|
||||||
(double)work; \
|
|
||||||
printf("%8ld ns %2dx %s\n", nanos, (ITERATIONS), #CODE); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
ShowCrashReports();
|
ShowCrashReports();
|
||||||
|
|
||||||
|
@ -270,12 +257,12 @@ int main() {
|
||||||
test_fdotf_naive();
|
test_fdotf_naive();
|
||||||
test_fdotf_hefty();
|
test_fdotf_hefty();
|
||||||
test_fdotf_ruler();
|
test_fdotf_ruler();
|
||||||
BENCH(20, 1, (kahan = barrier(fdotf_kahan(A, B, n))));
|
BENCHMARK(20, 1, (kahan = barrier(fdotf_kahan(A, B, n))));
|
||||||
BENCH(20, 1, (dubble = barrier(fdotf_dubble(A, B, n))));
|
BENCHMARK(20, 1, (dubble = barrier(fdotf_dubble(A, B, n))));
|
||||||
BENCH(20, 1, (naive = barrier(fdotf_naive(A, B, n))));
|
BENCHMARK(20, 1, (naive = barrier(fdotf_naive(A, B, n))));
|
||||||
BENCH(20, 1, (recursive = barrier(fdotf_recursive(A, B, n))));
|
BENCHMARK(20, 1, (recursive = barrier(fdotf_recursive(A, B, n))));
|
||||||
BENCH(20, 1, (ruler = barrier(fdotf_ruler(A, B, n))));
|
BENCHMARK(20, 1, (ruler = barrier(fdotf_ruler(A, B, n))));
|
||||||
BENCH(20, 1, (hefty = barrier(fdotf_hefty(A, B, n))));
|
BENCHMARK(20, 1, (hefty = barrier(fdotf_hefty(A, B, n))));
|
||||||
printf("dubble = %f (%g)\n", dubble, fabs(dubble - dubble));
|
printf("dubble = %f (%g)\n", dubble, fabs(dubble - dubble));
|
||||||
printf("kahan = %f (%g)\n", kahan, fabs(kahan - dubble));
|
printf("kahan = %f (%g)\n", kahan, fabs(kahan - dubble));
|
||||||
printf("naive = %f (%g)\n", naive, fabs(naive - dubble));
|
printf("naive = %f (%g)\n", naive, fabs(naive - dubble));
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#include "libc/mem/mem.h"
|
#include "libc/mem/mem.h"
|
||||||
#include "libc/runtime/runtime.h"
|
#include "libc/runtime/runtime.h"
|
||||||
#include "libc/stdio/stdio.h"
|
#include "libc/stdio/stdio.h"
|
||||||
|
#include "libc/testlib/benchmark.h"
|
||||||
#include "libc/x/xasprintf.h"
|
#include "libc/x/xasprintf.h"
|
||||||
|
|
||||||
#define EXPENSIVE_TESTS 0
|
#define EXPENSIVE_TESTS 0
|
||||||
|
@ -225,20 +226,6 @@ float nothing(float x) {
|
||||||
|
|
||||||
float (*barrier)(float) = nothing;
|
float (*barrier)(float) = nothing;
|
||||||
|
|
||||||
#define BENCH(ITERATIONS, WORK_PER_RUN, CODE) \
|
|
||||||
do { \
|
|
||||||
struct timespec start = timespec_real(); \
|
|
||||||
for (int __i = 0; __i < ITERATIONS; ++__i) { \
|
|
||||||
asm volatile("" ::: "memory"); \
|
|
||||||
CODE; \
|
|
||||||
} \
|
|
||||||
long long work = (WORK_PER_RUN) * (ITERATIONS); \
|
|
||||||
long nanos = \
|
|
||||||
(timespec_tonanos(timespec_sub(timespec_real(), start)) + work - 1) / \
|
|
||||||
(double)work; \
|
|
||||||
printf("%8ld ns %2dx %s\n", nanos, (ITERATIONS), #CODE); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
ShowCrashReports();
|
ShowCrashReports();
|
||||||
|
|
||||||
|
@ -255,12 +242,12 @@ int main() {
|
||||||
test_fsumf_naive();
|
test_fsumf_naive();
|
||||||
test_fsumf_hefty();
|
test_fsumf_hefty();
|
||||||
test_fsumf_ruler();
|
test_fsumf_ruler();
|
||||||
BENCH(20, 1, (kahan = barrier(fsumf_kahan(p, n))));
|
BENCHMARK(20, 1, (kahan = barrier(fsumf_kahan(p, n))));
|
||||||
BENCH(20, 1, (dubble = barrier(fsumf_dubble(p, n))));
|
BENCHMARK(20, 1, (dubble = barrier(fsumf_dubble(p, n))));
|
||||||
BENCH(20, 1, (naive = barrier(fsumf_naive(p, n))));
|
BENCHMARK(20, 1, (naive = barrier(fsumf_naive(p, n))));
|
||||||
BENCH(20, 1, (recursive = barrier(fsumf_recursive(p, n))));
|
BENCHMARK(20, 1, (recursive = barrier(fsumf_recursive(p, n))));
|
||||||
BENCH(20, 1, (ruler = barrier(fsumf_ruler(p, n))));
|
BENCHMARK(20, 1, (ruler = barrier(fsumf_ruler(p, n))));
|
||||||
BENCH(20, 1, (hefty = barrier(fsumf_hefty(p, n))));
|
BENCHMARK(20, 1, (hefty = barrier(fsumf_hefty(p, n))));
|
||||||
printf("dubble = %f (%g)\n", dubble, fabs(dubble - dubble));
|
printf("dubble = %f (%g)\n", dubble, fabs(dubble - dubble));
|
||||||
printf("kahan = %f (%g)\n", kahan, fabs(kahan - dubble));
|
printf("kahan = %f (%g)\n", kahan, fabs(kahan - dubble));
|
||||||
printf("naive = %f (%g)\n", naive, fabs(naive - dubble));
|
printf("naive = %f (%g)\n", naive, fabs(naive - dubble));
|
||||||
|
|
7
third_party/dlmalloc/BUILD.mk
vendored
7
third_party/dlmalloc/BUILD.mk
vendored
|
@ -58,6 +58,13 @@ $(THIRD_PARTY_DLMALLOC_A_OBJS): private \
|
||||||
-Wframe-larger-than=4096 \
|
-Wframe-larger-than=4096 \
|
||||||
-Walloca-larger-than=4096
|
-Walloca-larger-than=4096
|
||||||
|
|
||||||
|
# avoid the legacy sse decoding penalty on avx systems
|
||||||
|
ifeq ($(MODE),)
|
||||||
|
$(THIRD_PARTY_DLMALLOC_A_OBJS): private \
|
||||||
|
COPTS += \
|
||||||
|
-mgeneral-regs-only
|
||||||
|
endif
|
||||||
|
|
||||||
THIRD_PARTY_DLMALLOC_LIBS = $(foreach x,$(THIRD_PARTY_DLMALLOC_ARTIFACTS),$($(x)))
|
THIRD_PARTY_DLMALLOC_LIBS = $(foreach x,$(THIRD_PARTY_DLMALLOC_ARTIFACTS),$($(x)))
|
||||||
THIRD_PARTY_DLMALLOC_SRCS = $(foreach x,$(THIRD_PARTY_DLMALLOC_ARTIFACTS),$($(x)_SRCS))
|
THIRD_PARTY_DLMALLOC_SRCS = $(foreach x,$(THIRD_PARTY_DLMALLOC_ARTIFACTS),$($(x)_SRCS))
|
||||||
THIRD_PARTY_DLMALLOC_HDRS = $(foreach x,$(THIRD_PARTY_DLMALLOC_ARTIFACTS),$($(x)_HDRS))
|
THIRD_PARTY_DLMALLOC_HDRS = $(foreach x,$(THIRD_PARTY_DLMALLOC_ARTIFACTS),$($(x)_HDRS))
|
||||||
|
|
3
third_party/libcxx/BUILD.mk
vendored
3
third_party/libcxx/BUILD.mk
vendored
|
@ -2148,6 +2148,9 @@ $(THIRD_PARTY_LIBCXX_A_OBJS): private \
|
||||||
-DLIBCXX_BUILDING_LIBCXXABI \
|
-DLIBCXX_BUILDING_LIBCXXABI \
|
||||||
-D_LIBCPP_BUILDING_LIBRARY
|
-D_LIBCPP_BUILDING_LIBRARY
|
||||||
|
|
||||||
|
o/$(MODE)/third_party/libcxx/locale.o: private \
|
||||||
|
OVERRIDE_COPTS += -O -g0
|
||||||
|
|
||||||
THIRD_PARTY_LIBCXX_LIBS = $(foreach x,$(THIRD_PARTY_LIBCXX_ARTIFACTS),$($(x)))
|
THIRD_PARTY_LIBCXX_LIBS = $(foreach x,$(THIRD_PARTY_LIBCXX_ARTIFACTS),$($(x)))
|
||||||
THIRD_PARTY_LIBCXX_SRCS = $(foreach x,$(THIRD_PARTY_LIBCXX_ARTIFACTS),$($(x)_SRCS))
|
THIRD_PARTY_LIBCXX_SRCS = $(foreach x,$(THIRD_PARTY_LIBCXX_ARTIFACTS),$($(x)_SRCS))
|
||||||
THIRD_PARTY_LIBCXX_HDRS = $(foreach x,$(THIRD_PARTY_LIBCXX_ARTIFACTS),$($(x)_HDRS))
|
THIRD_PARTY_LIBCXX_HDRS = $(foreach x,$(THIRD_PARTY_LIBCXX_ARTIFACTS),$($(x)_HDRS))
|
||||||
|
|
7
third_party/nsync/BUILD.mk
vendored
7
third_party/nsync/BUILD.mk
vendored
|
@ -56,6 +56,13 @@ $(THIRD_PARTY_NSYNC_A_OBJS): private \
|
||||||
-Wframe-larger-than=4096 \
|
-Wframe-larger-than=4096 \
|
||||||
-Walloca-larger-than=4096
|
-Walloca-larger-than=4096
|
||||||
|
|
||||||
|
# avoid the legacy sse decoding penalty on avx systems
|
||||||
|
ifeq ($(MODE),)
|
||||||
|
$(THIRD_PARTY_NSYNC_A_OBJS): private \
|
||||||
|
COPTS += \
|
||||||
|
-mgeneral-regs-only
|
||||||
|
endif
|
||||||
|
|
||||||
# these assembly files are safe to build on aarch64
|
# these assembly files are safe to build on aarch64
|
||||||
o/$(MODE)/third_party/nsync/compat.o: third_party/nsync/compat.S
|
o/$(MODE)/third_party/nsync/compat.o: third_party/nsync/compat.S
|
||||||
@$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $<
|
@$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $<
|
||||||
|
|
7
third_party/nsync/mem/BUILD.mk
vendored
7
third_party/nsync/mem/BUILD.mk
vendored
|
@ -49,6 +49,13 @@ $(THIRD_PARTY_NSYNC_MEM_A_OBJS): private \
|
||||||
-Wframe-larger-than=4096 \
|
-Wframe-larger-than=4096 \
|
||||||
-Walloca-larger-than=4096
|
-Walloca-larger-than=4096
|
||||||
|
|
||||||
|
# avoid the legacy sse decoding penalty on avx systems
|
||||||
|
ifeq ($(MODE),)
|
||||||
|
$(THIRD_PARTY_NSYNC_MEM_A_OBJS): private \
|
||||||
|
COPTS += \
|
||||||
|
-mgeneral-regs-only
|
||||||
|
endif
|
||||||
|
|
||||||
THIRD_PARTY_NSYNC_MEM_LIBS = $(foreach x,$(THIRD_PARTY_NSYNC_MEM_ARTIFACTS),$($(x)))
|
THIRD_PARTY_NSYNC_MEM_LIBS = $(foreach x,$(THIRD_PARTY_NSYNC_MEM_ARTIFACTS),$($(x)))
|
||||||
THIRD_PARTY_NSYNC_MEM_SRCS = $(foreach x,$(THIRD_PARTY_NSYNC_MEM_ARTIFACTS),$($(x)_SRCS))
|
THIRD_PARTY_NSYNC_MEM_SRCS = $(foreach x,$(THIRD_PARTY_NSYNC_MEM_ARTIFACTS),$($(x)_SRCS))
|
||||||
THIRD_PARTY_NSYNC_MEM_CHECKS = $(foreach x,$(THIRD_PARTY_NSYNC_MEM_ARTIFACTS),$($(x)_CHECKS))
|
THIRD_PARTY_NSYNC_MEM_CHECKS = $(foreach x,$(THIRD_PARTY_NSYNC_MEM_ARTIFACTS),$($(x)_CHECKS))
|
||||||
|
|
Loading…
Reference in a new issue