Avoid legacy instruction penalties on x86

This commit is contained in:
Justine Tunney 2024-07-31 01:02:24 -07:00
parent 1fba310e22
commit 8d8aecb6d9
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
16 changed files with 199 additions and 158 deletions

View file

@ -540,7 +540,7 @@ COSMOCC_HDRS = \
$(foreach x,$(COSMOCC_PKGS),$($(x)_HDRS)) \
$(foreach x,$(COSMOCC_PKGS),$($(x)_INCS))
o/cosmocc.h.txt: Makefile
o/cosmocc.h.txt: Makefile libc $(MAKEFILES) $(call uniq,$(foreach x,$(HDRS) $(INCS),$(dir $(x)))) $(HDRS) $(INCS)
$(file >$@, $(call uniq,$(COSMOCC_HDRS)))
COSMOPOLITAN_H_ROOT_HDRS = \

View file

@ -154,6 +154,66 @@ o/$(MODE)/libc/calls/sigcrashsig.o: private \
CFLAGS += \
-Os
# Avoid the legacy SSE decoding penalty on AVX systems.
#
# The objects listed below are compiled with -mgeneral-regs-only, which
# tells the compiler to generate code using only general-purpose
# registers, so that no legacy-encoded SSE instructions are emitted for
# these translation units.
#
# `private` keeps the appended flag from being inherited by the
# prerequisites of these targets.
#
# NOTE(review): the `o//` prefix looks like `o/$(MODE)/` with an empty
# MODE, i.e. the flag is presumably only wanted for the default build
# mode -- confirm.
o//libc/calls/cfmakeraw.o \
o//libc/calls/clock_gettime-xnu.o \
o//libc/calls/CPU_AND.o \
o//libc/calls/CPU_OR.o \
o//libc/calls/CPU_XOR.o \
o//libc/calls/dl_iterate_phdr.o \
o//libc/calls/dup-nt.o \
o//libc/calls/fcntl-nt.o \
o//libc/calls/flock-nt.o \
o//libc/calls/fstatfs-nt.o \
o//libc/calls/fstat-nt.o \
o//libc/calls/futimesat.o \
o//libc/calls/futimes.o \
o//libc/calls/getrlimit.o \
o//libc/calls/gettimeofday.o \
o//libc/calls/ioctl.o \
o//libc/calls/lutimes.o \
o//libc/calls/metaflock.o \
o//libc/calls/ntaccesscheck.o \
o//libc/calls/ntspawn.o \
o//libc/calls/open-nt.o \
o//libc/calls/pledge-linux.o \
o//libc/calls/ppoll.o \
o//libc/calls/preadv.o \
o//libc/calls/pselect.o \
o//libc/calls/pwritev.o \
o//libc/calls/read-nt.o \
o//libc/calls/readv.o \
o//libc/calls/readwrite-nt.o \
o//libc/calls/releasefd.o \
o//libc/calls/select.o \
o//libc/calls/sigaction.o \
o//libc/calls/sigenter-freebsd.o \
o//libc/calls/sigenter-netbsd.o \
o//libc/calls/sigenter-openbsd.o \
o//libc/calls/sigenter-xnu.o \
o//libc/calls/sigignore.o \
o//libc/calls/siginfo2cosmo.o \
o//libc/calls/signal.o \
o//libc/calls/sig.o \
o//libc/calls/sigtimedwait.o \
o//libc/calls/stat2cosmo.o \
o//libc/calls/statfs2cosmo.o \
o//libc/calls/statfs2statvfs.o \
o//libc/calls/tcgetattr-nt.o \
o//libc/calls/tcgetattr.o \
o//libc/calls/tcgetwinsize-nt.o \
o//libc/calls/tcsetattr-nt.o \
o//libc/calls/tcsetwinsize-nt.o \
o//libc/calls/termios2host.o \
o//libc/calls/timespec_sleep.o \
o//libc/calls/uname.o \
o//libc/calls/utimensat-old.o \
o//libc/calls/utimes.o \
o//libc/calls/winexec.o \
o//libc/calls/writev.o: private \
COPTS += \
-mgeneral-regs-only
# these assembly files are safe to build on aarch64
o/$(MODE)/libc/calls/getcontext.o: libc/calls/getcontext.S
@$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $<

View file

@ -97,6 +97,14 @@ o/$(MODE)/libc/intrin/x86.o: private \
-fpatchable-function-entry=0 \
-Os
# Avoid the legacy SSE decoding penalty on AVX systems.
#
# These objects are compiled with -mgeneral-regs-only so the compiler
# only generates code that uses general-purpose registers. `private`
# keeps the flag from being inherited by the targets' prerequisites.
#
# NOTE(review): the `o//` prefix looks like `o/$(MODE)/` with an empty
# MODE, i.e. presumably the default build mode only -- confirm.
# NOTE(review): this rule appends to CFLAGS, whereas sibling rules that
# add the same flag (e.g. in libc/calls) append to COPTS -- confirm the
# difference is intentional.
o//libc/intrin/dll.o \
o//libc/intrin/fds.o \
o//libc/intrin/mmap.o \
o//libc/intrin/demangle.o: private \
CFLAGS += \
-mgeneral-regs-only
# these assembly files are safe to build on aarch64
o/$(MODE)/libc/intrin/aarch64/%.o: libc/intrin/aarch64/%.S
@$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $<

View file

@ -1,25 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/locale.h"
#include "libc/sysv/errfuns.h"
locale_t uselocale(locale_t l) {
// TODO: implement me!
return 0;
}

View file

@ -22,6 +22,7 @@ LIBC_TESTLIB_A_ASSETS = \
LIBC_TESTLIB_A_HDRS = \
libc/testlib/aspect.internal.h \
libc/testlib/bench.h \
libc/testlib/benchmark.h \
libc/testlib/blocktronics.h \
libc/testlib/ezbench.h \
libc/testlib/fastrandomstring.h \

26
libc/testlib/benchmark.h Normal file
View file

@ -0,0 +1,26 @@
#ifndef COSMOPOLITAN_LIBC_TESTLIB_BENCHMARK_H_
#define COSMOPOLITAN_LIBC_TESTLIB_BENCHMARK_H_
#include "libc/calls/struct/timespec.h"
#include "libc/stdio/stdio.h"
COSMOPOLITAN_C_START_
/**
 * Times CODE and prints the average nanoseconds per unit of work.
 *
 * CODE is executed ITERATIONS times between two timespec_real()
 * readings, with a compiler memory barrier before each run. The elapsed
 * nanoseconds are then divided by the total amount of work, which is
 * WORK_PER_RUN multiplied by ITERATIONS; a WORK_PER_RUN of zero is
 * treated as one so the division is always well-defined. The result is
 * printed with printf() alongside the iteration count and the source
 * text of CODE. Results below 1000ns are printed with a fractional
 * part, larger ones are truncated to whole nanoseconds.
 *
 * The macro's own locals use a double-underscore prefix so that
 * identifiers appearing in the arguments (e.g. a caller variable named
 * `start` or `work`) can't be captured by the expansion. The arguments
 * are evaluated more than once, so they shouldn't have side effects.
 *
 * @param ITERATIONS is number of times CODE is run (int-sized)
 * @param WORK_PER_RUN is units of work a single run of CODE performs
 * @param CODE is the statement or expression being measured
 */
#define BENCHMARK(ITERATIONS, WORK_PER_RUN, CODE)                            \
  do {                                                                       \
    struct timespec __start = timespec_real();                               \
    for (int __i = 0; __i < (ITERATIONS); ++__i) {                           \
      asm volatile("" ::: "memory");                                         \
      CODE;                                                                  \
    }                                                                        \
    /* widen before multiplying so big sizes times iterations can't wrap */  \
    long long __work =                                                       \
        (long long)((WORK_PER_RUN) ? (WORK_PER_RUN) : 1) * (ITERATIONS);     \
    double __nanos =                                                         \
        (timespec_tonanos(timespec_sub(timespec_real(), __start)) + __work - \
         1) /                                                                \
        (double)__work;                                                      \
    if (__nanos < 1000) {                                                    \
      printf("%10g ns %2dx %s\n", __nanos, (int)(ITERATIONS), #CODE);        \
    } else {                                                                 \
      printf("%10lld ns %2dx %s\n", (long long)__nanos, (int)(ITERATIONS),   \
             #CODE);                                                         \
    }                                                                        \
  } while (0)
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TESTLIB_BENCHMARK_H_ */

View file

@ -22,26 +22,12 @@
#include "libc/mem/leaks.h"
#include "libc/stdio/stdio.h"
#include "libc/sysv/consts/rusage.h"
#include "libc/testlib/benchmark.h"
// #include <set>
// #define ctl std
// #define check() size()
#define BENCH(ITERATIONS, WORK_PER_RUN, CODE) \
do { \
struct timespec start = timespec_real(); \
for (int __i = 0; __i < ITERATIONS; ++__i) { \
asm volatile("" ::: "memory"); \
CODE; \
} \
long long work = (WORK_PER_RUN) * (ITERATIONS); \
double nanos = \
(timespec_tonanos(timespec_sub(timespec_real(), start)) + work - \
1) / \
(double)work; \
printf("%10g ns %2dx %s\n", nanos, (ITERATIONS), #CODE); \
} while (0)
int
rand32(void)
{
@ -68,19 +54,19 @@ main()
{
long x = 0;
ctl::set<long> s;
BENCH(1000000, 1, s.insert(rand32() % 1000000));
BENCHMARK(1000000, 1, s.insert(rand32() % 1000000));
// s.check();
BENCH(1000000, 1, {
BENCHMARK(1000000, 1, {
auto i = s.find(rand32() % 1000000);
if (i != s.end())
x += *i;
});
BENCH(1000000, 1, {
BENCHMARK(1000000, 1, {
auto i = s.lower_bound(rand32() % 1000000);
if (i != s.end())
x += *i;
});
BENCH(1000000, 1, s.erase(rand32() % 1000000));
BENCHMARK(1000000, 1, s.erase(rand32() % 1000000));
eat(x);
}

View file

@ -20,27 +20,13 @@
#include "ctl/utility.h"
#include "libc/dce.h"
#include "libc/mem/leaks.h"
#include "libc/testlib/benchmark.h"
#include "libc/calls/struct/timespec.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#define BENCH(ITERATIONS, WORK_PER_RUN, CODE) \
do { \
struct timespec start = timespec_real(); \
for (int __i = 0; __i < ITERATIONS; ++__i) { \
asm volatile("" ::: "memory"); \
CODE; \
} \
long long work = (WORK_PER_RUN) * (ITERATIONS); \
double nanos = \
(timespec_tonanos(timespec_sub(timespec_real(), start)) + work - \
1) / \
(double)work; \
printf("%10g ns %2dx %s\n", nanos, (ITERATIONS), #CODE); \
} while (0)
const char* big_c = "aaaaaaaaaaaaaaaaaaaaaaaa";
const char* small_c = "aaaaaaaaaaaaaaaaaaaaaaa";
@ -55,98 +41,98 @@ main()
{
const ctl::string_view big(big_c), small(small_c);
BENCH(ITERATIONS * 10, 1, {
BENCHMARK(ITERATIONS * 10, 1, {
ctl::string s;
s.append("hello ");
s.append("world");
});
BENCH(ITERATIONS, 8, {
BENCHMARK(ITERATIONS, 8, {
ctl::string s;
for (int i = 0; i < 8; ++i) {
s.append('a');
}
});
BENCH(ITERATIONS, 16, {
BENCHMARK(ITERATIONS, 16, {
ctl::string s;
for (int i = 0; i < 16; ++i) {
s.append('a');
}
});
BENCH(ITERATIONS, 23, {
BENCHMARK(ITERATIONS, 23, {
ctl::string s;
for (int i = 0; i < 23; ++i) {
s.append('a');
}
});
BENCH(ITERATIONS, 24, {
BENCHMARK(ITERATIONS, 24, {
ctl::string s;
for (int i = 0; i < 24; ++i) {
s.append('a');
}
});
BENCH(ITERATIONS, 32, {
BENCHMARK(ITERATIONS, 32, {
ctl::string s;
for (int i = 0; i < 32; ++i) {
s.append('a');
}
});
BENCH(ITERATIONS, 1, { ctl::string s(small_c); });
BENCHMARK(ITERATIONS, 1, { ctl::string s(small_c); });
BENCH(ITERATIONS, 1, { ctl::string s(small); });
BENCHMARK(ITERATIONS, 1, { ctl::string s(small); });
{
ctl::string small_copy("hello world");
BENCH(ITERATIONS, 1, { ctl::string s2(small_copy); });
BENCHMARK(ITERATIONS, 1, { ctl::string s2(small_copy); });
}
BENCH(ITERATIONS, 1, {
BENCHMARK(ITERATIONS, 1, {
ctl::string s(small);
ctl::string s2(ctl::move(s));
});
BENCH(ITERATIONS, 1, {
BENCHMARK(ITERATIONS, 1, {
ctl::string s(small);
ctl::string s2(s);
});
BENCH(ITERATIONS, 1, { ctl::string s(big_c); });
BENCHMARK(ITERATIONS, 1, { ctl::string s(big_c); });
BENCH(ITERATIONS, 1, { ctl::string s(big); });
BENCHMARK(ITERATIONS, 1, { ctl::string s(big); });
{
ctl::string big_copy(big);
BENCH(ITERATIONS, 1, { ctl::string s2(big_copy); });
BENCHMARK(ITERATIONS, 1, { ctl::string s2(big_copy); });
}
BENCH(ITERATIONS, 1, {
BENCHMARK(ITERATIONS, 1, {
ctl::string s(big);
ctl::string s2(ctl::move(s));
});
BENCH(ITERATIONS, 1, {
BENCHMARK(ITERATIONS, 1, {
ctl::string s(big);
ctl::string s2(s);
});
BENCH(ITERATIONS, 1, { ctl::string s(23, 'a'); });
BENCHMARK(ITERATIONS, 1, { ctl::string s(23, 'a'); });
BENCH(ITERATIONS, 1, { ctl::string s(24, 'a'); });
BENCHMARK(ITERATIONS, 1, { ctl::string s(24, 'a'); });
{
ctl::string s(5, 'a');
BENCH(ITERATIONS, 1, { ctl::string_view s2(s); });
BENCHMARK(ITERATIONS, 1, { ctl::string_view s2(s); });
}
{
ctl::string big_trunc(48, 'a');
big_trunc.resize(4);
BENCH(ITERATIONS, 1, { ctl::string s(big_trunc); });
BENCHMARK(ITERATIONS, 1, { ctl::string s(big_trunc); });
}
CheckForMemoryLeaks();

View file

@ -18,12 +18,13 @@
*/
#include "libc/str/blake2.h"
#include "libc/assert.h"
#include "libc/calls/struct/timespec.h"
#include "libc/mem/mem.h"
#include "libc/stdio/rand.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/str/tab.internal.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/benchmark.h"
#include "libc/testlib/hyperion.h"
#include "libc/testlib/testlib.h"
@ -90,17 +91,18 @@ TEST(BLAKE2B256Test, vectors) {
free(line);
}
BENCH(blake2, bench) {
BENCH(blake2, benchmark) {
char fun[256];
rngset(fun, 256, _rand64, -1);
EZBENCH_N("blake2b256", 0, EZBLAKE2B256(0, 0));
EZBENCH_N("blake2b256", 8, EZBLAKE2B256("helloooo", 8));
EZBENCH_N("blake2b256", 31, EZBLAKE2B256(fun, 31));
EZBENCH_N("blake2b256", 32, EZBLAKE2B256(fun, 32));
EZBENCH_N("blake2b256", 63, EZBLAKE2B256(fun, 63));
EZBENCH_N("blake2b256", 64, EZBLAKE2B256(fun, 64));
EZBENCH_N("blake2b256", 128, EZBLAKE2B256(fun, 128));
EZBENCH_N("blake2b256", 256, EZBLAKE2B256(fun, 256));
EZBENCH_N("blake2b256", kHyperionSize,
EZBLAKE2B256(kHyperion, kHyperionSize));
BENCHMARK(100, 0, __expropriate(EZBLAKE2B256(0, 0)));
BENCHMARK(100, 1, __expropriate(EZBLAKE2B256("h", 1)));
BENCHMARK(100, 8, __expropriate(EZBLAKE2B256("helloooo", 8)));
BENCHMARK(100, 31, __expropriate(EZBLAKE2B256(fun, 31)));
BENCHMARK(100, 32, __expropriate(EZBLAKE2B256(fun, 32)));
BENCHMARK(100, 63, __expropriate(EZBLAKE2B256(fun, 63)));
BENCHMARK(100, 64, __expropriate(EZBLAKE2B256(fun, 64)));
BENCHMARK(100, 128, __expropriate(EZBLAKE2B256(fun, 128)));
BENCHMARK(100, 256, __expropriate(EZBLAKE2B256(fun, 256)));
BENCHMARK(100, kHyperionSize,
__expropriate(EZBLAKE2B256(kHyperion, kHyperionSize)));
}

View file

@ -16,13 +16,14 @@
limitations under the License.
*/
#include "libc/str/highwayhash64.h"
#include "libc/calls/struct/timespec.h"
#include "libc/inttypes.h"
#include "libc/nexgen32e/crc32.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/rand.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/benchmark.h"
#include "libc/testlib/hyperion.h"
#include "libc/testlib/testlib.h"
#include "third_party/zlib/zlib.h"
@ -100,33 +101,31 @@ TEST(highwayhash64, test) {
BENCH(highwayhash64, newbench) {
char fun[256];
rngset(fun, 256, _rand64, -1);
EZBENCH_N("highwayhash64", 0, HighwayHash64(0, 0, kTestKey1));
EZBENCH_N("highwayhash64", 8, HighwayHash64("helloooo", 8, kTestKey1));
EZBENCH_N("highwayhash64", 31, HighwayHash64(fun, 31, kTestKey1));
EZBENCH_N("highwayhash64", 32, HighwayHash64(fun, 32, kTestKey1));
EZBENCH_N("highwayhash64", 63, HighwayHash64(fun, 63, kTestKey1));
EZBENCH_N("highwayhash64", 64, HighwayHash64(fun, 64, kTestKey1));
EZBENCH_N("highwayhash64", 128, HighwayHash64(fun, 128, kTestKey1));
EZBENCH_N("highwayhash64", 256, HighwayHash64(fun, 256, kTestKey1));
EZBENCH_N("highwayhash64", kHyperionSize,
BENCHMARK(10, 0, HighwayHash64(0, 0, kTestKey1));
BENCHMARK(10, 8, HighwayHash64("helloooo", 8, kTestKey1));
BENCHMARK(10, 31, HighwayHash64(fun, 31, kTestKey1));
BENCHMARK(10, 32, HighwayHash64(fun, 32, kTestKey1));
BENCHMARK(10, 63, HighwayHash64(fun, 63, kTestKey1));
BENCHMARK(10, 64, HighwayHash64(fun, 64, kTestKey1));
BENCHMARK(10, 128, HighwayHash64(fun, 128, kTestKey1));
BENCHMARK(10, 256, HighwayHash64(fun, 256, kTestKey1));
BENCHMARK(10, kHyperionSize,
HighwayHash64(kHyperion, kHyperionSize, kTestKey1));
}
BENCH(highwayhash64, bench) {
EZBENCH2("knuth small", donothing,
__expropriate(KnuthMultiplicativeHash32(__veil("r", "hello"), 5)));
EZBENCH2("crc32c small", donothing, __expropriate(crc32c(0, "hello", 5)));
EZBENCH2("crc32 small", donothing,
__expropriate(crc32_z(0, __veil("r", "hello"), 5)));
EZBENCH2("highwayhash64 small", donothing,
HighwayHash64((void *)"hello", 5, kTestKey1));
EZBENCH2("crc32 big", donothing,
__expropriate(crc32_z(0, kHyperion, kHyperionSize)));
EZBENCH2("crc32c big", donothing,
__expropriate(crc32c(0, kHyperion, kHyperionSize)));
EZBENCH2("highwayhash64 big", donothing,
HighwayHash64((void *)kHyperion, kHyperionSize, kTestKey1));
EZBENCH2("knuth big", donothing,
__expropriate(KnuthMultiplicativeHash32(__veil("r", kHyperion),
kHyperionSize)));
BENCHMARK(10, 5,
__expropriate(KnuthMultiplicativeHash32(__veil("r", "hello"), 5)));
BENCHMARK(10, 5, __expropriate(crc32c(0, "hello", 5)));
BENCHMARK(10, 5, __expropriate(crc32_z(0, __veil("r", "hello"), 5)));
BENCHMARK(10, 5, HighwayHash64((void *)"hello", 5, kTestKey1));
BENCHMARK(10, kHyperionSize,
__expropriate(crc32_z(0, kHyperion, kHyperionSize)));
BENCHMARK(10, kHyperionSize,
__expropriate(crc32c(0, kHyperion, kHyperionSize)));
BENCHMARK(10, kHyperionSize,
HighwayHash64((void *)kHyperion, kHyperionSize, kTestKey1));
BENCHMARK(10, kHyperionSize,
__expropriate(KnuthMultiplicativeHash32(__veil("r", kHyperion),
kHyperionSize)));
}

View file

@ -8,6 +8,7 @@
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/stdio.h"
#include "libc/testlib/benchmark.h"
#include "libc/x/xasprintf.h"
#define EXPENSIVE_TESTS 0
@ -237,20 +238,6 @@ float nothing(float x) {
float (*barrier)(float) = nothing;
#define BENCH(ITERATIONS, WORK_PER_RUN, CODE) \
do { \
struct timespec start = timespec_real(); \
for (int __i = 0; __i < ITERATIONS; ++__i) { \
asm volatile("" ::: "memory"); \
CODE; \
} \
long long work = (WORK_PER_RUN) * (ITERATIONS); \
long nanos = \
(timespec_tonanos(timespec_sub(timespec_real(), start)) + work - 1) / \
(double)work; \
printf("%8ld ns %2dx %s\n", nanos, (ITERATIONS), #CODE); \
} while (0)
int main() {
ShowCrashReports();
@ -270,12 +257,12 @@ int main() {
test_fdotf_naive();
test_fdotf_hefty();
test_fdotf_ruler();
BENCH(20, 1, (kahan = barrier(fdotf_kahan(A, B, n))));
BENCH(20, 1, (dubble = barrier(fdotf_dubble(A, B, n))));
BENCH(20, 1, (naive = barrier(fdotf_naive(A, B, n))));
BENCH(20, 1, (recursive = barrier(fdotf_recursive(A, B, n))));
BENCH(20, 1, (ruler = barrier(fdotf_ruler(A, B, n))));
BENCH(20, 1, (hefty = barrier(fdotf_hefty(A, B, n))));
BENCHMARK(20, 1, (kahan = barrier(fdotf_kahan(A, B, n))));
BENCHMARK(20, 1, (dubble = barrier(fdotf_dubble(A, B, n))));
BENCHMARK(20, 1, (naive = barrier(fdotf_naive(A, B, n))));
BENCHMARK(20, 1, (recursive = barrier(fdotf_recursive(A, B, n))));
BENCHMARK(20, 1, (ruler = barrier(fdotf_ruler(A, B, n))));
BENCHMARK(20, 1, (hefty = barrier(fdotf_hefty(A, B, n))));
printf("dubble = %f (%g)\n", dubble, fabs(dubble - dubble));
printf("kahan = %f (%g)\n", kahan, fabs(kahan - dubble));
printf("naive = %f (%g)\n", naive, fabs(naive - dubble));

View file

@ -8,6 +8,7 @@
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/stdio.h"
#include "libc/testlib/benchmark.h"
#include "libc/x/xasprintf.h"
#define EXPENSIVE_TESTS 0
@ -225,20 +226,6 @@ float nothing(float x) {
float (*barrier)(float) = nothing;
#define BENCH(ITERATIONS, WORK_PER_RUN, CODE) \
do { \
struct timespec start = timespec_real(); \
for (int __i = 0; __i < ITERATIONS; ++__i) { \
asm volatile("" ::: "memory"); \
CODE; \
} \
long long work = (WORK_PER_RUN) * (ITERATIONS); \
long nanos = \
(timespec_tonanos(timespec_sub(timespec_real(), start)) + work - 1) / \
(double)work; \
printf("%8ld ns %2dx %s\n", nanos, (ITERATIONS), #CODE); \
} while (0)
int main() {
ShowCrashReports();
@ -255,12 +242,12 @@ int main() {
test_fsumf_naive();
test_fsumf_hefty();
test_fsumf_ruler();
BENCH(20, 1, (kahan = barrier(fsumf_kahan(p, n))));
BENCH(20, 1, (dubble = barrier(fsumf_dubble(p, n))));
BENCH(20, 1, (naive = barrier(fsumf_naive(p, n))));
BENCH(20, 1, (recursive = barrier(fsumf_recursive(p, n))));
BENCH(20, 1, (ruler = barrier(fsumf_ruler(p, n))));
BENCH(20, 1, (hefty = barrier(fsumf_hefty(p, n))));
BENCHMARK(20, 1, (kahan = barrier(fsumf_kahan(p, n))));
BENCHMARK(20, 1, (dubble = barrier(fsumf_dubble(p, n))));
BENCHMARK(20, 1, (naive = barrier(fsumf_naive(p, n))));
BENCHMARK(20, 1, (recursive = barrier(fsumf_recursive(p, n))));
BENCHMARK(20, 1, (ruler = barrier(fsumf_ruler(p, n))));
BENCHMARK(20, 1, (hefty = barrier(fsumf_hefty(p, n))));
printf("dubble = %f (%g)\n", dubble, fabs(dubble - dubble));
printf("kahan = %f (%g)\n", kahan, fabs(kahan - dubble));
printf("naive = %f (%g)\n", naive, fabs(naive - dubble));

View file

@ -58,6 +58,13 @@ $(THIRD_PARTY_DLMALLOC_A_OBJS): private \
-Wframe-larger-than=4096 \
-Walloca-larger-than=4096
# Avoid the legacy SSE decoding penalty on AVX systems.
#
# When MODE is empty (presumably the default build mode -- confirm),
# the dlmalloc objects are compiled with -mgeneral-regs-only so the
# compiler only generates code that uses general-purpose registers.
# `private` keeps the flag from being inherited by prerequisites.
ifeq ($(MODE),)
$(THIRD_PARTY_DLMALLOC_A_OBJS): private \
COPTS += \
-mgeneral-regs-only
endif
THIRD_PARTY_DLMALLOC_LIBS = $(foreach x,$(THIRD_PARTY_DLMALLOC_ARTIFACTS),$($(x)))
THIRD_PARTY_DLMALLOC_SRCS = $(foreach x,$(THIRD_PARTY_DLMALLOC_ARTIFACTS),$($(x)_SRCS))
THIRD_PARTY_DLMALLOC_HDRS = $(foreach x,$(THIRD_PARTY_DLMALLOC_ARTIFACTS),$($(x)_HDRS))

View file

@ -2148,6 +2148,9 @@ $(THIRD_PARTY_LIBCXX_A_OBJS): private \
-DLIBCXX_BUILDING_LIBCXXABI \
-D_LIBCPP_BUILDING_LIBRARY
# Build locale.o with -O and with debug info turned off (-g0). `private`
# keeps these flags from being inherited by the target's prerequisites.
# NOTE(review): the rationale isn't stated here; presumably this object
# is unusually costly to build or large at the default settings --
# confirm.
o/$(MODE)/third_party/libcxx/locale.o: private \
OVERRIDE_COPTS += -O -g0
THIRD_PARTY_LIBCXX_LIBS = $(foreach x,$(THIRD_PARTY_LIBCXX_ARTIFACTS),$($(x)))
THIRD_PARTY_LIBCXX_SRCS = $(foreach x,$(THIRD_PARTY_LIBCXX_ARTIFACTS),$($(x)_SRCS))
THIRD_PARTY_LIBCXX_HDRS = $(foreach x,$(THIRD_PARTY_LIBCXX_ARTIFACTS),$($(x)_HDRS))

View file

@ -56,6 +56,13 @@ $(THIRD_PARTY_NSYNC_A_OBJS): private \
-Wframe-larger-than=4096 \
-Walloca-larger-than=4096
# Avoid the legacy SSE decoding penalty on AVX systems.
#
# When MODE is empty (presumably the default build mode -- confirm),
# the nsync objects are compiled with -mgeneral-regs-only so the
# compiler only generates code that uses general-purpose registers.
# `private` keeps the flag from being inherited by prerequisites.
ifeq ($(MODE),)
$(THIRD_PARTY_NSYNC_A_OBJS): private \
COPTS += \
-mgeneral-regs-only
endif
# these assembly files are safe to build on aarch64
o/$(MODE)/third_party/nsync/compat.o: third_party/nsync/compat.S
@$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $<

View file

@ -49,6 +49,13 @@ $(THIRD_PARTY_NSYNC_MEM_A_OBJS): private \
-Wframe-larger-than=4096 \
-Walloca-larger-than=4096
# Avoid the legacy SSE decoding penalty on AVX systems.
#
# When MODE is empty (presumably the default build mode -- confirm),
# the nsync memory objects are compiled with -mgeneral-regs-only so the
# compiler only generates code that uses general-purpose registers.
# `private` keeps the flag from being inherited by prerequisites.
ifeq ($(MODE),)
$(THIRD_PARTY_NSYNC_MEM_A_OBJS): private \
COPTS += \
-mgeneral-regs-only
endif
THIRD_PARTY_NSYNC_MEM_LIBS = $(foreach x,$(THIRD_PARTY_NSYNC_MEM_ARTIFACTS),$($(x)))
THIRD_PARTY_NSYNC_MEM_SRCS = $(foreach x,$(THIRD_PARTY_NSYNC_MEM_ARTIFACTS),$($(x)_SRCS))
THIRD_PARTY_NSYNC_MEM_CHECKS = $(foreach x,$(THIRD_PARTY_NSYNC_MEM_ARTIFACTS),$($(x)_CHECKS))