mirror of
				https://github.com/jart/cosmopolitan.git
				synced 2025-10-26 11:10:58 +00:00 
			
		
		
		
	Avoid legacy instruction penalties on x86
This commit is contained in:
		
							parent
							
								
									1fba310e22
								
							
						
					
					
						commit
						8d8aecb6d9
					
				
					 16 changed files with 199 additions and 158 deletions
				
			
		
							
								
								
									
										2
									
								
								Makefile
									
										
									
									
									
								
							
							
						
						
									
										2
									
								
								Makefile
									
										
									
									
									
								
							|  | @ -540,7 +540,7 @@ COSMOCC_HDRS =								\ | |||
| 	$(foreach x,$(COSMOCC_PKGS),$($(x)_HDRS))			\
 | ||||
| 	$(foreach x,$(COSMOCC_PKGS),$($(x)_INCS)) | ||||
| 
 | ||||
| o/cosmocc.h.txt: Makefile | ||||
| o/cosmocc.h.txt: Makefile libc $(MAKEFILES) $(call uniq,$(foreach x,$(HDRS) $(INCS),$(dir $(x)))) $(HDRS) $(INCS) | ||||
| 	$(file >$@, $(call uniq,$(COSMOCC_HDRS))) | ||||
| 
 | ||||
| COSMOPOLITAN_H_ROOT_HDRS =						\
 | ||||
|  |  | |||
|  | @ -154,6 +154,66 @@ o/$(MODE)/libc/calls/sigcrashsig.o: private		\ | |||
| 		CFLAGS +=				\
 | ||||
| 			-Os | ||||
| 
 | ||||
| # avoid legacy sse decoding penalty on avx systems
 | ||||
| o//libc/calls/cfmakeraw.o				\ | ||||
| o//libc/calls/clock_gettime-xnu.o			\ | ||||
| o//libc/calls/CPU_AND.o					\ | ||||
| o//libc/calls/CPU_OR.o					\ | ||||
| o//libc/calls/CPU_XOR.o					\ | ||||
| o//libc/calls/dl_iterate_phdr.o				\ | ||||
| o//libc/calls/dup-nt.o					\ | ||||
| o//libc/calls/fcntl-nt.o				\ | ||||
| o//libc/calls/flock-nt.o				\ | ||||
| o//libc/calls/fstatfs-nt.o				\ | ||||
| o//libc/calls/fstat-nt.o				\ | ||||
| o//libc/calls/futimesat.o				\ | ||||
| o//libc/calls/futimes.o					\ | ||||
| o//libc/calls/getrlimit.o				\ | ||||
| o//libc/calls/gettimeofday.o				\ | ||||
| o//libc/calls/ioctl.o					\ | ||||
| o//libc/calls/lutimes.o					\ | ||||
| o//libc/calls/metaflock.o				\ | ||||
| o//libc/calls/ntaccesscheck.o				\ | ||||
| o//libc/calls/ntspawn.o					\ | ||||
| o//libc/calls/open-nt.o					\ | ||||
| o//libc/calls/pledge-linux.o				\ | ||||
| o//libc/calls/ppoll.o					\ | ||||
| o//libc/calls/preadv.o					\ | ||||
| o//libc/calls/pselect.o					\ | ||||
| o//libc/calls/pwritev.o					\ | ||||
| o//libc/calls/read-nt.o					\ | ||||
| o//libc/calls/readv.o					\ | ||||
| o//libc/calls/readwrite-nt.o				\ | ||||
| o//libc/calls/releasefd.o				\ | ||||
| o//libc/calls/select.o					\ | ||||
| o//libc/calls/sigaction.o				\ | ||||
| o//libc/calls/sigenter-freebsd.o			\ | ||||
| o//libc/calls/sigenter-netbsd.o				\ | ||||
| o//libc/calls/sigenter-openbsd.o			\ | ||||
| o//libc/calls/sigenter-xnu.o				\ | ||||
| o//libc/calls/sigignore.o				\ | ||||
| o//libc/calls/siginfo2cosmo.o				\ | ||||
| o//libc/calls/signal.o					\ | ||||
| o//libc/calls/sig.o					\ | ||||
| o//libc/calls/sigtimedwait.o				\ | ||||
| o//libc/calls/stat2cosmo.o				\ | ||||
| o//libc/calls/statfs2cosmo.o				\ | ||||
| o//libc/calls/statfs2statvfs.o				\ | ||||
| o//libc/calls/tcgetattr-nt.o				\ | ||||
| o//libc/calls/tcgetattr.o				\ | ||||
| o//libc/calls/tcgetwinsize-nt.o				\ | ||||
| o//libc/calls/tcsetattr-nt.o				\ | ||||
| o//libc/calls/tcsetwinsize-nt.o				\ | ||||
| o//libc/calls/termios2host.o				\ | ||||
| o//libc/calls/timespec_sleep.o				\ | ||||
| o//libc/calls/uname.o					\ | ||||
| o//libc/calls/utimensat-old.o				\ | ||||
| o//libc/calls/utimes.o					\ | ||||
| o//libc/calls/winexec.o					\ | ||||
| o//libc/calls/writev.o: private				\ | ||||
| 		COPTS +=				\
 | ||||
| 			-mgeneral-regs-only | ||||
| 
 | ||||
| # these assembly files are safe to build on aarch64
 | ||||
| o/$(MODE)/libc/calls/getcontext.o: libc/calls/getcontext.S | ||||
| 	@$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $< | ||||
|  |  | |||
|  | @ -97,6 +97,14 @@ o/$(MODE)/libc/intrin/x86.o: private			\ | |||
| 			-fpatchable-function-entry=0	\
 | ||||
| 			-Os | ||||
| 
 | ||||
| # avoid the legacy sse decoding penalty on avx systems
 | ||||
| o//libc/intrin/dll.o					\ | ||||
| o//libc/intrin/fds.o					\ | ||||
| o//libc/intrin/mmap.o					\ | ||||
| o//libc/intrin/demangle.o: private			\ | ||||
| 		CFLAGS +=				\
 | ||||
| 			-mgeneral-regs-only | ||||
| 
 | ||||
| # these assembly files are safe to build on aarch64
 | ||||
| o/$(MODE)/libc/intrin/aarch64/%.o: libc/intrin/aarch64/%.S | ||||
| 	@$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $< | ||||
|  |  | |||
|  | @ -1,25 +0,0 @@ | |||
| /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
 | ||||
| │ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │ | ||||
| ╞══════════════════════════════════════════════════════════════════════════════╡ | ||||
| │ Copyright 2022 Justine Alexandra Roberts Tunney                              │ | ||||
| │                                                                              │ | ||||
| │ Permission to use, copy, modify, and/or distribute this software for         │ | ||||
| │ any purpose with or without fee is hereby granted, provided that the         │ | ||||
| │ above copyright notice and this permission notice appear in all copies.      │ | ||||
| │                                                                              │ | ||||
| │ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │ | ||||
| │ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │ | ||||
| │ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │ | ||||
| │ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │ | ||||
| │ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │ | ||||
| │ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │ | ||||
| │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │ | ||||
| │ PERFORMANCE OF THIS SOFTWARE.                                                │ | ||||
| ╚─────────────────────────────────────────────────────────────────────────────*/ | ||||
| #include "libc/str/locale.h" | ||||
| #include "libc/sysv/errfuns.h" | ||||
| 
 | ||||
| locale_t uselocale(locale_t l) { | ||||
|   // TODO: implement me!
 | ||||
|   return 0; | ||||
| } | ||||
|  | @ -22,6 +22,7 @@ LIBC_TESTLIB_A_ASSETS =						\ | |||
| LIBC_TESTLIB_A_HDRS =						\
 | ||||
| 	libc/testlib/aspect.internal.h				\
 | ||||
| 	libc/testlib/bench.h					\
 | ||||
| 	libc/testlib/benchmark.h				\
 | ||||
| 	libc/testlib/blocktronics.h				\
 | ||||
| 	libc/testlib/ezbench.h					\
 | ||||
| 	libc/testlib/fastrandomstring.h				\
 | ||||
|  |  | |||
							
								
								
									
										26
									
								
								libc/testlib/benchmark.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								libc/testlib/benchmark.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,26 @@ | |||
| #ifndef COSMOPOLITAN_LIBC_TESTLIB_BENCHMARK_H_ | ||||
| #define COSMOPOLITAN_LIBC_TESTLIB_BENCHMARK_H_ | ||||
| #include "libc/calls/struct/timespec.h" | ||||
| #include "libc/stdio/stdio.h" | ||||
| COSMOPOLITAN_C_START_ | ||||
| 
 | ||||
| #define BENCHMARK(ITERATIONS, WORK_PER_RUN, CODE)                             \ | ||||
|   do {                                                                        \ | ||||
|     struct timespec start = timespec_real();                                  \ | ||||
|     for (int __i = 0; __i < ITERATIONS; ++__i) {                              \ | ||||
|       asm volatile("" ::: "memory");                                          \ | ||||
|       CODE;                                                                   \ | ||||
|     }                                                                         \ | ||||
|     long long work = ((WORK_PER_RUN) ? (WORK_PER_RUN) : 1) * (ITERATIONS);    \ | ||||
|     double nanos =                                                            \ | ||||
|         (timespec_tonanos(timespec_sub(timespec_real(), start)) + work - 1) / \ | ||||
|         (double)work;                                                         \ | ||||
|     if (nanos < 1000) {                                                       \ | ||||
|       printf("%10g ns %2dx %s\n", nanos, (ITERATIONS), #CODE);                \ | ||||
|     } else {                                                                  \ | ||||
|       printf("%10lld ns %2dx %s\n", (long long)nanos, (ITERATIONS), #CODE);   \ | ||||
|     }                                                                         \ | ||||
|   } while (0) | ||||
| 
 | ||||
| COSMOPOLITAN_C_END_ | ||||
| #endif /* COSMOPOLITAN_LIBC_TESTLIB_BENCHMARK_H_ */ | ||||
|  | @ -22,26 +22,12 @@ | |||
| #include "libc/mem/leaks.h" | ||||
| #include "libc/stdio/stdio.h" | ||||
| #include "libc/sysv/consts/rusage.h" | ||||
| #include "libc/testlib/benchmark.h" | ||||
| 
 | ||||
| // #include <set>
 | ||||
| // #define ctl std
 | ||||
| // #define check() size()
 | ||||
| 
 | ||||
| #define BENCH(ITERATIONS, WORK_PER_RUN, CODE) \ | ||||
|     do { \ | ||||
|         struct timespec start = timespec_real(); \ | ||||
|         for (int __i = 0; __i < ITERATIONS; ++__i) { \ | ||||
|             asm volatile("" ::: "memory"); \ | ||||
|             CODE; \ | ||||
|         } \ | ||||
|         long long work = (WORK_PER_RUN) * (ITERATIONS); \ | ||||
|         double nanos = \ | ||||
|           (timespec_tonanos(timespec_sub(timespec_real(), start)) + work - \ | ||||
|            1) / \ | ||||
|           (double)work; \ | ||||
|         printf("%10g ns %2dx %s\n", nanos, (ITERATIONS), #CODE); \ | ||||
|     } while (0) | ||||
| 
 | ||||
| int | ||||
| rand32(void) | ||||
| { | ||||
|  | @ -68,19 +54,19 @@ main() | |||
|     { | ||||
|         long x = 0; | ||||
|         ctl::set<long> s; | ||||
|         BENCH(1000000, 1, s.insert(rand32() % 1000000)); | ||||
|         BENCHMARK(1000000, 1, s.insert(rand32() % 1000000)); | ||||
|         // s.check();
 | ||||
|         BENCH(1000000, 1, { | ||||
|         BENCHMARK(1000000, 1, { | ||||
|             auto i = s.find(rand32() % 1000000); | ||||
|             if (i != s.end()) | ||||
|                 x += *i; | ||||
|         }); | ||||
|         BENCH(1000000, 1, { | ||||
|         BENCHMARK(1000000, 1, { | ||||
|             auto i = s.lower_bound(rand32() % 1000000); | ||||
|             if (i != s.end()) | ||||
|                 x += *i; | ||||
|         }); | ||||
|         BENCH(1000000, 1, s.erase(rand32() % 1000000)); | ||||
|         BENCHMARK(1000000, 1, s.erase(rand32() % 1000000)); | ||||
|         eat(x); | ||||
|     } | ||||
| 
 | ||||
|  |  | |||
|  | @ -20,27 +20,13 @@ | |||
| #include "ctl/utility.h" | ||||
| #include "libc/dce.h" | ||||
| #include "libc/mem/leaks.h" | ||||
| #include "libc/testlib/benchmark.h" | ||||
| 
 | ||||
| #include "libc/calls/struct/timespec.h" | ||||
| #include "libc/runtime/runtime.h" | ||||
| #include "libc/stdio/stdio.h" | ||||
| #include "libc/str/str.h" | ||||
| 
 | ||||
| #define BENCH(ITERATIONS, WORK_PER_RUN, CODE) \ | ||||
|     do { \ | ||||
|         struct timespec start = timespec_real(); \ | ||||
|         for (int __i = 0; __i < ITERATIONS; ++__i) { \ | ||||
|             asm volatile("" ::: "memory"); \ | ||||
|             CODE; \ | ||||
|         } \ | ||||
|         long long work = (WORK_PER_RUN) * (ITERATIONS); \ | ||||
|         double nanos = \ | ||||
|           (timespec_tonanos(timespec_sub(timespec_real(), start)) + work - \ | ||||
|            1) / \ | ||||
|           (double)work; \ | ||||
|         printf("%10g ns %2dx %s\n", nanos, (ITERATIONS), #CODE); \ | ||||
|     } while (0) | ||||
| 
 | ||||
| const char* big_c = "aaaaaaaaaaaaaaaaaaaaaaaa"; | ||||
| const char* small_c = "aaaaaaaaaaaaaaaaaaaaaaa"; | ||||
| 
 | ||||
|  | @ -55,98 +41,98 @@ main() | |||
| { | ||||
|     const ctl::string_view big(big_c), small(small_c); | ||||
| 
 | ||||
|     BENCH(ITERATIONS * 10, 1, { | ||||
|     BENCHMARK(ITERATIONS * 10, 1, { | ||||
|         ctl::string s; | ||||
|         s.append("hello "); | ||||
|         s.append("world"); | ||||
|     }); | ||||
| 
 | ||||
|     BENCH(ITERATIONS, 8, { | ||||
|     BENCHMARK(ITERATIONS, 8, { | ||||
|         ctl::string s; | ||||
|         for (int i = 0; i < 8; ++i) { | ||||
|             s.append('a'); | ||||
|         } | ||||
|     }); | ||||
| 
 | ||||
|     BENCH(ITERATIONS, 16, { | ||||
|     BENCHMARK(ITERATIONS, 16, { | ||||
|         ctl::string s; | ||||
|         for (int i = 0; i < 16; ++i) { | ||||
|             s.append('a'); | ||||
|         } | ||||
|     }); | ||||
| 
 | ||||
|     BENCH(ITERATIONS, 23, { | ||||
|     BENCHMARK(ITERATIONS, 23, { | ||||
|         ctl::string s; | ||||
|         for (int i = 0; i < 23; ++i) { | ||||
|             s.append('a'); | ||||
|         } | ||||
|     }); | ||||
| 
 | ||||
|     BENCH(ITERATIONS, 24, { | ||||
|     BENCHMARK(ITERATIONS, 24, { | ||||
|         ctl::string s; | ||||
|         for (int i = 0; i < 24; ++i) { | ||||
|             s.append('a'); | ||||
|         } | ||||
|     }); | ||||
| 
 | ||||
|     BENCH(ITERATIONS, 32, { | ||||
|     BENCHMARK(ITERATIONS, 32, { | ||||
|         ctl::string s; | ||||
|         for (int i = 0; i < 32; ++i) { | ||||
|             s.append('a'); | ||||
|         } | ||||
|     }); | ||||
| 
 | ||||
|     BENCH(ITERATIONS, 1, { ctl::string s(small_c); }); | ||||
|     BENCHMARK(ITERATIONS, 1, { ctl::string s(small_c); }); | ||||
| 
 | ||||
|     BENCH(ITERATIONS, 1, { ctl::string s(small); }); | ||||
|     BENCHMARK(ITERATIONS, 1, { ctl::string s(small); }); | ||||
| 
 | ||||
|     { | ||||
|         ctl::string small_copy("hello world"); | ||||
|         BENCH(ITERATIONS, 1, { ctl::string s2(small_copy); }); | ||||
|         BENCHMARK(ITERATIONS, 1, { ctl::string s2(small_copy); }); | ||||
|     } | ||||
| 
 | ||||
|     BENCH(ITERATIONS, 1, { | ||||
|     BENCHMARK(ITERATIONS, 1, { | ||||
|         ctl::string s(small); | ||||
|         ctl::string s2(ctl::move(s)); | ||||
|     }); | ||||
| 
 | ||||
|     BENCH(ITERATIONS, 1, { | ||||
|     BENCHMARK(ITERATIONS, 1, { | ||||
|         ctl::string s(small); | ||||
|         ctl::string s2(s); | ||||
|     }); | ||||
| 
 | ||||
|     BENCH(ITERATIONS, 1, { ctl::string s(big_c); }); | ||||
|     BENCHMARK(ITERATIONS, 1, { ctl::string s(big_c); }); | ||||
| 
 | ||||
|     BENCH(ITERATIONS, 1, { ctl::string s(big); }); | ||||
|     BENCHMARK(ITERATIONS, 1, { ctl::string s(big); }); | ||||
| 
 | ||||
|     { | ||||
|         ctl::string big_copy(big); | ||||
|         BENCH(ITERATIONS, 1, { ctl::string s2(big_copy); }); | ||||
|         BENCHMARK(ITERATIONS, 1, { ctl::string s2(big_copy); }); | ||||
|     } | ||||
| 
 | ||||
|     BENCH(ITERATIONS, 1, { | ||||
|     BENCHMARK(ITERATIONS, 1, { | ||||
|         ctl::string s(big); | ||||
|         ctl::string s2(ctl::move(s)); | ||||
|     }); | ||||
| 
 | ||||
|     BENCH(ITERATIONS, 1, { | ||||
|     BENCHMARK(ITERATIONS, 1, { | ||||
|         ctl::string s(big); | ||||
|         ctl::string s2(s); | ||||
|     }); | ||||
| 
 | ||||
|     BENCH(ITERATIONS, 1, { ctl::string s(23, 'a'); }); | ||||
|     BENCHMARK(ITERATIONS, 1, { ctl::string s(23, 'a'); }); | ||||
| 
 | ||||
|     BENCH(ITERATIONS, 1, { ctl::string s(24, 'a'); }); | ||||
|     BENCHMARK(ITERATIONS, 1, { ctl::string s(24, 'a'); }); | ||||
| 
 | ||||
|     { | ||||
|         ctl::string s(5, 'a'); | ||||
|         BENCH(ITERATIONS, 1, { ctl::string_view s2(s); }); | ||||
|         BENCHMARK(ITERATIONS, 1, { ctl::string_view s2(s); }); | ||||
|     } | ||||
| 
 | ||||
|     { | ||||
|         ctl::string big_trunc(48, 'a'); | ||||
|         big_trunc.resize(4); | ||||
|         BENCH(ITERATIONS, 1, { ctl::string s(big_trunc); }); | ||||
|         BENCHMARK(ITERATIONS, 1, { ctl::string s(big_trunc); }); | ||||
|     } | ||||
| 
 | ||||
|     CheckForMemoryLeaks(); | ||||
|  |  | |||
|  | @ -18,12 +18,13 @@ | |||
| ╚─────────────────────────────────────────────────────────────────────────────*/ | ||||
| #include "libc/str/blake2.h" | ||||
| #include "libc/assert.h" | ||||
| #include "libc/calls/struct/timespec.h" | ||||
| #include "libc/mem/mem.h" | ||||
| #include "libc/stdio/rand.h" | ||||
| #include "libc/stdio/stdio.h" | ||||
| #include "libc/str/str.h" | ||||
| #include "libc/str/tab.internal.h" | ||||
| #include "libc/testlib/ezbench.h" | ||||
| #include "libc/testlib/benchmark.h" | ||||
| #include "libc/testlib/hyperion.h" | ||||
| #include "libc/testlib/testlib.h" | ||||
| 
 | ||||
|  | @ -90,17 +91,18 @@ TEST(BLAKE2B256Test, vectors) { | |||
|   free(line); | ||||
| } | ||||
| 
 | ||||
| BENCH(blake2, bench) { | ||||
| BENCH(blake2, benchmark) { | ||||
|   char fun[256]; | ||||
|   rngset(fun, 256, _rand64, -1); | ||||
|   EZBENCH_N("blake2b256", 0, EZBLAKE2B256(0, 0)); | ||||
|   EZBENCH_N("blake2b256", 8, EZBLAKE2B256("helloooo", 8)); | ||||
|   EZBENCH_N("blake2b256", 31, EZBLAKE2B256(fun, 31)); | ||||
|   EZBENCH_N("blake2b256", 32, EZBLAKE2B256(fun, 32)); | ||||
|   EZBENCH_N("blake2b256", 63, EZBLAKE2B256(fun, 63)); | ||||
|   EZBENCH_N("blake2b256", 64, EZBLAKE2B256(fun, 64)); | ||||
|   EZBENCH_N("blake2b256", 128, EZBLAKE2B256(fun, 128)); | ||||
|   EZBENCH_N("blake2b256", 256, EZBLAKE2B256(fun, 256)); | ||||
|   EZBENCH_N("blake2b256", kHyperionSize, | ||||
|             EZBLAKE2B256(kHyperion, kHyperionSize)); | ||||
|   BENCHMARK(100, 0, __expropriate(EZBLAKE2B256(0, 0))); | ||||
|   BENCHMARK(100, 1, __expropriate(EZBLAKE2B256("h", 1))); | ||||
|   BENCHMARK(100, 8, __expropriate(EZBLAKE2B256("helloooo", 8))); | ||||
|   BENCHMARK(100, 31, __expropriate(EZBLAKE2B256(fun, 31))); | ||||
|   BENCHMARK(100, 32, __expropriate(EZBLAKE2B256(fun, 32))); | ||||
|   BENCHMARK(100, 63, __expropriate(EZBLAKE2B256(fun, 63))); | ||||
|   BENCHMARK(100, 64, __expropriate(EZBLAKE2B256(fun, 64))); | ||||
|   BENCHMARK(100, 128, __expropriate(EZBLAKE2B256(fun, 128))); | ||||
|   BENCHMARK(100, 256, __expropriate(EZBLAKE2B256(fun, 256))); | ||||
|   BENCHMARK(100, kHyperionSize, | ||||
|             __expropriate(EZBLAKE2B256(kHyperion, kHyperionSize))); | ||||
| } | ||||
|  |  | |||
|  | @ -16,13 +16,14 @@ | |||
| │ limitations under the License.                                               │ | ||||
| ╚─────────────────────────────────────────────────────────────────────────────*/ | ||||
| #include "libc/str/highwayhash64.h" | ||||
| #include "libc/calls/struct/timespec.h" | ||||
| #include "libc/inttypes.h" | ||||
| #include "libc/nexgen32e/crc32.h" | ||||
| #include "libc/runtime/runtime.h" | ||||
| #include "libc/stdio/rand.h" | ||||
| #include "libc/stdio/stdio.h" | ||||
| #include "libc/str/str.h" | ||||
| #include "libc/testlib/ezbench.h" | ||||
| #include "libc/testlib/benchmark.h" | ||||
| #include "libc/testlib/hyperion.h" | ||||
| #include "libc/testlib/testlib.h" | ||||
| #include "third_party/zlib/zlib.h" | ||||
|  | @ -100,33 +101,31 @@ TEST(highwayhash64, test) { | |||
| BENCH(highwayhash64, newbench) { | ||||
|   char fun[256]; | ||||
|   rngset(fun, 256, _rand64, -1); | ||||
|   EZBENCH_N("highwayhash64", 0, HighwayHash64(0, 0, kTestKey1)); | ||||
|   EZBENCH_N("highwayhash64", 8, HighwayHash64("helloooo", 8, kTestKey1)); | ||||
|   EZBENCH_N("highwayhash64", 31, HighwayHash64(fun, 31, kTestKey1)); | ||||
|   EZBENCH_N("highwayhash64", 32, HighwayHash64(fun, 32, kTestKey1)); | ||||
|   EZBENCH_N("highwayhash64", 63, HighwayHash64(fun, 63, kTestKey1)); | ||||
|   EZBENCH_N("highwayhash64", 64, HighwayHash64(fun, 64, kTestKey1)); | ||||
|   EZBENCH_N("highwayhash64", 128, HighwayHash64(fun, 128, kTestKey1)); | ||||
|   EZBENCH_N("highwayhash64", 256, HighwayHash64(fun, 256, kTestKey1)); | ||||
|   EZBENCH_N("highwayhash64", kHyperionSize, | ||||
|   BENCHMARK(10, 0, HighwayHash64(0, 0, kTestKey1)); | ||||
|   BENCHMARK(10, 8, HighwayHash64("helloooo", 8, kTestKey1)); | ||||
|   BENCHMARK(10, 31, HighwayHash64(fun, 31, kTestKey1)); | ||||
|   BENCHMARK(10, 32, HighwayHash64(fun, 32, kTestKey1)); | ||||
|   BENCHMARK(10, 63, HighwayHash64(fun, 63, kTestKey1)); | ||||
|   BENCHMARK(10, 64, HighwayHash64(fun, 64, kTestKey1)); | ||||
|   BENCHMARK(10, 128, HighwayHash64(fun, 128, kTestKey1)); | ||||
|   BENCHMARK(10, 256, HighwayHash64(fun, 256, kTestKey1)); | ||||
|   BENCHMARK(10, kHyperionSize, | ||||
|             HighwayHash64(kHyperion, kHyperionSize, kTestKey1)); | ||||
| } | ||||
| 
 | ||||
| BENCH(highwayhash64, bench) { | ||||
|   EZBENCH2("knuth small", donothing, | ||||
|            __expropriate(KnuthMultiplicativeHash32(__veil("r", "hello"), 5))); | ||||
|   EZBENCH2("crc32c small", donothing, __expropriate(crc32c(0, "hello", 5))); | ||||
|   EZBENCH2("crc32 small", donothing, | ||||
|            __expropriate(crc32_z(0, __veil("r", "hello"), 5))); | ||||
|   EZBENCH2("highwayhash64 small", donothing, | ||||
|            HighwayHash64((void *)"hello", 5, kTestKey1)); | ||||
|   EZBENCH2("crc32 big", donothing, | ||||
|            __expropriate(crc32_z(0, kHyperion, kHyperionSize))); | ||||
|   EZBENCH2("crc32c big", donothing, | ||||
|            __expropriate(crc32c(0, kHyperion, kHyperionSize))); | ||||
|   EZBENCH2("highwayhash64 big", donothing, | ||||
|            HighwayHash64((void *)kHyperion, kHyperionSize, kTestKey1)); | ||||
|   EZBENCH2("knuth big", donothing, | ||||
|            __expropriate(KnuthMultiplicativeHash32(__veil("r", kHyperion), | ||||
|                                                    kHyperionSize))); | ||||
|   BENCHMARK(10, 5, | ||||
|             __expropriate(KnuthMultiplicativeHash32(__veil("r", "hello"), 5))); | ||||
|   BENCHMARK(10, 5, __expropriate(crc32c(0, "hello", 5))); | ||||
|   BENCHMARK(10, 5, __expropriate(crc32_z(0, __veil("r", "hello"), 5))); | ||||
|   BENCHMARK(10, 5, HighwayHash64((void *)"hello", 5, kTestKey1)); | ||||
|   BENCHMARK(10, kHyperionSize, | ||||
|             __expropriate(crc32_z(0, kHyperion, kHyperionSize))); | ||||
|   BENCHMARK(10, kHyperionSize, | ||||
|             __expropriate(crc32c(0, kHyperion, kHyperionSize))); | ||||
|   BENCHMARK(10, kHyperionSize, | ||||
|             HighwayHash64((void *)kHyperion, kHyperionSize, kTestKey1)); | ||||
|   BENCHMARK(10, kHyperionSize, | ||||
|             __expropriate(KnuthMultiplicativeHash32(__veil("r", kHyperion), | ||||
|                                                     kHyperionSize))); | ||||
| } | ||||
|  |  | |||
|  | @ -8,6 +8,7 @@ | |||
| #include "libc/mem/mem.h" | ||||
| #include "libc/runtime/runtime.h" | ||||
| #include "libc/stdio/stdio.h" | ||||
| #include "libc/testlib/benchmark.h" | ||||
| #include "libc/x/xasprintf.h" | ||||
| 
 | ||||
| #define EXPENSIVE_TESTS 0 | ||||
|  | @ -237,20 +238,6 @@ float nothing(float x) { | |||
| 
 | ||||
| float (*barrier)(float) = nothing; | ||||
| 
 | ||||
| #define BENCH(ITERATIONS, WORK_PER_RUN, CODE)                                 \ | ||||
|   do {                                                                        \ | ||||
|     struct timespec start = timespec_real();                                  \ | ||||
|     for (int __i = 0; __i < ITERATIONS; ++__i) {                              \ | ||||
|       asm volatile("" ::: "memory");                                          \ | ||||
|       CODE;                                                                   \ | ||||
|     }                                                                         \ | ||||
|     long long work = (WORK_PER_RUN) * (ITERATIONS);                           \ | ||||
|     long nanos =                                                              \ | ||||
|         (timespec_tonanos(timespec_sub(timespec_real(), start)) + work - 1) / \ | ||||
|         (double)work;                                                         \ | ||||
|     printf("%8ld ns %2dx %s\n", nanos, (ITERATIONS), #CODE);                  \ | ||||
|   } while (0) | ||||
| 
 | ||||
| int main() { | ||||
|   ShowCrashReports(); | ||||
| 
 | ||||
|  | @ -270,12 +257,12 @@ int main() { | |||
|   test_fdotf_naive(); | ||||
|   test_fdotf_hefty(); | ||||
|   test_fdotf_ruler(); | ||||
|   BENCH(20, 1, (kahan = barrier(fdotf_kahan(A, B, n)))); | ||||
|   BENCH(20, 1, (dubble = barrier(fdotf_dubble(A, B, n)))); | ||||
|   BENCH(20, 1, (naive = barrier(fdotf_naive(A, B, n)))); | ||||
|   BENCH(20, 1, (recursive = barrier(fdotf_recursive(A, B, n)))); | ||||
|   BENCH(20, 1, (ruler = barrier(fdotf_ruler(A, B, n)))); | ||||
|   BENCH(20, 1, (hefty = barrier(fdotf_hefty(A, B, n)))); | ||||
|   BENCHMARK(20, 1, (kahan = barrier(fdotf_kahan(A, B, n)))); | ||||
|   BENCHMARK(20, 1, (dubble = barrier(fdotf_dubble(A, B, n)))); | ||||
|   BENCHMARK(20, 1, (naive = barrier(fdotf_naive(A, B, n)))); | ||||
|   BENCHMARK(20, 1, (recursive = barrier(fdotf_recursive(A, B, n)))); | ||||
|   BENCHMARK(20, 1, (ruler = barrier(fdotf_ruler(A, B, n)))); | ||||
|   BENCHMARK(20, 1, (hefty = barrier(fdotf_hefty(A, B, n)))); | ||||
|   printf("dubble    = %f (%g)\n", dubble, fabs(dubble - dubble)); | ||||
|   printf("kahan     = %f (%g)\n", kahan, fabs(kahan - dubble)); | ||||
|   printf("naive     = %f (%g)\n", naive, fabs(naive - dubble)); | ||||
|  |  | |||
|  | @ -8,6 +8,7 @@ | |||
| #include "libc/mem/mem.h" | ||||
| #include "libc/runtime/runtime.h" | ||||
| #include "libc/stdio/stdio.h" | ||||
| #include "libc/testlib/benchmark.h" | ||||
| #include "libc/x/xasprintf.h" | ||||
| 
 | ||||
| #define EXPENSIVE_TESTS 0 | ||||
|  | @ -225,20 +226,6 @@ float nothing(float x) { | |||
| 
 | ||||
| float (*barrier)(float) = nothing; | ||||
| 
 | ||||
| #define BENCH(ITERATIONS, WORK_PER_RUN, CODE)                                 \ | ||||
|   do {                                                                        \ | ||||
|     struct timespec start = timespec_real();                                  \ | ||||
|     for (int __i = 0; __i < ITERATIONS; ++__i) {                              \ | ||||
|       asm volatile("" ::: "memory");                                          \ | ||||
|       CODE;                                                                   \ | ||||
|     }                                                                         \ | ||||
|     long long work = (WORK_PER_RUN) * (ITERATIONS);                           \ | ||||
|     long nanos =                                                              \ | ||||
|         (timespec_tonanos(timespec_sub(timespec_real(), start)) + work - 1) / \ | ||||
|         (double)work;                                                         \ | ||||
|     printf("%8ld ns %2dx %s\n", nanos, (ITERATIONS), #CODE);                  \ | ||||
|   } while (0) | ||||
| 
 | ||||
| int main() { | ||||
|   ShowCrashReports(); | ||||
| 
 | ||||
|  | @ -255,12 +242,12 @@ int main() { | |||
|   test_fsumf_naive(); | ||||
|   test_fsumf_hefty(); | ||||
|   test_fsumf_ruler(); | ||||
|   BENCH(20, 1, (kahan = barrier(fsumf_kahan(p, n)))); | ||||
|   BENCH(20, 1, (dubble = barrier(fsumf_dubble(p, n)))); | ||||
|   BENCH(20, 1, (naive = barrier(fsumf_naive(p, n)))); | ||||
|   BENCH(20, 1, (recursive = barrier(fsumf_recursive(p, n)))); | ||||
|   BENCH(20, 1, (ruler = barrier(fsumf_ruler(p, n)))); | ||||
|   BENCH(20, 1, (hefty = barrier(fsumf_hefty(p, n)))); | ||||
|   BENCHMARK(20, 1, (kahan = barrier(fsumf_kahan(p, n)))); | ||||
|   BENCHMARK(20, 1, (dubble = barrier(fsumf_dubble(p, n)))); | ||||
|   BENCHMARK(20, 1, (naive = barrier(fsumf_naive(p, n)))); | ||||
|   BENCHMARK(20, 1, (recursive = barrier(fsumf_recursive(p, n)))); | ||||
|   BENCHMARK(20, 1, (ruler = barrier(fsumf_ruler(p, n)))); | ||||
|   BENCHMARK(20, 1, (hefty = barrier(fsumf_hefty(p, n)))); | ||||
|   printf("dubble    = %f (%g)\n", dubble, fabs(dubble - dubble)); | ||||
|   printf("kahan     = %f (%g)\n", kahan, fabs(kahan - dubble)); | ||||
|   printf("naive     = %f (%g)\n", naive, fabs(naive - dubble)); | ||||
|  |  | |||
							
								
								
									
										7
									
								
								third_party/dlmalloc/BUILD.mk
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										7
									
								
								third_party/dlmalloc/BUILD.mk
									
										
									
									
										vendored
									
									
								
							|  | @ -58,6 +58,13 @@ $(THIRD_PARTY_DLMALLOC_A_OBJS): private				\ | |||
| 			-Wframe-larger-than=4096		\
 | ||||
| 			-Walloca-larger-than=4096 | ||||
| 
 | ||||
| # avoid the legacy sse decoding penalty on avx systems
 | ||||
| ifeq ($(MODE),) | ||||
| $(THIRD_PARTY_DLMALLOC_A_OBJS): private				\ | ||||
| 		COPTS +=					\
 | ||||
| 			-mgeneral-regs-only | ||||
| endif | ||||
| 
 | ||||
| THIRD_PARTY_DLMALLOC_LIBS = $(foreach x,$(THIRD_PARTY_DLMALLOC_ARTIFACTS),$($(x))) | ||||
| THIRD_PARTY_DLMALLOC_SRCS = $(foreach x,$(THIRD_PARTY_DLMALLOC_ARTIFACTS),$($(x)_SRCS)) | ||||
| THIRD_PARTY_DLMALLOC_HDRS = $(foreach x,$(THIRD_PARTY_DLMALLOC_ARTIFACTS),$($(x)_HDRS)) | ||||
|  |  | |||
							
								
								
									
										3
									
								
								third_party/libcxx/BUILD.mk
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								third_party/libcxx/BUILD.mk
									
										
									
									
										vendored
									
									
								
							|  | @ -2148,6 +2148,9 @@ $(THIRD_PARTY_LIBCXX_A_OBJS): private				\ | |||
| 			-DLIBCXX_BUILDING_LIBCXXABI		\
 | ||||
| 			-D_LIBCPP_BUILDING_LIBRARY | ||||
| 
 | ||||
| o/$(MODE)/third_party/libcxx/locale.o: private			\ | ||||
| 		OVERRIDE_COPTS += -O -g0 | ||||
| 
 | ||||
| THIRD_PARTY_LIBCXX_LIBS = $(foreach x,$(THIRD_PARTY_LIBCXX_ARTIFACTS),$($(x))) | ||||
| THIRD_PARTY_LIBCXX_SRCS = $(foreach x,$(THIRD_PARTY_LIBCXX_ARTIFACTS),$($(x)_SRCS)) | ||||
| THIRD_PARTY_LIBCXX_HDRS = $(foreach x,$(THIRD_PARTY_LIBCXX_ARTIFACTS),$($(x)_HDRS)) | ||||
|  |  | |||
							
								
								
									
										7
									
								
								third_party/nsync/BUILD.mk
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										7
									
								
								third_party/nsync/BUILD.mk
									
										
									
									
										vendored
									
									
								
							|  | @ -56,6 +56,13 @@ $(THIRD_PARTY_NSYNC_A_OBJS): private			\ | |||
| 			-Wframe-larger-than=4096	\
 | ||||
| 			-Walloca-larger-than=4096 | ||||
| 
 | ||||
| # avoid the legacy sse decoding penalty on avx systems
 | ||||
| ifeq ($(MODE),) | ||||
| $(THIRD_PARTY_NSYNC_A_OBJS): private			\ | ||||
| 		COPTS +=				\
 | ||||
| 			-mgeneral-regs-only | ||||
| endif | ||||
| 
 | ||||
| # these assembly files are safe to build on aarch64
 | ||||
| o/$(MODE)/third_party/nsync/compat.o: third_party/nsync/compat.S | ||||
| 	@$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $< | ||||
|  |  | |||
							
								
								
									
										7
									
								
								third_party/nsync/mem/BUILD.mk
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										7
									
								
								third_party/nsync/mem/BUILD.mk
									
										
									
									
										vendored
									
									
								
							|  | @ -49,6 +49,13 @@ $(THIRD_PARTY_NSYNC_MEM_A_OBJS): private		\ | |||
| 			-Wframe-larger-than=4096	\
 | ||||
| 			-Walloca-larger-than=4096 | ||||
| 
 | ||||
| # avoid the legacy sse decoding penalty on avx systems
 | ||||
| ifeq ($(MODE),) | ||||
| $(THIRD_PARTY_NSYNC_MEM_A_OBJS): private		\ | ||||
| 		COPTS +=				\
 | ||||
| 			-mgeneral-regs-only | ||||
| endif | ||||
| 
 | ||||
| THIRD_PARTY_NSYNC_MEM_LIBS = $(foreach x,$(THIRD_PARTY_NSYNC_MEM_ARTIFACTS),$($(x))) | ||||
| THIRD_PARTY_NSYNC_MEM_SRCS = $(foreach x,$(THIRD_PARTY_NSYNC_MEM_ARTIFACTS),$($(x)_SRCS)) | ||||
| THIRD_PARTY_NSYNC_MEM_CHECKS = $(foreach x,$(THIRD_PARTY_NSYNC_MEM_ARTIFACTS),$($(x)_CHECKS)) | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue