diff --git a/test/libc/tinymath/fsum_test.c b/test/libc/tinymath/fsum_test.c deleted file mode 100644 index 7936e440a..000000000 --- a/test/libc/tinymath/fsum_test.c +++ /dev/null @@ -1,54 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2021 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. 
│ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" -#include "libc/math.h" -#include "libc/mem/gc.h" -#include "libc/testlib/ezbench.h" -#include "libc/testlib/testlib.h" -#include "libc/x/xasprintf.h" - -#define N 100000 - -float F[N]; -double D[N]; - -void SetUp(void) { - int i; - for (i = 0; i < N / 2; ++i) { - D[i * 2 + 0] = 1000000000.1; - D[i * 2 + 1] = 1.1; - } - for (i = 0; i < N / 2; ++i) { - F[i * 2 + 0] = 1000.1; - F[i * 2 + 1] = 1.1; - } -} - -TEST(fsum, test) { - EXPECT_STREQ("500000000.6", gc(xasprintf("%.15g", fsum(D, N) / N))); -} - -TEST(fsumf, test) { - EXPECT_STREQ("500.6", gc(xasprintf("%.7g", fsumf(F, N) / N))); -} - -BENCH(fsum, bench) { - EZBENCH2("fsum", donothing, fsum(D, N)); - EZBENCH2("fsumf", donothing, fsumf(F, N)); -} diff --git a/test/libc/tinymath/fsum_test.cc b/test/libc/tinymath/fsum_test.cc new file mode 100644 index 000000000..20694d97f --- /dev/null +++ b/test/libc/tinymath/fsum_test.cc @@ -0,0 +1,154 @@ +#include "libc/assert.h" +#include "libc/calls/struct/timespec.h" +#include "libc/intrin/bsr.h" +#include "libc/macros.internal.h" +#include "libc/math.h" +#include "libc/mem/gc.h" +#include "libc/mem/mem.h" +#include "libc/runtime/runtime.h" +#include "libc/stdio/stdio.h" +#include "libc/x/xasprintf.h" + +int rand32(void) { + /* Knuth, D.E., "The Art of Computer Programming," Vol 2, + Seminumerical Algorithms, Third Edition, Addison-Wesley, 1998, + p. 106 (line 26) & p. 
108 */ + static unsigned long long lcg = 1; + lcg *= 6364136223846793005; + lcg += 1442695040888963407; + return lcg >> 32; +} + +float float01(unsigned x) { // (0,1) + return 1.f / 8388608 * ((x >> 9) + .5f); +} + +float numba(void) { // (-1,1) + return float01(rand32()) * 2 - 1; +} + +double fsumf_gold(const float *p, size_t n) { + size_t i; + double s; + if (n > 8) + return fsumf_gold(p, n / 2) + fsumf_gold(p + n / 2, n - n / 2); + for (s = i = 0; i < n; ++i) + s += p[i]; + return s; +} + +float fsumf_linear(const float *p, size_t n) { + float s = 0; + for (size_t i = 0; i < n; ++i) + s += p[i]; + return s; +} + +float fsumf_kahan(const float *p, size_t n) { + size_t i; + float err, sum, t, y; + sum = err = 0; + for (i = 0; i < n; ++i) { + y = p[i] - err; + t = sum + y; + err = (t - sum) - y; + sum = t; + } + return sum; +} + +float fsumf_logarithmic(const float *p, size_t n) { + size_t i; + float s; + if (n > 32) + return fsumf_logarithmic(p, n / 2) + + fsumf_logarithmic(p + n / 2, n - n / 2); + for (s = i = 0; i < n; ++i) + s += p[i]; + return s; +} + +template <int N> +inline float hsum(const float *p) { + return hsum<N / 2>(p) + hsum<N / 2>(p + N / 2); +} + +template <> +inline float hsum<1>(const float *p) { + return *p; +} + +#define CHUNK 8 + +#define OPTIMIZE __attribute__((__optimize__("-O3"))) +#define PORTABLE __target_clones("avx512f,avx") + +OPTIMIZE PORTABLE float fsumf_nonrecursive(const float *p, size_t n) { + unsigned i, par, len = 0; + float sum, res[n / CHUNK + 1]; + for (res[0] = i = 0; i + CHUNK <= n; i += CHUNK) + res[len++] = hsum<CHUNK>(p + i); + if (i < n) { + for (sum = 0; i < n; i++) + sum += p[i]; + res[len++] = sum; + } + for (par = len >> 1; par; par >>= 1, len >>= 1) { + for (i = 0; i < par; ++i) + res[i] += res[par + i]; + if (len & 1) + res[par - 1] += res[len - 1]; + } + return res[0]; +} + +void test_fsumf_nonrecursive(void) { + float A[CHUNK * 3]; + for (int i = 0; i < CHUNK * 3; ++i) + A[i] = numba(); + for (int n = 0; n < CHUNK * 3; ++n) + if 
(fabsf(fsumf_nonrecursive(A, n) - fsumf_kahan(A, n)) > 1e-3) + exit(7); +} + +float nothing(float x) { + return x; +} + +float (*barrier)(float) = nothing; + +#define BENCH(ITERATIONS, WORK_PER_RUN, CODE) \ + do { \ + struct timespec start = timespec_real(); \ + for (int __i = 0; __i < ITERATIONS; ++__i) { \ + asm volatile("" ::: "memory"); \ + CODE; \ + } \ + long long work = (WORK_PER_RUN) * (ITERATIONS); \ + long nanos = \ + (timespec_tonanos(timespec_sub(timespec_real(), start)) + work - 1) / \ + (double)work; \ + printf("%8ld ns %2dx %s\n", nanos, (ITERATIONS), #CODE); \ + } while (0) + +int main() { + size_t n = 1024; + float *p = (float *)malloc(sizeof(float) * n); + for (size_t i = 0; i < n; ++i) + p[i] = numba(); + float kahan, gold, linear, logarithmic, nonrecursive; + test_fsumf_nonrecursive(); + BENCH(100, 1, (kahan = barrier(fsumf_kahan(p, n)))); + BENCH(100, 1, (gold = barrier(fsumf_gold(p, n)))); + BENCH(100, 1, (linear = barrier(fsumf_linear(p, n)))); + BENCH(100, 1, (logarithmic = barrier(fsumf_logarithmic(p, n)))); + BENCH(100, 1, (nonrecursive = barrier(fsumf_nonrecursive(p, n)))); + printf("gold = %.12g (%.12g)\n", gold, fabs(gold - gold)); + printf("linear = %.12g (%.12g)\n", linear, fabs(linear - gold)); + printf("kahan = %.12g (%.12g)\n", kahan, fabs(kahan - gold)); + printf("logarithmic = %.12g (%.12g)\n", logarithmic, + fabs(logarithmic - gold)); + printf("nonrecursive = %.12g (%.12g)\n", nonrecursive, + fabs(nonrecursive - gold)); + free(p); +} diff --git a/third_party/libcxx/fs/directory_iterator.cpp b/third_party/libcxx/fs/directory_iterator.cpp index 6602d42ac..a82816c60 100644 --- a/third_party/libcxx/fs/directory_iterator.cpp +++ b/third_party/libcxx/fs/directory_iterator.cpp @@ -49,7 +49,7 @@ public: if (__stream_ == INVALID_HANDLE_VALUE) { ec = detail::make_windows_error(GetLastError()); const bool ignore_permission_denied = bool(opts & directory_options::skip_permission_denied); - if (ignore_permission_denied && ec.value() == 
static_cast<int>(errc::permission_denied)) + if (ignore_permission_denied && ec == errc::permission_denied) ec.clear(); return; } @@ -118,7 +118,7 @@ public: if ((__stream_ = ::opendir(root.c_str())) == nullptr) { ec = detail::capture_errno(); const bool allow_eacces = bool(opts & directory_options::skip_permission_denied); - if (allow_eacces && ec.value() == (int)errc::permission_denied) + if (allow_eacces && ec == errc::permission_denied) ec.clear(); return; } @@ -307,7 +307,7 @@ bool recursive_directory_iterator::__try_recursion(error_code* ec) { } if (m_ec) { const bool allow_eacess = bool(__imp_->__options_ & directory_options::skip_permission_denied); - if (m_ec.value() == (int)errc::permission_denied && allow_eacess) { + if (m_ec == errc::permission_denied && allow_eacess) { if (ec) ec->clear(); } else { diff --git a/third_party/libcxx/fs/error.h b/third_party/libcxx/fs/error.h index 21742d77e..ecbfc1f3f 100644 --- a/third_party/libcxx/fs/error.h +++ b/third_party/libcxx/fs/error.h @@ -100,7 +100,7 @@ inline errc __win_err_to_errc(int err) { inline error_code capture_errno() { _LIBCPP_ASSERT_INTERNAL(errno != 0, "Expected errno to be non-zero"); - return error_code((int)__err_to_errc(errno), generic_category()); + return error_code(__errc_to_err((errc)errno), generic_category()); } #if defined(_LIBCPP_WIN32API) diff --git a/third_party/libcxx/fs/file_descriptor.h b/third_party/libcxx/fs/file_descriptor.h index eab0aa2de..55c313658 100644 --- a/third_party/libcxx/fs/file_descriptor.h +++ b/third_party/libcxx/fs/file_descriptor.h @@ -194,8 +194,8 @@ inline perms posix_get_perms(const StatT& st) noexcept { return static_cast err("posix_stat", ec, &p); diff --git a/third_party/libcxx/system_error.cpp b/third_party/libcxx/system_error.cpp index 5c5df1fda..0e5bf2d4f 100644 --- a/third_party/libcxx/system_error.cpp +++ b/third_party/libcxx/system_error.cpp @@ -218,7 +218,7 @@ system_error::~system_error() noexcept {} void __throw_system_error(int ev, const char* 
what_arg) { #ifndef _LIBCPP_HAS_NO_EXCEPTIONS - std::__throw_system_error(error_code((int)__err_to_errc(ev), system_category()), what_arg); + std::__throw_system_error(error_code(__errc_to_err((errc)ev), system_category()), what_arg); #else // The above could also handle the no-exception case, but for size, avoid referencing system_category() unnecessarily. _LIBCPP_VERBOSE_ABORT(