diff --git a/examples/trapping.c b/examples/trapping.c new file mode 100644 index 000000000..15a2c5e0c --- /dev/null +++ b/examples/trapping.c @@ -0,0 +1,142 @@ +#include +#include +#include +#include +#include +#include +#include +#include "libc/calls/struct/aarch64.internal.h" + +/* + Do you put lots of assert(!isnan(x)) in your code?? + Your microprocessor has a feature to automate this. + + Uncaught SIGFPE (FPE_FLTINV) + __math_invalidf at libc/tinymath/math_errf.c:88 + logf at libc/tinymath/logf.c:100 + main at examples/trapping.c:29 + cosmo at libc/runtime/cosmo.S:105 + _start at libc/crt/crt.S:116 + + This file shows how to use floating point exception + trapping with Cosmopolitan Libc. +*/ + +#define TRAPS (FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW) + +void spring_trap(int sig, siginfo_t *si, void *arg) { + + // print signal safely + const char *msg; + int sic = si->si_code; + if (sic == FPE_INTDIV) + msg = "FPE_INTDIV: "; // integer divide by zero + else if (sic == FPE_INTOVF) + msg = "FPE_INTOVF: "; // integer overflow + else if (sic == FPE_FLTDIV) + msg = "FPE_FLTDIV: "; // floating point divide by zero + else if (sic == FPE_FLTOVF) + msg = "FPE_FLTOVF: "; // floating point overflow + else if (sic == FPE_FLTUND) + msg = "FPE_FLTUND: "; // floating point underflow + else if (sic == FPE_FLTRES) + msg = "FPE_FLTRES: "; // floating point inexact + else if (sic == FPE_FLTINV) + msg = "FPE_FLTINV: "; // invalid floating point operation + else if (sic == FPE_FLTSUB) + msg = "FPE_FLTSUB: "; // subscript out of range + else + msg = "SIGFPE: "; + write(1, msg, strlen(msg)); + + // recover from trap so that execution may resume + // without this the same signal will just keep getting raised + ucontext_t *ctx = arg; +#ifdef __x86_64__ + if (ctx->uc_mcontext.fpregs) { + ctx->uc_mcontext.fpregs->mxcsr |= TRAPS << 7; // disable traps + ctx->uc_mcontext.fpregs->mxcsr &= ~TRAPS; // clear cages + return; + } +#elif defined(__aarch64__) + struct _aarch64_ctx *ac; + for (ac = (struct _aarch64_ctx *)ctx->uc_mcontext.__reserved; ac->magic; + ac = (struct _aarch64_ctx *)((char *)ac + ac->size)) { + if (ac->magic == FPSIMD_MAGIC) { + struct fpsimd_context *sm = (struct fpsimd_context *)ac; + sm->fpcr &= ~(TRAPS << 8); // disable traps + sm->fpsr &= ~TRAPS; // clear cages + return; + } + } +#endif + + // exit if we can't recover execution + msg = "cannot recover from signal\n"; + write(1, msg, strlen(msg)); + _exit(1); +} + +void setup_trap(void) { + struct sigaction sa; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = spring_trap; + sigaction(SIGFPE, &sa, 0); +} + +void activate_trap(void) { + feclearexcept(TRAPS); + if (feenableexcept(TRAPS)) { + static bool once; + if (!once) { + fprintf(stderr, "warning: trapping math isn't supported on this cpu\n"); + once = true; + } + } +} + +float ident(float x) { + return x; +} +float (*veil)(float) = ident; + +int main(int argc, char *argv[]) { + float x; + setup_trap(); + + // test illegal math + activate_trap(); + x = 0 / veil(0); + printf("0/0 = %g\n", x); + + // test divide by zero + activate_trap(); + x = 1 / veil(0); + printf("1/0 = %g\n", x); + + // test divide by zero again + activate_trap(); + x = -1 / veil(0); + printf("-1/0 = %g\n", x); + + // test domain error + activate_trap(); + x = logf(veil(-1)); + printf("log(-1) = %g\n", x); + + // test imaginary number + activate_trap(); + x = sqrtf(veil(-1)); + printf("sqrt(-1) = %g\n", x); + + // test overflow + activate_trap(); + x = expf(veil(88.8)); + printf("expf(88.8) = %g\n", x); + + // test underflow + activate_trap(); + x = expf(veil(-104)); + printf("expf(-104) = %g\n", x); +} diff --git a/libc/calls/ucontext.h b/libc/calls/ucontext.h index a869ab0ad..b820686bc 100644 --- a/libc/calls/ucontext.h +++ b/libc/calls/ucontext.h @@ -43,14 +43,55 @@ struct FpuStackEntry { }; struct thatispacked FpuState { + + /* 8087 FPU Control Word + IM: Invalid Operation ───────────────┐ + DM: Denormal Operand ───────────────┐│ + ZM: Zero Divide ───────────────────┐││ + OM: Overflow ─────────────────────┐│││ + UM: Underflow ───────────────────┐││││ + PM: Precision ──────────────────┐│││││ + PC: Precision Control ───────┐ ││││││ + {float,∅,double,long double}│ ││││││ + RC: Rounding Control ──────┐ │ ││││││ + {even, →-∞, →+∞, →0} │┌┤ ││││││ + ┌┤││ ││││││ + d││││rr││││││ + 0b0000001001111111 */ uint16_t cwd; + + /* 8087 FPU Status Word */ uint16_t swd; + uint16_t ftw; uint16_t fop; uint64_t rip; uint64_t rdp; + + /* SSE CONTROL AND STATUS REGISTER + IE: Invalid Operation Flag ──────────────┐ + DE: Denormal Flag ──────────────────────┐│ + ZE: Divide-by-Zero Flag ───────────────┐││ + OE: Overflow Flag ────────────────────┐│││ + UE: Underflow Flag ──────────────────┐││││ + PE: Precision Flag ─────────────────┐│││││ + DAZ: Denormals Are Zeros ──────────┐││││││ + IM: Invalid Operation Mask ───────┐│││││││ + DM: Denormal Operation Mask ─────┐││││││││ + ZM: Divide-by-Zero Mask ────────┐│││││││││ + OM: Overflow Mask ─────────────┐││││││││││ + UM: Underflow Mask ───────────┐│││││││││││ + PM: Precision Mask ──────────┐││││││││││││ + RC: Rounding Control ───────┐│││││││││││││ + {even, →-∞, →+∞, →0} ││││││││││││││ + FTZ: Flush To Zero ───────┐ ││││││││││││││ + │┌┤│││││││││││││ + ┌──────────────┐││││││││││││││││ + │ reserved │││││││││││││││││ + 0b00000000000000000001111110000000 */ uint32_t mxcsr; uint32_t mxcr_mask; + struct FpuStackEntry st[8]; struct XmmRegister xmm[16]; uint32_t __padding[24]; diff --git a/libc/cosmo.h b/libc/cosmo.h index c84b731eb..1bcadb4bc 100644 --- a/libc/cosmo.h +++ b/libc/cosmo.h @@ -5,6 +5,7 @@ COSMOPOLITAN_C_START_ errno_t cosmo_once(_Atomic(uint32_t) *, void (*)(void)); int systemvpe(const char *, char *const[], char *const[]) libcesque; char *GetProgramExecutableName(void); +void unleaf(void); COSMOPOLITAN_C_END_ #endif /* COSMOPOLITAN_LIBC_COSMO_H_ */ diff --git a/libc/intrin/fedisableexcept.c b/libc/intrin/fedisableexcept.c new file mode 100644 index 000000000..051d6d1cb --- /dev/null +++ b/libc/intrin/fedisableexcept.c @@ -0,0 +1,92 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/runtime/fenv.h" + +/** + * Disables floating point exception trapping, e.g. + * + * feenableexcept(FE_INVALID | FE_DIVBYZERO | + * FE_OVERFLOW | FE_UNDERFLOW); + * + * When trapping is enabled, something should handle SIGFPE. Calling + * ShowCrashReports() at startup will install a generic handler with + * backtraces and the symbol of the `si->si_code` which UNIX defines + * + * - `FPE_INTOVF`: integer overflow + * - `FPE_INTDIV`: integer divide by zero + * - `FPE_FLTDIV`: floating point divide by zero + * - `FPE_FLTOVF`: floating point overflow + * - `FPE_FLTUND`: floating point underflow + * - `FPE_FLTRES`: floating point inexact + * - `FPE_FLTINV`: invalid floating point operation + * - `FPE_FLTSUB`: subscript out of range + * + * It's important to not use the `-ffast-math` or `-Ofast` flags when + * compiling code that needs to be debugged. Using `-fsignaling-nans` + * will also help, since GCC doesn't enable that by default. + * + * @param excepts may bitwise-or the following: + * - `FE_INVALID` + * - `FE_DIVBYZERO` + * - `FE_OVERFLOW` + * - `FE_UNDERFLOW` + * - `FE_INEXACT` + * - `FE_ALL_EXCEPT` (all of the above) + * @see fetestexcept() if you don't want to deal with signals + * @see feenableexcept() to turn it on in the first place + */ +int fedisableexcept(int excepts) { + + // limit to what we know + excepts &= FE_ALL_EXCEPT; + +#ifdef __x86_64__ + +#ifndef NOX87 + // configure 8087 fpu control word + // setting the bits enables suppression + unsigned short x87cw; + asm("fstcw\t%0" : "=m"(x87cw)); + x87cw |= excepts; + asm("fldcw\t%0" : /* no inputs */ : "m"(x87cw)); +#endif + + // configure modern sse control word + // setting the bits enables suppression + unsigned mxcsr; + asm("stmxcsr\t%0" : "=m"(mxcsr)); + mxcsr |= excepts << 7; + asm("ldmxcsr\t%0" : /* no inputs */ : "m"(mxcsr)); + + return 0; + +#elif defined(__aarch64__) + + unsigned fpcr; + unsigned fpcr2; + fpcr = __builtin_aarch64_get_fpcr(); + fpcr2 = fpcr & ~(excepts << 8); + if (fpcr != fpcr2) + __builtin_aarch64_set_fpcr(fpcr2); + return (fpcr >> 8) & FE_ALL_EXCEPT; + +#else + return -1; +#endif +} diff --git a/libc/intrin/feenableexcept.c b/libc/intrin/feenableexcept.c new file mode 100644 index 000000000..503dee61a --- /dev/null +++ b/libc/intrin/feenableexcept.c @@ -0,0 +1,98 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/runtime/fenv.h" + +/** + * Enables floating point exception trapping, e.g. + * + * feenableexcept(FE_INVALID | FE_DIVBYZERO | + * FE_OVERFLOW | FE_UNDERFLOW); + * + * When trapping is enabled, something should handle SIGFPE. Calling + * ShowCrashReports() at startup will install a generic handler with + * backtraces and the symbol of the `si->si_code` which UNIX defines + * + * - `FPE_INTOVF`: integer overflow + * - `FPE_INTDIV`: integer divide by zero + * - `FPE_FLTDIV`: floating point divide by zero + * - `FPE_FLTOVF`: floating point overflow + * - `FPE_FLTUND`: floating point underflow + * - `FPE_FLTRES`: floating point inexact + * - `FPE_FLTINV`: invalid floating point operation + * - `FPE_FLTSUB`: subscript out of range + * + * It's important to not use the `-ffast-math` or `-Ofast` flags when + * compiling code that needs to be debugged. Using `-fsignaling-nans` + * will also help, since GCC doesn't enable that by default. + * + * @param excepts may bitwise-or the following: + * - `FE_INVALID` + * - `FE_DIVBYZERO` + * - `FE_OVERFLOW` + * - `FE_UNDERFLOW` + * - `FE_INEXACT` + * - `FE_ALL_EXCEPT` (all of the above) + * @see fetestexcept() if you don't want to deal with signals + * @see fedisableexcept() to turn it back off again + */ +int feenableexcept(int excepts) { + + // limit to what we know + excepts &= FE_ALL_EXCEPT; + +#ifdef __x86_64__ + +#ifndef NOX87 + // configure 8087 fpu control word + // celaring the bits disables suppression + unsigned short x87cw; + asm("fstcw\t%0" : "=m"(x87cw)); + x87cw &= ~excepts; + asm("fldcw\t%0" : /* no inputs */ : "m"(x87cw)); +#endif + + // configure modern sse control word + // clearing the bits disables suppression + unsigned mxcsr; + asm("stmxcsr\t%0" : "=m"(mxcsr)); + mxcsr &= ~(excepts << 7); + asm("ldmxcsr\t%0" : /* no inputs */ : "m"(mxcsr)); + + return 0; + +#elif defined(__aarch64__) + + unsigned fpcr; + unsigned fpcr2; + unsigned updated_fpcr; + fpcr = __builtin_aarch64_get_fpcr(); + fpcr2 = fpcr | (excepts << 8); + if (fpcr != fpcr2) { + __builtin_aarch64_set_fpcr(fpcr2); + // floating point exception trapping is optional in aarch64 + updated_fpcr = __builtin_aarch64_get_fpsr(); + if (fpcr2 & ~updated_fpcr) + return -1; + } + return (fpcr >> 8) & FE_ALL_EXCEPT; + +#else + return -1; +#endif +} diff --git a/libc/intrin/unleaf.c b/libc/intrin/unleaf.c new file mode 100644 index 000000000..5ac002fd3 --- /dev/null +++ b/libc/intrin/unleaf.c @@ -0,0 +1,32 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/cosmo.h" + +/** + * Does nothing. + * + * Calling this function will force the compiler to generate a stack + * frame. This ensures backtraces will work better in a few critical + * routines. + */ +void unleaf(void) { + // TODO: We should make ShowCrashReports() so __math_invalidf() + // doesn't have to call this in order for the actual math + // function to show up in the backtrace. +} diff --git a/libc/runtime/cosmo.S b/libc/runtime/cosmo.S index 31ee018a9..7d7c66755 100644 --- a/libc/runtime/cosmo.S +++ b/libc/runtime/cosmo.S @@ -75,6 +75,19 @@ cosmo: push %rbp #ifdef __FAST_MATH__ push %rax stmxcsr (%rsp) +// +// Enable hardware optimizations in violation of the IEEE standard. +// +// - 0x0040 enables "DAZ: Denormals Are Zeros" in MXCSR. This causes the +// processor to turn denormal inputs into zero, before computing them. +// See Intel Manual Vol. 1 §10.2.3.4 +// +// - 0x8000 enables "FTZ: Flush To Zero" in MXCSR. This means a floating +// point operation that results in underflow will be set to zero, with +// the same sign, rather than producing a denormalized output. It will +// happen only if underflow trapping hasnt been enabled. See the Intel +// Manual Vol. 1 §10.2.3.3. +// orl $0x8040,(%rsp) ldmxcsr (%rsp) pop %rax diff --git a/libc/runtime/fenv.h b/libc/runtime/fenv.h index 4f25de55a..059b3ad10 100644 --- a/libc/runtime/fenv.h +++ b/libc/runtime/fenv.h @@ -81,6 +81,8 @@ int fesetenv(const fenv_t *); int fesetexceptflag(const fexcept_t *, int); int fesetround(int); int fetestexcept(int); +int feenableexcept(int); +int fedisableexcept(int); int feupdateenv(const fenv_t *); int __flt_rounds(void); int __fesetround(int); diff --git a/libc/thread/threads.h b/libc/thread/threads.h index 89bbedab6..8fe01c89f 100644 --- a/libc/thread/threads.h +++ b/libc/thread/threads.h @@ -2,6 +2,12 @@ #define COSMOPOLITAN_LIBC_THREAD_THREADS_H_ COSMOPOLITAN_C_START_ +#if !defined(__cplusplus) && \ + (!(defined(__GNUC__) && __GNUC__ >= 13) || \ + !(defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L)) +#define thread_local _Thread_local +#endif + #define TSS_DTOR_ITERATIONS 4 enum { diff --git a/libc/tinymath/math_errf.c b/libc/tinymath/math_errf.c index 2927b565d..377e82408 100644 --- a/libc/tinymath/math_errf.c +++ b/libc/tinymath/math_errf.c @@ -26,6 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/errno.h" +#include "libc/cosmo.h" #include "libc/tinymath/arm.internal.h" #if WANT_ERRNO @@ -45,6 +46,7 @@ with_errnof (float y, int e) dontinline static float xflowf (uint32_t sign, float y) { + unleaf(); y = eval_as_float (opt_barrier_float (sign ? -y : y) * y); return with_errnof (y, ERANGE); } @@ -74,6 +76,7 @@ __math_oflowf (uint32_t sign) float __math_divzerof (uint32_t sign) { + unleaf(); float y = opt_barrier_float (sign ? -1.0f : 1.0f) / 0.0f; return with_errnof (y, ERANGE); } @@ -81,6 +84,7 @@ __math_divzerof (uint32_t sign) dontinstrument float __math_invalidf (float x) { + unleaf(); float y = (x - x) / (x - x); return isnan (x) ? y : with_errnof (y, EDOM); } diff --git a/test/math/BUILD.mk b/test/math/BUILD.mk index 188697014..32d9e93dc 100644 --- a/test/math/BUILD.mk +++ b/test/math/BUILD.mk @@ -16,7 +16,8 @@ TEST_MATH_DIRECTDEPS = \ LIBC_RUNTIME \ LIBC_SYSV \ LIBC_TINYMATH \ - THIRD_PARTY_COMPILER_RT + THIRD_PARTY_COMPILER_RT \ + THIRD_PARTY_OPENMP TEST_MATH_DEPS := \ $(call uniq,$(foreach x,$(TEST_MATH_DIRECTDEPS),$($(x)))) @@ -33,7 +34,7 @@ o/$(MODE)/test/math/%.dbg: \ $(APE_NO_MODIFY_SELF) @$(APELINK) -$(TEST_MATH_OBJS): private CFLAGS += -fno-builtin +$(TEST_MATH_OBJS): private CFLAGS += -fno-builtin -fopenmp .PHONY: o/$(MODE)/test/math o/$(MODE)/test/math: \ diff --git a/test/math/erff_test.c b/test/math/erff_test.c new file mode 100644 index 000000000..e298da7ce --- /dev/null +++ b/test/math/erff_test.c @@ -0,0 +1,55 @@ +// Copyright 2024 Justine Alexandra Roberts Tunney +// +// Permission to use, copy, modify, and/or distribute this software for +// any purpose with or without fee is hereby granted, provided that the +// above copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL +// WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE +// AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL +// DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR +// PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +// PERFORMANCE OF THIS SOFTWARE. + +#include +#include + +#define MAX_ERROR_ULP 1 +#define GOTTA_TEST_THEM_ALL 0 + +unsigned rand32(void) { + /* Knuth, D.E., "The Art of Computer Programming," Vol 2, + Seminumerical Algorithms, Third Edition, Addison-Wesley, 1998, + p. 106 (line 26) & p. 108 */ + static unsigned long long lcg = 1; + lcg *= 6364136223846793005; + lcg += 1442695040888963407; + return lcg >> 32; +} + +int main() { +#if GOTTA_TEST_THEM_ALL +#pragma omp parallel for + for (long i = 0; i < 4294967296; ++i) { +#else + for (long r = 0; r < 100000; ++r) { + unsigned i = rand32(); +#endif + union { + float f; + unsigned i; + } x, a, b; + x.i = i; + a.f = erf(x.f); + b.f = erff(x.f); + long ai = a.i; + long bi = b.i; + long e = bi - ai; + if (e < 0) + e = -e; + if (e > MAX_ERROR_ULP) + exit(99); + } +} diff --git a/test/math/expf_test.c b/test/math/expf_test.c new file mode 100644 index 000000000..6e060e61f --- /dev/null +++ b/test/math/expf_test.c @@ -0,0 +1,87 @@ +// Copyright 2024 Justine Alexandra Roberts Tunney +// +// Permission to use, copy, modify, and/or distribute this software for +// any purpose with or without fee is hereby granted, provided that the +// above copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL +// WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE +// AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL +// DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR +// PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +// PERFORMANCE OF THIS SOFTWARE. + +#include +#include +#include + +#define MAX_ERROR_ULP 1 +#define GOTTA_TEST_THEM_ALL 0 + +float ident(float x) { + return x; +} +float (*veil)(float) = ident; + +unsigned rand32(void) { + /* Knuth, D.E., "The Art of Computer Programming," Vol 2, + Seminumerical Algorithms, Third Edition, Addison-Wesley, 1998, + p. 106 (line 26) & p. 108 */ + static unsigned long long lcg = 1; + lcg *= 6364136223846793005; + lcg += 1442695040888963407; + return lcg >> 32; +} + +int main() { + + // specials + if (expf(veil(0.f)) != 1.f) + return 1; + if (!isnan(expf(veil(NAN)))) + return 2; + if (expf(veil(-INFINITY)) != 0.f) + return 3; + if (expf(veil(INFINITY)) != INFINITY) + return 4; + if (errno) + return 5; + + // overflow + if (expf(veil(88.8)) != HUGE_VALF) + return 6; + if (errno != ERANGE) + return 7; + errno = 0; + + // underflow + if (expf(veil(-104)) != 0.) + return 8; + if (errno != ERANGE) + return 9; + +#if GOTTA_TEST_THEM_ALL +#pragma omp parallel for + for (long i = 0; i < 4294967296; ++i) { +#else + for (long r = 0; r < 100000; ++r) { + unsigned i = rand32(); +#endif + union { + float f; + unsigned i; + } x, a, b; + x.i = i; + a.f = exp(x.f); + b.f = expf(x.f); + long ai = a.i; + long bi = b.i; + long e = bi - ai; + if (e < 0) + e = -e; + if (e > MAX_ERROR_ULP) + exit(99); + } +}