From 592f6ebc208179fb7dfa585c60146d1d42fab559 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Sun, 25 Feb 2024 14:57:28 -0800 Subject: [PATCH] Make quality improvements - Write some more unit tests - memcpy() on ARM is now faster - Address the Musl complex math FIXME comments - Some libm funcs like pow() now support setting errno - Import the latest and greatest math functions from ARM - Use more accurate atan2f() and log1pf() implementations - atoi() and atol() will no longer saturate or clobber errno --- Makefile | 3 +- build/definitions.mk | 1 - libc/fmt/atoi.c | 40 +- libc/fmt/atol.c | 31 +- libc/intrin/aarch64/memchr.S | 2 +- libc/intrin/aarch64/memcpy.S | 62 +- libc/intrin/aarch64/memrchr.S | 2 +- libc/intrin/aarch64/memset.S | 2 +- libc/intrin/aarch64/stpcpy.S | 2 +- libc/intrin/aarch64/strchr.S | 2 +- libc/intrin/aarch64/strchrnul.S | 2 +- libc/intrin/aarch64/strcmp.S | 2 +- libc/intrin/aarch64/strcpy.S | 2 +- libc/intrin/aarch64/strlen.S | 2 +- libc/intrin/aarch64/strncmp.S | 2 +- libc/intrin/aarch64/strnlen.S | 2 +- libc/intrin/aarch64/strrchr.S | 2 +- libc/intrin/fbclibm.c | 2 + libc/math.h | 65 +- libc/tinymath/BUILD.mk | 1 + libc/tinymath/LICENSE.optimized-routines | 2 +- libc/tinymath/__math_divzero.c | 33 - libc/tinymath/__math_divzerof.c | 33 - libc/tinymath/__math_invalid.c | 33 - libc/tinymath/__math_invalidf.c | 33 - libc/tinymath/__math_oflow.c | 33 - libc/tinymath/__math_oflowf.c | 33 - libc/tinymath/__math_uflow.c | 33 - libc/tinymath/__math_uflowf.c | 33 - libc/tinymath/__math_xflow.c | 33 - libc/tinymath/__math_xflowf.c | 33 - libc/tinymath/arm.internal.h | 509 +++++++++ libc/tinymath/asinhl.c | 18 +- libc/tinymath/atan2f.c | 259 ++--- libc/tinymath/atan_common.internal.h | 54 - libc/tinymath/atan_data.internal.h | 11 - libc/tinymath/atanf_common.internal.h | 43 - libc/tinymath/atanf_data.internal.h | 11 - libc/tinymath/cacos.c | 41 - libc/tinymath/cacosf.c | 39 - libc/tinymath/cacosh.c | 44 - libc/tinymath/cacoshf.c | 39 - libc/tinymath/casinf.c | 45 - libc/tinymath/casinh.c | 39 - libc/tinymath/casinhf.c | 37 - libc/tinymath/catan.c | 142 --- libc/tinymath/catanf.c | 135 --- libc/tinymath/catanh.c | 39 - libc/tinymath/catanhf.c | 37 - libc/tinymath/catrig.c | 651 ++++++++++++ libc/tinymath/catrigf.c | 377 +++++++ libc/tinymath/cos.c | 1 - libc/tinymath/cosf.c | 139 +-- libc/tinymath/erf.c | 529 +++++----- libc/tinymath/erf_data.c | 105 ++ libc/tinymath/erfc.c | 279 +++++ libc/tinymath/erff.c | 270 ++--- libc/tinymath/{log1pf_data.c => erff_data.c} | 25 +- libc/tinymath/exp.c | 251 +++-- libc/tinymath/exp10.c | 148 ++- libc/tinymath/exp2.c | 203 ++-- libc/tinymath/exp2f.c | 104 +- libc/tinymath/exp2f_data.c | 58 +- libc/tinymath/exp2f_data.internal.h | 19 - libc/tinymath/exp_data.c | 977 +++++++++++++++++- libc/tinymath/exp_data.internal.h | 23 - libc/tinymath/expf.c | 118 ++- libc/tinymath/frexpl.c | 1 - libc/tinymath/hypot.c | 1 - libc/tinymath/hypotf.c | 6 +- libc/tinymath/hypotf2.c | 175 ++++ libc/tinymath/hypotl.c | 1 - libc/tinymath/log.c | 250 +++-- libc/tinymath/log10.c | 2 - libc/tinymath/log1p.c | 262 +++-- libc/tinymath/log1pf.c | 274 +++-- libc/tinymath/log1pf_data.internal.h | 13 - libc/tinymath/log2.c | 219 ++-- libc/tinymath/log2_data.c | 30 +- libc/tinymath/log2_data.internal.h | 26 - libc/tinymath/log2f.c | 116 ++- libc/tinymath/log2f_data.c | 15 +- libc/tinymath/log2f_data.internal.h | 17 - libc/tinymath/log_data.c | 202 +++- libc/tinymath/log_data.internal.h | 26 - libc/tinymath/logf.c | 111 +- libc/tinymath/logf_data.c | 11 +- libc/tinymath/logf_data.internal.h | 18 - libc/tinymath/{atan_data.c => math_err.c} | 88 +- libc/tinymath/{casin.c => math_errf.c} | 93 +- libc/tinymath/{atanf_data.c => math_errl.c} | 32 +- libc/tinymath/pow.c | 553 +++++----- libc/tinymath/pow_data.c | 19 +- libc/tinymath/pow_data.internal.h | 20 - libc/tinymath/powf.c | 306 +++--- libc/tinymath/powf_data.c | 15 +- libc/tinymath/powf_data.internal.h | 23 - libc/tinymath/powl.c | 172 +-- libc/tinymath/round.c | 1 - libc/tinymath/sincosf.c | 28 +- libc/tinymath/sincosf.internal.h | 13 +- libc/tinymath/sincosf_data.c | 2 +- libc/tinymath/sinf.c | 139 ++- libc/tinymath/sinl.c | 1 - libc/tinymath/sqrt.c | 3 +- libc/tinymath/sqrtf.c | 5 +- libc/tinymath/sqrtl.c | 5 +- libc/tinymath/tan.c | 1 - libc/tinymath/trunc.c | 1 - test/BUILD.mk | 3 +- test/libc/fmt/atoi_test.c | 36 +- test/math/BUILD.mk | 41 + test/math/hypot_test.c | 92 ++ test/math/hypotf_test.c | 94 ++ test/math/powf_test.c | 104 ++ test/posix/BUILD.mk | 2 - .../posix/atoi_test.c | 34 +- test/posix/strtol_test.c | 75 ++ tool/cosmocc/bin/cosmocc | 2 +- tool/cosmocc/bin/cosmocross | 2 +- tool/emacs/cosmo-cpp-constants.el | 2 + tool/emacs/cosmo-platform-constants.el | 1 - 122 files changed, 6305 insertions(+), 3859 deletions(-) create mode 100644 libc/intrin/fbclibm.c delete mode 100644 libc/tinymath/__math_divzero.c delete mode 100644 libc/tinymath/__math_divzerof.c delete mode 100644 libc/tinymath/__math_invalid.c delete mode 100644 libc/tinymath/__math_invalidf.c delete mode 100644 libc/tinymath/__math_oflow.c delete mode 100644 libc/tinymath/__math_oflowf.c delete mode 100644 libc/tinymath/__math_uflow.c delete mode 100644 libc/tinymath/__math_uflowf.c delete mode 100644 libc/tinymath/__math_xflow.c delete mode 100644 libc/tinymath/__math_xflowf.c create mode 100644 libc/tinymath/arm.internal.h delete mode 100644 libc/tinymath/atan_common.internal.h delete mode 100644 libc/tinymath/atan_data.internal.h delete mode 100644 libc/tinymath/atanf_common.internal.h delete mode 100644 libc/tinymath/atanf_data.internal.h delete mode 100644 libc/tinymath/cacos.c delete mode 100644 libc/tinymath/cacosf.c delete mode 100644 libc/tinymath/cacosh.c delete mode 100644 libc/tinymath/cacoshf.c delete mode 100644 libc/tinymath/casinf.c delete mode 100644 libc/tinymath/casinh.c delete mode 100644 libc/tinymath/casinhf.c delete mode 100644 libc/tinymath/catan.c delete mode 100644 libc/tinymath/catanf.c delete mode 100644 libc/tinymath/catanh.c delete mode 100644 libc/tinymath/catanhf.c create mode 100644 libc/tinymath/catrig.c create mode 100644 libc/tinymath/catrigf.c create mode 100644 libc/tinymath/erf_data.c create mode 100644 libc/tinymath/erfc.c rename libc/tinymath/{log1pf_data.c => erff_data.c} (83%) delete mode 100644 libc/tinymath/exp2f_data.internal.h delete mode 100644 libc/tinymath/exp_data.internal.h create mode 100644 libc/tinymath/hypotf2.c delete mode 100644 libc/tinymath/log1pf_data.internal.h delete mode 100644 libc/tinymath/log2_data.internal.h delete mode 100644 libc/tinymath/log2f_data.internal.h delete mode 100644 libc/tinymath/log_data.internal.h delete mode 100644 libc/tinymath/logf_data.internal.h rename libc/tinymath/{atan_data.c => math_err.c} (61%) rename libc/tinymath/{casin.c => math_errf.c} (57%) rename libc/tinymath/{atanf_data.c => math_errl.c} (80%) delete mode 100644 libc/tinymath/pow_data.internal.h delete mode 100644 libc/tinymath/powf_data.internal.h create mode 100644 test/math/BUILD.mk create mode 100644 test/math/hypot_test.c create mode 100644 test/math/hypotf_test.c create mode 100644 test/math/powf_test.c rename libc/tinymath/__math_invalidl.c => test/posix/atoi_test.c (70%) create mode 100644 test/posix/strtol_test.c diff --git a/Makefile b/Makefile index 60a98af1f..9f4c3f3c0 100644 --- a/Makefile +++ b/Makefile @@ -206,7 +206,7 @@ endif .UNVEIL += \ libc/integral \ libc/stdbool.h \ - rwc:/dev/shm \ + rwc:/dev/shm \ rx:.cosmocc \ rx:build/bootstrap \ r:build/portcosmo.h \ @@ -297,6 +297,7 @@ include third_party/nsync/testing/BUILD.mk include libc/testlib/BUILD.mk include tool/viz/lib/BUILD.mk include tool/args/BUILD.mk +include test/math/BUILD.mk include test/posix/BUILD.mk include test/libcxx/BUILD.mk include test/tool/args/BUILD.mk diff --git a/build/definitions.mk b/build/definitions.mk index 9c3b3d6e6..b7c825184 100644 --- a/build/definitions.mk +++ b/build/definitions.mk @@ -95,7 +95,6 @@ DEFAULT_CCFLAGS += \ DEFAULT_COPTS ?= \ -fno-ident \ -fno-common \ - -fno-math-errno \ -fno-gnu-unique \ -fstrict-aliasing \ -fstrict-overflow \ diff --git a/libc/fmt/atoi.c b/libc/fmt/atoi.c index 5aefb9f94..9030bf468 100644 --- a/libc/fmt/atoi.c +++ b/libc/fmt/atoi.c @@ -16,44 +16,28 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/errno.h" #include "libc/fmt/conv.h" -#include "libc/limits.h" -#include "libc/stdckdint.h" #include "libc/str/str.h" /** - * Decodes decimal integer from ASCII string. + * Turns string into int. * - * atoi 10⁸ 22𝑐 7𝑛𝑠 - * strtol 10⁸ 37𝑐 12𝑛𝑠 - * strtoul 10⁸ 35𝑐 11𝑛𝑠 - * wcstol 10⁸ 30𝑐 10𝑛𝑠 - * wcstoul 10⁸ 30𝑐 10𝑛𝑠 - * strtoimax 10⁸ 80𝑐 26𝑛𝑠 - * strtoumax 10⁸ 78𝑐 25𝑛𝑠 - * wcstoimax 10⁸ 77𝑐 25𝑛𝑠 - * wcstoumax 10⁸ 76𝑐 25𝑛𝑠 + * Decimal is the only radix supported. Leading whitespace (as specified + * by the isspace() function) is skipped over. Unlike strtol(), the atoi + * function has undefined behavior on error and it never changes `errno` * - * @param s is a non-null nul-terminated string + * @param nptr is a non-null nul-terminated string * @return the decoded signed saturated integer - * @raise ERANGE on overflow */ -int atoi(const char *s) { +int atoi(const char *nptr) { int x, c, d; - do c = *s++; - while (c == ' ' || c == '\t'); + do c = *nptr++; + while (isspace(c)); d = c == '-' ? -1 : 1; - if (c == '-' || c == '+') c = *s++; - for (x = 0; isdigit(c); c = *s++) { - if (ckd_mul(&x, x, 10) || ckd_add(&x, x, (c - '0') * d)) { - errno = ERANGE; - if (d > 0) { - return INT_MAX; - } else { - return INT_MIN; - } - } + if (c == '-' || c == '+') c = *nptr++; + for (x = 0; isdigit(c); c = *nptr++) { + x *= 10; + x += (c - '0') * d; } return x; } diff --git a/libc/fmt/atol.c b/libc/fmt/atol.c index bbb4291da..8102c3228 100644 --- a/libc/fmt/atol.c +++ b/libc/fmt/atol.c @@ -16,34 +16,29 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/errno.h" #include "libc/fmt/conv.h" -#include "libc/limits.h" -#include "libc/stdckdint.h" #include "libc/str/str.h" /** - * Decodes decimal integer from ASCII string. + * Turns string into long. * - * @param s is a non-null nul-terminated string + * Decimal is the only radix supported. Leading whitespace (as specified + * by the isspace() function) is skipped over. Unlike strtol(), the atoi + * function has undefined behavior on error and it never changes `errno` + * + * @param nptr is a non-null nul-terminated string * @return the decoded signed saturated integer */ -long atol(const char *s) { +long atol(const char *nptr) { long x; int c, d; - do c = *s++; - while (c == ' ' || c == '\t'); + do c = *nptr++; + while (isspace(c)); d = c == '-' ? -1 : 1; - if (c == '-' || c == '+') c = *s++; - for (x = 0; isdigit(c); c = *s++) { - if (ckd_mul(&x, x, 10) || ckd_add(&x, x, (c - '0') * d)) { - errno = ERANGE; - if (d > 0) { - return LONG_MAX; - } else { - return LONG_MIN; - } - } + if (c == '-' || c == '+') c = *nptr++; + for (x = 0; isdigit(c); c = *nptr++) { + x *= 10; + x += (c - '0') * d; } return x; } diff --git a/libc/intrin/aarch64/memchr.S b/libc/intrin/aarch64/memchr.S index fcfacc661..7242bfc86 100644 --- a/libc/intrin/aarch64/memchr.S +++ b/libc/intrin/aarch64/memchr.S @@ -3,7 +3,7 @@ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ diff --git a/libc/intrin/aarch64/memcpy.S b/libc/intrin/aarch64/memcpy.S index fd30eb4e7..65b58d75a 100644 --- a/libc/intrin/aarch64/memcpy.S +++ b/libc/intrin/aarch64/memcpy.S @@ -3,7 +3,7 @@ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -80,11 +80,12 @@ ENTRY (__memcpy_aarch64_simd) PTR_ARG (1) SIZE_ARG (2) add srcend, src, count - add dstend, dstin, count cmp count, 128 b.hi L(copy_long) + add dstend, dstin, count cmp count, 32 b.hi L(copy32_128) + nop /* Small copies: 0..32 bytes. */ cmp count, 16 @@ -95,6 +96,18 @@ ENTRY (__memcpy_aarch64_simd) str B_q, [dstend, -16] ret + .p2align 4 + /* Medium copies: 33..128 bytes. */ +L(copy32_128): + ldp A_q, B_q, [src] + ldp C_q, D_q, [srcend, -32] + cmp count, 64 + b.hi L(copy128) + stp A_q, B_q, [dstin] + stp C_q, D_q, [dstend, -32] + ret + + .p2align 4 /* Copy 8-15 bytes. */ L(copy16): tbz count, 3, L(copy8) @@ -104,7 +117,6 @@ L(copy16): str A_h, [dstend, -8] ret - .p2align 3 /* Copy 4-7 bytes. */ L(copy8): tbz count, 2, L(copy4) @@ -114,6 +126,19 @@ L(copy8): str B_lw, [dstend, -4] ret + /* Copy 65..128 bytes. */ +L(copy128): + ldp E_q, F_q, [src, 32] + cmp count, 96 + b.ls L(copy96) + ldp G_q, H_q, [srcend, -64] + stp G_q, H_q, [dstend, -64] +L(copy96): + stp A_q, B_q, [dstin] + stp E_q, F_q, [dstin, 32] + stp C_q, D_q, [dstend, -32] + ret + /* Copy 0..3 bytes using a branchless sequence. */ L(copy4): cbz count, L(copy0) @@ -127,33 +152,11 @@ L(copy4): L(copy0): ret - .p2align 4 - /* Medium copies: 33..128 bytes. */ -L(copy32_128): - ldp A_q, B_q, [src] - ldp C_q, D_q, [srcend, -32] - cmp count, 64 - b.hi L(copy128) - stp A_q, B_q, [dstin] - stp C_q, D_q, [dstend, -32] - ret - - .p2align 4 - /* Copy 65..128 bytes. */ -L(copy128): - ldp E_q, F_q, [src, 32] - cmp count, 96 - b.ls L(copy96) - ldp G_q, H_q, [srcend, -64] - stp G_q, H_q, [dstend, -64] -L(copy96): - stp A_q, B_q, [dstin] - stp E_q, F_q, [dstin, 32] - stp C_q, D_q, [dstend, -32] - ret - + .p2align 3 /* Copy more than 128 bytes. */ L(copy_long): + add dstend, dstin, count + /* Use backwards copy if there is an overlap. */ sub tmp1, dstin, src cmp tmp1, count @@ -190,6 +193,9 @@ L(copy64_from_end): stp A_q, B_q, [dstend, -32] ret + .p2align 4 + nop + /* Large backwards copy for overlapping copies. Copy 16 bytes and then align srcend to 16-byte alignment. */ L(copy_long_backwards): diff --git a/libc/intrin/aarch64/memrchr.S b/libc/intrin/aarch64/memrchr.S index f24c358f7..3041eeab7 100644 --- a/libc/intrin/aarch64/memrchr.S +++ b/libc/intrin/aarch64/memrchr.S @@ -3,7 +3,7 @@ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ diff --git a/libc/intrin/aarch64/memset.S b/libc/intrin/aarch64/memset.S index f6b96d3a6..cef8edd9c 100644 --- a/libc/intrin/aarch64/memset.S +++ b/libc/intrin/aarch64/memset.S @@ -3,7 +3,7 @@ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ diff --git a/libc/intrin/aarch64/stpcpy.S b/libc/intrin/aarch64/stpcpy.S index 05fa1b866..a34f1aa8f 100644 --- a/libc/intrin/aarch64/stpcpy.S +++ b/libc/intrin/aarch64/stpcpy.S @@ -3,7 +3,7 @@ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ diff --git a/libc/intrin/aarch64/strchr.S b/libc/intrin/aarch64/strchr.S index ffceb8eaf..462d94806 100644 --- a/libc/intrin/aarch64/strchr.S +++ b/libc/intrin/aarch64/strchr.S @@ -3,7 +3,7 @@ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ diff --git a/libc/intrin/aarch64/strchrnul.S b/libc/intrin/aarch64/strchrnul.S index c153c8f65..184305b22 100644 --- a/libc/intrin/aarch64/strchrnul.S +++ b/libc/intrin/aarch64/strchrnul.S @@ -3,7 +3,7 @@ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ diff --git a/libc/intrin/aarch64/strcmp.S b/libc/intrin/aarch64/strcmp.S index dceb9d25c..98f26d486 100644 --- a/libc/intrin/aarch64/strcmp.S +++ b/libc/intrin/aarch64/strcmp.S @@ -3,7 +3,7 @@ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ diff --git a/libc/intrin/aarch64/strcpy.S b/libc/intrin/aarch64/strcpy.S index a92541b40..93cb85fec 100644 --- a/libc/intrin/aarch64/strcpy.S +++ b/libc/intrin/aarch64/strcpy.S @@ -3,7 +3,7 @@ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ diff --git a/libc/intrin/aarch64/strlen.S b/libc/intrin/aarch64/strlen.S index 36ecd0e18..7464eafc9 100644 --- a/libc/intrin/aarch64/strlen.S +++ b/libc/intrin/aarch64/strlen.S @@ -3,7 +3,7 @@ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ diff --git a/libc/intrin/aarch64/strncmp.S b/libc/intrin/aarch64/strncmp.S index f91114e51..8a0b75397 100644 --- a/libc/intrin/aarch64/strncmp.S +++ b/libc/intrin/aarch64/strncmp.S @@ -3,7 +3,7 @@ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ diff --git a/libc/intrin/aarch64/strnlen.S b/libc/intrin/aarch64/strnlen.S index a556aa61b..988c15f63 100644 --- a/libc/intrin/aarch64/strnlen.S +++ b/libc/intrin/aarch64/strnlen.S @@ -3,7 +3,7 @@ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ diff --git a/libc/intrin/aarch64/strrchr.S b/libc/intrin/aarch64/strrchr.S index 73289101f..5199254ff 100644 --- a/libc/intrin/aarch64/strrchr.S +++ b/libc/intrin/aarch64/strrchr.S @@ -3,7 +3,7 @@ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ diff --git a/libc/intrin/fbclibm.c b/libc/intrin/fbclibm.c new file mode 100644 index 000000000..fc698ec05 --- /dev/null +++ b/libc/intrin/fbclibm.c @@ -0,0 +1,2 @@ +__notice(freebsd_complex_notice, "FreeBSD Complex Math (BSD-2 License)\n\ +Copyright (c) 2012 Stephen Montgomery-Smith "); diff --git a/libc/math.h b/libc/math.h index 1a9ffb611..018a72884 100644 --- a/libc/math.h +++ b/libc/math.h @@ -9,15 +9,47 @@ #define M_LOG10E 0.43429448190325182765 /* log₁₀𝑒 */ #define M_LN2 0.69314718055994530942 /* logₑ2 */ #define M_LN10 2.30258509299404568402 /* logₑ10 */ -#define M_PI 3.14159265358979323846 /* pi */ -#define M_PI_2 1.57079632679489661923 /* pi/2 */ -#define M_PI_4 0.78539816339744830962 /* pi/4 */ -#define M_1_PI 0.31830988618379067154 /* 1/pi */ -#define M_2_PI 0.63661977236758134308 /* 2/pi */ -#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */ +#define M_PI 3.14159265358979323846 /* 𝜋 */ +#define M_PI_2 1.57079632679489661923 /* 𝜋/2 */ +#define M_PI_4 0.78539816339744830962 /* 𝜋/4 */ +#define M_1_PI 0.31830988618379067154 /* 1/𝜋 */ +#define M_2_PI 0.63661977236758134308 /* 2/𝜋 */ +#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(𝜋) */ #define M_SQRT2 1.41421356237309504880 /* sqrt(2) */ #define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */ +#if defined(_GNU_SOURCE) || defined(_COSMO_SOURCE) +#define M_Ef 2.7182818284590452354f /* 𝑒 */ +#define M_LOG2Ef 1.4426950408889634074f /* log₂𝑒 */ +#define M_LOG10Ef 0.43429448190325182765f /* log₁₀𝑒 */ +#define M_LN2f 0.69314718055994530942f /* logₑ2 */ +#define M_LN10f 2.30258509299404568402f /* logₑ10 */ +#define M_PIf 3.14159265358979323846f /* 𝜋 */ +#define M_PI_2f 1.57079632679489661923f /* 𝜋/2 */ +#define M_PI_4f 0.78539816339744830962f /* 𝜋/4 */ +#define M_1_PIf 0.31830988618379067154f /* 1/𝜋 */ +#define M_2_PIf 0.63661977236758134308f /* 2/𝜋 */ +#define M_2_SQRTPIf 1.12837916709551257390f /* 2/sqrt(𝜋) */ +#define M_SQRT2f 1.41421356237309504880f /* sqrt(2) */ +#define M_SQRT1_2f 0.70710678118654752440f /* 1/sqrt(2) */ +#endif + +#if defined(_GNU_SOURCE) || defined(_COSMO_SOURCE) +#define M_El 2.718281828459045235360287471352662498L /* 𝑒 */ +#define M_LOG2El 1.442695040888963407359924681001892137L /* log₂𝑒 */ +#define M_LOG10El 0.434294481903251827651128918916605082L /* log₁₀𝑒 */ +#define M_LN2l 0.693147180559945309417232121458176568L /* logₑ2 */ +#define M_LN10l 2.302585092994045684017991454684364208L /* logₑ10 */ +#define M_PIl 3.141592653589793238462643383279502884L /* 𝜋 */ +#define M_PI_2l 1.570796326794896619231321691639751442L /* 𝜋/2 */ +#define M_PI_4l 0.785398163397448309615660845819875721L /* 𝜋/4 */ +#define M_1_PIl 0.318309886183790671537767526745028724L /* 1/𝜋 */ +#define M_2_PIl 0.636619772367581343075535053490057448L /* 2/𝜋 */ +#define M_2_SQRTPIl 1.128379167095512573896158903121545172L /* 2/sqrt(𝜋) */ +#define M_SQRT2l 1.414213562373095048801688724209698079L /* sqrt(2) */ +#define M_SQRT1_2l 0.707106781186547524400844362104849039L /* 1/sqrt(2) */ +#endif + #define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__ #define DBL_DIG __DBL_DIG__ #define DBL_EPSILON __DBL_EPSILON__ @@ -76,6 +108,27 @@ #define FP_ILOGB0 (-2147483647 - 1) #define FP_ILOGBNAN (-2147483647 - 1) +#define MATH_ERRNO 1 +#define MATH_ERREXCEPT 2 + +#ifdef __FAST_MATH__ +#define math_errhandling 0 +#elif defined(__NO_MATH_ERRNO__) +#define math_errhandling (MATH_ERREXCEPT) +#else +#define math_errhandling (MATH_ERRNO | MATH_ERREXCEPT) +#endif + +#ifdef __FP_FAST_FMA +#define FP_FAST_FMA 1 +#endif +#ifdef __FP_FAST_FMAF +#define FP_FAST_FMAF 1 +#endif +#ifdef __FP_FAST_FMAL +#define FP_FAST_FMAL 1 +#endif + COSMOPOLITAN_C_START_ #define NAN __builtin_nanf("") diff --git a/libc/tinymath/BUILD.mk b/libc/tinymath/BUILD.mk index df3806404..15493f253 100644 --- a/libc/tinymath/BUILD.mk +++ b/libc/tinymath/BUILD.mk @@ -54,6 +54,7 @@ o/$(MODE)/libc/tinymath/loglq.o: private \ $(LIBC_TINYMATH_A_OBJS): private \ CFLAGS += \ + -fmath-errno \ -fsigned-zeros \ -ftrapping-math \ -frounding-math \ diff --git a/libc/tinymath/LICENSE.optimized-routines b/libc/tinymath/LICENSE.optimized-routines index 20a4b7717..c6fbf4a38 100644 --- a/libc/tinymath/LICENSE.optimized-routines +++ b/libc/tinymath/LICENSE.optimized-routines @@ -5,7 +5,7 @@ MIT OR Apache-2.0 WITH LLVM-exception MIT License ----------- -Copyright (c) 1999-2022, Arm Limited. +Copyright (c) 2018-2024, Arm Limited. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/libc/tinymath/__math_divzero.c b/libc/tinymath/__math_divzero.c deleted file mode 100644 index 24b104690..000000000 --- a/libc/tinymath/__math_divzero.c +++ /dev/null @@ -1,33 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ -╚──────────────────────────────────────────────────────────────────────────────╝ -│ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ -│ │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/internal.h" - -double __math_divzero(uint32_t sign) -{ - return fp_barrier(sign ? -1.0 : 1.0) / 0.0; -} diff --git a/libc/tinymath/__math_divzerof.c b/libc/tinymath/__math_divzerof.c deleted file mode 100644 index b241eecb0..000000000 --- a/libc/tinymath/__math_divzerof.c +++ /dev/null @@ -1,33 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ -╚──────────────────────────────────────────────────────────────────────────────╝ -│ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ -│ │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/internal.h" - -float __math_divzerof(uint32_t sign) -{ - return fp_barrierf(sign ? -1.0f : 1.0f) / 0.0f; -} diff --git a/libc/tinymath/__math_invalid.c b/libc/tinymath/__math_invalid.c deleted file mode 100644 index 890a8eadf..000000000 --- a/libc/tinymath/__math_invalid.c +++ /dev/null @@ -1,33 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ -╚──────────────────────────────────────────────────────────────────────────────╝ -│ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ -│ │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/internal.h" - -double __math_invalid(double x) -{ - return (x - x) / (x - x); -} diff --git a/libc/tinymath/__math_invalidf.c b/libc/tinymath/__math_invalidf.c deleted file mode 100644 index 7bcb5870b..000000000 --- a/libc/tinymath/__math_invalidf.c +++ /dev/null @@ -1,33 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ -╚──────────────────────────────────────────────────────────────────────────────╝ -│ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ -│ │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/internal.h" - -float __math_invalidf(float x) -{ - return (x - x) / (x - x); -} diff --git a/libc/tinymath/__math_oflow.c b/libc/tinymath/__math_oflow.c deleted file mode 100644 index 588c80167..000000000 --- a/libc/tinymath/__math_oflow.c +++ /dev/null @@ -1,33 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ -╚──────────────────────────────────────────────────────────────────────────────╝ -│ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ -│ │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/internal.h" - -dontinstrument double __math_oflow(uint32_t sign) -{ - return __math_xflow(sign, 0x1p769); -} diff --git a/libc/tinymath/__math_oflowf.c b/libc/tinymath/__math_oflowf.c deleted file mode 100644 index a7b8d7f1a..000000000 --- a/libc/tinymath/__math_oflowf.c +++ /dev/null @@ -1,33 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ -╚──────────────────────────────────────────────────────────────────────────────╝ -│ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ -│ │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/internal.h" - -dontinstrument float __math_oflowf(uint32_t sign) -{ - return __math_xflowf(sign, 0x1p97f); -} diff --git a/libc/tinymath/__math_uflow.c b/libc/tinymath/__math_uflow.c deleted file mode 100644 index 7ccda32fe..000000000 --- a/libc/tinymath/__math_uflow.c +++ /dev/null @@ -1,33 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ -╚──────────────────────────────────────────────────────────────────────────────╝ -│ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ -│ │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/internal.h" - -dontinstrument double __math_uflow(uint32_t sign) -{ - return __math_xflow(sign, 0x1p-767); -} diff --git a/libc/tinymath/__math_uflowf.c b/libc/tinymath/__math_uflowf.c deleted file mode 100644 index b355b5a7e..000000000 --- a/libc/tinymath/__math_uflowf.c +++ /dev/null @@ -1,33 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ -╚──────────────────────────────────────────────────────────────────────────────╝ -│ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ -│ │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/internal.h" - -dontinstrument float __math_uflowf(uint32_t sign) -{ - return __math_xflowf(sign, 0x1p-95f); -} diff --git a/libc/tinymath/__math_xflow.c b/libc/tinymath/__math_xflow.c deleted file mode 100644 index df508896c..000000000 --- a/libc/tinymath/__math_xflow.c +++ /dev/null @@ -1,33 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ -╚──────────────────────────────────────────────────────────────────────────────╝ -│ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ -│ │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/internal.h" - -dontinstrument double __math_xflow(uint32_t sign, double y) -{ - return eval_as_double(fp_barrier(sign ? -y : y) * y); -} diff --git a/libc/tinymath/__math_xflowf.c b/libc/tinymath/__math_xflowf.c deleted file mode 100644 index 6b23ee8f3..000000000 --- a/libc/tinymath/__math_xflowf.c +++ /dev/null @@ -1,33 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ -╚──────────────────────────────────────────────────────────────────────────────╝ -│ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ -│ │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/internal.h" - -dontinstrument float __math_xflowf(uint32_t sign, float y) -{ - return eval_as_float(fp_barrierf(sign ? -y : y) * y); -} diff --git a/libc/tinymath/arm.internal.h b/libc/tinymath/arm.internal.h new file mode 100644 index 000000000..de1337a4c --- /dev/null +++ b/libc/tinymath/arm.internal.h @@ -0,0 +1,509 @@ +#ifndef COSMOPOLITAN_LIBC_TINYMATH_ARM_H_ +#define COSMOPOLITAN_LIBC_TINYMATH_ARM_H_ +#include "libc/math.h" +COSMOPOLITAN_C_START_ + +#define USE_GLIBC_ABI 1 + +/* If defined to 1, return correct results for special cases in non-nearest + rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f). + This may be set to 0 if there is no fenv support or if math functions only + get called in round to nearest mode. */ +#ifdef __ROUNDING_MATH__ +#define WANT_ROUNDING 1 +#else +#define WANT_ROUNDING 0 +#endif + +/* If defined to 1, set errno in math functions according to ISO C. Many math + libraries do not set errno, so this is 0 by default. It may need to be + set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0. */ +#ifdef __NO_MATH_ERRNO__ +#define WANT_ERRNO 0 +#else +#define WANT_ERRNO 1 +#endif + +/*------------------------------------------------------------------------------*/ +/* optimized-routines/math/math_config.h */ + +#ifndef WANT_ROUNDING +/* If defined to 1, return correct results for special cases in non-nearest + rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f). + This may be set to 0 if there is no fenv support or if math functions only + get called in round to nearest mode. */ +# define WANT_ROUNDING 1 +#endif +#ifndef WANT_ERRNO +/* If defined to 1, set errno in math functions according to ISO C. Many math + libraries do not set errno, so this is 0 by default. It may need to be + set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0. */ +# define WANT_ERRNO 0 +#endif +#ifndef WANT_ERRNO_UFLOW +/* Set errno to ERANGE if result underflows to 0 (in all rounding modes). */ +# define WANT_ERRNO_UFLOW (WANT_ROUNDING && WANT_ERRNO) +#endif + +/* Compiler can inline round as a single instruction. */ +#ifndef HAVE_FAST_ROUND +# if __aarch64__ +# define HAVE_FAST_ROUND 1 +# else +# define HAVE_FAST_ROUND 0 +# endif +#endif + +/* Compiler can inline lround, but not (long)round(x). */ +#ifndef HAVE_FAST_LROUND +# if __aarch64__ && (100*__GNUC__ + __GNUC_MINOR__) >= 408 && __NO_MATH_ERRNO__ +# define HAVE_FAST_LROUND 1 +# else +# define HAVE_FAST_LROUND 0 +# endif +#endif + +/* Compiler can inline fma as a single instruction. */ +#ifndef HAVE_FAST_FMA +# if defined FP_FAST_FMA || __aarch64__ +# define HAVE_FAST_FMA 1 +# else +# define HAVE_FAST_FMA 0 +# endif +#endif + +/* Provide *_finite symbols and some of the glibc hidden symbols + so libmathlib can be used with binaries compiled against glibc + to interpose math functions with both static and dynamic linking. */ +#ifndef USE_GLIBC_ABI +# if __GNUC__ +# define USE_GLIBC_ABI 1 +# else +# define USE_GLIBC_ABI 0 +# endif +#endif + +/* Optionally used extensions. */ +#ifdef __GNUC__ +# define HIDDEN __attribute__ ((__visibility__ ("hidden"))) +# define NOINLINE __attribute__ ((noinline)) +# define UNUSED __attribute__ ((unused)) +# define likely(x) __builtin_expect (!!(x), 1) +# define unlikely(x) __builtin_expect (x, 0) +# if __GNUC__ >= 9 +# define attribute_copy(f) __attribute__ ((copy (f))) +# else +# define attribute_copy(f) +# endif +# define strong_alias(f, a) \ + extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f); +# define hidden_alias(f, a) \ + extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \ + attribute_copy (f); +#else +# define HIDDEN +# define NOINLINE +# define UNUSED +# define likely(x) (x) +# define unlikely(x) (x) +#endif + +/* Return ptr but hide its value from the compiler so accesses through it + cannot be optimized based on the contents. */ +#define ptr_barrier(ptr) \ + ({ \ + __typeof (ptr) __ptr = (ptr); \ + __asm("" : "+r"(__ptr)); \ + __ptr; \ + }) + +#if HAVE_FAST_ROUND +/* When set, the roundtoint and converttoint functions are provided with + the semantics documented below. */ +# define TOINT_INTRINSICS 1 + +/* Round x to nearest int in all rounding modes, ties have to be rounded + consistently with converttoint so the results match. If the result + would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */ +static inline double_t +roundtoint (double_t x) +{ + return round (x); +} + +/* Convert x to nearest int in all rounding modes, ties have to be rounded + consistently with roundtoint. If the result is not representible in an + int32_t then the semantics is unspecified. */ +static inline int32_t +converttoint (double_t x) +{ +# if HAVE_FAST_LROUND + return lround (x); +# else + return (long) round (x); +# endif +} +#endif + +static inline uint32_t +asuint (float f) +{ + union + { + float f; + uint32_t i; + } u = {f}; + return u.i; +} + +static inline float +asfloat (uint32_t i) +{ + union + { + uint32_t i; + float f; + } u = {i}; + return u.f; +} + +static inline uint64_t +asuint64 (double f) +{ + union + { + double f; + uint64_t i; + } u = {f}; + return u.i; +} + +static inline double +asdouble (uint64_t i) +{ + union + { + uint64_t i; + double f; + } u = {i}; + return u.f; +} + +#ifndef IEEE_754_2008_SNAN +# define IEEE_754_2008_SNAN 1 +#endif +static inline int +issignalingf_inline (float x) +{ + uint32_t ix = asuint (x); + if (!IEEE_754_2008_SNAN) + return (ix & 0x7fc00000) == 0x7fc00000; + return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000; +} + +static inline int +issignaling_inline (double x) +{ + uint64_t ix = asuint64 (x); + if (!IEEE_754_2008_SNAN) + return (ix & 0x7ff8000000000000) == 0x7ff8000000000000; + return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL; +} + +#if __aarch64__ && __GNUC__ +/* Prevent the optimization of a floating-point expression. */ +static inline float +opt_barrier_float (float x) +{ + __asm__ __volatile__ ("" : "+w" (x)); + return x; +} +static inline double +opt_barrier_double (double x) +{ + __asm__ __volatile__ ("" : "+w" (x)); + return x; +} +/* Force the evaluation of a floating-point expression for its side-effect. */ +static inline void +force_eval_float (float x) +{ + __asm__ __volatile__ ("" : "+w" (x)); +} +static inline void +force_eval_double (double x) +{ + __asm__ __volatile__ ("" : "+w" (x)); +} +#else +static inline float +opt_barrier_float (float x) +{ + volatile float y = x; + return y; +} +static inline double +opt_barrier_double (double x) +{ + volatile double y = x; + return y; +} +static inline void +force_eval_float (float x) +{ + volatile float y UNUSED = x; +} +static inline void +force_eval_double (double x) +{ + volatile double y UNUSED = x; +} +#endif + +/* Evaluate an expression as the specified type, normally a type + cast should be enough, but compilers implement non-standard + excess-precision handling, so when FLT_EVAL_METHOD != 0 then + these functions may need to be customized. */ +static inline float +eval_as_float (float x) +{ + return x; +} +static inline double +eval_as_double (double x) +{ + return x; +} + +/* Error handling tail calls for special cases, with a sign argument. + The sign of the return value is set if the argument is non-zero. */ + +/* The result overflows. */ +HIDDEN float __math_oflowf (uint32_t); +/* The result underflows to 0 in nearest rounding mode. */ +HIDDEN float __math_uflowf (uint32_t); +/* The result underflows to 0 in some directed rounding mode only. */ +HIDDEN float __math_may_uflowf (uint32_t); +/* Division by zero. */ +HIDDEN float __math_divzerof (uint32_t); +/* The result overflows. */ +HIDDEN double __math_oflow (uint32_t); +/* The result underflows to 0 in nearest rounding mode. */ +HIDDEN double __math_uflow (uint32_t); +/* The result underflows to 0 in some directed rounding mode only. */ +HIDDEN double __math_may_uflow (uint32_t); +/* Division by zero. */ +HIDDEN double __math_divzero (uint32_t); + +/* Error handling using input checking. */ + +/* Invalid input unless it is a quiet NaN. */ +HIDDEN float __math_invalidf (float); +/* Invalid input unless it is a quiet NaN. */ +HIDDEN double __math_invalid (double); +/* Invalid input unless it is a quiet NaN. */ +HIDDEN long double __math_invalidl (long double); + +/* Error handling using output checking, only for errno setting. */ + +/* Check if the result overflowed to infinity. */ +HIDDEN double __math_check_oflow (double); +/* Check if the result underflowed to 0. */ +HIDDEN double __math_check_uflow (double); + +/* Check if the result overflowed to infinity. */ +static inline double +check_oflow (double x) +{ + return WANT_ERRNO ? __math_check_oflow (x) : x; +} + +/* Check if the result underflowed to 0. */ +static inline double +check_uflow (double x) +{ + return WANT_ERRNO ? __math_check_uflow (x) : x; +} + +/* Check if the result overflowed to infinity. */ +HIDDEN float __math_check_oflowf (float); +/* Check if the result underflowed to 0. */ +HIDDEN float __math_check_uflowf (float); + +/* Check if the result overflowed to infinity. */ +static inline float +check_oflowf (float x) +{ + return WANT_ERRNO ? __math_check_oflowf (x) : x; +} + +/* Check if the result underflowed to 0. */ +static inline float +check_uflowf (float x) +{ + return WANT_ERRNO ? __math_check_uflowf (x) : x; +} + +/* Shared between expf, exp2f and powf. */ +#define EXP2F_TABLE_BITS 5 +#define EXP2F_POLY_ORDER 3 +extern const struct exp2f_data +{ + uint64_t tab[1 << EXP2F_TABLE_BITS]; + double shift_scaled; + double poly[EXP2F_POLY_ORDER]; + double shift; + double invln2_scaled; + double poly_scaled[EXP2F_POLY_ORDER]; +} __exp2f_data HIDDEN; + +#define LOGF_TABLE_BITS 4 +#define LOGF_POLY_ORDER 4 +extern const struct logf_data +{ + struct + { + double invc, logc; + } tab[1 << LOGF_TABLE_BITS]; + double ln2; + double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */ +} __logf_data HIDDEN; + +#define LOG2F_TABLE_BITS 4 +#define LOG2F_POLY_ORDER 4 +extern const struct log2f_data +{ + struct + { + double invc, logc; + } tab[1 << LOG2F_TABLE_BITS]; + double poly[LOG2F_POLY_ORDER]; +} __log2f_data HIDDEN; + +#define POWF_LOG2_TABLE_BITS 4 +#define POWF_LOG2_POLY_ORDER 5 +#if TOINT_INTRINSICS +# define POWF_SCALE_BITS EXP2F_TABLE_BITS +#else +# define POWF_SCALE_BITS 0 +#endif +#define POWF_SCALE ((double) (1 << POWF_SCALE_BITS)) +extern const struct powf_log2_data +{ + struct + { + double invc, logc; + } tab[1 << POWF_LOG2_TABLE_BITS]; + double poly[POWF_LOG2_POLY_ORDER]; +} __powf_log2_data HIDDEN; + + +#define EXP_TABLE_BITS 7 +#define EXP_POLY_ORDER 5 +/* Use polynomial that is optimized for a wider input range. This may be + needed for good precision in non-nearest rounding and !TOINT_INTRINSICS. */ +#define EXP_POLY_WIDE 0 +/* Use close to nearest rounding toint when !TOINT_INTRINSICS. This may be + needed for good precision in non-nearest rouning and !EXP_POLY_WIDE. */ +#define EXP_USE_TOINT_NARROW 0 +#define EXP2_POLY_ORDER 5 +#define EXP2_POLY_WIDE 0 +/* Wider exp10 polynomial necessary for good precision in non-nearest rounding + and !TOINT_INTRINSICS. */ +#define EXP10_POLY_WIDE 0 +extern const struct exp_data +{ + double invln2N; + double invlog10_2N; + double shift; + double negln2hiN; + double negln2loN; + double neglog10_2hiN; + double neglog10_2loN; + double poly[4]; /* Last four coefficients. */ + double exp2_shift; + double exp2_poly[EXP2_POLY_ORDER]; + double exp10_poly[5]; + uint64_t tab[2*(1 << EXP_TABLE_BITS)]; +} __exp_data HIDDEN; + +#define LOG_TABLE_BITS 7 +#define LOG_POLY_ORDER 6 +#define LOG_POLY1_ORDER 12 +extern const struct log_data +{ + double ln2hi; + double ln2lo; + double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */ + double poly1[LOG_POLY1_ORDER - 1]; + struct {double invc, logc;} tab[1 << LOG_TABLE_BITS]; +#if !HAVE_FAST_FMA + struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS]; +#endif +} __log_data HIDDEN; + +#define LOG2_TABLE_BITS 6 +#define LOG2_POLY_ORDER 7 +#define LOG2_POLY1_ORDER 11 +extern const struct log2_data +{ + double invln2hi; + double invln2lo; + double poly[LOG2_POLY_ORDER - 1]; + double poly1[LOG2_POLY1_ORDER - 1]; + struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS]; +#if !HAVE_FAST_FMA + struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS]; +#endif +} __log2_data HIDDEN; + +#define POW_LOG_TABLE_BITS 7 +#define POW_LOG_POLY_ORDER 8 +extern const struct pow_log_data +{ + double ln2hi; + double ln2lo; + double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1. */ + /* Note: the pad field is unused, but allows slightly faster indexing. */ + struct {double invc, pad, logc, logctail;} tab[1 << POW_LOG_TABLE_BITS]; +} __pow_log_data HIDDEN; + +extern const struct erff_data +{ + float erff_poly_A[6]; + float erff_poly_B[7]; +} __erff_data HIDDEN; + +#define ERF_POLY_A_ORDER 19 +#define ERF_POLY_A_NCOEFFS 10 +#define ERFC_POLY_C_NCOEFFS 16 +#define ERFC_POLY_D_NCOEFFS 18 +#define ERFC_POLY_E_NCOEFFS 14 +#define ERFC_POLY_F_NCOEFFS 17 +extern const struct erf_data +{ + double erf_poly_A[ERF_POLY_A_NCOEFFS]; + double erf_ratio_N_A[5]; + double erf_ratio_D_A[5]; + double erf_ratio_N_B[7]; + double erf_ratio_D_B[6]; + double erfc_poly_C[ERFC_POLY_C_NCOEFFS]; + double erfc_poly_D[ERFC_POLY_D_NCOEFFS]; + double erfc_poly_E[ERFC_POLY_E_NCOEFFS]; + double erfc_poly_F[ERFC_POLY_F_NCOEFFS]; +} __erf_data HIDDEN; + +#define V_EXP_TABLE_BITS 7 +extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN; + +#define V_LOG_TABLE_BITS 7 +extern const struct v_log_data +{ + struct + { + double invc, logc; + } table[1 << V_LOG_TABLE_BITS]; +} __v_log_data HIDDEN; + +COSMOPOLITAN_C_END_ +#endif /* COSMOPOLITAN_LIBC_TINYMATH_ARM_H_ */ diff --git a/libc/tinymath/asinhl.c b/libc/tinymath/asinhl.c index c39f5f97c..65651f9f4 100644 --- a/libc/tinymath/asinhl.c +++ b/libc/tinymath/asinhl.c @@ -5,13 +5,6 @@ │ FreeBSD lib/msun/src/s_asinhl.c │ │ Converted to ldbl by David Schultz and Bruce D. Evans. │ │ │ -│ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. │ -│ │ -│ Developed at SunPro, a Sun Microsystems, Inc. business. │ -│ Permission to use, copy, modify, and distribute this │ -│ software is freely granted, provided that this notice │ -│ is preserved. │ -│ │ │ Copyright (c) 1992-2023 The FreeBSD Project. │ │ │ │ Redistribution and use in source and binary forms, with or without │ @@ -35,12 +28,17 @@ │ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF │ │ SUCH DAMAGE. │ │ │ +│ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. │ +│ │ +│ Developed at SunPro, a Sun Microsystems, Inc. business. │ +│ Permission to use, copy, modify, and distribute this │ +│ software is freely granted, provided that this notice │ +│ is preserved. │ +│ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/math.h" #include "libc/tinymath/freebsd.internal.h" - -__static_yoink("fdlibm_notice"); __static_yoink("freebsd_libm_notice"); +__static_yoink("fdlibm_notice"); #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) diff --git a/libc/tinymath/atan2f.c b/libc/tinymath/atan2f.c index 3e8ca8194..fe60ca348 100644 --- a/libc/tinymath/atan2f.c +++ b/libc/tinymath/atan2f.c @@ -1,177 +1,120 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ +│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ FreeBSD lib/msun/src/s_asinhl.c │ +│ Converted to ldbl by David Schultz and Bruce D. Evans. │ │ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ +│ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. │ │ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ +│ Developed at SunPro, a Sun Microsystems, Inc. business. │ +│ Permission to use, copy, modify, and distribute this │ +│ software is freely granted, provided that this notice │ +│ is preserved. │ │ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ +│ Copyright (c) 1992-2023 The FreeBSD Project. │ +│ │ +│ Redistribution and use in source and binary forms, with or without │ +│ modification, are permitted provided that the following conditions │ +│ are met: │ +│ 1. Redistributions of source code must retain the above copyright │ +│ notice, this list of conditions and the following disclaimer. │ +│ 2. Redistributions in binary form must reproduce the above copyright │ +│ notice, this list of conditions and the following disclaimer in the │ +│ documentation and/or other materials provided with the distribution. │ +│ │ +│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND │ +│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE │ +│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE │ +│ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE │ +│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL │ +│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS │ +│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) │ +│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT │ +│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY │ +│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF │ +│ SUCH DAMAGE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/likely.h" -#include "libc/math.h" -#include "libc/tinymath/atanf_common.internal.h" -#include "libc/tinymath/internal.h" -__static_yoink("arm_optimized_routines_notice"); +#include "libc/tinymath/freebsd.internal.h" +__static_yoink("freebsd_libm_notice"); +__static_yoink("fdlibm_notice"); -#define Pi (0x1.921fb6p+1f) -#define PiOver2 (0x1.921fb6p+0f) -#define PiOver4 (0x1.921fb6p-1f) -#define SignMask (0x80000000) +static volatile float +tiny = 1.0e-30; +static const float +zero = 0.0, +pi_o_4 = 7.8539818525e-01, /* 0x3f490fdb */ +pi_o_2 = 1.5707963705e+00, /* 0x3fc90fdb */ +pi = 3.1415927410e+00; /* 0x40490fdb */ +static volatile float +pi_lo = -8.7422776573e-08; /* 0xb3bbbd2e */ -/* We calculate atan2f by P(n/d), where n and d are similar to the input - arguments, and P is a polynomial. The polynomial may underflow. - POLY_UFLOW_BOUND is the lower bound of the difference in exponents of n and d - for which P underflows, and is used to special-case such inputs. */ -#define POLY_UFLOW_BOUND 24 - -static inline int32_t -biased_exponent (float f) -{ - uint32_t fi = asuint (f); - int32_t ex = (int32_t) ((fi & 0x7f800000) >> 23); - if (UNLIKELY (ex == 0)) - { - /* Subnormal case - we still need to get the exponent right for subnormal - numbers as division may take us back inside the normal range. */ - return ex - __builtin_clz (fi << 9); - } - return ex; -} - -/* Fast implementation of scalar atan2f. Largest observed error is - 2.88ulps in [99.0, 101.0] x [99.0, 101.0]: - atan2f(0x1.9332d8p+6, 0x1.8cb6c4p+6) got 0x1.964646p-1 - want 0x1.964640p-1. */ +/** + * Returns arc tangent of 𝑦/𝑥. + */ float -atan2f (float y, float x) +atan2f(float y, float x) { - uint32_t ix = asuint (x); - uint32_t iy = asuint (y); + float z; + int32_t k,m,hx,hy,ix,iy; - uint32_t sign_x = ix & SignMask; - uint32_t sign_y = iy & SignMask; + GET_FLOAT_WORD(hx,x); + ix = hx&0x7fffffff; + GET_FLOAT_WORD(hy,y); + iy = hy&0x7fffffff; + if((ix>0x7f800000)|| + (iy>0x7f800000)) /* x or y is NaN */ + return nan_mix(x, y); + if(hx==0x3f800000) return atanf(y); /* x=1.0 */ + m = ((hy>>31)&1)|((hx>>30)&2); /* 2*sign(x)+sign(y) */ - uint32_t iax = ix & ~SignMask; - uint32_t iay = iy & ~SignMask; - - /* x or y is NaN. */ - if ((iax > 0x7f800000) || (iay > 0x7f800000)) - return x + y; - - /* m = 2 * sign(x) + sign(y). */ - uint32_t m = ((iy >> 31) & 1) | ((ix >> 30) & 2); - - /* The following follows glibc ieee754 implementation, except - that we do not use +-tiny shifts (non-nearest rounding mode). */ - - int32_t exp_diff = biased_exponent (x) - biased_exponent (y); - - /* Special case for (x, y) either on or very close to the x axis. Either y = - 0, or y is tiny and x is huge (difference in exponents >= - POLY_UFLOW_BOUND). In the second case, we only want to use this special - case when x is negative (i.e. quadrants 2 or 3). */ - if (UNLIKELY (iay == 0 || (exp_diff >= POLY_UFLOW_BOUND && m >= 2))) - { - switch (m) - { - case 0: - case 1: - return y; /* atan(+-0,+anything)=+-0. */ - case 2: - return Pi; /* atan(+0,-anything) = pi. */ - case 3: - return -Pi; /* atan(-0,-anything) =-pi. */ - } - } - /* Special case for (x, y) either on or very close to the y axis. Either x = - 0, or x is tiny and y is huge (difference in exponents >= - POLY_UFLOW_BOUND). */ - if (UNLIKELY (iax == 0 || exp_diff <= -POLY_UFLOW_BOUND)) - return sign_y ? -PiOver2 : PiOver2; - - /* x is INF. */ - if (iax == 0x7f800000) - { - if (iay == 0x7f800000) - { - switch (m) - { - case 0: - return PiOver4; /* atan(+INF,+INF). */ - case 1: - return -PiOver4; /* atan(-INF,+INF). */ - case 2: - return 3.0f * PiOver4; /* atan(+INF,-INF). */ - case 3: - return -3.0f * PiOver4; /* atan(-INF,-INF). */ + /* when y = 0 */ + if(iy==0) { + switch(m) { + case 0: + case 1: return y; /* atan(+-0,+anything)=+-0 */ + case 2: return pi+tiny;/* atan(+0,-anything) = pi */ + case 3: return -pi-tiny;/* atan(-0,-anything) =-pi */ } } - else - { - switch (m) - { - case 0: - return 0.0f; /* atan(+...,+INF). */ - case 1: - return -0.0f; /* atan(-...,+INF). */ - case 2: - return Pi; /* atan(+...,-INF). */ - case 3: - return -Pi; /* atan(-...,-INF). */ + /* when x = 0 */ + if(ix==0) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny; + + /* when x is INF */ + if(ix==0x7f800000) { + if(iy==0x7f800000) { + switch(m) { + case 0: return pi_o_4+tiny;/* atan(+INF,+INF) */ + case 1: return -pi_o_4-tiny;/* atan(-INF,+INF) */ + case 2: return (float)3.0*pi_o_4+tiny;/*atan(+INF,-INF)*/ + case 3: return (float)-3.0*pi_o_4-tiny;/*atan(-INF,-INF)*/ + } + } else { + switch(m) { + case 0: return zero ; /* atan(+...,+INF) */ + case 1: return -zero ; /* atan(-...,+INF) */ + case 2: return pi+tiny ; /* atan(+...,-INF) */ + case 3: return -pi-tiny ; /* atan(-...,-INF) */ + } } } - } - /* y is INF. */ - if (iay == 0x7f800000) - return sign_y ? -PiOver2 : PiOver2; + /* when y is INF */ + if(iy==0x7f800000) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny; - uint32_t sign_xy = sign_x ^ sign_y; - - float ax = asfloat (iax); - float ay = asfloat (iay); - - bool pred_aygtax = (ay > ax); - - /* Set up z for call to atanf. */ - float n = pred_aygtax ? -ax : ay; - float d = pred_aygtax ? ay : ax; - float z = n / d; - - float ret; - if (UNLIKELY (m < 2 && exp_diff >= POLY_UFLOW_BOUND)) - { - /* If (x, y) is very close to x axis and x is positive, the polynomial - will underflow and evaluate to z. */ - ret = z; - } - else - { - /* Work out the correct shift. */ - float shift = sign_x ? -2.0f : 0.0f; - shift = pred_aygtax ? shift + 1.0f : shift; - shift *= PiOver2; - - ret = eval_poly (z, z, shift); - } - - /* Account for the sign of x and y. */ - return asfloat (asuint (ret) ^ sign_xy); + /* compute y/x */ + k = (iy-ix)>>23; + if(k > 26) { /* |y/x| > 2**26 */ + z=pi_o_2+(float)0.5*pi_lo; + m&=1; + } + else if(k<-26&&hx<0) z=0.0; /* 0 > |y|/x > -2**-26 */ + else z=atanf(fabsf(y/x)); /* safe to do y/x */ + switch (m) { + case 0: return z ; /* atan(+,+) */ + case 1: return -z ; /* atan(-,+) */ + case 2: return pi-(z-pi_lo);/* atan(+,-) */ + default: /* case 3 */ + return (z-pi_lo)-pi;/* atan(-,-) */ + } } diff --git a/libc/tinymath/atan_common.internal.h b/libc/tinymath/atan_common.internal.h deleted file mode 100644 index 7c50045a6..000000000 --- a/libc/tinymath/atan_common.internal.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATAN_COMMON_H_ -#define COSMOPOLITAN_LIBC_TINYMATH_ATAN_COMMON_H_ -#include "libc/tinymath/atan_data.internal.h" -#include "libc/tinymath/estrin_wrap.internal.h" -#include "libc/tinymath/horner.internal.h" -COSMOPOLITAN_C_START_ - -/* - * Double-precision polynomial evaluation function for scalar and vector atan(x) - * and atan2(y,x). - * - * Copyright (c) 2021-2023, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - -#if WANT_VMATH - -#define DBL_T float64x2_t -#define P(i) v_f64 (__atan_poly_data.poly[i]) - -#else - -#define DBL_T double -#define P(i) __atan_poly_data.poly[i] - -#endif - -/* Polynomial used in fast atan(x) and atan2(y,x) implementations - The order 19 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2). */ -static inline DBL_T -eval_poly (DBL_T z, DBL_T az, DBL_T shift) -{ - /* Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of - full scheme to avoid underflow in x^16. */ - DBL_T z2 = z * z; - DBL_T x2 = z2 * z2; - DBL_T x4 = x2 * x2; - DBL_T x8 = x4 * x4; - DBL_T y - = FMA (ESTRIN_11_ (z2, x2, x4, x8, P, 8), x8, ESTRIN_7 (z2, x2, x4, P)); - - /* Finalize. y = shift + z + z^3 * P(z^2). */ - y = FMA (y, z2 * az, az); - y = y + shift; - - return y; -} - -#undef DBL_T -#undef FMA -#undef P - -COSMOPOLITAN_C_END_ -#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATAN_COMMON_H_ */ diff --git a/libc/tinymath/atan_data.internal.h b/libc/tinymath/atan_data.internal.h deleted file mode 100644 index 5f3858465..000000000 --- a/libc/tinymath/atan_data.internal.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATAN_DATA_H_ -#define COSMOPOLITAN_LIBC_TINYMATH_ATAN_DATA_H_ -COSMOPOLITAN_C_START_ - -#define ATAN_POLY_NCOEFFS 20 -extern const struct atan_poly_data { - double poly[ATAN_POLY_NCOEFFS]; -} __atan_poly_data; - -COSMOPOLITAN_C_END_ -#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATAN_DATA_H_ */ diff --git a/libc/tinymath/atanf_common.internal.h b/libc/tinymath/atanf_common.internal.h deleted file mode 100644 index 38a2df65d..000000000 --- a/libc/tinymath/atanf_common.internal.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATANF_COMMON_H_ -#define COSMOPOLITAN_LIBC_TINYMATH_ATANF_COMMON_H_ -#include "libc/tinymath/atanf_data.internal.h" -#include "libc/tinymath/estrin_wrap.internal.h" -#include "libc/tinymath/hornerf.internal.h" -COSMOPOLITAN_C_START_ - -#if WANT_VMATH - -#define FLT_T float32x4_t -#define P(i) v_f32 (__atanf_poly_data.poly[i]) - -#else - -#define FLT_T float -#define P(i) __atanf_poly_data.poly[i] - -#endif - -/* Polynomial used in fast atanf(x) and atan2f(y,x) implementations - The order 7 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2). */ -static inline FLT_T -eval_poly (FLT_T z, FLT_T az, FLT_T shift) -{ - /* Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However, - a standard implementation using z8 creates spurious underflow - in the very last fma (when z^8 is small enough). - Therefore, we split the last fma into a mul and and an fma. - Horner and single-level Estrin have higher errors that exceed - threshold. */ - FLT_T z2 = z * z; - FLT_T z4 = z2 * z2; - - /* Then assemble polynomial. */ - FLT_T y = FMA (z4, z4 * ESTRIN_3_ (z2, z4, P, 4), ESTRIN_3 (z2, z4, P)); - - /* Finalize: - y = shift + z * P(z^2). */ - return FMA (y, z2 * az, az) + shift; -} - -COSMOPOLITAN_C_END_ -#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATANF_COMMON_H_ */ diff --git a/libc/tinymath/atanf_data.internal.h b/libc/tinymath/atanf_data.internal.h deleted file mode 100644 index ade0fd877..000000000 --- a/libc/tinymath/atanf_data.internal.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATANF_DATA_H_ -#define COSMOPOLITAN_LIBC_TINYMATH_ATANF_DATA_H_ -COSMOPOLITAN_C_START_ - -#define ATANF_POLY_NCOEFFS 8 -extern const struct atanf_poly_data { - float poly[ATANF_POLY_NCOEFFS]; -} __atanf_poly_data; - -COSMOPOLITAN_C_END_ -#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATANF_DATA_H_ */ diff --git a/libc/tinymath/cacos.c b/libc/tinymath/cacos.c deleted file mode 100644 index 6c774f583..000000000 --- a/libc/tinymath/cacos.c +++ /dev/null @@ -1,41 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ -╚──────────────────────────────────────────────────────────────────────────────╝ -│ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ -│ │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/complex.h" -#include "libc/math.h" -#include "libc/tinymath/complex.internal.h" -__static_yoink("musl_libc_notice"); - -// FIXME: Hull et al. "Implementing the complex arcsine and arccosine functions using exception handling" 1997 - -/* acos(z) = pi/2 - asin(z) */ - -double complex cacos(double complex z) -{ - z = casin(z); - return CMPLX(M_PI_2 - creal(z), -cimag(z)); -} diff --git a/libc/tinymath/cacosf.c b/libc/tinymath/cacosf.c deleted file mode 100644 index 0c8add51d..000000000 --- a/libc/tinymath/cacosf.c +++ /dev/null @@ -1,39 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ -╚──────────────────────────────────────────────────────────────────────────────╝ -│ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ -│ │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/complex.h" -#include "libc/math.h" -#include "libc/tinymath/complex.internal.h" -__static_yoink("musl_libc_notice"); - -// FIXME - -float complex cacosf(float complex z) -{ - z = casinf(z); - return CMPLXF((float)M_PI_2 - crealf(z), -cimagf(z)); -} diff --git a/libc/tinymath/cacosh.c b/libc/tinymath/cacosh.c deleted file mode 100644 index 3a663ee16..000000000 --- a/libc/tinymath/cacosh.c +++ /dev/null @@ -1,44 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ -╚──────────────────────────────────────────────────────────────────────────────╝ -│ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ -│ │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/complex.h" -#include "libc/math.h" -#include "libc/tinymath/complex.internal.h" -__static_yoink("musl_libc_notice"); - -/* acosh(z) = i acos(z) */ -double complex cacosh(double complex z) -{ - int zineg = signbit(cimag(z)); - z = cacos(z); - if (zineg) return CMPLX(cimag(z), -creal(z)); - else return CMPLX(-cimag(z), creal(z)); -} - -#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 -__weak_reference(cacosh, cacoshl); -#endif diff --git a/libc/tinymath/cacoshf.c b/libc/tinymath/cacoshf.c deleted file mode 100644 index 0160691fa..000000000 --- a/libc/tinymath/cacoshf.c +++ /dev/null @@ -1,39 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ -╚──────────────────────────────────────────────────────────────────────────────╝ -│ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ -│ │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/complex.h" -#include "libc/math.h" -#include "libc/tinymath/complex.internal.h" -__static_yoink("musl_libc_notice"); - -float complex cacoshf(float complex z) -{ - int zineg = signbit(cimagf(z)); - z = cacosf(z); - if (zineg) return CMPLXF(cimagf(z), -crealf(z)); - else return CMPLXF(-cimagf(z), crealf(z)); -} diff --git a/libc/tinymath/casinf.c b/libc/tinymath/casinf.c deleted file mode 100644 index 04766d91d..000000000 --- a/libc/tinymath/casinf.c +++ /dev/null @@ -1,45 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ -╚──────────────────────────────────────────────────────────────────────────────╝ -│ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ -│ │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/complex.h" -#include "libc/math.h" -#include "libc/tinymath/complex.internal.h" -__static_yoink("musl_libc_notice"); - -// FIXME - -float complex casinf(float complex z) -{ - float complex w; - float x, y; - - x = crealf(z); - y = cimagf(z); - w = CMPLXF(1.0 - (x - y)*(x + y), -2.0*x*y); - float complex r = clogf(CMPLXF(-y, x) + csqrtf(w)); - return CMPLXF(cimagf(r), -crealf(r)); -} diff --git a/libc/tinymath/casinh.c b/libc/tinymath/casinh.c deleted file mode 100644 index 56d958807..000000000 --- a/libc/tinymath/casinh.c +++ /dev/null @@ -1,39 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ -╚──────────────────────────────────────────────────────────────────────────────╝ -│ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ -│ │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/complex.h" -#include "libc/math.h" -#include "libc/tinymath/complex.internal.h" -__static_yoink("musl_libc_notice"); - -/* asinh(z) = -i asin(i z) */ - -double complex casinh(double complex z) -{ - z = casin(CMPLX(-cimag(z), creal(z))); - return CMPLX(cimag(z), -creal(z)); -} diff --git a/libc/tinymath/casinhf.c b/libc/tinymath/casinhf.c deleted file mode 100644 index 8bcc1e726..000000000 --- a/libc/tinymath/casinhf.c +++ /dev/null @@ -1,37 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ -╚──────────────────────────────────────────────────────────────────────────────╝ -│ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ -│ │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/complex.h" -#include "libc/math.h" -#include "libc/tinymath/complex.internal.h" -__static_yoink("musl_libc_notice"); - -float complex casinhf(float complex z) -{ - z = casinf(CMPLXF(-cimagf(z), crealf(z))); - return CMPLXF(cimagf(z), -crealf(z)); -} diff --git a/libc/tinymath/catan.c b/libc/tinymath/catan.c deleted file mode 100644 index dd3b4a90e..000000000 --- a/libc/tinymath/catan.c +++ /dev/null @@ -1,142 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ -╚──────────────────────────────────────────────────────────────────────────────╝ -│ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ -│ │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/complex.h" -#include "libc/math.h" -#include "libc/tinymath/complex.internal.h" -__static_yoink("musl_libc_notice"); -__static_yoink("openbsd_libm_notice"); - -/* origin: OpenBSD /usr/src/lib/libm/src/s_catan.c */ -/* - * Copyright (c) 2008 Stephen L. Moshier - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ -/* - * Complex circular arc tangent - * - * - * SYNOPSIS: - * - * double complex catan(); - * double complex z, w; - * - * w = catan (z); - * - * - * DESCRIPTION: - * - * If - * z = x + iy, - * - * then - * 1 ( 2x ) - * Re w = - arctan(-----------) + k PI - * 2 ( 2 2) - * (1 - x - y ) - * - * ( 2 2) - * 1 (x + (y+1) ) - * Im w = - log(------------) - * 4 ( 2 2) - * (x + (y-1) ) - * - * Where k is an arbitrary integer. - * - * catan(z) = -i catanh(iz). - * - * ACCURACY: - * - * Relative error: - * arithmetic domain # trials peak rms - * DEC -10,+10 5900 1.3e-16 7.8e-18 - * IEEE -10,+10 30000 2.3e-15 8.5e-17 - * The check catan( ctan(z) ) = z, with |x| and |y| < PI/2, - * had peak relative error 1.5e-16, rms relative error - * 2.9e-17. See also clog(). - */ - -#define MAXNUM 1.0e308 - -static const double DP1 = 3.14159265160560607910E0; -static const double DP2 = 1.98418714791870343106E-9; -static const double DP3 = 1.14423774522196636802E-17; - -static double _redupi(double x) -{ - double t; - long i; - - t = x/M_PI; - if (t >= 0.0) - t += 0.5; - else - t -= 0.5; - - i = t; /* the multiple */ - t = i; - t = ((x - t * DP1) - t * DP2) - t * DP3; - return t; -} - -double complex catan(double complex z) -{ - double complex w; - double a, t, x, x2, y; - - x = creal(z); - y = cimag(z); - - x2 = x * x; - a = 1.0 - x2 - (y * y); - - t = 0.5 * atan2(2.0 * x, a); - w = _redupi(t); - - t = y - 1.0; - a = x2 + (t * t); - - t = y + 1.0; - a = (x2 + t * t)/a; - w = CMPLX(w, 0.25 * log(a)); - return w; -} - -#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 -__weak_reference(catan, catanl); -#endif diff --git a/libc/tinymath/catanf.c b/libc/tinymath/catanf.c deleted file mode 100644 index 846c2a7ad..000000000 --- a/libc/tinymath/catanf.c +++ /dev/null @@ -1,135 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ -╚──────────────────────────────────────────────────────────────────────────────╝ -│ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ -│ │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/complex.h" -#include "libc/math.h" -#include "libc/tinymath/complex.internal.h" -__static_yoink("musl_libc_notice"); -__static_yoink("openbsd_libm_notice"); - -/* origin: OpenBSD /usr/src/lib/libm/src/s_catanf.c */ -/* - * Copyright (c) 2008 Stephen L. Moshier - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ -/* - * Complex circular arc tangent - * - * - * SYNOPSIS: - * - * float complex catanf(); - * float complex z, w; - * - * w = catanf( z ); - * - * - * DESCRIPTION: - * - * If - * z = x + iy, - * - * then - * 1 ( 2x ) - * Re w = - arctan(-----------) + k PI - * 2 ( 2 2) - * (1 - x - y ) - * - * ( 2 2) - * 1 (x + (y+1) ) - * Im w = - log(------------) - * 4 ( 2 2) - * (x + (y-1) ) - * - * Where k is an arbitrary integer. - * - * - * ACCURACY: - * - * Relative error: - * arithmetic domain # trials peak rms - * IEEE -10,+10 30000 2.3e-6 5.2e-8 - */ - - -#define MAXNUMF 1.0e38F - -static const double DP1 = 3.140625; -static const double DP2 = 9.67502593994140625E-4; -static const double DP3 = 1.509957990978376432E-7; - -static float _redupif(float xx) -{ - float x, t; - long i; - - x = xx; - t = x/(float)M_PI; - if (t >= 0.0f) - t += 0.5f; - else - t -= 0.5f; - - i = t; /* the multiple */ - t = i; - t = ((x - t * DP1) - t * DP2) - t * DP3; - return t; -} - -float complex catanf(float complex z) -{ - float complex w; - float a, t, x, x2, y; - - x = crealf(z); - y = cimagf(z); - - x2 = x * x; - a = 1.0f - x2 - (y * y); - - t = 0.5f * atan2f(2.0f * x, a); - w = _redupif(t); - - t = y - 1.0f; - a = x2 + (t * t); - - t = y + 1.0f; - a = (x2 + (t * t))/a; - w = CMPLXF(w, 0.25f * logf(a)); - return w; -} diff --git a/libc/tinymath/catanh.c b/libc/tinymath/catanh.c deleted file mode 100644 index 12b2c88b1..000000000 --- a/libc/tinymath/catanh.c +++ /dev/null @@ -1,39 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ -╚──────────────────────────────────────────────────────────────────────────────╝ -│ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ -│ │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/complex.h" -#include "libc/math.h" -#include "libc/tinymath/complex.internal.h" -__static_yoink("musl_libc_notice"); - -/* atanh = -i atan(i z) */ - -double complex catanh(double complex z) -{ - z = catan(CMPLX(-cimag(z), creal(z))); - return CMPLX(cimag(z), -creal(z)); -} diff --git a/libc/tinymath/catanhf.c b/libc/tinymath/catanhf.c deleted file mode 100644 index b7282e92e..000000000 --- a/libc/tinymath/catanhf.c +++ /dev/null @@ -1,37 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ -╚──────────────────────────────────────────────────────────────────────────────╝ -│ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ -│ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ -│ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ -│ │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/complex.h" -#include "libc/math.h" -#include "libc/tinymath/complex.internal.h" -__static_yoink("musl_libc_notice"); - -float complex catanhf(float complex z) -{ - z = catanf(CMPLXF(-cimagf(z), crealf(z))); - return CMPLXF(cimagf(z), -crealf(z)); -} diff --git a/libc/tinymath/catrig.c b/libc/tinymath/catrig.c new file mode 100644 index 000000000..84d301fe3 --- /dev/null +++ b/libc/tinymath/catrig.c @@ -0,0 +1,651 @@ +/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ +│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +╚──────────────────────────────────────────────────────────────────────────────╝ +│ │ +│ FreeBSD lib/msun/src/catrig.c │ +│ Copyright (c) 2012 Stephen Montgomery-Smith │ +│ │ +│ Redistribution and use in source and binary forms, with or without │ +│ modification, are permitted provided that the following conditions │ +│ are met: │ +│ 1. Redistributions of source code must retain the above copyright │ +│ notice, this list of conditions and the following disclaimer. │ +│ 2. Redistributions in binary form must reproduce the above copyright │ +│ notice, this list of conditions and the following disclaimer in the │ +│ documentation and/or other materials provided with the distribution. │ +│ │ +│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND │ +│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE │ +│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE │ +│ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE │ +│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL │ +│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS │ +│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) │ +│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT │ +│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY │ +│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF │ +│ SUCH DAMAGE. │ +│ │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/tinymath/freebsd.internal.h" +__static_yoink("freebsd_complex_notice"); + +#undef isinf +#define isinf(x) (fabs(x) == INFINITY) +#undef isnan +#define isnan(x) ((x) != (x)) +#define raise_inexact() do { volatile float _j = 1 + tiny; (void)_j; } while(0) +#undef signbit +#define signbit(x) (__builtin_signbit(x)) + +/* We need that DBL_EPSILON^2/128 is larger than FOUR_SQRT_MIN. */ +static const double +A_crossover = 10, /* Hull et al suggest 1.5, but 10 works better */ +B_crossover = 0.6417, /* suggested by Hull et al */ +FOUR_SQRT_MIN = 0x1p-509, /* >= 4 * sqrt(DBL_MIN) */ +QUARTER_SQRT_MAX = 0x1p509, /* <= sqrt(DBL_MAX) / 4 */ +m_e = 2.7182818284590452e0, /* 0x15bf0a8b145769.0p-51 */ +m_ln2 = 6.9314718055994531e-1, /* 0x162e42fefa39ef.0p-53 */ +pio2_hi = 1.5707963267948966e0, /* 0x1921fb54442d18.0p-52 */ +RECIP_EPSILON = 1 / DBL_EPSILON, +SQRT_3_EPSILON = 2.5809568279517849e-8, /* 0x1bb67ae8584caa.0p-78 */ +SQRT_6_EPSILON = 3.6500241499888571e-8, /* 0x13988e1409212e.0p-77 */ +SQRT_MIN = 0x1p-511; /* >= sqrt(DBL_MIN) */ + +static const volatile double +pio2_lo = 6.1232339957367659e-17; /* 0x11a62633145c07.0p-106 */ +static const volatile float +tiny = 0x1p-100; + +static double complex clog_for_large_values(double complex z); + +/* + * Testing indicates that all these functions are accurate up to 4 ULP. + * The functions casin(h) and cacos(h) are about 2.5 times slower than asinh. + * The functions catan(h) are a little under 2 times slower than atanh. + * + * The code for casinh, casin, cacos, and cacosh comes first. The code is + * rather complicated, and the four functions are highly interdependent. + * + * The code for catanh and catan comes at the end. It is much simpler than + * the other functions, and the code for these can be disconnected from the + * rest of the code. + */ + +/* + * ================================ + * | casinh, casin, cacos, cacosh | + * ================================ + */ + +/* + * The algorithm is very close to that in "Implementing the complex arcsine + * and arccosine functions using exception handling" by T. E. Hull, Thomas F. + * Fairgrieve, and Ping Tak Peter Tang, published in ACM Transactions on + * Mathematical Software, Volume 23 Issue 3, 1997, Pages 299-335, + * http://dl.acm.org/citation.cfm?id=275324. + * + * Throughout we use the convention z = x + I*y. + * + * casinh(z) = sign(x)*log(A+sqrt(A*A-1)) + I*asin(B) + * where + * A = (|z+I| + |z-I|) / 2 + * B = (|z+I| - |z-I|) / 2 = y/A + * + * These formulas become numerically unstable: + * (a) for Re(casinh(z)) when z is close to the line segment [-I, I] (that + * is, Re(casinh(z)) is close to 0); + * (b) for Im(casinh(z)) when z is close to either of the intervals + * [I, I*infinity) or (-I*infinity, -I] (that is, |Im(casinh(z))| is + * close to PI/2). + * + * These numerical problems are overcome by defining + * f(a, b) = (hypot(a, b) - b) / 2 = a*a / (hypot(a, b) + b) / 2 + * Then if A < A_crossover, we use + * log(A + sqrt(A*A-1)) = log1p((A-1) + sqrt((A-1)*(A+1))) + * A-1 = f(x, 1+y) + f(x, 1-y) + * and if B > B_crossover, we use + * asin(B) = atan2(y, sqrt(A*A - y*y)) = atan2(y, sqrt((A+y)*(A-y))) + * A-y = f(x, y+1) + f(x, y-1) + * where without loss of generality we have assumed that x and y are + * non-negative. + * + * Much of the difficulty comes because the intermediate computations may + * produce overflows or underflows. This is dealt with in the paper by Hull + * et al by using exception handling. We do this by detecting when + * computations risk underflow or overflow. The hardest part is handling the + * underflows when computing f(a, b). + * + * Note that the function f(a, b) does not appear explicitly in the paper by + * Hull et al, but the idea may be found on pages 308 and 309. Introducing the + * function f(a, b) allows us to concentrate many of the clever tricks in this + * paper into one function. + */ + +/* + * Function f(a, b, hypot_a_b) = (hypot(a, b) - b) / 2. + * Pass hypot(a, b) as the third argument. + */ +static inline double +f(double a, double b, double hypot_a_b) +{ + if (b < 0) + return ((hypot_a_b - b) / 2); + if (b == 0) + return (a / 2); + return (a * a / (hypot_a_b + b) / 2); +} + +/* + * All the hard work is contained in this function. + * x and y are assumed positive or zero, and less than RECIP_EPSILON. + * Upon return: + * rx = Re(casinh(z)) = -Im(cacos(y + I*x)). + * B_is_usable is set to 1 if the value of B is usable. + * If B_is_usable is set to 0, sqrt_A2my2 = sqrt(A*A - y*y), and new_y = y. + * If returning sqrt_A2my2 has potential to result in an underflow, it is + * rescaled, and new_y is similarly rescaled. + */ +static inline void +do_hard_work(double x, double y, double *rx, int *B_is_usable, double *B, + double *sqrt_A2my2, double *new_y) +{ + double R, S, A; /* A, B, R, and S are as in Hull et al. */ + double Am1, Amy; /* A-1, A-y. */ + + R = hypot(x, y + 1); /* |z+I| */ + S = hypot(x, y - 1); /* |z-I| */ + + /* A = (|z+I| + |z-I|) / 2 */ + A = (R + S) / 2; + /* + * Mathematically A >= 1. There is a small chance that this will not + * be so because of rounding errors. So we will make certain it is + * so. + */ + if (A < 1) + A = 1; + + if (A < A_crossover) { + /* + * Am1 = fp + fm, where fp = f(x, 1+y), and fm = f(x, 1-y). + * rx = log1p(Am1 + sqrt(Am1*(A+1))) + */ + if (y == 1 && x < DBL_EPSILON * DBL_EPSILON / 128) { + /* + * fp is of order x^2, and fm = x/2. + * A = 1 (inexactly). + */ + *rx = sqrt(x); + } else if (x >= DBL_EPSILON * fabs(y - 1)) { + /* + * Underflow will not occur because + * x >= DBL_EPSILON^2/128 >= FOUR_SQRT_MIN + */ + Am1 = f(x, 1 + y, R) + f(x, 1 - y, S); + *rx = log1p(Am1 + sqrt(Am1 * (A + 1))); + } else if (y < 1) { + /* + * fp = x*x/(1+y)/4, fm = x*x/(1-y)/4, and + * A = 1 (inexactly). + */ + *rx = x / sqrt((1 - y) * (1 + y)); + } else { /* if (y > 1) */ + /* + * A-1 = y-1 (inexactly). + */ + *rx = log1p((y - 1) + sqrt((y - 1) * (y + 1))); + } + } else { + *rx = log(A + sqrt(A * A - 1)); + } + + *new_y = y; + + if (y < FOUR_SQRT_MIN) { + /* + * Avoid a possible underflow caused by y/A. For casinh this + * would be legitimate, but will be picked up by invoking atan2 + * later on. For cacos this would not be legitimate. + */ + *B_is_usable = 0; + *sqrt_A2my2 = A * (2 / DBL_EPSILON); + *new_y = y * (2 / DBL_EPSILON); + return; + } + + /* B = (|z+I| - |z-I|) / 2 = y/A */ + *B = y / A; + *B_is_usable = 1; + + if (*B > B_crossover) { + *B_is_usable = 0; + /* + * Amy = fp + fm, where fp = f(x, y+1), and fm = f(x, y-1). + * sqrt_A2my2 = sqrt(Amy*(A+y)) + */ + if (y == 1 && x < DBL_EPSILON / 128) { + /* + * fp is of order x^2, and fm = x/2. + * A = 1 (inexactly). + */ + *sqrt_A2my2 = sqrt(x) * sqrt((A + y) / 2); + } else if (x >= DBL_EPSILON * fabs(y - 1)) { + /* + * Underflow will not occur because + * x >= DBL_EPSILON/128 >= FOUR_SQRT_MIN + * and + * x >= DBL_EPSILON^2 >= FOUR_SQRT_MIN + */ + Amy = f(x, y + 1, R) + f(x, y - 1, S); + *sqrt_A2my2 = sqrt(Amy * (A + y)); + } else if (y > 1) { + /* + * fp = x*x/(y+1)/4, fm = x*x/(y-1)/4, and + * A = y (inexactly). + * + * y < RECIP_EPSILON. So the following + * scaling should avoid any underflow problems. + */ + *sqrt_A2my2 = x * (4 / DBL_EPSILON / DBL_EPSILON) * y / + sqrt((y + 1) * (y - 1)); + *new_y = y * (4 / DBL_EPSILON / DBL_EPSILON); + } else { /* if (y < 1) */ + /* + * fm = 1-y >= DBL_EPSILON, fp is of order x^2, and + * A = 1 (inexactly). + */ + *sqrt_A2my2 = sqrt((1 - y) * (1 + y)); + } + } +} + +/* + * casinh(z) = z + O(z^3) as z -> 0 + * + * casinh(z) = sign(x)*clog(sign(x)*z) + O(1/z^2) as z -> infinity + * The above formula works for the imaginary part as well, because + * Im(casinh(z)) = sign(x)*atan2(sign(x)*y, fabs(x)) + O(y/z^3) + * as z -> infinity, uniformly in y + */ +double complex +casinh(double complex z) +{ + double x, y, ax, ay, rx, ry, B, sqrt_A2my2, new_y; + int B_is_usable; + double complex w; + + x = creal(z); + y = cimag(z); + ax = fabs(x); + ay = fabs(y); + + if (isnan(x) || isnan(y)) { + /* casinh(+-Inf + I*NaN) = +-Inf + I*NaN */ + if (isinf(x)) + return (CMPLX(x, y + y)); + /* casinh(NaN + I*+-Inf) = opt(+-)Inf + I*NaN */ + if (isinf(y)) + return (CMPLX(y, x + x)); + /* casinh(NaN + I*0) = NaN + I*0 */ + if (y == 0) + return (CMPLX(x + x, y)); + /* + * All other cases involving NaN return NaN + I*NaN. + * C99 leaves it optional whether to raise invalid if one of + * the arguments is not NaN, so we opt not to raise it. + */ + return (CMPLX(nan_mix(x, y), nan_mix(x, y))); + } + + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { + /* clog...() will raise inexact unless x or y is infinite. */ + if (signbit(x) == 0) + w = clog_for_large_values(z) + m_ln2; + else + w = clog_for_large_values(-z) + m_ln2; + return (CMPLX(copysign(creal(w), x), copysign(cimag(w), y))); + } + + /* Avoid spuriously raising inexact for z = 0. */ + if (x == 0 && y == 0) + return (z); + + /* All remaining cases are inexact. */ + raise_inexact(); + + if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) + return (z); + + do_hard_work(ax, ay, &rx, &B_is_usable, &B, &sqrt_A2my2, &new_y); + if (B_is_usable) + ry = asin(B); + else + ry = atan2(new_y, sqrt_A2my2); + return (CMPLX(copysign(rx, x), copysign(ry, y))); +} + +/* + * casin(z) = reverse(casinh(reverse(z))) + * where reverse(x + I*y) = y + I*x = I*conj(z). + */ +double complex +casin(double complex z) +{ + double complex w = casinh(CMPLX(cimag(z), creal(z))); + + return (CMPLX(cimag(w), creal(w))); +} + +/* + * cacos(z) = PI/2 - casin(z) + * but do the computation carefully so cacos(z) is accurate when z is + * close to 1. + * + * cacos(z) = PI/2 - z + O(z^3) as z -> 0 + * + * cacos(z) = -sign(y)*I*clog(z) + O(1/z^2) as z -> infinity + * The above formula works for the real part as well, because + * Re(cacos(z)) = atan2(fabs(y), x) + O(y/z^3) + * as z -> infinity, uniformly in y + */ +double complex +cacos(double complex z) +{ + double x, y, ax, ay, rx, ry, B, sqrt_A2mx2, new_x; + int sx, sy; + int B_is_usable; + double complex w; + + x = creal(z); + y = cimag(z); + sx = signbit(x); + sy = signbit(y); + ax = fabs(x); + ay = fabs(y); + + if (isnan(x) || isnan(y)) { + /* cacos(+-Inf + I*NaN) = NaN + I*opt(-)Inf */ + if (isinf(x)) + return (CMPLX(y + y, -INFINITY)); + /* cacos(NaN + I*+-Inf) = NaN + I*-+Inf */ + if (isinf(y)) + return (CMPLX(x + x, -y)); + /* cacos(0 + I*NaN) = PI/2 + I*NaN with inexact */ + if (x == 0) + return (CMPLX(pio2_hi + pio2_lo, y + y)); + /* + * All other cases involving NaN return NaN + I*NaN. + * C99 leaves it optional whether to raise invalid if one of + * the arguments is not NaN, so we opt not to raise it. + */ + return (CMPLX(nan_mix(x, y), nan_mix(x, y))); + } + + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { + /* clog...() will raise inexact unless x or y is infinite. */ + w = clog_for_large_values(z); + rx = fabs(cimag(w)); + ry = creal(w) + m_ln2; + if (sy == 0) + ry = -ry; + return (CMPLX(rx, ry)); + } + + /* Avoid spuriously raising inexact for z = 1. */ + if (x == 1 && y == 0) + return (CMPLX(0, -y)); + + /* All remaining cases are inexact. */ + raise_inexact(); + + if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) + return (CMPLX(pio2_hi - (x - pio2_lo), -y)); + + do_hard_work(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x); + if (B_is_usable) { + if (sx == 0) + rx = acos(B); + else + rx = acos(-B); + } else { + if (sx == 0) + rx = atan2(sqrt_A2mx2, new_x); + else + rx = atan2(sqrt_A2mx2, -new_x); + } + if (sy == 0) + ry = -ry; + return (CMPLX(rx, ry)); +} + +/* + * cacosh(z) = I*cacos(z) or -I*cacos(z) + * where the sign is chosen so Re(cacosh(z)) >= 0. + */ +double complex +cacosh(double complex z) +{ + double complex w; + double rx, ry; + + w = cacos(z); + rx = creal(w); + ry = cimag(w); + /* cacosh(NaN + I*NaN) = NaN + I*NaN */ + if (isnan(rx) && isnan(ry)) + return (CMPLX(ry, rx)); + /* cacosh(NaN + I*+-Inf) = +Inf + I*NaN */ + /* cacosh(+-Inf + I*NaN) = +Inf + I*NaN */ + if (isnan(rx)) + return (CMPLX(fabs(ry), rx)); + /* cacosh(0 + I*NaN) = NaN + I*NaN */ + if (isnan(ry)) + return (CMPLX(ry, ry)); + return (CMPLX(fabs(ry), copysign(rx, cimag(z)))); +} + +/* + * Optimized version of clog() for |z| finite and larger than ~RECIP_EPSILON. + */ +static double complex +clog_for_large_values(double complex z) +{ + double x, y; + double ax, ay, t; + + x = creal(z); + y = cimag(z); + ax = fabs(x); + ay = fabs(y); + if (ax < ay) { + t = ax; + ax = ay; + ay = t; + } + + /* + * Avoid overflow in hypot() when x and y are both very large. + * Divide x and y by E, and then add 1 to the logarithm. This + * depends on E being larger than sqrt(2), since the return value of + * hypot cannot overflow if neither argument is greater in magnitude + * than 1/sqrt(2) of the maximum value of the return type. Likewise + * this determines the necessary threshold for using this method + * (however, actually use 1/2 instead as it is simpler). + * + * Dividing by E causes an insignificant loss of accuracy; however + * this method is still poor since it is uneccessarily slow. + */ + if (ax > DBL_MAX / 2) + return (CMPLX(log(hypot(x / m_e, y / m_e)) + 1, atan2(y, x))); + + /* + * Avoid overflow when x or y is large. Avoid underflow when x or + * y is small. + */ + if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN) + return (CMPLX(log(hypot(x, y)), atan2(y, x))); + + return (CMPLX(log(ax * ax + ay * ay) / 2, atan2(y, x))); +} + +/* + * ================= + * | catanh, catan | + * ================= + */ + +/* + * sum_squares(x,y) = x*x + y*y (or just x*x if y*y would underflow). + * Assumes x*x and y*y will not overflow. + * Assumes x and y are finite. + * Assumes y is non-negative. + * Assumes fabs(x) >= DBL_EPSILON. + */ +static inline double +sum_squares(double x, double y) +{ + + /* Avoid underflow when y is small. */ + if (y < SQRT_MIN) + return (x * x); + + return (x * x + y * y); +} + +/* + * real_part_reciprocal(x, y) = Re(1/(x+I*y)) = x/(x*x + y*y). + * Assumes x and y are not NaN, and one of x and y is larger than + * RECIP_EPSILON. We avoid unwarranted underflow. It is important to not use + * the code creal(1/z), because the imaginary part may produce an unwanted + * underflow. + * This is only called in a context where inexact is always raised before + * the call, so no effort is made to avoid or force inexact. + */ +static inline double +real_part_reciprocal(double x, double y) +{ + double scale; + uint32_t hx, hy; + int32_t ix, iy; + + /* + * This code is inspired by the C99 document n1124.pdf, Section G.5.1, + * example 2. + */ + GET_HIGH_WORD(hx, x); + ix = hx & 0x7ff00000; + GET_HIGH_WORD(hy, y); + iy = hy & 0x7ff00000; +#undef BIAS +#define BIAS (DBL_MAX_EXP - 1) +/* XXX more guard digits are useful iff there is extra precision. */ +#define CUTOFF (DBL_MANT_DIG / 2 + 1) /* just half or 1 guard digit */ + if (ix - iy >= CUTOFF << 20 || isinf(x)) + return (1 / x); /* +-Inf -> +-0 is special */ + if (iy - ix >= CUTOFF << 20) + return (x / y / y); /* should avoid double div, but hard */ + if (ix <= (BIAS + DBL_MAX_EXP / 2 - CUTOFF) << 20) + return (x / (x * x + y * y)); + scale = 1; + SET_HIGH_WORD(scale, 0x7ff00000 - ix); /* 2**(1-ilogb(x)) */ + x *= scale; + y *= scale; + return (x / (x * x + y * y) * scale); +} + +/* + * catanh(z) = log((1+z)/(1-z)) / 2 + * = log1p(4*x / |z-1|^2) / 4 + * + I * atan2(2*y, (1-x)*(1+x)-y*y) / 2 + * + * catanh(z) = z + O(z^3) as z -> 0 + * + * catanh(z) = 1/z + sign(y)*I*PI/2 + O(1/z^3) as z -> infinity + * The above formula works for the real part as well, because + * Re(catanh(z)) = x/|z|^2 + O(x/z^4) + * as z -> infinity, uniformly in x + */ +double complex +catanh(double complex z) +{ + double x, y, ax, ay, rx, ry; + + x = creal(z); + y = cimag(z); + ax = fabs(x); + ay = fabs(y); + + /* This helps handle many cases. */ + if (y == 0 && ax <= 1) + return (CMPLX(atanh(x), y)); + + /* To ensure the same accuracy as atan(), and to filter out z = 0. */ + if (x == 0) + return (CMPLX(x, atan(y))); + + if (isnan(x) || isnan(y)) { + /* catanh(+-Inf + I*NaN) = +-0 + I*NaN */ + if (isinf(x)) + return (CMPLX(copysign(0, x), y + y)); + /* catanh(NaN + I*+-Inf) = sign(NaN)0 + I*+-PI/2 */ + if (isinf(y)) + return (CMPLX(copysign(0, x), + copysign(pio2_hi + pio2_lo, y))); + /* + * All other cases involving NaN return NaN + I*NaN. + * C99 leaves it optional whether to raise invalid if one of + * the arguments is not NaN, so we opt not to raise it. + */ + return (CMPLX(nan_mix(x, y), nan_mix(x, y))); + } + + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) + return (CMPLX(real_part_reciprocal(x, y), + copysign(pio2_hi + pio2_lo, y))); + + if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) { + /* + * z = 0 was filtered out above. All other cases must raise + * inexact, but this is the only case that needs to do it + * explicitly. + */ + raise_inexact(); + return (z); + } + + if (ax == 1 && ay < DBL_EPSILON) + rx = (m_ln2 - log(ay)) / 2; + else + rx = log1p(4 * ax / sum_squares(ax - 1, ay)) / 4; + + if (ax == 1) + ry = atan2(2, -ay) / 2; + else if (ay < DBL_EPSILON) + ry = atan2(2 * ay, (1 - ax) * (1 + ax)) / 2; + else + ry = atan2(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2; + + return (CMPLX(copysign(rx, x), copysign(ry, y))); +} + +/* + * catan(z) = reverse(catanh(reverse(z))) + * where reverse(x + I*y) = y + I*x = I*conj(z). + */ +double complex +catan(double complex z) +{ + double complex w = catanh(CMPLX(cimag(z), creal(z))); + + return (CMPLX(cimag(w), creal(w))); +} + +#if LDBL_MANT_DIG == 53 +__weak_reference(cacosh, cacoshl); +__weak_reference(cacos, cacosl); +__weak_reference(casinh, casinhl); +__weak_reference(casin, casinl); +__weak_reference(catanh, catanhl); +__weak_reference(catan, catanl); +#endif diff --git a/libc/tinymath/catrigf.c b/libc/tinymath/catrigf.c new file mode 100644 index 000000000..0e21adfe4 --- /dev/null +++ b/libc/tinymath/catrigf.c @@ -0,0 +1,377 @@ +/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ +│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +╚──────────────────────────────────────────────────────────────────────────────╝ +│ │ +│ FreeBSD lib/msun/src/catrigf.c │ +│ Copyright (c) 2012 Stephen Montgomery-Smith │ +│ │ +│ Redistribution and use in source and binary forms, with or without │ +│ modification, are permitted provided that the following conditions │ +│ are met: │ +│ 1. Redistributions of source code must retain the above copyright │ +│ notice, this list of conditions and the following disclaimer. │ +│ 2. Redistributions in binary form must reproduce the above copyright │ +│ notice, this list of conditions and the following disclaimer in the │ +│ documentation and/or other materials provided with the distribution. │ +│ │ +│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND │ +│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE │ +│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE │ +│ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE │ +│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL │ +│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS │ +│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) │ +│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT │ +│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY │ +│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF │ +│ SUCH DAMAGE. │ +│ │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/tinymath/freebsd.internal.h" +__static_yoink("freebsd_complex_notice"); + +#undef isinf +#define isinf(x) (fabsf(x) == INFINITY) +#undef isnan +#define isnan(x) ((x) != (x)) +#define raise_inexact() do { volatile float _j = 1 + tiny; (void)_j; } while(0) +#undef signbit +#define signbit(x) (__builtin_signbitf(x)) + +static const float +A_crossover = 10, +B_crossover = 0.6417, +FOUR_SQRT_MIN = 0x1p-61, +QUARTER_SQRT_MAX = 0x1p61, +m_e = 2.7182818285e0, /* 0xadf854.0p-22 */ +m_ln2 = 6.9314718056e-1, /* 0xb17218.0p-24 */ +pio2_hi = 1.5707962513e0, /* 0xc90fda.0p-23 */ +RECIP_EPSILON = 1 / FLT_EPSILON, +SQRT_3_EPSILON = 5.9801995673e-4, /* 0x9cc471.0p-34 */ +SQRT_6_EPSILON = 8.4572793338e-4, /* 0xddb3d7.0p-34 */ +SQRT_MIN = 0x1p-63; + +static const volatile float +pio2_lo = 7.5497899549e-8, /* 0xa22169.0p-47 */ +tiny = 0x1p-100; + +static float complex clog_for_large_values(float complex z); + +static inline float +f(float a, float b, float hypot_a_b) +{ + if (b < 0) + return ((hypot_a_b - b) / 2); + if (b == 0) + return (a / 2); + return (a * a / (hypot_a_b + b) / 2); +} + +static inline void +do_hard_work(float x, float y, float *rx, int *B_is_usable, float *B, + float *sqrt_A2my2, float *new_y) +{ + float R, S, A; + float Am1, Amy; + + R = hypotf(x, y + 1); + S = hypotf(x, y - 1); + + A = (R + S) / 2; + if (A < 1) + A = 1; + + if (A < A_crossover) { + if (y == 1 && x < FLT_EPSILON * FLT_EPSILON / 128) { + *rx = sqrtf(x); + } else if (x >= FLT_EPSILON * fabsf(y - 1)) { + Am1 = f(x, 1 + y, R) + f(x, 1 - y, S); + *rx = log1pf(Am1 + sqrtf(Am1 * (A + 1))); + } else if (y < 1) { + *rx = x / sqrtf((1 - y) * (1 + y)); + } else { + *rx = log1pf((y - 1) + sqrtf((y - 1) * (y + 1))); + } + } else { + *rx = logf(A + sqrtf(A * A - 1)); + } + + *new_y = y; + + if (y < FOUR_SQRT_MIN) { + *B_is_usable = 0; + *sqrt_A2my2 = A * (2 / FLT_EPSILON); + *new_y = y * (2 / FLT_EPSILON); + return; + } + + *B = y / A; + *B_is_usable = 1; + + if (*B > B_crossover) { + *B_is_usable = 0; + if (y == 1 && x < FLT_EPSILON / 128) { + *sqrt_A2my2 = sqrtf(x) * sqrtf((A + y) / 2); + } else if (x >= FLT_EPSILON * fabsf(y - 1)) { + Amy = f(x, y + 1, R) + f(x, y - 1, S); + *sqrt_A2my2 = sqrtf(Amy * (A + y)); + } else if (y > 1) { + *sqrt_A2my2 = x * (4 / FLT_EPSILON / FLT_EPSILON) * y / + sqrtf((y + 1) * (y - 1)); + *new_y = y * (4 / FLT_EPSILON / FLT_EPSILON); + } else { + *sqrt_A2my2 = sqrtf((1 - y) * (1 + y)); + } + } +} + +float complex +casinhf(float complex z) +{ + float x, y, ax, ay, rx, ry, B, sqrt_A2my2, new_y; + int B_is_usable; + float complex w; + + x = crealf(z); + y = cimagf(z); + ax = fabsf(x); + ay = fabsf(y); + + if (isnan(x) || isnan(y)) { + if (isinf(x)) + return (CMPLXF(x, y + y)); + if (isinf(y)) + return (CMPLXF(y, x + x)); + if (y == 0) + return (CMPLXF(x + x, y)); + return (CMPLXF(nan_mix(x, y), nan_mix(x, y))); + } + + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { + if (signbit(x) == 0) + w = clog_for_large_values(z) + m_ln2; + else + w = clog_for_large_values(-z) + m_ln2; + return (CMPLXF(copysignf(crealf(w), x), + copysignf(cimagf(w), y))); + } + + if (x == 0 && y == 0) + return (z); + + raise_inexact(); + + if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) + return (z); + + do_hard_work(ax, ay, &rx, &B_is_usable, &B, &sqrt_A2my2, &new_y); + if (B_is_usable) + ry = asinf(B); + else + ry = atan2f(new_y, sqrt_A2my2); + return (CMPLXF(copysignf(rx, x), copysignf(ry, y))); +} + +float complex +casinf(float complex z) +{ + float complex w = casinhf(CMPLXF(cimagf(z), crealf(z))); + + return (CMPLXF(cimagf(w), crealf(w))); +} + +float complex +cacosf(float complex z) +{ + float x, y, ax, ay, rx, ry, B, sqrt_A2mx2, new_x; + int sx, sy; + int B_is_usable; + float complex w; + + x = crealf(z); + y = cimagf(z); + sx = signbit(x); + sy = signbit(y); + ax = fabsf(x); + ay = fabsf(y); + + if (isnan(x) || isnan(y)) { + if (isinf(x)) + return (CMPLXF(y + y, -INFINITY)); + if (isinf(y)) + return (CMPLXF(x + x, -y)); + if (x == 0) + return (CMPLXF(pio2_hi + pio2_lo, y + y)); + return (CMPLXF(nan_mix(x, y), nan_mix(x, y))); + } + + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { + w = clog_for_large_values(z); + rx = fabsf(cimagf(w)); + ry = crealf(w) + m_ln2; + if (sy == 0) + ry = -ry; + return (CMPLXF(rx, ry)); + } + + if (x == 1 && y == 0) + return (CMPLXF(0, -y)); + + raise_inexact(); + + if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) + return (CMPLXF(pio2_hi - (x - pio2_lo), -y)); + + do_hard_work(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x); + if (B_is_usable) { + if (sx == 0) + rx = acosf(B); + else + rx = acosf(-B); + } else { + if (sx == 0) + rx = atan2f(sqrt_A2mx2, new_x); + else + rx = atan2f(sqrt_A2mx2, -new_x); + } + if (sy == 0) + ry = -ry; + return (CMPLXF(rx, ry)); +} + +float complex +cacoshf(float complex z) +{ + float complex w; + float rx, ry; + + w = cacosf(z); + rx = crealf(w); + ry = cimagf(w); + if (isnan(rx) && isnan(ry)) + return (CMPLXF(ry, rx)); + if (isnan(rx)) + return (CMPLXF(fabsf(ry), rx)); + if (isnan(ry)) + return (CMPLXF(ry, ry)); + return (CMPLXF(fabsf(ry), copysignf(rx, cimagf(z)))); +} + +static float complex +clog_for_large_values(float complex z) +{ + float x, y; + float ax, ay, t; + + x = crealf(z); + y = cimagf(z); + ax = fabsf(x); + ay = fabsf(y); + if (ax < ay) { + t = ax; + ax = ay; + ay = t; + } + + if (ax > FLT_MAX / 2) + return (CMPLXF(logf(hypotf(x / m_e, y / m_e)) + 1, + atan2f(y, x))); + + if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN) + return (CMPLXF(logf(hypotf(x, y)), atan2f(y, x))); + + return (CMPLXF(logf(ax * ax + ay * ay) / 2, atan2f(y, x))); +} + +static inline float +sum_squares(float x, float y) +{ + + if (y < SQRT_MIN) + return (x * x); + + return (x * x + y * y); +} + +static inline float +real_part_reciprocal(float x, float y) +{ + float scale; + uint32_t hx, hy; + int32_t ix, iy; + + GET_FLOAT_WORD(hx, x); + ix = hx & 0x7f800000; + GET_FLOAT_WORD(hy, y); + iy = hy & 0x7f800000; +#undef BIAS +#define BIAS (FLT_MAX_EXP - 1) +#define CUTOFF (FLT_MANT_DIG / 2 + 1) + if (ix - iy >= CUTOFF << 23 || isinf(x)) + return (1 / x); + if (iy - ix >= CUTOFF << 23) + return (x / y / y); + if (ix <= (BIAS + FLT_MAX_EXP / 2 - CUTOFF) << 23) + return (x / (x * x + y * y)); + SET_FLOAT_WORD(scale, 0x7f800000 - ix); + x *= scale; + y *= scale; + return (x / (x * x + y * y) * scale); +} + +float complex +catanhf(float complex z) +{ + float x, y, ax, ay, rx, ry; + + x = crealf(z); + y = cimagf(z); + ax = fabsf(x); + ay = fabsf(y); + + if (y == 0 && ax <= 1) + return (CMPLXF(atanhf(x), y)); + + if (x == 0) + return (CMPLXF(x, atanf(y))); + + if (isnan(x) || isnan(y)) { + if (isinf(x)) + return (CMPLXF(copysignf(0, x), y + y)); + if (isinf(y)) + return (CMPLXF(copysignf(0, x), + copysignf(pio2_hi + pio2_lo, y))); + return (CMPLXF(nan_mix(x, y), nan_mix(x, y))); + } + + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) + return (CMPLXF(real_part_reciprocal(x, y), + copysignf(pio2_hi + pio2_lo, y))); + + if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) { + raise_inexact(); + return (z); + } + + if (ax == 1 && ay < FLT_EPSILON) + rx = (m_ln2 - logf(ay)) / 2; + else + rx = log1pf(4 * ax / sum_squares(ax - 1, ay)) / 4; + + if (ax == 1) + ry = atan2f(2, -ay) / 2; + else if (ay < FLT_EPSILON) + ry = atan2f(2 * ay, (1 - ax) * (1 + ax)) / 2; + else + ry = atan2f(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2; + + return (CMPLXF(copysignf(rx, x), copysignf(ry, y))); +} + +float complex +catanf(float complex z) +{ + float complex w = catanhf(CMPLXF(cimagf(z), crealf(z))); + + return (CMPLXF(cimagf(w), crealf(w))); +} diff --git a/libc/tinymath/cos.c b/libc/tinymath/cos.c index d1b2b58f4..6933ebac5 100644 --- a/libc/tinymath/cos.c +++ b/libc/tinymath/cos.c @@ -78,7 +78,6 @@ __static_yoink("fdlibm_notice"); /** * Returns cosine of 𝑥. - * @note should take ~5ns */ double cos(double x) { diff --git a/libc/tinymath/cosf.c b/libc/tinymath/cosf.c index 36d3ca2db..0e26909f7 100644 --- a/libc/tinymath/cosf.c +++ b/libc/tinymath/cosf.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ +│ Optimized Routines │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,92 +25,63 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/math.h" -#include "libc/tinymath/complex.internal.h" -#include "libc/tinymath/feval.internal.h" -#include "libc/tinymath/kernel.internal.h" -__static_yoink("freebsd_libm_notice"); -__static_yoink("musl_libc_notice"); -__static_yoink("fdlibm_notice"); - - -/* origin: FreeBSD /usr/src/lib/msun/src/s_cosf.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - * Optimized by Bruce D. Evans. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -/* Small multiples of pi/2 rounded to double precision. */ -static const double -c1pio2 = 1*M_PI_2, /* 0x3FF921FB, 0x54442D18 */ -c2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */ -c3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */ -c4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */ +#include "libc/tinymath/sincosf.internal.h" +__static_yoink("arm_optimized_routines_notice"); /** - * Returns cosine of 𝑥. - * @note should take about 5ns + * Returns cosine of y. + * + * This is a fast cosf implementation. The worst-case ULP is 0.5607, and + * the maximum relative error is 0.5303 * 2^-23. A single-step range + * reduction is used for small values. Large inputs have their range + * reduced using fast integer arithmetic. + * + * @raise EDOM and FE_INVALID if y is an infinity */ -float cosf(float x) +float +cosf (float y) { - double y; - uint32_t ix; - unsigned n, sign; + double x = y; + double s; + int n; + const sincos_t *p = &__sincosf_table[0]; - GET_FLOAT_WORD(ix, x); - sign = ix >> 31; - ix &= 0x7fffffff; + if (abstop12 (y) < abstop12 (pio4f)) + { + double x2 = x * x; - if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */ - if (ix < 0x39800000) { /* |x| < 2**-12 */ - /* raise inexact if x != 0 */ - FORCE_EVAL(x + 0x1p120f); - return 1.0f; - } - return __cosdf(x); - } - if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */ - if (ix > 0x4016cbe3) /* |x| ~> 3*pi/4 */ - return -__cosdf(sign ? x+c2pio2 : x-c2pio2); - else { - if (sign) - return __sindf(x + c1pio2); - else - return __sindf(c1pio2 - x); - } - } - if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */ - if (ix > 0x40afeddf) /* |x| ~> 7*pi/4 */ - return __cosdf(sign ? x+c4pio2 : x-c4pio2); - else { - if (sign) - return __sindf(-x - c3pio2); - else - return __sindf(x - c3pio2); - } - } + if (unlikely (abstop12 (y) < abstop12 (0x1p-12f))) + return 1.0f; - /* cos(Inf or NaN) is NaN */ - if (ix >= 0x7f800000) - return x-x; + return sinf_poly (x, x2, p, 1); + } + else if (likely (abstop12 (y) < abstop12 (120.0f))) + { + x = reduce_fast (x, p, &n); - /* general argument reduction needed */ - n = __rem_pio2f(x,&y); - switch (n&3) { - case 0: return __cosdf(y); - case 1: return __sindf(-y); - case 2: return -__cosdf(y); - default: - return __sindf(y); - } + /* Setup the signs for sin and cos. */ + s = p->sign[n & 3]; + + if (n & 2) + p = &__sincosf_table[1]; + + return sinf_poly (x * s, x * x, p, n ^ 1); + } + else if (abstop12 (y) < abstop12 (INFINITY)) + { + uint32_t xi = asuint (y); + int sign = xi >> 31; + + x = reduce_large (xi, &n); + + /* Setup signs for sin and cos - include original sign. */ + s = p->sign[(n + sign) & 3]; + + if ((n + sign) & 2) + p = &__sincosf_table[1]; + + return sinf_poly (x * s, x * x, p, n ^ 1); + } + else + return __math_invalidf (y); } diff --git a/libc/tinymath/erf.c b/libc/tinymath/erf.c index a3b976358..cd2948c0a 100644 --- a/libc/tinymath/erf.c +++ b/libc/tinymath/erf.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ +│ Optimized Routines │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,310 +25,247 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/math.h" -__static_yoink("musl_libc_notice"); -__static_yoink("fdlibm_notice"); +#include "libc/tinymath/arm.internal.h" +__static_yoink("arm_optimized_routines_notice"); -/* origin: FreeBSD /usr/src/lib/msun/src/s_erf.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* double erf(double x) - * double erfc(double x) - * x - * 2 |\ - * erf(x) = --------- | exp(-t*t)dt - * sqrt(pi) \| - * 0 - * - * erfc(x) = 1-erf(x) - * Note that - * erf(-x) = -erf(x) - * erfc(-x) = 2 - erfc(x) - * - * Method: - * 1. For |x| in [0, 0.84375] - * erf(x) = x + x*R(x^2) - * erfc(x) = 1 - erf(x) if x in [-.84375,0.25] - * = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375] - * where R = P/Q where P is an odd poly of degree 8 and - * Q is an odd poly of degree 10. - * -57.90 - * | R - (erf(x)-x)/x | <= 2 - * - * - * Remark. The formula is derived by noting - * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....) - * and that - * 2/sqrt(pi) = 1.128379167095512573896158903121545171688 - * is close to one. The interval is chosen because the fix - * point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is - * near 0.6174), and by some experiment, 0.84375 is chosen to - * guarantee the error is less than one ulp for erf. - * - * 2. For |x| in [0.84375,1.25], let s = |x| - 1, and - * c = 0.84506291151 rounded to single (24 bits) - * erf(x) = sign(x) * (c + P1(s)/Q1(s)) - * erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0 - * 1+(c+P1(s)/Q1(s)) if x < 0 - * |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06 - * Remark: here we use the taylor series expansion at x=1. - * erf(1+s) = erf(1) + s*Poly(s) - * = 0.845.. + P1(s)/Q1(s) - * That is, we use rational approximation to approximate - * erf(1+s) - (c = (single)0.84506291151) - * Note that |P1/Q1|< 0.078 for x in [0.84375,1.25] - * where - * P1(s) = degree 6 poly in s - * Q1(s) = degree 6 poly in s - * - * 3. For x in [1.25,1/0.35(~2.857143)], - * erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1) - * erf(x) = 1 - erfc(x) - * where - * R1(z) = degree 7 poly in z, (z=1/x^2) - * S1(z) = degree 8 poly in z - * - * 4. For x in [1/0.35,28] - * erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0 - * = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6 x >= 28 - * erf(x) = sign(x) *(1 - tiny) (raise inexact) - * erfc(x) = tiny*tiny (raise underflow) if x > 0 - * = 2 - tiny if x<0 - * - * 7. Special case: - * erf(0) = 0, erf(inf) = 1, erf(-inf) = -1, - * erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2, - * erfc/erf(NaN) is NaN - */ +#define TwoOverSqrtPiMinusOne 0x1.06eba8214db69p-3 +#define C 0x1.b0ac16p-1 +#define PA __erf_data.erf_poly_A +#define NA __erf_data.erf_ratio_N_A +#define DA __erf_data.erf_ratio_D_A +#define NB __erf_data.erf_ratio_N_B +#define DB __erf_data.erf_ratio_D_B +#define PC __erf_data.erfc_poly_C +#define PD __erf_data.erfc_poly_D +#define PE __erf_data.erfc_poly_E +#define PF __erf_data.erfc_poly_F -static const double -erx = 8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */ -/* - * Coefficients for approximation to erf on [0,0.84375] - */ -efx8 = 1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */ -pp0 = 1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */ -pp1 = -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */ -pp2 = -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */ -pp3 = -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */ -pp4 = -2.37630166566501626084e-05, /* 0xBEF8EAD6, 0x120016AC */ -qq1 = 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */ -qq2 = 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */ -qq3 = 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */ -qq4 = 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */ -qq5 = -3.96022827877536812320e-06, /* 0xBED09C43, 0x42A26120 */ -/* - * Coefficients for approximation to erf in [0.84375,1.25] - */ -pa0 = -2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */ -pa1 = 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */ -pa2 = -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */ -pa3 = 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */ -pa4 = -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */ -pa5 = 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */ -pa6 = -2.16637559486879084300e-03, /* 0xBF61BF38, 0x0A96073F */ -qa1 = 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */ -qa2 = 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */ -qa3 = 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */ -qa4 = 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */ -qa5 = 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */ -qa6 = 1.19844998467991074170e-02, /* 0x3F888B54, 0x5735151D */ -/* - * Coefficients for approximation to erfc in [1.25,1/0.35] - */ -ra0 = -9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */ -ra1 = -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */ -ra2 = -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */ -ra3 = -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */ -ra4 = -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */ -ra5 = -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */ -ra6 = -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */ -ra7 = -9.81432934416914548592e+00, /* 0xC023A0EF, 0xC69AC25C */ -sa1 = 1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */ -sa2 = 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */ -sa3 = 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */ -sa4 = 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */ -sa5 = 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */ -sa6 = 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */ -sa7 = 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */ -sa8 = -6.04244152148580987438e-02, /* 0xBFAEEFF2, 0xEE749A62 */ -/* - * Coefficients for approximation to erfc in [1/.35,28] - */ -rb0 = -9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */ -rb1 = -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */ -rb2 = -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */ -rb3 = -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */ -rb4 = -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */ -rb5 = -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */ -rb6 = -4.83519191608651397019e+02, /* 0xC07E384E, 0x9BDC383F */ -sb1 = 3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */ -sb2 = 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */ -sb3 = 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */ -sb4 = 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */ -sb5 = 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */ -sb6 = 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */ -sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */ - -#define asuint(f) ((union{float _f; uint32_t _i;}){f})._i -#define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f -#define asuint64(f) ((union{double _f; uint64_t _i;}){f})._i -#define asdouble(i) ((union{uint64_t _i; double _f;}){i})._f -#define INSERT_WORDS(d,hi,lo) \ -do { \ - (d) = asdouble(((uint64_t)(hi)<<32) | (uint32_t)(lo)); \ -} while (0) -#define GET_HIGH_WORD(hi,d) \ -do { \ - (hi) = asuint64(d) >> 32; \ -} while (0) -#define GET_LOW_WORD(lo,d) \ -do { \ - (lo) = (uint32_t)asuint64(d); \ -} while (0) -#define SET_HIGH_WORD(d,hi) \ - INSERT_WORDS(d, hi, (uint32_t)asuint64(d)) -#define SET_LOW_WORD(d,lo) \ - INSERT_WORDS(d, asuint64(d)>>32, lo) - -static double erfc1(double x) +/* Top 32 bits of a double. */ +static inline uint32_t +top32 (double x) { - double_t s,P,Q; - - s = fabs(x) - 1; - P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))); - Q = 1+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6))))); - return 1 - erx - P/Q; -} - -static double erfc2(uint32_t ix, double x) -{ - double_t s,R,S; - double z; - - if (ix < 0x3ff40000) /* |x| < 1.25 */ - return erfc1(x); - - x = fabs(x); - s = 1/(x*x); - if (ix < 0x4006db6d) { /* |x| < 1/.35 ~ 2.85714 */ - R = ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*( - ra5+s*(ra6+s*ra7)))))); - S = 1.0+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*( - sa5+s*(sa6+s*(sa7+s*sa8))))))); - } else { /* |x| > 1/.35 */ - R = rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*( - rb5+s*rb6))))); - S = 1.0+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*( - sb5+s*(sb6+s*sb7)))))); - } - z = x; - SET_LOW_WORD(z,0); - return exp(-z*z-0.5625)*exp((z-x)*(z+x)+R/S)/x; + return asuint64 (x) >> 32; } /** - * Returns error function of 𝑥. + * Returns error function of x. + * + * Highest measured error is 1.01 ULPs at 0x1.39956ac43382fp+0. + * + * @raise ERANGE on underflow */ -double erf(double x) +double +erf (double x) { - double r,s,z,y; - uint32_t ix; - int sign; + /* Get top word and sign. */ + uint32_t ix = top32 (x); + uint32_t ia = ix & 0x7fffffff; + uint32_t sign = ix >> 31; - GET_HIGH_WORD(ix, x); - sign = ix>>31; - ix &= 0x7fffffff; - if (ix >= 0x7ff00000) { - /* erf(nan)=nan, erf(+-inf)=+-1 */ - return 1-2*sign + 1/x; + /* Normalized and subnormal cases */ + if (ia < 0x3feb0000) + { /* a = |x| < 0.84375. */ + + if (ia < 0x3e300000) + { /* a < 2^(-28). */ + if (ia < 0x00800000) + { /* a < 2^(-1015). */ + double y = fma (TwoOverSqrtPiMinusOne, x, x); + return check_uflow (y); + } + return x + TwoOverSqrtPiMinusOne * x; } - if (ix < 0x3feb0000) { /* |x| < 0.84375 */ - if (ix < 0x3e300000) { /* |x| < 2**-28 */ - /* avoid underflow */ - return 0.125*(8*x + efx8*x); - } - z = x*x; - r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4))); - s = 1.0+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))); - y = r/s; - return x + x*y; + + double x2 = x * x; + + if (ia < 0x3fe00000) + { /* a < 0.5 - Use polynomial approximation. */ + double r1 = fma (x2, PA[1], PA[0]); + double r2 = fma (x2, PA[3], PA[2]); + double r3 = fma (x2, PA[5], PA[4]); + double r4 = fma (x2, PA[7], PA[6]); + double r5 = fma (x2, PA[9], PA[8]); + double x4 = x2 * x2; + double r = r5; + r = fma (x4, r, r4); + r = fma (x4, r, r3); + r = fma (x4, r, r2); + r = fma (x4, r, r1); + return fma (r, x, x); /* This fma is crucial for accuracy. */ } - if (ix < 0x40180000) /* 0.84375 <= |x| < 6 */ - y = 1 - erfc2(ix,x); - else - y = 1 - 0x1p-1022; - return sign ? -y : y; + else + { /* 0.5 <= a < 0.84375 - Use rational approximation. */ + double x4, x8, r1n, r2n, r1d, r2d, r3d; + + r1n = fma (x2, NA[1], NA[0]); + x4 = x2 * x2; + r2n = fma (x2, NA[3], NA[2]); + x8 = x4 * x4; + r1d = fma (x2, DA[0], 1.0); + r2d = fma (x2, DA[2], DA[1]); + r3d = fma (x2, DA[4], DA[3]); + double P = r1n + x4 * r2n + x8 * NA[4]; + double Q = r1d + x4 * r2d + x8 * r3d; + return fma (P / Q, x, x); + } + } + else if (ia < 0x3ff40000) + { /* 0.84375 <= |x| < 1.25. */ + double a2, a4, a6, r1n, r2n, r3n, r4n, r1d, r2d, r3d, r4d; + double a = fabs (x) - 1.0; + r1n = fma (a, NB[1], NB[0]); + a2 = a * a; + r1d = fma (a, DB[0], 1.0); + a4 = a2 * a2; + r2n = fma (a, NB[3], NB[2]); + a6 = a4 * a2; + r2d = fma (a, DB[2], DB[1]); + r3n = fma (a, NB[5], NB[4]); + r3d = fma (a, DB[4], DB[3]); + r4n = NB[6]; + r4d = DB[5]; + double P = r1n + a2 * r2n + a4 * r3n + a6 * r4n; + double Q = r1d + a2 * r2d + a4 * r3d + a6 * r4d; + if (sign) + return -C - P / Q; + else + return C + P / Q; + } + else if (ia < 0x40000000) + { /* 1.25 <= |x| < 2.0. */ + double a = fabs (x); + a = a - 1.25; + + double r1 = fma (a, PC[1], PC[0]); + double r2 = fma (a, PC[3], PC[2]); + double r3 = fma (a, PC[5], PC[4]); + double r4 = fma (a, PC[7], PC[6]); + double r5 = fma (a, PC[9], PC[8]); + double r6 = fma (a, PC[11], PC[10]); + double r7 = fma (a, PC[13], PC[12]); + double r8 = fma (a, PC[15], PC[14]); + + double a2 = a * a; + + double r = r8; + r = fma (a2, r, r7); + r = fma (a2, r, r6); + r = fma (a2, r, r5); + r = fma (a2, r, r4); + r = fma (a2, r, r3); + r = fma (a2, r, r2); + r = fma (a2, r, r1); + + if (sign) + return -1.0 + r; + else + return 1.0 - r; + } + else if (ia < 0x400a0000) + { /* 2 <= |x| < 3.25. */ + double a = fabs (x); + a = fma (0.5, a, -1.0); + + double r1 = fma (a, PD[1], PD[0]); + double r2 = fma (a, PD[3], PD[2]); + double r3 = fma (a, PD[5], PD[4]); + double r4 = fma (a, PD[7], PD[6]); + double r5 = fma (a, PD[9], PD[8]); + double r6 = fma (a, PD[11], PD[10]); + double r7 = fma (a, PD[13], PD[12]); + double r8 = fma (a, PD[15], PD[14]); + double r9 = fma (a, PD[17], PD[16]); + + double a2 = a * a; + + double r = r9; + r = fma (a2, r, r8); + r = fma (a2, r, r7); + r = fma (a2, r, r6); + r = fma (a2, r, r5); + r = fma (a2, r, r4); + r = fma (a2, r, r3); + r = fma (a2, r, r2); + r = fma (a2, r, r1); + + if (sign) + return -1.0 + r; + else + return 1.0 - r; + } + else if (ia < 0x40100000) + { /* 3.25 <= |x| < 4.0. */ + double a = fabs (x); + a = a - 3.25; + + double r1 = fma (a, PE[1], PE[0]); + double r2 = fma (a, PE[3], PE[2]); + double r3 = fma (a, PE[5], PE[4]); + double r4 = fma (a, PE[7], PE[6]); + double r5 = fma (a, PE[9], PE[8]); + double r6 = fma (a, PE[11], PE[10]); + double r7 = fma (a, PE[13], PE[12]); + + double a2 = a * a; + + double r = r7; + r = fma (a2, r, r6); + r = fma (a2, r, r5); + r = fma (a2, r, r4); + r = fma (a2, r, r3); + r = fma (a2, r, r2); + r = fma (a2, r, r1); + + if (sign) + return -1.0 + r; + else + return 1.0 - r; + } + else if (ia < 0x4017a000) + { /* 4 <= |x| < 5.90625. */ + double a = fabs (x); + a = fma (0.5, a, -2.0); + + double r1 = fma (a, PF[1], PF[0]); + double r2 = fma (a, PF[3], PF[2]); + double r3 = fma (a, PF[5], PF[4]); + double r4 = fma (a, PF[7], PF[6]); + double r5 = fma (a, PF[9], PF[8]); + double r6 = fma (a, PF[11], PF[10]); + double r7 = fma (a, PF[13], PF[12]); + double r8 = fma (a, PF[15], PF[14]); + double r9 = PF[16]; + + double a2 = a * a; + + double r = r9; + r = fma (a2, r, r8); + r = fma (a2, r, r7); + r = fma (a2, r, r6); + r = fma (a2, r, r5); + r = fma (a2, r, r4); + r = fma (a2, r, r3); + r = fma (a2, r, r2); + r = fma (a2, r, r1); + + if (sign) + return -1.0 + r; + else + return 1.0 - r; + } + else + { + /* Special cases : erf(nan)=nan, erf(+inf)=+1 and erf(-inf)=-1. */ + if (unlikely (ia >= 0x7ff00000)) + return (double) (1.0 - (sign << 1)) + 1.0 / x; + + if (sign) + return -1.0; + else + return 1.0; + } } -/** - * Returns complementary error function of 𝑥. - */ -double erfc(double x) -{ - double r,s,z,y; - uint32_t ix; - int sign; - - GET_HIGH_WORD(ix, x); - sign = ix>>31; - ix &= 0x7fffffff; - if (ix >= 0x7ff00000) { - /* erfc(nan)=nan, erfc(+-inf)=0,2 */ - return 2*sign + 1/x; - } - if (ix < 0x3feb0000) { /* |x| < 0.84375 */ - if (ix < 0x3c700000) /* |x| < 2**-56 */ - return 1.0 - x; - z = x*x; - r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4))); - s = 1.0+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))); - y = r/s; - if (sign || ix < 0x3fd00000) { /* x < 1/4 */ - return 1.0 - (x+x*y); - } - return 0.5 - (x - 0.5 + x*y); - } - if (ix < 0x403c0000) { /* 0.84375 <= |x| < 28 */ - return sign ? 2 - erfc2(ix,x) : erfc2(ix,x); - } - return sign ? 2 - 0x1p-1022 : 0x1p-1022*0x1p-1022; -} - -#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +#if LDBL_MANT_DIG == 53 __weak_reference(erf, erfl); -__weak_reference(erfc, erfcl); #endif diff --git a/libc/tinymath/erf_data.c b/libc/tinymath/erf_data.c new file mode 100644 index 000000000..bfe7e84db --- /dev/null +++ b/libc/tinymath/erf_data.c @@ -0,0 +1,105 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╚──────────────────────────────────────────────────────────────────────────────╝ +│ │ +│ Optimized Routines │ +│ Copyright (c) 2018-2024, Arm Limited. │ +│ │ +│ Permission is hereby granted, free of charge, to any person obtaining │ +│ a copy of this software and associated documentation files (the │ +│ "Software"), to deal in the Software without restriction, including │ +│ without limitation the rights to use, copy, modify, merge, publish, │ +│ distribute, sublicense, and/or sell copies of the Software, and to │ +│ permit persons to whom the Software is furnished to do so, subject to │ +│ the following conditions: │ +│ │ +│ The above copyright notice and this permission notice shall be │ +│ included in all copies or substantial portions of the Software. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ +│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ +│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ +│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ +│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ +│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ +│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ +│ │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/tinymath/arm.internal.h" +__static_yoink("arm_optimized_routines_notice"); + +/* +Minimax approximation of erf +*/ +const struct erf_data __erf_data = { +.erf_poly_A = { +#if ERF_POLY_A_NCOEFFS == 10 +0x1.06eba8214db68p-3, -0x1.812746b037948p-2, 0x1.ce2f21a03872p-4, +-0x1.b82ce30e6548p-6, 0x1.565bcc360a2f2p-8, -0x1.c02d812bc979ap-11, +0x1.f99bddfc1ebe9p-14, -0x1.f42c457cee912p-17, 0x1.b0e414ec20ee9p-20, +-0x1.18c47fd143c5ep-23 +#endif +}, +/* Rational approximation on [0x1p-28, 0.84375] */ +.erf_ratio_N_A = { +0x1.06eba8214db68p-3, -0x1.4cd7d691cb913p-2, -0x1.d2a51dbd7194fp-6, +-0x1.7a291236668e4p-8, -0x1.8ead6120016acp-16 +}, +.erf_ratio_D_A = { +0x1.97779cddadc09p-2, 0x1.0a54c5536cebap-4, 0x1.4d022c4d36b0fp-8, +0x1.15dc9221c1a1p-13, -0x1.09c4342a2612p-18 +}, +/* Rational approximation on [0.84375, 1.25] */ +.erf_ratio_N_B = { +-0x1.359b8bef77538p-9, 0x1.a8d00ad92b34dp-2, -0x1.7d240fbb8c3f1p-2, +0x1.45fca805120e4p-2, -0x1.c63983d3e28ecp-4, 0x1.22a36599795ebp-5, +-0x1.1bf380a96073fp-9 +}, +.erf_ratio_D_B = { +0x1.b3e6618eee323p-4, 0x1.14af092eb6f33p-1, 0x1.2635cd99fe9a7p-4, +0x1.02660e763351fp-3, 0x1.bedc26b51dd1cp-7, 0x1.88b545735151dp-7 +}, +.erfc_poly_C = { +#if ERFC_POLY_C_NCOEFFS == 16 +/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=15 a=1.25 b=2 c=1 d=1.25 */ +0x1.3bcd133aa0ffcp-4, -0x1.e4652fadcb702p-3, 0x1.2ebf3dcca0446p-2, +-0x1.571d01c62d66p-3, 0x1.93a9a8f5b3413p-8, 0x1.8281cbcc2cd52p-5, +-0x1.5cffd86b4de16p-6, -0x1.db4ccf595053ep-9, 0x1.757cbf8684edap-8, +-0x1.ce7dfd2a9e56ap-11, -0x1.99ee3bc5a3263p-11, 0x1.3c57cf9213f5fp-12, +0x1.60692996bf254p-14, -0x1.6e44cb7c1fa2ap-14, 0x1.9d4484ac482b2p-16, +-0x1.578c9e375d37p-19 +#endif +}, +.erfc_poly_D = { +#if ERFC_POLY_D_NCOEFFS == 18 +/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=17 a=2 b=3.25 c=2 d=2 */ +0x1.328f5ec350e5p-8, -0x1.529b9e8cf8e99p-5, 0x1.529b9e8cd9e71p-3, +-0x1.8b0ae3a023bf2p-2, 0x1.1a2c592599d82p-1, -0x1.ace732477e494p-2, +-0x1.e1a06a27920ffp-6, 0x1.bae92a6d27af6p-2, -0x1.a15470fcf5ce7p-2, +0x1.bafe45d18e213p-6, 0x1.0d950680d199ap-2, -0x1.8c9481e8f22e3p-3, +-0x1.158450ed5c899p-4, 0x1.c01f2973b44p-3, -0x1.73ed2827546a7p-3, +0x1.47733687d1ff7p-4, -0x1.2dec70d00b8e1p-6, 0x1.a947ab83cd4fp-10 +#endif +}, +.erfc_poly_E = { +#if ERFC_POLY_E_NCOEFFS == 14 +/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=13 a=3.25 b=4 c=1 d=3.25 */ +0x1.20c13035539e4p-18, -0x1.e9b5e8d16df7ep-16, 0x1.8de3cd4733bf9p-14, +-0x1.9aa48beb8382fp-13, 0x1.2c7d713370a9fp-12, -0x1.490b12110b9e2p-12, +0x1.1459c5d989d23p-12, -0x1.64b28e9f1269p-13, 0x1.57c76d9d05cf8p-14, +-0x1.bf271d9951cf8p-16, 0x1.db7ea4d4535c9p-19, 0x1.91c2e102d5e49p-20, +-0x1.e9f0826c2149ep-21, 0x1.60eebaea236e1p-23 +#endif +}, +.erfc_poly_F = { +#if ERFC_POLY_F_NCOEFFS == 17 +/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=16 a=4 b=5.90625 c=2 d=4 */ +0x1.08ddd130d1fa6p-26, -0x1.10b146f59ff06p-22, 0x1.10b135328b7b2p-19, +-0x1.6039988e7575fp-17, 0x1.497d365e19367p-15, -0x1.da48d9afac83ep-14, +0x1.1024c9b1fbb48p-12, -0x1.fc962e7066272p-12, 0x1.87297282d4651p-11, +-0x1.f057b255f8c59p-11, 0x1.0228d0eee063p-10, -0x1.b1b21b84ec41cp-11, +0x1.1ead8ae9e1253p-11, -0x1.1e708fba37fccp-12, 0x1.9559363991edap-14, +-0x1.68c827b783d9cp-16, 0x1.2ec4adeccf4a2p-19 +#endif +} +}; diff --git a/libc/tinymath/erfc.c b/libc/tinymath/erfc.c new file mode 100644 index 000000000..29a87f891 --- /dev/null +++ b/libc/tinymath/erfc.c @@ -0,0 +1,279 @@ +/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ +│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +╚──────────────────────────────────────────────────────────────────────────────╝ +│ │ +│ Copyright (c) 1992-2024 The FreeBSD Project │ +│ │ +│ Redistribution and use in source and binary forms, with or without │ +│ modification, are permitted provided that the following conditions │ +│ are met: │ +│ 1. Redistributions of source code must retain the above copyright │ +│ notice, this list of conditions and the following disclaimer. │ +│ 2. Redistributions in binary form must reproduce the above copyright │ +│ notice, this list of conditions and the following disclaimer in the │ +│ documentation and/or other materials provided with the distribution. │ +│ │ +│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND │ +│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE │ +│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE │ +│ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE │ +│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL │ +│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS │ +│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) │ +│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT │ +│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY │ +│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF │ +│ SUCH DAMAGE. │ +│ │ +│ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. │ +│ │ +│ Developed at SunPro, a Sun Microsystems, Inc. business. │ +│ Permission to use, copy, modify, and distribute this │ +│ software is freely granted, provided that this notice │ +│ is preserved. │ +│ │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/tinymath/freebsd.internal.h" +__static_yoink("freebsd_libm_notice"); +__static_yoink("fdlibm_notice"); + +/* double erf(double x) + * double erfc(double x) + * x + * 2 |\ + * erf(x) = --------- | exp(-t*t)dt + * sqrt(pi) \| + * 0 + * + * erfc(x) = 1-erf(x) + * Note that + * erf(-x) = -erf(x) + * erfc(-x) = 2 - erfc(x) + * + * Method: + * 1. For |x| in [0, 0.84375] + * erf(x) = x + x*R(x^2) + * erfc(x) = 1 - erf(x) if x in [-.84375,0.25] + * = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375] + * where R = P/Q where P is an odd poly of degree 8 and + * Q is an odd poly of degree 10. + * -57.90 + * | R - (erf(x)-x)/x | <= 2 + * + * + * Remark. The formula is derived by noting + * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....) + * and that + * 2/sqrt(pi) = 1.128379167095512573896158903121545171688 + * is close to one. The interval is chosen because the fix + * point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is + * near 0.6174), and by some experiment, 0.84375 is chosen to + * guarantee the error is less than one ulp for erf. + * + * 2. For |x| in [0.84375,1.25], let s = |x| - 1, and + * c = 0.84506291151 rounded to single (24 bits) + * erf(x) = sign(x) * (c + P1(s)/Q1(s)) + * erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0 + * 1+(c+P1(s)/Q1(s)) if x < 0 + * |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06 + * Remark: here we use the taylor series expansion at x=1. + * erf(1+s) = erf(1) + s*Poly(s) + * = 0.845.. + P1(s)/Q1(s) + * That is, we use rational approximation to approximate + * erf(1+s) - (c = (single)0.84506291151) + * Note that |P1/Q1|< 0.078 for x in [0.84375,1.25] + * where + * P1(s) = degree 6 poly in s + * Q1(s) = degree 6 poly in s + * + * 3. For x in [1.25,1/0.35(~2.857143)], + * erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1) + * erf(x) = 1 - erfc(x) + * where + * R1(z) = degree 7 poly in z, (z=1/x^2) + * S1(z) = degree 8 poly in z + * + * 4. For x in [1/0.35,28] + * erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0 + * = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6 x >= 28 + * erf(x) = sign(x) *(1 - tiny) (raise inexact) + * erfc(x) = tiny*tiny (raise underflow) if x > 0 + * = 2 - tiny if x<0 + * + * 7. Special case: + * erf(0) = 0, erf(inf) = 1, erf(-inf) = -1, + * erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2, + * erfc/erf(NaN) is NaN + */ + +/* XXX Prevent compilers from erroneously constant folding: */ +static const volatile double tiny= 1e-300; + +static const double +half= 0.5, +one = 1, +two = 2, +/* c = (float)0.84506291151 */ +erx = 8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */ +/* + * In the domain [0, 2**-28], only the first term in the power series + * expansion of erf(x) is used. The magnitude of the first neglected + * terms is less than 2**-84. + */ +efx = 1.28379167095512586316e-01, /* 0x3FC06EBA, 0x8214DB69 */ +efx8= 1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */ +/* + * Coefficients for approximation to erf on [0,0.84375] + */ +pp0 = 1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */ +pp1 = -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */ +pp2 = -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */ +pp3 = -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */ +pp4 = -2.37630166566501626084e-05, /* 0xBEF8EAD6, 0x120016AC */ +qq1 = 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */ +qq2 = 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */ +qq3 = 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */ +qq4 = 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */ +qq5 = -3.96022827877536812320e-06, /* 0xBED09C43, 0x42A26120 */ +/* + * Coefficients for approximation to erf in [0.84375,1.25] + */ +pa0 = -2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */ +pa1 = 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */ +pa2 = -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */ +pa3 = 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */ +pa4 = -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */ +pa5 = 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */ +pa6 = -2.16637559486879084300e-03, /* 0xBF61BF38, 0x0A96073F */ +qa1 = 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */ +qa2 = 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */ +qa3 = 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */ +qa4 = 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */ +qa5 = 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */ +qa6 = 1.19844998467991074170e-02, /* 0x3F888B54, 0x5735151D */ +/* + * Coefficients for approximation to erfc in [1.25,1/0.35] + */ +ra0 = -9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */ +ra1 = -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */ +ra2 = -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */ +ra3 = -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */ +ra4 = -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */ +ra5 = -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */ +ra6 = -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */ +ra7 = -9.81432934416914548592e+00, /* 0xC023A0EF, 0xC69AC25C */ +sa1 = 1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */ +sa2 = 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */ +sa3 = 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */ +sa4 = 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */ +sa5 = 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */ +sa6 = 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */ +sa7 = 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */ +sa8 = -6.04244152148580987438e-02, /* 0xBFAEEFF2, 0xEE749A62 */ +/* + * Coefficients for approximation to erfc in [1/.35,28] + */ +rb0 = -9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */ +rb1 = -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */ +rb2 = -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */ +rb3 = -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */ +rb4 = -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */ +rb5 = -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */ +rb6 = -4.83519191608651397019e+02, /* 0xC07E384E, 0x9BDC383F */ +sb1 = 3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */ +sb2 = 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */ +sb3 = 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */ +sb4 = 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */ +sb5 = 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */ +sb6 = 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */ +sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */ + +/** + * Returns complementary error function of x, i.e. 1.0 - erf(x). + */ +double +erfc(double x) +{ + int32_t hx,ix; + double R,S,P,Q,s,y,z,r; + GET_HIGH_WORD(hx,x); + ix = hx&0x7fffffff; + if(ix>=0x7ff00000) { /* erfc(nan)=nan */ + /* erfc(+-inf)=0,2 */ + return (double)(((uint32_t)hx>>31)<<1)+one/x; + } + + if(ix < 0x3feb0000) { /* |x|<0.84375 */ + if(ix < 0x3c700000) /* |x|<2**-56 */ + return one-x; + z = x*x; + r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4))); + s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))); + y = r/s; + if(hx < 0x3fd00000) { /* x<1/4 */ + return one-(x+x*y); + } else { + r = x*y; + r += (x-half); + return half - r ; + } + } + if(ix < 0x3ff40000) { /* 0.84375 <= |x| < 1.25 */ + s = fabs(x)-one; + P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))); + Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6))))); + if(hx>=0) { + z = one-erx; return z - P/Q; + } else { + z = erx+P/Q; return one+z; + } + } + if (ix < 0x403c0000) { /* |x|<28 */ + x = fabs(x); + s = one/(x*x); + if(ix< 0x4006DB6D) { /* |x| < 1/.35 ~ 2.857143*/ + R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*ra7)))))); + S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+ + s*sa8))))))); + } else { /* |x| >= 1/.35 ~ 2.857143 */ + if(hx<0&&ix>=0x40180000) return two-tiny;/* x < -6 */ + R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*rb6))))); + S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7)))))); + } + z = x; + SET_LOW_WORD(z,0); + r = exp(-z*z-0.5625)*exp((z-x)*(z+x)+R/S); + if(hx>0) return r/x; else return two-r/x; + } else { + if(hx>0) return tiny*tiny; else return two-tiny; + } +} + +#if LDBL_MANT_DIG == 53 +__weak_reference(erfc, erfcl); +#endif diff --git a/libc/tinymath/erff.c b/libc/tinymath/erff.c index 8e7d60006..ae29ebef1 100644 --- a/libc/tinymath/erff.c +++ b/libc/tinymath/erff.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Musl Libc │ -│ Copyright © 2005-2020 Rich Felker, et al. │ +│ Optimized Routines │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,189 +25,99 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/math.h" -__static_yoink("freebsd_libm_notice"); -__static_yoink("musl_libc_notice"); -__static_yoink("fdlibm_notice"); +#include "libc/tinymath/arm.internal.h" +__static_yoink("arm_optimized_routines_notice"); -/* origin: FreeBSD /usr/src/lib/msun/src/s_erff.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ +#define TwoOverSqrtPiMinusOne 0x1.06eba8p-3f +#define A __erff_data.erff_poly_A +#define B __erff_data.erff_poly_B -#define asuint(f) ((union{float _f; uint32_t _i;}){f})._i -#define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f - -static const float -erx = 8.4506291151e-01, /* 0x3f58560b */ -/* - * Coefficients for approximation to erf on [0,0.84375] - */ -efx8 = 1.0270333290e+00, /* 0x3f8375d4 */ -pp0 = 1.2837916613e-01, /* 0x3e0375d4 */ -pp1 = -3.2504209876e-01, /* 0xbea66beb */ -pp2 = -2.8481749818e-02, /* 0xbce9528f */ -pp3 = -5.7702702470e-03, /* 0xbbbd1489 */ -pp4 = -2.3763017452e-05, /* 0xb7c756b1 */ -qq1 = 3.9791721106e-01, /* 0x3ecbbbce */ -qq2 = 6.5022252500e-02, /* 0x3d852a63 */ -qq3 = 5.0813062117e-03, /* 0x3ba68116 */ -qq4 = 1.3249473704e-04, /* 0x390aee49 */ -qq5 = -3.9602282413e-06, /* 0xb684e21a */ -/* - * Coefficients for approximation to erf in [0.84375,1.25] - */ -pa0 = -2.3621185683e-03, /* 0xbb1acdc6 */ -pa1 = 4.1485610604e-01, /* 0x3ed46805 */ -pa2 = -3.7220788002e-01, /* 0xbebe9208 */ -pa3 = 3.1834661961e-01, /* 0x3ea2fe54 */ -pa4 = -1.1089469492e-01, /* 0xbde31cc2 */ -pa5 = 3.5478305072e-02, /* 0x3d1151b3 */ -pa6 = -2.1663755178e-03, /* 0xbb0df9c0 */ -qa1 = 1.0642088205e-01, /* 0x3dd9f331 */ -qa2 = 5.4039794207e-01, /* 0x3f0a5785 */ -qa3 = 7.1828655899e-02, /* 0x3d931ae7 */ -qa4 = 1.2617121637e-01, /* 0x3e013307 */ -qa5 = 1.3637083583e-02, /* 0x3c5f6e13 */ -qa6 = 1.1984500103e-02, /* 0x3c445aa3 */ -/* - * Coefficients for approximation to erfc in [1.25,1/0.35] - */ -ra0 = -9.8649440333e-03, /* 0xbc21a093 */ -ra1 = -6.9385856390e-01, /* 0xbf31a0b7 */ -ra2 = -1.0558626175e+01, /* 0xc128f022 */ -ra3 = -6.2375331879e+01, /* 0xc2798057 */ -ra4 = -1.6239666748e+02, /* 0xc322658c */ -ra5 = -1.8460508728e+02, /* 0xc3389ae7 */ -ra6 = -8.1287437439e+01, /* 0xc2a2932b */ -ra7 = -9.8143291473e+00, /* 0xc11d077e */ -sa1 = 1.9651271820e+01, /* 0x419d35ce */ -sa2 = 1.3765776062e+02, /* 0x4309a863 */ -sa3 = 4.3456588745e+02, /* 0x43d9486f */ -sa4 = 6.4538726807e+02, /* 0x442158c9 */ -sa5 = 4.2900814819e+02, /* 0x43d6810b */ -sa6 = 1.0863500214e+02, /* 0x42d9451f */ -sa7 = 6.5702495575e+00, /* 0x40d23f7c */ -sa8 = -6.0424413532e-02, /* 0xbd777f97 */ -/* - * Coefficients for approximation to erfc in [1/.35,28] - */ -rb0 = -9.8649431020e-03, /* 0xbc21a092 */ -rb1 = -7.9928326607e-01, /* 0xbf4c9dd4 */ -rb2 = -1.7757955551e+01, /* 0xc18e104b */ -rb3 = -1.6063638306e+02, /* 0xc320a2ea */ -rb4 = -6.3756646729e+02, /* 0xc41f6441 */ -rb5 = -1.0250950928e+03, /* 0xc480230b */ -rb6 = -4.8351919556e+02, /* 0xc3f1c275 */ -sb1 = 3.0338060379e+01, /* 0x41f2b459 */ -sb2 = 3.2579251099e+02, /* 0x43a2e571 */ -sb3 = 1.5367296143e+03, /* 0x44c01759 */ -sb4 = 3.1998581543e+03, /* 0x4547fdbb */ -sb5 = 2.5530502930e+03, /* 0x451f90ce */ -sb6 = 4.7452853394e+02, /* 0x43ed43a7 */ -sb7 = -2.2440952301e+01; /* 0xc1b38712 */ - -static float erfc1(float x) +/* Top 12 bits of a float. */ +static inline uint32_t +top12 (float x) { - float_t s,P,Q; - - s = fabsf(x) - 1; - P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))); - Q = 1+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6))))); - return 1 - erx - P/Q; + return asuint (x) >> 20; } -static float erfc2(uint32_t ix, float x) +/* Efficient implementation of erff + using either a pure polynomial approximation or + the exponential of a polynomial. + Worst-case error is 1.09ulps at 0x1.c111acp-1. */ +float +erff (float x) { - float_t s,R,S; - float z; + float r, x2, u; - if (ix < 0x3fa00000) /* |x| < 1.25 */ - return erfc1(x); + /* Get top word. */ + uint32_t ix = asuint (x); + uint32_t sign = ix >> 31; + uint32_t ia12 = top12 (x) & 0x7ff; - x = fabsf(x); - s = 1/(x*x); - if (ix < 0x4036db6d) { /* |x| < 1/0.35 */ - R = ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*( - ra5+s*(ra6+s*ra7)))))); - S = 1.0f+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*( - sa5+s*(sa6+s*(sa7+s*sa8))))))); - } else { /* |x| >= 1/0.35 */ - R = rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*( - rb5+s*rb6))))); - S = 1.0f+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*( - sb5+s*(sb6+s*sb7)))))); + /* Limit of both intervals is 0.875 for performance reasons but coefficients + computed on [0.0, 0.921875] and [0.921875, 4.0], which brought accuracy + from 0.94 to 1.1ulps. */ + if (ia12 < 0x3f6) + { /* a = |x| < 0.875. */ + + /* Tiny and subnormal cases. */ + if (unlikely (ia12 < 0x318)) + { /* |x| < 2^(-28). */ + if (unlikely (ia12 < 0x040)) + { /* |x| < 2^(-119). */ + float y = fmaf (TwoOverSqrtPiMinusOne, x, x); + return check_uflowf (y); + } + return x + TwoOverSqrtPiMinusOne * x; } - ix = asuint(x); - z = asfloat(ix&0xffffe000); - return expf(-z*z - 0.5625f) * expf((z-x)*(z+x) + R/S)/x; -} - -float erff(float x) -{ - float r,s,z,y; - uint32_t ix; - int sign; - - ix = asuint(x); - sign = ix>>31; - ix &= 0x7fffffff; - if (ix >= 0x7f800000) { - /* erf(nan)=nan, erf(+-inf)=+-1 */ - return 1-2*sign + 1/x; - } - if (ix < 0x3f580000) { /* |x| < 0.84375 */ - if (ix < 0x31800000) { /* |x| < 2**-28 */ - /*avoid underflow */ - return 0.125f*(8*x + efx8*x); - } - z = x*x; - r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4))); - s = 1+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))); - y = r/s; - return x + x*y; - } - if (ix < 0x40c00000) /* |x| < 6 */ - y = 1 - erfc2(ix,x); - else - y = 1 - 0x1p-120f; - return sign ? -y : y; -} - -float erfcf(float x) -{ - float r,s,z,y; - uint32_t ix; - int sign; - - ix = asuint(x); - sign = ix>>31; - ix &= 0x7fffffff; - if (ix >= 0x7f800000) { - /* erfc(nan)=nan, erfc(+-inf)=0,2 */ - return 2*sign + 1/x; - } - - if (ix < 0x3f580000) { /* |x| < 0.84375 */ - if (ix < 0x23800000) /* |x| < 2**-56 */ - return 1.0f - x; - z = x*x; - r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4))); - s = 1.0f+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))); - y = r/s; - if (sign || ix < 0x3e800000) /* x < 1/4 */ - return 1.0f - (x+x*y); - return 0.5f - (x - 0.5f + x*y); - } - if (ix < 0x41e00000) { /* |x| < 28 */ - return sign ? 2 - erfc2(ix,x) : erfc2(ix,x); - } - return sign ? 2 - 0x1p-120f : 0x1p-120f*0x1p-120f; + + x2 = x * x; + + /* Normalized cases (|x| < 0.921875). Use Horner scheme for x+x*P(x^2). */ + r = A[5]; + r = fmaf (r, x2, A[4]); + r = fmaf (r, x2, A[3]); + r = fmaf (r, x2, A[2]); + r = fmaf (r, x2, A[1]); + r = fmaf (r, x2, A[0]); + r = fmaf (r, x, x); + } + else if (ia12 < 0x408) + { /* |x| < 4.0 - Use a custom Estrin scheme. */ + + float a = fabsf (x); + /* Start with Estrin scheme on high order (small magnitude) coefficients. */ + r = fmaf (B[6], a, B[5]); + u = fmaf (B[4], a, B[3]); + x2 = x * x; + r = fmaf (r, x2, u); + /* Then switch to pure Horner scheme. */ + r = fmaf (r, a, B[2]); + r = fmaf (r, a, B[1]); + r = fmaf (r, a, B[0]); + r = fmaf (r, a, a); + /* Single precision exponential with ~0.5ulps, + ensures erff has max. rel. error + < 1ulp on [0.921875, 4.0], + < 1.1ulps on [0.875, 4.0]. */ + r = expf (-r); + /* Explicit copysign (calling copysignf increases latency). */ + if (sign) + r = -1.0f + r; + else + r = 1.0f - r; + } + else + { /* |x| >= 4.0. */ + + /* Special cases : erff(nan)=nan, erff(+inf)=+1 and erff(-inf)=-1. */ + if (unlikely (ia12 >= 0x7f8)) + return (1.f - (float) ((ix >> 31) << 1)) + 1.f / x; + + /* Explicit copysign (calling copysignf increases latency). */ + if (sign) + r = -1.0f; + else + r = 1.0f; + } + return r; } diff --git a/libc/tinymath/log1pf_data.c b/libc/tinymath/erff_data.c similarity index 83% rename from libc/tinymath/log1pf_data.c rename to libc/tinymath/erff_data.c index abcc21bef..6ae973f28 100644 --- a/libc/tinymath/log1pf_data.c +++ b/libc/tinymath/erff_data.c @@ -1,9 +1,9 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi │ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,12 +25,19 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/log1pf_data.internal.h" +#include "libc/tinymath/arm.internal.h" __static_yoink("arm_optimized_routines_notice"); -/* Polynomial coefficients generated using floating-point minimax - algorithm, see tools/log1pf.sollya for details. */ -const struct log1pf_data __log1pf_data - = {.coeffs = {-0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f, - -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, 0x1.abcb6p-4f, - -0x1.6f0d5ep-5f}}; +/* Minimax approximation of erff. */ +const struct erff_data __erff_data = { +.erff_poly_A = { +0x1.06eba6p-03f, -0x1.8126e0p-02f, 0x1.ce1a46p-04f, +-0x1.b68bd2p-06f, 0x1.473f48p-08f, -0x1.3a1a82p-11f +}, +.erff_poly_B = { +0x1.079d0cp-3f, 0x1.450aa0p-1f, 0x1.b55cb0p-4f, +-0x1.8d6300p-6f, 0x1.fd1336p-9f, -0x1.91d2ccp-12f, +0x1.222900p-16f +} +}; + diff --git a/libc/tinymath/exp.c b/libc/tinymath/exp.c index 4d6cf8541..d0956ec2d 100644 --- a/libc/tinymath/exp.c +++ b/libc/tinymath/exp.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,19 +25,9 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/likely.h" -#include "libc/math.h" -#include "libc/tinymath/exp_data.internal.h" -#include "libc/tinymath/internal.h" +#include "libc/tinymath/arm.internal.h" __static_yoink("arm_optimized_routines_notice"); -/* - * Double-precision e^x function. - * - * Copyright (c) 2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ - #define N (1 << EXP_TABLE_BITS) #define InvLn2N __exp_data.invln2N #define NegLn2hiN __exp_data.negln2hiN @@ -48,6 +38,7 @@ __static_yoink("arm_optimized_routines_notice"); #define C3 __exp_data.poly[6 - EXP_POLY_ORDER] #define C4 __exp_data.poly[7 - EXP_POLY_ORDER] #define C5 __exp_data.poly[8 - EXP_POLY_ORDER] +#define C6 __exp_data.poly[9 - EXP_POLY_ORDER] /* Handle cases that may overflow or underflow when computing the result that is scale*(1+TMP) without intermediate rounding. The bit representation of @@ -56,114 +47,154 @@ __static_yoink("arm_optimized_routines_notice"); a double. (int32_t)KI is the k used in the argument reduction and exponent adjustment of scale, positive k here means the result may overflow and negative k means the result may underflow. */ -static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) +static inline double +specialcase (double_t tmp, uint64_t sbits, uint64_t ki) { - double_t scale, y; + double_t scale, y; - if ((ki & 0x80000000) == 0) { - /* k > 0, the exponent of scale might have overflowed by <= 460. */ - sbits -= 1009ull << 52; - scale = asdouble(sbits); - y = 0x1p1009 * (scale + scale * tmp); - return eval_as_double(y); - } - /* k < 0, need special care in the subnormal range. */ - sbits += 1022ull << 52; - scale = asdouble(sbits); - y = scale + scale * tmp; - if (y < 1.0) { - /* Round y to the right precision before scaling it into the subnormal - range to avoid double rounding that can cause 0.5+E/2 ulp error where - E is the worst-case ulp error outside the subnormal range. So this - is only useful if the goal is better than 1 ulp worst-case error. */ - double_t hi, lo; - lo = scale - y + scale * tmp; - hi = 1.0 + y; - lo = 1.0 - hi + y + lo; - y = eval_as_double(hi + lo) - 1.0; - /* Avoid -0.0 with downward rounding. */ - if (WANT_ROUNDING && y == 0.0) - y = 0.0; - /* The underflow exception needs to be signaled explicitly. */ - fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022); - } - y = 0x1p-1022 * y; - return eval_as_double(y); + if ((ki & 0x80000000) == 0) + { + /* k > 0, the exponent of scale might have overflowed by <= 460. */ + sbits -= 1009ull << 52; + scale = asdouble (sbits); + y = 0x1p1009 * (scale + scale * tmp); + return check_oflow (eval_as_double (y)); + } + /* k < 0, need special care in the subnormal range. */ + sbits += 1022ull << 52; + scale = asdouble (sbits); + y = scale + scale * tmp; + if (y < 1.0) + { + /* Round y to the right precision before scaling it into the subnormal + range to avoid double rounding that can cause 0.5+E/2 ulp error where + E is the worst-case ulp error outside the subnormal range. So this + is only useful if the goal is better than 1 ulp worst-case error. */ + double_t hi, lo; + lo = scale - y + scale * tmp; + hi = 1.0 + y; + lo = 1.0 - hi + y + lo; + y = eval_as_double (hi + lo) - 1.0; + /* Avoid -0.0 with downward rounding. */ + if (WANT_ROUNDING && y == 0.0) + y = 0.0; + /* The underflow exception needs to be signaled explicitly. */ + force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022); + } + y = 0x1p-1022 * y; + return check_uflow (eval_as_double (y)); } /* Top 12 bits of a double (sign and exponent bits). */ -static inline uint32_t top12(double x) +static inline uint32_t +top12 (double x) { - return asuint64(x) >> 52; + return asuint64 (x) >> 52; +} + +/* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. + If hastail is 0 then xtail is assumed to be 0 too. */ +static inline double +exp_inline (double x, double xtail, int hastail) +{ + uint32_t abstop; + uint64_t ki, idx, top, sbits; + /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ + double_t kd, z, r, r2, scale, tail, tmp; + + abstop = top12 (x) & 0x7ff; + if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54))) + { + if (abstop - top12 (0x1p-54) >= 0x80000000) + /* Avoid spurious underflow for tiny x. */ + /* Note: 0 is common input. */ + return WANT_ROUNDING ? 1.0 + x : 1.0; + if (abstop >= top12 (1024.0)) + { + if (asuint64 (x) == asuint64 (-INFINITY)) + return 0.0; + if (abstop >= top12 (INFINITY)) + return 1.0 + x; + if (asuint64 (x) >> 63) + return __math_uflow (0); + else + return __math_oflow (0); + } + /* Large x is special cased below. */ + abstop = 0; + } + + /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ + /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ + z = InvLn2N * x; +#if TOINT_INTRINSICS + kd = roundtoint (z); + ki = converttoint (z); +#elif EXP_USE_TOINT_NARROW + /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */ + kd = eval_as_double (z + Shift); + ki = asuint64 (kd) >> 16; + kd = (double_t) (int32_t) ki; +#else + /* z - kd is in [-1, 1] in non-nearest rounding modes. */ + kd = eval_as_double (z + Shift); + ki = asuint64 (kd); + kd -= Shift; +#endif + r = x + kd * NegLn2hiN + kd * NegLn2loN; + /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ + if (hastail) + r += xtail; + /* 2^(k/N) ~= scale * (1 + tail). */ + idx = 2 * (ki % N); + top = ki << (52 - EXP_TABLE_BITS); + tail = asdouble (T[idx]); + /* This is only a valid scale when -1023*N < k < 1024*N. */ + sbits = T[idx + 1] + top; + /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */ + /* Evaluation is optimized assuming superscalar pipelined execution. */ + r2 = r * r; + /* Without fma the worst case error is 0.25/N ulp larger. */ + /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */ +#if EXP_POLY_ORDER == 4 + tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4); +#elif EXP_POLY_ORDER == 5 + tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); +#elif EXP_POLY_ORDER == 6 + tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6); +#endif + if (unlikely (abstop == 0)) + return specialcase (tmp, sbits, ki); + scale = asdouble (sbits); + /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there + is no spurious underflow here even without fma. */ + return eval_as_double (scale + scale * tmp); } /** * Returns 𝑒^x. + * + * @raise ERANGE on overflow or underflow */ -double exp(double x) +double +exp (double x) { - uint32_t abstop; - uint64_t ki, idx, top, sbits; - double_t kd, z, r, r2, scale, tail, tmp; - - abstop = top12(x) & 0x7ff; - if (UNLIKELY(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) { - if (abstop - top12(0x1p-54) >= 0x80000000) - /* Avoid spurious underflow for tiny x. */ - /* Note: 0 is common input. */ - return WANT_ROUNDING ? 1.0 + x : 1.0; - if (abstop >= top12(1024.0)) { - if (asuint64(x) == asuint64(-INFINITY)) - return 0.0; - if (abstop >= top12(INFINITY)) - return 1.0 + x; - if (asuint64(x) >> 63) - return __math_uflow(0); - else - return __math_oflow(0); - } - /* Large x is special cased below. */ - abstop = 0; - } - - /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ - /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ - z = InvLn2N * x; -#if TOINT_INTRINSICS - kd = roundtoint(z); - ki = converttoint(z); -#elif EXP_USE_TOINT_NARROW - /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */ - kd = eval_as_double(z + Shift); - ki = asuint64(kd) >> 16; - kd = (double_t)(int32_t)ki; -#else - /* z - kd is in [-1, 1] in non-nearest rounding modes. */ - kd = eval_as_double(z + Shift); - ki = asuint64(kd); - kd -= Shift; -#endif - r = x + kd * NegLn2hiN + kd * NegLn2loN; - /* 2^(k/N) ~= scale * (1 + tail). */ - idx = 2 * (ki % N); - top = ki << (52 - EXP_TABLE_BITS); - tail = asdouble(T[idx]); - /* This is only a valid scale when -1023*N < k < 1024*N. */ - sbits = T[idx + 1] + top; - /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */ - /* Evaluation is optimized assuming superscalar pipelined execution. */ - r2 = r * r; - /* Without fma the worst case error is 0.25/N ulp larger. */ - /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */ - tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); - if (UNLIKELY(abstop == 0)) - return specialcase(tmp, sbits, ki); - scale = asdouble(sbits); - /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there - is no spurious underflow here even without fma. */ - return eval_as_double(scale + scale * tmp); + return exp_inline (x, 0, 0); } -#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 -__weak_reference(exp, expl); +/* May be useful for implementing pow where more than double + precision input is needed. */ +double +__exp_dd (double x, double xtail) +{ + return exp_inline (x, xtail, 1); +} + +#if USE_GLIBC_ABI +strong_alias (exp, __exp_finite) +hidden_alias (exp, __ieee754_exp) +hidden_alias (__exp_dd, __exp1) +# if LDBL_MANT_DIG == 53 +long double expl (long double x) { return exp (x); } +# endif #endif diff --git a/libc/tinymath/exp10.c b/libc/tinymath/exp10.c index db15c0c61..f80d13963 100644 --- a/libc/tinymath/exp10.c +++ b/libc/tinymath/exp10.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ +│ Optimized Routines │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,33 +25,135 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/math.h" -__static_yoink("musl_libc_notice"); +#include "libc/tinymath/arm.internal.h" +__static_yoink("arm_optimized_routines_notice"); + +#define N (1 << EXP_TABLE_BITS) +#define IndexMask (N - 1) +#define OFlowBound 0x1.34413509f79ffp8 /* log10(DBL_MAX). */ +#define UFlowBound -0x1.5ep+8 /* -350. */ +#define SmallTop 0x3c6 /* top12(0x1p-57). */ +#define BigTop 0x407 /* top12(0x1p8). */ +#define Thresh 0x41 /* BigTop - SmallTop. */ +#define Shift __exp_data.shift +#define C(i) __exp_data.exp10_poly[i] + +static double +special_case (uint64_t sbits, double_t tmp, uint64_t ki) +{ + double_t scale, y; + + if (ki - (1ull << 16) < 0x80000000) + { + /* The exponent of scale might have overflowed by 1. */ + sbits -= 1ull << 52; + scale = asdouble (sbits); + y = 2 * (scale + scale * tmp); + return check_oflow (eval_as_double (y)); + } + + /* n < 0, need special care in the subnormal range. */ + sbits += 1022ull << 52; + scale = asdouble (sbits); + y = scale + scale * tmp; + + if (y < 1.0) + { + /* Round y to the right precision before scaling it into the subnormal + range to avoid double rounding that can cause 0.5+E/2 ulp error where + E is the worst-case ulp error outside the subnormal range. So this + is only useful if the goal is better than 1 ulp worst-case error. */ + double_t lo = scale - y + scale * tmp; + double_t hi = 1.0 + y; + lo = 1.0 - hi + y + lo; + y = eval_as_double (hi + lo) - 1.0; + /* Avoid -0.0 with downward rounding. */ + if (WANT_ROUNDING && y == 0.0) + y = 0.0; + /* The underflow exception needs to be signaled explicitly. */ + force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022); + } + y = 0x1p-1022 * y; + + return check_uflow (y); +} /** * Returns 10ˣ. + * + * The largest observed error is ~0.513 ULP. */ -double exp10(double x) +double +exp10 (double x) { - static const double p10[] = { - 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, - 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, - 1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, - 1e10, 1e11, 1e12, 1e13, 1e14, 1e15 - }; - double n, y = modf(x, &n); - union {double f; uint64_t i;} u = {n}; - /* fabs(n) < 16 without raising invalid on nan */ - if ((u.i>>52 & 0x7ff) < 0x3ff+4) { - if (!y) return p10[(int)n+15]; - y = exp2(3.32192809488736234787031942948939 * y); - return y * p10[(int)n+15]; - } - return pow(10.0, x); + uint64_t ix = asuint64 (x); + uint32_t abstop = (ix >> 52) & 0x7ff; + + if (unlikely (abstop - SmallTop >= Thresh)) + { + if (abstop - SmallTop >= 0x80000000) + /* Avoid spurious underflow for tiny x. + Note: 0 is common input. */ + return x + 1; + if (abstop == 0x7ff) + return ix == asuint64 (-INFINITY) ? 0.0 : x + 1.0; + if (x >= OFlowBound) + return __math_oflow (0); + if (x < UFlowBound) + return __math_uflow (0); + + /* Large x is special-cased below. */ + abstop = 0; + } + + /* Reduce x: z = x * N / log10(2), k = round(z). */ + double_t z = __exp_data.invlog10_2N * x; + double_t kd; + int64_t ki; +#if TOINT_INTRINSICS + kd = roundtoint (z); + ki = converttoint (z); +#else + kd = eval_as_double (z + Shift); + kd -= Shift; + ki = kd; +#endif + + /* r = x - k * log10(2), r in [-0.5, 0.5]. */ + double_t r = x; + r = __exp_data.neglog10_2hiN * kd + r; + r = __exp_data.neglog10_2loN * kd + r; + + /* exp10(x) = 2^(k/N) * 2^(r/N). + Approximate the two components separately. */ + + /* s = 2^(k/N), using lookup table. */ + uint64_t e = ki << (52 - EXP_TABLE_BITS); + uint64_t i = (ki & IndexMask) * 2; + uint64_t u = __exp_data.tab[i + 1]; + uint64_t sbits = u + e; + + double_t tail = asdouble (__exp_data.tab[i]); + + /* 2^(r/N) ~= 1 + r * Poly(r). */ + double_t r2 = r * r; + double_t p = C (0) + r * C (1); + double_t y = C (2) + r * C (3); + y = y + r2 * C (4); + y = p + r2 * y; + y = tail + y * r; + + if (unlikely (abstop == 0)) + return special_case (sbits, y, ki); + + /* Assemble components: + y = 2^(r/N) * 2^(k/N) + ~= (y + 1) * s. */ + double_t s = asdouble (sbits); + return eval_as_double (s * y + s); } __strong_reference(exp10, pow10); #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 __weak_reference(exp10, exp10l); -__weak_reference(exp10, pow10l); #endif diff --git a/libc/tinymath/exp2.c b/libc/tinymath/exp2.c index 627a823ad..c62be798c 100644 --- a/libc/tinymath/exp2.c +++ b/libc/tinymath/exp2.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ +│ Optimized Routines │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,19 +25,9 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/likely.h" -#include "libc/math.h" -#include "libc/tinymath/exp_data.internal.h" -#include "libc/tinymath/internal.h" +#include "libc/tinymath/arm.internal.h" __static_yoink("arm_optimized_routines_notice"); -/* - * Double-precision 2^x function. - * - * Copyright (c) 2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ - #define N (1 << EXP_TABLE_BITS) #define Shift __exp_data.exp2_shift #define T __exp_data.tab @@ -46,6 +36,7 @@ __static_yoink("arm_optimized_routines_notice"); #define C3 __exp_data.exp2_poly[2] #define C4 __exp_data.exp2_poly[3] #define C5 __exp_data.exp2_poly[4] +#define C6 __exp_data.exp2_poly[5] /* Handle cases that may overflow or underflow when computing the result that is scale*(1+TMP) without intermediate rounding. The bit representation of @@ -54,103 +45,121 @@ __static_yoink("arm_optimized_routines_notice"); a double. (int32_t)KI is the k used in the argument reduction and exponent adjustment of scale, positive k here means the result may overflow and negative k means the result may underflow. */ -static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) +static inline double +specialcase (double_t tmp, uint64_t sbits, uint64_t ki) { - double_t scale, y; + double_t scale, y; - if ((ki & 0x80000000) == 0) { - /* k > 0, the exponent of scale might have overflowed by 1. */ - sbits -= 1ull << 52; - scale = asdouble(sbits); - y = 2 * (scale + scale * tmp); - return eval_as_double(y); - } - /* k < 0, need special care in the subnormal range. */ - sbits += 1022ull << 52; - scale = asdouble(sbits); - y = scale + scale * tmp; - if (y < 1.0) { - /* Round y to the right precision before scaling it into the subnormal - range to avoid double rounding that can cause 0.5+E/2 ulp error where - E is the worst-case ulp error outside the subnormal range. So this - is only useful if the goal is better than 1 ulp worst-case error. */ - double_t hi, lo; - lo = scale - y + scale * tmp; - hi = 1.0 + y; - lo = 1.0 - hi + y + lo; - y = eval_as_double(hi + lo) - 1.0; - /* Avoid -0.0 with downward rounding. */ - if (WANT_ROUNDING && y == 0.0) - y = 0.0; - /* The underflow exception needs to be signaled explicitly. */ - fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022); - } - y = 0x1p-1022 * y; - return eval_as_double(y); + if ((ki & 0x80000000) == 0) + { + /* k > 0, the exponent of scale might have overflowed by 1. */ + sbits -= 1ull << 52; + scale = asdouble (sbits); + y = 2 * (scale + scale * tmp); + return check_oflow (eval_as_double (y)); + } + /* k < 0, need special care in the subnormal range. */ + sbits += 1022ull << 52; + scale = asdouble (sbits); + y = scale + scale * tmp; + if (y < 1.0) + { + /* Round y to the right precision before scaling it into the subnormal + range to avoid double rounding that can cause 0.5+E/2 ulp error where + E is the worst-case ulp error outside the subnormal range. So this + is only useful if the goal is better than 1 ulp worst-case error. */ + double_t hi, lo; + lo = scale - y + scale * tmp; + hi = 1.0 + y; + lo = 1.0 - hi + y + lo; + y = eval_as_double (hi + lo) - 1.0; + /* Avoid -0.0 with downward rounding. */ + if (WANT_ROUNDING && y == 0.0) + y = 0.0; + /* The underflow exception needs to be signaled explicitly. */ + force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022); + } + y = 0x1p-1022 * y; + return check_uflow (eval_as_double (y)); } /* Top 12 bits of a double (sign and exponent bits). */ -static inline uint32_t top12(double x) +static inline uint32_t +top12 (double x) { - return asuint64(x) >> 52; + return asuint64 (x) >> 52; } /** * Returns 2^𝑥. */ -double exp2(double x) +double +exp2 (double x) { - uint32_t abstop; - uint64_t ki, idx, top, sbits; - double_t kd, r, r2, scale, tail, tmp; + uint32_t abstop; + uint64_t ki, idx, top, sbits; + /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ + double_t kd, r, r2, scale, tail, tmp; - abstop = top12(x) & 0x7ff; - if (UNLIKELY(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) { - if (abstop - top12(0x1p-54) >= 0x80000000) - /* Avoid spurious underflow for tiny x. */ - /* Note: 0 is common input. */ - return WANT_ROUNDING ? 1.0 + x : 1.0; - if (abstop >= top12(1024.0)) { - if (asuint64(x) == asuint64(-INFINITY)) - return 0.0; - if (abstop >= top12(INFINITY)) - return 1.0 + x; - if (!(asuint64(x) >> 63)) - return __math_oflow(0); - else if (asuint64(x) >= asuint64(-1075.0)) - return __math_uflow(0); - } - if (2 * asuint64(x) > 2 * asuint64(928.0)) - /* Large x is special cased below. */ - abstop = 0; + abstop = top12 (x) & 0x7ff; + if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54))) + { + if (abstop - top12 (0x1p-54) >= 0x80000000) + /* Avoid spurious underflow for tiny x. */ + /* Note: 0 is common input. */ + return WANT_ROUNDING ? 1.0 + x : 1.0; + if (abstop >= top12 (1024.0)) + { + if (asuint64 (x) == asuint64 (-INFINITY)) + return 0.0; + if (abstop >= top12 (INFINITY)) + return 1.0 + x; + if (!(asuint64 (x) >> 63)) + return __math_oflow (0); + else if (asuint64 (x) >= asuint64 (-1075.0)) + return __math_uflow (0); } + if (2 * asuint64 (x) > 2 * asuint64 (928.0)) + /* Large x is special cased below. */ + abstop = 0; + } - /* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */ - /* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */ - kd = eval_as_double(x + Shift); - ki = asuint64(kd); /* k. */ - kd -= Shift; /* k/N for int k. */ - r = x - kd; - /* 2^(k/N) ~= scale * (1 + tail). */ - idx = 2 * (ki % N); - top = ki << (52 - EXP_TABLE_BITS); - tail = asdouble(T[idx]); - /* This is only a valid scale when -1023*N < k < 1024*N. */ - sbits = T[idx + 1] + top; - /* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */ - /* Evaluation is optimized assuming superscalar pipelined execution. */ - r2 = r * r; - /* Without fma the worst case error is 0.5/N ulp larger. */ - /* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */ - tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); - if (UNLIKELY(abstop == 0)) - return specialcase(tmp, sbits, ki); - scale = asdouble(sbits); - /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there - is no spurious underflow here even without fma. */ - return eval_as_double(scale + scale * tmp); + /* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */ + /* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */ + kd = eval_as_double (x + Shift); + ki = asuint64 (kd); /* k. */ + kd -= Shift; /* k/N for int k. */ + r = x - kd; + /* 2^(k/N) ~= scale * (1 + tail). */ + idx = 2 * (ki % N); + top = ki << (52 - EXP_TABLE_BITS); + tail = asdouble (T[idx]); + /* This is only a valid scale when -1023*N < k < 1024*N. */ + sbits = T[idx + 1] + top; + /* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */ + /* Evaluation is optimized assuming superscalar pipelined execution. */ + r2 = r * r; + /* Without fma the worst case error is 0.5/N ulp larger. */ + /* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */ +#if EXP2_POLY_ORDER == 4 + tmp = tail + r * C1 + r2 * C2 + r * r2 * (C3 + r * C4); +#elif EXP2_POLY_ORDER == 5 + tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); +#elif EXP2_POLY_ORDER == 6 + tmp = tail + r * C1 + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6); +#endif + if (unlikely (abstop == 0)) + return specialcase (tmp, sbits, ki); + scale = asdouble (sbits); + /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there + is no spurious underflow here even without fma. */ + return eval_as_double (scale + scale * tmp); } -#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 -__weak_reference(exp2, exp2l); +#if USE_GLIBC_ABI +strong_alias (exp2, __exp2_finite) +hidden_alias (exp2, __ieee754_exp2) +# if LDBL_MANT_DIG == 53 +long double exp2l (long double x) { return exp2 (x); } +# endif #endif diff --git a/libc/tinymath/exp2f.c b/libc/tinymath/exp2f.c index ad68d4a70..5b134235d 100644 --- a/libc/tinymath/exp2f.c +++ b/libc/tinymath/exp2f.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,19 +25,9 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/likely.h" -#include "libc/math.h" -#include "libc/tinymath/exp2f_data.internal.h" -#include "libc/tinymath/internal.h" +#include "libc/tinymath/arm.internal.h" __static_yoink("arm_optimized_routines_notice"); -/* - * Single-precision 2^x function. - * - * Copyright (c) 2017-2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ - /* EXP2F_TABLE_BITS = 5 EXP2F_POLY_ORDER = 3 @@ -53,48 +43,66 @@ Non-nearest ULP error: 1 (rounded ULP error) #define C __exp2f_data.poly #define SHIFT __exp2f_data.shift_scaled -static inline uint32_t top12(float x) +static inline uint32_t +top12 (float x) { - return asuint(x) >> 20; + return asuint (x) >> 20; } /** * Returns 2^𝑥. + * + * - ULP error: 0.502 (nearest rounding.) + * - Relative error: 1.69 * 2^-34 in [-1/64, 1/64] (before rounding.) + * - Wrong count: 168353 (all nearest rounding wrong results with fma.) + * - Non-nearest ULP error: 1 (rounded ULP error) */ -float exp2f(float x) +float +exp2f (float x) { - uint32_t abstop; - uint64_t ki, t; - double_t kd, xd, z, r, r2, y, s; + uint32_t abstop; + uint64_t ki, t; + /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ + double_t kd, xd, z, r, r2, y, s; - xd = (double_t)x; - abstop = top12(x) & 0x7ff; - if (UNLIKELY(abstop >= top12(128.0f))) { - /* |x| >= 128 or x is nan. */ - if (asuint(x) == asuint(-INFINITY)) - return 0.0f; - if (abstop >= top12(INFINITY)) - return x + x; - if (x > 0.0f) - return __math_oflowf(0); - if (x <= -150.0f) - return __math_uflowf(0); - } + xd = (double_t) x; + abstop = top12 (x) & 0x7ff; + if (unlikely (abstop >= top12 (128.0f))) + { + /* |x| >= 128 or x is nan. */ + if (asuint (x) == asuint (-INFINITY)) + return 0.0f; + if (abstop >= top12 (INFINITY)) + return x + x; + if (x > 0.0f) + return __math_oflowf (0); + if (x <= -150.0f) + return __math_uflowf (0); +#if WANT_ERRNO_UFLOW + if (x < -149.0f) + return __math_may_uflowf (0); +#endif + } - /* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k. */ - kd = eval_as_double(xd + SHIFT); - ki = asuint64(kd); - kd -= SHIFT; /* k/N for int k. */ - r = xd - kd; + /* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k. */ + kd = eval_as_double (xd + SHIFT); + ki = asuint64 (kd); + kd -= SHIFT; /* k/N for int k. */ + r = xd - kd; - /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ - t = T[ki % N]; - t += ki << (52 - EXP2F_TABLE_BITS); - s = asdouble(t); - z = C[0] * r + C[1]; - r2 = r * r; - y = C[2] * r + 1; - y = z * r2 + y; - y = y * s; - return eval_as_float(y); + /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ + t = T[ki % N]; + t += ki << (52 - EXP2F_TABLE_BITS); + s = asdouble (t); + z = C[0] * r + C[1]; + r2 = r * r; + y = C[2] * r + 1; + y = z * r2 + y; + y = y * s; + return eval_as_float (y); } + +#if USE_GLIBC_ABI +strong_alias (exp2f, __exp2f_finite) +hidden_alias (exp2f, __ieee754_exp2f) +#endif diff --git a/libc/tinymath/exp2f_data.c b/libc/tinymath/exp2f_data.c index e2b098251..6e21620d3 100644 --- a/libc/tinymath/exp2f_data.c +++ b/libc/tinymath/exp2f_data.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,16 +25,9 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/exp2f_data.internal.h" +#include "libc/tinymath/arm.internal.h" __static_yoink("arm_optimized_routines_notice"); -/* - * Shared data between expf, exp2f and powf. - * - * Copyright (c) 2017-2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ - #define N (1 << EXP2F_TABLE_BITS) const struct exp2f_data __exp2f_data = { @@ -42,6 +35,15 @@ const struct exp2f_data __exp2f_data = { used for computing 2^(k/N) for an int |k| < 150 N as double(tab[k%N] + (k << 52-BITS)) */ .tab = { +#if N == 8 +0x3ff0000000000000, 0x3fef72b83c7d517b, 0x3fef06fe0a31b715, 0x3feebfdad5362a27, +0x3feea09e667f3bcd, 0x3feeace5422aa0db, 0x3feee89f995ad3ad, 0x3fef5818dcfba487, +#elif N == 16 +0x3ff0000000000000, 0x3fefb5586cf9890f, 0x3fef72b83c7d517b, 0x3fef387a6e756238, +0x3fef06fe0a31b715, 0x3feedea64c123422, 0x3feebfdad5362a27, 0x3feeab07dd485429, +0x3feea09e667f3bcd, 0x3feea11473eb0187, 0x3feeace5422aa0db, 0x3feec49182a3f090, +0x3feee89f995ad3ad, 0x3fef199bdd85529c, 0x3fef5818dcfba487, 0x3fefa4afa2a490da, +#elif N == 32 0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51, 0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1, 0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d, @@ -50,14 +52,48 @@ const struct exp2f_data __exp2f_data = { 0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d, 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069, 0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540, +#elif N == 64 +0x3ff0000000000000, 0x3fefec9a3e778061, 0x3fefd9b0d3158574, 0x3fefc74518759bc8, +0x3fefb5586cf9890f, 0x3fefa3ec32d3d1a2, 0x3fef9301d0125b51, 0x3fef829aaea92de0, +0x3fef72b83c7d517b, 0x3fef635beb6fcb75, 0x3fef54873168b9aa, 0x3fef463b88628cd6, +0x3fef387a6e756238, 0x3fef2b4565e27cdd, 0x3fef1e9df51fdee1, 0x3fef1285a6e4030b, +0x3fef06fe0a31b715, 0x3feefc08b26416ff, 0x3feef1a7373aa9cb, 0x3feee7db34e59ff7, +0x3feedea64c123422, 0x3feed60a21f72e2a, 0x3feece086061892d, 0x3feec6a2b5c13cd0, +0x3feebfdad5362a27, 0x3feeb9b2769d2ca7, 0x3feeb42b569d4f82, 0x3feeaf4736b527da, +0x3feeab07dd485429, 0x3feea76f15ad2148, 0x3feea47eb03a5585, 0x3feea23882552225, +0x3feea09e667f3bcd, 0x3fee9fb23c651a2f, 0x3fee9f75e8ec5f74, 0x3fee9feb564267c9, +0x3feea11473eb0187, 0x3feea2f336cf4e62, 0x3feea589994cce13, 0x3feea8d99b4492ed, +0x3feeace5422aa0db, 0x3feeb1ae99157736, 0x3feeb737b0cdc5e5, 0x3feebd829fde4e50, +0x3feec49182a3f090, 0x3feecc667b5de565, 0x3feed503b23e255d, 0x3feede6b5579fdbf, +0x3feee89f995ad3ad, 0x3feef3a2b84f15fb, 0x3feeff76f2fb5e47, 0x3fef0c1e904bc1d2, +0x3fef199bdd85529c, 0x3fef27f12e57d14b, 0x3fef3720dcef9069, 0x3fef472d4a07897c, +0x3fef5818dcfba487, 0x3fef69e603db3285, 0x3fef7c97337b9b5f, 0x3fef902ee78b3ff6, +0x3fefa4afa2a490da, 0x3fefba1bee615a27, 0x3fefd0765b6e4540, 0x3fefe7c1819e90d8, +#endif }, .shift_scaled = 0x1.8p+52 / N, .poly = { +#if N == 8 + 0x1.c6a00335106e2p-5, 0x1.ec0c313449f55p-3, 0x1.62e431111f69fp-1, +#elif N == 16 + 0x1.c6ac6aa313963p-5, 0x1.ebfff4532d9bap-3, 0x1.62e43001bc49fp-1, +#elif N == 32 0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1, +#elif N == 64 + 0x1.c6b04b4221b2ap-5, 0x1.ebfc213e184d7p-3, 0x1.62e42fefb5b7fp-1, +#endif }, .shift = 0x1.8p+52, .invln2_scaled = 0x1.71547652b82fep+0 * N, .poly_scaled = { +#if N == 8 + 0x1.c6a00335106e2p-5/N/N/N, 0x1.ec0c313449f55p-3/N/N, 0x1.62e431111f69fp-1/N, +#elif N == 16 + 0x1.c6ac6aa313963p-5/N/N/N, 0x1.ebfff4532d9bap-3/N/N, 0x1.62e43001bc49fp-1/N, +#elif N == 32 0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N, +#elif N == 64 + 0x1.c6b04b4221b2ap-5/N/N/N, 0x1.ebfc213e184d7p-3/N/N, 0x1.62e42fefb5b7fp-1/N, +#endif }, }; diff --git a/libc/tinymath/exp2f_data.internal.h b/libc/tinymath/exp2f_data.internal.h deleted file mode 100644 index af157b4e8..000000000 --- a/libc/tinymath/exp2f_data.internal.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_TINYMATH_EXP2F_DATA_INTERNAL_H_ -#define COSMOPOLITAN_LIBC_TINYMATH_EXP2F_DATA_INTERNAL_H_ - -#define EXP2F_TABLE_BITS 5 -#define EXP2F_POLY_ORDER 3 - -COSMOPOLITAN_C_START_ - -extern const struct exp2f_data { - uint64_t tab[1 << EXP2F_TABLE_BITS]; - double shift_scaled; - double poly[EXP2F_POLY_ORDER]; - double shift; - double invln2_scaled; - double poly_scaled[EXP2F_POLY_ORDER]; -} __exp2f_data; - -COSMOPOLITAN_C_END_ -#endif /* COSMOPOLITAN_LIBC_TINYMATH_EXP2F_DATA_INTERNAL_H_ */ diff --git a/libc/tinymath/exp_data.c b/libc/tinymath/exp_data.c index df7efc71e..2bc52bc62 100644 --- a/libc/tinymath/exp_data.c +++ b/libc/tinymath/exp_data.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ +│ Optimized Routines │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,23 +25,31 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/exp_data.internal.h" - -/* - * Shared data between exp, exp2 and pow. - * - * Copyright (c) 2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ +#include "libc/tinymath/arm.internal.h" +__static_yoink("arm_optimized_routines_notice"); #define N (1 << EXP_TABLE_BITS) const struct exp_data __exp_data = { // N/ln2 .invln2N = 0x1.71547652b82fep0 * N, +.invlog10_2N = 0x1.a934f0979a371p1 * N, // -ln2/N +#if N == 64 +.negln2hiN = -0x1.62e42fefa0000p-7, +.negln2loN = -0x1.cf79abc9e3b3ap-46, +#elif N == 128 .negln2hiN = -0x1.62e42fefa0000p-8, .negln2loN = -0x1.cf79abc9e3b3ap-47, +#elif N == 256 +.negln2hiN = -0x1.62e42fefc0000p-9, +.negln2loN = 0x1.c610ca86c3899p-45, +#elif N == 512 +.negln2hiN = -0x1.62e42fef80000p-10, +.negln2loN = -0x1.1cf79abc9e3b4p-45, +#endif +.neglog10_2hiN = -0x1.3441350ap-2 / N, +.neglog10_2loN = 0x1.0c0219dc1da99p-39 / N, // Used for rounding when !TOINT_INTRINSICS #if EXP_USE_TOINT_NARROW .shift = 0x1800000000.8p0, @@ -50,6 +58,24 @@ const struct exp_data __exp_data = { #endif // exp polynomial coefficients. .poly = { +#if N == 64 && EXP_POLY_ORDER == 5 && !EXP_POLY_WIDE +// abs error: 1.5543*2^-60 +// ulp error: 0.529 (0.533 without fma) +// if |x| < ln2/128+eps +// abs error if |x| < ln2/64: 1.7157*2^-50 +0x1.fffffffffdbcdp-2, +0x1.555555555444cp-3, +0x1.555573c6a9f7dp-5, +0x1.1111266d28935p-7, +#elif N == 64 && EXP_POLY_ORDER == 6 && EXP_POLY_WIDE +// abs error: 1.6735*2^-64 +// ulp error: 0.518 (0.522 without fma) +// if |x| < ln2/64 +0x1.5555555548f9ap-3, +0x1.555555554bf5dp-5, +0x1.11115b75f0f4dp-7, +0x1.6c171a6b6303ep-10, +#elif N == 128 && EXP_POLY_ORDER == 5 && !EXP_POLY_WIDE // abs error: 1.555*2^-66 // ulp error: 0.509 (0.511 without fma) // if |x| < ln2/256+eps @@ -59,10 +85,63 @@ const struct exp_data __exp_data = { 0x1.555555555543cp-3, 0x1.55555cf172b91p-5, 0x1.1111167a4d017p-7, +#elif N == 128 && EXP_POLY_ORDER == 5 && EXP_POLY_WIDE +// abs error: 1.5542*2^-60 +// ulp error: 0.521 (0.523 without fma) +// if |x| < ln2/128 +0x1.fffffffffdbcep-2, +0x1.55555555543c2p-3, +0x1.555573c64f2e3p-5, +0x1.111126b4eff73p-7, +#elif N == 128 && EXP_POLY_ORDER == 6 && EXP_POLY_WIDE +// abs error: 1.6861*2^-71 +// ulp error: 0.509 (0.511 without fma) +// if |x| < ln2/128 +0x1.55555555548fdp-3, +0x1.555555555658fp-5, +0x1.111123a859bb6p-7, +0x1.6c16ba6920cabp-10, +#elif N == 256 && EXP_POLY_ORDER == 4 && !EXP_POLY_WIDE +// abs error: 1.43*2^-58 +// ulp error: 0.549 (0.550 without fma) +// if |x| < ln2/512 +0x1p0, // unused +0x1.fffffffffffd4p-2, +0x1.5555571d6ef9p-3, +0x1.5555576a5adcep-5, +#elif N == 256 && EXP_POLY_ORDER == 5 && EXP_POLY_WIDE +// abs error: 1.5547*2^-66 +// ulp error: 0.505 (0.506 without fma) +// if |x| < ln2/256 +0x1.ffffffffffdbdp-2, +0x1.555555555543cp-3, +0x1.55555cf16e1edp-5, +0x1.1111167a4b553p-7, +#elif N == 512 && EXP_POLY_ORDER == 4 && !EXP_POLY_WIDE +// abs error: 1.4300*2^-63 +// ulp error: 0.504 +// if |x| < ln2/1024 +// abs error if |x| < ln2/512: 1.0689*2^-55 +0x1p0, // unused +0x1.ffffffffffffdp-2, +0x1.555555c75bb6p-3, +0x1.555555dec04a8p-5, +#endif }, .exp2_shift = 0x1.8p52 / N, // exp2 polynomial coefficients. .exp2_poly = { +#if N == 64 && EXP2_POLY_ORDER == 6 && EXP2_POLY_WIDE +// abs error: 1.3054*2^-63 +// ulp error: 0.515 +// if |x| < 1/64 +0x1.62e42fefa39efp-1, +0x1.ebfbdff82c58fp-3, +0x1.c6b08d7045cf1p-5, +0x1.3b2ab6fb8fd0ep-7, +0x1.5d884afec48d7p-10, +0x1.43097dc684ae1p-13, +#elif N == 128 && EXP2_POLY_ORDER == 5 && !EXP2_POLY_WIDE // abs error: 1.2195*2^-65 // ulp error: 0.507 (0.511 without fma) // if |x| < 1/256 @@ -72,11 +151,114 @@ const struct exp_data __exp_data = { 0x1.c6b08d70cf4b5p-5, 0x1.3b2abd24650ccp-7, 0x1.5d7e09b4e3a84p-10, +#elif N == 256 && EXP2_POLY_ORDER == 5 && EXP2_POLY_WIDE +// abs error: 1.2195*2^-65 +// ulp error: 0.504 (0.508 without fma) +// if |x| < 1/256 +0x1.62e42fefa39efp-1, +0x1.ebfbdff82c424p-3, +0x1.c6b08d70cf4b5p-5, +0x1.3b2abd24650ccp-7, +0x1.5d7e09b4e3a84p-10, +#elif N == 512 && EXP2_POLY_ORDER == 4 && !EXP2_POLY_WIDE +// abs error: 1.4411*2^-64 +// ulp error: 0.5024 (0.5063 without fma) +// if |x| < 1/1024 +// abs error if |x| < 1/512: 1.9430*2^-56 +0x1.62e42fefa39ecp-1, +0x1.ebfbdff82c58bp-3, +0x1.c6b08e46de41fp-5, +0x1.3b2ab786ee1dap-7, +#endif +}, +.exp10_poly = { +#if EXP10_POLY_WIDE +/* Range is wider if using shift-based reduction: coeffs generated + using Remez in [-log10(2)/128, log10(2)/128 ]. */ +0x1.26bb1bbb55515p1, +0x1.53524c73cd32bp1, +0x1.0470591e1a108p1, +0x1.2bd77b12fe9a8p0, +0x1.14289fef24b78p-1 +#else +/* Coeffs generated using Remez in [-log10(2)/256, log10(2)/256 ]. */ +0x1.26bb1bbb55516p1, +0x1.53524c73ce9fep1, +0x1.0470591ce4b26p1, +0x1.2bd76577fe684p0, +0x1.1446eeccd0efbp-1 +#endif }, // 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N) // tab[2*k] = asuint64(T[k]) // tab[2*k+1] = asuint64(H[k]) - (k << 52)/N .tab = { +#if N == 64 +0x0, 0x3ff0000000000000, +0xbc7160139cd8dc5d, 0x3fefec9a3e778061, +0x3c8cd2523567f613, 0x3fefd9b0d3158574, +0x3c60f74e61e6c861, 0x3fefc74518759bc8, +0x3c979aa65d837b6d, 0x3fefb5586cf9890f, +0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2, +0xbc9556522a2fbd0e, 0x3fef9301d0125b51, +0xbc91c923b9d5f416, 0x3fef829aaea92de0, +0xbc801b15eaa59348, 0x3fef72b83c7d517b, +0x3c8b898c3f1353bf, 0x3fef635beb6fcb75, +0x3c9aecf73e3a2f60, 0x3fef54873168b9aa, +0x3c8a6f4144a6c38d, 0x3fef463b88628cd6, +0x3c968efde3a8a894, 0x3fef387a6e756238, +0x3c80472b981fe7f2, 0x3fef2b4565e27cdd, +0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1, +0x3c8b3782720c0ab4, 0x3fef1285a6e4030b, +0x3c834d754db0abb6, 0x3fef06fe0a31b715, +0x3c8fdd395dd3f84a, 0x3feefc08b26416ff, +0xbc924aedcc4b5068, 0x3feef1a7373aa9cb, +0xbc71d1e83e9436d2, 0x3feee7db34e59ff7, +0x3c859f48a72a4c6d, 0x3feedea64c123422, +0xbc58a78f4817895b, 0x3feed60a21f72e2a, +0x3c4363ed60c2ac11, 0x3feece086061892d, +0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0, +0x3c7690cebb7aafb0, 0x3feebfdad5362a27, +0xbc8f94340071a38e, 0x3feeb9b2769d2ca7, +0xbc78dec6bd0f385f, 0x3feeb42b569d4f82, +0x3c93350518fdd78e, 0x3feeaf4736b527da, +0x3c9063e1e21c5409, 0x3feeab07dd485429, +0x3c9432e62b64c035, 0x3feea76f15ad2148, +0xbc8c33c53bef4da8, 0x3feea47eb03a5585, +0xbc93cedd78565858, 0x3feea23882552225, +0xbc93b3efbf5e2228, 0x3feea09e667f3bcd, +0xbc6367efb86da9ee, 0x3fee9fb23c651a2f, +0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74, +0xbc8619321e55e68a, 0x3fee9feb564267c9, +0xbc7b32dcb94da51d, 0x3feea11473eb0187, +0x3c65ebe1abd66c55, 0x3feea2f336cf4e62, +0xbc9369b6f13b3734, 0x3feea589994cce13, +0xbc94d450d872576e, 0x3feea8d99b4492ed, +0x3c8db72fc1f0eab4, 0x3feeace5422aa0db, +0x3c7bf68359f35f44, 0x3feeb1ae99157736, +0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5, +0xbc92434322f4f9aa, 0x3feebd829fde4e50, +0x3c71affc2b91ce27, 0x3feec49182a3f090, +0xbc87c50422622263, 0x3feecc667b5de565, +0xbc91bbd1d3bcbb15, 0x3feed503b23e255d, +0x3c8469846e735ab3, 0x3feede6b5579fdbf, +0x3c8c1a7792cb3387, 0x3feee89f995ad3ad, +0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb, +0xbc68d6f438ad9334, 0x3feeff76f2fb5e47, +0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2, +0x3c736eae30af0cb3, 0x3fef199bdd85529c, +0x3c84e08fd10959ac, 0x3fef27f12e57d14b, +0x3c676b2c6c921968, 0x3fef3720dcef9069, +0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c, +0x3c74a385a63d07a7, 0x3fef5818dcfba487, +0x3c8e5a50d5c192ac, 0x3fef69e603db3285, +0xbc82d52107b43e1f, 0x3fef7c97337b9b5f, +0x3c74b604603a88d3, 0x3fef902ee78b3ff6, +0xbc8ff7128fd391f0, 0x3fefa4afa2a490da, +0x3c8ec3bc41aa2008, 0x3fefba1bee615a27, +0x3c8a64a931d185ee, 0x3fefd0765b6e4540, +0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8, +#elif N == 128 0x0, 0x3ff0000000000000, 0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335, 0xbc7160139cd8dc5d, 0x3fefec9a3e778061, @@ -205,5 +387,776 @@ const struct exp_data __exp_data = { 0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14, 0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8, 0x3c5305c14160cc89, 0x3feff3c22b8f71f1, +#elif N == 256 +0x0, 0x3ff0000000000000, +0xbc84e82fc61851ac, 0x3feffb1afa5abcbf, +0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335, +0xbc82985dd8521d32, 0x3feff168143b0281, +0xbc7160139cd8dc5d, 0x3fefec9a3e778061, +0x3c651e617061bfbd, 0x3fefe7d42e11bbcc, +0xbc905e7a108766d1, 0x3fefe315e86e7f85, +0x3c845fad437fa426, 0x3fefde5f72f654b1, +0x3c8cd2523567f613, 0x3fefd9b0d3158574, +0xbc954529642b232f, 0x3fefd50a0e3c1f89, +0xbc8bce8023f98efa, 0x3fefd06b29ddf6de, +0x3c8293708ef5c32e, 0x3fefcbd42b72a836, +0x3c60f74e61e6c861, 0x3fefc74518759bc8, +0xbc95b9280905b2a4, 0x3fefc2bdf66607e0, +0x3c90a3e45b33d399, 0x3fefbe3ecac6f383, +0x3c84f31f32c4b7e7, 0x3fefb9c79b1f3919, +0x3c979aa65d837b6d, 0x3fefb5586cf9890f, +0x3c9407fb30d06420, 0x3fefb0f145e46c85, +0x3c8eb51a92fdeffc, 0x3fefac922b7247f7, +0xbc9a5d04b3b9911b, 0x3fefa83b23395dec, +0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2, +0xbc937a01f0739546, 0x3fef9fa55fdfa9c5, +0xbc6a033489906e0b, 0x3fef9b66affed31b, +0x3c8b8268b04ef0a5, 0x3fef973028d7233e, +0xbc9556522a2fbd0e, 0x3fef9301d0125b51, +0xbc9ac46e44a2ebcc, 0x3fef8edbab5e2ab6, +0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc, +0xbc65704e90c9f860, 0x3fef86a814f204ab, +0xbc91c923b9d5f416, 0x3fef829aaea92de0, +0xbc897cea57e46280, 0x3fef7e95934f312e, +0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51, +0x3c56f01429e2b9d2, 0x3fef76a45471c3c2, +0xbc801b15eaa59348, 0x3fef72b83c7d517b, +0x3c6e653b2459034b, 0x3fef6ed48695bbc0, +0xbc8f1ff055de323d, 0x3fef6af9388c8dea, +0x3c92cc7ea345b7dc, 0x3fef672658375d2f, +0x3c8b898c3f1353bf, 0x3fef635beb6fcb75, +0x3c957bfb2876ea9e, 0x3fef5f99f8138a1c, +0xbc96d99c7611eb26, 0x3fef5be084045cd4, +0x3c8cdc1873af2155, 0x3fef582f95281c6b, +0x3c9aecf73e3a2f60, 0x3fef54873168b9aa, +0xbc9493684653a131, 0x3fef50e75eb44027, +0xbc8fe782cb86389d, 0x3fef4d5022fcd91d, +0xbc98e2899077520a, 0x3fef49c18438ce4d, +0x3c8a6f4144a6c38d, 0x3fef463b88628cd6, +0x3c9120fcd4f59273, 0x3fef42be3578a819, +0x3c807a05b0e4047d, 0x3fef3f49917ddc96, +0x3c89b788c188c9b8, 0x3fef3bdda27912d1, +0x3c968efde3a8a894, 0x3fef387a6e756238, +0x3c877afbca90ef84, 0x3fef351ffb82140a, +0x3c875e18f274487d, 0x3fef31ce4fb2a63f, +0x3c91512f082876ee, 0x3fef2e85711ece75, +0x3c80472b981fe7f2, 0x3fef2b4565e27cdd, +0x3c9a02f0c7d75ec6, 0x3fef280e341ddf29, +0xbc96b87b3f71085e, 0x3fef24dfe1f56381, +0xbc803297e78260bf, 0x3fef21ba7591bb70, +0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1, +0xbc95b77e5ccd9fbf, 0x3fef1b8a66d10f13, +0xbc3d219b1a6fbffa, 0x3fef187fd0dad990, +0xbc91e75c40b4251e, 0x3fef157e39771b2f, +0x3c8b3782720c0ab4, 0x3fef1285a6e4030b, +0x3c98a911f1f7785a, 0x3fef0f961f641589, +0x3c6e149289cecb8f, 0x3fef0cafa93e2f56, +0xbc61e7c998db7dbb, 0x3fef09d24abd886b, +0x3c834d754db0abb6, 0x3fef06fe0a31b715, +0x3c85425c11faadf4, 0x3fef0432edeeb2fd, +0x3c864201e2ac744c, 0x3fef0170fc4cd831, +0xbc979517a03e2847, 0x3feefeb83ba8ea32, +0x3c8fdd395dd3f84a, 0x3feefc08b26416ff, +0xbc800e2a46da4bee, 0x3feef96266e3fa2d, +0xbc86a3803b8e5b04, 0x3feef6c55f929ff1, +0xbc87430803972b34, 0x3feef431a2de883b, +0xbc924aedcc4b5068, 0x3feef1a7373aa9cb, +0xbc954de30ae02d94, 0x3feeef26231e754a, +0xbc9907f81b512d8e, 0x3feeecae6d05d866, +0xbc94f2487e1c03ec, 0x3feeea401b7140ef, +0xbc71d1e83e9436d2, 0x3feee7db34e59ff7, +0x3c914a5432fcb2f4, 0x3feee57fbfec6cf4, +0xbc991919b3ce1b15, 0x3feee32dc313a8e5, +0x3c79c3bba5562a2f, 0x3feee0e544ede173, +0x3c859f48a72a4c6d, 0x3feedea64c123422, +0xbc85a71612e21658, 0x3feedc70df1c5175, +0xbc9312607a28698a, 0x3feeda4504ac801c, +0x3c86421f6f1d24d6, 0x3feed822c367a024, +0xbc58a78f4817895b, 0x3feed60a21f72e2a, +0xbc9348a6815fce65, 0x3feed3fb2709468a, +0xbc7c2c9b67499a1b, 0x3feed1f5d950a897, +0x3c835c43984d9871, 0x3feecffa3f84b9d4, +0x3c4363ed60c2ac11, 0x3feece086061892d, +0xbc632afc8d9473a0, 0x3feecc2042a7d232, +0x3c9666093b0664ef, 0x3feeca41ed1d0057, +0xbc95fc5e44de020e, 0x3feec86d668b3237, +0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0, +0xbc7ea0148327c42f, 0x3feec4e1e192aed2, +0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de, +0xbc7a843ad1a88022, 0x3feec17dea6db7d7, +0x3c7690cebb7aafb0, 0x3feebfdad5362a27, +0x3c892ca3bf144e63, 0x3feebe41b817c114, +0x3c931dbdeb54e077, 0x3feebcb299fddd0d, +0xbc902c99b04aa8b0, 0x3feebb2d81d8abff, +0xbc8f94340071a38e, 0x3feeb9b2769d2ca7, +0x3c73e34f67e67118, 0x3feeb8417f4531ee, +0xbc87deccdc93a349, 0x3feeb6daa2cf6642, +0xbc75a3b1197ba0f0, 0x3feeb57de83f4eef, +0xbc78dec6bd0f385f, 0x3feeb42b569d4f82, +0x3c81bd2888075068, 0x3feeb2e2f4f6ad27, +0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f, +0xbc896be8ae89ef8f, 0x3feeb070dde910d2, +0x3c93350518fdd78e, 0x3feeaf4736b527da, +0xbc88e6ac90348602, 0x3feeae27dbe2c4cf, +0x3c7b98b72f8a9b05, 0x3feead12d497c7fd, +0xbc91af7f1365c3ac, 0x3feeac0827ff07cc, +0x3c9063e1e21c5409, 0x3feeab07dd485429, +0xbc943a3540d1898a, 0x3feeaa11fba87a03, +0x3c34c7855019c6ea, 0x3feea9268a5946b7, +0xbc951f58ddaa8090, 0x3feea84590998b93, +0x3c9432e62b64c035, 0x3feea76f15ad2148, +0xbc82e1648e50a17c, 0x3feea6a320dceb71, +0xbc8ce44a6199769f, 0x3feea5e1b976dc09, +0x3c95f30eda98a575, 0x3feea52ae6cdf6f4, +0xbc8c33c53bef4da8, 0x3feea47eb03a5585, +0x3c917ecda8a72159, 0x3feea3dd1d1929fd, +0xbc845378892be9ae, 0x3feea34634ccc320, +0xbc9345f3cee1ae6e, 0x3feea2b9febc8fb7, +0xbc93cedd78565858, 0x3feea23882552225, +0xbc85c33fdf910406, 0x3feea1c1c70833f6, +0x3c5710aa807e1964, 0x3feea155d44ca973, +0x3c81079ab5789604, 0x3feea0f4b19e9538, +0xbc93b3efbf5e2228, 0x3feea09e667f3bcd, +0x3c727df161cd7778, 0x3feea052fa75173e, +0xbc6a12ad8734b982, 0x3feea012750bdabf, +0x3c93f9924a05b767, 0x3fee9fdcddd47645, +0xbc6367efb86da9ee, 0x3fee9fb23c651a2f, +0xbc87557939a8b5ef, 0x3fee9f9298593ae5, +0xbc80dc3d54e08851, 0x3fee9f7df9519484, +0x3c51ed2f56fa9d1a, 0x3fee9f7466f42e87, +0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74, +0xbc88e67a9006c909, 0x3fee9f8286ead08a, +0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174, +0x3c86597566977ac8, 0x3fee9fbd35d7cbfd, +0xbc8619321e55e68a, 0x3fee9feb564267c9, +0x3c92c0b7028a5c3a, 0x3feea024b1ab6e09, +0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f, +0x3c8a30faf49cc78c, 0x3feea0b938ac1cf6, +0xbc7b32dcb94da51d, 0x3feea11473eb0187, +0xbc92dad3519d7b5b, 0x3feea17b0976cfdb, +0x3c94ecfd5467c06b, 0x3feea1ed0130c132, +0x3c87d51410fd15c2, 0x3feea26a62ff86f0, +0x3c65ebe1abd66c55, 0x3feea2f336cf4e62, +0xbc760a3629969871, 0x3feea3878491c491, +0xbc88a1c52fb3cf42, 0x3feea427543e1a12, +0x3c8b18c6e3fdef5d, 0x3feea4d2add106d9, +0xbc9369b6f13b3734, 0x3feea589994cce13, +0x3c90ec1ddcb1390a, 0x3feea64c1eb941f7, +0xbc805e843a19ff1e, 0x3feea71a4623c7ad, +0xbc522cea4f3afa1e, 0x3feea7f4179f5b21, +0xbc94d450d872576e, 0x3feea8d99b4492ed, +0x3c7c88549b958471, 0x3feea9cad931a436, +0x3c90ad675b0e8a00, 0x3feeaac7d98a6699, +0x3c931143962f7877, 0x3feeabd0a478580f, +0x3c8db72fc1f0eab4, 0x3feeace5422aa0db, +0x3c93e9e96f112479, 0x3feeae05bad61778, +0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c, +0xbc8dac42a4a38df0, 0x3feeb06a5e0866d9, +0x3c7bf68359f35f44, 0x3feeb1ae99157736, +0x3c8b99dd98b1ed84, 0x3feeb2fed0282c8a, +0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6, +0xbc7885ad50cbb750, 0x3feeb5c353aa2fe2, +0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5, +0xbc82d5e85f3e0301, 0x3feeb8b82b5f98e5, +0xbc6c23f97c90b959, 0x3feeba44cbc8520f, +0xbc51669428996971, 0x3feebbdd9a7670b3, +0xbc92434322f4f9aa, 0x3feebd829fde4e50, +0x3c71f2b2c1c4c014, 0x3feebf33e47a22a2, +0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba, +0xbc9294f304f166b6, 0x3feec2bb4d53fe0d, +0x3c71affc2b91ce27, 0x3feec49182a3f090, +0xbc8a1e58414c07d3, 0x3feec674194bb8d5, +0x3c6dd235e10a73bb, 0x3feec86319e32323, +0xbc79740b58a20091, 0x3feeca5e8d07f29e, +0xbc87c50422622263, 0x3feecc667b5de565, +0x3c9165830a2b96c2, 0x3feece7aed8eb8bb, +0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33, +0xbc903d5cbe27874b, 0x3feed2c980460ad8, +0xbc91bbd1d3bcbb15, 0x3feed503b23e255d, +0x3c5986178980fce0, 0x3feed74a8af46052, +0x3c90cc319cee31d2, 0x3feed99e1330b358, +0xbc89472975b1f2a5, 0x3feedbfe53c12e59, +0x3c8469846e735ab3, 0x3feede6b5579fdbf, +0x3c7d8157a34b7e7f, 0x3feee0e521356eba, +0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a, +0x3c8c8a4e231ebb7d, 0x3feee5ff3a3c2774, +0x3c8c1a7792cb3387, 0x3feee89f995ad3ad, +0xbc888c8d11a142e5, 0x3feeeb4ce622f2ff, +0xbc907b8f4ad1d9fa, 0x3feeee07298db666, +0x3c889c2ea41433c7, 0x3feef0ce6c9a8952, +0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb, +0xbc7274aedac8ff80, 0x3feef68415b749b1, +0xbc90a40e3da6f640, 0x3feef9728de5593a, +0x3c85c620ce76df06, 0x3feefc6e29f1c52a, +0xbc68d6f438ad9334, 0x3feeff76f2fb5e47, +0xbc8fda52e1b51e41, 0x3fef028cf22749e4, +0xbc91eee26b588a35, 0x3fef05b030a1064a, +0xbc32141a7b3e2cd8, 0x3fef08e0b79a6f1f, +0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2, +0xbc302899507554e5, 0x3fef0f69c3f3a207, +0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09, +0xbc80dda2d4c0010c, 0x3fef16286141b33d, +0x3c736eae30af0cb3, 0x3fef199bdd85529c, +0xbc8a007daadf8d68, 0x3fef1d1cd9fa652c, +0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a, +0x3c836909391181d3, 0x3fef244778fafb22, +0x3c84e08fd10959ac, 0x3fef27f12e57d14b, +0xbc811cd7dbdf9547, 0x3fef2ba88988c933, +0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5, +0xbc7ac28b7bef6621, 0x3fef33405751c4db, +0x3c676b2c6c921968, 0x3fef3720dcef9069, +0xbc7030587207b9e1, 0x3fef3b0f2e6d1675, +0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa, +0xbc8cc734592af7fc, 0x3fef43155b5bab74, +0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c, +0x3c87752a44f587e8, 0x3fef4b532b08c968, +0xbc900dae3875a949, 0x3fef4f87080d89f2, +0x3c85b66fefeef52e, 0x3fef53c8eacaa1d6, +0x3c74a385a63d07a7, 0x3fef5818dcfba487, +0x3c5159d9d908a96e, 0x3fef5c76e862e6d3, +0xbc82919e2040220f, 0x3fef60e316c98398, +0x3c8c254d16117a68, 0x3fef655d71ff6075, +0x3c8e5a50d5c192ac, 0x3fef69e603db3285, +0xbc8d8c329fbd0e03, 0x3fef6e7cd63a8315, +0x3c843a59ac016b4b, 0x3fef7321f301b460, +0xbc8ea6e6fbd5f2a6, 0x3fef77d5641c0658, +0xbc82d52107b43e1f, 0x3fef7c97337b9b5f, +0xbc63e8e3eab2cbb4, 0x3fef81676b197d17, +0xbc892ab93b470dc9, 0x3fef864614f5a129, +0xbc8b7966cd0d2cd9, 0x3fef8b333b16ee12, +0x3c74b604603a88d3, 0x3fef902ee78b3ff6, +0xbc776caa4c2ff1cf, 0x3fef953924676d76, +0x3c83c5ec519d7271, 0x3fef9a51fbc74c83, +0xbc81d5fc525d9940, 0x3fef9f7977cdb740, +0xbc8ff7128fd391f0, 0x3fefa4afa2a490da, +0x3c855cd8aaea3d21, 0x3fefa9f4867cca6e, +0xbc8dae98e223747d, 0x3fefaf482d8e67f1, +0x3c8269947c2bed4a, 0x3fefb4aaa2188510, +0x3c8ec3bc41aa2008, 0x3fefba1bee615a27, +0xbc83b6137e9afe9e, 0x3fefbf9c1cb6412a, +0x3c842b94c3a9eb32, 0x3fefc52b376bba97, +0xbc69fa74878ba7c7, 0x3fefcac948dd7274, +0x3c8a64a931d185ee, 0x3fefd0765b6e4540, +0x3c901f3a75ee0efe, 0x3fefd632798844f8, +0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14, +0xbc516a9ce6ed84fa, 0x3fefe1d802243c89, +0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8, +0xbc699c7db2effc76, 0x3fefedba3692d514, +0x3c5305c14160cc89, 0x3feff3c22b8f71f1, +0x3c64b458677f9840, 0x3feff9d96b2a23d9, +#elif N == 512 +0x0, 0x3ff0000000000000, +0xbc75d87ade1f60d5, 0x3feffd8c86da1c0a, +0xbc84e82fc61851ac, 0x3feffb1afa5abcbf, +0x3c9bffdaa7ac4bac, 0x3feff8ab5b2cbd11, +0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335, +0x3c75c18e5ae0563a, 0x3feff3d1e77170b4, +0xbc82985dd8521d32, 0x3feff168143b0281, +0xbc705b1125cf49a5, 0x3fefef003103b10e, +0xbc7160139cd8dc5d, 0x3fefec9a3e778061, +0x3c9f879abbff3f87, 0x3fefea363d42b027, +0x3c651e617061bfbd, 0x3fefe7d42e11bbcc, +0x3c9b14003824712a, 0x3fefe57411915a8a, +0xbc905e7a108766d1, 0x3fefe315e86e7f85, +0x3c61cbf0f38af658, 0x3fefe0b9b35659d8, +0x3c845fad437fa426, 0x3fefde5f72f654b1, +0xbc9a3316383dcbc5, 0x3fefdc0727fc1762, +0x3c8cd2523567f613, 0x3fefd9b0d3158574, +0x3c9901c9e0e797fd, 0x3fefd75c74f0bec2, +0xbc954529642b232f, 0x3fefd50a0e3c1f89, +0xbc89b3236d111646, 0x3fefd2b99fa6407c, +0xbc8bce8023f98efa, 0x3fefd06b29ddf6de, +0xbc8cb191be99b1b0, 0x3fefce1ead925493, +0x3c8293708ef5c32e, 0x3fefcbd42b72a836, +0xbc9acb71e83765b7, 0x3fefc98ba42e7d30, +0x3c60f74e61e6c861, 0x3fefc74518759bc8, +0x3c5cd3e58b03697e, 0x3fefc50088f8093f, +0xbc95b9280905b2a4, 0x3fefc2bdf66607e0, +0xbc8bfb07d4755452, 0x3fefc07d61701716, +0x3c90a3e45b33d399, 0x3fefbe3ecac6f383, +0x3c8aedeb3e7b14cd, 0x3fefbc02331b9715, +0x3c84f31f32c4b7e7, 0x3fefb9c79b1f3919, +0x3c9a8eb1f3d914b4, 0x3fefb78f03834e52, +0x3c979aa65d837b6d, 0x3fefb5586cf9890f, +0xbc85b9eb0402507b, 0x3fefb323d833d93f, +0x3c9407fb30d06420, 0x3fefb0f145e46c85, +0xbc93f0f225bbf3ee, 0x3fefaec0b6bdae53, +0x3c8eb51a92fdeffc, 0x3fefac922b7247f7, +0xbc9c3fe7282d1784, 0x3fefaa65a4b520ba, +0xbc9a5d04b3b9911b, 0x3fefa83b23395dec, +0x3c9c8be44bf4cde8, 0x3fefa612a7b26300, +0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2, +0x3c820c5444c93c44, 0x3fefa1c7c55189c6, +0xbc937a01f0739546, 0x3fef9fa55fdfa9c5, +0xbc84c6baeb580d7a, 0x3fef9d8503328e6d, +0xbc6a033489906e0b, 0x3fef9b66affed31b, +0x3c8657aa1b0d9f83, 0x3fef994a66f951ce, +0x3c8b8268b04ef0a5, 0x3fef973028d7233e, +0x3c62f2c7fd6ee145, 0x3fef9517f64d9ef1, +0xbc9556522a2fbd0e, 0x3fef9301d0125b51, +0xbc6b0b2789925e90, 0x3fef90edb6db2dc1, +0xbc9ac46e44a2ebcc, 0x3fef8edbab5e2ab6, +0xbc93aad17d197fae, 0x3fef8ccbae51a5c8, +0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc, +0xbc989c464a07ad70, 0x3fef88b1e264a0e9, +0xbc65704e90c9f860, 0x3fef86a814f204ab, +0xbc72c338fce197f4, 0x3fef84a058cbae1e, +0xbc91c923b9d5f416, 0x3fef829aaea92de0, +0xbc6dca724cea0eb6, 0x3fef809717425438, +0xbc897cea57e46280, 0x3fef7e95934f312e, +0x3c464770b955d34d, 0x3fef7c962388149e, +0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51, +0xbc962811c114424f, 0x3fef789d83606e12, +0x3c56f01429e2b9d2, 0x3fef76a45471c3c2, +0x3c8ec58e74904dd4, 0x3fef74ad3c92df73, +0xbc801b15eaa59348, 0x3fef72b83c7d517b, +0x3c8d63b0ab2d5bbf, 0x3fef70c554eaea89, +0x3c6e653b2459034b, 0x3fef6ed48695bbc0, +0xbc9ca9effbeeac92, 0x3fef6ce5d23816c9, +0xbc8f1ff055de323d, 0x3fef6af9388c8dea, +0x3c8bda920de0f6e2, 0x3fef690eba4df41f, +0x3c92cc7ea345b7dc, 0x3fef672658375d2f, +0xbc9a597f9a5ff71c, 0x3fef654013041dc2, +0x3c8b898c3f1353bf, 0x3fef635beb6fcb75, +0x3c50835b125aa573, 0x3fef6179e2363cf8, +0x3c957bfb2876ea9e, 0x3fef5f99f8138a1c, +0x3c8aaa13d61aec1f, 0x3fef5dbc2dc40bf0, +0xbc96d99c7611eb26, 0x3fef5be084045cd4, +0x3c8a4f81aa7110bd, 0x3fef5a06fb91588f, +0x3c8cdc1873af2155, 0x3fef582f95281c6b, +0xbc6817fd6a313e3e, 0x3fef565a51860746, +0x3c9aecf73e3a2f60, 0x3fef54873168b9aa, +0xbc96236af85fd26a, 0x3fef52b6358e15e8, +0xbc9493684653a131, 0x3fef50e75eb44027, +0x3c7795eb4523abe7, 0x3fef4f1aad999e82, +0xbc8fe782cb86389d, 0x3fef4d5022fcd91d, +0x3c8fe58b91b40095, 0x3fef4b87bf9cda38, +0xbc98e2899077520a, 0x3fef49c18438ce4d, +0x3c91ecaa860c614a, 0x3fef47fd7190241e, +0x3c8a6f4144a6c38d, 0x3fef463b88628cd6, +0xbc3e45c83ba0bbcb, 0x3fef447bc96ffc18, +0x3c9120fcd4f59273, 0x3fef42be3578a819, +0xbc29fd3bea07b4ee, 0x3fef4102cd3d09b9, +0x3c807a05b0e4047d, 0x3fef3f49917ddc96, +0x3c87f1c7350e256d, 0x3fef3d9282fc1f27, +0x3c89b788c188c9b8, 0x3fef3bdda27912d1, +0x3c420dac6c124f4f, 0x3fef3a2af0b63bff, +0x3c968efde3a8a894, 0x3fef387a6e756238, +0xbc99501d09bc09fd, 0x3fef36cc1c78903a, +0x3c877afbca90ef84, 0x3fef351ffb82140a, +0x3c73baf864dc8675, 0x3fef33760c547f15, +0x3c875e18f274487d, 0x3fef31ce4fb2a63f, +0x3c91b0575c1eaf54, 0x3fef3028c65fa1ff, +0x3c91512f082876ee, 0x3fef2e85711ece75, +0xbc90364bc9ce33ab, 0x3fef2ce450b3cb82, +0x3c80472b981fe7f2, 0x3fef2b4565e27cdd, +0xbc7548165d85ed32, 0x3fef29a8b16f0a30, +0x3c9a02f0c7d75ec6, 0x3fef280e341ddf29, +0x3c7c3b977a68e32c, 0x3fef2675eeb3ab98, +0xbc96b87b3f71085e, 0x3fef24dfe1f56381, +0xbc93a255f697ecfe, 0x3fef234c0ea83f36, +0xbc803297e78260bf, 0x3fef21ba7591bb70, +0x3c8d2d19edc1e550, 0x3fef202b17779965, +0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1, +0xbc76b2173113dd8c, 0x3fef1d130f50d65c, +0xbc95b77e5ccd9fbf, 0x3fef1b8a66d10f13, +0x3c811aa5f853590b, 0x3fef1a03fc675d1f, +0xbc3d219b1a6fbffa, 0x3fef187fd0dad990, +0x3c61d61a34c8aa02, 0x3fef16fde4f2e280, +0xbc91e75c40b4251e, 0x3fef157e39771b2f, +0xbc91f892bf6b286d, 0x3fef1400cf2f6c18, +0x3c8b3782720c0ab4, 0x3fef1285a6e4030b, +0x3c7590c65c20e680, 0x3fef110cc15d5346, +0x3c98a911f1f7785a, 0x3fef0f961f641589, +0x3c86fe320b5c1e9d, 0x3fef0e21c1c14833, +0x3c6e149289cecb8f, 0x3fef0cafa93e2f56, +0xbc903cd8b2f25790, 0x3fef0b3fd6a454d2, +0xbc61e7c998db7dbb, 0x3fef09d24abd886b, +0x3c7b3bf786a54a87, 0x3fef08670653dfe4, +0x3c834d754db0abb6, 0x3fef06fe0a31b715, +0x3c74bb6c41732885, 0x3fef05975721b004, +0x3c85425c11faadf4, 0x3fef0432edeeb2fd, +0xbc99d7399abb9a8b, 0x3fef02d0cf63eeac, +0x3c864201e2ac744c, 0x3fef0170fc4cd831, +0xbc5451d60c6ac9eb, 0x3fef001375752b40, +0xbc979517a03e2847, 0x3feefeb83ba8ea32, +0x3c8787a210ceafd9, 0x3feefd5f4fb45e20, +0x3c8fdd395dd3f84a, 0x3feefc08b26416ff, +0xbc888d1e4629943d, 0x3feefab46484ebb4, +0xbc800e2a46da4bee, 0x3feef96266e3fa2d, +0xbc93369c544088b6, 0x3feef812ba4ea77d, +0xbc86a3803b8e5b04, 0x3feef6c55f929ff1, +0x3c85373ce4eb6dfb, 0x3feef57a577dd72b, +0xbc87430803972b34, 0x3feef431a2de883b, +0x3c83adec8265a67f, 0x3feef2eb428335b4, +0xbc924aedcc4b5068, 0x3feef1a7373aa9cb, +0xbc835388bcac6bc5, 0x3feef06581d3f669, +0xbc954de30ae02d94, 0x3feeef26231e754a, +0x3c727cdb4e4b6640, 0x3feeede91be9c811, +0xbc9907f81b512d8e, 0x3feeecae6d05d866, +0x3c86c2696a26af35, 0x3feeeb761742d808, +0xbc94f2487e1c03ec, 0x3feeea401b7140ef, +0x3c888f6ff06b979a, 0x3feee90c7a61d55b, +0xbc71d1e83e9436d2, 0x3feee7db34e59ff7, +0xbc89d5efaabc2030, 0x3feee6ac4bcdf3ea, +0x3c914a5432fcb2f4, 0x3feee57fbfec6cf4, +0xbc76b8867f91c9d6, 0x3feee4559212ef89, +0xbc991919b3ce1b15, 0x3feee32dc313a8e5, +0x3c94c9c0b5157fe6, 0x3feee20853c10f28, +0x3c79c3bba5562a2f, 0x3feee0e544ede173, +0xbc62455345b51c8e, 0x3feedfc4976d27fa, +0x3c859f48a72a4c6d, 0x3feedea64c123422, +0xbc93331de45477d0, 0x3feedd8a63b0a09b, +0xbc85a71612e21658, 0x3feedc70df1c5175, +0xbc95f84d39b39b16, 0x3feedb59bf29743f, +0xbc9312607a28698a, 0x3feeda4504ac801c, +0xbc72ba4dc7c4d562, 0x3feed932b07a35df, +0x3c86421f6f1d24d6, 0x3feed822c367a024, +0xbc844f25dc02691f, 0x3feed7153e4a136a, +0xbc58a78f4817895b, 0x3feed60a21f72e2a, +0xbc888d328eb9b501, 0x3feed5016f44d8f5, +0xbc9348a6815fce65, 0x3feed3fb2709468a, +0x3c7f0bec42ddb15a, 0x3feed2f74a1af3f1, +0xbc7c2c9b67499a1b, 0x3feed1f5d950a897, +0xbc615f0a2b9cd452, 0x3feed0f6d5817663, +0x3c835c43984d9871, 0x3feecffa3f84b9d4, +0xbc8c2e465a919e1d, 0x3feecf0018321a1a, +0x3c4363ed60c2ac11, 0x3feece086061892d, +0xbc865dfd02bd08f1, 0x3feecd1318eb43ec, +0xbc632afc8d9473a0, 0x3feecc2042a7d232, +0xbc8e68cec89b1762, 0x3feecb2fde7006f4, +0x3c9666093b0664ef, 0x3feeca41ed1d0057, +0xbc48ae858eb682ca, 0x3feec9566f8827d0, +0xbc95fc5e44de020e, 0x3feec86d668b3237, +0x3c5dd71277c0915f, 0x3feec786d3001fe5, +0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0, +0x3c92001325ecd7fb, 0x3feec5c10fa920a1, +0xbc7ea0148327c42f, 0x3feec4e1e192aed2, +0x3c65ace6e2870332, 0x3feec4052c5916c4, +0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de, +0xbc9595c55690ffaf, 0x3feec2532feaada6, +0xbc7a843ad1a88022, 0x3feec17dea6db7d7, +0xbc8b401ba9fb5199, 0x3feec0ab213d5283, +0x3c7690cebb7aafb0, 0x3feebfdad5362a27, +0x3c6df82bf324cc57, 0x3feebf0d073537ca, +0x3c892ca3bf144e63, 0x3feebe41b817c114, +0x3c97cae38641c7bb, 0x3feebd78e8bb586b, +0x3c931dbdeb54e077, 0x3feebcb299fddd0d, +0x3c62d80c5c4a2b67, 0x3feebbeeccbd7b2a, +0xbc902c99b04aa8b0, 0x3feebb2d81d8abff, +0x3c8f39c10d12eaf0, 0x3feeba6eba2e35f0, +0xbc8f94340071a38e, 0x3feeb9b2769d2ca7, +0xbc80b582d74a55d9, 0x3feeb8f8b804f127, +0x3c73e34f67e67118, 0x3feeb8417f4531ee, +0xbc6b4e327ff434ca, 0x3feeb78ccd3deb0d, +0xbc87deccdc93a349, 0x3feeb6daa2cf6642, +0xbc592dca38593e20, 0x3feeb62b00da3b14, +0xbc75a3b1197ba0f0, 0x3feeb57de83f4eef, +0xbc85daca9994833e, 0x3feeb4d359dfd53d, +0xbc78dec6bd0f385f, 0x3feeb42b569d4f82, +0xbc980b4321bc6dae, 0x3feeb385df598d78, +0x3c81bd2888075068, 0x3feeb2e2f4f6ad27, +0xbc8390afec5241c5, 0x3feeb24298571b06, +0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f, +0x3c8f15cdafe7d586, 0x3feeb1098bed1bdf, +0xbc896be8ae89ef8f, 0x3feeb070dde910d2, +0xbc910aa91ae9b67f, 0x3feeafdac1351819, +0x3c93350518fdd78e, 0x3feeaf4736b527da, +0x3c957e1b67462375, 0x3feeaeb63f4d854c, +0xbc88e6ac90348602, 0x3feeae27dbe2c4cf, +0x3c8124d5051552a7, 0x3feead9c0d59ca07, +0x3c7b98b72f8a9b05, 0x3feead12d497c7fd, +0xbc3ca103952ecf1f, 0x3feeac8c32824135, +0xbc91af7f1365c3ac, 0x3feeac0827ff07cc, +0x3c773345c02a4fd6, 0x3feeab86b5f43d92, +0x3c9063e1e21c5409, 0x3feeab07dd485429, +0xbc909d2a0fce20f2, 0x3feeaa8b9ee20d1e, +0xbc943a3540d1898a, 0x3feeaa11fba87a03, +0xbc924f2cb4f81746, 0x3feea99af482fc8f, +0x3c34c7855019c6ea, 0x3feea9268a5946b7, +0xbc943592a0a9846b, 0x3feea8b4be135acc, +0xbc951f58ddaa8090, 0x3feea84590998b93, +0xbc956bc85d444f4f, 0x3feea7d902d47c65, +0x3c9432e62b64c035, 0x3feea76f15ad2148, +0x3c914d1e4218319f, 0x3feea707ca0cbf0f, +0xbc82e1648e50a17c, 0x3feea6a320dceb71, +0x3c971c93709313f4, 0x3feea6411b078d26, +0xbc8ce44a6199769f, 0x3feea5e1b976dc09, +0x3c7f88303b60d222, 0x3feea584fd15612a, +0x3c95f30eda98a575, 0x3feea52ae6cdf6f4, +0x3c70125ca18d4b5b, 0x3feea4d3778bc944, +0xbc8c33c53bef4da8, 0x3feea47eb03a5585, +0x3c9592ea73798b11, 0x3feea42c91c56acd, +0x3c917ecda8a72159, 0x3feea3dd1d1929fd, +0xbc9371d6d7d75739, 0x3feea390532205d8, +0xbc845378892be9ae, 0x3feea34634ccc320, +0xbc8ac05fd996f807, 0x3feea2fec30678b7, +0xbc9345f3cee1ae6e, 0x3feea2b9febc8fb7, +0xbc91f5067d03653a, 0x3feea277e8dcc390, +0xbc93cedd78565858, 0x3feea23882552225, +0x3c917339c86ce3ad, 0x3feea1fbcc140be7, +0xbc85c33fdf910406, 0x3feea1c1c70833f6, +0xbc77e66065ba2500, 0x3feea18a7420a036, +0x3c5710aa807e1964, 0x3feea155d44ca973, +0x3c964c827ee6b49a, 0x3feea123e87bfb7a, +0x3c81079ab5789604, 0x3feea0f4b19e9538, +0xbc928311a3c73480, 0x3feea0c830a4c8d4, +0xbc93b3efbf5e2228, 0x3feea09e667f3bcd, +0x3c882c79e185e981, 0x3feea077541ee718, +0x3c727df161cd7778, 0x3feea052fa75173e, +0xbc8b48cea80b043b, 0x3feea0315a736c75, +0xbc6a12ad8734b982, 0x3feea012750bdabf, +0xbc4f4863bc8e5180, 0x3fee9ff64b30aa09, +0x3c93f9924a05b767, 0x3fee9fdcddd47645, +0x3c954835dd4b7548, 0x3fee9fc62dea2f8a, +0xbc6367efb86da9ee, 0x3fee9fb23c651a2f, +0xbc8bf41f59b59f8a, 0x3fee9fa10a38cee8, +0xbc87557939a8b5ef, 0x3fee9f9298593ae5, +0xbc8f652fde52775c, 0x3fee9f86e7ba9fef, +0xbc80dc3d54e08851, 0x3fee9f7df9519484, +0xbc7b0300defbcf98, 0x3fee9f77ce1303f6, +0x3c51ed2f56fa9d1a, 0x3fee9f7466f42e87, +0xbc89dab646035dc0, 0x3fee9f73c4eaa988, +0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74, +0xbc91f0c230588dde, 0x3fee9f7ad3ef9011, +0xbc88e67a9006c909, 0x3fee9f8286ead08a, +0x3c9106450507a28c, 0x3fee9f8d02d50b8f, +0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174, +0xbc9129729a10f3a0, 0x3fee9faa5953c849, +0x3c86597566977ac8, 0x3fee9fbd35d7cbfd, +0x3c781a70a5124f67, 0x3fee9fd2df29ce7c, +0xbc8619321e55e68a, 0x3fee9feb564267c9, +0x3c941626ea62646d, 0x3feea0069c1a861d, +0x3c92c0b7028a5c3a, 0x3feea024b1ab6e09, +0xbc940b9f54365b7c, 0x3feea04597eeba8f, +0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f, +0x3c873455e0e826c1, 0x3feea08fda749e5d, +0x3c8a30faf49cc78c, 0x3feea0b938ac1cf6, +0x3c94f006ad874e3e, 0x3feea0e56b7fcf03, +0xbc7b32dcb94da51d, 0x3feea11473eb0187, +0xbc8f6d693d0973bb, 0x3feea14652e958aa, +0xbc92dad3519d7b5b, 0x3feea17b0976cfdb, +0x3c58c5ee2b7e7848, 0x3feea1b2988fb9ec, +0x3c94ecfd5467c06b, 0x3feea1ed0130c132, +0xbc88b25e045d207b, 0x3feea22a4456e7a3, +0x3c87d51410fd15c2, 0x3feea26a62ff86f0, +0xbc69cb3314060ca7, 0x3feea2ad5e2850ac, +0x3c65ebe1abd66c55, 0x3feea2f336cf4e62, +0x3c87a0b15d19e0bb, 0x3feea33bedf2e1b9, +0xbc760a3629969871, 0x3feea3878491c491, +0x3c94aa7212bfa73c, 0x3feea3d5fbab091f, +0xbc88a1c52fb3cf42, 0x3feea427543e1a12, +0xbc81e688272a8a12, 0x3feea47b8f4abaa9, +0x3c8b18c6e3fdef5d, 0x3feea4d2add106d9, +0x3c4ab7b7112ec9d5, 0x3feea52cb0d1736a, +0xbc9369b6f13b3734, 0x3feea589994cce13, +0x3c8a1e274eed4476, 0x3feea5e968443d9a, +0x3c90ec1ddcb1390a, 0x3feea64c1eb941f7, +0x3c94a533a59324da, 0x3feea6b1bdadb46d, +0xbc805e843a19ff1e, 0x3feea71a4623c7ad, +0x3c7a56d2760d087d, 0x3feea785b91e07f1, +0xbc522cea4f3afa1e, 0x3feea7f4179f5b21, +0x3c91682c1c6e8b05, 0x3feea86562ab00ec, +0xbc94d450d872576e, 0x3feea8d99b4492ed, +0x3c89ea99cf7a9591, 0x3feea950c27004c2, +0x3c7c88549b958471, 0x3feea9cad931a436, +0xbc59e57d8f92ff8e, 0x3feeaa47e08e1957, +0x3c90ad675b0e8a00, 0x3feeaac7d98a6699, +0x3c909b176e05a9cd, 0x3feeab4ac52be8f7, +0x3c931143962f7877, 0x3feeabd0a478580f, +0x3c711607f1952c95, 0x3feeac597875c644, +0x3c8db72fc1f0eab4, 0x3feeace5422aa0db, +0x3c869608f0f86431, 0x3feead74029db01e, +0x3c93e9e96f112479, 0x3feeae05bad61778, +0xbc7f1ced15c5c5c0, 0x3feeae9a6bdb5598, +0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c, +0x3c614b97be3f7b4e, 0x3feeafccbc6c19e6, +0xbc8dac42a4a38df0, 0x3feeb06a5e0866d9, +0x3c81c1701c359530, 0x3feeb10afc931857, +0x3c7bf68359f35f44, 0x3feeb1ae99157736, +0xbc8edb1bf6809287, 0x3feeb2553499284b, +0x3c8b99dd98b1ed84, 0x3feeb2fed0282c8a, +0xbc8ba58ce7a736d3, 0x3feeb3ab6ccce12c, +0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6, +0xbc93fc025e1db9ce, 0x3feeb50dad829e70, +0xbc7885ad50cbb750, 0x3feeb5c353aa2fe2, +0xbc8d737c7d71382e, 0x3feeb67bff148396, +0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5, +0x3c6ae88c43905293, 0x3feeb7f669e2802b, +0xbc82d5e85f3e0301, 0x3feeb8b82b5f98e5, +0xbc93d1f7661fe51b, 0x3feeb97cf65253d1, +0xbc6c23f97c90b959, 0x3feeba44cbc8520f, +0x3c651b68797ffc1c, 0x3feebb0faccf9243, +0xbc51669428996971, 0x3feebbdd9a7670b3, +0x3c54579c5ceed70b, 0x3feebcae95cba768, +0xbc92434322f4f9aa, 0x3feebd829fde4e50, +0x3c87298413381667, 0x3feebe59b9bddb5b, +0x3c71f2b2c1c4c014, 0x3feebf33e47a22a2, +0xbc905000be64e965, 0x3feec01121235681, +0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba, +0xbc89fb12e3454b73, 0x3feec1d4d47f2598, +0xbc9294f304f166b6, 0x3feec2bb4d53fe0d, +0x3c7be2a03697693b, 0x3feec3a4dc5a3dd3, +0x3c71affc2b91ce27, 0x3feec49182a3f090, +0x3c90622b15810eea, 0x3feec581414380f2, +0xbc8a1e58414c07d3, 0x3feec674194bb8d5, +0x3be9a5ecc875d327, 0x3feec76a0bcfc15e, +0x3c6dd235e10a73bb, 0x3feec86319e32323, +0x3c88ea486a3350ef, 0x3feec95f4499c647, +0xbc79740b58a20091, 0x3feeca5e8d07f29e, +0xbc7a2ee551d4c40f, 0x3feecb60f4424fcb, +0xbc87c50422622263, 0x3feecc667b5de565, +0x3c89c31f7e38028b, 0x3feecd6f23701b15, +0x3c9165830a2b96c2, 0x3feece7aed8eb8bb, +0xbc5fac13f4e005a3, 0x3feecf89dacfe68c, +0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33, +0x3c7d8aced7162e89, 0x3feed1b1231475f7, +0xbc903d5cbe27874b, 0x3feed2c980460ad8, +0xbc848f50cea7269f, 0x3feed3e504f696b1, +0xbc91bbd1d3bcbb15, 0x3feed503b23e255d, +0x3c821eb9a08a0542, 0x3feed625893523d4, +0x3c5986178980fce0, 0x3feed74a8af46052, +0xbc6133a953131cfd, 0x3feed872b8950a73, +0x3c90cc319cee31d2, 0x3feed99e1330b358, +0x3c89e95e6f4a0ae4, 0x3feedacc9be14dca, +0xbc89472975b1f2a5, 0x3feedbfe53c12e59, +0xbc90260cf07cb311, 0x3feedd333beb0b7e, +0x3c8469846e735ab3, 0x3feede6b5579fdbf, +0x3c1bca400a7b939d, 0x3feedfa6a1897fd2, +0x3c7d8157a34b7e7f, 0x3feee0e521356eba, +0x3c9140bc34dfc19f, 0x3feee226d59a09ee, +0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a, +0xbc8c9b1da461ab87, 0x3feee4b3e100301e, +0x3c8c8a4e231ebb7d, 0x3feee5ff3a3c2774, +0x3c8c115f23ebea8e, 0x3feee74dcca5a413, +0x3c8c1a7792cb3387, 0x3feee89f995ad3ad, +0xbc6dcab99f23f84e, 0x3feee9f4a17a4735, +0xbc888c8d11a142e5, 0x3feeeb4ce622f2ff, +0x3c60a43e8b7e4bfe, 0x3feeeca868742ee4, +0xbc907b8f4ad1d9fa, 0x3feeee07298db666, +0x3c915b1397075f04, 0x3feeef692a8fa8cd, +0x3c889c2ea41433c7, 0x3feef0ce6c9a8952, +0xbc839f7a1f04d2b0, 0x3feef236f0cf3f3a, +0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb, +0xbc86a510f31e13e6, 0x3feef511c43bbd62, +0xbc7274aedac8ff80, 0x3feef68415b749b1, +0xbc92887ea88e7340, 0x3feef7f9ade433c6, +0xbc90a40e3da6f640, 0x3feef9728de5593a, +0xbc6e57ac604759ba, 0x3feefaeeb6ddfc87, +0x3c85c620ce76df06, 0x3feefc6e29f1c52a, +0x3c8e6c6db4f83226, 0x3feefdf0e844bfc6, +0xbc68d6f438ad9334, 0x3feeff76f2fb5e47, +0xbc8d1bf10460dba0, 0x3fef01004b3a7804, +0xbc8fda52e1b51e41, 0x3fef028cf22749e4, +0x3c8e5d80813dddfc, 0x3fef041ce8e77680, +0xbc91eee26b588a35, 0x3fef05b030a1064a, +0x3c8caff9640f2dcb, 0x3fef0746ca7a67a7, +0xbc32141a7b3e2cd8, 0x3fef08e0b79a6f1f, +0x3c7a77557fd62db3, 0x3fef0a7df9285775, +0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2, +0xbc651ba6128db749, 0x3fef0dc27e2cb5e5, +0xbc302899507554e5, 0x3fef0f69c3f3a207, +0xbc7c0ffefdc5e251, 0x3fef111462c95b60, +0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09, +0xbc8b6cd058bfd6fa, 0x3fef1473b0468d30, +0xbc80dda2d4c0010c, 0x3fef16286141b33d, +0x3c923759b8aca76d, 0x3fef17e06ff301f4, +0x3c736eae30af0cb3, 0x3fef199bdd85529c, +0xbc895498a73dac7d, 0x3fef1b5aab23e61e, +0xbc8a007daadf8d68, 0x3fef1d1cd9fa652c, +0x3c851de924583108, 0x3fef1ee26b34e065, +0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a, +0xbc8c5fe4051ba06c, 0x3fef2277b9881650, +0x3c836909391181d3, 0x3fef244778fafb22, +0xbc6d1816c0a9ac07, 0x3fef261a9f8630ad, +0x3c84e08fd10959ac, 0x3fef27f12e57d14b, +0xbc7af5c67c4e8235, 0x3fef29cb269e601f, +0xbc811cd7dbdf9547, 0x3fef2ba88988c933, +0xbc8304ef0045d575, 0x3fef2d89584661a1, +0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5, +0x3c8725f94f910375, 0x3fef31553dfa8313, +0xbc7ac28b7bef6621, 0x3fef33405751c4db, +0x3c7b53e99f9191e8, 0x3fef352ee13da7cb, +0x3c676b2c6c921968, 0x3fef3720dcef9069, +0xbc810a79e6d7e2b8, 0x3fef39164b994d23, +0xbc7030587207b9e1, 0x3fef3b0f2e6d1675, +0x3c840635f6d2a9c0, 0x3fef3d0b869d8f0f, +0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa, +0x3c549eeef9ec910c, 0x3fef410e9be12cb9, +0xbc8cc734592af7fc, 0x3fef43155b5bab74, +0xbc8335827ffb9dce, 0x3fef451f95018d17, +0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c, +0x3c645563980ef762, 0x3fef493e7ba2c38c, +0x3c87752a44f587e8, 0x3fef4b532b08c968, +0xbc8cd0205eb2aab2, 0x3fef4d6b596f948c, +0xbc900dae3875a949, 0x3fef4f87080d89f2, +0xbc8aab80ceab2b4a, 0x3fef51a638197a3c, +0x3c85b66fefeef52e, 0x3fef53c8eacaa1d6, +0xbc8f870f40a8ba1b, 0x3fef55ef2158a91f, +0x3c74a385a63d07a7, 0x3fef5818dcfba487, +0x3c83c119f18464c5, 0x3fef5a461eec14be, +0x3c5159d9d908a96e, 0x3fef5c76e862e6d3, +0xbc5a628c2be4e7c7, 0x3fef5eab3a99745b, +0xbc82919e2040220f, 0x3fef60e316c98398, +0xbc72550d76be719a, 0x3fef631e7e2d479d, +0x3c8c254d16117a68, 0x3fef655d71ff6075, +0xbc82090274667d12, 0x3fef679ff37adb4a, +0x3c8e5a50d5c192ac, 0x3fef69e603db3285, +0x3c75f7d28150cac4, 0x3fef6c2fa45c4dfd, +0xbc8d8c329fbd0e03, 0x3fef6e7cd63a8315, +0x3c890de9296f4cd1, 0x3fef70cd9ab294e4, +0x3c843a59ac016b4b, 0x3fef7321f301b460, +0x3c832ff9978b34bc, 0x3fef7579e065807d, +0xbc8ea6e6fbd5f2a6, 0x3fef77d5641c0658, +0xbc7303b63dda1980, 0x3fef7a347f63c159, +0xbc82d52107b43e1f, 0x3fef7c97337b9b5f, +0xbc81f2ba385f2f95, 0x3fef7efd81a2ece1, +0xbc63e8e3eab2cbb4, 0x3fef81676b197d17, +0x3c768d9144ae12fc, 0x3fef83d4f11f8220, +0xbc892ab93b470dc9, 0x3fef864614f5a129, +0x3c853687f542403b, 0x3fef88bad7dcee90, +0xbc8b7966cd0d2cd9, 0x3fef8b333b16ee12, +0xbc736ed2de40b407, 0x3fef8daf3fe592e8, +0x3c74b604603a88d3, 0x3fef902ee78b3ff6, +0xbc614ef56c770f3b, 0x3fef92b2334ac7ee, +0xbc776caa4c2ff1cf, 0x3fef953924676d76, +0x3c8df7d1353d8e88, 0x3fef97c3bc24e350, +0x3c83c5ec519d7271, 0x3fef9a51fbc74c83, +0xbc850bed64091b8a, 0x3fef9ce3e4933c7e, +0xbc81d5fc525d9940, 0x3fef9f7977cdb740, +0x3c89d852381c317f, 0x3fefa212b6bc3181, +0xbc8ff7128fd391f0, 0x3fefa4afa2a490da, +0x3c68a00e3cca04c4, 0x3fefa7503ccd2be5, +0x3c855cd8aaea3d21, 0x3fefa9f4867cca6e, +0xbc5a1f25ce94cae7, 0x3fefac9c80faa594, +0xbc8dae98e223747d, 0x3fefaf482d8e67f1, +0xbc6fb5f3ee307976, 0x3fefb1f78d802dc2, +0x3c8269947c2bed4a, 0x3fefb4aaa2188510, +0x3c737e8ae802b851, 0x3fefb7616ca06dd6, +0x3c8ec3bc41aa2008, 0x3fefba1bee615a27, +0x3c875119560e34af, 0x3fefbcda28a52e59, +0xbc83b6137e9afe9e, 0x3fefbf9c1cb6412a, +0xbc7431c3840929c6, 0x3fefc261cbdf5be7, +0x3c842b94c3a9eb32, 0x3fefc52b376bba97, +0xbc8cb472d2e86b99, 0x3fefc7f860a70c22, +0xbc69fa74878ba7c7, 0x3fefcac948dd7274, +0x3c83f5df2fde16a8, 0x3fefcd9df15b82ac, +0x3c8a64a931d185ee, 0x3fefd0765b6e4540, +0x3c8eef18336b62e3, 0x3fefd35288633625, +0x3c901f3a75ee0efe, 0x3fefd632798844f8, +0x3c80d23f87b50a2a, 0x3fefd916302bd526, +0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14, +0x3c8302dee657c8e6, 0x3fefdee8f32a4b45, +0xbc516a9ce6ed84fa, 0x3fefe1d802243c89, +0xbc7b0caa080df170, 0x3fefe4cadbdac61d, +0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8, +0x3c7617a9f2fd24e5, 0x3fefeabbf4c0ba54, +0xbc699c7db2effc76, 0x3fefedba3692d514, +0x3c75f103b8fd5ca7, 0x3feff0bc4866e8ad, +0x3c5305c14160cc89, 0x3feff3c22b8f71f1, +0x3c8e70b094fa075a, 0x3feff6cbe15f6314, +0x3c64b458677f9840, 0x3feff9d96b2a23d9, +0xbc72ec9a3e5d680a, 0x3feffceaca4391b6, +#endif }, }; diff --git a/libc/tinymath/exp_data.internal.h b/libc/tinymath/exp_data.internal.h deleted file mode 100644 index a0713d691..000000000 --- a/libc/tinymath/exp_data.internal.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_TINYMATH_EXP_DATA_H_ -#define COSMOPOLITAN_LIBC_TINYMATH_EXP_DATA_H_ - -#define EXP_TABLE_BITS 7 -#define EXP_POLY_ORDER 5 -#define EXP_USE_TOINT_NARROW 0 -#define EXP2_POLY_ORDER 5 - -COSMOPOLITAN_C_START_ - -extern const struct exp_data { - double invln2N; - double shift; - double negln2hiN; - double negln2loN; - double poly[4]; /* Last four coefficients. */ - double exp2_shift; - double exp2_poly[EXP2_POLY_ORDER]; - uint64_t tab[2 * (1 << EXP_TABLE_BITS)]; -} __exp_data; - -COSMOPOLITAN_C_END_ -#endif /* COSMOPOLITAN_LIBC_TINYMATH_EXP_DATA_H_ */ diff --git a/libc/tinymath/expf.c b/libc/tinymath/expf.c index 43f33219b..625133bac 100644 --- a/libc/tinymath/expf.c +++ b/libc/tinymath/expf.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,19 +25,9 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/likely.h" -#include "libc/math.h" -#include "libc/tinymath/exp2f_data.internal.h" -#include "libc/tinymath/internal.h" +#include "libc/tinymath/arm.internal.h" __static_yoink("arm_optimized_routines_notice"); -/* - * Single-precision e^x function. - * - * Copyright (c) 2017-2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ - /* EXP2F_TABLE_BITS = 5 EXP2F_POLY_ORDER = 3 @@ -53,59 +43,79 @@ Non-nearest ULP error: 1 (rounded ULP error) #define T __exp2f_data.tab #define C __exp2f_data.poly_scaled -static inline uint32_t top12(float x) +static inline uint32_t +top12 (float x) { - return asuint(x) >> 20; + return asuint (x) >> 20; } /** * Returns 𝑒^x. + * + * - ULP error: 0.502 (nearest rounding.) + * - Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.) + * - Wrong count: 170635 (all nearest rounding wrong results with fma.) + * - Non-nearest ULP error: 1 (rounded ULP error) + * + * @raise ERANGE on overflow or underflow */ -float expf(float x) +float +expf (float x) { - uint32_t abstop; - uint64_t ki, t; - double_t kd, xd, z, r, r2, y, s; + uint32_t abstop; + uint64_t ki, t; + /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ + double_t kd, xd, z, r, r2, y, s; - xd = (double_t)x; - abstop = top12(x) & 0x7ff; - if (UNLIKELY(abstop >= top12(88.0f))) { - /* |x| >= 88 or x is nan. */ - if (asuint(x) == asuint(-INFINITY)) - return 0.0f; - if (abstop >= top12(INFINITY)) - return x + x; - if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */ - return __math_oflowf(0); - if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */ - return __math_uflowf(0); - } + xd = (double_t) x; + abstop = top12 (x) & 0x7ff; + if (unlikely (abstop >= top12 (88.0f))) + { + /* |x| >= 88 or x is nan. */ + if (asuint (x) == asuint (-INFINITY)) + return 0.0f; + if (abstop >= top12 (INFINITY)) + return x + x; + if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */ + return __math_oflowf (0); + if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */ + return __math_uflowf (0); +#if WANT_ERRNO_UFLOW + if (x < -0x1.9d1d9ep6f) /* x < log(0x1p-149) ~= -103.28 */ + return __math_may_uflowf (0); +#endif + } - /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */ - z = InvLn2N * xd; + /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */ + z = InvLn2N * xd; - /* Round and convert z to int, the result is in [-150*N, 128*N] and - ideally ties-to-even rule is used, otherwise the magnitude of r - can be bigger which gives larger approximation error. */ + /* Round and convert z to int, the result is in [-150*N, 128*N] and + ideally nearest int is used, otherwise the magnitude of r can be + bigger which gives larger approximation error. */ #if TOINT_INTRINSICS - kd = roundtoint(z); - ki = converttoint(z); + kd = roundtoint (z); + ki = converttoint (z); #else # define SHIFT __exp2f_data.shift - kd = eval_as_double(z + SHIFT); - ki = asuint64(kd); - kd -= SHIFT; + kd = eval_as_double (z + SHIFT); + ki = asuint64 (kd); + kd -= SHIFT; #endif - r = z - kd; + r = z - kd; - /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ - t = T[ki % N]; - t += ki << (52 - EXP2F_TABLE_BITS); - s = asdouble(t); - z = C[0] * r + C[1]; - r2 = r * r; - y = C[2] * r + 1; - y = z * r2 + y; - y = y * s; - return eval_as_float(y); + /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ + t = T[ki % N]; + t += ki << (52 - EXP2F_TABLE_BITS); + s = asdouble (t); + z = C[0] * r + C[1]; + r2 = r * r; + y = C[2] * r + 1; + y = z * r2 + y; + y = y * s; + return eval_as_float (y); } + +#if USE_GLIBC_ABI +strong_alias (expf, __expf_finite) +hidden_alias (expf, __ieee754_expf) +#endif diff --git a/libc/tinymath/frexpl.c b/libc/tinymath/frexpl.c index 45b5bbcac..619442c45 100644 --- a/libc/tinymath/frexpl.c +++ b/libc/tinymath/frexpl.c @@ -29,7 +29,6 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/freebsd.internal.h" - __static_yoink("freebsd_libm_notice"); #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) diff --git a/libc/tinymath/hypot.c b/libc/tinymath/hypot.c index 37ba5b507..fbc6b7737 100644 --- a/libc/tinymath/hypot.c +++ b/libc/tinymath/hypot.c @@ -28,7 +28,6 @@ #include "libc/math.h" __static_yoink("musl_libc_notice"); - #if FLT_EVAL_METHOD > 1U && LDBL_MANT_DIG == 64 #define SPLIT (0x1p32 + 1) #else diff --git a/libc/tinymath/hypotf.c b/libc/tinymath/hypotf.c index 4692b97b7..22ab375a3 100644 --- a/libc/tinymath/hypotf.c +++ b/libc/tinymath/hypotf.c @@ -28,7 +28,11 @@ #include "libc/math.h" __static_yoink("musl_libc_notice"); - +/** + * Returns euclidean distance. + * + * Max observed error is 1 ulp. + */ float hypotf(float x, float y) { union {float f; uint32_t i;} ux = {x}, uy = {y}, ut; diff --git a/libc/tinymath/hypotf2.c b/libc/tinymath/hypotf2.c new file mode 100644 index 000000000..734884e43 --- /dev/null +++ b/libc/tinymath/hypotf2.c @@ -0,0 +1,175 @@ +/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ +│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +╚──────────────────────────────────────────────────────────────────────────────╝ +│ │ +│ FreeBSD lib/msun/src/e_hypotf.c │ +│ Copyright (c) 1992-2023 The FreeBSD Project. │ +│ │ +│ Redistribution and use in source and binary forms, with or without │ +│ modification, are permitted provided that the following conditions │ +│ are met: │ +│ 1. Redistributions of source code must retain the above copyright │ +│ notice, this list of conditions and the following disclaimer. │ +│ 2. Redistributions in binary form must reproduce the above copyright │ +│ notice, this list of conditions and the following disclaimer in the │ +│ documentation and/or other materials provided with the distribution. │ +│ │ +│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND │ +│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE │ +│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE │ +│ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE │ +│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL │ +│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS │ +│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) │ +│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT │ +│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY │ +│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF │ +│ SUCH DAMAGE. │ +│ │ +│ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. │ +│ │ +│ Developed at SunPro, a Sun Microsystems, Inc. business. │ +│ Permission to use, copy, modify, and distribute this │ +│ software is freely granted, provided that this notice │ +│ is preserved. │ +│ │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/tinymath/freebsd.internal.h" +__static_yoink("freebsd_libm_notice"); +__static_yoink("fdlibm_notice"); + +static const float one = 1.0, tiny=1.0e-30; + +float +sqrtf2(float x) +{ + float z; + int32_t sign = (int)0x80000000; + int32_t ix,s,q,m,t,i; + uint32_t r; + + GET_FLOAT_WORD(ix,x); + + /* take care of Inf and NaN */ + if((ix&0x7f800000)==0x7f800000) { + return x*x+x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf + sqrt(-inf)=sNaN */ + } + /* take care of zero */ + if(ix<=0) { + if((ix&(~sign))==0) return x;/* sqrt(+-0) = +-0 */ + else if(ix<0) + return (x-x)/(x-x); /* sqrt(-ve) = sNaN */ + } + /* normalize x */ + m = (ix>>23); + if(m==0) { /* subnormal x */ + for(i=0;(ix&0x00800000)==0;i++) ix<<=1; + m -= i-1; + } + m -= 127; /* unbias exponent */ + ix = (ix&0x007fffff)|0x00800000; + if(m&1) /* odd m, double x to make it even */ + ix += ix; + m >>= 1; /* m = [m/2] */ + + /* generate sqrt(x) bit by bit */ + ix += ix; + q = s = 0; /* q = sqrt(x) */ + r = 0x01000000; /* r = moving bit from right to left */ + + while(r!=0) { + t = s+r; + if(t<=ix) { + s = t+r; + ix -= t; + q += r; + } + ix += ix; + r>>=1; + } + + /* use floating add to find out rounding direction */ + if(ix!=0) { + z = one-tiny; /* trigger inexact flag */ + if (z>=one) { + z = one+tiny; + if (z>one) + q += 2; + else + q += (q&1); + } + } + ix = (q>>1)+0x3f000000; + ix += ((uint32_t)m <<23); + SET_FLOAT_WORD(z,ix); + return z; +} + +/** + * Returns euclidean distance. + * + * Error is less than 1 ULP. + */ +float +hypotf2(float x, float y) +{ + float a,b,t1,t2,y1,y2,w; + int32_t j,k,ha,hb; + + GET_FLOAT_WORD(ha,x); + ha &= 0x7fffffff; + GET_FLOAT_WORD(hb,y); + hb &= 0x7fffffff; + if(hb > ha) {a=y;b=x;j=ha; ha=hb;hb=j;} else {a=x;b=y;} + a = fabsf(a); + b = fabsf(b); + if((ha-hb)>0xf000000) {return a+b;} /* x/y > 2**30 */ + k=0; + if(ha > 0x58800000) { /* a>2**50 */ + if(ha >= 0x7f800000) { /* Inf or NaN */ + /* Use original arg order iff result is NaN; quieten sNaNs. */ + w = fabsl(x+0.0L)-fabsf(y+0); + if(ha == 0x7f800000) w = a; + if(hb == 0x7f800000) w = b; + return w; + } + /* scale a and b by 2**-68 */ + ha -= 0x22000000; hb -= 0x22000000; k += 68; + SET_FLOAT_WORD(a,ha); + SET_FLOAT_WORD(b,hb); + } + if(hb < 0x26800000) { /* b < 2**-50 */ + if(hb <= 0x007fffff) { /* subnormal b or 0 */ + if(hb==0) return a; + SET_FLOAT_WORD(t1,0x7e800000); /* t1=2^126 */ + b *= t1; + a *= t1; + k -= 126; + } else { /* scale a and b by 2^68 */ + ha += 0x22000000; /* a *= 2^68 */ + hb += 0x22000000; /* b *= 2^68 */ + k -= 68; + SET_FLOAT_WORD(a,ha); + SET_FLOAT_WORD(b,hb); + } + } + /* medium size a and b */ + w = a-b; + if (w>b) { + SET_FLOAT_WORD(t1,ha&0xfffff000); + t2 = a-t1; + w = sqrtf2(t1*t1-(b*(-b)-t2*(a+t1))); + } else { + a = a+a; + SET_FLOAT_WORD(y1,hb&0xfffff000); + y2 = b - y1; + SET_FLOAT_WORD(t1,(ha+0x00800000)&0xfffff000); + t2 = a - t1; + w = sqrtf2(t1*y1-(w*(-w)-(t1*y2+t2*b))); + } + if(k!=0) { + SET_FLOAT_WORD(t1,(127+k)<<23); + return t1*w; + } else return w; +} diff --git a/libc/tinymath/hypotl.c b/libc/tinymath/hypotl.c index a45fd7ed4..a569d3239 100644 --- a/libc/tinymath/hypotl.c +++ b/libc/tinymath/hypotl.c @@ -30,7 +30,6 @@ #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) __static_yoink("musl_libc_notice"); - #if LDBL_MANT_DIG == 64 #define SPLIT (0x1p32L+1) #elif LDBL_MANT_DIG == 113 diff --git a/libc/tinymath/log.c b/libc/tinymath/log.c index e793dd923..662d1e427 100644 --- a/libc/tinymath/log.c +++ b/libc/tinymath/log.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,19 +25,9 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/likely.h" -#include "libc/math.h" -#include "libc/tinymath/internal.h" -#include "libc/tinymath/log_data.internal.h" +#include "libc/tinymath/arm.internal.h" __static_yoink("arm_optimized_routines_notice"); -/* - * Double-precision log(x) function. - * - * Copyright (c) 2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ - #define T __log_data.tab #define T2 __log_data.tab2 #define B __log_data.poly1 @@ -47,95 +37,151 @@ __static_yoink("arm_optimized_routines_notice"); #define N (1 << LOG_TABLE_BITS) #define OFF 0x3fe6000000000000 -/** - * Returns natural logarithm of 𝑥. - */ -double log(double x) +/* Top 16 bits of a double. */ +static inline uint32_t +top16 (double x) { - double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo; - uint64_t ix, iz, tmp; - uint32_t top; - int k, i; - - ix = asuint64(x); - top = ix >> 48; -#define LO asuint64(1.0 - 0x1p-4) -#define HI asuint64(1.0 + 0x1.09p-4) - if (UNLIKELY(ix - LO < HI - LO)) { - /* Handle close to 1.0 inputs separately. */ - /* Fix sign of zero with downward rounding when x==1. */ - if (WANT_ROUNDING && UNLIKELY(ix == asuint64(1.0))) - return 0; - r = x - 1.0; - r2 = r * r; - r3 = r * r2; - y = r3 * - (B[1] + r * B[2] + r2 * B[3] + - r3 * (B[4] + r * B[5] + r2 * B[6] + - r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10]))); - /* Worst-case error is around 0.507 ULP. */ - w = r * 0x1p27; - double_t rhi = r + w - w; - double_t rlo = r - rhi; - w = rhi * rhi * B[0]; /* B[0] == -0.5. */ - hi = r + w; - lo = r - hi + w; - lo += B[0] * rlo * (rhi + r); - y += lo; - y += hi; - return eval_as_double(y); - } - if (UNLIKELY(top - 0x0010 >= 0x7ff0 - 0x0010)) { - /* x < 0x1p-1022 or inf or nan. */ - if (ix * 2 == 0) - return __math_divzero(1); - if (ix == asuint64(INFINITY)) /* log(inf) == inf. */ - return x; - if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0) - return __math_invalid(x); - /* x is subnormal, normalize it. */ - ix = asuint64(x * 0x1p52); - ix -= 52ULL << 52; - } - - /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. */ - tmp = ix - OFF; - i = (tmp >> (52 - LOG_TABLE_BITS)) % N; - k = (int64_t)tmp >> 52; /* arithmetic shift */ - iz = ix - (tmp & 0xfffULL << 52); - invc = T[i].invc; - logc = T[i].logc; - z = asdouble(iz); - - /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ - /* r ~= z/c - 1, |r| < 1/(2*N). */ -#if __FP_FAST_FMA - /* rounding error: 0x1p-55/N. */ - r = __builtin_fma(z, invc, -1.0); -#else - /* rounding error: 0x1p-55/N + 0x1p-66. */ - r = (z - T2[i].chi - T2[i].clo) * invc; -#endif - kd = (double_t)k; - - /* hi + lo = r + log(c) + k*Ln2. */ - w = kd * Ln2hi + logc; - hi = w + r; - lo = w - hi + r + kd * Ln2lo; - - /* log(x) = lo + (log1p(r) - r) + hi. */ - r2 = r * r; /* rounding error: 0x1p-54/N^2. */ - /* Worst case error if |y| > 0x1p-5: - 0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma) - Worst case error if |y| > 0x1p-4: - 0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */ - y = lo + r2 * A[0] + - r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi; - return eval_as_double(y); + return asuint64 (x) >> 48; } -#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 -__weak_reference(log, logl); +/** + * Returns natural logarithm of 𝑥. + * + * @raise EDOM and FE_INVALID if x is negative + * @raise ERANGE and FE_DIVBYZERO if x is zero + */ +double +log (double x) +{ + /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ + double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo; + uint64_t ix, iz, tmp; + uint32_t top; + int k, i; + + ix = asuint64 (x); + top = top16 (x); + +#if LOG_POLY1_ORDER == 10 || LOG_POLY1_ORDER == 11 +# define LO asuint64 (1.0 - 0x1p-5) +# define HI asuint64 (1.0 + 0x1.1p-5) +#elif LOG_POLY1_ORDER == 12 +# define LO asuint64 (1.0 - 0x1p-4) +# define HI asuint64 (1.0 + 0x1.09p-4) +#endif + if (unlikely (ix - LO < HI - LO)) + { + /* Handle close to 1.0 inputs separately. */ + /* Fix sign of zero with downward rounding when x==1. */ + if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0))) + return 0; + r = x - 1.0; + r2 = r * r; + r3 = r * r2; +#if LOG_POLY1_ORDER == 10 + /* Worst-case error is around 0.516 ULP. */ + y = r3 * (B[1] + r * B[2] + r2 * B[3] + + r3 * (B[4] + r * B[5] + r2 * B[6] + r3 * (B[7] + r * B[8]))); + w = B[0] * r2; /* B[0] == -0.5. */ + hi = r + w; + y += r - hi + w; + y += hi; +#elif LOG_POLY1_ORDER == 11 + /* Worst-case error is around 0.516 ULP. */ + y = r3 * (B[1] + r * B[2] + + r2 * (B[3] + r * B[4] + r2 * B[5] + + r3 * (B[6] + r * B[7] + r2 * B[8] + r3 * B[9]))); + w = B[0] * r2; /* B[0] == -0.5. */ + hi = r + w; + y += r - hi + w; + y += hi; +#elif LOG_POLY1_ORDER == 12 + y = r3 * (B[1] + r * B[2] + r2 * B[3] + + r3 * (B[4] + r * B[5] + r2 * B[6] + + r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10]))); +# if N <= 64 + /* Worst-case error is around 0.532 ULP. */ + w = B[0] * r2; /* B[0] == -0.5. */ + hi = r + w; + y += r - hi + w; + y += hi; +# else + /* Worst-case error is around 0.507 ULP. */ + w = r * 0x1p27; + double_t rhi = r + w - w; + double_t rlo = r - rhi; + w = rhi * rhi * B[0]; /* B[0] == -0.5. */ + hi = r + w; + lo = r - hi + w; + lo += B[0] * rlo * (rhi + r); + y += lo; + y += hi; +# endif +#endif + return eval_as_double (y); + } + if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010)) + { + /* x < 0x1p-1022 or inf or nan. */ + if (ix * 2 == 0) + return __math_divzero (1); + if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */ + return x; + if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0) + return __math_invalid (x); + /* x is subnormal, normalize it. */ + ix = asuint64 (x * 0x1p52); + ix -= 52ULL << 52; + } + + /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. + The range is split into N subintervals. + The ith subinterval contains z and c is near its center. */ + tmp = ix - OFF; + i = (tmp >> (52 - LOG_TABLE_BITS)) % N; + k = (int64_t) tmp >> 52; /* arithmetic shift */ + iz = ix - (tmp & 0xfffULL << 52); + invc = T[i].invc; + logc = T[i].logc; + z = asdouble (iz); + + /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ + /* r ~= z/c - 1, |r| < 1/(2*N). */ +#if HAVE_FAST_FMA + /* rounding error: 0x1p-55/N. */ + r = fma (z, invc, -1.0); +#else + /* rounding error: 0x1p-55/N + 0x1p-66. */ + r = (z - T2[i].chi - T2[i].clo) * invc; +#endif + kd = (double_t) k; + + /* hi + lo = r + log(c) + k*Ln2. */ + w = kd * Ln2hi + logc; + hi = w + r; + lo = w - hi + r + kd * Ln2lo; + + /* log(x) = lo + (log1p(r) - r) + hi. */ + r2 = r * r; /* rounding error: 0x1p-54/N^2. */ + /* Worst case error if |y| > 0x1p-5: + 0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma) + Worst case error if |y| > 0x1p-4: + 0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */ +#if LOG_POLY_ORDER == 6 + y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi; +#elif LOG_POLY_ORDER == 7 + y = lo + + r2 * (A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + + r2 * r2 * (A[4] + r * A[5])) + + hi; +#endif + return eval_as_double (y); +} + +#if USE_GLIBC_ABI +strong_alias (log, __log_finite) +hidden_alias (log, __ieee754_log) +# if LDBL_MANT_DIG == 53 +long double logl (long double x) { return log (x); } +# endif #endif diff --git a/libc/tinymath/log10.c b/libc/tinymath/log10.c index e073c1e38..79d14e8b8 100644 --- a/libc/tinymath/log10.c +++ b/libc/tinymath/log10.c @@ -29,11 +29,9 @@ #include "libc/math.h" #include "libc/tinymath/complex.internal.h" #include "libc/tinymath/internal.h" -#include "libc/tinymath/log2_data.internal.h" __static_yoink("musl_libc_notice"); __static_yoink("fdlibm_notice"); - /* origin: FreeBSD /usr/src/lib/msun/src/e_log10.c */ /* * ==================================================== diff --git a/libc/tinymath/log1p.c b/libc/tinymath/log1p.c index 97e9b51c6..c965de88b 100644 --- a/libc/tinymath/log1p.c +++ b/libc/tinymath/log1p.c @@ -2,74 +2,84 @@ │ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ +│ Copyright (c) 1992-2024 The FreeBSD Project │ +│ Copyright (c) 1993 Sun Microsystems, Inc. │ +│ All rights reserved. │ │ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ +│ Redistribution and use in source and binary forms, with or without │ +│ modification, are permitted provided that the following conditions │ +│ are met: │ +│ 1. Redistributions of source code must retain the above copyright │ +│ notice, this list of conditions and the following disclaimer. │ +│ 2. Redistributions in binary form must reproduce the above copyright │ +│ notice, this list of conditions and the following disclaimer in the │ +│ documentation and/or other materials provided with the distribution. │ │ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ +│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND │ +│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE │ +│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE │ +│ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE │ +│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL │ +│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS │ +│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) │ +│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT │ +│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY │ +│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF │ +│ SUCH DAMAGE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/likely.h" -#include "libc/math.h" -#include "libc/tinymath/internal.h" -#include "libc/tinymath/log_data.internal.h" +#include "libc/tinymath/freebsd.internal.h" __static_yoink("freebsd_libm_notice"); __static_yoink("fdlibm_notice"); -/* origin: FreeBSD /usr/src/lib/msun/src/s_log1p.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ /* double log1p(double x) - * Return the natural logarithm of 1+x. * * Method : * 1. Argument Reduction: find k and f such that - * 1+x = 2^k * (1+f), - * where sqrt(2)/2 < 1+f < sqrt(2) . + * 1+x = 2^k * (1+f), + * where sqrt(2)/2 < 1+f < sqrt(2) . * * Note. If k=0, then f=x is exact. However, if k!=0, then f - * may not be representable exactly. In that case, a correction - * term is need. Let u=1+x rounded. Let c = (1+x)-u, then - * log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u), - * and add back the correction term c/u. - * (Note: when x > 2**53, one can simply return log(x)) + * may not be representable exactly. In that case, a correction + * term is need. Let u=1+x rounded. Let c = (1+x)-u, then + * log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u), + * and add back the correction term c/u. + * (Note: when x > 2**53, one can simply return log(x)) * - * 2. Approximation of log(1+f): See log.c + * 2. Approximation of log1p(f). + * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) + * = 2s + 2/3 s**3 + 2/5 s**5 + ....., + * = 2s + s*R + * We use a special Reme algorithm on [0,0.1716] to generate + * a polynomial of degree 14 to approximate R The maximum error + * of this polynomial approximation is bounded by 2**-58.45. In + * other words, + * 2 4 6 8 10 12 14 + * R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s +Lp6*s +Lp7*s + * (the values of Lp1 to Lp7 are listed in the program) + * and + * | 2 14 | -58.45 + * | Lp1*s +...+Lp7*s - R(z) | <= 2 + * | | + * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. + * In order to guarantee error in log below 1ulp, we compute log + * by + * log1p(f) = f - (hfsq - s*(hfsq+R)). * - * 3. Finally, log1p(x) = k*ln2 + log(1+f) + c/u. See log.c + * 3. Finally, log1p(x) = k*ln2 + log1p(f). + * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) + * Here ln2 is split into two floating point number: + * ln2_hi + ln2_lo, + * where n*ln2_hi is always exact for |n| < 2000. * * Special cases: - * log1p(x) is NaN with signal if x < -1 (including -INF) ; - * log1p(+INF) is +INF; log1p(-1) is -INF with signal; - * log1p(NaN) is that NaN with no signal. + * log1p(x) is NaN with signal if x < -1 (including -INF) ; + * log1p(+INF) is +INF; log1p(-1) is -INF with signal; + * log1p(NaN) is that NaN with no signal. * * Accuracy: - * according to an error analysis, the error is always less than - * 1 ulp (unit in the last place). + * according to an error analysis, the error is always less than + * 1 ulp (unit in the last place). * * Constants: * The hexadecimal values are the intended ones for the following @@ -78,84 +88,110 @@ __static_yoink("fdlibm_notice"); * to produce the hexadecimal values shown. * * Note: Assuming log() return accurate answer, the following - * algorithm can be used to compute log1p(x) to within a few ULP: + * algorithm can be used to compute log1p(x) to within a few ULP: * - * u = 1+x; - * if(u==1.0) return x ; else - * return log(u)*(x/(u-1.0)); + * u = 1+x; + * if(u==1.0) return x ; else + * return log(u)*(x/(u-1.0)); * - * See HP-15C Advanced Functions Handbook, p.193. + * See HP-15C Advanced Functions Handbook, p.193. */ static const double -ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */ -ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */ -Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ -Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ -Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ -Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ -Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ -Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ -Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ +ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */ +ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */ +two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */ +Lp1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ +Lp2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ +Lp3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ +Lp4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ +Lp5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ +Lp6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ +Lp7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ + +static const double zero = 0.0; +static volatile double vzero = 0.0; /** - * Returns log(𝟷+𝑥). + * Returns log(1 + x). */ -double log1p(double x) +double +log1p(double x) { - union {double f; uint64_t i;} u = {x}; - double_t hfsq,f,c,s,z,R,w,t1,t2,dk; - uint32_t hx,hu; - int k; + double hfsq,f,c,s,z,R,u; + int32_t k,hx,hu,ax; + + GET_HIGH_WORD(hx,x); + ax = hx&0x7fffffff; - hx = u.i>>32; k = 1; - if (hx < 0x3fda827a || hx>>31) { /* 1+x < sqrt(2)+ */ - if (hx >= 0xbff00000) { /* x <= -1.0 */ - if (x == -1) - return x/0.0; /* log1p(-1) = -inf */ - return (x-x)/0.0; /* log1p(x<-1) = NaN */ - } - if (hx<<1 < 0x3ca00000<<1) { /* |x| < 2**-53 */ - /* underflow if subnormal */ - if ((hx&0x7ff00000) == 0) - FORCE_EVAL((float)x); - return x; - } - if (hx <= 0xbfd2bec4) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ - k = 0; - c = 0; - f = x; - } - } else if (hx >= 0x7ff00000) - return x; - if (k) { - u.f = 1 + x; - hu = u.i>>32; - hu += 0x3ff00000 - 0x3fe6a09e; - k = (int)(hu>>20) - 0x3ff; - /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */ - if (k < 54) { - c = k >= 2 ? 1-(u.f-x) : x-(u.f-1); - c /= u.f; - } else - c = 0; - /* reduce u into [sqrt(2)/2, sqrt(2)] */ - hu = (hu&0x000fffff) + 0x3fe6a09e; - u.i = (uint64_t)hu<<32 | (u.i&0xffffffff); - f = u.f - 1; + if (hx < 0x3FDA827A) { /* 1+x < sqrt(2)+ */ + if(ax>=0x3ff00000) { /* x <= -1.0 */ + if(x==-1.0) return -two54/vzero; /* log1p(-1)=+inf */ + else return (x-x)/(x-x); /* log1p(x<-1)=NaN */ + } + if(ax<0x3e200000) { /* |x| < 2**-29 */ + if(two54+x>zero /* raise inexact */ + &&ax<0x3c900000) /* |x| < 2**-54 */ + return x; + else + return x - x*x*0.5; + } + if(hx>0||hx<=((int32_t)0xbfd2bec4)) { + k=0;f=x;hu=1;} /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ } - hfsq = 0.5*f*f; - s = f/(2.0+f); + if (hx >= 0x7ff00000) return x+x; + if(k!=0) { + if(hx<0x43400000) { + STRICT_ASSIGN(double,u,1.0+x); + GET_HIGH_WORD(hu,u); + k = (hu>>20)-1023; + c = (k>0)? 1.0-(u-x):x-(u-1.0);/* correction term */ + c /= u; + } else { + u = x; + GET_HIGH_WORD(hu,u); + k = (hu>>20)-1023; + c = 0; + } + hu &= 0x000fffff; + /* + * The approximation to sqrt(2) used in thresholds is not + * critical. However, the ones used above must give less + * strict bounds than the one here so that the k==0 case is + * never reached from here, since here we have committed to + * using the correction term but don't use it if k==0. + */ + if(hu<0x6a09e) { /* u ~< sqrt(2) */ + SET_HIGH_WORD(u,hu|0x3ff00000); /* normalize u */ + } else { + k += 1; + SET_HIGH_WORD(u,hu|0x3fe00000); /* normalize u/2 */ + hu = (0x00100000-hu)>>2; + } + f = u-1.0; + } + hfsq=0.5*f*f; + if(hu==0) { /* |f| < 2**-20 */ + if(f==zero) { + if(k==0) { + return zero; + } else { + c += k*ln2_lo; + return k*ln2_hi+c; + } + } + R = hfsq*(1.0-0.66666666666666666*f); + if(k==0) return f-R; else + return k*ln2_hi-((R-(k*ln2_lo+c))-f); + } + s = f/(2.0+f); z = s*s; - w = z*z; - t1 = w*(Lg2+w*(Lg4+w*Lg6)); - t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); - R = t2 + t1; - dk = k; - return s*(hfsq+R) + (dk*ln2_lo+c) - hfsq + f + dk*ln2_hi; + R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7)))))); + if(k==0) return f-(hfsq-s*(hfsq+R)); else + return k*ln2_hi-((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f); } -#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +#if (LDBL_MANT_DIG == 53) __weak_reference(log1p, log1pl); #endif diff --git a/libc/tinymath/log1pf.c b/libc/tinymath/log1pf.c index 59fe2aace..04e388154 100644 --- a/libc/tinymath/log1pf.c +++ b/libc/tinymath/log1pf.c @@ -1,175 +1,133 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ +│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 1992-2024 The FreeBSD Project │ +│ Copyright (c) 1993 Sun Microsystems, Inc. │ +│ All rights reserved. │ │ │ -│ Permission is hereby granted, free of charge, to any person obtaining │ -│ a copy of this software and associated documentation files (the │ -│ "Software"), to deal in the Software without restriction, including │ -│ without limitation the rights to use, copy, modify, merge, publish, │ -│ distribute, sublicense, and/or sell copies of the Software, and to │ -│ permit persons to whom the Software is furnished to do so, subject to │ -│ the following conditions: │ +│ Redistribution and use in source and binary forms, with or without │ +│ modification, are permitted provided that the following conditions │ +│ are met: │ +│ 1. Redistributions of source code must retain the above copyright │ +│ notice, this list of conditions and the following disclaimer. │ +│ 2. Redistributions in binary form must reproduce the above copyright │ +│ notice, this list of conditions and the following disclaimer in the │ +│ documentation and/or other materials provided with the distribution. │ │ │ -│ The above copyright notice and this permission notice shall be │ -│ included in all copies or substantial portions of the Software. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ -│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ -│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ -│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ -│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ -│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ -│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ +│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND │ +│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE │ +│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE │ +│ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE │ +│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL │ +│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS │ +│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) │ +│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT │ +│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY │ +│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF │ +│ SUCH DAMAGE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/likely.h" -#include "libc/math.h" -#include "libc/tinymath/internal.h" -#include "libc/tinymath/log1pf_data.internal.h" -__static_yoink("arm_optimized_routines_notice"); +#include "libc/tinymath/freebsd.internal.h" +__static_yoink("freebsd_libm_notice"); +__static_yoink("fdlibm_notice"); -#define Ln2 (0x1.62e43p-1f) -#define SignMask (0x80000000) +/* s_log1pf.c -- float version of s_log1p.c. + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ -/* Biased exponent of the largest float m for which m^8 underflows. */ -#define M8UFLOW_BOUND_BEXP 112 -/* Biased exponent of the largest float for which we just return x. */ -#define TINY_BOUND_BEXP 103 +static const float +ln2_hi = 6.9313812256e-01, /* 0x3f317180 */ +ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */ +two25 = 3.355443200e+07, /* 0x4c000000 */ +Lp1 = 6.6666668653e-01, /* 3F2AAAAB */ +Lp2 = 4.0000000596e-01, /* 3ECCCCCD */ +Lp3 = 2.8571429849e-01, /* 3E924925 */ +Lp4 = 2.2222198546e-01, /* 3E638E29 */ +Lp5 = 1.8183572590e-01, /* 3E3A3325 */ +Lp6 = 1.5313838422e-01, /* 3E1CD04F */ +Lp7 = 1.4798198640e-01; /* 3E178897 */ -#define C(i) __log1pf_data.coeffs[i] +static const float zero = 0.0; +static volatile float vzero = 0.0; -static inline float -eval_poly (float m, uint32_t e) -{ -#ifdef LOG1PF_2U5 - - /* 2.5 ulp variant. Approximate log(1+m) on [-0.25, 0.5] using - slightly modified Estrin scheme (no x^0 term, and x term is just x). */ - float p_12 = fmaf (m, C (1), C (0)); - float p_34 = fmaf (m, C (3), C (2)); - float p_56 = fmaf (m, C (5), C (4)); - float p_78 = fmaf (m, C (7), C (6)); - - float m2 = m * m; - float p_02 = fmaf (m2, p_12, m); - float p_36 = fmaf (m2, p_56, p_34); - float p_79 = fmaf (m2, C (8), p_78); - - float m4 = m2 * m2; - float p_06 = fmaf (m4, p_36, p_02); - - if (UNLIKELY (e < M8UFLOW_BOUND_BEXP)) - return p_06; - - float m8 = m4 * m4; - return fmaf (m8, p_79, p_06); - -#elif defined(LOG1PF_1U3) - - /* 1.3 ulp variant. Approximate log(1+m) on [-0.25, 0.5] using Horner - scheme. Our polynomial approximation for log1p has the form - x + C1 * x^2 + C2 * x^3 + C3 * x^4 + ... - Hence approximation has the form m + m^2 * P(m) - where P(x) = C1 + C2 * x + C3 * x^2 + ... . */ - return fmaf (m, m * HORNER_8 (m, C), m); - -#else -#error No log1pf approximation exists with the requested precision. Options are 13 or 25. -#endif -} - -static inline uint32_t -biased_exponent (uint32_t ix) -{ - return (ix & 0x7f800000) >> 23; -} - -/* log1pf approximation using polynomial on reduced interval. Worst-case error - when using Estrin is roughly 2.02 ULP: - log1pf(0x1.21e13ap-2) got 0x1.fe8028p-3 want 0x1.fe802cp-3. */ +/** + * Returns log(1 + x). + */ float -log1pf (float x) +log1pf(float x) { - uint32_t ix = asuint (x); - uint32_t ia = ix & ~SignMask; - uint32_t ia12 = ia >> 20; - uint32_t e = biased_exponent (ix); + float hfsq,f,c,s,z,R,u; + int32_t k,hx,hu,ax; - /* Handle special cases first. */ - if (UNLIKELY (ia12 >= 0x7f8 || ix >= 0xbf800000 || ix == 0x80000000 - || e <= TINY_BOUND_BEXP)) - { - if (ix == 0xff800000) - { - /* x == -Inf => log1pf(x) = NaN. */ - return NAN; + GET_FLOAT_WORD(hx,x); + ax = hx&0x7fffffff; + + k = 1; + if (hx < 0x3ed413d0) { /* 1+x < sqrt(2)+ */ + if(ax>=0x3f800000) { /* x <= -1.0 */ + if(x==(float)-1.0) return -two25/vzero; /* log1p(-1)=+inf */ + else return (x-x)/(x-x); /* log1p(x<-1)=NaN */ + } + if(ax<0x38000000) { /* |x| < 2**-15 */ + if(two25+x>zero /* raise inexact */ + &&ax<0x33800000) /* |x| < 2**-24 */ + return x; + else + return x - x*x*(float)0.5; + } + if(hx>0||hx<=((int32_t)0xbe95f619)) { + k=0;f=x;hu=1;} /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ } - if ((ix == 0x7f800000 || e <= TINY_BOUND_BEXP) && ia12 <= 0x7f8) - { - /* |x| < TinyBound => log1p(x) = x. - x == Inf => log1pf(x) = Inf. */ - return x; + if (hx >= 0x7f800000) return x+x; + if(k!=0) { + if(hx<0x5a000000) { + STRICT_ASSIGN(float,u,(float)1.0+x); + GET_FLOAT_WORD(hu,u); + k = (hu>>23)-127; + /* correction term */ + c = (k>0)? (float)1.0-(u-x):x-(u-(float)1.0); + c /= u; + } else { + u = x; + GET_FLOAT_WORD(hu,u); + k = (hu>>23)-127; + c = 0; + } + hu &= 0x007fffff; + /* + * The approximation to sqrt(2) used in thresholds is not + * critical. However, the ones used above must give less + * strict bounds than the one here so that the k==0 case is + * never reached from here, since here we have committed to + * using the correction term but don't use it if k==0. + */ + if(hu<0x3504f4) { /* u < sqrt(2) */ + SET_FLOAT_WORD(u,hu|0x3f800000);/* normalize u */ + } else { + k += 1; + SET_FLOAT_WORD(u,hu|0x3f000000); /* normalize u/2 */ + hu = (0x00800000-hu)>>2; + } + f = u-(float)1.0; } - if (ix == 0xbf800000) - { - /* x == -1.0 => log1pf(x) = -Inf. */ - return __math_divzerof (-1); + hfsq=(float)0.5*f*f; + if(hu==0) { /* |f| < 2**-20 */ + if(f==zero) { + if(k==0) { + return zero; + } else { + c += k*ln2_lo; + return k*ln2_hi+c; + } + } + R = hfsq*((float)1.0-(float)0.66666666666666666*f); + if(k==0) return f-R; else + return k*ln2_hi-((R-(k*ln2_lo+c))-f); } - if (ia12 >= 0x7f8) - { - /* x == +/-NaN => log1pf(x) = NaN. */ - return __math_invalidf (asfloat (ia)); - } - /* x < -1.0 => log1pf(x) = NaN. */ - return __math_invalidf (x); - } - - /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m - is in [-0.25, 0.5]): - log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2). - - We approximate log1p(m) with a polynomial, then scale by - k*log(2). Instead of doing this directly, we use an intermediate - scale factor s = 4*k*log(2) to ensure the scale is representable - as a normalised fp32 number. */ - - if (ix <= 0x3f000000 || ia <= 0x3e800000) - { - /* If x is in [-0.25, 0.5] then we can shortcut all the logic - below, as k = 0 and m = x. All we need is to return the - polynomial. */ - return eval_poly (x, e); - } - - float m = x + 1.0f; - - /* k is used scale the input. 0x3f400000 is chosen as we are trying to - reduce x to the range [-0.25, 0.5]. Inside this range, k is 0. - Outside this range, if k is reinterpreted as (NOT CONVERTED TO) float: - let k = sign * 2^p where sign = -1 if x < 0 - 1 otherwise - and p is a negative integer whose magnitude increases with the - magnitude of x. */ - int k = (asuint (m) - 0x3f400000) & 0xff800000; - - /* By using integer arithmetic, we obtain the necessary scaling by - subtracting the unbiased exponent of k from the exponent of x. */ - float m_scale = asfloat (asuint (x) - k); - - /* Scale up to ensure that the scale factor is representable as normalised - fp32 number (s in [2**-126,2**26]), and scale m down accordingly. */ - float s = asfloat (asuint (4.0f) - k); - m_scale = m_scale + fmaf (0.25f, s, -1.0f); - - float p = eval_poly (m_scale, biased_exponent (asuint (m_scale))); - - /* The scale factor to be applied back at the end - by multiplying float(k) - by 2^-23 we get the unbiased exponent of k. */ - float scale_back = (float) k * 0x1.0p-23f; - - /* Apply the scaling back. */ - return fmaf (scale_back, Ln2, p); + s = f/((float)2.0+f); + z = s*s; + R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7)))))); + if(k==0) return f-(hfsq-s*(hfsq+R)); else + return k*ln2_hi-((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f); } diff --git a/libc/tinymath/log1pf_data.internal.h b/libc/tinymath/log1pf_data.internal.h deleted file mode 100644 index 867f79fd3..000000000 --- a/libc/tinymath/log1pf_data.internal.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG1PF_DATA_H_ -#define COSMOPOLITAN_LIBC_TINYMATH_LOG1PF_DATA_H_ -COSMOPOLITAN_C_START_ - -#define LOG1PF_2U5 -#define V_LOG1PF_2U5 -#define LOG1PF_NCOEFFS 9 -extern const struct log1pf_data { - float coeffs[LOG1PF_NCOEFFS]; // -} __log1pf_data; - -COSMOPOLITAN_C_END_ -#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG1PF_DATA_H_ */ diff --git a/libc/tinymath/log2.c b/libc/tinymath/log2.c index 29a774c56..a1f802383 100644 --- a/libc/tinymath/log2.c +++ b/libc/tinymath/log2.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,20 +25,9 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/likely.h" -#include "libc/math.h" -#include "libc/tinymath/complex.internal.h" -#include "libc/tinymath/internal.h" -#include "libc/tinymath/log2_data.internal.h" +#include "libc/tinymath/arm.internal.h" __static_yoink("arm_optimized_routines_notice"); -/* - * Double-precision log2(x) function. - * - * Copyright (c) 2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ - #define T __log2_data.tab #define T2 __log2_data.tab2 #define B __log2_data.poly1 @@ -49,110 +38,126 @@ __static_yoink("arm_optimized_routines_notice"); #define OFF 0x3fe6000000000000 /* Top 16 bits of a double. */ -static inline uint32_t top16(double x) +static inline uint32_t +top16 (double x) { - return asuint64(x) >> 48; + return asuint64 (x) >> 48; } /** - * Calculates log₂𝑥. + * Returns base 2 logarithm of x. */ -double log2(double x) +double +log2 (double x) { - double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p; - uint64_t ix, iz, tmp; - uint32_t top; - int k, i; + /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ + double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p; + uint64_t ix, iz, tmp; + uint32_t top; + int k, i; - ix = asuint64(x); - top = top16(x); -#define LO asuint64(1.0 - 0x1.5b51p-5) -#define HI asuint64(1.0 + 0x1.6ab2p-5) - if (UNLIKELY(ix - LO < HI - LO)) { - /* Handle close to 1.0 inputs separately. */ - /* Fix sign of zero with downward rounding when x==1. */ - if (WANT_ROUNDING && UNLIKELY(ix == asuint64(1.0))) - return 0; - r = x - 1.0; -#if __FP_FAST_FMA - hi = r * InvLn2hi; - lo = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -hi); -#else - double_t rhi, rlo; - rhi = asdouble(asuint64(r) & -1ULL << 32); - rlo = r - rhi; - hi = rhi * InvLn2hi; - lo = rlo * InvLn2hi + r * InvLn2lo; + ix = asuint64 (x); + top = top16 (x); + +#if LOG2_POLY1_ORDER == 11 +# define LO asuint64 (1.0 - 0x1.5b51p-5) +# define HI asuint64 (1.0 + 0x1.6ab2p-5) #endif - r2 = r * r; /* rounding error: 0x1p-62. */ - r4 = r2 * r2; - /* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */ - p = r2 * (B[0] + r * B[1]); - y = hi + p; - lo += hi - y + p; - lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) + - r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9]))); - y += lo; - return eval_as_double(y); - } - if (UNLIKELY(top - 0x0010 >= 0x7ff0 - 0x0010)) { - /* x < 0x1p-1022 or inf or nan. */ - if (ix * 2 == 0) - return __math_divzero(1); - if (ix == asuint64(INFINITY)) /* log(inf) == inf. */ - return x; - if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0) - return __math_invalid(x); - /* x is subnormal, normalize it. */ - ix = asuint64(x * 0x1p52); - ix -= 52ULL << 52; - } - - /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. */ - tmp = ix - OFF; - i = (tmp >> (52 - LOG2_TABLE_BITS)) % N; - k = (int64_t)tmp >> 52; /* arithmetic shift */ - iz = ix - (tmp & 0xfffULL << 52); - invc = T[i].invc; - logc = T[i].logc; - z = asdouble(iz); - kd = (double_t)k; - - /* log2(x) = log2(z/c) + log2(c) + k. */ - /* r ~= z/c - 1, |r| < 1/(2*N). */ -#if __FP_FAST_FMA - /* rounding error: 0x1p-55/N. */ - r = __builtin_fma(z, invc, -1.0); - t1 = r * InvLn2hi; - t2 = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -t1); + if (unlikely (ix - LO < HI - LO)) + { + /* Handle close to 1.0 inputs separately. */ + /* Fix sign of zero with downward rounding when x==1. */ + if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0))) + return 0; + r = x - 1.0; +#if HAVE_FAST_FMA + hi = r * InvLn2hi; + lo = r * InvLn2lo + fma (r, InvLn2hi, -hi); #else - double_t rhi, rlo; - /* rounding error: 0x1p-55/N + 0x1p-65. */ - r = (z - T2[i].chi - T2[i].clo) * invc; - rhi = asdouble(asuint64(r) & -1ULL << 32); - rlo = r - rhi; - t1 = rhi * InvLn2hi; - t2 = rlo * InvLn2hi + r * InvLn2lo; + double_t rhi, rlo; + rhi = asdouble (asuint64 (r) & -1ULL << 32); + rlo = r - rhi; + hi = rhi * InvLn2hi; + lo = rlo * InvLn2hi + r * InvLn2lo; +#endif + r2 = r * r; /* rounding error: 0x1p-62. */ + r4 = r2 * r2; +#if LOG2_POLY1_ORDER == 11 + /* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */ + p = r2 * (B[0] + r * B[1]); + y = hi + p; + lo += hi - y + p; + lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) + + r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9]))); + y += lo; +#endif + return eval_as_double (y); + } + if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010)) + { + /* x < 0x1p-1022 or inf or nan. */ + if (ix * 2 == 0) + return __math_divzero (1); + if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */ + return x; + if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0) + return __math_invalid (x); + /* x is subnormal, normalize it. */ + ix = asuint64 (x * 0x1p52); + ix -= 52ULL << 52; + } + + /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. + The range is split into N subintervals. + The ith subinterval contains z and c is near its center. */ + tmp = ix - OFF; + i = (tmp >> (52 - LOG2_TABLE_BITS)) % N; + k = (int64_t) tmp >> 52; /* arithmetic shift */ + iz = ix - (tmp & 0xfffULL << 52); + invc = T[i].invc; + logc = T[i].logc; + z = asdouble (iz); + kd = (double_t) k; + + /* log2(x) = log2(z/c) + log2(c) + k. */ + /* r ~= z/c - 1, |r| < 1/(2*N). */ +#if HAVE_FAST_FMA + /* rounding error: 0x1p-55/N. */ + r = fma (z, invc, -1.0); + t1 = r * InvLn2hi; + t2 = r * InvLn2lo + fma (r, InvLn2hi, -t1); +#else + double_t rhi, rlo; + /* rounding error: 0x1p-55/N + 0x1p-65. */ + r = (z - T2[i].chi - T2[i].clo) * invc; + rhi = asdouble (asuint64 (r) & -1ULL << 32); + rlo = r - rhi; + t1 = rhi * InvLn2hi; + t2 = rlo * InvLn2hi + r * InvLn2lo; #endif - /* hi + lo = r/ln2 + log2(c) + k. */ - t3 = kd + logc; - hi = t3 + t1; - lo = t3 - hi + t1 + t2; + /* hi + lo = r/ln2 + log2(c) + k. */ + t3 = kd + logc; + hi = t3 + t1; + lo = t3 - hi + t1 + t2; - /* log2(r+1) = r/ln2 + r^2*poly(r). */ - /* Evaluation is optimized assuming superscalar pipelined execution. */ - r2 = r * r; /* rounding error: 0x1p-54/N^2. */ - r4 = r2 * r2; - /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma). - ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */ - p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]); - y = lo + r2 * p + hi; - return eval_as_double(y); + /* log2(r+1) = r/ln2 + r^2*poly(r). */ + /* Evaluation is optimized assuming superscalar pipelined execution. */ + r2 = r * r; /* rounding error: 0x1p-54/N^2. */ + r4 = r2 * r2; +#if LOG2_POLY_ORDER == 7 + /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma). + ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */ + p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]); + y = lo + r2 * p + hi; +#endif + return eval_as_double (y); } -#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 -__weak_reference(log2, log2l); +#if USE_GLIBC_ABI +strong_alias (log2, __log2_finite) +hidden_alias (log2, __ieee754_log2) +# if LDBL_MANT_DIG == 53 +long double log2l (long double x) { return log2 (x); } +# endif #endif diff --git a/libc/tinymath/log2_data.c b/libc/tinymath/log2_data.c index 2ecd1da72..bd0658073 100644 --- a/libc/tinymath/log2_data.c +++ b/libc/tinymath/log2_data.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ +│ Optimized Routines │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,14 +25,8 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/log2_data.internal.h" - -/* - * Data for log2. - * - * Copyright (c) 2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ +#include "libc/tinymath/arm.internal.h" +__static_yoink("arm_optimized_routines_notice"); #define N (1 << LOG2_TABLE_BITS) @@ -41,6 +35,7 @@ const struct log2_data __log2_data = { .invln2hi = 0x1.7154765200000p+0, .invln2lo = 0x1.705fc2eefa200p-33, .poly1 = { +#if LOG2_POLY1_ORDER == 11 // relative error: 0x1.2fad8188p-63 // in -0x1.5b51p-5 0x1.6ab2p-5 -0x1.71547652b82fep-1, @@ -53,8 +48,10 @@ const struct log2_data __log2_data = { 0x1.484d154f01b4ap-3, -0x1.289e4a72c383cp-3, 0x1.0b32f285aee66p-3, +#endif }, .poly = { +#if N == 64 && LOG2_POLY_ORDER == 7 // relative error: 0x1.a72c2bf8p-58 // abs error: 0x1.67a552c8p-66 // in -0x1.f45p-8 0x1.f45p-8 @@ -64,6 +61,7 @@ const struct log2_data __log2_data = { 0x1.2776c50034c48p-2, -0x1.ec7b328ea92bcp-3, 0x1.a6225e117f92ep-3, +#endif }, /* Algorithm: @@ -92,6 +90,7 @@ single rounding error when there is no fast fma for z*invc - 1, 3) ensures that logc + poly(z/c - 1) has small error, however near x == 1 when |log2(x)| < 0x1p-4, this is not enough so that is special cased. */ .tab = { +#if N == 64 {0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1}, {0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1}, {0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1}, @@ -156,9 +155,11 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when {0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2}, {0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2}, {0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2}, +#endif }, -#if !__FP_FAST_FMA +#if !HAVE_FAST_FMA .tab2 = { +# if N == 64 {0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55}, {0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57}, {0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55}, @@ -223,6 +224,7 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when {0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55}, {0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55}, {0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54}, +# endif }, -#endif +#endif /* !HAVE_FAST_FMA */ }; diff --git a/libc/tinymath/log2_data.internal.h b/libc/tinymath/log2_data.internal.h deleted file mode 100644 index d838672f6..000000000 --- a/libc/tinymath/log2_data.internal.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG2_DATA_INTERNAL_H_ -#define COSMOPOLITAN_LIBC_TINYMATH_LOG2_DATA_INTERNAL_H_ - -#define LOG2_TABLE_BITS 6 -#define LOG2_POLY_ORDER 7 -#define LOG2_POLY1_ORDER 11 - -COSMOPOLITAN_C_START_ - -extern const struct log2_data { - double invln2hi; - double invln2lo; - double poly[LOG2_POLY_ORDER - 1]; - double poly1[LOG2_POLY1_ORDER - 1]; - struct { - double invc, logc; - } tab[1 << LOG2_TABLE_BITS]; -#if !__FP_FAST_FMA - struct { - double chi, clo; - } tab2[1 << LOG2_TABLE_BITS]; -#endif -} __log2_data; - -COSMOPOLITAN_C_END_ -#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG2_DATA_INTERNAL_H_ */ diff --git a/libc/tinymath/log2f.c b/libc/tinymath/log2f.c index d7cc7c88e..044bd4004 100644 --- a/libc/tinymath/log2f.c +++ b/libc/tinymath/log2f.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,20 +25,9 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/likely.h" -#include "libc/math.h" -#include "libc/tinymath/complex.internal.h" -#include "libc/tinymath/internal.h" -#include "libc/tinymath/log2f_data.internal.h" +#include "libc/tinymath/arm.internal.h" __static_yoink("arm_optimized_routines_notice"); -/* - * Single-precision log2 function. - * - * Copyright (c) 2017-2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ - /* LOG2F_TABLE_BITS = 4 LOG2F_POLY_ORDER = 4 @@ -53,52 +42,65 @@ Relative error: 1.9 * 2^-26 (before rounding.) #define OFF 0x3f330000 /** - * Calculates log₂𝑥. + * Returns base-2 logarithm of x. + * + * - ULP error: 0.752 (nearest rounding.) + * - Relative error: 1.9 * 2^-26 (before rounding.) */ -float log2f(float x) +float +log2f (float x) { - double_t z, r, r2, p, y, y0, invc, logc; - uint32_t ix, iz, top, tmp; - int k, i; + /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ + double_t z, r, r2, p, y, y0, invc, logc; + uint32_t ix, iz, top, tmp; + int k, i; - ix = asuint(x); - /* Fix sign of zero with downward rounding when x==1. */ - if (WANT_ROUNDING && UNLIKELY(ix == 0x3f800000)) - return 0; - if (UNLIKELY(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) { - /* x < 0x1p-126 or inf or nan. */ - if (ix * 2 == 0) - return __math_divzerof(1); - if (ix == 0x7f800000) /* log2(inf) == inf. */ - return x; - if ((ix & 0x80000000) || ix * 2 >= 0xff000000) - return __math_invalidf(x); - /* x is subnormal, normalize it. */ - ix = asuint(x * 0x1p23f); - ix -= 23 << 23; - } + ix = asuint (x); +#if WANT_ROUNDING + /* Fix sign of zero with downward rounding when x==1. */ + if (unlikely (ix == 0x3f800000)) + return 0; +#endif + if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000)) + { + /* x < 0x1p-126 or inf or nan. */ + if (ix * 2 == 0) + return __math_divzerof (1); + if (ix == 0x7f800000) /* log2(inf) == inf. */ + return x; + if ((ix & 0x80000000) || ix * 2 >= 0xff000000) + return __math_invalidf (x); + /* x is subnormal, normalize it. */ + ix = asuint (x * 0x1p23f); + ix -= 23 << 23; + } - /* x = 2^k z; where z is in range [OFF,2*OFF] and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. */ - tmp = ix - OFF; - i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N; - top = tmp & 0xff800000; - iz = ix - top; - k = (int32_t)tmp >> 23; /* arithmetic shift */ - invc = T[i].invc; - logc = T[i].logc; - z = (double_t)asfloat(iz); + /* x = 2^k z; where z is in range [OFF,2*OFF] and exact. + The range is split into N subintervals. + The ith subinterval contains z and c is near its center. */ + tmp = ix - OFF; + i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N; + top = tmp & 0xff800000; + iz = ix - top; + k = (int32_t) tmp >> 23; /* arithmetic shift */ + invc = T[i].invc; + logc = T[i].logc; + z = (double_t) asfloat (iz); - /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */ - r = z * invc - 1; - y0 = logc + (double_t)k; + /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */ + r = z * invc - 1; + y0 = logc + (double_t) k; - /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */ - r2 = r * r; - y = A[1] * r + A[2]; - y = A[0] * r2 + y; - p = A[3] * r + y0; - y = y * r2 + p; - return eval_as_float(y); + /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */ + r2 = r * r; + y = A[1] * r + A[2]; + y = A[0] * r2 + y; + p = A[3] * r + y0; + y = y * r2 + p; + return eval_as_float (y); } + +#if USE_GLIBC_ABI +strong_alias (log2f, __log2f_finite) +hidden_alias (log2f, __ieee754_log2f) +#endif diff --git a/libc/tinymath/log2f_data.c b/libc/tinymath/log2f_data.c index bd04b407d..f34e7a8b4 100644 --- a/libc/tinymath/log2f_data.c +++ b/libc/tinymath/log2f_data.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,16 +25,9 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/log2f_data.internal.h" +#include "libc/tinymath/arm.internal.h" __static_yoink("arm_optimized_routines_notice"); -/* - * Data definition for log2f. - * - * Copyright (c) 2017-2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ - const struct log2f_data __log2f_data = { .tab = { { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 }, diff --git a/libc/tinymath/log2f_data.internal.h b/libc/tinymath/log2f_data.internal.h deleted file mode 100644 index 835274890..000000000 --- a/libc/tinymath/log2f_data.internal.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG2F_DATA_INTERNAL_H_ -#define COSMOPOLITAN_LIBC_TINYMATH_LOG2F_DATA_INTERNAL_H_ - -#define LOG2F_TABLE_BITS 4 -#define LOG2F_POLY_ORDER 4 - -COSMOPOLITAN_C_START_ - -extern const struct log2f_data { - struct { - double invc, logc; - } tab[1 << LOG2F_TABLE_BITS]; - double poly[LOG2F_POLY_ORDER]; -} __log2f_data; - -COSMOPOLITAN_C_END_ -#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG2F_DATA_INTERNAL_H_ */ diff --git a/libc/tinymath/log_data.c b/libc/tinymath/log_data.c index 0dca131ae..5c96ed1e0 100644 --- a/libc/tinymath/log_data.c +++ b/libc/tinymath/log_data.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,22 +25,41 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/log_data.internal.h" +#include "libc/tinymath/arm.internal.h" __static_yoink("arm_optimized_routines_notice"); -/* - * Data for log. - * - * Copyright (c) 2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ - #define N (1 << LOG_TABLE_BITS) const struct log_data __log_data = { .ln2hi = 0x1.62e42fefa3800p-1, .ln2lo = 0x1.ef35793c76730p-45, .poly1 = { +#if LOG_POLY1_ORDER == 10 +// relative error: 0x1.32eccc6p-62 +// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval) +-0x1p-1, +0x1.55555555554e5p-2, +-0x1.0000000000af2p-2, +0x1.9999999bbe436p-3, +-0x1.55555537f9cdep-3, +0x1.24922fc8127cfp-3, +-0x1.0000b7d6bb612p-3, +0x1.c806ee1ddbcafp-4, +-0x1.972335a9c2d6ep-4, +#elif LOG_POLY1_ORDER == 11 +// relative error: 0x1.52c8b708p-68 +// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval) +-0x1p-1, +0x1.5555555555555p-2, +-0x1.ffffffffffea9p-3, +0x1.999999999c4d4p-3, +-0x1.55555557f5541p-3, +0x1.249248fbe33e4p-3, +-0x1.ffffc9a3c825bp-4, +0x1.c71e1f204435dp-4, +-0x1.9a7f26377d06ep-4, +0x1.71c30cf8f7364p-4, +#elif LOG_POLY1_ORDER == 12 // relative error: 0x1.c04d76cp-63 // in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval) -0x1p-1, @@ -54,8 +73,20 @@ const struct log_data __log_data = { -0x1.999eb43b068ffp-4, 0x1.78182f7afd085p-4, -0x1.5521375d145cdp-4, +#endif }, .poly = { +#if N == 64 && LOG_POLY_ORDER == 7 +// relative error: 0x1.906eb8ap-58 +// abs error: 0x1.d2cad5a8p-67 +// in -0x1.fp-8 0x1.fp-8 +-0x1.0000000000027p-1, +0x1.555555555556ap-2, +-0x1.fffffff0440bap-3, +0x1.99999991906c3p-3, +-0x1.555c8d7e8201ep-3, +0x1.24978c59151fap-3, +#elif N == 128 && LOG_POLY_ORDER == 6 // relative error: 0x1.926199e8p-56 // abs error: 0x1.882ff33p-65 // in -0x1.fp-9 0x1.fp-9 @@ -64,6 +95,17 @@ const struct log_data __log_data = { -0x1.fffffffeb459p-3, 0x1.999b324f10111p-3, -0x1.55575e506c89fp-3, +#elif N == 128 && LOG_POLY_ORDER == 7 +// relative error: 0x1.649fc4bp-64 +// abs error: 0x1.c3b5769p-74 +// in -0x1.fp-9 0x1.fp-9 +-0x1.0000000000001p-1, +0x1.5555555555556p-2, +-0x1.fffffffea1a8p-3, +0x1.99999998e9139p-3, +-0x1.555776801b968p-3, +0x1.2493c29331a5cp-3, +#endif }, /* Algorithm: @@ -92,6 +134,72 @@ a single rounding error when there is no fast fma for z*invc - 1, 3) ensures that logc + poly(z/c - 1) has small error, however near x == 1 when |log(x)| < 0x1p-4, this is not enough so that is special cased. */ .tab = { +#if N == 64 +{0x1.7242886495cd8p+0, -0x1.79e267bdfe000p-2}, +{0x1.6e1f769340dc9p+0, -0x1.6e60ee0ecb000p-2}, +{0x1.6a13ccc8f195cp+0, -0x1.63002fdbf6000p-2}, +{0x1.661ec72e86f3ap+0, -0x1.57bf76c597000p-2}, +{0x1.623fa6c447b16p+0, -0x1.4c9e07f0d2000p-2}, +{0x1.5e75bbca31702p+0, -0x1.419b42f027000p-2}, +{0x1.5ac05655adb10p+0, -0x1.36b67660e6000p-2}, +{0x1.571ed3e940191p+0, -0x1.2bef0839e4800p-2}, +{0x1.539094ac0fbbfp+0, -0x1.21445727cb000p-2}, +{0x1.5015007e7fc42p+0, -0x1.16b5ca3c3d000p-2}, +{0x1.4cab877c31cf9p+0, -0x1.0c42d3805f800p-2}, +{0x1.49539e76a88d3p+0, -0x1.01eae61b60800p-2}, +{0x1.460cbc12211dap+0, -0x1.ef5adb9fb0000p-3}, +{0x1.42d6624debe3ap+0, -0x1.db13daab99000p-3}, +{0x1.3fb0144f0d462p+0, -0x1.c6ffbe896e000p-3}, +{0x1.3c995a1f9a9b4p+0, -0x1.b31d84722d000p-3}, +{0x1.3991c23952500p+0, -0x1.9f6c3cf6eb000p-3}, +{0x1.3698df35eaa14p+0, -0x1.8beafe7f13000p-3}, +{0x1.33ae463091760p+0, -0x1.7898db878d000p-3}, +{0x1.30d190aae3d72p+0, -0x1.6574efe4ec000p-3}, +{0x1.2e025c9203c89p+0, -0x1.527e620845000p-3}, +{0x1.2b404a7244988p+0, -0x1.3fb457d798000p-3}, +{0x1.288b01dc19544p+0, -0x1.2d1615a077000p-3}, +{0x1.25e2268085f69p+0, -0x1.1aa2b431e5000p-3}, +{0x1.23456812abb74p+0, -0x1.08598f1d2b000p-3}, +{0x1.20b4703174157p+0, -0x1.ec738fee40000p-4}, +{0x1.1e2ef308b4e9bp+0, -0x1.c885768862000p-4}, +{0x1.1bb4a36b70a3fp+0, -0x1.a4e75b6a46000p-4}, +{0x1.194538e960658p+0, -0x1.8197efba9a000p-4}, +{0x1.16e0692a10ac8p+0, -0x1.5e95ad734e000p-4}, +{0x1.1485f1ba1568bp+0, -0x1.3bdf67117c000p-4}, +{0x1.12358e123ed6fp+0, -0x1.1973b744f0000p-4}, +{0x1.0fef01de37c8dp+0, -0x1.eea33446bc000p-5}, +{0x1.0db20b82be414p+0, -0x1.aaef4ab304000p-5}, +{0x1.0b7e6f67f69b3p+0, -0x1.67c962fd2c000p-5}, +{0x1.0953f342fc108p+0, -0x1.252f29acf8000p-5}, +{0x1.0732604ec956bp+0, -0x1.c63d19e9c0000p-6}, +{0x1.051980117f9b0p+0, -0x1.432ab6a388000p-6}, +{0x1.03091aa6810f1p+0, -0x1.8244357f50000p-7}, +{0x1.01010152cf066p+0, -0x1.0080a711c0000p-8}, +{0x1.fc07ef6b6e30bp-1, 0x1.fe03018e80000p-8}, +{0x1.f4465aa1024afp-1, 0x1.7b91986450000p-6}, +{0x1.ecc07a8fd3f5ep-1, 0x1.39e88608c8000p-5}, +{0x1.e573ad856b537p-1, 0x1.b42dc6e624000p-5}, +{0x1.de5d6dc7b8057p-1, 0x1.165372ec20000p-4}, +{0x1.d77b6498bddf7p-1, 0x1.51b07a0170000p-4}, +{0x1.d0cb580315c0fp-1, 0x1.8c3465c7ea000p-4}, +{0x1.ca4b30d1cf449p-1, 0x1.c5e544a290000p-4}, +{0x1.c3f8ef4810d8ep-1, 0x1.fec91aa0a6000p-4}, +{0x1.bdd2b8b311f44p-1, 0x1.1b72acdc5c000p-3}, +{0x1.b7d6c2eeac054p-1, 0x1.371fc65a98000p-3}, +{0x1.b20363474c8f5p-1, 0x1.526e61c1aa000p-3}, +{0x1.ac570165eeab1p-1, 0x1.6d60ffc240000p-3}, +{0x1.a6d019f331df4p-1, 0x1.87fa08a013000p-3}, +{0x1.a16d3ebc9e3c3p-1, 0x1.a23bc630c3000p-3}, +{0x1.9c2d14567ef45p-1, 0x1.bc286a3512000p-3}, +{0x1.970e4efae9169p-1, 0x1.d5c2195697000p-3}, +{0x1.920fb3bd0b802p-1, 0x1.ef0ae132d3000p-3}, +{0x1.8d3018b58699ap-1, 0x1.040259974e000p-2}, +{0x1.886e5ff170ee6p-1, 0x1.1058bd40e2000p-2}, +{0x1.83c977ad35d27p-1, 0x1.1c898c1137800p-2}, +{0x1.7f405ed16c520p-1, 0x1.2895a3e65b000p-2}, +{0x1.7ad220d0335c4p-1, 0x1.347dd8f6bd000p-2}, +{0x1.767dce53474fdp-1, 0x1.4043083cb3800p-2}, +#elif N == 128 {0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2}, {0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2}, {0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2}, @@ -220,9 +328,76 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when {0x1.79baa679725c2p-1, 0x1.377266dec1800p-2}, {0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2}, {0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2}, +#endif }, -#if !__FP_FAST_FMA +#if !HAVE_FAST_FMA .tab2 = { +# if N == 64 +{0x1.61ffff94c4fecp-1, -0x1.9fe4fc998f325p-56}, +{0x1.66000020377ddp-1, 0x1.e804c7a9519f2p-55}, +{0x1.6a00004c41678p-1, 0x1.902c675d9ecfep-55}, +{0x1.6dffff7384f87p-1, -0x1.2fd6b95e55043p-56}, +{0x1.720000b37216ep-1, 0x1.802bc8d437043p-55}, +{0x1.75ffffbeb3c9dp-1, 0x1.6047ad0a0d4e4p-57}, +{0x1.7a0000628daep-1, -0x1.e00434b49313dp-56}, +{0x1.7dffffd7abd1ap-1, -0x1.6015f8a083576p-56}, +{0x1.81ffffdf40c54p-1, 0x1.7f54bf76a42c9p-57}, +{0x1.860000f334e11p-1, 0x1.60054cb5344d7p-56}, +{0x1.8a0001238aca7p-1, 0x1.c03c9bd132f55p-57}, +{0x1.8dffffb81d212p-1, -0x1.001e519f2764fp-55}, +{0x1.92000086adc7cp-1, 0x1.1fe40f88f49c6p-55}, +{0x1.960000135d8eap-1, -0x1.f832268dc3095p-55}, +{0x1.99ffff9435acp-1, 0x1.7031d8b835edcp-56}, +{0x1.9e00003478565p-1, -0x1.0030b221ce3eep-58}, +{0x1.a20000b592948p-1, 0x1.8fd2f1dbd4639p-55}, +{0x1.a600000ad0bcfp-1, 0x1.901d6a974e6bep-55}, +{0x1.a9ffff55953a5p-1, 0x1.a07556192db98p-57}, +{0x1.adffff29ce03dp-1, -0x1.fff0717ec71c2p-56}, +{0x1.b1ffff34f3ac8p-1, 0x1.8005573de89d1p-57}, +{0x1.b60000894c55bp-1, -0x1.ff2fb51b044c7p-57}, +{0x1.b9fffef45ec7dp-1, -0x1.9ff7c4e8730fp-56}, +{0x1.be0000cda7b2ap-1, 0x1.57d058dbf3c1dp-55}, +{0x1.c1ffff2c57917p-1, 0x1.7e66d7e48dbc9p-58}, +{0x1.c60000ea5b82ap-1, -0x1.47f5e132ed4bep-55}, +{0x1.ca0001121ae98p-1, -0x1.40958c8d5e00ap-58}, +{0x1.ce0000f9241cbp-1, -0x1.7da063caa81c8p-59}, +{0x1.d1fffe8be95a4p-1, -0x1.82e3a411afcd9p-59}, +{0x1.d5ffff035932bp-1, -0x1.00f901b3fe87dp-58}, +{0x1.d9fffe8b54ba7p-1, 0x1.ffef55d6e3a4p-55}, +{0x1.de0000ad95d19p-1, 0x1.5feb2efd4c7c7p-55}, +{0x1.e1fffe925ce47p-1, 0x1.c8085484eaf08p-55}, +{0x1.e5fffe3ddf853p-1, -0x1.fd5ed02c5cadp-60}, +{0x1.e9fffed0a0e5fp-1, -0x1.a80aaef411586p-55}, +{0x1.ee00008f82eep-1, -0x1.b000aeaf97276p-55}, +{0x1.f20000a22d2f4p-1, -0x1.8f8906e13eba3p-56}, +{0x1.f5fffee35b57dp-1, 0x1.1fdd33b2d3714p-57}, +{0x1.fa00014eec3a6p-1, -0x1.3ee0b7a18c1a5p-58}, +{0x1.fdffff5daa89fp-1, -0x1.c1e24c8e3b503p-58}, +{0x1.0200005b93349p+0, -0x1.50197fe6bedcap-54}, +{0x1.05ffff9d597acp+0, 0x1.20160d062d0dcp-55}, +{0x1.0a00005687a63p+0, -0x1.27f3f9307696ep-54}, +{0x1.0dffff779164ep+0, 0x1.b7eb40bb9c4f4p-54}, +{0x1.12000044a0aa8p+0, 0x1.efbc914d512c4p-55}, +{0x1.16000069685bcp+0, -0x1.c0bea3eb2d82cp-57}, +{0x1.1a000093f0d78p+0, 0x1.1fecbf1e8c52p-54}, +{0x1.1dffffb2b1457p+0, -0x1.3fc91365637d6p-55}, +{0x1.2200008824a1p+0, -0x1.dff7e9feb578ap-54}, +{0x1.25ffffeef953p+0, -0x1.b00a61ec912f7p-55}, +{0x1.2a0000a1e7783p+0, 0x1.60048318b0483p-56}, +{0x1.2e0000853d4c7p+0, -0x1.77fbedf2c8cf3p-54}, +{0x1.320000324c55bp+0, 0x1.f81983997354fp-54}, +{0x1.360000594f796p+0, -0x1.cfe4beff900a9p-54}, +{0x1.3a0000a4c1c0fp+0, 0x1.07dbb2e268d0ep-54}, +{0x1.3e0000751c61bp+0, 0x1.80583ed1c566ep-56}, +{0x1.42000069e8a9fp+0, 0x1.f01f1edf82045p-54}, +{0x1.460000b5a1e34p+0, -0x1.dfdf0cf45c14ap-55}, +{0x1.4a0000187e513p+0, 0x1.401306b83a98dp-55}, +{0x1.4dffff3ba420bp+0, 0x1.9fc6539a6454ep-56}, +{0x1.51fffffe391c9p+0, -0x1.601ef3353ac83p-54}, +{0x1.560000e342455p+0, 0x1.3fb7fac8ac151p-55}, +{0x1.59ffffc39676fp+0, 0x1.4fe7dd6659cc2p-55}, +{0x1.5dfffff10ef42p+0, -0x1.48154cb592bcbp-54}, +# elif N == 128 {0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56}, {0x1.63000034db495p-1, 0x1.dbfea48005d41p-55}, {0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55}, @@ -351,6 +526,7 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when {0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54}, {0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54}, {0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54}, -}, #endif +}, +#endif /* !HAVE_FAST_FMA */ }; diff --git a/libc/tinymath/log_data.internal.h b/libc/tinymath/log_data.internal.h deleted file mode 100644 index 72f87b410..000000000 --- a/libc/tinymath/log_data.internal.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG_DATA_H_ -#define COSMOPOLITAN_LIBC_TINYMATH_LOG_DATA_H_ - -#define LOG_TABLE_BITS 7 -#define LOG_POLY_ORDER 6 -#define LOG_POLY1_ORDER 12 - -COSMOPOLITAN_C_START_ - -extern const struct log_data { - double ln2hi; - double ln2lo; - double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */ - double poly1[LOG_POLY1_ORDER - 1]; - struct { - double invc, logc; - } tab[1 << LOG_TABLE_BITS]; -#if !__FP_FAST_FMA - struct { - double chi, clo; - } tab2[1 << LOG_TABLE_BITS]; -#endif -} __log_data; - -COSMOPOLITAN_C_END_ -#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG_DATA_H_ */ diff --git a/libc/tinymath/logf.c b/libc/tinymath/logf.c index c25d898b4..3b9b85c19 100644 --- a/libc/tinymath/logf.c +++ b/libc/tinymath/logf.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,19 +25,9 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/likely.h" -#include "libc/math.h" -#include "libc/tinymath/internal.h" -#include "libc/tinymath/logf_data.internal.h" +#include "libc/tinymath/arm.internal.h" __static_yoink("arm_optimized_routines_notice"); -/* - * Single-precision log function. - * - * Copyright (c) 2017-2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ - /* LOGF_TABLE_BITS = 4 LOGF_POLY_ORDER = 4 @@ -53,50 +43,63 @@ Relative error: 1.957 * 2^-26 (before rounding.) #define OFF 0x3f330000 /** - * Returns natural logarithm of 𝑥. + * Returns natural logarithm of x. + * + * - ULP error: 0.818 (nearest rounding.) + * - Relative error: 1.957 * 2^-26 (before rounding.) */ -float logf(float x) +float +logf (float x) { - double_t z, r, r2, y, y0, invc, logc; - uint32_t ix, iz, tmp; - int k, i; + /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ + double_t z, r, r2, y, y0, invc, logc; + uint32_t ix, iz, tmp; + int k, i; - ix = asuint(x); - /* Fix sign of zero with downward rounding when x==1. */ - if (WANT_ROUNDING && UNLIKELY(ix == 0x3f800000)) - return 0; - if (UNLIKELY(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) { - /* x < 0x1p-126 or inf or nan. */ - if (ix * 2 == 0) - return __math_divzerof(1); - if (ix == 0x7f800000) /* log(inf) == inf. */ - return x; - if ((ix & 0x80000000) || ix * 2 >= 0xff000000) - return __math_invalidf(x); - /* x is subnormal, normalize it. */ - ix = asuint(x * 0x1p23f); - ix -= 23 << 23; - } + ix = asuint (x); +#if WANT_ROUNDING + /* Fix sign of zero with downward rounding when x==1. */ + if (unlikely (ix == 0x3f800000)) + return 0; +#endif + if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000)) + { + /* x < 0x1p-126 or inf or nan. */ + if (ix * 2 == 0) + return __math_divzerof (1); + if (ix == 0x7f800000) /* log(inf) == inf. */ + return x; + if ((ix & 0x80000000) || ix * 2 >= 0xff000000) + return __math_invalidf (x); + /* x is subnormal, normalize it. */ + ix = asuint (x * 0x1p23f); + ix -= 23 << 23; + } - /* x = 2^k z; where z is in range [OFF,2*OFF] and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. */ - tmp = ix - OFF; - i = (tmp >> (23 - LOGF_TABLE_BITS)) % N; - k = (int32_t)tmp >> 23; /* arithmetic shift */ - iz = ix - (tmp & 0xff800000); - invc = T[i].invc; - logc = T[i].logc; - z = (double_t)asfloat(iz); + /* x = 2^k z; where z is in range [OFF,2*OFF] and exact. + The range is split into N subintervals. + The ith subinterval contains z and c is near its center. */ + tmp = ix - OFF; + i = (tmp >> (23 - LOGF_TABLE_BITS)) % N; + k = (int32_t) tmp >> 23; /* arithmetic shift */ + iz = ix - (tmp & 0xff800000); + invc = T[i].invc; + logc = T[i].logc; + z = (double_t) asfloat (iz); - /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */ - r = z * invc - 1; - y0 = logc + (double_t)k * Ln2; + /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */ + r = z * invc - 1; + y0 = logc + (double_t) k * Ln2; - /* Pipelined polynomial evaluation to approximate log1p(r). */ - r2 = r * r; - y = A[1] * r + A[2]; - y = A[0] * r2 + y; - y = y * r2 + (y0 + r); - return eval_as_float(y); + /* Pipelined polynomial evaluation to approximate log1p(r). */ + r2 = r * r; + y = A[1] * r + A[2]; + y = A[0] * r2 + y; + y = y * r2 + (y0 + r); + return eval_as_float (y); } + +#if USE_GLIBC_ABI +strong_alias (logf, __logf_finite) +hidden_alias (logf, __ieee754_logf) +#endif diff --git a/libc/tinymath/logf_data.c b/libc/tinymath/logf_data.c index cd8f944fe..30885b7eb 100644 --- a/libc/tinymath/logf_data.c +++ b/libc/tinymath/logf_data.c @@ -3,7 +3,7 @@ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,16 +25,9 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/logf_data.internal.h" +#include "libc/tinymath/arm.internal.h" __static_yoink("arm_optimized_routines_notice"); -/* - * Data definition for logf. - * - * Copyright (c) 2017-2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ - const struct logf_data __logf_data = { .tab = { { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 }, diff --git a/libc/tinymath/logf_data.internal.h b/libc/tinymath/logf_data.internal.h deleted file mode 100644 index caa51da52..000000000 --- a/libc/tinymath/logf_data.internal.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOGF_DATA_INTERNAL_H_ -#define COSMOPOLITAN_LIBC_TINYMATH_LOGF_DATA_INTERNAL_H_ - -#define LOGF_TABLE_BITS 4 -#define LOGF_POLY_ORDER 4 - -COSMOPOLITAN_C_START_ - -extern const struct logf_data { - struct { - double invc, logc; - } tab[1 << LOGF_TABLE_BITS]; - double ln2; - double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */ -} __logf_data; - -COSMOPOLITAN_C_END_ -#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOGF_DATA_INTERNAL_H_ */ diff --git a/libc/tinymath/atan_data.c b/libc/tinymath/math_err.c similarity index 61% rename from libc/tinymath/atan_data.c rename to libc/tinymath/math_err.c index 3b465d0c5..c5a7fa08c 100644 --- a/libc/tinymath/atan_data.c +++ b/libc/tinymath/math_err.c @@ -1,9 +1,9 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi │ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,16 +25,76 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/atan_data.internal.h" +#include "libc/errno.h" +#include "libc/tinymath/arm.internal.h" -const struct atan_poly_data __atan_poly_data = { - .poly = {/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on - [2**-1022, 1.0]. See atan.sollya for details of how these were - generated. */ - -0x1.5555555555555p-2, 0x1.99999999996c1p-3, -0x1.2492492478f88p-3, - 0x1.c71c71bc3951cp-4, -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4, - -0x1.11100ee084227p-4, 0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5, - 0x1.842dbe9b0d916p-5, -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5, - -0x1.00e6eece7de8p-5, 0x1.860897b29e5efp-6, -0x1.0051381722a59p-6, - 0x1.14e9dc19a4a4ep-7, -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10, - -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16}}; +#if WANT_ERRNO +/* dontinline reduces code size and avoids making math functions non-leaf + when the error handling is inlined. */ +dontinline static double +with_errno (double y, int e) +{ + errno = e; + return y; +} +#else +#define with_errno(x, e) (x) +#endif + +/* dontinline reduces code size. */ +dontinline static double +xflow (uint32_t sign, double y) +{ + y = eval_as_double (opt_barrier_double (sign ? -y : y) * y); + return with_errno (y, ERANGE); +} + +double +__math_uflow (uint32_t sign) +{ + return xflow (sign, 0x1p-767); +} + +#if WANT_ERRNO_UFLOW +/* Underflows to zero in some non-nearest rounding mode, setting errno + is valid even if the result is non-zero, but in the subnormal range. */ +double +__math_may_uflow (uint32_t sign) +{ + return xflow (sign, 0x1.8p-538); +} +#endif + +double +__math_oflow (uint32_t sign) +{ + return xflow (sign, 0x1p769); +} + +double +__math_divzero (uint32_t sign) +{ + double y = opt_barrier_double (sign ? -1.0 : 1.0) / 0.0; + return with_errno (y, ERANGE); +} + +dontinstrument double +__math_invalid (double x) +{ + double y = (x - x) / (x - x); + return isnan (x) ? y : with_errno (y, EDOM); +} + +/* Check result and set errno if necessary. */ + +dontinstrument double +__math_check_uflow (double y) +{ + return y == 0.0 ? with_errno (y, ERANGE) : y; +} + +dontinstrument double +__math_check_oflow (double y) +{ + return isinf (y) ? with_errno (y, ERANGE) : y; +} diff --git a/libc/tinymath/casin.c b/libc/tinymath/math_errf.c similarity index 57% rename from libc/tinymath/casin.c rename to libc/tinymath/math_errf.c index f6478d061..2927b565d 100644 --- a/libc/tinymath/casin.c +++ b/libc/tinymath/math_errf.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ +│ Optimized Routines │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,27 +25,76 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/complex.h" -#include "libc/math.h" -#include "libc/tinymath/complex.internal.h" -__static_yoink("musl_libc_notice"); +#include "libc/errno.h" +#include "libc/tinymath/arm.internal.h" -// FIXME - -/* asin(z) = -i log(i z + sqrt(1 - z*z)) */ - -double complex casin(double complex z) +#if WANT_ERRNO +/* dontinline reduces code size and avoids making math functions non-leaf + when the error handling is inlined. */ +dontinline static float +with_errnof (float y, int e) { - double complex w; - double x, y; + errno = e; + return y; +} +#else +#define with_errnof(x, e) (x) +#endif - x = creal(z); - y = cimag(z); - w = CMPLX(1.0 - (x - y)*(x + y), -2.0*x*y); - double complex r = clog(CMPLX(-y, x) + csqrt(w)); - return CMPLX(cimag(r), -creal(r)); +/* dontinline reduces code size. */ +dontinline static float +xflowf (uint32_t sign, float y) +{ + y = eval_as_float (opt_barrier_float (sign ? -y : y) * y); + return with_errnof (y, ERANGE); } -#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 -__weak_reference(casin, casinl); +float +__math_uflowf (uint32_t sign) +{ + return xflowf (sign, 0x1p-95f); +} + +#if WANT_ERRNO_UFLOW +/* Underflows to zero in some non-nearest rounding mode, setting errno + is valid even if the result is non-zero, but in the subnormal range. */ +float +__math_may_uflowf (uint32_t sign) +{ + return xflowf (sign, 0x1.4p-75f); +} #endif + +float +__math_oflowf (uint32_t sign) +{ + return xflowf (sign, 0x1p97f); +} + +float +__math_divzerof (uint32_t sign) +{ + float y = opt_barrier_float (sign ? -1.0f : 1.0f) / 0.0f; + return with_errnof (y, ERANGE); +} + +dontinstrument float +__math_invalidf (float x) +{ + float y = (x - x) / (x - x); + return isnan (x) ? y : with_errnof (y, EDOM); +} + +/* Check result and set errno if necessary. */ + +dontinstrument float +__math_check_uflowf (float y) +{ + return y == 0.0f ? with_errnof (y, ERANGE) : y; +} + +dontinstrument float +__math_check_oflowf (float y) +{ + return isinf (y) ? with_errnof (y, ERANGE) : y; +} diff --git a/libc/tinymath/atanf_data.c b/libc/tinymath/math_errl.c similarity index 80% rename from libc/tinymath/atanf_data.c rename to libc/tinymath/math_errl.c index e44a7631e..d776fb349 100644 --- a/libc/tinymath/atanf_data.c +++ b/libc/tinymath/math_errl.c @@ -1,9 +1,9 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi │ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,11 +25,25 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/atanf_data.internal.h" +#include "libc/errno.h" +#include "libc/tinymath/arm.internal.h" -/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0]. - */ -const struct atanf_poly_data __atanf_poly_data = { - .poly = {/* See atanf.sollya for details of how these were generated. */ - -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f, - -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f}}; +#if WANT_ERRNO +/* dontinline reduces code size and avoids making math functions non-leaf + when the error handling is inlined. */ +dontinline static long double +with_errnol (long double y, int e) +{ + errno = e; + return y; +} +#else +#define with_errnol(x, e) (x) +#endif + +dontinstrument long double +__math_invalidl (long double x) +{ + long double y = (x - x) / (x - x); + return isnan (x) ? y : with_errnol (y, EDOM); +} diff --git a/libc/tinymath/pow.c b/libc/tinymath/pow.c index abca8c6d5..86e36fd40 100644 --- a/libc/tinymath/pow.c +++ b/libc/tinymath/pow.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ +│ Optimized Routines │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,20 +25,9 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/likely.h" -#include "libc/math.h" -#include "libc/tinymath/exp_data.internal.h" -#include "libc/tinymath/internal.h" -#include "libc/tinymath/pow_data.internal.h" +#include "libc/tinymath/arm.internal.h" __static_yoink("arm_optimized_routines_notice"); -/* - * Double-precision x^y function. - * - * Copyright (c) 2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ - /* Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53) relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma) @@ -53,79 +42,83 @@ ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma) #define OFF 0x3fe6955500000000 /* Top 12 bits of a double (sign and exponent bits). */ -static inline uint32_t top12(double x) +static inline uint32_t +top12 (double x) { - return asuint64(x) >> 52; + return asuint64 (x) >> 52; } /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about additional 15 bits precision. IX is the bit representation of x, but normalized in the subnormal range using the sign bit for the exponent. */ -static inline double_t log_inline(uint64_t ix, double_t *tail) +static inline double_t +log_inline (uint64_t ix, double_t *tail) { - /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ - double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p; - uint64_t iz, tmp; - int k, i; + /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ + double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p; + uint64_t iz, tmp; + int k, i; - /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. */ - tmp = ix - OFF; - i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N; - k = (int64_t)tmp >> 52; /* arithmetic shift */ - iz = ix - (tmp & 0xfffULL << 52); - z = asdouble(iz); - kd = (double_t)k; + /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. + The range is split into N subintervals. + The ith subinterval contains z and c is near its center. */ + tmp = ix - OFF; + i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N; + k = (int64_t) tmp >> 52; /* arithmetic shift */ + iz = ix - (tmp & 0xfffULL << 52); + z = asdouble (iz); + kd = (double_t) k; - /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */ - invc = T[i].invc; - logc = T[i].logc; - logctail = T[i].logctail; + /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */ + invc = T[i].invc; + logc = T[i].logc; + logctail = T[i].logctail; - /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and + /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */ -#if __FP_FAST_FMA - r = __builtin_fma(z, invc, -1.0); +#if HAVE_FAST_FMA + r = fma (z, invc, -1.0); #else - /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */ - double_t zhi = asdouble((iz + (1ULL << 31)) & (-1ULL << 32)); - double_t zlo = z - zhi; - double_t rhi = zhi * invc - 1.0; - double_t rlo = zlo * invc; - r = rhi + rlo; + /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */ + double_t zhi = asdouble ((iz + (1ULL << 31)) & (-1ULL << 32)); + double_t zlo = z - zhi; + double_t rhi = zhi * invc - 1.0; + double_t rlo = zlo * invc; + r = rhi + rlo; #endif - /* k*Ln2 + log(c) + r. */ - t1 = kd * Ln2hi + logc; - t2 = t1 + r; - lo1 = kd * Ln2lo + logctail; - lo2 = t1 - t2 + r; + /* k*Ln2 + log(c) + r. */ + t1 = kd * Ln2hi + logc; + t2 = t1 + r; + lo1 = kd * Ln2lo + logctail; + lo2 = t1 - t2 + r; - /* Evaluation is optimized assuming superscalar pipelined execution. */ - double_t ar, ar2, ar3, lo3, lo4; - ar = A[0] * r; /* A[0] = -0.5. */ - ar2 = r * ar; - ar3 = r * ar2; - /* k*Ln2 + log(c) + r + A[0]*r*r. */ -#if __FP_FAST_FMA - hi = t2 + ar2; - lo3 = __builtin_fma(ar, r, -ar2); - lo4 = t2 - hi + ar2; + /* Evaluation is optimized assuming superscalar pipelined execution. */ + double_t ar, ar2, ar3, lo3, lo4; + ar = A[0] * r; /* A[0] = -0.5. */ + ar2 = r * ar; + ar3 = r * ar2; + /* k*Ln2 + log(c) + r + A[0]*r*r. */ +#if HAVE_FAST_FMA + hi = t2 + ar2; + lo3 = fma (ar, r, -ar2); + lo4 = t2 - hi + ar2; #else - double_t arhi = A[0] * rhi; - double_t arhi2 = rhi * arhi; - hi = t2 + arhi2; - lo3 = rlo * (ar + arhi); - lo4 = t2 - hi + arhi2; + double_t arhi = A[0] * rhi; + double_t arhi2 = rhi * arhi; + hi = t2 + arhi2; + lo3 = rlo * (ar + arhi); + lo4 = t2 - hi + arhi2; #endif - /* p = log1p(r) - r - A[0]*r*r. */ - p = (ar3 * (A[1] + r * A[2] + - ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6])))); - lo = lo1 + lo2 + lo3 + lo4 + p; - y = hi + lo; - *tail = hi - y + lo; - return y; + /* p = log1p(r) - r - A[0]*r*r. */ +#if POW_LOG_POLY_ORDER == 8 + p = (ar3 + * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6])))); +#endif + lo = lo1 + lo2 + lo3 + lo4 + p; + y = hi + lo; + *tail = hi - y + lo; + return y; } #undef N @@ -149,232 +142,268 @@ static inline double_t log_inline(uint64_t ix, double_t *tail) a double. (int32_t)KI is the k used in the argument reduction and exponent adjustment of scale, positive k here means the result may overflow and negative k means the result may underflow. */ -forceinline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) +static inline double +specialcase (double_t tmp, uint64_t sbits, uint64_t ki) { - double_t scale, y; + double_t scale, y; - if ((ki & 0x80000000) == 0) { - /* k > 0, the exponent of scale might have overflowed by <= 460. */ - sbits -= 1009ull << 52; - scale = asdouble(sbits); - y = 0x1p1009 * (scale + scale * tmp); - return eval_as_double(y); - } - /* k < 0, need special care in the subnormal range. */ - sbits += 1022ull << 52; - /* Note: sbits is signed scale. */ - scale = asdouble(sbits); - y = scale + scale * tmp; - if (fabs(y) < 1.0) { - /* Round y to the right precision before scaling it into the subnormal - range to avoid double rounding that can cause 0.5+E/2 ulp error where - E is the worst-case ulp error outside the subnormal range. So this - is only useful if the goal is better than 1 ulp worst-case error. */ - double_t hi, lo, one = 1.0; - if (y < 0.0) - one = -1.0; - lo = scale - y + scale * tmp; - hi = one + y; - lo = one - hi + y + lo; - y = eval_as_double(hi + lo) - one; - /* Fix the sign of 0. */ - if (y == 0.0) - y = asdouble(sbits & 0x8000000000000000); - /* The underflow exception needs to be signaled explicitly. */ - fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022); - } - y = 0x1p-1022 * y; - return eval_as_double(y); + if ((ki & 0x80000000) == 0) + { + /* k > 0, the exponent of scale might have overflowed by <= 460. */ + sbits -= 1009ull << 52; + scale = asdouble (sbits); + y = 0x1p1009 * (scale + scale * tmp); + return check_oflow (eval_as_double (y)); + } + /* k < 0, need special care in the subnormal range. */ + sbits += 1022ull << 52; + /* Note: sbits is signed scale. */ + scale = asdouble (sbits); + y = scale + scale * tmp; + if (fabs (y) < 1.0) + { + /* Round y to the right precision before scaling it into the subnormal + range to avoid double rounding that can cause 0.5+E/2 ulp error where + E is the worst-case ulp error outside the subnormal range. So this + is only useful if the goal is better than 1 ulp worst-case error. */ + double_t hi, lo, one = 1.0; + if (y < 0.0) + one = -1.0; + lo = scale - y + scale * tmp; + hi = one + y; + lo = one - hi + y + lo; + y = eval_as_double (hi + lo) - one; + /* Fix the sign of 0. */ + if (y == 0.0) + y = asdouble (sbits & 0x8000000000000000); + /* The underflow exception needs to be signaled explicitly. */ + force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022); + } + y = 0x1p-1022 * y; + return check_uflow (eval_as_double (y)); } #define SIGN_BIAS (0x800 << EXP_TABLE_BITS) /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */ -forceinline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias) +static inline double +exp_inline (double_t x, double_t xtail, uint32_t sign_bias) { - uint32_t abstop; - uint64_t ki, idx, top, sbits; - /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ - double_t kd, z, r, r2, scale, tail, tmp; + uint32_t abstop; + uint64_t ki, idx, top, sbits; + /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ + double_t kd, z, r, r2, scale, tail, tmp; - abstop = top12(x) & 0x7ff; - if (UNLIKELY(abstop - top12(0x1p-54) >= - top12(512.0) - top12(0x1p-54))) { - if (abstop - top12(0x1p-54) >= 0x80000000) { - /* Avoid spurious underflow for tiny x. */ - /* Note: 0 is common input. */ - double_t one = WANT_ROUNDING ? 1.0 + x : 1.0; - return sign_bias ? -one : one; - } - if (abstop >= top12(1024.0)) { - /* Note: inf and nan are already handled. */ - if (asuint64(x) >> 63) - return __math_uflow(sign_bias); - else - return __math_oflow(sign_bias); - } - /* Large x is special cased below. */ - abstop = 0; + abstop = top12 (x) & 0x7ff; + if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54))) + { + if (abstop - top12 (0x1p-54) >= 0x80000000) + { + /* Avoid spurious underflow for tiny x. */ + /* Note: 0 is common input. */ + double_t one = WANT_ROUNDING ? 1.0 + x : 1.0; + return sign_bias ? -one : one; } + if (abstop >= top12 (1024.0)) + { + /* Note: inf and nan are already handled. */ + if (asuint64 (x) >> 63) + return __math_uflow (sign_bias); + else + return __math_oflow (sign_bias); + } + /* Large x is special cased below. */ + abstop = 0; + } - /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ - /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ - z = InvLn2N * x; + /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ + /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ + z = InvLn2N * x; #if TOINT_INTRINSICS - kd = roundtoint(z); - ki = converttoint(z); + kd = roundtoint (z); + ki = converttoint (z); #elif EXP_USE_TOINT_NARROW - /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */ - kd = eval_as_double(z + Shift); - ki = asuint64(kd) >> 16; - kd = (double_t)(int32_t)ki; + /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */ + kd = eval_as_double (z + Shift); + ki = asuint64 (kd) >> 16; + kd = (double_t) (int32_t) ki; #else - /* z - kd is in [-1, 1] in non-nearest rounding modes. */ - kd = eval_as_double(z + Shift); - ki = asuint64(kd); - kd -= Shift; + /* z - kd is in [-1, 1] in non-nearest rounding modes. */ + kd = eval_as_double (z + Shift); + ki = asuint64 (kd); + kd -= Shift; #endif - r = x + kd * NegLn2hiN + kd * NegLn2loN; - /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ - r += xtail; - /* 2^(k/N) ~= scale * (1 + tail). */ - idx = 2 * (ki % N); - top = (ki + sign_bias) << (52 - EXP_TABLE_BITS); - tail = asdouble(T[idx]); - /* This is only a valid scale when -1023*N < k < 1024*N. */ - sbits = T[idx + 1] + top; - /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */ - /* Evaluation is optimized assuming superscalar pipelined execution. */ - r2 = r * r; - /* Without fma the worst case error is 0.25/N ulp larger. */ - /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */ - tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); - if (UNLIKELY(abstop == 0)) - return specialcase(tmp, sbits, ki); - scale = asdouble(sbits); - /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there - is no spurious underflow here even without fma. */ - return eval_as_double(scale + scale * tmp); + r = x + kd * NegLn2hiN + kd * NegLn2loN; + /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ + r += xtail; + /* 2^(k/N) ~= scale * (1 + tail). */ + idx = 2 * (ki % N); + top = (ki + sign_bias) << (52 - EXP_TABLE_BITS); + tail = asdouble (T[idx]); + /* This is only a valid scale when -1023*N < k < 1024*N. */ + sbits = T[idx + 1] + top; + /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */ + /* Evaluation is optimized assuming superscalar pipelined execution. */ + r2 = r * r; + /* Without fma the worst case error is 0.25/N ulp larger. */ + /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */ +#if EXP_POLY_ORDER == 4 + tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4); +#elif EXP_POLY_ORDER == 5 + tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); +#elif EXP_POLY_ORDER == 6 + tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6); +#endif + if (unlikely (abstop == 0)) + return specialcase (tmp, sbits, ki); + scale = asdouble (sbits); + /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there + is no spurious underflow here even without fma. */ + return eval_as_double (scale + scale * tmp); } /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is the bit representation of a non-zero finite floating-point value. */ -static inline int checkint(uint64_t iy) +static inline int +checkint (uint64_t iy) { - int e = iy >> 52 & 0x7ff; - if (e < 0x3ff) - return 0; - if (e > 0x3ff + 52) - return 2; - if (iy & ((1ULL << (0x3ff + 52 - e)) - 1)) - return 0; - if (iy & (1ULL << (0x3ff + 52 - e))) - return 1; - return 2; + int e = iy >> 52 & 0x7ff; + if (e < 0x3ff) + return 0; + if (e > 0x3ff + 52) + return 2; + if (iy & ((1ULL << (0x3ff + 52 - e)) - 1)) + return 0; + if (iy & (1ULL << (0x3ff + 52 - e))) + return 1; + return 2; } /* Returns 1 if input is the bit representation of 0, infinity or nan. */ -static inline int zeroinfnan(uint64_t i) +static inline int +zeroinfnan (uint64_t i) { - return 2 * i - 1 >= 2 * asuint64(INFINITY) - 1; + return 2 * i - 1 >= 2 * asuint64 (INFINITY) - 1; } /** * Returns 𝑥^𝑦. - * @note should take ~18ns + * + * - Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53) + * - relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma) + * - ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma) + * + * @raise ERANGE on overflow or underflow + * @raise EDOM if x is negative and y is a finite non-integer */ -double pow(double x, double y) +double +pow (double x, double y) { - uint32_t sign_bias = 0; - uint64_t ix, iy; - uint32_t topx, topy; + uint32_t sign_bias = 0; + uint64_t ix, iy; + uint32_t topx, topy; - ix = asuint64(x); - iy = asuint64(y); - topx = top12(x); - topy = top12(y); - if (UNLIKELY(topx - 0x001 >= 0x7ff - 0x001 || - (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)) { - /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0 - and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */ - /* Special cases: (x < 0x1p-126 or inf or nan) or - (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */ - if (UNLIKELY(zeroinfnan(iy))) { - if (2 * iy == 0) - return issignaling_inline(x) ? x + y : 1.0; - if (ix == asuint64(1.0)) - return issignaling_inline(y) ? x + y : 1.0; - if (2 * ix > 2 * asuint64(INFINITY) || - 2 * iy > 2 * asuint64(INFINITY)) - return x + y; - if (2 * ix == 2 * asuint64(1.0)) - return 1.0; - if ((2 * ix < 2 * asuint64(1.0)) == !(iy >> 63)) - return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */ - return y * y; - } - if (UNLIKELY(zeroinfnan(ix))) { - double_t x2 = x * x; - if (ix >> 63 && checkint(iy) == 1) - x2 = -x2; - /* Without the barrier some versions of clang hoist the 1/x2 and - thus division by zero exception can be signaled spuriously. */ - return iy >> 63 ? fp_barrier(1 / x2) : x2; - } - /* Here x and y are non-zero finite. */ - if (ix >> 63) { - /* Finite x < 0. */ - int yint = checkint(iy); - if (yint == 0) - return __math_invalid(x); - if (yint == 1) - sign_bias = SIGN_BIAS; - ix &= 0x7fffffffffffffff; - topx &= 0x7ff; - } - if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) { - /* Note: sign_bias == 0 here because y is not odd. */ - if (ix == asuint64(1.0)) - return 1.0; - if ((topy & 0x7ff) < 0x3be) { - /* |y| < 2^-65, x^y ~= 1 + y*log(x). */ - if (WANT_ROUNDING) - return ix > asuint64(1.0) ? 1.0 + y : - 1.0 - y; - else - return 1.0; - } - return (ix > asuint64(1.0)) == (topy < 0x800) ? - __math_oflow(0) : - __math_uflow(0); - } - if (topx == 0) { - /* Normalize subnormal x so exponent becomes negative. */ - ix = asuint64(x * 0x1p52); - ix &= 0x7fffffffffffffff; - ix -= 52ULL << 52; - } + ix = asuint64 (x); + iy = asuint64 (y); + topx = top12 (x); + topy = top12 (y); + if (unlikely (topx - 0x001 >= 0x7ff - 0x001 + || (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)) + { + /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0 + and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */ + /* Special cases: (x < 0x1p-126 or inf or nan) or + (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */ + if (unlikely (zeroinfnan (iy))) + { + if (2 * iy == 0) + return issignaling_inline (x) ? x + y : 1.0; + if (ix == asuint64 (1.0)) + return issignaling_inline (y) ? x + y : 1.0; + if (2 * ix > 2 * asuint64 (INFINITY) + || 2 * iy > 2 * asuint64 (INFINITY)) + return x + y; + if (2 * ix == 2 * asuint64 (1.0)) + return 1.0; + if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63)) + return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */ + return y * y; } + if (unlikely (zeroinfnan (ix))) + { + double_t x2 = x * x; + if (ix >> 63 && checkint (iy) == 1) + { + x2 = -x2; + sign_bias = 1; + } + if (WANT_ERRNO && 2 * ix == 0 && iy >> 63) + return __math_divzero (sign_bias); + /* Without the barrier some versions of clang hoist the 1/x2 and + thus division by zero exception can be signaled spuriously. */ + return iy >> 63 ? opt_barrier_double (1 / x2) : x2; + } + /* Here x and y are non-zero finite. */ + if (ix >> 63) + { + /* Finite x < 0. */ + int yint = checkint (iy); + if (yint == 0) + return __math_invalid (x); + if (yint == 1) + sign_bias = SIGN_BIAS; + ix &= 0x7fffffffffffffff; + topx &= 0x7ff; + } + if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) + { + /* Note: sign_bias == 0 here because y is not odd. */ + if (ix == asuint64 (1.0)) + return 1.0; + if ((topy & 0x7ff) < 0x3be) + { + /* |y| < 2^-65, x^y ~= 1 + y*log(x). */ + if (WANT_ROUNDING) + return ix > asuint64 (1.0) ? 1.0 + y : 1.0 - y; + else + return 1.0; + } + return (ix > asuint64 (1.0)) == (topy < 0x800) ? __math_oflow (0) + : __math_uflow (0); + } + if (topx == 0) + { + /* Normalize subnormal x so exponent becomes negative. */ + /* Without the barrier some versions of clang evalutate the mul + unconditionally causing spurious overflow exceptions. */ + ix = asuint64 (opt_barrier_double (x) * 0x1p52); + ix &= 0x7fffffffffffffff; + ix -= 52ULL << 52; + } + } - double_t lo; - double_t hi = log_inline(ix, &lo); - double_t ehi, elo; -#if __FP_FAST_FMA - ehi = y * hi; - elo = y * lo + __builtin_fma(y, hi, -ehi); + double_t lo; + double_t hi = log_inline (ix, &lo); + double_t ehi, elo; +#if HAVE_FAST_FMA + ehi = y * hi; + elo = y * lo + fma (y, hi, -ehi); #else - double_t yhi = asdouble(iy & -1ULL << 27); - double_t ylo = y - yhi; - double_t lhi = asdouble(asuint64(hi) & -1ULL << 27); - double_t llo = hi - lhi + lo; - ehi = yhi * lhi; - elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */ + double_t yhi = asdouble (iy & -1ULL << 27); + double_t ylo = y - yhi; + double_t lhi = asdouble (asuint64 (hi) & -1ULL << 27); + double_t llo = hi - lhi + lo; + ehi = yhi * lhi; + elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */ #endif - return exp_inline(ehi, elo, sign_bias); + return exp_inline (ehi, elo, sign_bias); } -__weak_reference(pow, __pow_finite); -#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 -__weak_reference(pow, powl); +#if USE_GLIBC_ABI +strong_alias (pow, __pow_finite) +hidden_alias (pow, __ieee754_pow) +# if LDBL_MANT_DIG == 53 +long double powl (long double x, long double y) { return pow (x, y); } +# endif #endif diff --git a/libc/tinymath/pow_data.c b/libc/tinymath/pow_data.c index ce804633e..c1da89cb3 100644 --- a/libc/tinymath/pow_data.c +++ b/libc/tinymath/pow_data.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,22 +25,16 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/pow_data.internal.h" +#include "libc/tinymath/arm.internal.h" __static_yoink("arm_optimized_routines_notice"); -/* - * Data for the log part of pow. - * - * Copyright (c) 2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ - #define N (1 << POW_LOG_TABLE_BITS) const struct pow_log_data __pow_log_data = { .ln2hi = 0x1.62e42fefa3800p-1, .ln2lo = 0x1.ef35793c76730p-45, .poly = { +#if N == 128 && POW_LOG_POLY_ORDER == 8 // relative error: 0x1.11922ap-70 // in -0x1.6bp-8 0x1.6bp-8 // Coefficients are scaled to match the scaling during evaluation. @@ -51,6 +45,7 @@ const struct pow_log_data __pow_log_data = { -0x1.555555529a47ap-3 * 4, 0x1.2495b9b4845e9p-3 * -8, -0x1.0002b8b263fc3p-3 * -8, +#endif }, /* Algorithm: @@ -75,6 +70,7 @@ the last few bits of logc are rounded away so k*ln2hi + logc has no rounding error and the interval for z is selected such that near x == 1, where log(x) is tiny, large cancellation error is avoided in logc + poly(z/c - 1). */ .tab = { +#if N == 128 #define A(a, b, c) {a, 0, b, c}, A(0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48) A(0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46) @@ -204,5 +200,6 @@ A(0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45) A(0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45) A(0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46) A(0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47) +#endif }, }; diff --git a/libc/tinymath/pow_data.internal.h b/libc/tinymath/pow_data.internal.h deleted file mode 100644 index ac394a416..000000000 --- a/libc/tinymath/pow_data.internal.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_TINYMATH_POW_DATA_INTERNAL_H_ -#define COSMOPOLITAN_LIBC_TINYMATH_POW_DATA_INTERNAL_H_ - -#define POW_LOG_TABLE_BITS 7 -#define POW_LOG_POLY_ORDER 8 - -COSMOPOLITAN_C_START_ - -extern const struct pow_log_data { - double ln2hi; - double ln2lo; - double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1. */ - /* Note: the pad field is unused, but allows slightly faster indexing. */ - struct { - double invc, pad, logc, logctail; - } tab[1 << POW_LOG_TABLE_BITS]; -} __pow_log_data; - -COSMOPOLITAN_C_END_ -#endif /* COSMOPOLITAN_LIBC_TINYMATH_POW_DATA_INTERNAL_H_ */ diff --git a/libc/tinymath/powf.c b/libc/tinymath/powf.c index aeab1bbb7..c116f3931 100644 --- a/libc/tinymath/powf.c +++ b/libc/tinymath/powf.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,19 +25,9 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/likely.h" -#include "libc/math.h" -#include "libc/tinymath/exp2f_data.internal.h" -#include "libc/tinymath/exp_data.internal.h" -#include "libc/tinymath/internal.h" -#include "libc/tinymath/powf_data.internal.h" +#include "libc/tinymath/arm.internal.h" __static_yoink("arm_optimized_routines_notice"); -/* - * Copyright (c) 2017-2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ - /* POWF_LOG2_POLY_ORDER = 5 EXP2F_TABLE_BITS = 5 @@ -55,37 +45,39 @@ relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).) /* Subnormal input is normalized so ix has negative biased exponent. Output is multiplied by N (POWF_SCALE) if TOINT_INTRINICS is set. */ -static inline double_t log2_inline(uint32_t ix) +static inline double_t +log2_inline (uint32_t ix) { - double_t z, r, r2, r4, p, q, y, y0, invc, logc; - uint32_t iz, top, tmp; - int k, i; + /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ + double_t z, r, r2, r4, p, q, y, y0, invc, logc; + uint32_t iz, top, tmp; + int k, i; - /* x = 2^k z; where z is in range [OFF,2*OFF] and exact. - The range is split into N subintervals. - The ith subinterval contains z and c is near its center. */ - tmp = ix - OFF; - i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N; - top = tmp & 0xff800000; - iz = ix - top; - k = (int32_t)top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */ - invc = T[i].invc; - logc = T[i].logc; - z = (double_t)asfloat(iz); + /* x = 2^k z; where z is in range [OFF,2*OFF] and exact. + The range is split into N subintervals. + The ith subinterval contains z and c is near its center. */ + tmp = ix - OFF; + i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N; + top = tmp & 0xff800000; + iz = ix - top; + k = (int32_t) top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */ + invc = T[i].invc; + logc = T[i].logc; + z = (double_t) asfloat (iz); - /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */ - r = z * invc - 1; - y0 = logc + (double_t)k; + /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */ + r = z * invc - 1; + y0 = logc + (double_t) k; - /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */ - r2 = r * r; - y = A[0] * r + A[1]; - p = A[2] * r + A[3]; - r4 = r2 * r2; - q = A[4] * r + y0; - q = p * r2 + q; - y = y * r4 + q; - return y; + /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */ + r2 = r * r; + y = A[0] * r + A[1]; + p = A[2] * r + A[3]; + r4 = r2 * r2; + q = A[4] * r + y0; + q = p * r2 + q; + y = y * r4 + q; + return y; } #undef N @@ -97,124 +89,164 @@ static inline double_t log2_inline(uint32_t ix) /* The output of log2 and thus the input of exp2 is either scaled by N (in case of fast toint intrinsics) or not. The unscaled xd must be in [-1021,1023], sign_bias sets the sign of the result. */ -static inline float exp2_inline(double_t xd, uint32_t sign_bias) +static inline float +exp2_inline (double_t xd, uint32_t sign_bias) { - uint64_t ki, ski, t; - double_t kd, z, r, r2, y, s; + uint64_t ki, ski, t; + /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ + double_t kd, z, r, r2, y, s; #if TOINT_INTRINSICS -#define C __exp2f_data.poly_scaled - /* N*x = k + r with r in [-1/2, 1/2] */ - kd = roundtoint(xd); /* k */ - ki = converttoint(xd); +# define C __exp2f_data.poly_scaled + /* N*x = k + r with r in [-1/2, 1/2] */ + kd = roundtoint (xd); /* k */ + ki = converttoint (xd); #else -#define C __exp2f_data.poly -#define SHIFT __exp2f_data.shift_scaled - /* x = k/N + r with r in [-1/(2N), 1/(2N)] */ - kd = eval_as_double(xd + SHIFT); - ki = asuint64(kd); - kd -= SHIFT; /* k/N */ +# define C __exp2f_data.poly +# define SHIFT __exp2f_data.shift_scaled + /* x = k/N + r with r in [-1/(2N), 1/(2N)] */ + kd = eval_as_double (xd + SHIFT); + ki = asuint64 (kd); + kd -= SHIFT; /* k/N */ #endif - r = xd - kd; + r = xd - kd; - /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ - t = T[ki % N]; - ski = ki + sign_bias; - t += ski << (52 - EXP2F_TABLE_BITS); - s = asdouble(t); - z = C[0] * r + C[1]; - r2 = r * r; - y = C[2] * r + 1; - y = z * r2 + y; - y = y * s; - return eval_as_float(y); + /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ + t = T[ki % N]; + ski = ki + sign_bias; + t += ski << (52 - EXP2F_TABLE_BITS); + s = asdouble (t); + z = C[0] * r + C[1]; + r2 = r * r; + y = C[2] * r + 1; + y = z * r2 + y; + y = y * s; + return eval_as_float (y); } /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is the bit representation of a non-zero finite floating-point value. */ -static inline int checkint(uint32_t iy) +static inline int +checkint (uint32_t iy) { - int e = iy >> 23 & 0xff; - if (e < 0x7f) - return 0; - if (e > 0x7f + 23) - return 2; - if (iy & ((1 << (0x7f + 23 - e)) - 1)) - return 0; - if (iy & (1 << (0x7f + 23 - e))) - return 1; - return 2; + int e = iy >> 23 & 0xff; + if (e < 0x7f) + return 0; + if (e > 0x7f + 23) + return 2; + if (iy & ((1 << (0x7f + 23 - e)) - 1)) + return 0; + if (iy & (1 << (0x7f + 23 - e))) + return 1; + return 2; } -static inline int zeroinfnan(uint32_t ix) +static inline int +zeroinfnan (uint32_t ix) { - return 2 * ix - 1 >= 2u * 0x7f800000 - 1; + return 2 * ix - 1 >= 2u * 0x7f800000 - 1; } /** * Returns 𝑥^𝑦. - * @note should take ~16ns + * + * - ULP error: 0.82 (~ 0.5 + relerr*2^24) + * - relerr: 1.27 * 2^-26 (Relative error ~= 128*Ln2*relerr_log2 + relerr_exp2) + * - relerr_log2: 1.83 * 2^-33 (Relative error of logx.) + * - relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).) + * + * @raise ERANGE on overflow or underflow + * @raise EDOM if x is negative and y is a finite non-integer */ -float powf(float x, float y) +float +powf (float x, float y) { - uint32_t sign_bias = 0; - uint32_t ix, iy; + uint32_t sign_bias = 0; + uint32_t ix, iy; - ix = asuint(x); - iy = asuint(y); - if (UNLIKELY(ix - 0x00800000 >= 0x7f800000 - 0x00800000 || - zeroinfnan(iy))) { - /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */ - if (UNLIKELY(zeroinfnan(iy))) { - if (2 * iy == 0) - return issignalingf_inline(x) ? x + y : 1.0f; - if (ix == 0x3f800000) - return issignalingf_inline(y) ? x + y : 1.0f; - if (2 * ix > 2u * 0x7f800000 || - 2 * iy > 2u * 0x7f800000) - return x + y; - if (2 * ix == 2 * 0x3f800000) - return 1.0f; - if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000)) - return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */ - return y * y; - } - if (UNLIKELY(zeroinfnan(ix))) { - float_t x2 = x * x; - if (ix & 0x80000000 && checkint(iy) == 1) - x2 = -x2; - /* Without the barrier some versions of clang hoist the 1/x2 and - thus division by zero exception can be signaled spuriously. */ - return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2; - } - /* x and y are non-zero finite. */ - if (ix & 0x80000000) { - /* Finite x < 0. */ - int yint = checkint(iy); - if (yint == 0) - return __math_invalidf(x); - if (yint == 1) - sign_bias = SIGN_BIAS; - ix &= 0x7fffffff; - } - if (ix < 0x00800000) { - /* Normalize subnormal x so exponent becomes negative. */ - ix = asuint(x * 0x1p23f); - ix &= 0x7fffffff; - ix -= 23 << 23; - } + ix = asuint (x); + iy = asuint (y); + if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000 || zeroinfnan (iy))) + { + /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */ + if (unlikely (zeroinfnan (iy))) + { + if (2 * iy == 0) + return issignalingf_inline (x) ? x + y : 1.0f; + if (ix == 0x3f800000) + return issignalingf_inline (y) ? x + y : 1.0f; + if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000) + return x + y; + if (2 * ix == 2 * 0x3f800000) + return 1.0f; + if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000)) + return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */ + return y * y; } - double_t logx = log2_inline(ix); - double_t ylogx = y * logx; /* cannot overflow, y is single prec. */ - if (UNLIKELY((asuint64(ylogx) >> 47 & 0xffff) >= - asuint64(126.0 * POWF_SCALE) >> 47)) { - /* |y*log(x)| >= 126. */ - if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE) - return __math_oflowf(sign_bias); - if (ylogx <= -150.0 * POWF_SCALE) - return __math_uflowf(sign_bias); + if (unlikely (zeroinfnan (ix))) + { + float_t x2 = x * x; + if (ix & 0x80000000 && checkint (iy) == 1) + { + x2 = -x2; + sign_bias = 1; + } +#if WANT_ERRNO + if (2 * ix == 0 && iy & 0x80000000) + return __math_divzerof (sign_bias); +#endif + /* Without the barrier some versions of clang hoist the 1/x2 and + thus division by zero exception can be signaled spuriously. */ + return iy & 0x80000000 ? opt_barrier_float (1 / x2) : x2; } - return exp2_inline(ylogx, sign_bias); + /* x and y are non-zero finite. */ + if (ix & 0x80000000) + { + /* Finite x < 0. */ + int yint = checkint (iy); + if (yint == 0) + return __math_invalidf (x); + if (yint == 1) + sign_bias = SIGN_BIAS; + ix &= 0x7fffffff; + } + if (ix < 0x00800000) + { + /* Normalize subnormal x so exponent becomes negative. */ + ix = asuint (x * 0x1p23f); + ix &= 0x7fffffff; + ix -= 23 << 23; + } + } + double_t logx = log2_inline (ix); + double_t ylogx = y * logx; /* Note: cannot overflow, y is single prec. */ + if (unlikely ((asuint64 (ylogx) >> 47 & 0xffff) + >= asuint64 (126.0 * POWF_SCALE) >> 47)) + { + /* |y*log(x)| >= 126. */ + if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE) + /* |x^y| > 0x1.ffffffp127. */ + return __math_oflowf (sign_bias); + if (WANT_ROUNDING && WANT_ERRNO + && ylogx > 0x1.fffffffa3aae2p+6 * POWF_SCALE) + /* |x^y| > 0x1.fffffep127, check if we round away from 0. */ + if ((!sign_bias + && eval_as_float (1.0f + opt_barrier_float (0x1p-25f)) != 1.0f) + || (sign_bias + && eval_as_float (-1.0f - opt_barrier_float (0x1p-25f)) + != -1.0f)) + return __math_oflowf (sign_bias); + if (ylogx <= -150.0 * POWF_SCALE) + return __math_uflowf (sign_bias); +#if WANT_ERRNO_UFLOW + if (ylogx < -149.0 * POWF_SCALE) + return __math_may_uflowf (sign_bias); +#endif + } + return exp2_inline (ylogx, sign_bias); } -__weak_reference(powf, __powf_finite); +#if USE_GLIBC_ABI +strong_alias (powf, __powf_finite) +hidden_alias (powf, __ieee754_powf) +#endif diff --git a/libc/tinymath/powf_data.c b/libc/tinymath/powf_data.c index 77cd461f1..24cb324af 100644 --- a/libc/tinymath/powf_data.c +++ b/libc/tinymath/powf_data.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,16 +25,9 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/tinymath/powf_data.internal.h" +#include "libc/tinymath/arm.internal.h" __static_yoink("arm_optimized_routines_notice"); -/* - * Data definition for powf. - * - * Copyright (c) 2017-2018, Arm Limited. - * SPDX-License-Identifier: MIT - */ - const struct powf_log2_data __powf_log2_data = { .tab = { { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE }, diff --git a/libc/tinymath/powf_data.internal.h b/libc/tinymath/powf_data.internal.h deleted file mode 100644 index 498abf299..000000000 --- a/libc/tinymath/powf_data.internal.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_TINYMATH_POWF_DATA_INTERNAL_H_ -#define COSMOPOLITAN_LIBC_TINYMATH_POWF_DATA_INTERNAL_H_ - -#define POWF_LOG2_TABLE_BITS 4 -#define POWF_LOG2_POLY_ORDER 5 -#if TOINT_INTRINSICS -#define POWF_SCALE_BITS EXP2F_TABLE_BITS -#else -#define POWF_SCALE_BITS 0 -#endif -#define POWF_SCALE ((double)(1 << POWF_SCALE_BITS)) - -COSMOPOLITAN_C_START_ - -extern const struct powf_log2_data { - struct { - double invc, logc; - } tab[1 << POWF_LOG2_TABLE_BITS]; - double poly[POWF_LOG2_POLY_ORDER]; -} __powf_log2_data; - -COSMOPOLITAN_C_END_ -#endif /* COSMOPOLITAN_LIBC_TINYMATH_POWF_DATA_INTERNAL_H_ */ diff --git a/libc/tinymath/powl.c b/libc/tinymath/powl.c index 6de2ae99a..3ed4cd9e4 100644 --- a/libc/tinymath/powl.c +++ b/libc/tinymath/powl.c @@ -1,120 +1,42 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi │ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2021 Justine Alexandra Roberts Tunney │ +/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ +│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ +│ OpenBSD /usr/src/lib/libm/src/ld80/e_powl.c │ +│ /usr/src/lib/libm/src/ld128/e_powl.c │ +│ │ +│ Copyright (c) 2008 Stephen L. Moshier │ +│ │ +│ Permission to use, copy, modify, and distribute this software for any │ +│ purpose with or without fee is hereby granted, provided that the above │ +│ copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES │ +│ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF │ +│ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR │ +│ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES │ +│ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN │ +│ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF │ +│ │ +│ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. │ +│ │ +│ Developed at SunPro, a Sun Microsystems, Inc. business. │ +│ Permission to use, copy, modify, and distribute this │ +│ software is freely granted, provided that this notice │ +│ is preserved. │ │ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/errno.h" #include "libc/math.h" #include "libc/tinymath/internal.h" -#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) +#include "libc/tinymath/freebsd.internal.h" -#ifdef __x86_64__ - -/** - * Returns 𝑥^𝑦. - * @note should take ~56ns - */ -long double powl(long double x, long double y) { - long double t, u; - if (!isunordered(x, y)) { - if (!isinf(y)) { - if (!isinf(x)) { - if (x) { - if (y) { - if (x < 0 && y != truncl(y)) { -#ifndef __NO_MATH_ERRNO__ - errno = EDOM; -#endif - return NAN; - } - asm("fyl2x" : "=t"(u) : "0"(fabsl(x)), "u"(y) : "st(1)"); - asm("fprem" : "=t"(t) : "0"(u), "u"(1.L)); - asm("f2xm1" : "=t"(t) : "0"(t)); - asm("fscale" : "=t"(t) : "0"(t + 1), "u"(u)); - if (signbit(x)) { - if (y != truncl(y)) return -NAN; - if ((int64_t)y & 1) t = -t; - } - return t; - } else { - return 1; - } - } else if (y > 0) { - if (signbit(x) && y == truncl(y) && ((int64_t)y & 1)) { - return -0.; - } else { - return 0; - } - } else if (!y) { - return 1; - } else { -#ifndef __NO_MATH_ERRNO__ - errno = ERANGE; -#endif - if (y == truncl(y) && ((int64_t)y & 1)) { - return copysignl(INFINITY, x); - } else { - return INFINITY; - } - } - } else if (signbit(x)) { - if (!y) return 1; - x = y < 0 ? 0 : INFINITY; - if (y == truncl(y) && ((int64_t)y & 1)) x = -x; - return x; - } else if (y < 0) { - return 0; - } else if (y > 0) { - return INFINITY; - } else { - return 1; - } - } else { - x = fabsl(x); - if (x < 1) return signbit(y) ? INFINITY : 0; - if (x > 1) return signbit(y) ? 0 : INFINITY; - return 1; - } - } else if (!y || x == 1) { - return 1; - } else { - return NAN; - } -} - -#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 -__static_yoink("musl_libc_notice"); __static_yoink("openbsd_libm_notice"); +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); + +#if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 -/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_powl.c */ -/* - * Copyright (c) 2008 Stephen L. Moshier - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ /* powl.c * * Power function, long double precision @@ -606,35 +528,9 @@ static long double powil(long double x, int nn) return y; } +__weak_reference(powl, __powl_finite); + #elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 -#include "libc/tinymath/freebsd.internal.h" - -/*- - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -/* - * Copyright (c) 2008 Stephen L. Moshier - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ /* powl(x,y) return x**y * @@ -1045,8 +941,6 @@ powl(long double x, long double y) return s * z; } -#endif /* __x86_64__ */ - __weak_reference(powl, __powl_finite); -#endif /* long double is long */ +#endif /* __x86_64__ */ diff --git a/libc/tinymath/round.c b/libc/tinymath/round.c index 800df248f..86670d97d 100644 --- a/libc/tinymath/round.c +++ b/libc/tinymath/round.c @@ -30,7 +30,6 @@ #include "libc/tinymath/internal.h" __static_yoink("musl_libc_notice"); - #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 #define EPS DBL_EPSILON #elif FLT_EVAL_METHOD==2 diff --git a/libc/tinymath/sincosf.c b/libc/tinymath/sincosf.c index cc0a52d55..a95801bfc 100644 --- a/libc/tinymath/sincosf.c +++ b/libc/tinymath/sincosf.c @@ -1,9 +1,9 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi │ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,15 +25,19 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/likely.h" -#include "libc/math.h" #include "libc/tinymath/sincosf.internal.h" __static_yoink("arm_optimized_routines_notice"); -/* Fast sincosf implementation. Worst-case ULP is 0.5607, maximum relative - error is 0.5303 * 2^-23. A single-step range reduction is used for - small values. Large inputs have their range reduced using fast integer - arithmetic. */ +/** + * Returns sine and cosine of y. + * + * This is a fast sincosf implementation. Worst-case ULP is 0.5607, + * maximum relative error is 0.5303 * 2^-23. A single-step range + * reduction is used for small values. Large inputs have their range + * reduced using fast integer arithmetic. + * + * @raise EDOM if y is an infinity + */ void sincosf (float y, float *sinp, float *cosp) { @@ -46,11 +50,11 @@ sincosf (float y, float *sinp, float *cosp) { double x2 = x * x; - if (UNLIKELY (abstop12 (y) < abstop12 (0x1p-12f))) + if (unlikely (abstop12 (y) < abstop12 (0x1p-12f))) { - if (UNLIKELY (abstop12 (y) < abstop12 (0x1p-126f))) + if (unlikely (abstop12 (y) < abstop12 (0x1p-126f))) /* Force underflow for tiny y. */ - FORCE_EVAL (x2); + force_eval_float (x2); *sinp = y; *cosp = 1.0f; return; @@ -70,7 +74,7 @@ sincosf (float y, float *sinp, float *cosp) sincosf_poly (x * s, x * x, p, n, sinp, cosp); } - else if (LIKELY (abstop12 (y) < abstop12 (INFINITY))) + else if (likely (abstop12 (y) < abstop12 (INFINITY))) { uint32_t xi = asuint (y); int sign = xi >> 31; diff --git a/libc/tinymath/sincosf.internal.h b/libc/tinymath/sincosf.internal.h index 57a596c98..e8c055e25 100644 --- a/libc/tinymath/sincosf.internal.h +++ b/libc/tinymath/sincosf.internal.h @@ -1,15 +1,8 @@ #ifndef COSMOPOLITAN_LIBC_TINYMATH_SINCOSF_INTERNAL_H_ #define COSMOPOLITAN_LIBC_TINYMATH_SINCOSF_INTERNAL_H_ -#include "libc/tinymath/internal.h" +#include "libc/tinymath/arm.internal.h" COSMOPOLITAN_C_START_ -/* - * Header for sinf, cosf and sincosf. - * - * Copyright (c) 2018-2021, Arm Limited. - * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception - */ - /* 2PI * 2^-64. */ static const double pi63 = 0x1.921FB54442D18p-62; /* PI / 4. */ @@ -26,10 +19,10 @@ typedef struct } sincos_t; /* Polynomial data (the cosine polynomial is negated in the 2nd entry). */ -extern const sincos_t __sincosf_table[2] ; +extern const sincos_t __sincosf_table[2] HIDDEN; /* Table with 4/PI to 192 bit precision. */ -extern const uint32_t __inv_pio4[] ; +extern const uint32_t __inv_pio4[] HIDDEN; /* Top 12 bits of the float representation with the sign bit cleared. */ static inline uint32_t diff --git a/libc/tinymath/sincosf_data.c b/libc/tinymath/sincosf_data.c index 61a9ff067..03d181202 100644 --- a/libc/tinymath/sincosf_data.c +++ b/libc/tinymath/sincosf_data.c @@ -3,7 +3,7 @@ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ │ Optimized Routines │ -│ Copyright (c) 1999-2022, Arm Limited. │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ diff --git a/libc/tinymath/sinf.c b/libc/tinymath/sinf.c index 14f8de8a5..812d67521 100644 --- a/libc/tinymath/sinf.c +++ b/libc/tinymath/sinf.c @@ -1,9 +1,9 @@ -/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ +│ Optimized Routines │ +│ Copyright (c) 2018-2024, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -25,89 +25,68 @@ │ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/math.h" -#include "libc/tinymath/complex.internal.h" -#include "libc/tinymath/feval.internal.h" -#include "libc/tinymath/kernel.internal.h" -__static_yoink("musl_libc_notice"); -__static_yoink("fdlibm_notice"); - - -/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - * Optimized by Bruce D. Evans. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -/* Small multiples of pi/2 rounded to double precision. */ -static const double -s1pio2 = 1*M_PI_2, /* 0x3FF921FB, 0x54442D18 */ -s2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */ -s3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */ -s4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */ +#include "libc/tinymath/sincosf.internal.h" +__static_yoink("arm_optimized_routines_notice"); /** - * Returns sine of 𝑥. - * @note should take about 5ns + * Returns sine of y. + * + * This is a fast sinf implementation. The worst-case ULP is 0.5607 and + * the maximum relative error is 0.5303 * 2^-23. A single-step range + * reduction is used for small values. Large inputs have their range + * reduced using fast integer arithmetic. + * + * @raise EDOM and FE_INVALID if y is an infinity */ -float sinf(float x) +float +sinf (float y) { - double y; - uint32_t ix; - int n, sign; + double x = y; + double s; + int n; + const sincos_t *p = &__sincosf_table[0]; - GET_FLOAT_WORD(ix, x); - sign = ix >> 31; - ix &= 0x7fffffff; + if (abstop12 (y) < abstop12 (pio4f)) + { + s = x * x; - if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */ - if (ix < 0x39800000) { /* |x| < 2**-12 */ - /* raise inexact if x!=0 and underflow if subnormal */ - FORCE_EVAL(ix < 0x00800000 ? x/0x1p120f : x+0x1p120f); - return x; - } - return __sindf(x); - } - if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */ - if (ix <= 0x4016cbe3) { /* |x| ~<= 3pi/4 */ - if (sign) - return -__cosdf(x + s1pio2); - else - return __cosdf(x - s1pio2); - } - return __sindf(sign ? -(x + s2pio2) : -(x - s2pio2)); - } - if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */ - if (ix <= 0x40afeddf) { /* |x| ~<= 7*pi/4 */ - if (sign) - return __cosdf(x + s3pio2); - else - return -__cosdf(x - s3pio2); - } - return __sindf(sign ? x + s4pio2 : x - s4pio2); + if (unlikely (abstop12 (y) < abstop12 (0x1p-12f))) + { + if (unlikely (abstop12 (y) < abstop12 (0x1p-126f))) + /* Force underflow for tiny y. */ + force_eval_float (s); + return y; } - /* sin(Inf or NaN) is NaN */ - if (ix >= 0x7f800000) - return x - x; + return sinf_poly (x, s, p, 0); + } + else if (likely (abstop12 (y) < abstop12 (120.0f))) + { + x = reduce_fast (x, p, &n); - /* general argument reduction needed */ - n = __rem_pio2f(x, &y); - switch (n&3) { - case 0: return __sindf(y); - case 1: return __cosdf(y); - case 2: return __sindf(-y); - default: - return -__cosdf(y); - } + /* Setup the signs for sin and cos. */ + s = p->sign[n & 3]; + + if (n & 2) + p = &__sincosf_table[1]; + + return sinf_poly (x * s, x * x, p, n); + } + else if (abstop12 (y) < abstop12 (INFINITY)) + { + uint32_t xi = asuint (y); + int sign = xi >> 31; + + x = reduce_large (xi, &n); + + /* Setup signs for sin and cos - include original sign. */ + s = p->sign[(n + sign) & 3]; + + if ((n + sign) & 2) + p = &__sincosf_table[1]; + + return sinf_poly (x * s, x * x, p, n); + } + else + return __math_invalidf (y); } diff --git a/libc/tinymath/sinl.c b/libc/tinymath/sinl.c index 2d87d6b3f..7f6241c8e 100644 --- a/libc/tinymath/sinl.c +++ b/libc/tinymath/sinl.c @@ -32,7 +32,6 @@ #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) __static_yoink("musl_libc_notice"); - /** * Returns sine of 𝑥. */ diff --git a/libc/tinymath/sqrt.c b/libc/tinymath/sqrt.c index 1d23a9fd7..f8802df1e 100644 --- a/libc/tinymath/sqrt.c +++ b/libc/tinymath/sqrt.c @@ -30,7 +30,6 @@ #include "libc/tinymath/internal.h" __static_yoink("musl_libc_notice"); - #define FENV_SUPPORT 1 /* returns a*b*2^-32 - e, with error 0 <= e < 1. */ @@ -54,7 +53,7 @@ static inline uint64_t mul64(uint64_t a, uint64_t b) */ double sqrt(double x) { -#if defined(__x86_64__) && defined(__SSE2__) +#if defined(__x86_64__) asm("sqrtsd\t%1,%0" : "=x"(x) : "x"(x)); return x; diff --git a/libc/tinymath/sqrtf.c b/libc/tinymath/sqrtf.c index f37ecbc12..8df10f33c 100644 --- a/libc/tinymath/sqrtf.c +++ b/libc/tinymath/sqrtf.c @@ -30,7 +30,6 @@ #include "libc/tinymath/internal.h" __static_yoink("musl_libc_notice"); - #define FENV_SUPPORT 1 static inline uint32_t mul32(uint32_t a, uint32_t b) @@ -38,14 +37,12 @@ static inline uint32_t mul32(uint32_t a, uint32_t b) return (uint64_t)a*b >> 32; } -/* see sqrt.c for more detailed comments. */ - /** * Returns square root of 𝑥. */ float sqrtf(float x) { -#ifdef __SSE2__ +#if defined(__x86_64__) asm("sqrtss\t%1,%0" : "=x"(x) : "x"(x)); return x; diff --git a/libc/tinymath/sqrtl.c b/libc/tinymath/sqrtl.c index 335d40f4d..fa3c2c41c 100644 --- a/libc/tinymath/sqrtl.c +++ b/libc/tinymath/sqrtl.c @@ -32,7 +32,6 @@ #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) __static_yoink("musl_libc_notice"); - #define FENV_SUPPORT 1 typedef struct { @@ -195,14 +194,12 @@ static inline u128 mul128_tail(u128 a, u128 b) return lo; } -/* see sqrt.c for detailed comments. */ - /** * Returns square root of 𝑥. */ long double sqrtl(long double x) { -#ifdef __x86__ +#if defined(__x86__) asm("fsqrt" : "+t"(x)); return x; diff --git a/libc/tinymath/tan.c b/libc/tinymath/tan.c index 92d072404..475328ea7 100644 --- a/libc/tinymath/tan.c +++ b/libc/tinymath/tan.c @@ -31,7 +31,6 @@ __static_yoink("musl_libc_notice"); __static_yoink("fdlibm_notice"); - /* origin: FreeBSD /usr/src/lib/msun/src/s_tan.c */ /* * ==================================================== diff --git a/libc/tinymath/trunc.c b/libc/tinymath/trunc.c index 3dc404c6d..a849fcc6d 100644 --- a/libc/tinymath/trunc.c +++ b/libc/tinymath/trunc.c @@ -32,7 +32,6 @@ #endif __static_yoink("musl_libc_notice"); - /** * Rounds to integer, towards zero. */ diff --git a/test/BUILD.mk b/test/BUILD.mk index db75515cc..63d89de46 100644 --- a/test/BUILD.mk +++ b/test/BUILD.mk @@ -4,7 +4,8 @@ .PHONY: o/$(MODE)/test o/$(MODE)/test: o/$(MODE)/test/dsp \ o/$(MODE)/test/libc \ - o/$(MODE)/test/net \ o/$(MODE)/test/libcxx \ + o/$(MODE)/test/math \ + o/$(MODE)/test/net \ o/$(MODE)/test/posix \ o/$(MODE)/test/tool diff --git a/test/libc/fmt/atoi_test.c b/test/libc/fmt/atoi_test.c index d345c8c22..429a077d3 100644 --- a/test/libc/fmt/atoi_test.c +++ b/test/libc/fmt/atoi_test.c @@ -46,12 +46,8 @@ TEST(atoi, test) { EXPECT_EQ(-1, atoi("-1")); EXPECT_EQ(-9, atoi("-9")); EXPECT_EQ(-31337, atoi("-31337")); - EXPECT_EQ(INT_MIN, atoi("-2147483648")); - EXPECT_EQ(INT_MAX, atoi("2147483647")); - EXPECT_EQ(INT_MIN, atoi("-2147483649")); - EXPECT_EQ(INT_MAX, atoi("2147483648")); - EXPECT_EQ(INT_MIN, atoi("-2147483658")); - EXPECT_EQ(INT_MAX, atoi("2147483657")); + EXPECT_EQ(-2147483648, atoi("-2147483648")); + EXPECT_EQ(2147483647, atoi("2147483647")); EXPECT_EQ(123, atoi(" 123")); EXPECT_EQ(123, atoi(" \t123")); EXPECT_EQ(+123, atoi(" +123")); @@ -63,21 +59,21 @@ TEST(atoi, test) { } TEST(atoi, testWithinLimit_doesntChangeErrno) { - errno = 7; + errno = 666; EXPECT_EQ(INT_MAX, atoi("2147483647")); - EXPECT_EQ(7, errno); - errno = 7; + EXPECT_EQ(666, errno); + errno = 666; EXPECT_EQ(INT_MIN, atoi("-2147483648")); - EXPECT_EQ(7, errno); + EXPECT_EQ(666, errno); } -TEST(atoi, testOutsideLimit_saturatesAndSetsErangeErrno) { - errno = 0; - EXPECT_EQ(INT_MAX, atoi("2147483648")); - EXPECT_EQ(ERANGE, errno); - errno = 0; - EXPECT_EQ(INT_MIN, atoi("-2147483649")); - EXPECT_EQ(ERANGE, errno); +TEST(atol, testWithinLimit_doesntChangeErrno) { + errno = 666; + EXPECT_EQ(INT_MAX, atol("2147483647")); + EXPECT_EQ(666, errno); + errno = 666; + EXPECT_EQ(INT_MIN, atol("-2147483648")); + EXPECT_EQ(666, errno); } TEST(atol, test) { @@ -97,10 +93,6 @@ TEST(atol, test) { EXPECT_EQ(-31337, atol("-31337")); EXPECT_EQ(LONG_MIN, atol("-9223372036854775808")); EXPECT_EQ(LONG_MAX, atol("9223372036854775807")); - EXPECT_EQ(LONG_MIN, atol("-9223372036854775809")); - EXPECT_EQ(LONG_MAX, atol("9223372036854775808")); - EXPECT_EQ(LONG_MIN, atol("-9223372036854775818")); - EXPECT_EQ(LONG_MAX, atol("9223372036854775817")); EXPECT_EQ(123, atol(" 123")); EXPECT_EQ(123, atol(" \t123")); EXPECT_EQ(-123, atol(" -123")); @@ -571,6 +563,8 @@ TEST(strtol, invalidBin2) { BENCH(atoi, bench) { EZBENCH2("atoi 10⁸", donothing, __expropriate(atoi(__veil("r", "100000000")))); + EZBENCH2("atol 10⁸", donothing, + __expropriate(atol(__veil("r", "100000000")))); EZBENCH2("strtol 10⁸", donothing, __expropriate(strtol(__veil("r", "100000000"), 0, 10))); EZBENCH2("strtoul 10⁸", donothing, diff --git a/test/math/BUILD.mk b/test/math/BUILD.mk new file mode 100644 index 000000000..e0a241b20 --- /dev/null +++ b/test/math/BUILD.mk @@ -0,0 +1,41 @@ +#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐ +#── vi: set noet ft=make ts=8 sw=8 fenc=utf-8 :vi ────────────────────┘ + +PKGS += TEST_MATH + +TEST_MATH_SRCS := $(wildcard test/math/*.c) +TEST_MATH_SRCS_TEST = $(filter %_test.c,$(TEST_MATH_SRCS)) +TEST_MATH_OBJS = $(TEST_MATH_SRCS:%.c=o/$(MODE)/%.o) +TEST_MATH_COMS = $(TEST_MATH_SRCS_TEST:%.c=o/$(MODE)/%.com) +TEST_MATH_BINS = $(TEST_MATH_COMS) $(TEST_MATH_COMS:%=%.dbg) +TEST_MATH_TESTS = $(TEST_MATH_SRCS_TEST:%.c=o/$(MODE)/%.com.ok) +TEST_MATH_CHECKS = $(TEST_MATH_SRCS_TEST:%.c=o/$(MODE)/%.com.runs) + +TEST_MATH_DIRECTDEPS = \ + LIBC_INTRIN \ + LIBC_RUNTIME \ + LIBC_SYSV \ + LIBC_TINYMATH \ + THIRD_PARTY_COMPILER_RT + +TEST_MATH_DEPS := \ + $(call uniq,$(foreach x,$(TEST_MATH_DIRECTDEPS),$($(x)))) + +o/$(MODE)/test/math/math.pkg: \ + $(TEST_MATH_OBJS) \ + $(foreach x,$(TEST_MATH_DIRECTDEPS),$($(x)_A).pkg) + +o/$(MODE)/test/math/%.com.dbg: \ + $(TEST_MATH_DEPS) \ + o/$(MODE)/test/math/%.o \ + o/$(MODE)/test/math/math.pkg \ + $(CRT) \ + $(APE_NO_MODIFY_SELF) + @$(APELINK) + +$(TEST_MATH_OBJS): private CFLAGS += -fno-builtin + +.PHONY: o/$(MODE)/test/math +o/$(MODE)/test/math: \ + $(TEST_MATH_BINS) \ + $(TEST_MATH_CHECKS) diff --git a/test/math/hypot_test.c b/test/math/hypot_test.c new file mode 100644 index 000000000..dbe345edc --- /dev/null +++ b/test/math/hypot_test.c @@ -0,0 +1,92 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include +#include +#include + +#define OK 0 + +#define MAX_ERROR_ULP 1 + +#define CHECK(x) \ + if (!(x)) return __LINE__ + +#define TEST(e, x) \ + errno = 0; \ + CHECK(x); \ + CHECK(errno == e) + +long lemur(void) { + static unsigned __int128 s = 2131259787901769494; + return (s *= 15750249268501108917ul) >> 64; +} + +int main() { + + // test base cases + TEST(OK, hypot(3, 4) == 5); + TEST(OK, hypot(0, 0) == 0); + TEST(OK, hypot(5, 12) == 13); + TEST(OK, hypot(-5, -12) == 13); + TEST(OK, hypot(-3, 4) == 5); + + // test with zeros + TEST(OK, hypot(0, 0) == 0); + TEST(OK, hypot(0, 3) == 3); + TEST(OK, hypot(3, 0) == 3); + + // test with NAN + TEST(OK, isnan(hypot(NAN, 1))); + TEST(OK, isnan(hypot(1, NAN))); + TEST(OK, isnan(hypot(NAN, NAN))); + + // test with INFINITY + TEST(OK, hypot(INFINITY, 1) == INFINITY); + TEST(OK, hypot(1, INFINITY) == INFINITY); + TEST(OK, hypot(-INFINITY, -INFINITY) == INFINITY); + + // test underflow avoidance + TEST(OK, hypot(2e-308, 3e-308) > 0); + + // test what happens on overflow + // TODO(jart): This should raise ERANGE. + TEST(OK, hypot(DBL_MAX, DBL_MAX) == INFINITY); + + // test accuracy assuming hypotl() is correct + union { + long i; + double f; + } x, y, a, b; + int n = 1000; + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + x.i = lemur(); + y.i = lemur(); + a.f = hypotl(x.f, y.f); + b.f = hypot(x.f, y.f); + if (isnan(a.f) || isnan(b.f)) { + CHECK(isnan(a.f) == isnan(b.f)); + continue; + } + long e = b.i - a.i; + if (e < 0) e = -e; + CHECK(e <= MAX_ERROR_ULP); + } + } +} diff --git a/test/math/hypotf_test.c b/test/math/hypotf_test.c new file mode 100644 index 000000000..961f3c6d6 --- /dev/null +++ b/test/math/hypotf_test.c @@ -0,0 +1,94 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include +#include +#include + +#define OK 0 + +#define MAX_ERROR_ULP 1 + +#define CHECK(x) \ + if (!(x)) return __LINE__ + +#define TEST(e, x) \ + errno = 0; \ + CHECK(x); \ + CHECK(errno == e) + +int rando(void) { + static unsigned long s; + s *= 6364136223846793005; + s += 1442695040888963407; + return s >> 32; +} + +int main() { + + // test base cases + TEST(OK, hypotf(3, 4) == 5); + TEST(OK, hypotf(0, 0) == 0); + TEST(OK, hypotf(5, 12) == 13); + TEST(OK, hypotf(-5, -12) == 13); + TEST(OK, hypotf(-3, 4) == 5); + + // test with zeros + TEST(OK, hypotf(0, 0) == 0); + TEST(OK, hypotf(0, 3) == 3); + TEST(OK, hypotf(3, 0) == 3); + + // test with NAN + TEST(OK, isnan(hypotf(NAN, 1))); + TEST(OK, isnan(hypotf(1, NAN))); + TEST(OK, isnan(hypotf(NAN, NAN))); + + // test underflow avoidance + TEST(OK, hypotf(2e-38, 3e-38) > 0); + + // test what happens on overflow + // TODO(jart): This should raise ERANGE. + TEST(OK, hypotf(FLT_MAX, FLT_MAX) == INFINITY); + + // test with INFINITY + TEST(OK, hypotf(INFINITY, 1) == INFINITY); + TEST(OK, hypotf(1, INFINITY) == INFINITY); + TEST(OK, hypotf(-INFINITY, -INFINITY) == INFINITY); + + // test accuracy assuming hypotl() is correct + union { + int i; + float f; + } x, y, a, b; + int n = 1000; + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + x.i = rando(); + y.i = rando(); + a.f = hypotl(x.f, y.f); + b.f = hypotf(x.f, y.f); + if (isnan(a.f) || isnan(b.f)) { + CHECK(isnan(a.f) == isnan(b.f)); + continue; + } + long e = b.i - a.i; + if (e < 0) e = -e; + CHECK(e <= MAX_ERROR_ULP); + } + } +} diff --git a/test/math/powf_test.c b/test/math/powf_test.c new file mode 100644 index 000000000..357299826 --- /dev/null +++ b/test/math/powf_test.c @@ -0,0 +1,104 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include +#include +#include + +#define OK 0 + +#define MAX_ERROR_ULP 1 + +#define CHECK(x) \ + if (!(x)) return __LINE__ + +#define TEST(e, x) \ + errno = 0; \ + CHECK(x); \ + CHECK(errno == e) + +int rando(void) { + static unsigned long s; + s *= 6364136223846793005; + s += 1442695040888963407; + return s >> 32; +} + +int main() { + + // test base cases + TEST(OK, !powf(0, 5)); + TEST(OK, powf(0, 0) == 1); + TEST(OK, powf(1, 0) == 1); + TEST(OK, powf(2, 0) == 1); + TEST(OK, powf(0, 2) == 0); + TEST(OK, powf(5, 0) == 1); + TEST(OK, powf(1, 5) == 1); + TEST(OK, powf(2, 3) == 8); + TEST(OK, powf(-2, 3) == -8); + TEST(OK, powf(-2, 2) == 4); + TEST(OK, powf(2, -2) == 0.25); + + // test edge cases + TEST(OK, powf(-2, -2) == 0.25); + TEST(OK, powf(2, .5) == sqrtf(2)); + TEST(OK, powf(2, -.5) == M_SQRT1_2f); + + // test special values + TEST(OK, powf(NAN, 0) == 1); + TEST(OK, !powf(INFINITY, -2)); + TEST(OK, isnan(powf(NAN, 2))); + TEST(OK, isnan(powf(2, NAN))); + TEST(OK, powf(INFINITY, -1) == 0); + TEST(OK, powf(INFINITY, 2) == INFINITY); + TEST(OK, powf(-INFINITY, 2) == INFINITY); + TEST(OK, powf(-INFINITY, 3) == -INFINITY); + + // test domain errors + TEST(EDOM, isnan(powf(-1, 0.5))); + + // test pole errors + TEST(ERANGE, isinf(powf(0, -1))); + TEST(ERANGE, isinf(powf(0, -.5))); + + // test underflow and overflow + TEST(ERANGE, powf(1e-38, 2) == 0); + TEST(ERANGE, powf(FLT_MAX, 2) == INFINITY); + + // test accuracy assuming pow() is correct + union { + int i; + float f; + } x, y, a, b; + int n = 1000; + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + x.i = rando(); + y.i = rando(); + a.f = pow(x.f, y.f); + b.f = powf(x.f, y.f); + if (isnan(a.f) || isnan(b.f)) { + CHECK(isnan(a.f) == isnan(b.f)); + continue; + } + int e = b.i - a.i; + if (e < 0) e = -e; + CHECK(e <= MAX_ERROR_ULP); + } + } +} diff --git a/test/posix/BUILD.mk b/test/posix/BUILD.mk index 42faaf7ef..ac5b2e413 100644 --- a/test/posix/BUILD.mk +++ b/test/posix/BUILD.mk @@ -51,8 +51,6 @@ o/$(MODE)/test/posix/%.com.dbg: \ $(APE_NO_MODIFY_SELF) @$(APELINK) -$(TEST_POSIX_OBJS): private CFLAGS += -isystem isystem/ - .PHONY: o/$(MODE)/test/posix o/$(MODE)/test/posix: \ $(TEST_POSIX_BINS) \ diff --git a/libc/tinymath/__math_invalidl.c b/test/posix/atoi_test.c similarity index 70% rename from libc/tinymath/__math_invalidl.c rename to test/posix/atoi_test.c index 9c4239581..afef98200 100644 --- a/libc/tinymath/__math_invalidl.c +++ b/test/posix/atoi_test.c @@ -1,7 +1,7 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2023 Justine Alexandra Roberts Tunney │ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ @@ -16,11 +16,31 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/math.h" -#include "libc/tinymath/internal.h" +#include +#include +#include -#if LDBL_MANT_DIG != DBL_MANT_DIG -long double __math_invalidl(long double x) { - return (x - x) / (x - x); +#define TEST(x) \ + if (!(x)) return __LINE__ + +int main() { + TEST(atoi("") == 0); + TEST(atoi("-") == 0); + TEST(atoi("0") == 0); + TEST(atoi("1") == 1); + TEST(atoi("+1") == 1); + TEST(atoi("-1") == -1); + TEST(atoi("1-") == 1); + TEST(atoi("--1") == 0); + TEST(atoi("16 32") == 16); + TEST(atoi("\t 16") == 16); + TEST(atoi("\v 16") == 16); + TEST(atoi("\n 16") == 16); + TEST(atoi("\r 16") == 16); + TEST(atoi("rr 16") == 0); + TEST(atoi("0123456789") == 123456789); + TEST(atoi("2147483647") == INT_MAX); + TEST(atoi("-2147483648") == INT_MIN); + TEST(atoi("-2147483647") == INT_MIN + 1); + TEST(!errno); } -#endif diff --git a/test/posix/strtol_test.c b/test/posix/strtol_test.c new file mode 100644 index 000000000..4a6da98be --- /dev/null +++ b/test/posix/strtol_test.c @@ -0,0 +1,75 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include +#include +#include + +#define TEST(x) \ + if (!(x)) return __LINE__ + +int main() { + char *end; + + // Basic conversions + TEST(strtol("10", &end, 10) == 10L); + TEST(strtol("-10", &end, 10) == -10L); + TEST(strtol("+10", &end, 10) == 10L); + + // Edge cases and error detection + TEST(strtol("0", &end, 10) == 0L); + TEST(strtol("-0", &end, 10) == 0L); + TEST(strtol("+0", &end, 10) == 0L); + TEST(strtol("9223372036854775807", &end, 10) == LONG_MAX); + TEST(strtol("-9223372036854775808", &end, 10) == LONG_MIN); + + // Base specification + TEST(strtol("10", &end, 2) == 2L); + TEST(strtol("10", &end, 16) == 16L); + + // Invalid input (should not modify errno if conversion is successful) + errno = 0; + TEST(strtol("invalid", &end, 10) == 0L && errno == 0); + + // Overflow detection + errno = 0; + TEST(strtol("99999999999999999999999999", &end, 10) == LONG_MAX && + errno == ERANGE); + + // Underflow detection + errno = 0; + TEST(strtol("-99999999999999999999999999", &end, 10) == LONG_MIN && + errno == ERANGE); + + // Partial conversion with valid characters before invalid ones + TEST(strtol("123abc", &end, 10) == 123L && *end == 'a'); + + // Testing with leading white space + TEST(strtol(" 123", &end, 10) == 123L); + + // Base 0 auto-detection + TEST(strtol("0x10", &end, 0) == 16L); + TEST(strtol("010", &end, 0) == 8L); + TEST(strtol("10", &end, 0) == 10L); + + // Check if 'end' pointer is set correctly to the next character after the + // last valid digit + char *ptr = "1234abcd"; + strtol(ptr, &end, 10); + TEST(end == ptr + 4); +} diff --git a/tool/cosmocc/bin/cosmocc b/tool/cosmocc/bin/cosmocc index b225ad5fc..7a797da21 100755 --- a/tool/cosmocc/bin/cosmocc +++ b/tool/cosmocc/bin/cosmocc @@ -237,7 +237,7 @@ fi PLATFORM="-D__COSMOPOLITAN__ -D__COSMOCC__ -D__FATCOSMOCC__" PREDEF="-include libc/integral/normalize.inc" -CPPFLAGS="-fno-pie -nostdinc -fno-math-errno -isystem $BIN/../include" +CPPFLAGS="-fno-pie -nostdinc -isystem $BIN/../include" CFLAGS="-fportcosmo -fno-dwarf2-cfi-asm -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-semantic-interposition" LDFLAGS="-static -nostdlib -no-pie -fuse-ld=bfd -Wl,-z,noexecstack -Wl,-z,norelro -Wl,--gc-sections" PRECIOUS="-fno-omit-frame-pointer" diff --git a/tool/cosmocc/bin/cosmocross b/tool/cosmocc/bin/cosmocross index edc5fb5f7..699ef31f8 100755 --- a/tool/cosmocc/bin/cosmocross +++ b/tool/cosmocc/bin/cosmocross @@ -48,7 +48,7 @@ ORIGINAL="$0 $*" PLATFORM="-D__COSMOPOLITAN__ -D__COSMOCC__" PREDEF="-include libc/integral/normalize.inc" CFLAGS="-fportcosmo -fno-dwarf2-cfi-asm -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-semantic-interposition" -CPPFLAGS="-fno-pie -nostdinc -fno-math-errno -isystem $BIN/../include" +CPPFLAGS="-fno-pie -nostdinc -isystem $BIN/../include" LDFLAGS="-static -no-pie -nostdlib -fuse-ld=bfd -Wl,-z,noexecstack" APEFLAGS="-Wl,--gc-sections" PRECIOUS="-fno-omit-frame-pointer" diff --git a/tool/emacs/cosmo-cpp-constants.el b/tool/emacs/cosmo-cpp-constants.el index b5f2af0fa..010ab7127 100644 --- a/tool/emacs/cosmo-cpp-constants.el +++ b/tool/emacs/cosmo-cpp-constants.el @@ -66,6 +66,8 @@ "__BMI2__" "__FMA__" "__FAST_MATH__" + "__ROUNDING_MATH__" + "__NO_MATH_ERRNO__" "__FMA4__" "__F16C__" "__CLZERO__" diff --git a/tool/emacs/cosmo-platform-constants.el b/tool/emacs/cosmo-platform-constants.el index 65a84a434..a632b649a 100644 --- a/tool/emacs/cosmo-platform-constants.el +++ b/tool/emacs/cosmo-platform-constants.el @@ -70,7 +70,6 @@ "__GCC_IEC_559" "__SUPPORT_SNAN__" "__GCC_IEC_559_COMPLEX" - "__NO_MATH_ERRNO__" "__gnu__" "_OPENMP"))