Make quality improvements

- Write some more unit tests
- memcpy() on ARM is now faster
- Address the Musl complex math FIXME comments
- Some libm funcs like pow() now support setting errno
- Import the latest and greatest math functions from ARM
- Use more accurate atan2f() and log1pf() implementations
- atoi() and atol() will no longer saturate or clobber errno
This commit is contained in:
Justine Tunney 2024-02-25 14:57:28 -08:00
parent af8f2bd19f
commit 592f6ebc20
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
122 changed files with 6305 additions and 3859 deletions

View file

@ -206,7 +206,7 @@ endif
.UNVEIL += \ .UNVEIL += \
libc/integral \ libc/integral \
libc/stdbool.h \ libc/stdbool.h \
rwc:/dev/shm \ rwc:/dev/shm \
rx:.cosmocc \ rx:.cosmocc \
rx:build/bootstrap \ rx:build/bootstrap \
r:build/portcosmo.h \ r:build/portcosmo.h \
@ -297,6 +297,7 @@ include third_party/nsync/testing/BUILD.mk
include libc/testlib/BUILD.mk include libc/testlib/BUILD.mk
include tool/viz/lib/BUILD.mk include tool/viz/lib/BUILD.mk
include tool/args/BUILD.mk include tool/args/BUILD.mk
include test/math/BUILD.mk
include test/posix/BUILD.mk include test/posix/BUILD.mk
include test/libcxx/BUILD.mk include test/libcxx/BUILD.mk
include test/tool/args/BUILD.mk include test/tool/args/BUILD.mk

View file

@ -95,7 +95,6 @@ DEFAULT_CCFLAGS += \
DEFAULT_COPTS ?= \ DEFAULT_COPTS ?= \
-fno-ident \ -fno-ident \
-fno-common \ -fno-common \
-fno-math-errno \
-fno-gnu-unique \ -fno-gnu-unique \
-fstrict-aliasing \ -fstrict-aliasing \
-fstrict-overflow \ -fstrict-overflow \

View file

@ -16,44 +16,28 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/errno.h"
#include "libc/fmt/conv.h" #include "libc/fmt/conv.h"
#include "libc/limits.h"
#include "libc/stdckdint.h"
#include "libc/str/str.h" #include "libc/str/str.h"
/**
 * Turns string into int.
 *
 * Decimal is the only radix supported. Leading whitespace (as specified
 * by the isspace() function) is skipped over, then one optional `+` or
 * `-` sign is accepted, then digits are consumed up to the first
 * character that isn't a digit. Unlike strtol(), this function has no
 * way to report errors and it never changes `errno`.
 *
 * The result is not saturated. Input whose value doesn't fit in `int`
 * yields an unspecified value. Digits are accumulated using unsigned
 * arithmetic so such input can't cause signed integer overflow, which
 * the optimizer would otherwise be allowed to assume never happens.
 *
 * @param nptr is a non-null nul-terminated string
 * @return the decoded integer, or 0 if no digits were found
 */
int atoi(const char *nptr) {
  int c;
  int neg;
  unsigned x;
  // read bytes as unsigned char so values >= 0x80 are never handed to
  // the ctype functions as negative numbers
  do {
    c = (unsigned char)*nptr++;
  } while (isspace(c));
  neg = c == '-';
  if (c == '-' || c == '+') {
    c = (unsigned char)*nptr++;
  }
  for (x = 0; isdigit(c); c = (unsigned char)*nptr++) {
    x = x * 10 + (c - '0');  // wraps modulo 2^N instead of overflowing
  }
  // negate while still unsigned so INT_MIN round-trips correctly
  return neg ? (int)(0u - x) : (int)x;
}

View file

@ -16,34 +16,29 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/errno.h"
#include "libc/fmt/conv.h" #include "libc/fmt/conv.h"
#include "libc/limits.h"
#include "libc/stdckdint.h"
#include "libc/str/str.h" #include "libc/str/str.h"
/**
 * Turns string into long.
 *
 * Decimal is the only radix supported. Leading whitespace (as specified
 * by the isspace() function) is skipped over, then one optional `+` or
 * `-` sign is accepted, then digits are consumed up to the first
 * character that isn't a digit. Unlike strtol(), the atol function has
 * no way to report errors and it never changes `errno`.
 *
 * The result is not saturated. Input whose value doesn't fit in `long`
 * yields an unspecified value. Digits are accumulated using unsigned
 * arithmetic so such input can't cause signed integer overflow, which
 * the optimizer would otherwise be allowed to assume never happens.
 *
 * @param nptr is a non-null nul-terminated string
 * @return the decoded integer, or 0 if no digits were found
 */
long atol(const char *nptr) {
  int c;
  int neg;
  unsigned long x;
  // read bytes as unsigned char so values >= 0x80 are never handed to
  // the ctype functions as negative numbers
  do {
    c = (unsigned char)*nptr++;
  } while (isspace(c));
  neg = c == '-';
  if (c == '-' || c == '+') {
    c = (unsigned char)*nptr++;
  }
  for (x = 0; isdigit(c); c = (unsigned char)*nptr++) {
    x = x * 10 + (unsigned long)(c - '0');  // wraps instead of overflowing
  }
  // negate while still unsigned so LONG_MIN round-trips correctly
  return neg ? (long)(0ul - x) : (long)x;
}

View file

@ -3,7 +3,7 @@
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -80,11 +80,12 @@ ENTRY (__memcpy_aarch64_simd)
PTR_ARG (1) PTR_ARG (1)
SIZE_ARG (2) SIZE_ARG (2)
add srcend, src, count add srcend, src, count
add dstend, dstin, count
cmp count, 128 cmp count, 128
b.hi L(copy_long) b.hi L(copy_long)
add dstend, dstin, count
cmp count, 32 cmp count, 32
b.hi L(copy32_128) b.hi L(copy32_128)
nop
/* Small copies: 0..32 bytes. */ /* Small copies: 0..32 bytes. */
cmp count, 16 cmp count, 16
@ -95,6 +96,18 @@ ENTRY (__memcpy_aarch64_simd)
str B_q, [dstend, -16] str B_q, [dstend, -16]
ret ret
.p2align 4
/* Medium copies: 33..128 bytes. */
L(copy32_128):
ldp A_q, B_q, [src]
ldp C_q, D_q, [srcend, -32]
cmp count, 64
b.hi L(copy128)
stp A_q, B_q, [dstin]
stp C_q, D_q, [dstend, -32]
ret
.p2align 4
/* Copy 8-15 bytes. */ /* Copy 8-15 bytes. */
L(copy16): L(copy16):
tbz count, 3, L(copy8) tbz count, 3, L(copy8)
@ -104,7 +117,6 @@ L(copy16):
str A_h, [dstend, -8] str A_h, [dstend, -8]
ret ret
.p2align 3
/* Copy 4-7 bytes. */ /* Copy 4-7 bytes. */
L(copy8): L(copy8):
tbz count, 2, L(copy4) tbz count, 2, L(copy4)
@ -114,6 +126,19 @@ L(copy8):
str B_lw, [dstend, -4] str B_lw, [dstend, -4]
ret ret
/* Copy 65..128 bytes. */
L(copy128):
ldp E_q, F_q, [src, 32]
cmp count, 96
b.ls L(copy96)
ldp G_q, H_q, [srcend, -64]
stp G_q, H_q, [dstend, -64]
L(copy96):
stp A_q, B_q, [dstin]
stp E_q, F_q, [dstin, 32]
stp C_q, D_q, [dstend, -32]
ret
/* Copy 0..3 bytes using a branchless sequence. */ /* Copy 0..3 bytes using a branchless sequence. */
L(copy4): L(copy4):
cbz count, L(copy0) cbz count, L(copy0)
@ -127,33 +152,11 @@ L(copy4):
L(copy0): L(copy0):
ret ret
.p2align 4 .p2align 3
/* Medium copies: 33..128 bytes. */
L(copy32_128):
ldp A_q, B_q, [src]
ldp C_q, D_q, [srcend, -32]
cmp count, 64
b.hi L(copy128)
stp A_q, B_q, [dstin]
stp C_q, D_q, [dstend, -32]
ret
.p2align 4
/* Copy 65..128 bytes. */
L(copy128):
ldp E_q, F_q, [src, 32]
cmp count, 96
b.ls L(copy96)
ldp G_q, H_q, [srcend, -64]
stp G_q, H_q, [dstend, -64]
L(copy96):
stp A_q, B_q, [dstin]
stp E_q, F_q, [dstin, 32]
stp C_q, D_q, [dstend, -32]
ret
/* Copy more than 128 bytes. */ /* Copy more than 128 bytes. */
L(copy_long): L(copy_long):
add dstend, dstin, count
/* Use backwards copy if there is an overlap. */ /* Use backwards copy if there is an overlap. */
sub tmp1, dstin, src sub tmp1, dstin, src
cmp tmp1, count cmp tmp1, count
@ -190,6 +193,9 @@ L(copy64_from_end):
stp A_q, B_q, [dstend, -32] stp A_q, B_q, [dstend, -32]
ret ret
.p2align 4
nop
/* Large backwards copy for overlapping copies. /* Large backwards copy for overlapping copies.
Copy 16 bytes and then align srcend to 16-byte alignment. */ Copy 16 bytes and then align srcend to 16-byte alignment. */
L(copy_long_backwards): L(copy_long_backwards):

View file

@ -3,7 +3,7 @@
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the

View file

@ -3,7 +3,7 @@
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the

2
libc/intrin/fbclibm.c Normal file
View file

@ -0,0 +1,2 @@
__notice(freebsd_complex_notice, "FreeBSD Complex Math (BSD-2 License)\n\
Copyright (c) 2012 Stephen Montgomery-Smith <stephen@FreeBSD.ORG>");

View file

@ -9,15 +9,47 @@
#define M_LOG10E 0.43429448190325182765 /* log₁₀𝑒 */ #define M_LOG10E 0.43429448190325182765 /* log₁₀𝑒 */
#define M_LN2 0.69314718055994530942 /* logₑ2 */ #define M_LN2 0.69314718055994530942 /* logₑ2 */
#define M_LN10 2.30258509299404568402 /* logₑ10 */ #define M_LN10 2.30258509299404568402 /* logₑ10 */
#define M_PI 3.14159265358979323846 /* pi */ #define M_PI 3.14159265358979323846 /* 𝜋 */
#define M_PI_2 1.57079632679489661923 /* pi/2 */ #define M_PI_2 1.57079632679489661923 /* 𝜋/2 */
#define M_PI_4 0.78539816339744830962 /* pi/4 */ #define M_PI_4 0.78539816339744830962 /* 𝜋/4 */
#define M_1_PI 0.31830988618379067154 /* 1/pi */ #define M_1_PI 0.31830988618379067154 /* 1/𝜋 */
#define M_2_PI 0.63661977236758134308 /* 2/pi */ #define M_2_PI 0.63661977236758134308 /* 2/𝜋 */
#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */ #define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(𝜋) */
#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */ #define M_SQRT2 1.41421356237309504880 /* sqrt(2) */
#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */ #define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */
/* Single-precision (`f` suffix) variants of the M_* constants. They are
   only exposed when _GNU_SOURCE or _COSMO_SOURCE is defined. */
#if defined(_GNU_SOURCE) || defined(_COSMO_SOURCE)
#define M_Ef 2.7182818284590452354f /* 𝑒 */
#define M_LOG2Ef 1.4426950408889634074f /* log₂𝑒 */
#define M_LOG10Ef 0.43429448190325182765f /* log₁₀𝑒 */
#define M_LN2f 0.69314718055994530942f /* logₑ2 */
#define M_LN10f 2.30258509299404568402f /* logₑ10 */
#define M_PIf 3.14159265358979323846f /* 𝜋 */
#define M_PI_2f 1.57079632679489661923f /* 𝜋/2 */
#define M_PI_4f 0.78539816339744830962f /* 𝜋/4 */
#define M_1_PIf 0.31830988618379067154f /* 1/𝜋 */
#define M_2_PIf 0.63661977236758134308f /* 2/𝜋 */
#define M_2_SQRTPIf 1.12837916709551257390f /* 2/sqrt(𝜋) */
#define M_SQRT2f 1.41421356237309504880f /* sqrt(2) */
#define M_SQRT1_2f 0.70710678118654752440f /* 1/sqrt(2) */
#endif
/* Long double (`l` suffix) variants of the M_* constants, written with
   more digits than the double versions. Gated by the same feature test
   macros as the float variants. */
#if defined(_GNU_SOURCE) || defined(_COSMO_SOURCE)
#define M_El 2.718281828459045235360287471352662498L /* 𝑒 */
#define M_LOG2El 1.442695040888963407359924681001892137L /* log₂𝑒 */
#define M_LOG10El 0.434294481903251827651128918916605082L /* log₁₀𝑒 */
#define M_LN2l 0.693147180559945309417232121458176568L /* logₑ2 */
#define M_LN10l 2.302585092994045684017991454684364208L /* logₑ10 */
#define M_PIl 3.141592653589793238462643383279502884L /* 𝜋 */
#define M_PI_2l 1.570796326794896619231321691639751442L /* 𝜋/2 */
#define M_PI_4l 0.785398163397448309615660845819875721L /* 𝜋/4 */
#define M_1_PIl 0.318309886183790671537767526745028724L /* 1/𝜋 */
#define M_2_PIl 0.636619772367581343075535053490057448L /* 2/𝜋 */
#define M_2_SQRTPIl 1.128379167095512573896158903121545172L /* 2/sqrt(𝜋) */
#define M_SQRT2l 1.414213562373095048801688724209698079L /* sqrt(2) */
#define M_SQRT1_2l 0.707106781186547524400844362104849039L /* 1/sqrt(2) */
#endif
#define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__ #define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__
#define DBL_DIG __DBL_DIG__ #define DBL_DIG __DBL_DIG__
#define DBL_EPSILON __DBL_EPSILON__ #define DBL_EPSILON __DBL_EPSILON__
@ -76,6 +108,27 @@
#define FP_ILOGB0 (-2147483647 - 1) #define FP_ILOGB0 (-2147483647 - 1)
#define FP_ILOGBNAN (-2147483647 - 1) #define FP_ILOGBNAN (-2147483647 - 1)
/* Bit flags that math_errhandling is composed of. ISO C fixes their
   values at 1 and 2. */
#define MATH_ERRNO 1
#define MATH_ERREXCEPT 2
/* Tells callers how math functions report errors in this build:
   - __FAST_MATH__ defined: 0, i.e. neither mechanism is promised
   - __NO_MATH_ERRNO__ defined: floating-point exceptions only
   - otherwise: both errno and floating-point exceptions
   Both macros are expected to be predefined by the compiler (GCC
   documents them for -ffast-math and -fno-math-errno respectively). */
#ifdef __FAST_MATH__
#define math_errhandling 0
#elif defined(__NO_MATH_ERRNO__)
#define math_errhandling (MATH_ERREXCEPT)
#else
#define math_errhandling (MATH_ERRNO | MATH_ERREXCEPT)
#endif
/* Expose the standard FP_FAST_FMA* macros whenever the compiler's
   double-underscore equivalent is predefined. In ISO C these indicate
   that the matching fma function is generally about as fast as a
   separate multiply and add. */
#ifdef __FP_FAST_FMA
#define FP_FAST_FMA 1
#endif
#ifdef __FP_FAST_FMAF
#define FP_FAST_FMAF 1
#endif
#ifdef __FP_FAST_FMAL
#define FP_FAST_FMAL 1
#endif
COSMOPOLITAN_C_START_ COSMOPOLITAN_C_START_
#define NAN __builtin_nanf("") #define NAN __builtin_nanf("")

View file

@ -54,6 +54,7 @@ o/$(MODE)/libc/tinymath/loglq.o: private \
$(LIBC_TINYMATH_A_OBJS): private \ $(LIBC_TINYMATH_A_OBJS): private \
CFLAGS += \ CFLAGS += \
-fmath-errno \
-fsigned-zeros \ -fsigned-zeros \
-ftrapping-math \ -ftrapping-math \
-frounding-math \ -frounding-math \

View file

@ -5,7 +5,7 @@ MIT OR Apache-2.0 WITH LLVM-exception
MIT License MIT License
----------- -----------
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal of this software and associated documentation files (the "Software"), to deal

View file

@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/internal.h"
/*
 * Returns the quotient of +/-1.0 and 0.0.
 *
 * The numerator is -1.0 when `sign` is nonzero and 1.0 otherwise. With
 * IEEE 754 arithmetic that division yields an infinity of the chosen
 * sign and raises the divide-by-zero exception.
 *
 * NOTE(review): fp_barrier() is defined elsewhere; presumably it hides
 * the constant from the optimizer so the division really happens at run
 * time -- confirm in internal.h.
 */
double __math_divzero(uint32_t sign)
{
return fp_barrier(sign ? -1.0 : 1.0) / 0.0;
}

View file

@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/internal.h"
/*
 * Returns the quotient of +/-1.0f and 0.0f (single precision).
 *
 * The numerator is -1.0f when `sign` is nonzero and 1.0f otherwise.
 * With IEEE 754 arithmetic that division yields an infinity of the
 * chosen sign and raises the divide-by-zero exception.
 *
 * NOTE(review): fp_barrierf() is defined elsewhere; presumably it hides
 * the constant from the optimizer so the division really happens at run
 * time -- confirm in internal.h.
 */
float __math_divzerof(uint32_t sign)
{
return fp_barrierf(sign ? -1.0f : 1.0f) / 0.0f;
}

View file

@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/internal.h"
/*
 * Returns NaN derived from `x`.
 *
 * Computes (x - x) / (x - x). For finite x that is 0/0; for infinite x
 * the subtraction is inf - inf; either way IEEE 754 arithmetic produces
 * NaN and raises the invalid exception. If x is already NaN it simply
 * propagates through both operations.
 */
double __math_invalid(double x)
{
return (x - x) / (x - x);
}

View file

@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/internal.h"
/*
 * Returns NaN derived from `x` (single precision).
 *
 * Computes (x - x) / (x - x). For finite x that is 0/0; for infinite x
 * the subtraction is inf - inf; either way IEEE 754 arithmetic produces
 * NaN and raises the invalid exception. If x is already NaN it simply
 * propagates through both operations.
 */
float __math_invalidf(float x)
{
return (x - x) / (x - x);
}

View file

@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/internal.h"
/*
 * Produces an overflowed double result with the requested sign.
 *
 * Delegates to __math_xflow() with y = 0x1p769. That helper multiplies
 * +/-y by y, so the exact product would have magnitude 2^1538, which is
 * far beyond the largest finite double; the multiplication therefore
 * overflows. A nonzero `sign` selects the negative result.
 */
dontinstrument double __math_oflow(uint32_t sign)
{
return __math_xflow(sign, 0x1p769);
}

View file

@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/internal.h"
/*
 * Produces an overflowed float result with the requested sign.
 *
 * Delegates to __math_xflowf() with y = 0x1p97f. That helper multiplies
 * +/-y by y, so the exact product would have magnitude 2^194, which is
 * beyond the largest finite float; the multiplication therefore
 * overflows. A nonzero `sign` selects the negative result.
 */
dontinstrument float __math_oflowf(uint32_t sign)
{
return __math_xflowf(sign, 0x1p97f);
}

View file

@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/internal.h"
/*
 * Produces an underflowed double result with the requested sign.
 *
 * Delegates to __math_xflow() with y = 0x1p-767. That helper multiplies
 * +/-y by y, so the exact product would have magnitude 2^-1534, which
 * is smaller than the smallest subnormal double; the multiplication
 * therefore underflows. A nonzero `sign` selects the negative result.
 */
dontinstrument double __math_uflow(uint32_t sign)
{
return __math_xflow(sign, 0x1p-767);
}

View file

@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/internal.h"
/*
 * Produces an underflowed float result with the requested sign.
 *
 * Delegates to __math_xflowf() with y = 0x1p-95f. That helper
 * multiplies +/-y by y, so the exact product would have magnitude
 * 2^-190, which is smaller than the smallest subnormal float; the
 * multiplication therefore underflows. A nonzero `sign` selects the
 * negative result.
 */
dontinstrument float __math_uflowf(uint32_t sign)
{
return __math_xflowf(sign, 0x1p-95f);
}

View file

@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/internal.h"
/*
 * Shared helper for the double overflow/underflow result functions.
 *
 * Returns (sign ? -y : y) * y, i.e. y squared carrying the requested
 * sign. Callers pick y so that the product is out of double range.
 *
 * NOTE(review): fp_barrier() and eval_as_double() are defined
 * elsewhere; presumably the former keeps the multiplication from being
 * folded at compile time and the latter forces the product to be
 * rounded to double -- confirm in internal.h.
 */
dontinstrument double __math_xflow(uint32_t sign, double y)
{
return eval_as_double(fp_barrier(sign ? -y : y) * y);
}

View file

@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/internal.h"
/*
 * Shared helper for the float overflow/underflow result functions.
 *
 * Returns (sign ? -y : y) * y, i.e. y squared carrying the requested
 * sign. Callers pick y so that the product is out of float range.
 *
 * NOTE(review): fp_barrierf() and eval_as_float() are defined
 * elsewhere; presumably the former keeps the multiplication from being
 * folded at compile time and the latter forces the product to be
 * rounded to float -- confirm in internal.h.
 */
dontinstrument float __math_xflowf(uint32_t sign, float y)
{
return eval_as_float(fp_barrierf(sign ? -y : y) * y);
}

View file

@ -0,0 +1,509 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ARM_H_
#define COSMOPOLITAN_LIBC_TINYMATH_ARM_H_
#include "libc/math.h"
COSMOPOLITAN_C_START_
#define USE_GLIBC_ABI 1
/* If defined to 1, return correct results for special cases in non-nearest
rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f).
This may be set to 0 if there is no fenv support or if math functions only
get called in round to nearest mode.

In this header the value follows __ROUNDING_MATH__: 1 when that macro is
defined, 0 otherwise. NOTE(review): __ROUNDING_MATH__ is presumably
predefined by the compiler under -frounding-math -- confirm for the
toolchain in use. */
#ifdef __ROUNDING_MATH__
#define WANT_ROUNDING 1
#else
#define WANT_ROUNDING 0
#endif
/* If defined to 1, set errno in math functions according to ISO C. It
needs to be 1 if math.h has (math_errhandling & MATH_ERRNO) != 0.

In this header errno reporting is enabled unless __NO_MATH_ERRNO__ is
defined, in which case it is switched off. */
#ifdef __NO_MATH_ERRNO__
#define WANT_ERRNO 0
#else
#define WANT_ERRNO 1
#endif
/*------------------------------------------------------------------------------*/
/* optimized-routines/math/math_config.h */
#ifndef WANT_ROUNDING
/* If defined to 1, return correct results for special cases in non-nearest
rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f).
This may be set to 0 if there is no fenv support or if math functions only
get called in round to nearest mode. */
# define WANT_ROUNDING 1
#endif
#ifndef WANT_ERRNO
/* If defined to 1, set errno in math functions according to ISO C. Many math
libraries do not set errno, so this is 0 by default. It may need to be
set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0. */
# define WANT_ERRNO 0
#endif
#ifndef WANT_ERRNO_UFLOW
/* Set errno to ERANGE if result underflows to 0 (in all rounding modes). */
# define WANT_ERRNO_UFLOW (WANT_ROUNDING && WANT_ERRNO)
#endif
/* Compiler can inline round as a single instruction. */
#ifndef HAVE_FAST_ROUND
# if __aarch64__
# define HAVE_FAST_ROUND 1
# else
# define HAVE_FAST_ROUND 0
# endif
#endif
/* Compiler can inline lround, but not (long)round(x). */
#ifndef HAVE_FAST_LROUND
# if __aarch64__ && (100*__GNUC__ + __GNUC_MINOR__) >= 408 && __NO_MATH_ERRNO__
# define HAVE_FAST_LROUND 1
# else
# define HAVE_FAST_LROUND 0
# endif
#endif
/* Compiler can inline fma as a single instruction. */
#ifndef HAVE_FAST_FMA
# if defined FP_FAST_FMA || __aarch64__
# define HAVE_FAST_FMA 1
# else
# define HAVE_FAST_FMA 0
# endif
#endif
/* Provide *_finite symbols and some of the glibc hidden symbols
so libmathlib can be used with binaries compiled against glibc
to interpose math functions with both static and dynamic linking. */
#ifndef USE_GLIBC_ABI
# if __GNUC__
# define USE_GLIBC_ABI 1
# else
# define USE_GLIBC_ABI 0
# endif
#endif
/* Optionally used extensions.
   HIDDEN, NOINLINE and UNUSED expand to the matching GCC attributes and to
   nothing on other compilers.
   likely(x) / unlikely(x) are branch-prediction hints that yield the truth
   value of x.  Both normalize x with !! before handing it to
   __builtin_expect (which takes a long), so pointers, floating-point values
   and integers wider than long are tested for non-zero instead of being
   converted or truncated first.
   strong_alias(f, a) / hidden_alias(f, a) declare a as an alias of f (GNU C
   only); attribute_copy (f) additionally copies f's attributes on GCC 9 and
   later. */
#ifdef __GNUC__
# define HIDDEN __attribute__ ((__visibility__ ("hidden")))
# define NOINLINE __attribute__ ((noinline))
# define UNUSED __attribute__ ((unused))
# define likely(x) __builtin_expect (!!(x), 1)
# define unlikely(x) __builtin_expect (!!(x), 0)
# if __GNUC__ >= 9
#  define attribute_copy(f) __attribute__ ((copy (f)))
# else
#  define attribute_copy(f)
# endif
# define strong_alias(f, a) \
  extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
# define hidden_alias(f, a) \
  extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
  attribute_copy (f);
#else
# define HIDDEN
# define NOINLINE
# define UNUSED
# define likely(x) (x)
# define unlikely(x) (x)
#endif
/* Return ptr but hide its value from the compiler so accesses through it
cannot be optimized based on the contents.
The value is copied into a local that an empty asm statement lists as a
read-write register operand ("+r"), so the compiler must assume it may
have changed.  Written as a GNU statement expression; the argument
expression is evaluated once. */
#define ptr_barrier(ptr) \
({ \
__typeof (ptr) __ptr = (ptr); \
__asm("" : "+r"(__ptr)); \
__ptr; \
})
/* The helpers below only exist when HAVE_FAST_ROUND is non-zero, i.e. when
the compiler is expected to inline round as a single instruction. */
#if HAVE_FAST_ROUND
/* When set, the roundtoint and converttoint functions are provided with
the semantics documented below. */
# define TOINT_INTRINSICS 1
/* Round x to nearest int in all rounding modes, ties have to be rounded
consistently with converttoint so the results match. If the result
would be outside of [-2^31, 2^31-1] then the semantics is unspecified.
Implemented with round (). */
static inline double_t
roundtoint (double_t x)
{
return round (x);
}
/* Convert x to nearest int in all rounding modes, ties have to be rounded
consistently with roundtoint. If the result is not representable in an
int32_t then the semantics is unspecified.
Uses lround () when HAVE_FAST_LROUND says the compiler can inline it,
otherwise rounds with round () and converts the result through long. */
static inline int32_t
converttoint (double_t x)
{
# if HAVE_FAST_LROUND
return lround (x);
# else
return (long) round (x);
# endif
}
#endif
/* Reinterpret the object representation of a float as a 32-bit unsigned
   integer (type punning through a union, no value conversion). */
static inline uint32_t
asuint (float f)
{
  union
  {
    uint32_t bits;
    float value;
  } pun;
  pun.value = f;
  return pun.bits;
}
/* Reinterpret a 32-bit unsigned integer as the float that has the same
   object representation (type punning through a union). */
static inline float
asfloat (uint32_t i)
{
  union
  {
    float value;
    uint32_t bits;
  } pun;
  pun.bits = i;
  return pun.value;
}
/* Reinterpret the object representation of a double as a 64-bit unsigned
   integer (type punning through a union, no value conversion). */
static inline uint64_t
asuint64 (double f)
{
  union
  {
    uint64_t bits;
    double value;
  } pun;
  pun.value = f;
  return pun.bits;
}
/* Reinterpret a 64-bit unsigned integer as the double that has the same
   object representation (type punning through a union). */
static inline double
asdouble (uint64_t i)
{
  union
  {
    double value;
    uint64_t bits;
  } pun;
  pun.bits = i;
  return pun.value;
}
#ifndef IEEE_754_2008_SNAN
# define IEEE_754_2008_SNAN 1
#endif
/* Return non-zero if x is a signaling NaN.
   With IEEE_754_2008_SNAN set, the quiet bit (0x00400000) is flipped and
   the sign is shifted out by doubling; only a NaN whose quiet bit was
   clear then compares above the doubled quiet-NaN pattern.  Otherwise a
   NaN whose quiet bit is set is reported. */
static inline int
issignalingf_inline (float x)
{
  uint32_t bits = asuint (x);
  if (IEEE_754_2008_SNAN)
    return 2 * (bits ^ 0x00400000) > 2u * 0x7fc00000;
  return (bits & 0x7fc00000) == 0x7fc00000;
}
/* Return non-zero if x is a signaling NaN (double-precision counterpart
   of issignalingf_inline).  With IEEE_754_2008_SNAN set, the quiet bit
   (0x0008000000000000) is flipped and the sign is shifted out by doubling
   before comparing against the doubled quiet-NaN pattern; otherwise a NaN
   whose quiet bit is set is reported. */
static inline int
issignaling_inline (double x)
{
  uint64_t bits = asuint64 (x);
  if (IEEE_754_2008_SNAN)
    return 2 * (bits ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
  return (bits & 0x7ff8000000000000) == 0x7ff8000000000000;
}
/* Optimization barriers.  Two implementations with the same interface:
an inline-asm one for GNU C on AArch64 and a portable one that goes
through a volatile local. */
#if __aarch64__ && __GNUC__
/* Prevent the optimization of a floating-point expression. */
/* The empty volatile asm takes x as a read-write "+w" operand, so the
compiler cannot assume anything about the value that comes back. */
static inline float
opt_barrier_float (float x)
{
__asm__ __volatile__ ("" : "+w" (x));
return x;
}
/* Double-precision variant of opt_barrier_float. */
static inline double
opt_barrier_double (double x)
{
__asm__ __volatile__ ("" : "+w" (x));
return x;
}
/* Force the evaluation of a floating-point expression for its side-effect. */
/* Same empty asm as above; the value itself is discarded. */
static inline void
force_eval_float (float x)
{
__asm__ __volatile__ ("" : "+w" (x));
}
/* Double-precision variant of force_eval_float. */
static inline void
force_eval_double (double x)
{
__asm__ __volatile__ ("" : "+w" (x));
}
#else
/* Portable fallback: store x to a volatile local and read it back, so the
returned value comes from a volatile access. */
static inline float
opt_barrier_float (float x)
{
volatile float y = x;
return y;
}
/* Double-precision variant of the fallback opt_barrier_float. */
static inline double
opt_barrier_double (double x)
{
volatile double y = x;
return y;
}
/* Portable fallback: the volatile store keeps the evaluation of x alive;
UNUSED silences the unused-variable warning. */
static inline void
force_eval_float (float x)
{
volatile float y UNUSED = x;
}
/* Double-precision variant of the fallback force_eval_float. */
static inline void
force_eval_double (double x)
{
volatile double y UNUSED = x;
}
#endif
/* Evaluate an expression as the specified type, normally a type
cast should be enough, but compilers implement non-standard
excess-precision handling, so when FLT_EVAL_METHOD != 0 then
these functions may need to be customized. */
/* As written this is an identity function: the argument is converted to
float by the call and returned unchanged. */
static inline float
eval_as_float (float x)
{
return x;
}
/* Double-precision counterpart of eval_as_float: an identity function
whose parameter and return type force the expression to be evaluated as
a double. */
static inline double
eval_as_double (double x)
{
return x;
}
/* Error handling tail calls for special cases, with a sign argument.
The sign of the return value is set if the argument is non-zero. */
/* The result overflows. */
HIDDEN float __math_oflowf (uint32_t);
/* The result underflows to 0 in nearest rounding mode. */
HIDDEN float __math_uflowf (uint32_t);
/* The result underflows to 0 in some directed rounding mode only. */
HIDDEN float __math_may_uflowf (uint32_t);
/* Division by zero. */
HIDDEN float __math_divzerof (uint32_t);
/* The result overflows. */
HIDDEN double __math_oflow (uint32_t);
/* The result underflows to 0 in nearest rounding mode. */
HIDDEN double __math_uflow (uint32_t);
/* The result underflows to 0 in some directed rounding mode only. */
HIDDEN double __math_may_uflow (uint32_t);
/* Division by zero. */
HIDDEN double __math_divzero (uint32_t);
/* Error handling using input checking. */
/* Invalid input unless it is a quiet NaN. */
HIDDEN float __math_invalidf (float);
/* Invalid input unless it is a quiet NaN. */
HIDDEN double __math_invalid (double);
/* Invalid input unless it is a quiet NaN. */
HIDDEN long double __math_invalidl (long double);
/* Error handling using output checking, only for errno setting. */
/* Check if the result overflowed to infinity. */
HIDDEN double __math_check_oflow (double);
/* Check if the result underflowed to 0. */
HIDDEN double __math_check_uflow (double);
/* Output check for overflow to infinity.  When WANT_ERRNO is enabled the
   value is routed through __math_check_oflow (errno handling lives
   there); otherwise x is returned untouched. */
static inline double
check_oflow (double x)
{
  if (WANT_ERRNO)
    return __math_check_oflow (x);
  return x;
}
/* Output check for underflow to 0.  When WANT_ERRNO is enabled the value
   is routed through __math_check_uflow (errno handling lives there);
   otherwise x is returned untouched. */
static inline double
check_uflow (double x)
{
  if (WANT_ERRNO)
    return __math_check_uflow (x);
  return x;
}
/* Check if the result overflowed to infinity. */
HIDDEN float __math_check_oflowf (float);
/* Check if the result underflowed to 0. */
HIDDEN float __math_check_uflowf (float);
/* Single-precision output check for overflow to infinity.  When
   WANT_ERRNO is enabled the value is routed through __math_check_oflowf;
   otherwise x is returned untouched. */
static inline float
check_oflowf (float x)
{
  if (WANT_ERRNO)
    return __math_check_oflowf (x);
  return x;
}
/* Single-precision output check for underflow to 0.  When WANT_ERRNO is
   enabled the value is routed through __math_check_uflowf; otherwise x is
   returned untouched. */
static inline float
check_uflowf (float x)
{
  if (WANT_ERRNO)
    return __math_check_uflowf (x);
  return x;
}
/* Shared between expf, exp2f and powf. */
/* Declaration only: the constant object __exp2f_data is defined in another
translation unit and has hidden visibility. */
#define EXP2F_TABLE_BITS 5
#define EXP2F_POLY_ORDER 3
extern const struct exp2f_data
{
/* 1 << EXP2F_TABLE_BITS = 32 table entries. */
uint64_t tab[1 << EXP2F_TABLE_BITS];
double shift_scaled;
/* EXP2F_POLY_ORDER = 3 polynomial coefficients. */
double poly[EXP2F_POLY_ORDER];
double shift;
double invln2_scaled;
/* Second set of EXP2F_POLY_ORDER coefficients; presumably the scaled
counterpart of poly -- confirm against the data definition. */
double poly_scaled[EXP2F_POLY_ORDER];
} __exp2f_data HIDDEN;
/* Table and coefficients declared for logf (going by the name); the
constant object __logf_data is defined elsewhere with hidden visibility. */
#define LOGF_TABLE_BITS 4
#define LOGF_POLY_ORDER 4
extern const struct logf_data
{
/* 1 << LOGF_TABLE_BITS = 16 entries, each an (invc, logc) pair. */
struct
{
double invc, logc;
} tab[1 << LOGF_TABLE_BITS];
double ln2;
double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */
} __logf_data HIDDEN;
/* Table and coefficients declared for log2f (going by the name); the
constant object __log2f_data is defined elsewhere with hidden visibility. */
#define LOG2F_TABLE_BITS 4
#define LOG2F_POLY_ORDER 4
extern const struct log2f_data
{
/* 1 << LOG2F_TABLE_BITS = 16 entries, each an (invc, logc) pair. */
struct
{
double invc, logc;
} tab[1 << LOG2F_TABLE_BITS];
/* All LOG2F_POLY_ORDER = 4 coefficients are stored. */
double poly[LOG2F_POLY_ORDER];
} __log2f_data HIDDEN;
/* log2 table declared for powf (going by the name); the constant object
__powf_log2_data is defined elsewhere with hidden visibility. */
#define POWF_LOG2_TABLE_BITS 4
#define POWF_LOG2_POLY_ORDER 5
/* POWF_SCALE is 2^POWF_SCALE_BITS as a double: 2^EXP2F_TABLE_BITS when the
roundtoint/converttoint intrinsics are available, otherwise 1. */
#if TOINT_INTRINSICS
# define POWF_SCALE_BITS EXP2F_TABLE_BITS
#else
# define POWF_SCALE_BITS 0
#endif
#define POWF_SCALE ((double) (1 << POWF_SCALE_BITS))
extern const struct powf_log2_data
{
/* 1 << POWF_LOG2_TABLE_BITS = 16 entries, each an (invc, logc) pair. */
struct
{
double invc, logc;
} tab[1 << POWF_LOG2_TABLE_BITS];
/* POWF_LOG2_POLY_ORDER = 5 polynomial coefficients. */
double poly[POWF_LOG2_POLY_ORDER];
} __powf_log2_data HIDDEN;
/* Configuration and shared data declaration for the double-precision
exp family (the struct carries exp, exp2 and exp10 members).  The constant
object __exp_data is defined elsewhere with hidden visibility. */
#define EXP_TABLE_BITS 7
#define EXP_POLY_ORDER 5
/* Use polynomial that is optimized for a wider input range. This may be
needed for good precision in non-nearest rounding and !TOINT_INTRINSICS. */
#define EXP_POLY_WIDE 0
/* Use close to nearest rounding toint when !TOINT_INTRINSICS. This may be
needed for good precision in non-nearest rounding and !EXP_POLY_WIDE. */
#define EXP_USE_TOINT_NARROW 0
#define EXP2_POLY_ORDER 5
#define EXP2_POLY_WIDE 0
/* Wider exp10 polynomial necessary for good precision in non-nearest rounding
and !TOINT_INTRINSICS. */
#define EXP10_POLY_WIDE 0
extern const struct exp_data
{
double invln2N;
double invlog10_2N;
double shift;
double negln2hiN;
double negln2loN;
double neglog10_2hiN;
double neglog10_2loN;
double poly[4]; /* Last four coefficients. */
double exp2_shift;
double exp2_poly[EXP2_POLY_ORDER];
double exp10_poly[5];
/* 2 * (1 << EXP_TABLE_BITS) = 256 words. */
uint64_t tab[2*(1 << EXP_TABLE_BITS)];
} __exp_data HIDDEN;
/* Data declaration for double-precision log (going by the name); the
constant object __log_data is defined elsewhere with hidden visibility. */
#define LOG_TABLE_BITS 7
#define LOG_POLY_ORDER 6
#define LOG_POLY1_ORDER 12
extern const struct log_data
{
double ln2hi;
double ln2lo;
double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
double poly1[LOG_POLY1_ORDER - 1];
/* 1 << LOG_TABLE_BITS = 128 (invc, logc) pairs. */
struct {double invc, logc;} tab[1 << LOG_TABLE_BITS];
/* The second table only exists when no fast fma is available, so the
struct layout depends on HAVE_FAST_FMA. */
#if !HAVE_FAST_FMA
struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS];
#endif
} __log_data HIDDEN;
/* Data declaration for double-precision log2 (going by the name); the
constant object __log2_data is defined elsewhere with hidden visibility. */
#define LOG2_TABLE_BITS 6
#define LOG2_POLY_ORDER 7
#define LOG2_POLY1_ORDER 11
extern const struct log2_data
{
double invln2hi;
double invln2lo;
double poly[LOG2_POLY_ORDER - 1];
double poly1[LOG2_POLY1_ORDER - 1];
/* 1 << LOG2_TABLE_BITS = 64 (invc, logc) pairs. */
struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS];
/* The second table only exists when no fast fma is available, so the
struct layout depends on HAVE_FAST_FMA. */
#if !HAVE_FAST_FMA
struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS];
#endif
} __log2_data HIDDEN;
/* log table declared for double-precision pow (going by the name); the
constant object __pow_log_data is defined elsewhere with hidden
visibility. */
#define POW_LOG_TABLE_BITS 7
#define POW_LOG_POLY_ORDER 8
extern const struct pow_log_data
{
double ln2hi;
double ln2lo;
double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
/* Note: the pad field is unused, but allows slightly faster indexing. */
/* 1 << POW_LOG_TABLE_BITS = 128 entries of four doubles each. */
struct {double invc, pad, logc, logctail;} tab[1 << POW_LOG_TABLE_BITS];
} __pow_log_data HIDDEN;
/* Two float coefficient arrays declared for erff (going by the name); the
constant object __erff_data is defined elsewhere with hidden visibility. */
extern const struct erff_data
{
float erff_poly_A[6];
float erff_poly_B[7];
} __erff_data HIDDEN;
/* Coefficient tables declared for double-precision erf and erfc (going by
the member names); the constant object __erf_data is defined elsewhere
with hidden visibility.  The *_NCOEFFS macros give the array lengths. */
#define ERF_POLY_A_ORDER 19
#define ERF_POLY_A_NCOEFFS 10
#define ERFC_POLY_C_NCOEFFS 16
#define ERFC_POLY_D_NCOEFFS 18
#define ERFC_POLY_E_NCOEFFS 14
#define ERFC_POLY_F_NCOEFFS 17
extern const struct erf_data
{
double erf_poly_A[ERF_POLY_A_NCOEFFS];
/* N/D pairs: presumably numerator and denominator coefficients of
rational approximations -- confirm against the erf implementation. */
double erf_ratio_N_A[5];
double erf_ratio_D_A[5];
double erf_ratio_N_B[7];
double erf_ratio_D_B[6];
double erfc_poly_C[ERFC_POLY_C_NCOEFFS];
double erfc_poly_D[ERFC_POLY_D_NCOEFFS];
double erfc_poly_E[ERFC_POLY_E_NCOEFFS];
double erfc_poly_F[ERFC_POLY_F_NCOEFFS];
} __erf_data HIDDEN;
/* Table of 1 << V_EXP_TABLE_BITS = 128 64-bit words; presumably used by
the vector exp routines (going by the v_ prefix).  Defined elsewhere with
hidden visibility. */
#define V_EXP_TABLE_BITS 7
extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN;
/* Table of 1 << V_LOG_TABLE_BITS = 128 (invc, logc) pairs; presumably used
by the vector log routines (going by the v_ prefix).  The constant object
__v_log_data is defined elsewhere with hidden visibility. */
#define V_LOG_TABLE_BITS 7
extern const struct v_log_data
{
struct
{
double invc, logc;
} table[1 << V_LOG_TABLE_BITS];
} __v_log_data HIDDEN;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ARM_H_ */

View file

@ -5,13 +5,6 @@
FreeBSD lib/msun/src/s_asinhl.c FreeBSD lib/msun/src/s_asinhl.c
Converted to ldbl by David Schultz <das@FreeBSD.ORG> and Bruce D. Evans. Converted to ldbl by David Schultz <das@FreeBSD.ORG> and Bruce D. Evans.
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
Developed at SunPro, a Sun Microsystems, Inc. business.
Permission to use, copy, modify, and distribute this
software is freely granted, provided that this notice
is preserved.
Copyright (c) 1992-2023 The FreeBSD Project. Copyright (c) 1992-2023 The FreeBSD Project.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -35,12 +28,17 @@
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE. SUCH DAMAGE.
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
Developed at SunPro, a Sun Microsystems, Inc. business.
Permission to use, copy, modify, and distribute this
software is freely granted, provided that this notice
is preserved.
*/ */
#include "libc/math.h"
#include "libc/tinymath/freebsd.internal.h" #include "libc/tinymath/freebsd.internal.h"
__static_yoink("fdlibm_notice");
__static_yoink("freebsd_libm_notice"); __static_yoink("freebsd_libm_notice");
__static_yoink("fdlibm_notice");
#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)

View file

@ -1,177 +1,120 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Optimized Routines FreeBSD lib/msun/src/s_asinhl.c
Copyright (c) 1999-2022, Arm Limited. Converted to ldbl by David Schultz <das@FreeBSD.ORG> and Bruce D. Evans.
Permission is hereby granted, free of charge, to any person obtaining Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be Developed at SunPro, a Sun Microsystems, Inc. business.
included in all copies or substantial portions of the Software. Permission to use, copy, modify, and distribute this
software is freely granted, provided that this notice
is preserved.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, Copyright (c) 1992-2023 The FreeBSD Project.
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. Redistribution and use in source and binary forms, with or without
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY modification, are permitted provided that the following conditions
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, are met:
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 1. Redistributions of source code must retain the above copyright
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/ */
#include "libc/intrin/likely.h" #include "libc/tinymath/freebsd.internal.h"
#include "libc/math.h" __static_yoink("freebsd_libm_notice");
#include "libc/tinymath/atanf_common.internal.h" __static_yoink("fdlibm_notice");
#include "libc/tinymath/internal.h"
__static_yoink("arm_optimized_routines_notice");
#define Pi (0x1.921fb6p+1f) static volatile float
#define PiOver2 (0x1.921fb6p+0f) tiny = 1.0e-30;
#define PiOver4 (0x1.921fb6p-1f) static const float
#define SignMask (0x80000000) zero = 0.0,
pi_o_4 = 7.8539818525e-01, /* 0x3f490fdb */
pi_o_2 = 1.5707963705e+00, /* 0x3fc90fdb */
pi = 3.1415927410e+00; /* 0x40490fdb */
static volatile float
pi_lo = -8.7422776573e-08; /* 0xb3bbbd2e */
/* We calculate atan2f by P(n/d), where n and d are similar to the input /**
arguments, and P is a polynomial. The polynomial may underflow. * Returns arc tangent of 𝑦/𝑥.
POLY_UFLOW_BOUND is the lower bound of the difference in exponents of n and d */
for which P underflows, and is used to special-case such inputs. */
#define POLY_UFLOW_BOUND 24
static inline int32_t
biased_exponent (float f)
{
uint32_t fi = asuint (f);
int32_t ex = (int32_t) ((fi & 0x7f800000) >> 23);
if (UNLIKELY (ex == 0))
{
/* Subnormal case - we still need to get the exponent right for subnormal
numbers as division may take us back inside the normal range. */
return ex - __builtin_clz (fi << 9);
}
return ex;
}
/* Fast implementation of scalar atan2f. Largest observed error is
2.88ulps in [99.0, 101.0] x [99.0, 101.0]:
atan2f(0x1.9332d8p+6, 0x1.8cb6c4p+6) got 0x1.964646p-1
want 0x1.964640p-1. */
float float
atan2f (float y, float x) atan2f(float y, float x)
{ {
uint32_t ix = asuint (x); float z;
uint32_t iy = asuint (y); int32_t k,m,hx,hy,ix,iy;
uint32_t sign_x = ix & SignMask; GET_FLOAT_WORD(hx,x);
uint32_t sign_y = iy & SignMask; ix = hx&0x7fffffff;
GET_FLOAT_WORD(hy,y);
iy = hy&0x7fffffff;
if((ix>0x7f800000)||
(iy>0x7f800000)) /* x or y is NaN */
return nan_mix(x, y);
if(hx==0x3f800000) return atanf(y); /* x=1.0 */
m = ((hy>>31)&1)|((hx>>30)&2); /* 2*sign(x)+sign(y) */
uint32_t iax = ix & ~SignMask; /* when y = 0 */
uint32_t iay = iy & ~SignMask; if(iy==0) {
switch(m) {
/* x or y is NaN. */ case 0:
if ((iax > 0x7f800000) || (iay > 0x7f800000)) case 1: return y; /* atan(+-0,+anything)=+-0 */
return x + y; case 2: return pi+tiny;/* atan(+0,-anything) = pi */
case 3: return -pi-tiny;/* atan(-0,-anything) =-pi */
/* m = 2 * sign(x) + sign(y). */
uint32_t m = ((iy >> 31) & 1) | ((ix >> 30) & 2);
/* The following follows glibc ieee754 implementation, except
that we do not use +-tiny shifts (non-nearest rounding mode). */
int32_t exp_diff = biased_exponent (x) - biased_exponent (y);
/* Special case for (x, y) either on or very close to the x axis. Either y =
0, or y is tiny and x is huge (difference in exponents >=
POLY_UFLOW_BOUND). In the second case, we only want to use this special
case when x is negative (i.e. quadrants 2 or 3). */
if (UNLIKELY (iay == 0 || (exp_diff >= POLY_UFLOW_BOUND && m >= 2)))
{
switch (m)
{
case 0:
case 1:
return y; /* atan(+-0,+anything)=+-0. */
case 2:
return Pi; /* atan(+0,-anything) = pi. */
case 3:
return -Pi; /* atan(-0,-anything) =-pi. */
}
}
/* Special case for (x, y) either on or very close to the y axis. Either x =
0, or x is tiny and y is huge (difference in exponents >=
POLY_UFLOW_BOUND). */
if (UNLIKELY (iax == 0 || exp_diff <= -POLY_UFLOW_BOUND))
return sign_y ? -PiOver2 : PiOver2;
/* x is INF. */
if (iax == 0x7f800000)
{
if (iay == 0x7f800000)
{
switch (m)
{
case 0:
return PiOver4; /* atan(+INF,+INF). */
case 1:
return -PiOver4; /* atan(-INF,+INF). */
case 2:
return 3.0f * PiOver4; /* atan(+INF,-INF). */
case 3:
return -3.0f * PiOver4; /* atan(-INF,-INF). */
} }
} }
else /* when x = 0 */
{ if(ix==0) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny;
switch (m)
{ /* when x is INF */
case 0: if(ix==0x7f800000) {
return 0.0f; /* atan(+...,+INF). */ if(iy==0x7f800000) {
case 1: switch(m) {
return -0.0f; /* atan(-...,+INF). */ case 0: return pi_o_4+tiny;/* atan(+INF,+INF) */
case 2: case 1: return -pi_o_4-tiny;/* atan(-INF,+INF) */
return Pi; /* atan(+...,-INF). */ case 2: return (float)3.0*pi_o_4+tiny;/*atan(+INF,-INF)*/
case 3: case 3: return (float)-3.0*pi_o_4-tiny;/*atan(-INF,-INF)*/
return -Pi; /* atan(-...,-INF). */ }
} else {
switch(m) {
case 0: return zero ; /* atan(+...,+INF) */
case 1: return -zero ; /* atan(-...,+INF) */
case 2: return pi+tiny ; /* atan(+...,-INF) */
case 3: return -pi-tiny ; /* atan(-...,-INF) */
}
} }
} }
} /* when y is INF */
/* y is INF. */ if(iy==0x7f800000) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny;
if (iay == 0x7f800000)
return sign_y ? -PiOver2 : PiOver2;
uint32_t sign_xy = sign_x ^ sign_y; /* compute y/x */
k = (iy-ix)>>23;
float ax = asfloat (iax); if(k > 26) { /* |y/x| > 2**26 */
float ay = asfloat (iay); z=pi_o_2+(float)0.5*pi_lo;
m&=1;
bool pred_aygtax = (ay > ax); }
else if(k<-26&&hx<0) z=0.0; /* 0 > |y|/x > -2**-26 */
/* Set up z for call to atanf. */ else z=atanf(fabsf(y/x)); /* safe to do y/x */
float n = pred_aygtax ? -ax : ay; switch (m) {
float d = pred_aygtax ? ay : ax; case 0: return z ; /* atan(+,+) */
float z = n / d; case 1: return -z ; /* atan(-,+) */
case 2: return pi-(z-pi_lo);/* atan(+,-) */
float ret; default: /* case 3 */
if (UNLIKELY (m < 2 && exp_diff >= POLY_UFLOW_BOUND)) return (z-pi_lo)-pi;/* atan(-,-) */
{ }
/* If (x, y) is very close to x axis and x is positive, the polynomial
will underflow and evaluate to z. */
ret = z;
}
else
{
/* Work out the correct shift. */
float shift = sign_x ? -2.0f : 0.0f;
shift = pred_aygtax ? shift + 1.0f : shift;
shift *= PiOver2;
ret = eval_poly (z, z, shift);
}
/* Account for the sign of x and y. */
return asfloat (asuint (ret) ^ sign_xy);
} }

View file

@ -1,54 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATAN_COMMON_H_
#define COSMOPOLITAN_LIBC_TINYMATH_ATAN_COMMON_H_
#include "libc/tinymath/atan_data.internal.h"
#include "libc/tinymath/estrin_wrap.internal.h"
#include "libc/tinymath/horner.internal.h"
COSMOPOLITAN_C_START_
/*
* Double-precision polynomial evaluation function for scalar and vector atan(x)
* and atan2(y,x).
*
* Copyright (c) 2021-2023, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#if WANT_VMATH
#define DBL_T float64x2_t
#define P(i) v_f64 (__atan_poly_data.poly[i])
#else
#define DBL_T double
#define P(i) __atan_poly_data.poly[i]
#endif
/* Polynomial used in fast atan(x) and atan2(y,x) implementations
The order 19 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2).
z is the value whose square is fed to the polynomial, az is the value the
polynomial result is multiplied into and added to, and shift is added to
the final result.  DBL_T is double or float64x2_t depending on WANT_VMATH.
FMA and the ESTRIN_* macros come from the included headers; FMA (a, b, c)
is presumably a * b + c -- confirm in estrin_wrap.internal.h. */
static inline DBL_T
eval_poly (DBL_T z, DBL_T az, DBL_T shift)
{
/* Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of
full scheme to avoid underflow in x^16. */
DBL_T z2 = z * z;
/* x2, x4 and x8 are successive squarings of z2, i.e. z^4, z^8 and z^16. */
DBL_T x2 = z2 * z2;
DBL_T x4 = x2 * x2;
DBL_T x8 = x4 * x4;
/* The upper coefficients (from index 8) are evaluated separately, scaled by
x8 and combined with the lower ones. */
DBL_T y
= FMA (ESTRIN_11_ (z2, x2, x4, x8, P, 8), x8, ESTRIN_7 (z2, x2, x4, P));
/* Finalize. y = shift + z + z^3 * P(z^2). */
y = FMA (y, z2 * az, az);
y = y + shift;
return y;
}
#undef DBL_T
#undef FMA
#undef P
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATAN_COMMON_H_ */

View file

@ -1,11 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATAN_DATA_H_
#define COSMOPOLITAN_LIBC_TINYMATH_ATAN_DATA_H_
COSMOPOLITAN_C_START_
/* Declaration of the coefficient table read through the P(i) macro of the
double-precision atan polynomial: ATAN_POLY_NCOEFFS = 20 doubles.  The
object itself is defined elsewhere. */
#define ATAN_POLY_NCOEFFS 20
extern const struct atan_poly_data {
double poly[ATAN_POLY_NCOEFFS];
} __atan_poly_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATAN_DATA_H_ */

View file

@ -1,43 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATANF_COMMON_H_
#define COSMOPOLITAN_LIBC_TINYMATH_ATANF_COMMON_H_
#include "libc/tinymath/atanf_data.internal.h"
#include "libc/tinymath/estrin_wrap.internal.h"
#include "libc/tinymath/hornerf.internal.h"
COSMOPOLITAN_C_START_
#if WANT_VMATH
#define FLT_T float32x4_t
#define P(i) v_f32 (__atanf_poly_data.poly[i])
#else
#define FLT_T float
#define P(i) __atanf_poly_data.poly[i]
#endif
/* Polynomial used in fast atanf(x) and atan2f(y,x) implementations
The order 7 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2).
z is the value whose square is fed to the polynomial, az is the value the
polynomial result is multiplied into and added to, and shift is added to
the final result.  FLT_T is float or float32x4_t depending on WANT_VMATH.
FMA and the ESTRIN_* macros come from the included headers; FMA (a, b, c)
is presumably a * b + c -- confirm in estrin_wrap.internal.h. */
static inline FLT_T
eval_poly (FLT_T z, FLT_T az, FLT_T shift)
{
/* Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However,
a standard implementation using z8 creates spurious underflow
in the very last fma (when z^8 is small enough).
Therefore, we split the last fma into a mul and an fma.
Horner and single-level Estrin have higher errors that exceed
threshold. */
FLT_T z2 = z * z;
FLT_T z4 = z2 * z2;
/* Then assemble polynomial. */
/* The upper coefficients (from index 4) are multiplied by z4 first and
then by z4 again inside the FMA, so z^8 is never formed on its own. */
FLT_T y = FMA (z4, z4 * ESTRIN_3_ (z2, z4, P, 4), ESTRIN_3 (z2, z4, P));
/* Finalize:
y = shift + z * P(z^2). */
/* As coded: shift + az + (z2 * az) * y. */
return FMA (y, z2 * az, az) + shift;
}
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATANF_COMMON_H_ */

View file

@ -1,11 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATANF_DATA_H_
#define COSMOPOLITAN_LIBC_TINYMATH_ATANF_DATA_H_
COSMOPOLITAN_C_START_
/* Declaration of the coefficient table read through the P(i) macro of the
single-precision atan polynomial: ATANF_POLY_NCOEFFS = 8 floats.  The
object itself is defined elsewhere. */
#define ATANF_POLY_NCOEFFS 8
extern const struct atanf_poly_data {
float poly[ATANF_POLY_NCOEFFS];
} __atanf_poly_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATANF_DATA_H_ */

View file

@ -1,41 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
// FIXME: Hull et al. "Implementing the complex arcsine and arccosine functions using exception handling" 1997
/* Complex arc cosine via acos(z) = pi/2 - asin(z): the real part of
   casin(z) is subtracted from pi/2 and the imaginary part is negated. */
double complex cacos(double complex z)
{
	double complex s = casin(z);
	double re = M_PI_2 - creal(s);
	double im = -cimag(s);
	return CMPLX(re, im);
}

View file

@ -1,39 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
// FIXME
/* Single-precision complex arc cosine, computed as pi/2 - casinf(z): the
   real part of casinf(z) is subtracted from (float)pi/2 and the imaginary
   part is negated. */
float complex cacosf(float complex z)
{
	float complex s = casinf(z);
	float re = (float)M_PI_2 - crealf(s);
	float im = -cimagf(s);
	return CMPLXF(re, im);
}

View file

@ -1,44 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
/* acosh(z) = i acos(z) */
double complex cacosh(double complex z)
{
int zineg = signbit(cimag(z));
z = cacos(z);
if (zineg) return CMPLX(cimag(z), -creal(z));
else return CMPLX(-cimag(z), creal(z));
}
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
__weak_reference(cacosh, cacoshl);
#endif

View file

@ -1,39 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
float complex cacoshf(float complex z)
{
int zineg = signbit(cimagf(z));
z = cacosf(z);
if (zineg) return CMPLXF(cimagf(z), -crealf(z));
else return CMPLXF(-cimagf(z), crealf(z));
}

View file

@ -1,45 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
// FIXME
float complex casinf(float complex z)
{
float complex w;
float x, y;
x = crealf(z);
y = cimagf(z);
w = CMPLXF(1.0 - (x - y)*(x + y), -2.0*x*y);
float complex r = clogf(CMPLXF(-y, x) + csqrtf(w));
return CMPLXF(cimagf(r), -crealf(r));
}

View file

@ -1,39 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
/* asinh(z) = -i asin(i z) */
double complex casinh(double complex z)
{
z = casin(CMPLX(-cimag(z), creal(z)));
return CMPLX(cimag(z), -creal(z));
}

View file

@ -1,37 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
float complex casinhf(float complex z)
{
z = casinf(CMPLXF(-cimagf(z), crealf(z)));
return CMPLXF(cimagf(z), -crealf(z));
}

View file

@ -1,142 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
__static_yoink("openbsd_libm_notice");
/* origin: OpenBSD /usr/src/lib/libm/src/s_catan.c */
/*
* Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/*
* Complex circular arc tangent
*
*
* SYNOPSIS:
*
* double complex catan();
* double complex z, w;
*
* w = catan (z);
*
*
* DESCRIPTION:
*
* If
*     z = x + iy,
*
* then
*
*     Re w = (1/2) * arctan( 2x / (1 - x^2 - y^2) ) + k PI
*
*     Im w = (1/4) * log( (x^2 + (y+1)^2) / (x^2 + (y-1)^2) )
*
* Where k is an arbitrary integer.
*
* catan(z) = -i catanh(iz).
*
* ACCURACY:
*
* Relative error:
* arithmetic domain # trials peak rms
* DEC -10,+10 5900 1.3e-16 7.8e-18
* IEEE -10,+10 30000 2.3e-15 8.5e-17
* The check catan( ctan(z) ) = z, with |x| and |y| < PI/2,
* had peak relative error 1.5e-16, rms relative error
* 2.9e-17. See also clog().
*/
/*
 * pi split into three terms of decreasing magnitude (DP1 + DP2 + DP3 ~= pi).
 * _redupi() subtracts the terms one after another instead of subtracting
 * a single product with M_PI.
 */
static const double DP1 = 3.14159265160560607910E0;
static const double DP2 = 1.98418714791870343106E-9;
static const double DP3 = 1.14423774522196636802E-17;

/*
 * Subtract from x the multiple of pi nearest to it.
 *
 * @param x  angle in radians
 * @return   x - k*pi, where k is x/pi rounded to the nearest integer
 *           (halves away from zero); a NaN or infinite x is returned as is
 */
static double _redupi(double x)
{
	double t;
	long i;

	/*
	 * Converting a NaN or an infinity to long is undefined behavior
	 * (the value is not representable), so propagate such inputs
	 * without going through the integer conversion below.
	 */
	if (!isfinite(x))
		return x;

	t = x/M_PI;
	if (t >= 0.0)
		t += 0.5;
	else
		t -= 0.5;

	i = t;  /* the multiple (truncation completes the rounding) */
	t = i;
	t = ((x - t * DP1) - t * DP2) - t * DP3;
	return t;
}
/*
 * Complex circular arc tangent of z = x + iy.
 *
 *     Re w = atan2(2x, 1 - x^2 - y^2) / 2, passed through _redupi()
 *     Im w = log((x^2 + (y+1)^2) / (x^2 + (y-1)^2)) / 4
 */
double complex catan(double complex z)
{
	const double x = creal(z);
	const double y = cimag(z);
	const double x_sq = x * x;

	/* Real part. */
	const double unit_gap = 1.0 - x_sq - (y * y);
	const double half_angle = 0.5 * atan2(2.0 * x, unit_gap);
	const double re = _redupi(half_angle);

	/* Imaginary part: numerator uses y + 1, denominator uses y - 1. */
	const double y_minus_1 = y - 1.0;
	const double denom = x_sq + (y_minus_1 * y_minus_1);
	const double y_plus_1 = y + 1.0;
	const double ratio = (x_sq + y_plus_1 * y_plus_1)/denom;

	return CMPLX(re, 0.25 * log(ratio));
}
/*
 * Only when long double has double's parameters (53-bit mantissa and a
 * maximum exponent of 1024) is catanl tied to catan here.
 * NOTE(review): __weak_reference is defined outside this file; presumably
 * it emits catanl as a weak alias of catan -- confirm.
 */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
__weak_reference(catan, catanl);
#endif

View file

@ -1,135 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
__static_yoink("openbsd_libm_notice");
/* origin: OpenBSD /usr/src/lib/libm/src/s_catanf.c */
/*
* Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/*
* Complex circular arc tangent
*
*
* SYNOPSIS:
*
* float complex catanf();
* float complex z, w;
*
* w = catanf( z );
*
*
* DESCRIPTION:
*
* If
*     z = x + iy,
*
* then
*
*     Re w = (1/2) * arctan( 2x / (1 - x^2 - y^2) ) + k PI
*
*     Im w = (1/4) * log( (x^2 + (y+1)^2) / (x^2 + (y-1)^2) )
*
* Where k is an arbitrary integer.
*
*
* ACCURACY:
*
* Relative error:
* arithmetic domain # trials peak rms
* IEEE -10,+10 30000 2.3e-6 5.2e-8
*/
/*
 * pi split into three terms of decreasing magnitude (DP1 + DP2 + DP3 ~= pi).
 * The terms are doubles, so the subtraction chain in _redupif() is carried
 * out in double before the result is narrowed to float.
 */
static const double DP1 = 3.140625;
static const double DP2 = 9.67502593994140625E-4;
static const double DP3 = 1.509957990978376432E-7;

/*
 * Subtract from xx the multiple of pi nearest to it.
 *
 * @param xx  angle in radians
 * @return    xx - k*pi, where k is xx/pi rounded to the nearest integer
 *            (halves away from zero); a NaN or infinite xx is returned as is
 */
static float _redupif(float xx)
{
	float x, t;
	long i;

	/*
	 * Converting a NaN or an infinity to long is undefined behavior
	 * (the value is not representable), so propagate such inputs
	 * without going through the integer conversion below.
	 */
	if (!isfinite(xx))
		return xx;

	x = xx;
	t = x/(float)M_PI;
	if (t >= 0.0f)
		t += 0.5f;
	else
		t -= 0.5f;

	i = t;  /* the multiple (truncation completes the rounding) */
	t = i;
	t = ((x - t * DP1) - t * DP2) - t * DP3;
	return t;
}
/*
 * Single-precision complex circular arc tangent of z = x + iy.
 *
 *     Re w = atan2f(2x, 1 - x^2 - y^2) / 2, passed through _redupif()
 *     Im w = logf((x^2 + (y+1)^2) / (x^2 + (y-1)^2)) / 4
 */
float complex catanf(float complex z)
{
	const float x = crealf(z);
	const float y = cimagf(z);
	const float x_sq = x * x;

	/* Real part. */
	const float unit_gap = 1.0f - x_sq - (y * y);
	const float half_angle = 0.5f * atan2f(2.0f * x, unit_gap);
	const float re = _redupif(half_angle);

	/* Imaginary part: numerator uses y + 1, denominator uses y - 1. */
	const float y_minus_1 = y - 1.0f;
	const float denom = x_sq + (y_minus_1 * y_minus_1);
	const float y_plus_1 = y + 1.0f;
	const float ratio = (x_sq + (y_plus_1 * y_plus_1))/denom;

	return CMPLXF(re, 0.25f * logf(ratio));
}

View file

@ -1,39 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
/* atanh = -i atan(i z) */
double complex catanh(double complex z)
{
z = catan(CMPLX(-cimag(z), creal(z)));
return CMPLX(cimag(z), -creal(z));
}

View file

@ -1,37 +0,0 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/complex.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
float complex catanhf(float complex z)
{
z = catanf(CMPLXF(-cimagf(z), crealf(z)));
return CMPLXF(cimagf(z), -crealf(z));
}

651
libc/tinymath/catrig.c Normal file
View file

@ -0,0 +1,651 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
FreeBSD lib/msun/src/catrig.c
Copyright (c) 2012 Stephen Montgomery-Smith <stephen@FreeBSD.ORG>
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/
#include "libc/tinymath/freebsd.internal.h"
__static_yoink("freebsd_complex_notice");
/*
 * File-local replacements for the classification macros: isinf() and
 * isnan() are expressed with plain comparisons, and signbit() with the
 * compiler builtin. Any earlier definitions are dropped first.
 */
#undef isinf
#define isinf(x) (fabs(x) == INFINITY)
#undef isnan
#define isnan(x) ((x) != (x))
/*
 * Evaluates 1 + tiny into a volatile float and discards it. It is invoked
 * at the points below that are commented as having to raise inexact.
 */
#define raise_inexact() do { volatile float _j = 1 + tiny; (void)_j; } while(0)
#undef signbit
#define signbit(x) (__builtin_signbit(x))
/* We need that DBL_EPSILON^2/128 is larger than FOUR_SQRT_MIN. */
/*
 * Thresholds and constants shared by the functions in this file.
 * A_crossover and B_crossover select between the direct and the
 * rearranged formulas in do_hard_work(); RECIP_EPSILON is the magnitude
 * above which the public functions switch to their large-argument paths.
 */
static const double
A_crossover = 10, /* Hull et al suggest 1.5, but 10 works better */
B_crossover = 0.6417, /* suggested by Hull et al */
FOUR_SQRT_MIN = 0x1p-509, /* >= 4 * sqrt(DBL_MIN) */
QUARTER_SQRT_MAX = 0x1p509, /* <= sqrt(DBL_MAX) / 4 */
m_e = 2.7182818284590452e0, /* 0x15bf0a8b145769.0p-51 */
m_ln2 = 6.9314718055994531e-1, /* 0x162e42fefa39ef.0p-53 */
pio2_hi = 1.5707963267948966e0, /* 0x1921fb54442d18.0p-52 */
RECIP_EPSILON = 1 / DBL_EPSILON,
SQRT_3_EPSILON = 2.5809568279517849e-8, /* 0x1bb67ae8584caa.0p-78 */
SQRT_6_EPSILON = 3.6500241499888571e-8, /* 0x13988e1409212e.0p-77 */
SQRT_MIN = 0x1p-511; /* >= sqrt(DBL_MIN) */
/*
 * Low-order part of PI/2; it is always used as pio2_hi + pio2_lo.
 * NOTE(review): presumably volatile so that the sum is evaluated at run
 * time rather than folded by the compiler -- confirm.
 */
static const volatile double
pio2_lo = 6.1232339957367659e-17; /* 0x11a62633145c07.0p-106 */
/* Operand of raise_inexact(); volatile so that 1 + tiny is really computed. */
static const volatile float
tiny = 0x1p-100;
/* Defined after cacosh(); declared here because casinh() and cacos() call it. */
static double complex clog_for_large_values(double complex z);
/*
* Testing indicates that all these functions are accurate up to 4 ULP.
* The functions casin(h) and cacos(h) are about 2.5 times slower than asinh.
* The functions catan(h) are a little under 2 times slower than atanh.
*
* The code for casinh, casin, cacos, and cacosh comes first. The code is
* rather complicated, and the four functions are highly interdependent.
*
* The code for catanh and catan comes at the end. It is much simpler than
* the other functions, and the code for these can be disconnected from the
* rest of the code.
*/
/*
* ================================
* | casinh, casin, cacos, cacosh |
* ================================
*/
/*
* The algorithm is very close to that in "Implementing the complex arcsine
* and arccosine functions using exception handling" by T. E. Hull, Thomas F.
* Fairgrieve, and Ping Tak Peter Tang, published in ACM Transactions on
* Mathematical Software, Volume 23 Issue 3, 1997, Pages 299-335,
* http://dl.acm.org/citation.cfm?id=275324.
*
* Throughout we use the convention z = x + I*y.
*
* casinh(z) = sign(x)*log(A+sqrt(A*A-1)) + I*asin(B)
* where
* A = (|z+I| + |z-I|) / 2
* B = (|z+I| - |z-I|) / 2 = y/A
*
* These formulas become numerically unstable:
* (a) for Re(casinh(z)) when z is close to the line segment [-I, I] (that
* is, Re(casinh(z)) is close to 0);
* (b) for Im(casinh(z)) when z is close to either of the intervals
* [I, I*infinity) or (-I*infinity, -I] (that is, |Im(casinh(z))| is
* close to PI/2).
*
* These numerical problems are overcome by defining
* f(a, b) = (hypot(a, b) - b) / 2 = a*a / (hypot(a, b) + b) / 2
* Then if A < A_crossover, we use
* log(A + sqrt(A*A-1)) = log1p((A-1) + sqrt((A-1)*(A+1)))
* A-1 = f(x, 1+y) + f(x, 1-y)
* and if B > B_crossover, we use
* asin(B) = atan2(y, sqrt(A*A - y*y)) = atan2(y, sqrt((A+y)*(A-y)))
* A-y = f(x, y+1) + f(x, y-1)
* where without loss of generality we have assumed that x and y are
* non-negative.
*
* Much of the difficulty comes because the intermediate computations may
* produce overflows or underflows. This is dealt with in the paper by Hull
* et al by using exception handling. We do this by detecting when
* computations risk underflow or overflow. The hardest part is handling the
* underflows when computing f(a, b).
*
* Note that the function f(a, b) does not appear explicitly in the paper by
* Hull et al, but the idea may be found on pages 308 and 309. Introducing the
* function f(a, b) allows us to concentrate many of the clever tricks in this
* paper into one function.
*/
/*
 * f(a, b, hypot_a_b) = (hypot(a, b) - b) / 2.
 *
 * The caller supplies hypot(a, b) as the third argument. Three forms are
 * used depending on b:
 *   b < 0   the subtraction is done directly;
 *   b == 0  the result is a / 2;
 *   else    the equivalent quotient a*a / (hypot(a, b) + b) / 2 is used,
 *           which avoids subtracting two nearby values.
 */
static inline double
f(double a, double b, double hypot_a_b)
{
	double half_gap;

	if (b < 0) {
		half_gap = (hypot_a_b - b) / 2;
	} else if (b == 0) {
		half_gap = a / 2;
	} else {
		half_gap = a * a / (hypot_a_b + b) / 2;
	}
	return (half_gap);
}
/*
 * All the hard work is contained in this function.
 * x and y are assumed positive or zero, and less than RECIP_EPSILON.
 * Upon return:
 * rx = Re(casinh(z)) = -Im(cacos(y + I*x)).
 * B_is_usable is set to 1 if the value of B is usable.
 * If B_is_usable is set to 0, sqrt_A2my2 = sqrt(A*A - y*y), and new_y = y.
 * If returning sqrt_A2my2 has potential to result in an underflow, it is
 * rescaled, and new_y is similarly rescaled.
 *
 * Output parameters, as written by the code below:
 * rx          - always written.
 * B_is_usable - always written (0 or 1).
 * B           - written only when y >= FOUR_SQRT_MIN.
 * sqrt_A2my2  - written only on the paths that set *B_is_usable to 0.
 * new_y       - always written; either y or a rescaled y.
 *
 * The callers in this file pass absolute values: casinh() passes
 * (|x|, |y|) and cacos() passes (|y|, |x|).
 */
static inline void
do_hard_work(double x, double y, double *rx, int *B_is_usable, double *B,
double *sqrt_A2my2, double *new_y)
{
double R, S, A; /* A, B, R, and S are as in Hull et al. */
double Am1, Amy; /* A-1, A-y. */
R = hypot(x, y + 1); /* |z+I| */
S = hypot(x, y - 1); /* |z-I| */
/* A = (|z+I| + |z-I|) / 2 */
A = (R + S) / 2;
/*
* Mathematically A >= 1. There is a small chance that this will not
* be so because of rounding errors. So we will make certain it is
* so.
*/
if (A < 1)
A = 1;
/* First half: compute *rx, choosing the formula by the size of A. */
if (A < A_crossover) {
/*
* Am1 = fp + fm, where fp = f(x, 1+y), and fm = f(x, 1-y).
* rx = log1p(Am1 + sqrt(Am1*(A+1)))
*/
if (y == 1 && x < DBL_EPSILON * DBL_EPSILON / 128) {
/*
* fp is of order x^2, and fm = x/2.
* A = 1 (inexactly).
*/
*rx = sqrt(x);
} else if (x >= DBL_EPSILON * fabs(y - 1)) {
/*
* Underflow will not occur because
* x >= DBL_EPSILON^2/128 >= FOUR_SQRT_MIN
*/
Am1 = f(x, 1 + y, R) + f(x, 1 - y, S);
*rx = log1p(Am1 + sqrt(Am1 * (A + 1)));
} else if (y < 1) {
/*
* fp = x*x/(1+y)/4, fm = x*x/(1-y)/4, and
* A = 1 (inexactly).
*/
*rx = x / sqrt((1 - y) * (1 + y));
} else { /* if (y > 1) */
/*
* A-1 = y-1 (inexactly).
*/
*rx = log1p((y - 1) + sqrt((y - 1) * (y + 1)));
}
} else {
/* A is at least A_crossover: use the direct formula. */
*rx = log(A + sqrt(A * A - 1));
}
/* Second half: B, or sqrt(A*A - y*y) together with new_y. */
*new_y = y;
if (y < FOUR_SQRT_MIN) {
/*
* Avoid a possible underflow caused by y/A. For casinh this
* would be legitimate, but will be picked up by invoking atan2
* later on. For cacos this would not be legitimate.
*/
*B_is_usable = 0;
/* Both outputs are scaled by the same factor, 2 / DBL_EPSILON. */
*sqrt_A2my2 = A * (2 / DBL_EPSILON);
*new_y = y * (2 / DBL_EPSILON);
return;
}
/* B = (|z+I| - |z-I|) / 2 = y/A */
*B = y / A;
*B_is_usable = 1;
if (*B > B_crossover) {
/* B is too large to be used; provide sqrt(A*A - y*y) instead. */
*B_is_usable = 0;
/*
* Amy = fp + fm, where fp = f(x, y+1), and fm = f(x, y-1).
* sqrt_A2my2 = sqrt(Amy*(A+y))
*/
if (y == 1 && x < DBL_EPSILON / 128) {
/*
* fp is of order x^2, and fm = x/2.
* A = 1 (inexactly).
*/
*sqrt_A2my2 = sqrt(x) * sqrt((A + y) / 2);
} else if (x >= DBL_EPSILON * fabs(y - 1)) {
/*
* Underflow will not occur because
* x >= DBL_EPSILON/128 >= FOUR_SQRT_MIN
* and
* x >= DBL_EPSILON^2 >= FOUR_SQRT_MIN
*/
Amy = f(x, y + 1, R) + f(x, y - 1, S);
*sqrt_A2my2 = sqrt(Amy * (A + y));
} else if (y > 1) {
/*
* fp = x*x/(y+1)/4, fm = x*x/(y-1)/4, and
* A = y (inexactly).
*
* y < RECIP_EPSILON. So the following
* scaling should avoid any underflow problems.
*/
*sqrt_A2my2 = x * (4 / DBL_EPSILON / DBL_EPSILON) * y /
sqrt((y + 1) * (y - 1));
*new_y = y * (4 / DBL_EPSILON / DBL_EPSILON);
} else { /* if (y < 1) */
/*
* fm = 1-y >= DBL_EPSILON, fp is of order x^2, and
* A = 1 (inexactly).
*/
*sqrt_A2my2 = sqrt((1 - y) * (1 + y));
}
}
}
/*
 * Complex inverse hyperbolic sine.
 *
 * casinh(z) = z + O(z^3) as z -> 0
 *
 * casinh(z) = sign(x)*clog(sign(x)*z) + O(1/z^2) as z -> infinity
 * The above formula works for the imaginary part as well, because
 * Im(casinh(z)) = sign(x)*atan2(sign(x)*y, fabs(x)) + O(y/z^3)
 * as z -> infinity, uniformly in y
 *
 * Order of the cases handled below:
 * 1. either component is NaN;
 * 2. either |x| or |y| exceeds RECIP_EPSILON (logarithmic form);
 * 3. z == 0 (returned unchanged, before inexact is raised);
 * 4. both |x| and |y| are below SQRT_6_EPSILON / 4 (z is returned);
 * 5. everything else goes through do_hard_work() on (|x|, |y|), and the
 *    signs of x and y are copied onto the result.
 */
double complex
casinh(double complex z)
{
double x, y, ax, ay, rx, ry, B, sqrt_A2my2, new_y;
int B_is_usable;
double complex w;
x = creal(z);
y = cimag(z);
ax = fabs(x);
ay = fabs(y);
if (isnan(x) || isnan(y)) {
/* casinh(+-Inf + I*NaN) = +-Inf + I*NaN */
if (isinf(x))
return (CMPLX(x, y + y));
/* casinh(NaN + I*+-Inf) = opt(+-)Inf + I*NaN */
if (isinf(y))
return (CMPLX(y, x + x));
/* casinh(NaN + I*0) = NaN + I*0 */
if (y == 0)
return (CMPLX(x + x, y));
/*
* All other cases involving NaN return NaN + I*NaN.
* C99 leaves it optional whether to raise invalid if one of
* the arguments is not NaN, so we opt not to raise it.
*/
return (CMPLX(nan_mix(x, y), nan_mix(x, y)));
}
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) {
/* clog...() will raise inexact unless x or y is infinite. */
/* z is negated when the sign bit of x is set; signs are restored below. */
if (signbit(x) == 0)
w = clog_for_large_values(z) + m_ln2;
else
w = clog_for_large_values(-z) + m_ln2;
return (CMPLX(copysign(creal(w), x), copysign(cimag(w), y)));
}
/* Avoid spuriously raising inexact for z = 0. */
if (x == 0 && y == 0)
return (z);
/* All remaining cases are inexact. */
raise_inexact();
/* Tiny argument: casinh(z) = z + O(z^3), so z itself is returned. */
if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4)
return (z);
do_hard_work(ax, ay, &rx, &B_is_usable, &B, &sqrt_A2my2, &new_y);
/* Use asin(B) when B is usable, otherwise the atan2 form. */
if (B_is_usable)
ry = asin(B);
else
ry = atan2(new_y, sqrt_A2my2);
return (CMPLX(copysign(rx, x), copysign(ry, y)));
}
/*
* casin(z) = reverse(casinh(reverse(z)))
* where reverse(x + I*y) = y + I*x = I*conj(z).
*/
double complex
casin(double complex z)
{
double complex w = casinh(CMPLX(cimag(z), creal(z)));
return (CMPLX(cimag(w), creal(w)));
}
/*
 * Complex arc cosine.
 *
 * cacos(z) = PI/2 - casin(z)
 * but do the computation carefully so cacos(z) is accurate when z is
 * close to 1.
 *
 * cacos(z) = PI/2 - z + O(z^3) as z -> 0
 *
 * cacos(z) = -sign(y)*I*clog(z) + O(1/z^2) as z -> infinity
 * The above formula works for the real part as well, because
 * Re(cacos(z)) = atan2(fabs(y), x) + O(y/z^3)
 * as z -> infinity, uniformly in y
 *
 * Order of the cases handled below:
 * 1. either component is NaN;
 * 2. either |x| or |y| exceeds RECIP_EPSILON (logarithmic form);
 * 3. z == 1 (returned before inexact is raised);
 * 4. both |x| and |y| are below SQRT_6_EPSILON / 4;
 * 5. everything else goes through do_hard_work(), called with the roles
 *    of x and y exchanged, i.e. on (|y|, |x|).
 * In cases 2 and 5 the imaginary part is negated when the sign bit of y
 * is clear.
 */
double complex
cacos(double complex z)
{
double x, y, ax, ay, rx, ry, B, sqrt_A2mx2, new_x;
int sx, sy;
int B_is_usable;
double complex w;
x = creal(z);
y = cimag(z);
/* Sign bits are recorded up front; the work below uses |x| and |y|. */
sx = signbit(x);
sy = signbit(y);
ax = fabs(x);
ay = fabs(y);
if (isnan(x) || isnan(y)) {
/* cacos(+-Inf + I*NaN) = NaN + I*opt(-)Inf */
if (isinf(x))
return (CMPLX(y + y, -INFINITY));
/* cacos(NaN + I*+-Inf) = NaN + I*-+Inf */
if (isinf(y))
return (CMPLX(x + x, -y));
/* cacos(0 + I*NaN) = PI/2 + I*NaN with inexact */
if (x == 0)
return (CMPLX(pio2_hi + pio2_lo, y + y));
/*
* All other cases involving NaN return NaN + I*NaN.
* C99 leaves it optional whether to raise invalid if one of
* the arguments is not NaN, so we opt not to raise it.
*/
return (CMPLX(nan_mix(x, y), nan_mix(x, y)));
}
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) {
/* clog...() will raise inexact unless x or y is infinite. */
w = clog_for_large_values(z);
rx = fabs(cimag(w));
ry = creal(w) + m_ln2;
if (sy == 0)
ry = -ry;
return (CMPLX(rx, ry));
}
/* Avoid spuriously raising inexact for z = 1. */
if (x == 1 && y == 0)
return (CMPLX(0, -y));
/* All remaining cases are inexact. */
raise_inexact();
/* Tiny argument: cacos(z) = PI/2 - z + O(z^3), with PI/2 as pio2_hi + pio2_lo. */
if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4)
return (CMPLX(pio2_hi - (x - pio2_lo), -y));
do_hard_work(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x);
/* The sign bit of x selects between the argument and its negation. */
if (B_is_usable) {
if (sx == 0)
rx = acos(B);
else
rx = acos(-B);
} else {
if (sx == 0)
rx = atan2(sqrt_A2mx2, new_x);
else
rx = atan2(sqrt_A2mx2, -new_x);
}
if (sy == 0)
ry = -ry;
return (CMPLX(rx, ry));
}
/*
* cacosh(z) = I*cacos(z) or -I*cacos(z)
* where the sign is chosen so Re(cacosh(z)) >= 0.
*/
double complex
cacosh(double complex z)
{
double complex w;
double rx, ry;
w = cacos(z);
rx = creal(w);
ry = cimag(w);
/* cacosh(NaN + I*NaN) = NaN + I*NaN */
if (isnan(rx) && isnan(ry))
return (CMPLX(ry, rx));
/* cacosh(NaN + I*+-Inf) = +Inf + I*NaN */
/* cacosh(+-Inf + I*NaN) = +Inf + I*NaN */
if (isnan(rx))
return (CMPLX(fabs(ry), rx));
/* cacosh(0 + I*NaN) = NaN + I*NaN */
if (isnan(ry))
return (CMPLX(ry, ry));
return (CMPLX(fabs(ry), copysign(rx, cimag(z))));
}
/*
 * Optimized version of clog() for |z| finite and larger than ~RECIP_EPSILON.
 *
 * The imaginary part is always atan2(y, x). The real part, log|z|, is
 * computed by one of three formulas chosen from the larger and the smaller
 * of |x| and |y|.
 */
static double complex
clog_for_large_values(double complex z)
{
	const double x = creal(z);
	const double y = cimag(z);
	double larger = fabs(x);
	double smaller = fabs(y);
	double log_abs;

	/* Order the magnitudes so that larger >= smaller. */
	if (larger < smaller) {
		const double swap = larger;

		larger = smaller;
		smaller = swap;
	}

	if (larger > DBL_MAX / 2) {
		/*
		 * Avoid overflow in hypot() when x and y are both very
		 * large. Divide x and y by E, and then add 1 to the
		 * logarithm. This depends on E being larger than sqrt(2),
		 * since the return value of hypot cannot overflow if neither
		 * argument is greater in magnitude than 1/sqrt(2) of the
		 * maximum value of the return type. Likewise this determines
		 * the necessary threshold for using this method (however,
		 * actually use 1/2 instead as it is simpler).
		 *
		 * Dividing by E causes an insignificant loss of accuracy;
		 * however this method is still poor since it is
		 * unnecessarily slow.
		 */
		log_abs = log(hypot(x / m_e, y / m_e)) + 1;
	} else if (larger > QUARTER_SQRT_MAX || smaller < SQRT_MIN) {
		/*
		 * Avoid overflow when x or y is large. Avoid underflow when
		 * x or y is small.
		 */
		log_abs = log(hypot(x, y));
	} else {
		/* Squares can be formed directly: log|z| = log(|z|^2) / 2. */
		log_abs = log(larger * larger + smaller * smaller) / 2;
	}

	return (CMPLX(log_abs, atan2(y, x)));
}
/*
* =================
* | catanh, catan |
* =================
*/
/*
 * sum_squares(x, y) = x*x + y*y, or just x*x when y is below SQRT_MIN
 * (where y*y would underflow).
 *
 * Preconditions stated by the original author:
 *   - x*x and y*y will not overflow;
 *   - x and y are finite;
 *   - y is non-negative;
 *   - fabs(x) >= DBL_EPSILON.
 */
static inline double
sum_squares(double x, double y)
{
	/* y large enough for its square to be formed: use the full sum. */
	if (!(y < SQRT_MIN))
		return (x * x + y * y);

	/* Otherwise drop the y*y term to avoid the underflow. */
	return (x * x);
}
/*
 * real_part_reciprocal(x, y) = Re(1/(x+I*y)) = x/(x*x + y*y).
 * Assumes x and y are not NaN, and one of x and y is larger than
 * RECIP_EPSILON. We avoid unwarranted underflow. It is important to not use
 * the code creal(1/z), because the imaginary part may produce an unwanted
 * underflow.
 * This is only called in a context where inexact is always raised before
 * the call, so no effort is made to avoid or force inexact.
 *
 * The choice of formula is driven by comparing ix and iy, which are the
 * high words of x and y masked with 0x7ff00000.
 * NOTE(review): GET_HIGH_WORD / SET_HIGH_WORD are defined outside this
 * file; presumably they read and write the upper 32 bits of a double, so
 * that the mask selects the biased exponent field -- confirm.
 */
static inline double
real_part_reciprocal(double x, double y)
{
double scale;
uint32_t hx, hy;
int32_t ix, iy;
/*
* This code is inspired by the C99 document n1124.pdf, Section G.5.1,
* example 2.
*/
GET_HIGH_WORD(hx, x);
ix = hx & 0x7ff00000;
GET_HIGH_WORD(hy, y);
iy = hy & 0x7ff00000;
#undef BIAS
#define BIAS (DBL_MAX_EXP - 1)
/* XXX more guard digits are useful iff there is extra precision. */
#define CUTOFF (DBL_MANT_DIG / 2 + 1) /* just half or 1 guard digit */
/* x dominates y by at least CUTOFF in the masked field (or x is infinite). */
if (ix - iy >= CUTOFF << 20 || isinf(x))
return (1 / x); /* +-Inf -> +-0 is special */
/* y dominates x by at least CUTOFF in the masked field. */
if (iy - ix >= CUTOFF << 20)
return (x / y / y); /* should avoid double div, but hard */
/* ix is at or below the bound: the textbook formula is used as is. */
if (ix <= (BIAS + DBL_MAX_EXP / 2 - CUTOFF) << 20)
return (x / (x * x + y * y));
/* Otherwise scale x and y down first, then scale the quotient. */
scale = 1;
SET_HIGH_WORD(scale, 0x7ff00000 - ix); /* 2**(1-ilogb(x)) */
x *= scale;
y *= scale;
return (x / (x * x + y * y) * scale);
}
/*
 * Complex inverse hyperbolic tangent.
 *
 * catanh(z) = log((1+z)/(1-z)) / 2
 * = log1p(4*x / |z-1|^2) / 4
 * + I * atan2(2*y, (1-x)*(1+x)-y*y) / 2
 *
 * catanh(z) = z + O(z^3) as z -> 0
 *
 * catanh(z) = 1/z + sign(y)*I*PI/2 + O(1/z^3) as z -> infinity
 * The above formula works for the real part as well, because
 * Re(catanh(z)) = x/|z|^2 + O(x/z^4)
 * as z -> infinity, uniformly in x
 *
 * Order of the cases handled below:
 * 1. y == 0 and |x| <= 1: the real atanh() is used;
 * 2. x == 0: the real atan() is used;
 * 3. either component is NaN;
 * 4. either |x| or |y| exceeds RECIP_EPSILON (1/z form);
 * 5. both |x| and |y| are below SQRT_3_EPSILON / 2 (z is returned);
 * 6. the general formulas, evaluated on |x| and |y|, with the signs of x
 *    and y copied onto the result.
 */
double complex
catanh(double complex z)
{
double x, y, ax, ay, rx, ry;
x = creal(z);
y = cimag(z);
ax = fabs(x);
ay = fabs(y);
/* This helps handle many cases. */
if (y == 0 && ax <= 1)
return (CMPLX(atanh(x), y));
/* To ensure the same accuracy as atan(), and to filter out z = 0. */
if (x == 0)
return (CMPLX(x, atan(y)));
if (isnan(x) || isnan(y)) {
/* catanh(+-Inf + I*NaN) = +-0 + I*NaN */
if (isinf(x))
return (CMPLX(copysign(0, x), y + y));
/* catanh(NaN + I*+-Inf) = sign(NaN)0 + I*+-PI/2 */
if (isinf(y))
return (CMPLX(copysign(0, x),
copysign(pio2_hi + pio2_lo, y)));
/*
* All other cases involving NaN return NaN + I*NaN.
* C99 leaves it optional whether to raise invalid if one of
* the arguments is not NaN, so we opt not to raise it.
*/
return (CMPLX(nan_mix(x, y), nan_mix(x, y)));
}
/* Large argument: real part from Re(1/z), imaginary part +-PI/2. */
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON)
return (CMPLX(real_part_reciprocal(x, y),
copysign(pio2_hi + pio2_lo, y)));
if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) {
/*
* z = 0 was filtered out above. All other cases must raise
* inexact, but this is the only case that needs to do it
* explicitly.
*/
raise_inexact();
return (z);
}
/* Real part; |x| == 1 with |y| below DBL_EPSILON has its own formula. */
if (ax == 1 && ay < DBL_EPSILON)
rx = (m_ln2 - log(ay)) / 2;
else
rx = log1p(4 * ax / sum_squares(ax - 1, ay)) / 4;
/* Imaginary part; the ay*ay term is left out when ay is below DBL_EPSILON. */
if (ax == 1)
ry = atan2(2, -ay) / 2;
else if (ay < DBL_EPSILON)
ry = atan2(2 * ay, (1 - ax) * (1 + ax)) / 2;
else
ry = atan2(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2;
return (CMPLX(copysign(rx, x), copysign(ry, y)));
}
/*
 * Complex arc tangent, obtained from catanh() by reflection:
 *
 *   catan(z) = reverse(catanh(reverse(z)))
 *
 * where reverse(x + I*y) = y + I*x = I*conj(z), i.e. the real and
 * imaginary parts are exchanged before and after the call.
 */
double complex
catan(double complex z)
{
	double complex swapped_in, swapped_out;

	swapped_in = CMPLX(cimag(z), creal(z));
	swapped_out = catanh(swapped_in);
	return (CMPLX(cimag(swapped_out), creal(swapped_out)));
}
/*
* When long double has a 53-bit significand (LDBL_MANT_DIG == 53), the
* long double entry points are tied to the double implementations below
* instead of being implemented separately.
* NOTE(review): __weak_reference is defined outside this view; presumably
* it emits a weak alias from the second name to the first - confirm.
*/
#if LDBL_MANT_DIG == 53
__weak_reference(cacosh, cacoshl);
__weak_reference(cacos, cacosl);
__weak_reference(casinh, casinhl);
__weak_reference(casin, casinl);
__weak_reference(catanh, catanhl);
__weak_reference(catan, catanl);
#endif

377
libc/tinymath/catrigf.c Normal file
View file

@ -0,0 +1,377 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
FreeBSD lib/msun/src/catrigf.c
Copyright (c) 2012 Stephen Montgomery-Smith <stephen@FreeBSD.ORG>
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/
#include "libc/tinymath/freebsd.internal.h"
__static_yoink("freebsd_complex_notice");
/*
* Float-only replacements for the classification macros used in this
* file: isinf() compares fabsf(x) with INFINITY, isnan() is the
* self-inequality test, and signbit() maps to the float compiler builtin.
*/
#undef isinf
#define isinf(x) (fabsf(x) == INFINITY)
#undef isnan
#define isnan(x) ((x) != (x))
/*
* Stores 1 + tiny into a volatile float; 1 + 0x1p-100 is not
* representable in float, so the addition is inexact. Used where the
* result must be flagged inexact without otherwise computing anything.
*/
#define raise_inexact() do { volatile float _j = 1 + tiny; (void)_j; } while(0)
#undef signbit
#define signbit(x) (__builtin_signbitf(x))
static const float
A_crossover = 10, /* do_hard_work(): A below this uses the log1pf() forms */
B_crossover = 0.6417, /* do_hard_work(): B = y/A above this is not usable */
FOUR_SQRT_MIN = 0x1p-61, /* 4 * sqrt(FLT_MIN) */
QUARTER_SQRT_MAX = 0x1p61, /* <= sqrt(FLT_MAX) / 4 */
m_e = 2.7182818285e0, /* exp(1), 0xadf854.0p-22 */
m_ln2 = 6.9314718056e-1, /* log(2), 0xb17218.0p-24 */
pio2_hi = 1.5707962513e0, /* pi/2 (high part), 0xc90fda.0p-23 */
RECIP_EPSILON = 1 / FLT_EPSILON,
SQRT_3_EPSILON = 5.9801995673e-4, /* sqrt(3 * FLT_EPSILON), 0x9cc471.0p-34 */
SQRT_6_EPSILON = 8.4572793338e-4, /* sqrt(6 * FLT_EPSILON), 0xddb3d7.0p-34 */
SQRT_MIN = 0x1p-63; /* sqrt(FLT_MIN) */
/*
* Declared volatile, presumably so that pio2_hi + pio2_lo and 1 + tiny
* are evaluated at run time rather than constant-folded - confirm.
*/
static const volatile float
pio2_lo = 7.5497899549e-8, /* pi/2 - pio2_hi, 0xa22169.0p-47 */
tiny = 0x1p-100;
/* Defined further down; used by casinhf() and cacosf() for large |z|. */
static float complex clog_for_large_values(float complex z);
/*
 * f(a, b, hypot_a_b) = (hypot(a, b) - b) / 2.
 *
 * The caller passes hypot(a, b) as the third argument.  The formula is
 * rearranged by the sign of b: for b > 0 the algebraically equal form
 * a*a / (hypot + b) / 2 is used instead of the subtraction hypot - b.
 */
static inline float
f(float a, float b, float hypot_a_b)
{
	float result;

	if (b < 0)
		result = (hypot_a_b - b) / 2;
	else if (b == 0)
		result = a / 2;
	else
		result = a * a / (hypot_a_b + b) / 2;
	return (result);
}
/*
* Shared core of casinhf() and cacosf().
*
* Both callers pass absolute values after the NaN and > RECIP_EPSILON
* cases have been handled: casinhf() passes (|x|, |y|) and cacosf()
* passes (|y|, |x|).
*
* With R = hypotf(x, y + 1), S = hypotf(x, y - 1) and A = (R + S) / 2:
* *rx = log(A + sqrt(A*A - 1)), evaluated in a form chosen per range.
* *B_is_usable = 1 when *B = y / A may be passed straight to
* asinf()/acosf(); 0 when the caller must instead use
* atan2f() with *new_y and *sqrt_A2my2.
* *B = y / A; only written when y >= FOUR_SQRT_MIN.
* *sqrt_A2my2 = sqrt(A*A - y*y), only written on the paths that leave
* *B_is_usable at 0.
* *new_y = y, except on the two paths that multiply both *new_y and
* *sqrt_A2my2 by the same power of 1/FLT_EPSILON.
*/
static inline void
do_hard_work(float x, float y, float *rx, int *B_is_usable, float *B,
float *sqrt_A2my2, float *new_y)
{
float R, S, A;
float Am1, Amy;
R = hypotf(x, y + 1);
S = hypotf(x, y - 1);
A = (R + S) / 2;
/* Mathematically A >= 1; a smaller value can only come from rounding. */
if (A < 1)
A = 1;
if (A < A_crossover) {
/*
* Am1 = f(x, 1+y, R) + f(x, 1-y, S) equals A - 1 because
* f(a, b, hypot) = (hypot - b) / 2, so
* rx = log1pf(Am1 + sqrtf(Am1 * (A + 1))) = log(A + sqrt(A*A - 1)).
* The other branches are limiting forms for x tiny relative to
* |y - 1|.
*/
if (y == 1 && x < FLT_EPSILON * FLT_EPSILON / 128) {
*rx = sqrtf(x);
} else if (x >= FLT_EPSILON * fabsf(y - 1)) {
Am1 = f(x, 1 + y, R) + f(x, 1 - y, S);
*rx = log1pf(Am1 + sqrtf(Am1 * (A + 1)));
} else if (y < 1) {
*rx = x / sqrtf((1 - y) * (1 + y));
} else {
/* Here y > 1 and x is negligible: y - 1 is used in place of A - 1. */
*rx = log1pf((y - 1) + sqrtf((y - 1) * (y + 1)));
}
} else {
*rx = logf(A + sqrtf(A * A - 1));
}
*new_y = y;
if (y < FOUR_SQRT_MIN) {
/*
* y is too small to form y / A; hand the caller A and y
* multiplied by the same factor 2 / FLT_EPSILON for atan2f().
*/
*B_is_usable = 0;
*sqrt_A2my2 = A * (2 / FLT_EPSILON);
*new_y = y * (2 / FLT_EPSILON);
return;
}
*B = y / A;
*B_is_usable = 1;
if (*B > B_crossover) {
*B_is_usable = 0;
/*
* Amy = f(x, y+1, R) + f(x, y-1, S) equals A - y, so
* sqrtf(Amy * (A + y)) = sqrt(A*A - y*y). The other branches
* are limiting forms for x tiny relative to |y - 1|.
*/
if (y == 1 && x < FLT_EPSILON / 128) {
*sqrt_A2my2 = sqrtf(x) * sqrtf((A + y) / 2);
} else if (x >= FLT_EPSILON * fabsf(y - 1)) {
Amy = f(x, y + 1, R) + f(x, y - 1, S);
*sqrt_A2my2 = sqrtf(Amy * (A + y));
} else if (y > 1) {
/*
* Both outputs are multiplied by 4 / FLT_EPSILON^2 here,
* presumably to keep the tiny quotient from underflowing -
* confirm.
*/
*sqrt_A2my2 = x * (4 / FLT_EPSILON / FLT_EPSILON) * y /
sqrtf((y + 1) * (y - 1));
*new_y = y * (4 / FLT_EPSILON / FLT_EPSILON);
} else {
/* Here y < 1 and x is negligible. */
*sqrt_A2my2 = sqrtf((1 - y) * (1 + y));
}
}
}
/*
* Complex inverse hyperbolic sine, single precision.
*
* Order of evaluation: NaN handling, then large |z| through
* clog_for_large_values(), then exact zero, then tiny |z| (returned
* unchanged), and finally the general case through do_hard_work() on
* |x| and |y| with the signs restored by copysignf().
*/
float complex
casinhf(float complex z)
{
float x, y, ax, ay, rx, ry, B, sqrt_A2my2, new_y;
int B_is_usable;
float complex w;
x = crealf(z);
y = cimagf(z);
ax = fabsf(x);
ay = fabsf(y);
if (isnan(x) || isnan(y)) {
/* casinhf(+-Inf + I*NaN) = +-Inf + I*NaN */
if (isinf(x))
return (CMPLXF(x, y + y));
/* casinhf(NaN + I*+-Inf) = +-Inf + I*NaN (sign taken from y) */
if (isinf(y))
return (CMPLXF(y, x + x));
/* casinhf(NaN + I*0) = NaN + I*0 */
if (y == 0)
return (CMPLXF(x + x, y));
/* Every other NaN input gives NaN + I*NaN. */
return (CMPLXF(nan_mix(x, y), nan_mix(x, y)));
}
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) {
/*
* Large |z|: log(z) + log(2), taken on z or -z so that the
* argument has a non-negative real part; the signs of x and y
* are put back below.
*/
if (signbit(x) == 0)
w = clog_for_large_values(z) + m_ln2;
else
w = clog_for_large_values(-z) + m_ln2;
return (CMPLXF(copysignf(crealf(w), x),
copysignf(cimagf(w), y)));
}
/* Exact zero is returned before raise_inexact() is reached. */
if (x == 0 && y == 0)
return (z);
raise_inexact();
/* Tiny |z|: z is returned unchanged. */
if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4)
return (z);
do_hard_work(ax, ay, &rx, &B_is_usable, &B, &sqrt_A2my2, &new_y);
/* Imaginary part: asinf(B) when B is usable, otherwise the atan2f() form. */
if (B_is_usable)
ry = asinf(B);
else
ry = atan2f(new_y, sqrt_A2my2);
return (CMPLXF(copysignf(rx, x), copysignf(ry, y)));
}
/*
 * Complex arc sine, obtained from casinhf() by exchanging the real and
 * imaginary parts of both the argument and the result.
 */
float complex
casinf(float complex z)
{
	float complex swapped_in, swapped_out;

	swapped_in = CMPLXF(cimagf(z), crealf(z));
	swapped_out = casinhf(swapped_in);
	return (CMPLXF(cimagf(swapped_out), crealf(swapped_out)));
}
/*
* Complex arc cosine, single precision.
*
* Order of evaluation: NaN handling, then large |z| through
* clog_for_large_values(), then the exact case z = 1, then tiny |z|
* (pi/2 - z), and finally the general case through do_hard_work(),
* called with the roles of |x| and |y| exchanged relative to casinhf().
* The imaginary part of the result has the opposite sign to y.
*/
float complex
cacosf(float complex z)
{
float x, y, ax, ay, rx, ry, B, sqrt_A2mx2, new_x;
int sx, sy;
int B_is_usable;
float complex w;
x = crealf(z);
y = cimagf(z);
sx = signbit(x);
sy = signbit(y);
ax = fabsf(x);
ay = fabsf(y);
if (isnan(x) || isnan(y)) {
/* cacosf(+-Inf + I*NaN) = NaN + I*-Inf */
if (isinf(x))
return (CMPLXF(y + y, -INFINITY));
/* cacosf(NaN + I*+-Inf) = NaN + I*-+Inf */
if (isinf(y))
return (CMPLXF(x + x, -y));
/* cacosf(0 + I*NaN) = PI/2 + I*NaN */
if (x == 0)
return (CMPLXF(pio2_hi + pio2_lo, y + y));
/* Every other NaN input gives NaN + I*NaN. */
return (CMPLXF(nan_mix(x, y), nan_mix(x, y)));
}
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) {
/*
* Large |z|: the real part is |arg(z)| and the imaginary part
* is log|z| + log(2), negated when y is not negative.
*/
w = clog_for_large_values(z);
rx = fabsf(cimagf(w));
ry = crealf(w) + m_ln2;
if (sy == 0)
ry = -ry;
return (CMPLXF(rx, ry));
}
/* z = 1 is returned before raise_inexact() is reached. */
if (x == 1 && y == 0)
return (CMPLXF(0, -y));
raise_inexact();
/* Tiny |z|: pi/2 - z, with pi/2 supplied as pio2_hi + pio2_lo. */
if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4)
return (CMPLXF(pio2_hi - (x - pio2_lo), -y));
do_hard_work(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x);
/*
* Real part: acosf(+-B) when B is usable, otherwise the atan2f()
* form; a negative x flips the sign of the B or new_x argument.
*/
if (B_is_usable) {
if (sx == 0)
rx = acosf(B);
else
rx = acosf(-B);
} else {
if (sx == 0)
rx = atan2f(sqrt_A2mx2, new_x);
else
rx = atan2f(sqrt_A2mx2, -new_x);
}
/* do_hard_work() returned a magnitude; negate it when y is not negative. */
if (sy == 0)
ry = -ry;
return (CMPLXF(rx, ry));
}
/*
 * Complex inverse hyperbolic cosine, derived from cacosf(z).
 *
 * In the ordinary case the parts of cacosf(z) are exchanged: the real
 * part of the result is |Im(cacosf(z))| and the imaginary part is
 * Re(cacosf(z)) carrying the sign of Im(z).  NaN parts coming back from
 * cacosf() are remapped case by case first.
 */
float complex
cacoshf(float complex z)
{
	float complex acos_z = cacosf(z);
	float re = crealf(acos_z);
	float im = cimagf(acos_z);

	if (isnan(re)) {
		/* NaN + I*NaN stays NaN + I*NaN (parts exchanged). */
		if (isnan(im))
			return (CMPLXF(im, re));
		/* Only the real part is NaN: |im| + I*NaN. */
		return (CMPLXF(fabsf(im), re));
	}
	/* Only the imaginary part is NaN: NaN + I*NaN. */
	if (isnan(im))
		return (CMPLXF(im, im));
	return (CMPLXF(fabsf(im), copysignf(re, cimagf(z))));
}
/*
 * Logarithm helper called by casinhf() and cacosf() once |x| or |y|
 * exceeds RECIP_EPSILON.  Returns log|z| + I*atan2f(y, x); only the way
 * log|z| is formed depends on the magnitudes of the two parts.
 */
static float complex
clog_for_large_values(float complex z)
{
	float re = crealf(z);
	float im = cimagf(z);
	float abs_re = fabsf(re);
	float abs_im = fabsf(im);
	float hi, lo;

	/* hi is the larger and lo the smaller of |re| and |im|. */
	if (abs_re < abs_im) {
		hi = abs_im;
		lo = abs_re;
	} else {
		hi = abs_re;
		lo = abs_im;
	}

	if (hi > FLT_MAX / 2) {
		/*
		 * Divide both parts by e before hypotf() and add 1 back to
		 * the logarithm (presumably to keep hypotf() from
		 * overflowing).
		 */
		return (CMPLXF(logf(hypotf(re / m_e, im / m_e)) + 1,
		    atan2f(im, re)));
	}

	/* One part is large or the other is tiny: let hypotf() do the work. */
	if (hi > QUARTER_SQRT_MAX || lo < SQRT_MIN)
		return (CMPLXF(logf(hypotf(re, im)), atan2f(im, re)));

	/* Otherwise the plain sum of squares is used directly. */
	return (CMPLXF(logf(hi * hi + lo * lo) / 2, atan2f(im, re)));
}
/*
 * Returns x*x + y*y, except that the y*y term is left out when y is
 * below SQRT_MIN.
 */
static inline float
sum_squares(float x, float y)
{
	return (y < SQRT_MIN ? x * x : x * x + y * y);
}
/*
* real_part_reciprocal(x, y) = Re(1 / (x + I*y)) = x / (x*x + y*y).
*
* Its only caller in this file, catanhf(), invokes it after NaNs have
* been handled and when |x| or |y| exceeds RECIP_EPSILON. The biased
* exponent fields of x and y are compared to choose a formula.
*/
static inline float
real_part_reciprocal(float x, float y)
{
float scale;
uint32_t hx, hy;
int32_t ix, iy;
/* ix and iy keep only the exponent field of each operand. */
GET_FLOAT_WORD(hx, x);
ix = hx & 0x7f800000;
GET_FLOAT_WORD(hy, y);
iy = hy & 0x7f800000;
/* BIAS is the float exponent bias; CUTOFF is half the significand width plus one. */
#undef BIAS
#define BIAS (FLT_MAX_EXP - 1)
#define CUTOFF (FLT_MANT_DIG / 2 + 1)
/* x exceeds y by at least CUTOFF binary orders (or is infinite): 1 / x is used. */
if (ix - iy >= CUTOFF << 23 || isinf(x))
return (1 / x);
/* y exceeds x by at least CUTOFF binary orders: x / y / y is used. */
if (iy - ix >= CUTOFF << 23)
return (x / y / y);
/* Exponent of x at or below the threshold: the direct formula is used. */
if (ix <= (BIAS + FLT_MAX_EXP / 2 - CUTOFF) << 23)
return (x / (x * x + y * y));
/*
* Otherwise scale is set to the power of two 2**(1-ilogb(x)); x and y
* are multiplied by it first and the quotient is multiplied by it
* again afterwards.
*/
SET_FLOAT_WORD(scale, 0x7f800000 - ix);
x *= scale;
y *= scale;
return (x / (x * x + y * y) * scale);
}
/*
* Complex inverse hyperbolic tangent, single precision.
*
* catanhf(z) = log1pf(4*x / |z-1|^2) / 4
* + I * atan2f(2*y, (1-x)*(1+x) - y*y) / 2
*
* Order of evaluation: real axis with |x| <= 1, imaginary axis, NaN
* handling, large |z| (1/z + sign(y)*I*PI/2), tiny |z| (returned
* unchanged), then the closed form on ax = |x| and ay = |y| with the
* signs restored by copysignf().
*/
float complex
catanhf(float complex z)
{
float x, y, ax, ay, rx, ry;
x = crealf(z);
y = cimagf(z);
ax = fabsf(x);
ay = fabsf(y);
/* Real argument with |x| <= 1: defer to atanhf() and pass y through. */
if (y == 0 && ax <= 1)
return (CMPLXF(atanhf(x), y));
/* Purely imaginary argument: defer to atanf() and pass x through. */
if (x == 0)
return (CMPLXF(x, atanf(y)));
if (isnan(x) || isnan(y)) {
/* catanhf(+-Inf + I*NaN) = +-0 + I*NaN */
if (isinf(x))
return (CMPLXF(copysignf(0, x), y + y));
/* catanhf(NaN + I*+-Inf) = sign(NaN)0 + I*+-PI/2 */
if (isinf(y))
return (CMPLXF(copysignf(0, x),
copysignf(pio2_hi + pio2_lo, y)));
/* Every other NaN input gives NaN + I*NaN. */
return (CMPLXF(nan_mix(x, y), nan_mix(x, y)));
}
/* Large |x| or |y|: Re = x / |z|^2, Im = PI/2 with the sign of y. */
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON)
return (CMPLXF(real_part_reciprocal(x, y),
copysignf(pio2_hi + pio2_lo, y)));
/* Tiny |z|: z is returned unchanged after raising inexact. */
if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) {
raise_inexact();
return (z);
}
/*
* Real part. With ax == 1, sum_squares(0, ay) is ay*ay (or 0 once ay
* is below SQRT_MIN), so for ay below FLT_EPSILON the expression
* (log(2) - log(ay)) / 2 = log(4 / ay^2) / 4 is used instead.
*/
if (ax == 1 && ay < FLT_EPSILON)
rx = (m_ln2 - logf(ay)) / 2;
else
rx = log1pf(4 * ax / sum_squares(ax - 1, ay)) / 4;
/*
* Imaginary part. For ax == 1 the term (1-ax)*(1+ax) is zero, so
* atan2f(2*ay, -ay*ay) is written with the common factor ay removed.
* For ay below FLT_EPSILON the ay*ay term is dropped.
*/
if (ax == 1)
ry = atan2f(2, -ay) / 2;
else if (ay < FLT_EPSILON)
ry = atan2f(2 * ay, (1 - ax) * (1 + ax)) / 2;
else
ry = atan2f(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2;
return (CMPLXF(copysignf(rx, x), copysignf(ry, y)));
}
/*
 * Complex arc tangent, obtained from catanhf() by exchanging the real
 * and imaginary parts of both the argument and the result.
 */
float complex
catanf(float complex z)
{
	float complex swapped_in, swapped_out;

	swapped_in = CMPLXF(cimagf(z), crealf(z));
	swapped_out = catanhf(swapped_in);
	return (CMPLXF(cimagf(swapped_out), crealf(swapped_out)));
}

View file

@ -78,7 +78,6 @@ __static_yoink("fdlibm_notice");
/** /**
* Returns cosine of 𝑥. * Returns cosine of 𝑥.
* @note should take ~5ns
*/ */
double cos(double x) double cos(double x)
{ {

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Musl Libc Optimized Routines
Copyright © 2005-2014 Rich Felker, et al. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,92 +25,63 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/math.h" #include "libc/tinymath/sincosf.internal.h"
#include "libc/tinymath/complex.internal.h" __static_yoink("arm_optimized_routines_notice");
#include "libc/tinymath/feval.internal.h"
#include "libc/tinymath/kernel.internal.h"
__static_yoink("freebsd_libm_notice");
__static_yoink("musl_libc_notice");
__static_yoink("fdlibm_notice");
/* origin: FreeBSD /usr/src/lib/msun/src/s_cosf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Small multiples of pi/2 rounded to double precision. */
static const double
c1pio2 = 1*M_PI_2, /* 0x3FF921FB, 0x54442D18 */
c2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */
c3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */
c4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
/** /**
* Returns cosine of 𝑥. * Returns cosine of y.
* @note should take about 5ns *
* This is a fast cosf implementation. The worst-case ULP is 0.5607, and
* the maximum relative error is 0.5303 * 2^-23. A single-step range
* reduction is used for small values. Large inputs have their range
* reduced using fast integer arithmetic.
*
* @raise EDOM and FE_INVALID if y is an infinity
*/ */
float cosf(float x) float
cosf (float y)
{ {
double y; double x = y;
uint32_t ix; double s;
unsigned n, sign; int n;
const sincos_t *p = &__sincosf_table[0];
GET_FLOAT_WORD(ix, x); if (abstop12 (y) < abstop12 (pio4f))
sign = ix >> 31; {
ix &= 0x7fffffff; double x2 = x * x;
if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */ if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
if (ix < 0x39800000) { /* |x| < 2**-12 */ return 1.0f;
/* raise inexact if x != 0 */
FORCE_EVAL(x + 0x1p120f);
return 1.0f;
}
return __cosdf(x);
}
if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
if (ix > 0x4016cbe3) /* |x| ~> 3*pi/4 */
return -__cosdf(sign ? x+c2pio2 : x-c2pio2);
else {
if (sign)
return __sindf(x + c1pio2);
else
return __sindf(c1pio2 - x);
}
}
if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
if (ix > 0x40afeddf) /* |x| ~> 7*pi/4 */
return __cosdf(sign ? x+c4pio2 : x-c4pio2);
else {
if (sign)
return __sindf(-x - c3pio2);
else
return __sindf(x - c3pio2);
}
}
/* cos(Inf or NaN) is NaN */ return sinf_poly (x, x2, p, 1);
if (ix >= 0x7f800000) }
return x-x; else if (likely (abstop12 (y) < abstop12 (120.0f)))
{
x = reduce_fast (x, p, &n);
/* general argument reduction needed */ /* Setup the signs for sin and cos. */
n = __rem_pio2f(x,&y); s = p->sign[n & 3];
switch (n&3) {
case 0: return __cosdf(y); if (n & 2)
case 1: return __sindf(-y); p = &__sincosf_table[1];
case 2: return -__cosdf(y);
default: return sinf_poly (x * s, x * x, p, n ^ 1);
return __sindf(y); }
} else if (abstop12 (y) < abstop12 (INFINITY))
{
uint32_t xi = asuint (y);
int sign = xi >> 31;
x = reduce_large (xi, &n);
/* Setup signs for sin and cos - include original sign. */
s = p->sign[(n + sign) & 3];
if ((n + sign) & 2)
p = &__sincosf_table[1];
return sinf_poly (x * s, x * x, p, n ^ 1);
}
else
return __math_invalidf (y);
} }

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Musl Libc Optimized Routines
Copyright © 2005-2014 Rich Felker, et al. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,310 +25,247 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/math.h" #include "libc/tinymath/arm.internal.h"
__static_yoink("musl_libc_notice"); __static_yoink("arm_optimized_routines_notice");
__static_yoink("fdlibm_notice");
/* origin: FreeBSD /usr/src/lib/msun/src/s_erf.c */ #define TwoOverSqrtPiMinusOne 0x1.06eba8214db69p-3
/* #define C 0x1.b0ac16p-1
* ==================================================== #define PA __erf_data.erf_poly_A
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. #define NA __erf_data.erf_ratio_N_A
* #define DA __erf_data.erf_ratio_D_A
* Developed at SunPro, a Sun Microsystems, Inc. business. #define NB __erf_data.erf_ratio_N_B
* Permission to use, copy, modify, and distribute this #define DB __erf_data.erf_ratio_D_B
* software is freely granted, provided that this notice #define PC __erf_data.erfc_poly_C
* is preserved. #define PD __erf_data.erfc_poly_D
* ==================================================== #define PE __erf_data.erfc_poly_E
*/ #define PF __erf_data.erfc_poly_F
/* double erf(double x)
* double erfc(double x)
* x
* 2 |\
* erf(x) = --------- | exp(-t*t)dt
* sqrt(pi) \|
* 0
*
* erfc(x) = 1-erf(x)
* Note that
* erf(-x) = -erf(x)
* erfc(-x) = 2 - erfc(x)
*
* Method:
* 1. For |x| in [0, 0.84375]
* erf(x) = x + x*R(x^2)
* erfc(x) = 1 - erf(x) if x in [-.84375,0.25]
* = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375]
* where R = P/Q where P is an odd poly of degree 8 and
* Q is an odd poly of degree 10.
* -57.90
* | R - (erf(x)-x)/x | <= 2
*
*
* Remark. The formula is derived by noting
* erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
* and that
* 2/sqrt(pi) = 1.128379167095512573896158903121545171688
* is close to one. The interval is chosen because the fix
* point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
* near 0.6174), and by some experiment, 0.84375 is chosen to
* guarantee the error is less than one ulp for erf.
*
* 2. For |x| in [0.84375,1.25], let s = |x| - 1, and
* c = 0.84506291151 rounded to single (24 bits)
* erf(x) = sign(x) * (c + P1(s)/Q1(s))
* erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0
* 1+(c+P1(s)/Q1(s)) if x < 0
* |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06
* Remark: here we use the taylor series expansion at x=1.
* erf(1+s) = erf(1) + s*Poly(s)
* = 0.845.. + P1(s)/Q1(s)
* That is, we use rational approximation to approximate
* erf(1+s) - (c = (single)0.84506291151)
* Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
* where
* P1(s) = degree 6 poly in s
* Q1(s) = degree 6 poly in s
*
* 3. For x in [1.25,1/0.35(~2.857143)],
* erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1)
* erf(x) = 1 - erfc(x)
* where
* R1(z) = degree 7 poly in z, (z=1/x^2)
* S1(z) = degree 8 poly in z
*
* 4. For x in [1/0.35,28]
* erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
* = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0
* = 2.0 - tiny (if x <= -6)
* erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6, else
* erf(x) = sign(x)*(1.0 - tiny)
* where
* R2(z) = degree 6 poly in z, (z=1/x^2)
* S2(z) = degree 7 poly in z
*
* Note1:
* To compute exp(-x*x-0.5625+R/S), let s be a single
* precision number and s := x; then
* -x*x = -s*s + (s-x)*(s+x)
* exp(-x*x-0.5626+R/S) =
* exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
* Note2:
* Here 4 and 5 make use of the asymptotic series
* exp(-x*x)
* erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
* x*sqrt(pi)
* We use rational approximation to approximate
* g(s)=f(1/x^2) = log(erfc(x)*x) - x*x + 0.5625
* Here is the error bound for R1/S1 and R2/S2
* |R1/S1 - f(x)| < 2**(-62.57)
* |R2/S2 - f(x)| < 2**(-61.52)
*
* 5. For inf > x >= 28
* erf(x) = sign(x) *(1 - tiny) (raise inexact)
* erfc(x) = tiny*tiny (raise underflow) if x > 0
* = 2 - tiny if x<0
*
* 7. Special case:
* erf(0) = 0, erf(inf) = 1, erf(-inf) = -1,
* erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
* erfc/erf(NaN) is NaN
*/
static const double /* Top 32 bits of a double. */
erx = 8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */ static inline uint32_t
/* top32 (double x)
* Coefficients for approximation to erf on [0,0.84375]
*/
efx8 = 1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */
pp0 = 1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */
pp1 = -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */
pp2 = -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */
pp3 = -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */
pp4 = -2.37630166566501626084e-05, /* 0xBEF8EAD6, 0x120016AC */
qq1 = 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */
qq2 = 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */
qq3 = 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */
qq4 = 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */
qq5 = -3.96022827877536812320e-06, /* 0xBED09C43, 0x42A26120 */
/*
* Coefficients for approximation to erf in [0.84375,1.25]
*/
pa0 = -2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */
pa1 = 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */
pa2 = -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */
pa3 = 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */
pa4 = -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */
pa5 = 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */
pa6 = -2.16637559486879084300e-03, /* 0xBF61BF38, 0x0A96073F */
qa1 = 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */
qa2 = 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */
qa3 = 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */
qa4 = 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */
qa5 = 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */
qa6 = 1.19844998467991074170e-02, /* 0x3F888B54, 0x5735151D */
/*
* Coefficients for approximation to erfc in [1.25,1/0.35]
*/
ra0 = -9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */
ra1 = -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */
ra2 = -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */
ra3 = -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */
ra4 = -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */
ra5 = -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */
ra6 = -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */
ra7 = -9.81432934416914548592e+00, /* 0xC023A0EF, 0xC69AC25C */
sa1 = 1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */
sa2 = 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */
sa3 = 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */
sa4 = 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */
sa5 = 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */
sa6 = 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */
sa7 = 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */
sa8 = -6.04244152148580987438e-02, /* 0xBFAEEFF2, 0xEE749A62 */
/*
* Coefficients for approximation to erfc in [1/.35,28]
*/
rb0 = -9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */
rb1 = -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */
rb2 = -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */
rb3 = -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */
rb4 = -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */
rb5 = -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */
rb6 = -4.83519191608651397019e+02, /* 0xC07E384E, 0x9BDC383F */
sb1 = 3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */
sb2 = 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */
sb3 = 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */
sb4 = 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */
sb5 = 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */
sb6 = 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */
sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */
#define asuint(f) ((union{float _f; uint32_t _i;}){f})._i
#define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f
#define asuint64(f) ((union{double _f; uint64_t _i;}){f})._i
#define asdouble(i) ((union{uint64_t _i; double _f;}){i})._f
#define INSERT_WORDS(d,hi,lo) \
do { \
(d) = asdouble(((uint64_t)(hi)<<32) | (uint32_t)(lo)); \
} while (0)
#define GET_HIGH_WORD(hi,d) \
do { \
(hi) = asuint64(d) >> 32; \
} while (0)
#define GET_LOW_WORD(lo,d) \
do { \
(lo) = (uint32_t)asuint64(d); \
} while (0)
#define SET_HIGH_WORD(d,hi) \
INSERT_WORDS(d, hi, (uint32_t)asuint64(d))
#define SET_LOW_WORD(d,lo) \
INSERT_WORDS(d, asuint64(d)>>32, lo)
static double erfc1(double x)
{ {
double_t s,P,Q; return asuint64 (x) >> 32;
s = fabs(x) - 1;
P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
Q = 1+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
return 1 - erx - P/Q;
}
static double erfc2(uint32_t ix, double x)
{
double_t s,R,S;
double z;
if (ix < 0x3ff40000) /* |x| < 1.25 */
return erfc1(x);
x = fabs(x);
s = 1/(x*x);
if (ix < 0x4006db6d) { /* |x| < 1/.35 ~ 2.85714 */
R = ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
ra5+s*(ra6+s*ra7))))));
S = 1.0+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
sa5+s*(sa6+s*(sa7+s*sa8)))))));
} else { /* |x| > 1/.35 */
R = rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
rb5+s*rb6)))));
S = 1.0+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
sb5+s*(sb6+s*sb7))))));
}
z = x;
SET_LOW_WORD(z,0);
return exp(-z*z-0.5625)*exp((z-x)*(z+x)+R/S)/x;
} }
/** /**
* Returns error function of 𝑥. * Returns error function of x.
*
* Highest measured error is 1.01 ULPs at 0x1.39956ac43382fp+0.
*
* @raise ERANGE on underflow
*/ */
double erf(double x) double
erf (double x)
{ {
double r,s,z,y; /* Get top word and sign. */
uint32_t ix; uint32_t ix = top32 (x);
int sign; uint32_t ia = ix & 0x7fffffff;
uint32_t sign = ix >> 31;
GET_HIGH_WORD(ix, x); /* Normalized and subnormal cases */
sign = ix>>31; if (ia < 0x3feb0000)
ix &= 0x7fffffff; { /* a = |x| < 0.84375. */
if (ix >= 0x7ff00000) {
/* erf(nan)=nan, erf(+-inf)=+-1 */ if (ia < 0x3e300000)
return 1-2*sign + 1/x; { /* a < 2^(-28). */
if (ia < 0x00800000)
{ /* a < 2^(-1015). */
double y = fma (TwoOverSqrtPiMinusOne, x, x);
return check_uflow (y);
}
return x + TwoOverSqrtPiMinusOne * x;
} }
if (ix < 0x3feb0000) { /* |x| < 0.84375 */
if (ix < 0x3e300000) { /* |x| < 2**-28 */ double x2 = x * x;
/* avoid underflow */
return 0.125*(8*x + efx8*x); if (ia < 0x3fe00000)
} { /* a < 0.5 - Use polynomial approximation. */
z = x*x; double r1 = fma (x2, PA[1], PA[0]);
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4))); double r2 = fma (x2, PA[3], PA[2]);
s = 1.0+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))); double r3 = fma (x2, PA[5], PA[4]);
y = r/s; double r4 = fma (x2, PA[7], PA[6]);
return x + x*y; double r5 = fma (x2, PA[9], PA[8]);
double x4 = x2 * x2;
double r = r5;
r = fma (x4, r, r4);
r = fma (x4, r, r3);
r = fma (x4, r, r2);
r = fma (x4, r, r1);
return fma (r, x, x); /* This fma is crucial for accuracy. */
} }
if (ix < 0x40180000) /* 0.84375 <= |x| < 6 */ else
y = 1 - erfc2(ix,x); { /* 0.5 <= a < 0.84375 - Use rational approximation. */
else double x4, x8, r1n, r2n, r1d, r2d, r3d;
y = 1 - 0x1p-1022;
return sign ? -y : y; r1n = fma (x2, NA[1], NA[0]);
x4 = x2 * x2;
r2n = fma (x2, NA[3], NA[2]);
x8 = x4 * x4;
r1d = fma (x2, DA[0], 1.0);
r2d = fma (x2, DA[2], DA[1]);
r3d = fma (x2, DA[4], DA[3]);
double P = r1n + x4 * r2n + x8 * NA[4];
double Q = r1d + x4 * r2d + x8 * r3d;
return fma (P / Q, x, x);
}
}
else if (ia < 0x3ff40000)
{ /* 0.84375 <= |x| < 1.25. */
double a2, a4, a6, r1n, r2n, r3n, r4n, r1d, r2d, r3d, r4d;
double a = fabs (x) - 1.0;
r1n = fma (a, NB[1], NB[0]);
a2 = a * a;
r1d = fma (a, DB[0], 1.0);
a4 = a2 * a2;
r2n = fma (a, NB[3], NB[2]);
a6 = a4 * a2;
r2d = fma (a, DB[2], DB[1]);
r3n = fma (a, NB[5], NB[4]);
r3d = fma (a, DB[4], DB[3]);
r4n = NB[6];
r4d = DB[5];
double P = r1n + a2 * r2n + a4 * r3n + a6 * r4n;
double Q = r1d + a2 * r2d + a4 * r3d + a6 * r4d;
if (sign)
return -C - P / Q;
else
return C + P / Q;
}
else if (ia < 0x40000000)
{ /* 1.25 <= |x| < 2.0. */
double a = fabs (x);
a = a - 1.25;
double r1 = fma (a, PC[1], PC[0]);
double r2 = fma (a, PC[3], PC[2]);
double r3 = fma (a, PC[5], PC[4]);
double r4 = fma (a, PC[7], PC[6]);
double r5 = fma (a, PC[9], PC[8]);
double r6 = fma (a, PC[11], PC[10]);
double r7 = fma (a, PC[13], PC[12]);
double r8 = fma (a, PC[15], PC[14]);
double a2 = a * a;
double r = r8;
r = fma (a2, r, r7);
r = fma (a2, r, r6);
r = fma (a2, r, r5);
r = fma (a2, r, r4);
r = fma (a2, r, r3);
r = fma (a2, r, r2);
r = fma (a2, r, r1);
if (sign)
return -1.0 + r;
else
return 1.0 - r;
}
else if (ia < 0x400a0000)
{ /* 2 <= |x| < 3.25. */
double a = fabs (x);
a = fma (0.5, a, -1.0);
double r1 = fma (a, PD[1], PD[0]);
double r2 = fma (a, PD[3], PD[2]);
double r3 = fma (a, PD[5], PD[4]);
double r4 = fma (a, PD[7], PD[6]);
double r5 = fma (a, PD[9], PD[8]);
double r6 = fma (a, PD[11], PD[10]);
double r7 = fma (a, PD[13], PD[12]);
double r8 = fma (a, PD[15], PD[14]);
double r9 = fma (a, PD[17], PD[16]);
double a2 = a * a;
double r = r9;
r = fma (a2, r, r8);
r = fma (a2, r, r7);
r = fma (a2, r, r6);
r = fma (a2, r, r5);
r = fma (a2, r, r4);
r = fma (a2, r, r3);
r = fma (a2, r, r2);
r = fma (a2, r, r1);
if (sign)
return -1.0 + r;
else
return 1.0 - r;
}
else if (ia < 0x40100000)
{ /* 3.25 <= |x| < 4.0. */
double a = fabs (x);
a = a - 3.25;
double r1 = fma (a, PE[1], PE[0]);
double r2 = fma (a, PE[3], PE[2]);
double r3 = fma (a, PE[5], PE[4]);
double r4 = fma (a, PE[7], PE[6]);
double r5 = fma (a, PE[9], PE[8]);
double r6 = fma (a, PE[11], PE[10]);
double r7 = fma (a, PE[13], PE[12]);
double a2 = a * a;
double r = r7;
r = fma (a2, r, r6);
r = fma (a2, r, r5);
r = fma (a2, r, r4);
r = fma (a2, r, r3);
r = fma (a2, r, r2);
r = fma (a2, r, r1);
if (sign)
return -1.0 + r;
else
return 1.0 - r;
}
else if (ia < 0x4017a000)
{ /* 4 <= |x| < 5.90625. */
double a = fabs (x);
a = fma (0.5, a, -2.0);
double r1 = fma (a, PF[1], PF[0]);
double r2 = fma (a, PF[3], PF[2]);
double r3 = fma (a, PF[5], PF[4]);
double r4 = fma (a, PF[7], PF[6]);
double r5 = fma (a, PF[9], PF[8]);
double r6 = fma (a, PF[11], PF[10]);
double r7 = fma (a, PF[13], PF[12]);
double r8 = fma (a, PF[15], PF[14]);
double r9 = PF[16];
double a2 = a * a;
double r = r9;
r = fma (a2, r, r8);
r = fma (a2, r, r7);
r = fma (a2, r, r6);
r = fma (a2, r, r5);
r = fma (a2, r, r4);
r = fma (a2, r, r3);
r = fma (a2, r, r2);
r = fma (a2, r, r1);
if (sign)
return -1.0 + r;
else
return 1.0 - r;
}
else
{
/* Special cases : erf(nan)=nan, erf(+inf)=+1 and erf(-inf)=-1. */
if (unlikely (ia >= 0x7ff00000))
return (double) (1.0 - (sign << 1)) + 1.0 / x;
if (sign)
return -1.0;
else
return 1.0;
}
} }
/** #if LDBL_MANT_DIG == 53
* Returns complementary error function of 𝑥.
*/
double erfc(double x)
{
double r,s,z,y;
uint32_t ix;
int sign;
GET_HIGH_WORD(ix, x);
sign = ix>>31;
ix &= 0x7fffffff;
if (ix >= 0x7ff00000) {
/* erfc(nan)=nan, erfc(+-inf)=0,2 */
return 2*sign + 1/x;
}
if (ix < 0x3feb0000) { /* |x| < 0.84375 */
if (ix < 0x3c700000) /* |x| < 2**-56 */
return 1.0 - x;
z = x*x;
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
s = 1.0+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
y = r/s;
if (sign || ix < 0x3fd00000) { /* x < 1/4 */
return 1.0 - (x+x*y);
}
return 0.5 - (x - 0.5 + x*y);
}
if (ix < 0x403c0000) { /* 0.84375 <= |x| < 28 */
return sign ? 2 - erfc2(ix,x) : erfc2(ix,x);
}
return sign ? 2 - 0x1p-1022 : 0x1p-1022*0x1p-1022;
}
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
__weak_reference(erf, erfl); __weak_reference(erf, erfl);
__weak_reference(erfc, erfcl);
#endif #endif

105
libc/tinymath/erf_data.c Normal file
View file

@ -0,0 +1,105 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/*
Minimax approximation of erf

Coefficient tables read by erf() in erf.c through the PA/NA/DA/NB/DB/
PC/PD/PE/PF macros. Each polynomial table is wrapped in an #if on its
*_NCOEFFS macro; when that guard is false the table receives no
initializers.
NOTE(review): struct erf_data and the *_NCOEFFS macros are declared
outside this view (presumably in arm.internal.h) - confirm the array
sizes match the counts below.
*/
const struct erf_data __erf_data = {
/* 10 coefficients; erf() uses these for |x| < 0.5 as a polynomial in x^2. */
.erf_poly_A = {
#if ERF_POLY_A_NCOEFFS == 10
0x1.06eba8214db68p-3, -0x1.812746b037948p-2, 0x1.ce2f21a03872p-4,
-0x1.b82ce30e6548p-6, 0x1.565bcc360a2f2p-8, -0x1.c02d812bc979ap-11,
0x1.f99bddfc1ebe9p-14, -0x1.f42c457cee912p-17, 0x1.b0e414ec20ee9p-20,
-0x1.18c47fd143c5ep-23
#endif
},
/* Rational approximation on [0x1p-28, 0.84375] */
/* erf() uses N_A / D_A for 0.5 <= |x| < 0.84375; the denominator has an implicit leading 1.0. */
.erf_ratio_N_A = {
0x1.06eba8214db68p-3, -0x1.4cd7d691cb913p-2, -0x1.d2a51dbd7194fp-6,
-0x1.7a291236668e4p-8, -0x1.8ead6120016acp-16
},
.erf_ratio_D_A = {
0x1.97779cddadc09p-2, 0x1.0a54c5536cebap-4, 0x1.4d022c4d36b0fp-8,
0x1.15dc9221c1a1p-13, -0x1.09c4342a2612p-18
},
/* Rational approximation on [0.84375, 1.25] */
/* Evaluated in erf() at |x| - 1; the denominator has an implicit leading 1.0. */
.erf_ratio_N_B = {
-0x1.359b8bef77538p-9, 0x1.a8d00ad92b34dp-2, -0x1.7d240fbb8c3f1p-2,
0x1.45fca805120e4p-2, -0x1.c63983d3e28ecp-4, 0x1.22a36599795ebp-5,
-0x1.1bf380a96073fp-9
},
.erf_ratio_D_B = {
0x1.b3e6618eee323p-4, 0x1.14af092eb6f33p-1, 0x1.2635cd99fe9a7p-4,
0x1.02660e763351fp-3, 0x1.bedc26b51dd1cp-7, 0x1.88b545735151dp-7
},
/* 16 coefficients; erf() uses these for 1.25 <= |x| < 2, evaluated at |x| - 1.25. */
.erfc_poly_C = {
#if ERFC_POLY_C_NCOEFFS == 16
/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=15 a=1.25 b=2 c=1 d=1.25 */
0x1.3bcd133aa0ffcp-4, -0x1.e4652fadcb702p-3, 0x1.2ebf3dcca0446p-2,
-0x1.571d01c62d66p-3, 0x1.93a9a8f5b3413p-8, 0x1.8281cbcc2cd52p-5,
-0x1.5cffd86b4de16p-6, -0x1.db4ccf595053ep-9, 0x1.757cbf8684edap-8,
-0x1.ce7dfd2a9e56ap-11, -0x1.99ee3bc5a3263p-11, 0x1.3c57cf9213f5fp-12,
0x1.60692996bf254p-14, -0x1.6e44cb7c1fa2ap-14, 0x1.9d4484ac482b2p-16,
-0x1.578c9e375d37p-19
#endif
},
/* 18 coefficients; erf() uses these for 2 <= |x| < 3.25, evaluated at |x|/2 - 1. */
.erfc_poly_D = {
#if ERFC_POLY_D_NCOEFFS == 18
/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=17 a=2 b=3.25 c=2 d=2 */
0x1.328f5ec350e5p-8, -0x1.529b9e8cf8e99p-5, 0x1.529b9e8cd9e71p-3,
-0x1.8b0ae3a023bf2p-2, 0x1.1a2c592599d82p-1, -0x1.ace732477e494p-2,
-0x1.e1a06a27920ffp-6, 0x1.bae92a6d27af6p-2, -0x1.a15470fcf5ce7p-2,
0x1.bafe45d18e213p-6, 0x1.0d950680d199ap-2, -0x1.8c9481e8f22e3p-3,
-0x1.158450ed5c899p-4, 0x1.c01f2973b44p-3, -0x1.73ed2827546a7p-3,
0x1.47733687d1ff7p-4, -0x1.2dec70d00b8e1p-6, 0x1.a947ab83cd4fp-10
#endif
},
/* 14 coefficients; erf() uses these for 3.25 <= |x| < 4, evaluated at |x| - 3.25. */
.erfc_poly_E = {
#if ERFC_POLY_E_NCOEFFS == 14
/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=13 a=3.25 b=4 c=1 d=3.25 */
0x1.20c13035539e4p-18, -0x1.e9b5e8d16df7ep-16, 0x1.8de3cd4733bf9p-14,
-0x1.9aa48beb8382fp-13, 0x1.2c7d713370a9fp-12, -0x1.490b12110b9e2p-12,
0x1.1459c5d989d23p-12, -0x1.64b28e9f1269p-13, 0x1.57c76d9d05cf8p-14,
-0x1.bf271d9951cf8p-16, 0x1.db7ea4d4535c9p-19, 0x1.91c2e102d5e49p-20,
-0x1.e9f0826c2149ep-21, 0x1.60eebaea236e1p-23
#endif
},
/* 17 coefficients; erf() uses these for 4 <= |x| < 5.90625, evaluated at |x|/2 - 2. */
.erfc_poly_F = {
#if ERFC_POLY_F_NCOEFFS == 17
/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=16 a=4 b=5.90625 c=2 d=4 */
0x1.08ddd130d1fa6p-26, -0x1.10b146f59ff06p-22, 0x1.10b135328b7b2p-19,
-0x1.6039988e7575fp-17, 0x1.497d365e19367p-15, -0x1.da48d9afac83ep-14,
0x1.1024c9b1fbb48p-12, -0x1.fc962e7066272p-12, 0x1.87297282d4651p-11,
-0x1.f057b255f8c59p-11, 0x1.0228d0eee063p-10, -0x1.b1b21b84ec41cp-11,
0x1.1ead8ae9e1253p-11, -0x1.1e708fba37fccp-12, 0x1.9559363991edap-14,
-0x1.68c827b783d9cp-16, 0x1.2ec4adeccf4a2p-19
#endif
}
};

279
libc/tinymath/erfc.c Normal file
View file

@ -0,0 +1,279 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Copyright (c) 1992-2024 The FreeBSD Project
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
Developed at SunPro, a Sun Microsystems, Inc. business.
Permission to use, copy, modify, and distribute this
software is freely granted, provided that this notice
is preserved.
*/
#include "libc/tinymath/freebsd.internal.h"
__static_yoink("freebsd_libm_notice");
__static_yoink("fdlibm_notice");
/* double erf(double x)
* double erfc(double x)
*                           x
*                    2      |\
*     erf(x)  =  ---------  | exp(-t*t)dt
*                 sqrt(pi) \|
*                           0
*
* erfc(x) = 1-erf(x)
* Note that
* erf(-x) = -erf(x)
* erfc(-x) = 2 - erfc(x)
*
* Method:
* 1. For |x| in [0, 0.84375]
* erf(x) = x + x*R(x^2)
* erfc(x) = 1 - erf(x) if x in [-.84375,0.25]
* = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375]
* where R = P/Q where P is an even poly of degree 8 and
* Q is an even poly of degree 10 (both are polys in x^2).
* | R - (erf(x)-x)/x | <= 2**-57.90
*
*
* Remark. The formula is derived by noting
* erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
* and that
* 2/sqrt(pi) = 1.128379167095512573896158903121545171688
* is close to one. The interval is chosen because the fix
* point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
* near 0.6174), and by some experiment, 0.84375 is chosen to
* guarantee the error is less than one ulp for erf.
*
* 2. For |x| in [0.84375,1.25], let s = |x| - 1, and
* c = 0.84506291151 rounded to single (24 bits)
* erf(x) = sign(x) * (c + P1(s)/Q1(s))
* erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0
* 1+(c+P1(s)/Q1(s)) if x < 0
* |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06
* Remark: here we use the taylor series expansion at x=1.
* erf(1+s) = erf(1) + s*Poly(s)
* = 0.845.. + P1(s)/Q1(s)
* That is, we use rational approximation to approximate
* erf(1+s) - (c = (single)0.84506291151)
* Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
* where
* P1(s) = degree 6 poly in s
* Q1(s) = degree 6 poly in s
*
* 3. For x in [1.25,1/0.35(~2.857143)],
* erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1)
* erf(x) = 1 - erfc(x)
* where
* R1(z) = degree 7 poly in z, (z=1/x^2)
* S1(z) = degree 8 poly in z
*
* 4. For x in [1/0.35,28]
* erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
* = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0
* = 2.0 - tiny (if x <= -6)
* erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6, else
* erf(x) = sign(x)*(1.0 - tiny)
* where
* R2(z) = degree 6 poly in z, (z=1/x^2)
* S2(z) = degree 7 poly in z
*
* Note1:
* To compute exp(-x*x-0.5625+R/S), let s be a single
* precision number and s := x; then
* -x*x = -s*s + (s-x)*(s+x)
* exp(-x*x-0.5625+R/S) =
* exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
* Note2:
* Here 3 and 4 make use of the asymptotic series
*                 exp(-x*x)
*     erfc(x) ~  ------------ * ( 1 + Poly(1/x^2) )
*                 x*sqrt(pi)
* We use rational approximation to approximate
* g(s)=f(1/x^2) = log(erfc(x)*x) + x*x + 0.5625
* Here is the error bound for R1/S1 and R2/S2
* |R1/S1 - f(x)| < 2**(-62.57)
* |R2/S2 - f(x)| < 2**(-61.52)
*
* 5. For inf > x >= 28
* erf(x) = sign(x) *(1 - tiny) (raise inexact)
* erfc(x) = tiny*tiny (raise underflow) if x > 0
* = 2 - tiny if x<0
*
* 6. Special case:
* erf(0) = 0, erf(inf) = 1, erf(-inf) = -1,
* erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
* erfc/erf(NaN) is NaN
*/
/*
 * Constants used by erfc(). Each decimal literal is followed by the
 * high and low 32-bit words of its IEEE-754 double representation.
 * Only the constants erfc() actually reads are kept in this file.
 */
/* XXX Prevent compilers from erroneously constant folding: */
static const volatile double tiny= 1e-300;
static const double
half= 0.5,
one = 1,
two = 2,
/* c = (float)0.84506291151 */
erx = 8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */
/*
 * Coefficients for approximation to erf on [0,0.84375]
 */
pp0 = 1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */
pp1 = -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */
pp2 = -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */
pp3 = -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */
pp4 = -2.37630166566501626084e-05, /* 0xBEF8EAD6, 0x120016AC */
qq1 = 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */
qq2 = 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */
qq3 = 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */
qq4 = 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */
qq5 = -3.96022827877536812320e-06, /* 0xBED09C43, 0x42A26120 */
/*
 * Coefficients for approximation to erf in [0.84375,1.25]
 */
pa0 = -2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */
pa1 = 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */
pa2 = -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */
pa3 = 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */
pa4 = -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */
pa5 = 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */
pa6 = -2.16637559486879084300e-03, /* 0xBF61BF38, 0x0A96073F */
qa1 = 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */
qa2 = 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */
qa3 = 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */
qa4 = 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */
qa5 = 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */
qa6 = 1.19844998467991074170e-02, /* 0x3F888B54, 0x5735151D */
/*
 * Coefficients for approximation to erfc in [1.25,1/0.35]
 */
ra0 = -9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */
ra1 = -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */
ra2 = -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */
ra3 = -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */
ra4 = -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */
ra5 = -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */
ra6 = -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */
ra7 = -9.81432934416914548592e+00, /* 0xC023A0EF, 0xC69AC25C */
sa1 = 1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */
sa2 = 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */
sa3 = 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */
sa4 = 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */
sa5 = 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */
sa6 = 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */
sa7 = 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */
sa8 = -6.04244152148580987438e-02, /* 0xBFAEEFF2, 0xEE749A62 */
/*
 * Coefficients for approximation to erfc in [1/.35,28]
 */
rb0 = -9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */
rb1 = -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */
rb2 = -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */
rb3 = -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */
rb4 = -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */
rb5 = -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */
rb6 = -4.83519191608651397019e+02, /* 0xC07E384E, 0x9BDC383F */
sb1 = 3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */
sb2 = 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */
sb3 = 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */
sb4 = 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */
sb5 = 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */
sb6 = 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */
sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */
/**
 * Returns complementary error function of x, i.e. 1.0 - erf(x).
 *
 * The high word of |x| picks one of the approximation intervals
 * described in the method notes earlier in this file. Negative
 * arguments are handled through erfc(-x) = 2 - erfc(x).
 *
 * @param x is any double, including NaN and the infinities
 * @return erfc(x); NaN for NaN, 0 for +inf, 2 for -inf
 */
double
erfc(double x)
{
	int32_t hi, mag;
	double num, den, t, q, xs, ax, e;

	GET_HIGH_WORD(hi, x);
	mag = hi & 0x7fffffff;		/* high word of |x| */

	/* erfc(nan)=nan, erfc(+-inf)=0,2 */
	if (mag >= 0x7ff00000)
		return (double)(((uint32_t)hi>>31)<<1)+one/x;

	/* |x| < 0.84375 */
	if (mag < 0x3feb0000) {
		if (mag < 0x3c700000)	/* |x| < 2**-56 */
			return one-x;
		t = x*x;
		num = pp0+t*(pp1+t*(pp2+t*(pp3+t*pp4)));
		den = one+t*(qq1+t*(qq2+t*(qq3+t*(qq4+t*qq5))));
		q = num/den;
		if (hi < 0x3fd00000)	/* x < 1/4 */
			return one-(x+x*q);
		/* 1/4 <= x < 0.84375: form 0.5 - ((x-0.5) + x*q) */
		num = x*q;
		num += (x-half);
		return half - num;
	}

	/* 0.84375 <= |x| < 1.25 */
	if (mag < 0x3ff40000) {
		t = fabs(x)-one;
		num = pa0+t*(pa1+t*(pa2+t*(pa3+t*(pa4+t*(pa5+t*pa6)))));
		den = one+t*(qa1+t*(qa2+t*(qa3+t*(qa4+t*(qa5+t*qa6)))));
		if (hi < 0) {
			xs = erx+num/den;
			return one+xs;
		}
		xs = one-erx;
		return xs - num/den;
	}

	/* |x| >= 28 */
	if (mag >= 0x403c0000) {
		if (hi > 0)
			return tiny*tiny;
		return two-tiny;
	}

	/* 1.25 <= |x| < 28 */
	ax = fabs(x);
	t = one/(ax*ax);
	if (mag < 0x4006DB6D) {		/* |x| < 1/.35 ~ 2.857143 */
		num = ra0+t*(ra1+t*(ra2+t*(ra3+t*(ra4+t*(ra5+t*(ra6+t*ra7))))));
		den = one+t*(sa1+t*(sa2+t*(sa3+t*(sa4+t*(sa5+t*(sa6+t*(sa7+
		    t*sa8)))))));
	} else {			/* |x| >= 1/.35 ~ 2.857143 */
		if (hi < 0 && mag >= 0x40180000)
			return two-tiny;	/* x < -6 */
		num = rb0+t*(rb1+t*(rb2+t*(rb3+t*(rb4+t*(rb5+t*rb6)))));
		den = one+t*(sb1+t*(sb2+t*(sb3+t*(sb4+t*(sb5+t*(sb6+t*sb7))))));
	}

	/* xs is |x| with its low word cleared; exp(-x*x-0.5625+num/den)
	   is then evaluated as a product of two exp() calls. */
	xs = ax;
	SET_LOW_WORD(xs,0);
	e = exp(-xs*xs-0.5625)*exp((xs-ax)*(xs+ax)+num/den);
	if (hi > 0)
		return e/ax;
	return two-e/ax;
}
/* When long double has a 53-bit mantissa, erfcl is a weak alias of erfc. */
#if LDBL_MANT_DIG == 53
__weak_reference(erfc, erfcl);
#endif

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Musl Libc Optimized Routines
Copyright © 2005-2020 Rich Felker, et al. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,189 +25,99 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/math.h" #include "libc/tinymath/arm.internal.h"
__static_yoink("freebsd_libm_notice"); __static_yoink("arm_optimized_routines_notice");
__static_yoink("musl_libc_notice");
__static_yoink("fdlibm_notice");
/* origin: FreeBSD /usr/src/lib/msun/src/s_erff.c */ #define TwoOverSqrtPiMinusOne 0x1.06eba8p-3f
/* #define A __erff_data.erff_poly_A
* ==================================================== #define B __erff_data.erff_poly_B
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#define asuint(f) ((union{float _f; uint32_t _i;}){f})._i /* Top 12 bits of a float. */
#define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f static inline uint32_t
top12 (float x)
static const float
erx = 8.4506291151e-01, /* 0x3f58560b */
/*
* Coefficients for approximation to erf on [0,0.84375]
*/
efx8 = 1.0270333290e+00, /* 0x3f8375d4 */
pp0 = 1.2837916613e-01, /* 0x3e0375d4 */
pp1 = -3.2504209876e-01, /* 0xbea66beb */
pp2 = -2.8481749818e-02, /* 0xbce9528f */
pp3 = -5.7702702470e-03, /* 0xbbbd1489 */
pp4 = -2.3763017452e-05, /* 0xb7c756b1 */
qq1 = 3.9791721106e-01, /* 0x3ecbbbce */
qq2 = 6.5022252500e-02, /* 0x3d852a63 */
qq3 = 5.0813062117e-03, /* 0x3ba68116 */
qq4 = 1.3249473704e-04, /* 0x390aee49 */
qq5 = -3.9602282413e-06, /* 0xb684e21a */
/*
* Coefficients for approximation to erf in [0.84375,1.25]
*/
pa0 = -2.3621185683e-03, /* 0xbb1acdc6 */
pa1 = 4.1485610604e-01, /* 0x3ed46805 */
pa2 = -3.7220788002e-01, /* 0xbebe9208 */
pa3 = 3.1834661961e-01, /* 0x3ea2fe54 */
pa4 = -1.1089469492e-01, /* 0xbde31cc2 */
pa5 = 3.5478305072e-02, /* 0x3d1151b3 */
pa6 = -2.1663755178e-03, /* 0xbb0df9c0 */
qa1 = 1.0642088205e-01, /* 0x3dd9f331 */
qa2 = 5.4039794207e-01, /* 0x3f0a5785 */
qa3 = 7.1828655899e-02, /* 0x3d931ae7 */
qa4 = 1.2617121637e-01, /* 0x3e013307 */
qa5 = 1.3637083583e-02, /* 0x3c5f6e13 */
qa6 = 1.1984500103e-02, /* 0x3c445aa3 */
/*
* Coefficients for approximation to erfc in [1.25,1/0.35]
*/
ra0 = -9.8649440333e-03, /* 0xbc21a093 */
ra1 = -6.9385856390e-01, /* 0xbf31a0b7 */
ra2 = -1.0558626175e+01, /* 0xc128f022 */
ra3 = -6.2375331879e+01, /* 0xc2798057 */
ra4 = -1.6239666748e+02, /* 0xc322658c */
ra5 = -1.8460508728e+02, /* 0xc3389ae7 */
ra6 = -8.1287437439e+01, /* 0xc2a2932b */
ra7 = -9.8143291473e+00, /* 0xc11d077e */
sa1 = 1.9651271820e+01, /* 0x419d35ce */
sa2 = 1.3765776062e+02, /* 0x4309a863 */
sa3 = 4.3456588745e+02, /* 0x43d9486f */
sa4 = 6.4538726807e+02, /* 0x442158c9 */
sa5 = 4.2900814819e+02, /* 0x43d6810b */
sa6 = 1.0863500214e+02, /* 0x42d9451f */
sa7 = 6.5702495575e+00, /* 0x40d23f7c */
sa8 = -6.0424413532e-02, /* 0xbd777f97 */
/*
* Coefficients for approximation to erfc in [1/.35,28]
*/
rb0 = -9.8649431020e-03, /* 0xbc21a092 */
rb1 = -7.9928326607e-01, /* 0xbf4c9dd4 */
rb2 = -1.7757955551e+01, /* 0xc18e104b */
rb3 = -1.6063638306e+02, /* 0xc320a2ea */
rb4 = -6.3756646729e+02, /* 0xc41f6441 */
rb5 = -1.0250950928e+03, /* 0xc480230b */
rb6 = -4.8351919556e+02, /* 0xc3f1c275 */
sb1 = 3.0338060379e+01, /* 0x41f2b459 */
sb2 = 3.2579251099e+02, /* 0x43a2e571 */
sb3 = 1.5367296143e+03, /* 0x44c01759 */
sb4 = 3.1998581543e+03, /* 0x4547fdbb */
sb5 = 2.5530502930e+03, /* 0x451f90ce */
sb6 = 4.7452853394e+02, /* 0x43ed43a7 */
sb7 = -2.2440952301e+01; /* 0xc1b38712 */
static float erfc1(float x)
{ {
float_t s,P,Q; return asuint (x) >> 20;
s = fabsf(x) - 1;
P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
Q = 1+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
return 1 - erx - P/Q;
} }
static float erfc2(uint32_t ix, float x) /* Efficient implementation of erff
using either a pure polynomial approximation or
the exponential of a polynomial.
Worst-case error is 1.09ulps at 0x1.c111acp-1. */
float
erff (float x)
{ {
float_t s,R,S; float r, x2, u;
float z;
if (ix < 0x3fa00000) /* |x| < 1.25 */ /* Get top word. */
return erfc1(x); uint32_t ix = asuint (x);
uint32_t sign = ix >> 31;
uint32_t ia12 = top12 (x) & 0x7ff;
x = fabsf(x); /* Limit of both intervals is 0.875 for performance reasons but coefficients
s = 1/(x*x); computed on [0.0, 0.921875] and [0.921875, 4.0], which brought accuracy
if (ix < 0x4036db6d) { /* |x| < 1/0.35 */ from 0.94 to 1.1ulps. */
R = ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*( if (ia12 < 0x3f6)
ra5+s*(ra6+s*ra7)))))); { /* a = |x| < 0.875. */
S = 1.0f+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
sa5+s*(sa6+s*(sa7+s*sa8))))))); /* Tiny and subnormal cases. */
} else { /* |x| >= 1/0.35 */ if (unlikely (ia12 < 0x318))
R = rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*( { /* |x| < 2^(-28). */
rb5+s*rb6))))); if (unlikely (ia12 < 0x040))
S = 1.0f+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*( { /* |x| < 2^(-119). */
sb5+s*(sb6+s*sb7)))))); float y = fmaf (TwoOverSqrtPiMinusOne, x, x);
return check_uflowf (y);
}
return x + TwoOverSqrtPiMinusOne * x;
} }
ix = asuint(x);
z = asfloat(ix&0xffffe000); x2 = x * x;
return expf(-z*z - 0.5625f) * expf((z-x)*(z+x) + R/S)/x;
} /* Normalized cases (|x| < 0.921875). Use Horner scheme for x+x*P(x^2). */
r = A[5];
float erff(float x) r = fmaf (r, x2, A[4]);
{ r = fmaf (r, x2, A[3]);
float r,s,z,y; r = fmaf (r, x2, A[2]);
uint32_t ix; r = fmaf (r, x2, A[1]);
int sign; r = fmaf (r, x2, A[0]);
r = fmaf (r, x, x);
ix = asuint(x); }
sign = ix>>31; else if (ia12 < 0x408)
ix &= 0x7fffffff; { /* |x| < 4.0 - Use a custom Estrin scheme. */
if (ix >= 0x7f800000) {
/* erf(nan)=nan, erf(+-inf)=+-1 */ float a = fabsf (x);
return 1-2*sign + 1/x; /* Start with Estrin scheme on high order (small magnitude) coefficients. */
} r = fmaf (B[6], a, B[5]);
if (ix < 0x3f580000) { /* |x| < 0.84375 */ u = fmaf (B[4], a, B[3]);
if (ix < 0x31800000) { /* |x| < 2**-28 */ x2 = x * x;
/*avoid underflow */ r = fmaf (r, x2, u);
return 0.125f*(8*x + efx8*x); /* Then switch to pure Horner scheme. */
} r = fmaf (r, a, B[2]);
z = x*x; r = fmaf (r, a, B[1]);
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4))); r = fmaf (r, a, B[0]);
s = 1+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))); r = fmaf (r, a, a);
y = r/s; /* Single precision exponential with ~0.5ulps,
return x + x*y; ensures erff has max. rel. error
} < 1ulp on [0.921875, 4.0],
if (ix < 0x40c00000) /* |x| < 6 */ < 1.1ulps on [0.875, 4.0]. */
y = 1 - erfc2(ix,x); r = expf (-r);
else /* Explicit copysign (calling copysignf increases latency). */
y = 1 - 0x1p-120f; if (sign)
return sign ? -y : y; r = -1.0f + r;
} else
r = 1.0f - r;
float erfcf(float x) }
{ else
float r,s,z,y; { /* |x| >= 4.0. */
uint32_t ix;
int sign; /* Special cases : erff(nan)=nan, erff(+inf)=+1 and erff(-inf)=-1. */
if (unlikely (ia12 >= 0x7f8))
ix = asuint(x); return (1.f - (float) ((ix >> 31) << 1)) + 1.f / x;
sign = ix>>31;
ix &= 0x7fffffff; /* Explicit copysign (calling copysignf increases latency). */
if (ix >= 0x7f800000) { if (sign)
/* erfc(nan)=nan, erfc(+-inf)=0,2 */ r = -1.0f;
return 2*sign + 1/x; else
} r = 1.0f;
}
if (ix < 0x3f580000) { /* |x| < 0.84375 */ return r;
if (ix < 0x23800000) /* |x| < 2**-56 */
return 1.0f - x;
z = x*x;
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
s = 1.0f+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
y = r/s;
if (sign || ix < 0x3e800000) /* x < 1/4 */
return 1.0f - (x+x*y);
return 0.5f - (x - 0.5f + x*y);
}
if (ix < 0x41e00000) { /* |x| < 28 */
return sign ? 2 - erfc2(ix,x) : erfc2(ix,x);
}
return sign ? 2 - 0x1p-120f : 0x1p-120f*0x1p-120f;
} }

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,12 +25,19 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/tinymath/log1pf_data.internal.h" #include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice"); __static_yoink("arm_optimized_routines_notice");
/* Polynomial coefficients generated using floating-point minimax /* Minimax approximation of erff. */
algorithm, see tools/log1pf.sollya for details. */ const struct erff_data __erff_data = {
const struct log1pf_data __log1pf_data .erff_poly_A = {
= {.coeffs = {-0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f, 0x1.06eba6p-03f, -0x1.8126e0p-02f, 0x1.ce1a46p-04f,
-0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, 0x1.abcb6p-4f, -0x1.b68bd2p-06f, 0x1.473f48p-08f, -0x1.3a1a82p-11f
-0x1.6f0d5ep-5f}}; },
.erff_poly_B = {
0x1.079d0cp-3f, 0x1.450aa0p-1f, 0x1.b55cb0p-4f,
-0x1.8d6300p-6f, 0x1.fd1336p-9f, -0x1.91d2ccp-12f,
0x1.222900p-16f
}
};

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,19 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/intrin/likely.h" #include "libc/tinymath/arm.internal.h"
#include "libc/math.h"
#include "libc/tinymath/exp_data.internal.h"
#include "libc/tinymath/internal.h"
__static_yoink("arm_optimized_routines_notice"); __static_yoink("arm_optimized_routines_notice");
/*
* Double-precision e^x function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define N (1 << EXP_TABLE_BITS) #define N (1 << EXP_TABLE_BITS)
#define InvLn2N __exp_data.invln2N #define InvLn2N __exp_data.invln2N
#define NegLn2hiN __exp_data.negln2hiN #define NegLn2hiN __exp_data.negln2hiN
@ -48,6 +38,7 @@ __static_yoink("arm_optimized_routines_notice");
#define C3 __exp_data.poly[6 - EXP_POLY_ORDER] #define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
#define C4 __exp_data.poly[7 - EXP_POLY_ORDER] #define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER] #define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
/* Handle cases that may overflow or underflow when computing the result that /* Handle cases that may overflow or underflow when computing the result that
is scale*(1+TMP) without intermediate rounding. The bit representation of is scale*(1+TMP) without intermediate rounding. The bit representation of
@ -56,114 +47,154 @@ __static_yoink("arm_optimized_routines_notice");
a double. (int32_t)KI is the k used in the argument reduction and exponent a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow. */ negative k means the result may underflow. */
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) static inline double
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
{ {
double_t scale, y; double_t scale, y;
if ((ki & 0x80000000) == 0) { if ((ki & 0x80000000) == 0)
/* k > 0, the exponent of scale might have overflowed by <= 460. */ {
sbits -= 1009ull << 52; /* k > 0, the exponent of scale might have overflowed by <= 460. */
scale = asdouble(sbits); sbits -= 1009ull << 52;
y = 0x1p1009 * (scale + scale * tmp); scale = asdouble (sbits);
return eval_as_double(y); y = 0x1p1009 * (scale + scale * tmp);
} return check_oflow (eval_as_double (y));
/* k < 0, need special care in the subnormal range. */ }
sbits += 1022ull << 52; /* k < 0, need special care in the subnormal range. */
scale = asdouble(sbits); sbits += 1022ull << 52;
y = scale + scale * tmp; scale = asdouble (sbits);
if (y < 1.0) { y = scale + scale * tmp;
/* Round y to the right precision before scaling it into the subnormal if (y < 1.0)
range to avoid double rounding that can cause 0.5+E/2 ulp error where {
E is the worst-case ulp error outside the subnormal range. So this /* Round y to the right precision before scaling it into the subnormal
is only useful if the goal is better than 1 ulp worst-case error. */ range to avoid double rounding that can cause 0.5+E/2 ulp error where
double_t hi, lo; E is the worst-case ulp error outside the subnormal range. So this
lo = scale - y + scale * tmp; is only useful if the goal is better than 1 ulp worst-case error. */
hi = 1.0 + y; double_t hi, lo;
lo = 1.0 - hi + y + lo; lo = scale - y + scale * tmp;
y = eval_as_double(hi + lo) - 1.0; hi = 1.0 + y;
/* Avoid -0.0 with downward rounding. */ lo = 1.0 - hi + y + lo;
if (WANT_ROUNDING && y == 0.0) y = eval_as_double (hi + lo) - 1.0;
y = 0.0; /* Avoid -0.0 with downward rounding. */
/* The underflow exception needs to be signaled explicitly. */ if (WANT_ROUNDING && y == 0.0)
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022); y = 0.0;
} /* The underflow exception needs to be signaled explicitly. */
y = 0x1p-1022 * y; force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
return eval_as_double(y); }
y = 0x1p-1022 * y;
return check_uflow (eval_as_double (y));
} }
/* Top 12 bits of a double (sign and exponent bits). */ /* Top 12 bits of a double (sign and exponent bits). */
static inline uint32_t top12(double x) static inline uint32_t
top12 (double x)
{ {
return asuint64(x) >> 52; return asuint64 (x) >> 52;
}
/* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
If hastail is 0 then xtail is assumed to be 0 too. */
static inline double
exp_inline (double x, double xtail, int hastail)
{
uint32_t abstop;
uint64_t ki, idx, top, sbits;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, z, r, r2, scale, tail, tmp;
abstop = top12 (x) & 0x7ff;
if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
{
if (abstop - top12 (0x1p-54) >= 0x80000000)
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
return WANT_ROUNDING ? 1.0 + x : 1.0;
if (abstop >= top12 (1024.0))
{
if (asuint64 (x) == asuint64 (-INFINITY))
return 0.0;
if (abstop >= top12 (INFINITY))
return 1.0 + x;
if (asuint64 (x) >> 63)
return __math_uflow (0);
else
return __math_oflow (0);
}
/* Large x is special cased below. */
abstop = 0;
}
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
z = InvLn2N * x;
#if TOINT_INTRINSICS
kd = roundtoint (z);
ki = converttoint (z);
#elif EXP_USE_TOINT_NARROW
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
kd = eval_as_double (z + Shift);
ki = asuint64 (kd) >> 16;
kd = (double_t) (int32_t) ki;
#else
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
kd = eval_as_double (z + Shift);
ki = asuint64 (kd);
kd -= Shift;
#endif
r = x + kd * NegLn2hiN + kd * NegLn2loN;
/* The code assumes 2^-200 < |xtail| < 2^-8/N. */
if (hastail)
r += xtail;
/* 2^(k/N) ~= scale * (1 + tail). */
idx = 2 * (ki % N);
top = ki << (52 - EXP_TABLE_BITS);
tail = asdouble (T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.25/N ulp larger. */
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
#if EXP_POLY_ORDER == 4
tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
#elif EXP_POLY_ORDER == 5
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
#elif EXP_POLY_ORDER == 6
tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
#endif
if (unlikely (abstop == 0))
return specialcase (tmp, sbits, ki);
scale = asdouble (sbits);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
is no spurious underflow here even without fma. */
return eval_as_double (scale + scale * tmp);
} }
/** /**
* Returns 𝑒^x. * Returns 𝑒^x.
*
* @raise ERANGE on overflow or underflow
*/ */
double exp(double x) double
exp (double x)
{ {
uint32_t abstop; return exp_inline (x, 0, 0);
uint64_t ki, idx, top, sbits;
double_t kd, z, r, r2, scale, tail, tmp;
abstop = top12(x) & 0x7ff;
if (UNLIKELY(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
if (abstop - top12(0x1p-54) >= 0x80000000)
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
return WANT_ROUNDING ? 1.0 + x : 1.0;
if (abstop >= top12(1024.0)) {
if (asuint64(x) == asuint64(-INFINITY))
return 0.0;
if (abstop >= top12(INFINITY))
return 1.0 + x;
if (asuint64(x) >> 63)
return __math_uflow(0);
else
return __math_oflow(0);
}
/* Large x is special cased below. */
abstop = 0;
}
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
z = InvLn2N * x;
#if TOINT_INTRINSICS
kd = roundtoint(z);
ki = converttoint(z);
#elif EXP_USE_TOINT_NARROW
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
kd = eval_as_double(z + Shift);
ki = asuint64(kd) >> 16;
kd = (double_t)(int32_t)ki;
#else
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
kd = eval_as_double(z + Shift);
ki = asuint64(kd);
kd -= Shift;
#endif
r = x + kd * NegLn2hiN + kd * NegLn2loN;
/* 2^(k/N) ~= scale * (1 + tail). */
idx = 2 * (ki % N);
top = ki << (52 - EXP_TABLE_BITS);
tail = asdouble(T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.25/N ulp larger. */
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
if (UNLIKELY(abstop == 0))
return specialcase(tmp, sbits, ki);
scale = asdouble(sbits);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
is no spurious underflow here even without fma. */
return eval_as_double(scale + scale * tmp);
} }
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 /* May be useful for implementing pow where more than double
__weak_reference(exp, expl); precision input is needed. */
double
__exp_dd (double x, double xtail)
{
return exp_inline (x, xtail, 1);
}
#if USE_GLIBC_ABI
strong_alias (exp, __exp_finite)
hidden_alias (exp, __ieee754_exp)
hidden_alias (__exp_dd, __exp1)
# if LDBL_MANT_DIG == 53
long double expl (long double x) { return exp (x); }
# endif
#endif #endif

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Musl Libc Optimized Routines
Copyright © 2005-2014 Rich Felker, et al. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,33 +25,135 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/math.h" #include "libc/tinymath/arm.internal.h"
__static_yoink("musl_libc_notice"); __static_yoink("arm_optimized_routines_notice");
#define N (1 << EXP_TABLE_BITS)
#define IndexMask (N - 1)
#define OFlowBound 0x1.34413509f79ffp8 /* log10(DBL_MAX). */
#define UFlowBound -0x1.5ep+8 /* -350. */
#define SmallTop 0x3c6 /* top12(0x1p-57). */
#define BigTop 0x407 /* top12(0x1p8). */
#define Thresh 0x41 /* BigTop - SmallTop. */
#define Shift __exp_data.shift
#define C(i) __exp_data.exp10_poly[i]
/* Slow path of exp10, entered when the caller has cleared abstop to mark a
   large-magnitude argument.  Computes scale * (1 + TMP), where SBITS is the
   bit pattern of scale, without letting the intermediate scale overflow or
   lose precision in the subnormal range.  KI is the integer produced by the
   caller's argument reduction; it selects between the overflow-side and the
   underflow-side handling below.  The result is passed through check_oflow
   or check_uflow before being returned.  */
static double
special_case (uint64_t sbits, double_t tmp, uint64_t ki)
{
double_t scale, y;
/* Unsigned comparison: true only when ki - 2^16, computed modulo 2^64,
   is below 2^31.  A negative k stored in the unsigned KI wraps to a huge
   value and therefore takes the other path.  */
if (ki - (1ull << 16) < 0x80000000)
{
/* The exponent of scale might have overflowed by 1. */
/* Lower the exponent field by one (halving scale), evaluate, then
   multiply the result by 2 to compensate.  */
sbits -= 1ull << 52;
scale = asdouble (sbits);
y = 2 * (scale + scale * tmp);
return check_oflow (eval_as_double (y));
}
/* n < 0, need special care in the subnormal range. */
/* Raise the exponent field by 1022 so scale is representable here; the
   final multiplication by 0x1p-1022 undoes this.  */
sbits += 1022ull << 52;
scale = asdouble (sbits);
y = scale + scale * tmp;
if (y < 1.0)
{
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
/* lo collects the rounding error of y = scale + scale * tmp, then the
   error of hi = 1.0 + y; the statement order below is significant.  */
double_t lo = scale - y + scale * tmp;
double_t hi = 1.0 + y;
lo = 1.0 - hi + y + lo;
y = eval_as_double (hi + lo) - 1.0;
/* Avoid -0.0 with downward rounding. */
/* y == 0.0 also matches -0.0, so the assignment replaces it by +0.0.  */
if (WANT_ROUNDING && y == 0.0)
y = 0.0;
/* The underflow exception needs to be signaled explicitly. */
/* NOTE(review): opt_barrier_double presumably keeps the compiler from
   folding this product at compile time -- helper not visible here.  */
force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return check_uflow (y);
}
/** /**
* Returns 10ˣ. * Returns 10ˣ.
*
* The largest observed error is ~0.513 ULP.
*/ */
double exp10(double x) double
exp10 (double x)
{ {
static const double p10[] = { uint64_t ix = asuint64 (x);
1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, uint32_t abstop = (ix >> 52) & 0x7ff;
1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1,
1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, if (unlikely (abstop - SmallTop >= Thresh))
1e10, 1e11, 1e12, 1e13, 1e14, 1e15 {
}; if (abstop - SmallTop >= 0x80000000)
double n, y = modf(x, &n); /* Avoid spurious underflow for tiny x.
union {double f; uint64_t i;} u = {n}; Note: 0 is common input. */
/* fabs(n) < 16 without raising invalid on nan */ return x + 1;
if ((u.i>>52 & 0x7ff) < 0x3ff+4) { if (abstop == 0x7ff)
if (!y) return p10[(int)n+15]; return ix == asuint64 (-INFINITY) ? 0.0 : x + 1.0;
y = exp2(3.32192809488736234787031942948939 * y); if (x >= OFlowBound)
return y * p10[(int)n+15]; return __math_oflow (0);
} if (x < UFlowBound)
return pow(10.0, x); return __math_uflow (0);
/* Large x is special-cased below. */
abstop = 0;
}
/* Reduce x: z = x * N / log10(2), k = round(z). */
double_t z = __exp_data.invlog10_2N * x;
double_t kd;
int64_t ki;
#if TOINT_INTRINSICS
kd = roundtoint (z);
ki = converttoint (z);
#else
kd = eval_as_double (z + Shift);
kd -= Shift;
ki = kd;
#endif
/* r = x - k * log10(2), r in [-0.5, 0.5]. */
double_t r = x;
r = __exp_data.neglog10_2hiN * kd + r;
r = __exp_data.neglog10_2loN * kd + r;
/* exp10(x) = 2^(k/N) * 2^(r/N).
Approximate the two components separately. */
/* s = 2^(k/N), using lookup table. */
uint64_t e = ki << (52 - EXP_TABLE_BITS);
uint64_t i = (ki & IndexMask) * 2;
uint64_t u = __exp_data.tab[i + 1];
uint64_t sbits = u + e;
double_t tail = asdouble (__exp_data.tab[i]);
/* 2^(r/N) ~= 1 + r * Poly(r). */
double_t r2 = r * r;
double_t p = C (0) + r * C (1);
double_t y = C (2) + r * C (3);
y = y + r2 * C (4);
y = p + r2 * y;
y = tail + y * r;
if (unlikely (abstop == 0))
return special_case (sbits, y, ki);
/* Assemble components:
y = 2^(r/N) * 2^(k/N)
~= (y + 1) * s. */
double_t s = asdouble (sbits);
return eval_as_double (s * y + s);
} }
__strong_reference(exp10, pow10); __strong_reference(exp10, pow10);
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
__weak_reference(exp10, exp10l); __weak_reference(exp10, exp10l);
__weak_reference(exp10, pow10l);
#endif #endif

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Musl Libc Optimized Routines
Copyright © 2005-2014 Rich Felker, et al. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,19 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/intrin/likely.h" #include "libc/tinymath/arm.internal.h"
#include "libc/math.h"
#include "libc/tinymath/exp_data.internal.h"
#include "libc/tinymath/internal.h"
__static_yoink("arm_optimized_routines_notice"); __static_yoink("arm_optimized_routines_notice");
/*
* Double-precision 2^x function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define N (1 << EXP_TABLE_BITS) #define N (1 << EXP_TABLE_BITS)
#define Shift __exp_data.exp2_shift #define Shift __exp_data.exp2_shift
#define T __exp_data.tab #define T __exp_data.tab
@ -46,6 +36,7 @@ __static_yoink("arm_optimized_routines_notice");
#define C3 __exp_data.exp2_poly[2] #define C3 __exp_data.exp2_poly[2]
#define C4 __exp_data.exp2_poly[3] #define C4 __exp_data.exp2_poly[3]
#define C5 __exp_data.exp2_poly[4] #define C5 __exp_data.exp2_poly[4]
#define C6 __exp_data.exp2_poly[5]
/* Handle cases that may overflow or underflow when computing the result that /* Handle cases that may overflow or underflow when computing the result that
is scale*(1+TMP) without intermediate rounding. The bit representation of is scale*(1+TMP) without intermediate rounding. The bit representation of
@ -54,103 +45,121 @@ __static_yoink("arm_optimized_routines_notice");
a double. (int32_t)KI is the k used in the argument reduction and exponent a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow. */ negative k means the result may underflow. */
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) static inline double
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
{ {
double_t scale, y; double_t scale, y;
if ((ki & 0x80000000) == 0) { if ((ki & 0x80000000) == 0)
/* k > 0, the exponent of scale might have overflowed by 1. */ {
sbits -= 1ull << 52; /* k > 0, the exponent of scale might have overflowed by 1. */
scale = asdouble(sbits); sbits -= 1ull << 52;
y = 2 * (scale + scale * tmp); scale = asdouble (sbits);
return eval_as_double(y); y = 2 * (scale + scale * tmp);
} return check_oflow (eval_as_double (y));
/* k < 0, need special care in the subnormal range. */ }
sbits += 1022ull << 52; /* k < 0, need special care in the subnormal range. */
scale = asdouble(sbits); sbits += 1022ull << 52;
y = scale + scale * tmp; scale = asdouble (sbits);
if (y < 1.0) { y = scale + scale * tmp;
/* Round y to the right precision before scaling it into the subnormal if (y < 1.0)
range to avoid double rounding that can cause 0.5+E/2 ulp error where {
E is the worst-case ulp error outside the subnormal range. So this /* Round y to the right precision before scaling it into the subnormal
is only useful if the goal is better than 1 ulp worst-case error. */ range to avoid double rounding that can cause 0.5+E/2 ulp error where
double_t hi, lo; E is the worst-case ulp error outside the subnormal range. So this
lo = scale - y + scale * tmp; is only useful if the goal is better than 1 ulp worst-case error. */
hi = 1.0 + y; double_t hi, lo;
lo = 1.0 - hi + y + lo; lo = scale - y + scale * tmp;
y = eval_as_double(hi + lo) - 1.0; hi = 1.0 + y;
/* Avoid -0.0 with downward rounding. */ lo = 1.0 - hi + y + lo;
if (WANT_ROUNDING && y == 0.0) y = eval_as_double (hi + lo) - 1.0;
y = 0.0; /* Avoid -0.0 with downward rounding. */
/* The underflow exception needs to be signaled explicitly. */ if (WANT_ROUNDING && y == 0.0)
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022); y = 0.0;
} /* The underflow exception needs to be signaled explicitly. */
y = 0x1p-1022 * y; force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
return eval_as_double(y); }
y = 0x1p-1022 * y;
return check_uflow (eval_as_double (y));
} }
/* Top 12 bits of a double (sign and exponent bits). */ /* Top 12 bits of a double (sign and exponent bits). */
static inline uint32_t top12(double x) static inline uint32_t
top12 (double x)
{ {
return asuint64(x) >> 52; return asuint64 (x) >> 52;
} }
/** /**
* Returns 2^𝑥. * Returns 2^𝑥.
*/ */
double exp2(double x) double
exp2 (double x)
{ {
uint32_t abstop; uint32_t abstop;
uint64_t ki, idx, top, sbits; uint64_t ki, idx, top, sbits;
double_t kd, r, r2, scale, tail, tmp; /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, r, r2, scale, tail, tmp;
abstop = top12(x) & 0x7ff; abstop = top12 (x) & 0x7ff;
if (UNLIKELY(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) { if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
if (abstop - top12(0x1p-54) >= 0x80000000) {
/* Avoid spurious underflow for tiny x. */ if (abstop - top12 (0x1p-54) >= 0x80000000)
/* Note: 0 is common input. */ /* Avoid spurious underflow for tiny x. */
return WANT_ROUNDING ? 1.0 + x : 1.0; /* Note: 0 is common input. */
if (abstop >= top12(1024.0)) { return WANT_ROUNDING ? 1.0 + x : 1.0;
if (asuint64(x) == asuint64(-INFINITY)) if (abstop >= top12 (1024.0))
return 0.0; {
if (abstop >= top12(INFINITY)) if (asuint64 (x) == asuint64 (-INFINITY))
return 1.0 + x; return 0.0;
if (!(asuint64(x) >> 63)) if (abstop >= top12 (INFINITY))
return __math_oflow(0); return 1.0 + x;
else if (asuint64(x) >= asuint64(-1075.0)) if (!(asuint64 (x) >> 63))
return __math_uflow(0); return __math_oflow (0);
} else if (asuint64 (x) >= asuint64 (-1075.0))
if (2 * asuint64(x) > 2 * asuint64(928.0)) return __math_uflow (0);
/* Large x is special cased below. */
abstop = 0;
} }
if (2 * asuint64 (x) > 2 * asuint64 (928.0))
/* Large x is special cased below. */
abstop = 0;
}
/* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */ /* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
/* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */ /* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
kd = eval_as_double(x + Shift); kd = eval_as_double (x + Shift);
ki = asuint64(kd); /* k. */ ki = asuint64 (kd); /* k. */
kd -= Shift; /* k/N for int k. */ kd -= Shift; /* k/N for int k. */
r = x - kd; r = x - kd;
/* 2^(k/N) ~= scale * (1 + tail). */ /* 2^(k/N) ~= scale * (1 + tail). */
idx = 2 * (ki % N); idx = 2 * (ki % N);
top = ki << (52 - EXP_TABLE_BITS); top = ki << (52 - EXP_TABLE_BITS);
tail = asdouble(T[idx]); tail = asdouble (T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */ /* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top; sbits = T[idx + 1] + top;
/* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */ /* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */ /* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r; r2 = r * r;
/* Without fma the worst case error is 0.5/N ulp larger. */ /* Without fma the worst case error is 0.5/N ulp larger. */
/* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */ /* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); #if EXP2_POLY_ORDER == 4
if (UNLIKELY(abstop == 0)) tmp = tail + r * C1 + r2 * C2 + r * r2 * (C3 + r * C4);
return specialcase(tmp, sbits, ki); #elif EXP2_POLY_ORDER == 5
scale = asdouble(sbits); tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
/* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there #elif EXP2_POLY_ORDER == 6
is no spurious underflow here even without fma. */ tmp = tail + r * C1 + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
return eval_as_double(scale + scale * tmp); #endif
if (unlikely (abstop == 0))
return specialcase (tmp, sbits, ki);
scale = asdouble (sbits);
/* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
is no spurious underflow here even without fma. */
return eval_as_double (scale + scale * tmp);
} }
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 #if USE_GLIBC_ABI
__weak_reference(exp2, exp2l); strong_alias (exp2, __exp2_finite)
hidden_alias (exp2, __ieee754_exp2)
# if LDBL_MANT_DIG == 53
long double exp2l (long double x) { return exp2 (x); }
# endif
#endif #endif

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,19 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/intrin/likely.h" #include "libc/tinymath/arm.internal.h"
#include "libc/math.h"
#include "libc/tinymath/exp2f_data.internal.h"
#include "libc/tinymath/internal.h"
__static_yoink("arm_optimized_routines_notice"); __static_yoink("arm_optimized_routines_notice");
/*
* Single-precision 2^x function.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/* /*
EXP2F_TABLE_BITS = 5 EXP2F_TABLE_BITS = 5
EXP2F_POLY_ORDER = 3 EXP2F_POLY_ORDER = 3
@ -53,48 +43,66 @@ Non-nearest ULP error: 1 (rounded ULP error)
#define C __exp2f_data.poly #define C __exp2f_data.poly
#define SHIFT __exp2f_data.shift_scaled #define SHIFT __exp2f_data.shift_scaled
static inline uint32_t top12(float x) static inline uint32_t
top12 (float x)
{ {
return asuint(x) >> 20; return asuint (x) >> 20;
} }
/** /**
* Returns 2^𝑥. * Returns 2^𝑥.
*
* - ULP error: 0.502 (nearest rounding.)
* - Relative error: 1.69 * 2^-34 in [-1/64, 1/64] (before rounding.)
* - Wrong count: 168353 (all nearest rounding wrong results with fma.)
* - Non-nearest ULP error: 1 (rounded ULP error)
*/ */
float exp2f(float x) float
exp2f (float x)
{ {
uint32_t abstop; uint32_t abstop;
uint64_t ki, t; uint64_t ki, t;
double_t kd, xd, z, r, r2, y, s; /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, xd, z, r, r2, y, s;
xd = (double_t)x; xd = (double_t) x;
abstop = top12(x) & 0x7ff; abstop = top12 (x) & 0x7ff;
if (UNLIKELY(abstop >= top12(128.0f))) { if (unlikely (abstop >= top12 (128.0f)))
/* |x| >= 128 or x is nan. */ {
if (asuint(x) == asuint(-INFINITY)) /* |x| >= 128 or x is nan. */
return 0.0f; if (asuint (x) == asuint (-INFINITY))
if (abstop >= top12(INFINITY)) return 0.0f;
return x + x; if (abstop >= top12 (INFINITY))
if (x > 0.0f) return x + x;
return __math_oflowf(0); if (x > 0.0f)
if (x <= -150.0f) return __math_oflowf (0);
return __math_uflowf(0); if (x <= -150.0f)
} return __math_uflowf (0);
#if WANT_ERRNO_UFLOW
if (x < -149.0f)
return __math_may_uflowf (0);
#endif
}
/* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k. */ /* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k. */
kd = eval_as_double(xd + SHIFT); kd = eval_as_double (xd + SHIFT);
ki = asuint64(kd); ki = asuint64 (kd);
kd -= SHIFT; /* k/N for int k. */ kd -= SHIFT; /* k/N for int k. */
r = xd - kd; r = xd - kd;
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
t = T[ki % N]; t = T[ki % N];
t += ki << (52 - EXP2F_TABLE_BITS); t += ki << (52 - EXP2F_TABLE_BITS);
s = asdouble(t); s = asdouble (t);
z = C[0] * r + C[1]; z = C[0] * r + C[1];
r2 = r * r; r2 = r * r;
y = C[2] * r + 1; y = C[2] * r + 1;
y = z * r2 + y; y = z * r2 + y;
y = y * s; y = y * s;
return eval_as_float(y); return eval_as_float (y);
} }
#if USE_GLIBC_ABI
strong_alias (exp2f, __exp2f_finite)
hidden_alias (exp2f, __ieee754_exp2f)
#endif

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,16 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/tinymath/exp2f_data.internal.h" #include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice"); __static_yoink("arm_optimized_routines_notice");
/*
* Shared data between expf, exp2f and powf.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define N (1 << EXP2F_TABLE_BITS) #define N (1 << EXP2F_TABLE_BITS)
const struct exp2f_data __exp2f_data = { const struct exp2f_data __exp2f_data = {
@ -42,6 +35,15 @@ const struct exp2f_data __exp2f_data = {
used for computing 2^(k/N) for an int |k| < 150 N as used for computing 2^(k/N) for an int |k| < 150 N as
double(tab[k%N] + (k << 52-BITS)) */ double(tab[k%N] + (k << 52-BITS)) */
.tab = { .tab = {
#if N == 8
0x3ff0000000000000, 0x3fef72b83c7d517b, 0x3fef06fe0a31b715, 0x3feebfdad5362a27,
0x3feea09e667f3bcd, 0x3feeace5422aa0db, 0x3feee89f995ad3ad, 0x3fef5818dcfba487,
#elif N == 16
0x3ff0000000000000, 0x3fefb5586cf9890f, 0x3fef72b83c7d517b, 0x3fef387a6e756238,
0x3fef06fe0a31b715, 0x3feedea64c123422, 0x3feebfdad5362a27, 0x3feeab07dd485429,
0x3feea09e667f3bcd, 0x3feea11473eb0187, 0x3feeace5422aa0db, 0x3feec49182a3f090,
0x3feee89f995ad3ad, 0x3fef199bdd85529c, 0x3fef5818dcfba487, 0x3fefa4afa2a490da,
#elif N == 32
0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51, 0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1, 0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d, 0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
@ -50,14 +52,48 @@ const struct exp2f_data __exp2f_data = {
0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d, 0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069, 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540, 0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
#elif N == 64
0x3ff0000000000000, 0x3fefec9a3e778061, 0x3fefd9b0d3158574, 0x3fefc74518759bc8,
0x3fefb5586cf9890f, 0x3fefa3ec32d3d1a2, 0x3fef9301d0125b51, 0x3fef829aaea92de0,
0x3fef72b83c7d517b, 0x3fef635beb6fcb75, 0x3fef54873168b9aa, 0x3fef463b88628cd6,
0x3fef387a6e756238, 0x3fef2b4565e27cdd, 0x3fef1e9df51fdee1, 0x3fef1285a6e4030b,
0x3fef06fe0a31b715, 0x3feefc08b26416ff, 0x3feef1a7373aa9cb, 0x3feee7db34e59ff7,
0x3feedea64c123422, 0x3feed60a21f72e2a, 0x3feece086061892d, 0x3feec6a2b5c13cd0,
0x3feebfdad5362a27, 0x3feeb9b2769d2ca7, 0x3feeb42b569d4f82, 0x3feeaf4736b527da,
0x3feeab07dd485429, 0x3feea76f15ad2148, 0x3feea47eb03a5585, 0x3feea23882552225,
0x3feea09e667f3bcd, 0x3fee9fb23c651a2f, 0x3fee9f75e8ec5f74, 0x3fee9feb564267c9,
0x3feea11473eb0187, 0x3feea2f336cf4e62, 0x3feea589994cce13, 0x3feea8d99b4492ed,
0x3feeace5422aa0db, 0x3feeb1ae99157736, 0x3feeb737b0cdc5e5, 0x3feebd829fde4e50,
0x3feec49182a3f090, 0x3feecc667b5de565, 0x3feed503b23e255d, 0x3feede6b5579fdbf,
0x3feee89f995ad3ad, 0x3feef3a2b84f15fb, 0x3feeff76f2fb5e47, 0x3fef0c1e904bc1d2,
0x3fef199bdd85529c, 0x3fef27f12e57d14b, 0x3fef3720dcef9069, 0x3fef472d4a07897c,
0x3fef5818dcfba487, 0x3fef69e603db3285, 0x3fef7c97337b9b5f, 0x3fef902ee78b3ff6,
0x3fefa4afa2a490da, 0x3fefba1bee615a27, 0x3fefd0765b6e4540, 0x3fefe7c1819e90d8,
#endif
}, },
.shift_scaled = 0x1.8p+52 / N, .shift_scaled = 0x1.8p+52 / N,
.poly = { .poly = {
#if N == 8
0x1.c6a00335106e2p-5, 0x1.ec0c313449f55p-3, 0x1.62e431111f69fp-1,
#elif N == 16
0x1.c6ac6aa313963p-5, 0x1.ebfff4532d9bap-3, 0x1.62e43001bc49fp-1,
#elif N == 32
0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1, 0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1,
#elif N == 64
0x1.c6b04b4221b2ap-5, 0x1.ebfc213e184d7p-3, 0x1.62e42fefb5b7fp-1,
#endif
}, },
.shift = 0x1.8p+52, .shift = 0x1.8p+52,
.invln2_scaled = 0x1.71547652b82fep+0 * N, .invln2_scaled = 0x1.71547652b82fep+0 * N,
.poly_scaled = { .poly_scaled = {
#if N == 8
0x1.c6a00335106e2p-5/N/N/N, 0x1.ec0c313449f55p-3/N/N, 0x1.62e431111f69fp-1/N,
#elif N == 16
0x1.c6ac6aa313963p-5/N/N/N, 0x1.ebfff4532d9bap-3/N/N, 0x1.62e43001bc49fp-1/N,
#elif N == 32
0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N, 0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N,
#elif N == 64
0x1.c6b04b4221b2ap-5/N/N/N, 0x1.ebfc213e184d7p-3/N/N, 0x1.62e42fefb5b7fp-1/N,
#endif
}, },
}; };

View file

@ -1,19 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_EXP2F_DATA_INTERNAL_H_
#define COSMOPOLITAN_LIBC_TINYMATH_EXP2F_DATA_INTERNAL_H_
#define EXP2F_TABLE_BITS 5
#define EXP2F_POLY_ORDER 3
COSMOPOLITAN_C_START_
/* Lookup table and constants behind the single-precision 2^x and e^x
   routines; exp2f and expf read the fields through their T, C and SHIFT
   macros.  */
extern const struct exp2f_data {
/* Bit patterns used to build 2^(k/N): callers form
   tab[k % N] + (k << (52 - EXP2F_TABLE_BITS)) and reinterpret the sum as
   a double.  */
uint64_t tab[1 << EXP2F_TABLE_BITS];
/* Initialized to 0x1.8p+52 / N; exp2f adds and then subtracts it to
   obtain k/N for an integer k.  */
double shift_scaled;
/* Polynomial coefficients used by exp2f (its C macro).  */
double poly[EXP2F_POLY_ORDER];
/* Initialized to 0x1.8p+52; expf uses it as SHIFT when TOINT_INTRINSICS
   is not enabled.  */
double shift;
/* Initialized to 0x1.71547652b82fep+0 * N.
   NOTE(review): presumably N/ln(2), read by expf as InvLn2N -- that macro
   definition is not visible in this excerpt; confirm.  */
double invln2_scaled;
/* The poly coefficients divided by N^3, N^2 and N respectively; used by
   expf (its C macro).  */
double poly_scaled[EXP2F_POLY_ORDER];
} __exp2f_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_EXP2F_DATA_INTERNAL_H_ */

File diff suppressed because it is too large Load diff

View file

@ -1,23 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_EXP_DATA_H_
#define COSMOPOLITAN_LIBC_TINYMATH_EXP_DATA_H_
#define EXP_TABLE_BITS 7
#define EXP_POLY_ORDER 5
#define EXP_USE_TOINT_NARROW 0
#define EXP2_POLY_ORDER 5
COSMOPOLITAN_C_START_
/* Shared table and constants for the double-precision exp-family
   routines.  */
extern const struct exp_data {
/* NOTE(review): presumably N/ln(2), the factor exp multiplies x by
   (InvLn2N) -- the macro mapping is not visible in this excerpt;
   confirm.  */
double invln2N;
/* Constant that is added to and then subtracted from the scaled argument
   to round it to an integer when no to-int intrinsics are used.  */
double shift;
/* NOTE(review): presumably the high and low parts of -ln(2)/N used in
   exp's reduction r = x + kd*NegLn2hiN + kd*NegLn2loN -- macro mapping
   not visible here; confirm.  */
double negln2hiN;
double negln2loN;
double poly[4]; /* Last four coefficients. */
/* exp2's Shift: added to x and subtracted again to obtain k/N for an
   integer k.  */
double exp2_shift;
/* Polynomial coefficients of exp2; its C3..C5 macros read indices
   2..4.  */
double exp2_poly[EXP2_POLY_ORDER];
/* Entries come in pairs: for j = k % N, exp2 reads tab[2*j] as the double
   "tail" and adds the shifted k to tab[2*j + 1] to form the bit pattern
   of the scale.  */
uint64_t tab[2 * (1 << EXP_TABLE_BITS)];
} __exp_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_EXP_DATA_H_ */

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,19 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/intrin/likely.h" #include "libc/tinymath/arm.internal.h"
#include "libc/math.h"
#include "libc/tinymath/exp2f_data.internal.h"
#include "libc/tinymath/internal.h"
__static_yoink("arm_optimized_routines_notice"); __static_yoink("arm_optimized_routines_notice");
/*
* Single-precision e^x function.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/* /*
EXP2F_TABLE_BITS = 5 EXP2F_TABLE_BITS = 5
EXP2F_POLY_ORDER = 3 EXP2F_POLY_ORDER = 3
@ -53,59 +43,79 @@ Non-nearest ULP error: 1 (rounded ULP error)
#define T __exp2f_data.tab #define T __exp2f_data.tab
#define C __exp2f_data.poly_scaled #define C __exp2f_data.poly_scaled
static inline uint32_t top12(float x) static inline uint32_t
top12 (float x)
{ {
return asuint(x) >> 20; return asuint (x) >> 20;
} }
/** /**
* Returns 𝑒^x. * Returns 𝑒^x.
*
* - ULP error: 0.502 (nearest rounding.)
* - Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.)
* - Wrong count: 170635 (all nearest rounding wrong results with fma.)
* - Non-nearest ULP error: 1 (rounded ULP error)
*
* @raise ERANGE on overflow or underflow
*/ */
float expf(float x) float
expf (float x)
{ {
uint32_t abstop; uint32_t abstop;
uint64_t ki, t; uint64_t ki, t;
double_t kd, xd, z, r, r2, y, s; /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, xd, z, r, r2, y, s;
xd = (double_t)x; xd = (double_t) x;
abstop = top12(x) & 0x7ff; abstop = top12 (x) & 0x7ff;
if (UNLIKELY(abstop >= top12(88.0f))) { if (unlikely (abstop >= top12 (88.0f)))
/* |x| >= 88 or x is nan. */ {
if (asuint(x) == asuint(-INFINITY)) /* |x| >= 88 or x is nan. */
return 0.0f; if (asuint (x) == asuint (-INFINITY))
if (abstop >= top12(INFINITY)) return 0.0f;
return x + x; if (abstop >= top12 (INFINITY))
if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */ return x + x;
return __math_oflowf(0); if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */ return __math_oflowf (0);
return __math_uflowf(0); if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
} return __math_uflowf (0);
#if WANT_ERRNO_UFLOW
if (x < -0x1.9d1d9ep6f) /* x < log(0x1p-149) ~= -103.28 */
return __math_may_uflowf (0);
#endif
}
/* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */ /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */
z = InvLn2N * xd; z = InvLn2N * xd;
/* Round and convert z to int, the result is in [-150*N, 128*N] and /* Round and convert z to int, the result is in [-150*N, 128*N] and
ideally ties-to-even rule is used, otherwise the magnitude of r ideally nearest int is used, otherwise the magnitude of r can be
can be bigger which gives larger approximation error. */ bigger which gives larger approximation error. */
#if TOINT_INTRINSICS #if TOINT_INTRINSICS
kd = roundtoint(z); kd = roundtoint (z);
ki = converttoint(z); ki = converttoint (z);
#else #else
# define SHIFT __exp2f_data.shift # define SHIFT __exp2f_data.shift
kd = eval_as_double(z + SHIFT); kd = eval_as_double (z + SHIFT);
ki = asuint64(kd); ki = asuint64 (kd);
kd -= SHIFT; kd -= SHIFT;
#endif #endif
r = z - kd; r = z - kd;
/* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
t = T[ki % N]; t = T[ki % N];
t += ki << (52 - EXP2F_TABLE_BITS); t += ki << (52 - EXP2F_TABLE_BITS);
s = asdouble(t); s = asdouble (t);
z = C[0] * r + C[1]; z = C[0] * r + C[1];
r2 = r * r; r2 = r * r;
y = C[2] * r + 1; y = C[2] * r + 1;
y = z * r2 + y; y = z * r2 + y;
y = y * s; y = y * s;
return eval_as_float(y); return eval_as_float (y);
} }
#if USE_GLIBC_ABI
strong_alias (expf, __expf_finite)
hidden_alias (expf, __ieee754_expf)
#endif

View file

@ -29,7 +29,6 @@
*/ */
#include "libc/math.h" #include "libc/math.h"
#include "libc/tinymath/freebsd.internal.h" #include "libc/tinymath/freebsd.internal.h"
__static_yoink("freebsd_libm_notice"); __static_yoink("freebsd_libm_notice");
#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)

View file

@ -28,7 +28,6 @@
#include "libc/math.h" #include "libc/math.h"
__static_yoink("musl_libc_notice"); __static_yoink("musl_libc_notice");
#if FLT_EVAL_METHOD > 1U && LDBL_MANT_DIG == 64 #if FLT_EVAL_METHOD > 1U && LDBL_MANT_DIG == 64
#define SPLIT (0x1p32 + 1) #define SPLIT (0x1p32 + 1)
#else #else

View file

@ -28,7 +28,11 @@
#include "libc/math.h" #include "libc/math.h"
__static_yoink("musl_libc_notice"); __static_yoink("musl_libc_notice");
/**
* Returns Euclidean distance.
*
* Max observed error is 1 ulp.
*/
float hypotf(float x, float y) float hypotf(float x, float y)
{ {
union {float f; uint32_t i;} ux = {x}, uy = {y}, ut; union {float f; uint32_t i;} ux = {x}, uy = {y}, ut;

175
libc/tinymath/hypotf2.c Normal file
View file

@ -0,0 +1,175 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
FreeBSD lib/msun/src/e_hypotf.c
Copyright (c) 1992-2023 The FreeBSD Project.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
Developed at SunPro, a Sun Microsystems, Inc. business.
Permission to use, copy, modify, and distribute this
software is freely granted, provided that this notice
is preserved.
*/
#include "libc/tinymath/freebsd.internal.h"
__static_yoink("freebsd_libm_notice");
__static_yoink("fdlibm_notice");
static const float one = 1.0, tiny=1.0e-30;
/**
 * Computes the square root of a float with integer arithmetic.
 *
 * The significand of the root is generated one bit at a time by the
 * shift-and-subtract method. The last bit is rounded by probing the
 * active rounding direction with a floating-point add, which also
 * raises the inexact flag when the root is not exact.
 *
 * @param x is the operand
 * @return square root of x; ±0, NaN and +Inf are returned as x*x+x or
 *     x itself, and negative operands (including -Inf) yield NaN
 */
float
sqrtf2(float x)
{
	float res;
	int32_t bits, acc, root, expo, trial, shifts;
	uint32_t bit;

	GET_FLOAT_WORD(bits, x);

	/* Inf and NaN: sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=NaN */
	if ((bits & 0x7f800000) == 0x7f800000)
		return x * x + x;

	/* sqrt(+-0) = +-0 */
	if ((bits & 0x7fffffff) == 0)
		return x;

	/* sqrt(-ve) = NaN */
	if (bits < 0)
		return (x - x) / (x - x);

	/* normalize x */
	expo = bits >> 23;
	if (expo == 0) {
		/* subnormal x: shift until the implicit bit is in place */
		shifts = 0;
		while ((bits & 0x00800000) == 0) {
			bits <<= 1;
			shifts++;
		}
		expo -= shifts - 1;
	}
	expo -= 127; /* unbias exponent */
	bits = (bits & 0x007fffff) | 0x00800000;
	if (expo & 1) /* odd exponent, double x to make it even */
		bits += bits;
	expo >>= 1; /* expo = [expo/2] */

	/* generate sqrt(x) bit by bit; bit moves from left to right */
	bits += bits;
	root = 0; /* root = sqrt(x) */
	acc = 0;
	for (bit = 0x01000000; bit != 0; bit >>= 1) {
		trial = acc + bit;
		if (trial <= bits) {
			acc = trial + bit;
			bits -= trial;
			root += bit;
		}
		bits += bits;
	}

	/* use floating add to find out rounding direction */
	if (bits != 0) {
		res = one - tiny; /* trigger inexact flag */
		if (res >= one) {
			res = one + tiny;
			root += (res > one) ? 2 : (root & 1);
		}
	}

	/* drop the rounding bit and reattach the halved exponent */
	bits = (root >> 1) + 0x3f000000;
	bits += ((uint32_t)expo << 23);
	SET_FLOAT_WORD(res, bits);
	return res;
}
/**
 * Returns euclidean distance, i.e. sqrt(x*x + y*y).
 *
 * The arguments are ordered by magnitude and, when very large or very
 * small, rescaled by a power of two so the squares neither overflow
 * nor underflow. The sum of squares is then formed from a short "head"
 * and a "tail" of each operand so the head products are exact.
 *
 * Error is less than 1 ULP.
 *
 * @return +inf if either argument is infinite (even if the other is
 *     NaN), otherwise NaN if either argument is NaN
 */
float
hypotf2(float x, float y)
{
	float a,b,t1,t2,y1,y2,w;
	int32_t j,k,ha,hb;

	/* ha, hb = bit patterns of |x|, |y|; order so that a >= b */
	GET_FLOAT_WORD(ha,x);
	ha &= 0x7fffffff;
	GET_FLOAT_WORD(hb,y);
	hb &= 0x7fffffff;
	if(hb > ha) {a=y;b=x;j=ha; ha=hb;hb=j;} else {a=x;b=y;}
	a = fabsf(a);
	b = fabsf(b);
	if((ha-hb)>0xf000000) {return a+b;} /* x/y > 2**30 */
	k=0;
	if(ha > 0x58800000) {	/* a>2**50 */
		if(ha >= 0x7f800000) {	/* Inf or NaN */
			/* Use original arg order iff result is NaN; quieten sNaNs. */
			w = fabsl(x+0.0L)-fabsf(y+0);
			if(ha == 0x7f800000) w = a;
			if(hb == 0x7f800000) w = b;
			return w;
		}
		/* scale a and b by 2**-68 */
		ha -= 0x22000000; hb -= 0x22000000;	k += 68;
		SET_FLOAT_WORD(a,ha);
		SET_FLOAT_WORD(b,hb);
	}
	if(hb < 0x26800000) {	/* b < 2**-50 */
		if(hb <= 0x007fffff) {	/* subnormal b or 0 */
			if(hb==0) return a;
			SET_FLOAT_WORD(t1,0x7e800000);	/* t1=2^126 */
			b *= t1;
			a *= t1;
			k -= 126;
			/*
			 * The scaling above was done in floating point,
			 * so ha and hb still describe the unscaled
			 * values. Re-read them, otherwise the head/tail
			 * splits below are built from the wrong bit
			 * patterns and the extra precision is lost.
			 */
			GET_FLOAT_WORD(ha,a);
			GET_FLOAT_WORD(hb,b);
		} else {		/* scale a and b by 2^68 */
			ha += 0x22000000;	/* a *= 2^68 */
			hb += 0x22000000;	/* b *= 2^68 */
			k -= 68;
			SET_FLOAT_WORD(a,ha);
			SET_FLOAT_WORD(b,hb);
		}
	}
	/* medium size a and b */
	w = a-b;
	if (w>b) {
		/* a > 2b: a*a+b*b = t1*t1 + (b*b + t2*(a+t1)), a = t1+t2 */
		SET_FLOAT_WORD(t1,ha&0xfffff000);
		t2 = a-t1;
		w  = sqrtf2(t1*t1-(b*(-b)-t2*(a+t1)));
	} else {
		/* a <= 2b: a*a+b*b = (a-b)^2 + 2ab, with 2a = t1+t2, b = y1+y2 */
		a  = a+a;
		SET_FLOAT_WORD(y1,hb&0xfffff000);
		y2 = b - y1;
		SET_FLOAT_WORD(t1,(ha+0x00800000)&0xfffff000);
		t2 = a - t1;
		w  = sqrtf2(t1*y1-(w*(-w)-(t1*y2+t2*b)));
	}
	/* undo the power-of-two scaling applied above */
	if(k!=0) {
		SET_FLOAT_WORD(t1,(127+k)<<23);
		return t1*w;
	} else return w;
}

View file

@ -30,7 +30,6 @@
#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
__static_yoink("musl_libc_notice"); __static_yoink("musl_libc_notice");
#if LDBL_MANT_DIG == 64 #if LDBL_MANT_DIG == 64
#define SPLIT (0x1p32L+1) #define SPLIT (0x1p32L+1)
#elif LDBL_MANT_DIG == 113 #elif LDBL_MANT_DIG == 113

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,19 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/intrin/likely.h" #include "libc/tinymath/arm.internal.h"
#include "libc/math.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/log_data.internal.h"
__static_yoink("arm_optimized_routines_notice"); __static_yoink("arm_optimized_routines_notice");
/*
* Double-precision log(x) function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define T __log_data.tab #define T __log_data.tab
#define T2 __log_data.tab2 #define T2 __log_data.tab2
#define B __log_data.poly1 #define B __log_data.poly1
@ -47,95 +37,151 @@ __static_yoink("arm_optimized_routines_notice");
#define N (1 << LOG_TABLE_BITS) #define N (1 << LOG_TABLE_BITS)
#define OFF 0x3fe6000000000000 #define OFF 0x3fe6000000000000
/** /* Top 16 bits of a double. */
* Returns natural logarithm of 𝑥. static inline uint32_t
*/ top16 (double x)
double log(double x)
{ {
double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo; return asuint64 (x) >> 48;
uint64_t ix, iz, tmp;
uint32_t top;
int k, i;
ix = asuint64(x);
top = ix >> 48;
#define LO asuint64(1.0 - 0x1p-4)
#define HI asuint64(1.0 + 0x1.09p-4)
if (UNLIKELY(ix - LO < HI - LO)) {
/* Handle close to 1.0 inputs separately. */
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && UNLIKELY(ix == asuint64(1.0)))
return 0;
r = x - 1.0;
r2 = r * r;
r3 = r * r2;
y = r3 *
(B[1] + r * B[2] + r2 * B[3] +
r3 * (B[4] + r * B[5] + r2 * B[6] +
r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
/* Worst-case error is around 0.507 ULP. */
w = r * 0x1p27;
double_t rhi = r + w - w;
double_t rlo = r - rhi;
w = rhi * rhi * B[0]; /* B[0] == -0.5. */
hi = r + w;
lo = r - hi + w;
lo += B[0] * rlo * (rhi + r);
y += lo;
y += hi;
return eval_as_double(y);
}
if (UNLIKELY(top - 0x0010 >= 0x7ff0 - 0x0010)) {
/* x < 0x1p-1022 or inf or nan. */
if (ix * 2 == 0)
return __math_divzero(1);
if (ix == asuint64(INFINITY)) /* log(inf) == inf. */
return x;
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
return __math_invalid(x);
/* x is subnormal, normalize it. */
ix = asuint64(x * 0x1p52);
ix -= 52ULL << 52;
}
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
k = (int64_t)tmp >> 52; /* arithmetic shift */
iz = ix - (tmp & 0xfffULL << 52);
invc = T[i].invc;
logc = T[i].logc;
z = asdouble(iz);
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
/* r ~= z/c - 1, |r| < 1/(2*N). */
#if __FP_FAST_FMA
/* rounding error: 0x1p-55/N. */
r = __builtin_fma(z, invc, -1.0);
#else
/* rounding error: 0x1p-55/N + 0x1p-66. */
r = (z - T2[i].chi - T2[i].clo) * invc;
#endif
kd = (double_t)k;
/* hi + lo = r + log(c) + k*Ln2. */
w = kd * Ln2hi + logc;
hi = w + r;
lo = w - hi + r + kd * Ln2lo;
/* log(x) = lo + (log1p(r) - r) + hi. */
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
/* Worst case error if |y| > 0x1p-5:
0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
Worst case error if |y| > 0x1p-4:
0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
y = lo + r2 * A[0] +
r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
return eval_as_double(y);
} }
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 /**
__weak_reference(log, logl); * Returns natural logarithm of 𝑥.
*
* @raise EDOM and FE_INVALID if x is negative
* @raise ERANGE and FE_DIVBYZERO if x is zero
*/
double
log (double x)
{
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
uint64_t ix, iz, tmp;
uint32_t top;
int k, i;
ix = asuint64 (x);
top = top16 (x);
#if LOG_POLY1_ORDER == 10 || LOG_POLY1_ORDER == 11
# define LO asuint64 (1.0 - 0x1p-5)
# define HI asuint64 (1.0 + 0x1.1p-5)
#elif LOG_POLY1_ORDER == 12
# define LO asuint64 (1.0 - 0x1p-4)
# define HI asuint64 (1.0 + 0x1.09p-4)
#endif
if (unlikely (ix - LO < HI - LO))
{
/* Handle close to 1.0 inputs separately. */
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
return 0;
r = x - 1.0;
r2 = r * r;
r3 = r * r2;
#if LOG_POLY1_ORDER == 10
/* Worst-case error is around 0.516 ULP. */
y = r3 * (B[1] + r * B[2] + r2 * B[3]
+ r3 * (B[4] + r * B[5] + r2 * B[6] + r3 * (B[7] + r * B[8])));
w = B[0] * r2; /* B[0] == -0.5. */
hi = r + w;
y += r - hi + w;
y += hi;
#elif LOG_POLY1_ORDER == 11
/* Worst-case error is around 0.516 ULP. */
y = r3 * (B[1] + r * B[2]
+ r2 * (B[3] + r * B[4] + r2 * B[5]
+ r3 * (B[6] + r * B[7] + r2 * B[8] + r3 * B[9])));
w = B[0] * r2; /* B[0] == -0.5. */
hi = r + w;
y += r - hi + w;
y += hi;
#elif LOG_POLY1_ORDER == 12
y = r3 * (B[1] + r * B[2] + r2 * B[3]
+ r3 * (B[4] + r * B[5] + r2 * B[6]
+ r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
# if N <= 64
/* Worst-case error is around 0.532 ULP. */
w = B[0] * r2; /* B[0] == -0.5. */
hi = r + w;
y += r - hi + w;
y += hi;
# else
/* Worst-case error is around 0.507 ULP. */
w = r * 0x1p27;
double_t rhi = r + w - w;
double_t rlo = r - rhi;
w = rhi * rhi * B[0]; /* B[0] == -0.5. */
hi = r + w;
lo = r - hi + w;
lo += B[0] * rlo * (rhi + r);
y += lo;
y += hi;
# endif
#endif
return eval_as_double (y);
}
if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
{
/* x < 0x1p-1022 or inf or nan. */
if (ix * 2 == 0)
return __math_divzero (1);
if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */
return x;
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
return __math_invalid (x);
/* x is subnormal, normalize it. */
ix = asuint64 (x * 0x1p52);
ix -= 52ULL << 52;
}
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
k = (int64_t) tmp >> 52; /* arithmetic shift */
iz = ix - (tmp & 0xfffULL << 52);
invc = T[i].invc;
logc = T[i].logc;
z = asdouble (iz);
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
/* r ~= z/c - 1, |r| < 1/(2*N). */
#if HAVE_FAST_FMA
/* rounding error: 0x1p-55/N. */
r = fma (z, invc, -1.0);
#else
/* rounding error: 0x1p-55/N + 0x1p-66. */
r = (z - T2[i].chi - T2[i].clo) * invc;
#endif
kd = (double_t) k;
/* hi + lo = r + log(c) + k*Ln2. */
w = kd * Ln2hi + logc;
hi = w + r;
lo = w - hi + r + kd * Ln2lo;
/* log(x) = lo + (log1p(r) - r) + hi. */
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
/* Worst case error if |y| > 0x1p-5:
0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
Worst case error if |y| > 0x1p-4:
0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
#if LOG_POLY_ORDER == 6
y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
#elif LOG_POLY_ORDER == 7
y = lo
+ r2 * (A[0] + r * A[1] + r2 * (A[2] + r * A[3])
+ r2 * r2 * (A[4] + r * A[5]))
+ hi;
#endif
return eval_as_double (y);
}
#if USE_GLIBC_ABI
strong_alias (log, __log_finite)
hidden_alias (log, __ieee754_log)
# if LDBL_MANT_DIG == 53
long double logl (long double x) { return log (x); }
# endif
#endif #endif

View file

@ -29,11 +29,9 @@
#include "libc/math.h" #include "libc/math.h"
#include "libc/tinymath/complex.internal.h" #include "libc/tinymath/complex.internal.h"
#include "libc/tinymath/internal.h" #include "libc/tinymath/internal.h"
#include "libc/tinymath/log2_data.internal.h"
__static_yoink("musl_libc_notice"); __static_yoink("musl_libc_notice");
__static_yoink("fdlibm_notice"); __static_yoink("fdlibm_notice");
/* origin: FreeBSD /usr/src/lib/msun/src/e_log10.c */ /* origin: FreeBSD /usr/src/lib/msun/src/e_log10.c */
/* /*
* ==================================================== * ====================================================

View file

@ -2,74 +2,84 @@
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Musl Libc Copyright (c) 1992-2024 The FreeBSD Project
Copyright © 2005-2014 Rich Felker, et al. Copyright (c) 1993 Sun Microsystems, Inc.
All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining Redistribution and use in source and binary forms, with or without
a copy of this software and associated documentation files (the modification, are permitted provided that the following conditions
"Software"), to deal in the Software without restriction, including are met:
without limitation the rights to use, copy, modify, merge, publish, 1. Redistributions of source code must retain the above copyright
distribute, sublicense, and/or sell copies of the Software, and to notice, this list of conditions and the following disclaimer.
permit persons to whom the Software is furnished to do so, subject to 2. Redistributions in binary form must reproduce the above copyright
the following conditions: notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
The above copyright notice and this permission notice shall be THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
included in all copies or substantial portions of the Software. ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/ */
#include "libc/intrin/likely.h" #include "libc/tinymath/freebsd.internal.h"
#include "libc/math.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/log_data.internal.h"
__static_yoink("freebsd_libm_notice"); __static_yoink("freebsd_libm_notice");
__static_yoink("fdlibm_notice"); __static_yoink("fdlibm_notice");
/* origin: FreeBSD /usr/src/lib/msun/src/s_log1p.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* double log1p(double x) /* double log1p(double x)
* Return the natural logarithm of 1+x.
* *
* Method : * Method :
* 1. Argument Reduction: find k and f such that * 1. Argument Reduction: find k and f such that
* 1+x = 2^k * (1+f), * 1+x = 2^k * (1+f),
* where sqrt(2)/2 < 1+f < sqrt(2) . * where sqrt(2)/2 < 1+f < sqrt(2) .
* *
* Note. If k=0, then f=x is exact. However, if k!=0, then f * Note. If k=0, then f=x is exact. However, if k!=0, then f
* may not be representable exactly. In that case, a correction * may not be representable exactly. In that case, a correction
* term is need. Let u=1+x rounded. Let c = (1+x)-u, then * term is need. Let u=1+x rounded. Let c = (1+x)-u, then
* log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u), * log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
* and add back the correction term c/u. * and add back the correction term c/u.
* (Note: when x > 2**53, one can simply return log(x)) * (Note: when x > 2**53, one can simply return log(x))
* *
* 2. Approximation of log(1+f): See log.c * 2. Approximation of log1p(f).
* Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
* = 2s + 2/3 s**3 + 2/5 s**5 + .....,
* = 2s + s*R
* We use a special Remez algorithm on [0,0.1716] to generate
* a polynomial of degree 14 to approximate R The maximum error
* of this polynomial approximation is bounded by 2**-58.45. In
* other words,
* 2 4 6 8 10 12 14
* R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s +Lp6*s +Lp7*s
* (the values of Lp1 to Lp7 are listed in the program)
* and
* | 2 14 | -58.45
* | Lp1*s +...+Lp7*s - R(z) | <= 2
* | |
* Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
* In order to guarantee error in log below 1ulp, we compute log
* by
* log1p(f) = f - (hfsq - s*(hfsq+R)).
* *
* 3. Finally, log1p(x) = k*ln2 + log(1+f) + c/u. See log.c * 3. Finally, log1p(x) = k*ln2 + log1p(f).
* = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
* Here ln2 is split into two floating point number:
* ln2_hi + ln2_lo,
* where n*ln2_hi is always exact for |n| < 2000.
* *
* Special cases: * Special cases:
* log1p(x) is NaN with signal if x < -1 (including -INF) ; * log1p(x) is NaN with signal if x < -1 (including -INF) ;
* log1p(+INF) is +INF; log1p(-1) is -INF with signal; * log1p(+INF) is +INF; log1p(-1) is -INF with signal;
* log1p(NaN) is that NaN with no signal. * log1p(NaN) is that NaN with no signal.
* *
* Accuracy: * Accuracy:
* according to an error analysis, the error is always less than * according to an error analysis, the error is always less than
* 1 ulp (unit in the last place). * 1 ulp (unit in the last place).
* *
* Constants: * Constants:
* The hexadecimal values are the intended ones for the following * The hexadecimal values are the intended ones for the following
@ -78,84 +88,110 @@ __static_yoink("fdlibm_notice");
* to produce the hexadecimal values shown. * to produce the hexadecimal values shown.
* *
* Note: Assuming log() return accurate answer, the following * Note: Assuming log() return accurate answer, the following
* algorithm can be used to compute log1p(x) to within a few ULP: * algorithm can be used to compute log1p(x) to within a few ULP:
* *
* u = 1+x; * u = 1+x;
* if(u==1.0) return x ; else * if(u==1.0) return x ; else
* return log(u)*(x/(u-1.0)); * return log(u)*(x/(u-1.0));
* *
* See HP-15C Advanced Functions Handbook, p.193. * See HP-15C Advanced Functions Handbook, p.193.
*/ */
static const double static const double
ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */ ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */ ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */
Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ Lp1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ Lp2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ Lp3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ Lp4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ Lp5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ Lp6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
Lp7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
static const double zero = 0.0;
static volatile double vzero = 0.0;
/** /**
* Returns log(𝟷+𝑥). * Returns log(1 + x).
*/ */
double log1p(double x) double
log1p(double x)
{ {
union {double f; uint64_t i;} u = {x}; double hfsq,f,c,s,z,R,u;
double_t hfsq,f,c,s,z,R,w,t1,t2,dk; int32_t k,hx,hu,ax;
uint32_t hx,hu;
int k; GET_HIGH_WORD(hx,x);
ax = hx&0x7fffffff;
hx = u.i>>32;
k = 1; k = 1;
if (hx < 0x3fda827a || hx>>31) { /* 1+x < sqrt(2)+ */ if (hx < 0x3FDA827A) { /* 1+x < sqrt(2)+ */
if (hx >= 0xbff00000) { /* x <= -1.0 */ if(ax>=0x3ff00000) { /* x <= -1.0 */
if (x == -1) if(x==-1.0) return -two54/vzero; /* log1p(-1)=+inf */
return x/0.0; /* log1p(-1) = -inf */ else return (x-x)/(x-x); /* log1p(x<-1)=NaN */
return (x-x)/0.0; /* log1p(x<-1) = NaN */ }
} if(ax<0x3e200000) { /* |x| < 2**-29 */
if (hx<<1 < 0x3ca00000<<1) { /* |x| < 2**-53 */ if(two54+x>zero /* raise inexact */
/* underflow if subnormal */ &&ax<0x3c900000) /* |x| < 2**-54 */
if ((hx&0x7ff00000) == 0) return x;
FORCE_EVAL((float)x); else
return x; return x - x*x*0.5;
} }
if (hx <= 0xbfd2bec4) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ if(hx>0||hx<=((int32_t)0xbfd2bec4)) {
k = 0; k=0;f=x;hu=1;} /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
c = 0;
f = x;
}
} else if (hx >= 0x7ff00000)
return x;
if (k) {
u.f = 1 + x;
hu = u.i>>32;
hu += 0x3ff00000 - 0x3fe6a09e;
k = (int)(hu>>20) - 0x3ff;
/* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
if (k < 54) {
c = k >= 2 ? 1-(u.f-x) : x-(u.f-1);
c /= u.f;
} else
c = 0;
/* reduce u into [sqrt(2)/2, sqrt(2)] */
hu = (hu&0x000fffff) + 0x3fe6a09e;
u.i = (uint64_t)hu<<32 | (u.i&0xffffffff);
f = u.f - 1;
} }
hfsq = 0.5*f*f; if (hx >= 0x7ff00000) return x+x;
s = f/(2.0+f); if(k!=0) {
if(hx<0x43400000) {
STRICT_ASSIGN(double,u,1.0+x);
GET_HIGH_WORD(hu,u);
k = (hu>>20)-1023;
c = (k>0)? 1.0-(u-x):x-(u-1.0);/* correction term */
c /= u;
} else {
u = x;
GET_HIGH_WORD(hu,u);
k = (hu>>20)-1023;
c = 0;
}
hu &= 0x000fffff;
/*
* The approximation to sqrt(2) used in thresholds is not
* critical. However, the ones used above must give less
* strict bounds than the one here so that the k==0 case is
* never reached from here, since here we have committed to
* using the correction term but don't use it if k==0.
*/
if(hu<0x6a09e) { /* u ~< sqrt(2) */
SET_HIGH_WORD(u,hu|0x3ff00000); /* normalize u */
} else {
k += 1;
SET_HIGH_WORD(u,hu|0x3fe00000); /* normalize u/2 */
hu = (0x00100000-hu)>>2;
}
f = u-1.0;
}
hfsq=0.5*f*f;
if(hu==0) { /* |f| < 2**-20 */
if(f==zero) {
if(k==0) {
return zero;
} else {
c += k*ln2_lo;
return k*ln2_hi+c;
}
}
R = hfsq*(1.0-0.66666666666666666*f);
if(k==0) return f-R; else
return k*ln2_hi-((R-(k*ln2_lo+c))-f);
}
s = f/(2.0+f);
z = s*s; z = s*s;
w = z*z; R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7))))));
t1 = w*(Lg2+w*(Lg4+w*Lg6)); if(k==0) return f-(hfsq-s*(hfsq+R)); else
t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); return k*ln2_hi-((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f);
R = t2 + t1;
dk = k;
return s*(hfsq+R) + (dk*ln2_lo+c) - hfsq + f + dk*ln2_hi;
} }
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 #if (LDBL_MANT_DIG == 53)
__weak_reference(log1p, log1pl); __weak_reference(log1p, log1pl);
#endif #endif

View file

@ -1,175 +1,133 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Optimized Routines Copyright (c) 1992-2024 The FreeBSD Project
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 1993 Sun Microsystems, Inc.
All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining Redistribution and use in source and binary forms, with or without
a copy of this software and associated documentation files (the modification, are permitted provided that the following conditions
"Software"), to deal in the Software without restriction, including are met:
without limitation the rights to use, copy, modify, merge, publish, 1. Redistributions of source code must retain the above copyright
distribute, sublicense, and/or sell copies of the Software, and to notice, this list of conditions and the following disclaimer.
permit persons to whom the Software is furnished to do so, subject to 2. Redistributions in binary form must reproduce the above copyright
the following conditions: notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
The above copyright notice and this permission notice shall be THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
included in all copies or substantial portions of the Software. ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/ */
#include "libc/intrin/likely.h" #include "libc/tinymath/freebsd.internal.h"
#include "libc/math.h" __static_yoink("freebsd_libm_notice");
#include "libc/tinymath/internal.h" __static_yoink("fdlibm_notice");
#include "libc/tinymath/log1pf_data.internal.h"
__static_yoink("arm_optimized_routines_notice");
#define Ln2 (0x1.62e43p-1f) /* s_log1pf.c -- float version of s_log1p.c.
#define SignMask (0x80000000) * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
*/
/* Biased exponent of the largest float m for which m^8 underflows. */ static const float
#define M8UFLOW_BOUND_BEXP 112 ln2_hi = 6.9313812256e-01, /* 0x3f317180 */
/* Biased exponent of the largest float for which we just return x. */ ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */
#define TINY_BOUND_BEXP 103 two25 = 3.355443200e+07, /* 0x4c000000 */
Lp1 = 6.6666668653e-01, /* 3F2AAAAB */
Lp2 = 4.0000000596e-01, /* 3ECCCCCD */
Lp3 = 2.8571429849e-01, /* 3E924925 */
Lp4 = 2.2222198546e-01, /* 3E638E29 */
Lp5 = 1.8183572590e-01, /* 3E3A3325 */
Lp6 = 1.5313838422e-01, /* 3E1CD04F */
Lp7 = 1.4798198640e-01; /* 3E178897 */
#define C(i) __log1pf_data.coeffs[i] static const float zero = 0.0;
static volatile float vzero = 0.0;
static inline float /**
eval_poly (float m, uint32_t e) * Returns log(1 + x).
{ */
#ifdef LOG1PF_2U5
/* 2.5 ulp variant. Approximate log(1+m) on [-0.25, 0.5] using
slightly modified Estrin scheme (no x^0 term, and x term is just x). */
float p_12 = fmaf (m, C (1), C (0));
float p_34 = fmaf (m, C (3), C (2));
float p_56 = fmaf (m, C (5), C (4));
float p_78 = fmaf (m, C (7), C (6));
float m2 = m * m;
float p_02 = fmaf (m2, p_12, m);
float p_36 = fmaf (m2, p_56, p_34);
float p_79 = fmaf (m2, C (8), p_78);
float m4 = m2 * m2;
float p_06 = fmaf (m4, p_36, p_02);
if (UNLIKELY (e < M8UFLOW_BOUND_BEXP))
return p_06;
float m8 = m4 * m4;
return fmaf (m8, p_79, p_06);
#elif defined(LOG1PF_1U3)
/* 1.3 ulp variant. Approximate log(1+m) on [-0.25, 0.5] using Horner
scheme. Our polynomial approximation for log1p has the form
x + C1 * x^2 + C2 * x^3 + C3 * x^4 + ...
Hence approximation has the form m + m^2 * P(m)
where P(x) = C1 + C2 * x + C3 * x^2 + ... . */
return fmaf (m, m * HORNER_8 (m, C), m);
#else
#error No log1pf approximation exists with the requested precision. Options are 13 or 25.
#endif
}
static inline uint32_t
biased_exponent (uint32_t ix)
{
return (ix & 0x7f800000) >> 23;
}
/* log1pf approximation using polynomial on reduced interval. Worst-case error
when using Estrin is roughly 2.02 ULP:
log1pf(0x1.21e13ap-2) got 0x1.fe8028p-3 want 0x1.fe802cp-3. */
float float
log1pf (float x) log1pf(float x)
{ {
uint32_t ix = asuint (x); float hfsq,f,c,s,z,R,u;
uint32_t ia = ix & ~SignMask; int32_t k,hx,hu,ax;
uint32_t ia12 = ia >> 20;
uint32_t e = biased_exponent (ix);
/* Handle special cases first. */ GET_FLOAT_WORD(hx,x);
if (UNLIKELY (ia12 >= 0x7f8 || ix >= 0xbf800000 || ix == 0x80000000 ax = hx&0x7fffffff;
|| e <= TINY_BOUND_BEXP))
{ k = 1;
if (ix == 0xff800000) if (hx < 0x3ed413d0) { /* 1+x < sqrt(2)+ */
{ if(ax>=0x3f800000) { /* x <= -1.0 */
/* x == -Inf => log1pf(x) = NaN. */ if(x==(float)-1.0) return -two25/vzero; /* log1p(-1)=+inf */
return NAN; else return (x-x)/(x-x); /* log1p(x<-1)=NaN */
}
if(ax<0x38000000) { /* |x| < 2**-15 */
if(two25+x>zero /* raise inexact */
&&ax<0x33800000) /* |x| < 2**-24 */
return x;
else
return x - x*x*(float)0.5;
}
if(hx>0||hx<=((int32_t)0xbe95f619)) {
k=0;f=x;hu=1;} /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
} }
if ((ix == 0x7f800000 || e <= TINY_BOUND_BEXP) && ia12 <= 0x7f8) if (hx >= 0x7f800000) return x+x;
{ if(k!=0) {
/* |x| < TinyBound => log1p(x) = x. if(hx<0x5a000000) {
x == Inf => log1pf(x) = Inf. */ STRICT_ASSIGN(float,u,(float)1.0+x);
return x; GET_FLOAT_WORD(hu,u);
k = (hu>>23)-127;
/* correction term */
c = (k>0)? (float)1.0-(u-x):x-(u-(float)1.0);
c /= u;
} else {
u = x;
GET_FLOAT_WORD(hu,u);
k = (hu>>23)-127;
c = 0;
}
hu &= 0x007fffff;
/*
* The approximation to sqrt(2) used in thresholds is not
* critical. However, the ones used above must give less
* strict bounds than the one here so that the k==0 case is
* never reached from here, since here we have committed to
* using the correction term but don't use it if k==0.
*/
if(hu<0x3504f4) { /* u < sqrt(2) */
SET_FLOAT_WORD(u,hu|0x3f800000);/* normalize u */
} else {
k += 1;
SET_FLOAT_WORD(u,hu|0x3f000000); /* normalize u/2 */
hu = (0x00800000-hu)>>2;
}
f = u-(float)1.0;
} }
if (ix == 0xbf800000) hfsq=(float)0.5*f*f;
{ if(hu==0) { /* |f| < 2**-20 */
/* x == -1.0 => log1pf(x) = -Inf. */ if(f==zero) {
return __math_divzerof (-1); if(k==0) {
return zero;
} else {
c += k*ln2_lo;
return k*ln2_hi+c;
}
}
R = hfsq*((float)1.0-(float)0.66666666666666666*f);
if(k==0) return f-R; else
return k*ln2_hi-((R-(k*ln2_lo+c))-f);
} }
if (ia12 >= 0x7f8) s = f/((float)2.0+f);
{ z = s*s;
/* x == +/-NaN => log1pf(x) = NaN. */ R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7))))));
return __math_invalidf (asfloat (ia)); if(k==0) return f-(hfsq-s*(hfsq+R)); else
} return k*ln2_hi-((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f);
/* x < -1.0 => log1pf(x) = NaN. */
return __math_invalidf (x);
}
/* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
is in [-0.25, 0.5]):
log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
We approximate log1p(m) with a polynomial, then scale by
k*log(2). Instead of doing this directly, we use an intermediate
scale factor s = 4*k*log(2) to ensure the scale is representable
as a normalised fp32 number. */
if (ix <= 0x3f000000 || ia <= 0x3e800000)
{
/* If x is in [-0.25, 0.5] then we can shortcut all the logic
below, as k = 0 and m = x. All we need is to return the
polynomial. */
return eval_poly (x, e);
}
float m = x + 1.0f;
/* k is used scale the input. 0x3f400000 is chosen as we are trying to
reduce x to the range [-0.25, 0.5]. Inside this range, k is 0.
Outside this range, if k is reinterpreted as (NOT CONVERTED TO) float:
let k = sign * 2^p where sign = -1 if x < 0
1 otherwise
and p is a negative integer whose magnitude increases with the
magnitude of x. */
int k = (asuint (m) - 0x3f400000) & 0xff800000;
/* By using integer arithmetic, we obtain the necessary scaling by
subtracting the unbiased exponent of k from the exponent of x. */
float m_scale = asfloat (asuint (x) - k);
/* Scale up to ensure that the scale factor is representable as normalised
fp32 number (s in [2**-126,2**26]), and scale m down accordingly. */
float s = asfloat (asuint (4.0f) - k);
m_scale = m_scale + fmaf (0.25f, s, -1.0f);
float p = eval_poly (m_scale, biased_exponent (asuint (m_scale)));
/* The scale factor to be applied back at the end - by multiplying float(k)
by 2^-23 we get the unbiased exponent of k. */
float scale_back = (float) k * 0x1.0p-23f;
/* Apply the scaling back. */
return fmaf (scale_back, Ln2, p);
} }

View file

@ -1,13 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG1PF_DATA_H_
#define COSMOPOLITAN_LIBC_TINYMATH_LOG1PF_DATA_H_
COSMOPOLITAN_C_START_
#define LOG1PF_2U5
#define V_LOG1PF_2U5
#define LOG1PF_NCOEFFS 9
extern const struct log1pf_data {
float coeffs[LOG1PF_NCOEFFS]; //
} __log1pf_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG1PF_DATA_H_ */

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,20 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/intrin/likely.h" #include "libc/tinymath/arm.internal.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/log2_data.internal.h"
__static_yoink("arm_optimized_routines_notice"); __static_yoink("arm_optimized_routines_notice");
/*
* Double-precision log2(x) function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define T __log2_data.tab #define T __log2_data.tab
#define T2 __log2_data.tab2 #define T2 __log2_data.tab2
#define B __log2_data.poly1 #define B __log2_data.poly1
@ -49,110 +38,126 @@ __static_yoink("arm_optimized_routines_notice");
#define OFF 0x3fe6000000000000 #define OFF 0x3fe6000000000000
/* Top 16 bits of a double. */ /* Top 16 bits of a double. */
static inline uint32_t top16(double x) static inline uint32_t
top16 (double x)
{ {
return asuint64(x) >> 48; return asuint64 (x) >> 48;
} }
/** /**
* Calculates log𝑥. * Returns base 2 logarithm of x.
*/ */
double log2(double x) double
log2 (double x)
{ {
double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p; /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
uint64_t ix, iz, tmp; double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
uint32_t top; uint64_t ix, iz, tmp;
int k, i; uint32_t top;
int k, i;
ix = asuint64(x); ix = asuint64 (x);
top = top16(x); top = top16 (x);
#define LO asuint64(1.0 - 0x1.5b51p-5)
#define HI asuint64(1.0 + 0x1.6ab2p-5) #if LOG2_POLY1_ORDER == 11
if (UNLIKELY(ix - LO < HI - LO)) { # define LO asuint64 (1.0 - 0x1.5b51p-5)
/* Handle close to 1.0 inputs separately. */ # define HI asuint64 (1.0 + 0x1.6ab2p-5)
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && UNLIKELY(ix == asuint64(1.0)))
return 0;
r = x - 1.0;
#if __FP_FAST_FMA
hi = r * InvLn2hi;
lo = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -hi);
#else
double_t rhi, rlo;
rhi = asdouble(asuint64(r) & -1ULL << 32);
rlo = r - rhi;
hi = rhi * InvLn2hi;
lo = rlo * InvLn2hi + r * InvLn2lo;
#endif #endif
r2 = r * r; /* rounding error: 0x1p-62. */ if (unlikely (ix - LO < HI - LO))
r4 = r2 * r2; {
/* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */ /* Handle close to 1.0 inputs separately. */
p = r2 * (B[0] + r * B[1]); /* Fix sign of zero with downward rounding when x==1. */
y = hi + p; if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
lo += hi - y + p; return 0;
lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) + r = x - 1.0;
r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9]))); #if HAVE_FAST_FMA
y += lo; hi = r * InvLn2hi;
return eval_as_double(y); lo = r * InvLn2lo + fma (r, InvLn2hi, -hi);
}
if (UNLIKELY(top - 0x0010 >= 0x7ff0 - 0x0010)) {
/* x < 0x1p-1022 or inf or nan. */
if (ix * 2 == 0)
return __math_divzero(1);
if (ix == asuint64(INFINITY)) /* log(inf) == inf. */
return x;
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
return __math_invalid(x);
/* x is subnormal, normalize it. */
ix = asuint64(x * 0x1p52);
ix -= 52ULL << 52;
}
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
k = (int64_t)tmp >> 52; /* arithmetic shift */
iz = ix - (tmp & 0xfffULL << 52);
invc = T[i].invc;
logc = T[i].logc;
z = asdouble(iz);
kd = (double_t)k;
/* log2(x) = log2(z/c) + log2(c) + k. */
/* r ~= z/c - 1, |r| < 1/(2*N). */
#if __FP_FAST_FMA
/* rounding error: 0x1p-55/N. */
r = __builtin_fma(z, invc, -1.0);
t1 = r * InvLn2hi;
t2 = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -t1);
#else #else
double_t rhi, rlo; double_t rhi, rlo;
/* rounding error: 0x1p-55/N + 0x1p-65. */ rhi = asdouble (asuint64 (r) & -1ULL << 32);
r = (z - T2[i].chi - T2[i].clo) * invc; rlo = r - rhi;
rhi = asdouble(asuint64(r) & -1ULL << 32); hi = rhi * InvLn2hi;
rlo = r - rhi; lo = rlo * InvLn2hi + r * InvLn2lo;
t1 = rhi * InvLn2hi; #endif
t2 = rlo * InvLn2hi + r * InvLn2lo; r2 = r * r; /* rounding error: 0x1p-62. */
r4 = r2 * r2;
#if LOG2_POLY1_ORDER == 11
/* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */
p = r2 * (B[0] + r * B[1]);
y = hi + p;
lo += hi - y + p;
lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5])
+ r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
y += lo;
#endif
return eval_as_double (y);
}
if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
{
/* x < 0x1p-1022 or inf or nan. */
if (ix * 2 == 0)
return __math_divzero (1);
if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */
return x;
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
return __math_invalid (x);
/* x is subnormal, normalize it. */
ix = asuint64 (x * 0x1p52);
ix -= 52ULL << 52;
}
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
k = (int64_t) tmp >> 52; /* arithmetic shift */
iz = ix - (tmp & 0xfffULL << 52);
invc = T[i].invc;
logc = T[i].logc;
z = asdouble (iz);
kd = (double_t) k;
/* log2(x) = log2(z/c) + log2(c) + k. */
/* r ~= z/c - 1, |r| < 1/(2*N). */
#if HAVE_FAST_FMA
/* rounding error: 0x1p-55/N. */
r = fma (z, invc, -1.0);
t1 = r * InvLn2hi;
t2 = r * InvLn2lo + fma (r, InvLn2hi, -t1);
#else
double_t rhi, rlo;
/* rounding error: 0x1p-55/N + 0x1p-65. */
r = (z - T2[i].chi - T2[i].clo) * invc;
rhi = asdouble (asuint64 (r) & -1ULL << 32);
rlo = r - rhi;
t1 = rhi * InvLn2hi;
t2 = rlo * InvLn2hi + r * InvLn2lo;
#endif #endif
/* hi + lo = r/ln2 + log2(c) + k. */ /* hi + lo = r/ln2 + log2(c) + k. */
t3 = kd + logc; t3 = kd + logc;
hi = t3 + t1; hi = t3 + t1;
lo = t3 - hi + t1 + t2; lo = t3 - hi + t1 + t2;
/* log2(r+1) = r/ln2 + r^2*poly(r). */ /* log2(r+1) = r/ln2 + r^2*poly(r). */
/* Evaluation is optimized assuming superscalar pipelined execution. */ /* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r; /* rounding error: 0x1p-54/N^2. */ r2 = r * r; /* rounding error: 0x1p-54/N^2. */
r4 = r2 * r2; r4 = r2 * r2;
/* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma). #if LOG2_POLY_ORDER == 7
~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */ /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]); ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */
y = lo + r2 * p + hi; p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
return eval_as_double(y); y = lo + r2 * p + hi;
#endif
return eval_as_double (y);
} }
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 #if USE_GLIBC_ABI
__weak_reference(log2, log2l); strong_alias (log2, __log2_finite)
hidden_alias (log2, __ieee754_log2)
# if LDBL_MANT_DIG == 53
long double log2l (long double x) { return log2 (x); }
# endif
#endif #endif

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Musl Libc Optimized Routines
Copyright © 2005-2014 Rich Felker, et al. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,14 +25,8 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/tinymath/log2_data.internal.h" #include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/*
* Data for log2.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define N (1 << LOG2_TABLE_BITS) #define N (1 << LOG2_TABLE_BITS)
@ -41,6 +35,7 @@ const struct log2_data __log2_data = {
.invln2hi = 0x1.7154765200000p+0, .invln2hi = 0x1.7154765200000p+0,
.invln2lo = 0x1.705fc2eefa200p-33, .invln2lo = 0x1.705fc2eefa200p-33,
.poly1 = { .poly1 = {
#if LOG2_POLY1_ORDER == 11
// relative error: 0x1.2fad8188p-63 // relative error: 0x1.2fad8188p-63
// in -0x1.5b51p-5 0x1.6ab2p-5 // in -0x1.5b51p-5 0x1.6ab2p-5
-0x1.71547652b82fep-1, -0x1.71547652b82fep-1,
@ -53,8 +48,10 @@ const struct log2_data __log2_data = {
0x1.484d154f01b4ap-3, 0x1.484d154f01b4ap-3,
-0x1.289e4a72c383cp-3, -0x1.289e4a72c383cp-3,
0x1.0b32f285aee66p-3, 0x1.0b32f285aee66p-3,
#endif
}, },
.poly = { .poly = {
#if N == 64 && LOG2_POLY_ORDER == 7
// relative error: 0x1.a72c2bf8p-58 // relative error: 0x1.a72c2bf8p-58
// abs error: 0x1.67a552c8p-66 // abs error: 0x1.67a552c8p-66
// in -0x1.f45p-8 0x1.f45p-8 // in -0x1.f45p-8 0x1.f45p-8
@ -64,6 +61,7 @@ const struct log2_data __log2_data = {
0x1.2776c50034c48p-2, 0x1.2776c50034c48p-2,
-0x1.ec7b328ea92bcp-3, -0x1.ec7b328ea92bcp-3,
0x1.a6225e117f92ep-3, 0x1.a6225e117f92ep-3,
#endif
}, },
/* Algorithm: /* Algorithm:
@ -92,6 +90,7 @@ single rounding error when there is no fast fma for z*invc - 1, 3) ensures
that logc + poly(z/c - 1) has small error, however near x == 1 when that logc + poly(z/c - 1) has small error, however near x == 1 when
|log2(x)| < 0x1p-4, this is not enough so that is special cased. */ |log2(x)| < 0x1p-4, this is not enough so that is special cased. */
.tab = { .tab = {
#if N == 64
{0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1}, {0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
{0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1}, {0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
{0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1}, {0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
@ -156,9 +155,11 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when
{0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2}, {0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
{0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2}, {0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
{0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2}, {0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2},
#endif
}, },
#if !__FP_FAST_FMA #if !HAVE_FAST_FMA
.tab2 = { .tab2 = {
# if N == 64
{0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55}, {0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
{0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57}, {0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
{0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55}, {0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
@ -223,6 +224,7 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when
{0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55}, {0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
{0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55}, {0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
{0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54}, {0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54},
# endif
}, },
#endif #endif /* !HAVE_FAST_FMA */
}; };

View file

@ -1,26 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG2_DATA_INTERNAL_H_
#define COSMOPOLITAN_LIBC_TINYMATH_LOG2_DATA_INTERNAL_H_
#define LOG2_TABLE_BITS 6
#define LOG2_POLY_ORDER 7
#define LOG2_POLY1_ORDER 11
COSMOPOLITAN_C_START_
extern const struct log2_data {
double invln2hi;
double invln2lo;
double poly[LOG2_POLY_ORDER - 1];
double poly1[LOG2_POLY1_ORDER - 1];
struct {
double invc, logc;
} tab[1 << LOG2_TABLE_BITS];
#if !__FP_FAST_FMA
struct {
double chi, clo;
} tab2[1 << LOG2_TABLE_BITS];
#endif
} __log2_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG2_DATA_INTERNAL_H_ */

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,20 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/intrin/likely.h" #include "libc/tinymath/arm.internal.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/log2f_data.internal.h"
__static_yoink("arm_optimized_routines_notice"); __static_yoink("arm_optimized_routines_notice");
/*
* Single-precision log2 function.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/* /*
LOG2F_TABLE_BITS = 4 LOG2F_TABLE_BITS = 4
LOG2F_POLY_ORDER = 4 LOG2F_POLY_ORDER = 4
@ -53,52 +42,65 @@ Relative error: 1.9 * 2^-26 (before rounding.)
#define OFF 0x3f330000 #define OFF 0x3f330000
/** /**
* Calculates log𝑥. * Returns base-2 logarithm of x.
*
* - ULP error: 0.752 (nearest rounding.)
* - Relative error: 1.9 * 2^-26 (before rounding.)
*/ */
float log2f(float x) float
log2f (float x)
{ {
double_t z, r, r2, p, y, y0, invc, logc; /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
uint32_t ix, iz, top, tmp; double_t z, r, r2, p, y, y0, invc, logc;
int k, i; uint32_t ix, iz, top, tmp;
int k, i;
ix = asuint(x); ix = asuint (x);
/* Fix sign of zero with downward rounding when x==1. */ #if WANT_ROUNDING
if (WANT_ROUNDING && UNLIKELY(ix == 0x3f800000)) /* Fix sign of zero with downward rounding when x==1. */
return 0; if (unlikely (ix == 0x3f800000))
if (UNLIKELY(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) { return 0;
/* x < 0x1p-126 or inf or nan. */ #endif
if (ix * 2 == 0) if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
return __math_divzerof(1); {
if (ix == 0x7f800000) /* log2(inf) == inf. */ /* x < 0x1p-126 or inf or nan. */
return x; if (ix * 2 == 0)
if ((ix & 0x80000000) || ix * 2 >= 0xff000000) return __math_divzerof (1);
return __math_invalidf(x); if (ix == 0x7f800000) /* log2(inf) == inf. */
/* x is subnormal, normalize it. */ return x;
ix = asuint(x * 0x1p23f); if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
ix -= 23 << 23; return __math_invalidf (x);
} /* x is subnormal, normalize it. */
ix = asuint (x * 0x1p23f);
ix -= 23 << 23;
}
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact. /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
The range is split into N subintervals. The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */ The ith subinterval contains z and c is near its center. */
tmp = ix - OFF; tmp = ix - OFF;
i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N; i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N;
top = tmp & 0xff800000; top = tmp & 0xff800000;
iz = ix - top; iz = ix - top;
k = (int32_t)tmp >> 23; /* arithmetic shift */ k = (int32_t) tmp >> 23; /* arithmetic shift */
invc = T[i].invc; invc = T[i].invc;
logc = T[i].logc; logc = T[i].logc;
z = (double_t)asfloat(iz); z = (double_t) asfloat (iz);
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */ /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
r = z * invc - 1; r = z * invc - 1;
y0 = logc + (double_t)k; y0 = logc + (double_t) k;
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */ /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
r2 = r * r; r2 = r * r;
y = A[1] * r + A[2]; y = A[1] * r + A[2];
y = A[0] * r2 + y; y = A[0] * r2 + y;
p = A[3] * r + y0; p = A[3] * r + y0;
y = y * r2 + p; y = y * r2 + p;
return eval_as_float(y); return eval_as_float (y);
} }
#if USE_GLIBC_ABI
strong_alias (log2f, __log2f_finite)
hidden_alias (log2f, __ieee754_log2f)
#endif

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,16 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/tinymath/log2f_data.internal.h" #include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice"); __static_yoink("arm_optimized_routines_notice");
/*
* Data definition for log2f.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
const struct log2f_data __log2f_data = { const struct log2f_data __log2f_data = {
.tab = { .tab = {
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 }, { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },

View file

@ -1,17 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG2F_DATA_INTERNAL_H_
#define COSMOPOLITAN_LIBC_TINYMATH_LOG2F_DATA_INTERNAL_H_
#define LOG2F_TABLE_BITS 4
#define LOG2F_POLY_ORDER 4
COSMOPOLITAN_C_START_
extern const struct log2f_data {
struct {
double invc, logc;
} tab[1 << LOG2F_TABLE_BITS];
double poly[LOG2F_POLY_ORDER];
} __log2f_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG2F_DATA_INTERNAL_H_ */

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,22 +25,41 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/tinymath/log_data.internal.h" #include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice"); __static_yoink("arm_optimized_routines_notice");
/*
* Data for log.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define N (1 << LOG_TABLE_BITS) #define N (1 << LOG_TABLE_BITS)
const struct log_data __log_data = { const struct log_data __log_data = {
.ln2hi = 0x1.62e42fefa3800p-1, .ln2hi = 0x1.62e42fefa3800p-1,
.ln2lo = 0x1.ef35793c76730p-45, .ln2lo = 0x1.ef35793c76730p-45,
.poly1 = { .poly1 = {
#if LOG_POLY1_ORDER == 10
// relative error: 0x1.32eccc6p-62
// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
-0x1p-1,
0x1.55555555554e5p-2,
-0x1.0000000000af2p-2,
0x1.9999999bbe436p-3,
-0x1.55555537f9cdep-3,
0x1.24922fc8127cfp-3,
-0x1.0000b7d6bb612p-3,
0x1.c806ee1ddbcafp-4,
-0x1.972335a9c2d6ep-4,
#elif LOG_POLY1_ORDER == 11
// relative error: 0x1.52c8b708p-68
// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
-0x1p-1,
0x1.5555555555555p-2,
-0x1.ffffffffffea9p-3,
0x1.999999999c4d4p-3,
-0x1.55555557f5541p-3,
0x1.249248fbe33e4p-3,
-0x1.ffffc9a3c825bp-4,
0x1.c71e1f204435dp-4,
-0x1.9a7f26377d06ep-4,
0x1.71c30cf8f7364p-4,
#elif LOG_POLY1_ORDER == 12
// relative error: 0x1.c04d76cp-63 // relative error: 0x1.c04d76cp-63
// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval) // in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
-0x1p-1, -0x1p-1,
@ -54,8 +73,20 @@ const struct log_data __log_data = {
-0x1.999eb43b068ffp-4, -0x1.999eb43b068ffp-4,
0x1.78182f7afd085p-4, 0x1.78182f7afd085p-4,
-0x1.5521375d145cdp-4, -0x1.5521375d145cdp-4,
#endif
}, },
.poly = { .poly = {
#if N == 64 && LOG_POLY_ORDER == 7
// relative error: 0x1.906eb8ap-58
// abs error: 0x1.d2cad5a8p-67
// in -0x1.fp-8 0x1.fp-8
-0x1.0000000000027p-1,
0x1.555555555556ap-2,
-0x1.fffffff0440bap-3,
0x1.99999991906c3p-3,
-0x1.555c8d7e8201ep-3,
0x1.24978c59151fap-3,
#elif N == 128 && LOG_POLY_ORDER == 6
// relative error: 0x1.926199e8p-56 // relative error: 0x1.926199e8p-56
// abs error: 0x1.882ff33p-65 // abs error: 0x1.882ff33p-65
// in -0x1.fp-9 0x1.fp-9 // in -0x1.fp-9 0x1.fp-9
@ -64,6 +95,17 @@ const struct log_data __log_data = {
-0x1.fffffffeb459p-3, -0x1.fffffffeb459p-3,
0x1.999b324f10111p-3, 0x1.999b324f10111p-3,
-0x1.55575e506c89fp-3, -0x1.55575e506c89fp-3,
#elif N == 128 && LOG_POLY_ORDER == 7
// relative error: 0x1.649fc4bp-64
// abs error: 0x1.c3b5769p-74
// in -0x1.fp-9 0x1.fp-9
-0x1.0000000000001p-1,
0x1.5555555555556p-2,
-0x1.fffffffea1a8p-3,
0x1.99999998e9139p-3,
-0x1.555776801b968p-3,
0x1.2493c29331a5cp-3,
#endif
}, },
/* Algorithm: /* Algorithm:
@ -92,6 +134,72 @@ a single rounding error when there is no fast fma for z*invc - 1, 3) ensures
that logc + poly(z/c - 1) has small error, however near x == 1 when that logc + poly(z/c - 1) has small error, however near x == 1 when
|log(x)| < 0x1p-4, this is not enough so that is special cased. */ |log(x)| < 0x1p-4, this is not enough so that is special cased. */
.tab = { .tab = {
#if N == 64
{0x1.7242886495cd8p+0, -0x1.79e267bdfe000p-2},
{0x1.6e1f769340dc9p+0, -0x1.6e60ee0ecb000p-2},
{0x1.6a13ccc8f195cp+0, -0x1.63002fdbf6000p-2},
{0x1.661ec72e86f3ap+0, -0x1.57bf76c597000p-2},
{0x1.623fa6c447b16p+0, -0x1.4c9e07f0d2000p-2},
{0x1.5e75bbca31702p+0, -0x1.419b42f027000p-2},
{0x1.5ac05655adb10p+0, -0x1.36b67660e6000p-2},
{0x1.571ed3e940191p+0, -0x1.2bef0839e4800p-2},
{0x1.539094ac0fbbfp+0, -0x1.21445727cb000p-2},
{0x1.5015007e7fc42p+0, -0x1.16b5ca3c3d000p-2},
{0x1.4cab877c31cf9p+0, -0x1.0c42d3805f800p-2},
{0x1.49539e76a88d3p+0, -0x1.01eae61b60800p-2},
{0x1.460cbc12211dap+0, -0x1.ef5adb9fb0000p-3},
{0x1.42d6624debe3ap+0, -0x1.db13daab99000p-3},
{0x1.3fb0144f0d462p+0, -0x1.c6ffbe896e000p-3},
{0x1.3c995a1f9a9b4p+0, -0x1.b31d84722d000p-3},
{0x1.3991c23952500p+0, -0x1.9f6c3cf6eb000p-3},
{0x1.3698df35eaa14p+0, -0x1.8beafe7f13000p-3},
{0x1.33ae463091760p+0, -0x1.7898db878d000p-3},
{0x1.30d190aae3d72p+0, -0x1.6574efe4ec000p-3},
{0x1.2e025c9203c89p+0, -0x1.527e620845000p-3},
{0x1.2b404a7244988p+0, -0x1.3fb457d798000p-3},
{0x1.288b01dc19544p+0, -0x1.2d1615a077000p-3},
{0x1.25e2268085f69p+0, -0x1.1aa2b431e5000p-3},
{0x1.23456812abb74p+0, -0x1.08598f1d2b000p-3},
{0x1.20b4703174157p+0, -0x1.ec738fee40000p-4},
{0x1.1e2ef308b4e9bp+0, -0x1.c885768862000p-4},
{0x1.1bb4a36b70a3fp+0, -0x1.a4e75b6a46000p-4},
{0x1.194538e960658p+0, -0x1.8197efba9a000p-4},
{0x1.16e0692a10ac8p+0, -0x1.5e95ad734e000p-4},
{0x1.1485f1ba1568bp+0, -0x1.3bdf67117c000p-4},
{0x1.12358e123ed6fp+0, -0x1.1973b744f0000p-4},
{0x1.0fef01de37c8dp+0, -0x1.eea33446bc000p-5},
{0x1.0db20b82be414p+0, -0x1.aaef4ab304000p-5},
{0x1.0b7e6f67f69b3p+0, -0x1.67c962fd2c000p-5},
{0x1.0953f342fc108p+0, -0x1.252f29acf8000p-5},
{0x1.0732604ec956bp+0, -0x1.c63d19e9c0000p-6},
{0x1.051980117f9b0p+0, -0x1.432ab6a388000p-6},
{0x1.03091aa6810f1p+0, -0x1.8244357f50000p-7},
{0x1.01010152cf066p+0, -0x1.0080a711c0000p-8},
{0x1.fc07ef6b6e30bp-1, 0x1.fe03018e80000p-8},
{0x1.f4465aa1024afp-1, 0x1.7b91986450000p-6},
{0x1.ecc07a8fd3f5ep-1, 0x1.39e88608c8000p-5},
{0x1.e573ad856b537p-1, 0x1.b42dc6e624000p-5},
{0x1.de5d6dc7b8057p-1, 0x1.165372ec20000p-4},
{0x1.d77b6498bddf7p-1, 0x1.51b07a0170000p-4},
{0x1.d0cb580315c0fp-1, 0x1.8c3465c7ea000p-4},
{0x1.ca4b30d1cf449p-1, 0x1.c5e544a290000p-4},
{0x1.c3f8ef4810d8ep-1, 0x1.fec91aa0a6000p-4},
{0x1.bdd2b8b311f44p-1, 0x1.1b72acdc5c000p-3},
{0x1.b7d6c2eeac054p-1, 0x1.371fc65a98000p-3},
{0x1.b20363474c8f5p-1, 0x1.526e61c1aa000p-3},
{0x1.ac570165eeab1p-1, 0x1.6d60ffc240000p-3},
{0x1.a6d019f331df4p-1, 0x1.87fa08a013000p-3},
{0x1.a16d3ebc9e3c3p-1, 0x1.a23bc630c3000p-3},
{0x1.9c2d14567ef45p-1, 0x1.bc286a3512000p-3},
{0x1.970e4efae9169p-1, 0x1.d5c2195697000p-3},
{0x1.920fb3bd0b802p-1, 0x1.ef0ae132d3000p-3},
{0x1.8d3018b58699ap-1, 0x1.040259974e000p-2},
{0x1.886e5ff170ee6p-1, 0x1.1058bd40e2000p-2},
{0x1.83c977ad35d27p-1, 0x1.1c898c1137800p-2},
{0x1.7f405ed16c520p-1, 0x1.2895a3e65b000p-2},
{0x1.7ad220d0335c4p-1, 0x1.347dd8f6bd000p-2},
{0x1.767dce53474fdp-1, 0x1.4043083cb3800p-2},
#elif N == 128
{0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2}, {0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
{0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2}, {0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
{0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2}, {0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
@ -220,9 +328,76 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when
{0x1.79baa679725c2p-1, 0x1.377266dec1800p-2}, {0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
{0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2}, {0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2}, {0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
#endif
}, },
#if !__FP_FAST_FMA #if !HAVE_FAST_FMA
.tab2 = { .tab2 = {
# if N == 64
{0x1.61ffff94c4fecp-1, -0x1.9fe4fc998f325p-56},
{0x1.66000020377ddp-1, 0x1.e804c7a9519f2p-55},
{0x1.6a00004c41678p-1, 0x1.902c675d9ecfep-55},
{0x1.6dffff7384f87p-1, -0x1.2fd6b95e55043p-56},
{0x1.720000b37216ep-1, 0x1.802bc8d437043p-55},
{0x1.75ffffbeb3c9dp-1, 0x1.6047ad0a0d4e4p-57},
{0x1.7a0000628daep-1, -0x1.e00434b49313dp-56},
{0x1.7dffffd7abd1ap-1, -0x1.6015f8a083576p-56},
{0x1.81ffffdf40c54p-1, 0x1.7f54bf76a42c9p-57},
{0x1.860000f334e11p-1, 0x1.60054cb5344d7p-56},
{0x1.8a0001238aca7p-1, 0x1.c03c9bd132f55p-57},
{0x1.8dffffb81d212p-1, -0x1.001e519f2764fp-55},
{0x1.92000086adc7cp-1, 0x1.1fe40f88f49c6p-55},
{0x1.960000135d8eap-1, -0x1.f832268dc3095p-55},
{0x1.99ffff9435acp-1, 0x1.7031d8b835edcp-56},
{0x1.9e00003478565p-1, -0x1.0030b221ce3eep-58},
{0x1.a20000b592948p-1, 0x1.8fd2f1dbd4639p-55},
{0x1.a600000ad0bcfp-1, 0x1.901d6a974e6bep-55},
{0x1.a9ffff55953a5p-1, 0x1.a07556192db98p-57},
{0x1.adffff29ce03dp-1, -0x1.fff0717ec71c2p-56},
{0x1.b1ffff34f3ac8p-1, 0x1.8005573de89d1p-57},
{0x1.b60000894c55bp-1, -0x1.ff2fb51b044c7p-57},
{0x1.b9fffef45ec7dp-1, -0x1.9ff7c4e8730fp-56},
{0x1.be0000cda7b2ap-1, 0x1.57d058dbf3c1dp-55},
{0x1.c1ffff2c57917p-1, 0x1.7e66d7e48dbc9p-58},
{0x1.c60000ea5b82ap-1, -0x1.47f5e132ed4bep-55},
{0x1.ca0001121ae98p-1, -0x1.40958c8d5e00ap-58},
{0x1.ce0000f9241cbp-1, -0x1.7da063caa81c8p-59},
{0x1.d1fffe8be95a4p-1, -0x1.82e3a411afcd9p-59},
{0x1.d5ffff035932bp-1, -0x1.00f901b3fe87dp-58},
{0x1.d9fffe8b54ba7p-1, 0x1.ffef55d6e3a4p-55},
{0x1.de0000ad95d19p-1, 0x1.5feb2efd4c7c7p-55},
{0x1.e1fffe925ce47p-1, 0x1.c8085484eaf08p-55},
{0x1.e5fffe3ddf853p-1, -0x1.fd5ed02c5cadp-60},
{0x1.e9fffed0a0e5fp-1, -0x1.a80aaef411586p-55},
{0x1.ee00008f82eep-1, -0x1.b000aeaf97276p-55},
{0x1.f20000a22d2f4p-1, -0x1.8f8906e13eba3p-56},
{0x1.f5fffee35b57dp-1, 0x1.1fdd33b2d3714p-57},
{0x1.fa00014eec3a6p-1, -0x1.3ee0b7a18c1a5p-58},
{0x1.fdffff5daa89fp-1, -0x1.c1e24c8e3b503p-58},
{0x1.0200005b93349p+0, -0x1.50197fe6bedcap-54},
{0x1.05ffff9d597acp+0, 0x1.20160d062d0dcp-55},
{0x1.0a00005687a63p+0, -0x1.27f3f9307696ep-54},
{0x1.0dffff779164ep+0, 0x1.b7eb40bb9c4f4p-54},
{0x1.12000044a0aa8p+0, 0x1.efbc914d512c4p-55},
{0x1.16000069685bcp+0, -0x1.c0bea3eb2d82cp-57},
{0x1.1a000093f0d78p+0, 0x1.1fecbf1e8c52p-54},
{0x1.1dffffb2b1457p+0, -0x1.3fc91365637d6p-55},
{0x1.2200008824a1p+0, -0x1.dff7e9feb578ap-54},
{0x1.25ffffeef953p+0, -0x1.b00a61ec912f7p-55},
{0x1.2a0000a1e7783p+0, 0x1.60048318b0483p-56},
{0x1.2e0000853d4c7p+0, -0x1.77fbedf2c8cf3p-54},
{0x1.320000324c55bp+0, 0x1.f81983997354fp-54},
{0x1.360000594f796p+0, -0x1.cfe4beff900a9p-54},
{0x1.3a0000a4c1c0fp+0, 0x1.07dbb2e268d0ep-54},
{0x1.3e0000751c61bp+0, 0x1.80583ed1c566ep-56},
{0x1.42000069e8a9fp+0, 0x1.f01f1edf82045p-54},
{0x1.460000b5a1e34p+0, -0x1.dfdf0cf45c14ap-55},
{0x1.4a0000187e513p+0, 0x1.401306b83a98dp-55},
{0x1.4dffff3ba420bp+0, 0x1.9fc6539a6454ep-56},
{0x1.51fffffe391c9p+0, -0x1.601ef3353ac83p-54},
{0x1.560000e342455p+0, 0x1.3fb7fac8ac151p-55},
{0x1.59ffffc39676fp+0, 0x1.4fe7dd6659cc2p-55},
{0x1.5dfffff10ef42p+0, -0x1.48154cb592bcbp-54},
# elif N == 128
{0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56}, {0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
{0x1.63000034db495p-1, 0x1.dbfea48005d41p-55}, {0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
{0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55}, {0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
@ -351,6 +526,7 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when
{0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54}, {0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
{0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54}, {0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54}, {0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
},
#endif #endif
},
#endif /* !HAVE_FAST_FMA */
}; };

View file

@ -1,26 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG_DATA_H_
#define COSMOPOLITAN_LIBC_TINYMATH_LOG_DATA_H_
#define LOG_TABLE_BITS 7
#define LOG_POLY_ORDER 6
#define LOG_POLY1_ORDER 12
COSMOPOLITAN_C_START_
extern const struct log_data {
double ln2hi;
double ln2lo;
double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
double poly1[LOG_POLY1_ORDER - 1];
struct {
double invc, logc;
} tab[1 << LOG_TABLE_BITS];
#if !__FP_FAST_FMA
struct {
double chi, clo;
} tab2[1 << LOG_TABLE_BITS];
#endif
} __log_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG_DATA_H_ */

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,19 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/intrin/likely.h" #include "libc/tinymath/arm.internal.h"
#include "libc/math.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/logf_data.internal.h"
__static_yoink("arm_optimized_routines_notice"); __static_yoink("arm_optimized_routines_notice");
/*
* Single-precision log function.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/* /*
LOGF_TABLE_BITS = 4 LOGF_TABLE_BITS = 4
LOGF_POLY_ORDER = 4 LOGF_POLY_ORDER = 4
@ -53,50 +43,63 @@ Relative error: 1.957 * 2^-26 (before rounding.)
#define OFF 0x3f330000 #define OFF 0x3f330000
/** /**
* Returns natural logarithm of 𝑥. * Returns natural logarithm of x.
*
* - ULP error: 0.818 (nearest rounding.)
* - Relative error: 1.957 * 2^-26 (before rounding.)
*/ */
float logf(float x) float
logf (float x)
{ {
double_t z, r, r2, y, y0, invc, logc; /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
uint32_t ix, iz, tmp; double_t z, r, r2, y, y0, invc, logc;
int k, i; uint32_t ix, iz, tmp;
int k, i;
ix = asuint(x); ix = asuint (x);
/* Fix sign of zero with downward rounding when x==1. */ #if WANT_ROUNDING
if (WANT_ROUNDING && UNLIKELY(ix == 0x3f800000)) /* Fix sign of zero with downward rounding when x==1. */
return 0; if (unlikely (ix == 0x3f800000))
if (UNLIKELY(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) { return 0;
/* x < 0x1p-126 or inf or nan. */ #endif
if (ix * 2 == 0) if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
return __math_divzerof(1); {
if (ix == 0x7f800000) /* log(inf) == inf. */ /* x < 0x1p-126 or inf or nan. */
return x; if (ix * 2 == 0)
if ((ix & 0x80000000) || ix * 2 >= 0xff000000) return __math_divzerof (1);
return __math_invalidf(x); if (ix == 0x7f800000) /* log(inf) == inf. */
/* x is subnormal, normalize it. */ return x;
ix = asuint(x * 0x1p23f); if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
ix -= 23 << 23; return __math_invalidf (x);
} /* x is subnormal, normalize it. */
ix = asuint (x * 0x1p23f);
ix -= 23 << 23;
}
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact. /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
The range is split into N subintervals. The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */ The ith subinterval contains z and c is near its center. */
tmp = ix - OFF; tmp = ix - OFF;
i = (tmp >> (23 - LOGF_TABLE_BITS)) % N; i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
k = (int32_t)tmp >> 23; /* arithmetic shift */ k = (int32_t) tmp >> 23; /* arithmetic shift */
iz = ix - (tmp & 0xff800000); iz = ix - (tmp & 0xff800000);
invc = T[i].invc; invc = T[i].invc;
logc = T[i].logc; logc = T[i].logc;
z = (double_t)asfloat(iz); z = (double_t) asfloat (iz);
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */ /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
r = z * invc - 1; r = z * invc - 1;
y0 = logc + (double_t)k * Ln2; y0 = logc + (double_t) k * Ln2;
/* Pipelined polynomial evaluation to approximate log1p(r). */ /* Pipelined polynomial evaluation to approximate log1p(r). */
r2 = r * r; r2 = r * r;
y = A[1] * r + A[2]; y = A[1] * r + A[2];
y = A[0] * r2 + y; y = A[0] * r2 + y;
y = y * r2 + (y0 + r); y = y * r2 + (y0 + r);
return eval_as_float(y); return eval_as_float (y);
} }
#if USE_GLIBC_ABI
strong_alias (logf, __logf_finite)
hidden_alias (logf, __ieee754_logf)
#endif

View file

@ -3,7 +3,7 @@
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,16 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/tinymath/logf_data.internal.h" #include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice"); __static_yoink("arm_optimized_routines_notice");
/*
* Data definition for logf.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
const struct logf_data __logf_data = { const struct logf_data __logf_data = {
.tab = { .tab = {
{ 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 }, { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },

View file

@ -1,18 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOGF_DATA_INTERNAL_H_
#define COSMOPOLITAN_LIBC_TINYMATH_LOGF_DATA_INTERNAL_H_
#define LOGF_TABLE_BITS 4
#define LOGF_POLY_ORDER 4
COSMOPOLITAN_C_START_
extern const struct logf_data {
struct {
double invc, logc;
} tab[1 << LOGF_TABLE_BITS];
double ln2;
double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */
} __logf_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOGF_DATA_INTERNAL_H_ */

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,16 +25,76 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/tinymath/atan_data.internal.h" #include "libc/errno.h"
#include "libc/tinymath/arm.internal.h"
const struct atan_poly_data __atan_poly_data = { #if WANT_ERRNO
.poly = {/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on /* dontinline reduces code size and avoids making math functions non-leaf
[2**-1022, 1.0]. See atan.sollya for details of how these were when the error handling is inlined. */
generated. */ dontinline static double
-0x1.5555555555555p-2, 0x1.99999999996c1p-3, -0x1.2492492478f88p-3, with_errno (double y, int e)
0x1.c71c71bc3951cp-4, -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4, {
-0x1.11100ee084227p-4, 0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5, errno = e;
0x1.842dbe9b0d916p-5, -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5, return y;
-0x1.00e6eece7de8p-5, 0x1.860897b29e5efp-6, -0x1.0051381722a59p-6, }
0x1.14e9dc19a4a4ep-7, -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10, #else
-0x1.ab24da7be7402p-13, 0x1.358851160a528p-16}}; #define with_errno(x, e) (x)
#endif
/* dontinline reduces code size. */
dontinline static double
xflow (uint32_t sign, double y)
{
y = eval_as_double (opt_barrier_double (sign ? -y : y) * y);
return with_errno (y, ERANGE);
}
double
__math_uflow (uint32_t sign)
{
return xflow (sign, 0x1p-767);
}
#if WANT_ERRNO_UFLOW
/* Underflows to zero in some non-nearest rounding mode, setting errno
is valid even if the result is non-zero, but in the subnormal range. */
double
__math_may_uflow (uint32_t sign)
{
return xflow (sign, 0x1.8p-538);
}
#endif
double
__math_oflow (uint32_t sign)
{
return xflow (sign, 0x1p769);
}
double
__math_divzero (uint32_t sign)
{
double y = opt_barrier_double (sign ? -1.0 : 1.0) / 0.0;
return with_errno (y, ERANGE);
}
dontinstrument double
__math_invalid (double x)
{
double y = (x - x) / (x - x);
return isnan (x) ? y : with_errno (y, EDOM);
}
/* Check result and set errno if necessary. */
dontinstrument double
__math_check_uflow (double y)
{
return y == 0.0 ? with_errno (y, ERANGE) : y;
}
dontinstrument double
__math_check_oflow (double y)
{
return isinf (y) ? with_errno (y, ERANGE) : y;
}

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Musl Libc Optimized Routines
Copyright © 2005-2014 Rich Felker, et al. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,27 +25,76 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/complex.h" #include "libc/errno.h"
#include "libc/math.h" #include "libc/tinymath/arm.internal.h"
#include "libc/tinymath/complex.internal.h"
__static_yoink("musl_libc_notice");
// FIXME #if WANT_ERRNO
/* dontinline reduces code size and avoids making math functions non-leaf
/* asin(z) = -i log(i z + sqrt(1 - z*z)) */ when the error handling is inlined. */
dontinline static float
double complex casin(double complex z) with_errnof (float y, int e)
{ {
double complex w; errno = e;
double x, y; return y;
}
#else
#define with_errnof(x, e) (x)
#endif
x = creal(z); /* dontinline reduces code size. */
y = cimag(z); dontinline static float
w = CMPLX(1.0 - (x - y)*(x + y), -2.0*x*y); xflowf (uint32_t sign, float y)
double complex r = clog(CMPLX(-y, x) + csqrt(w)); {
return CMPLX(cimag(r), -creal(r)); y = eval_as_float (opt_barrier_float (sign ? -y : y) * y);
return with_errnof (y, ERANGE);
} }
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 float
__weak_reference(casin, casinl); __math_uflowf (uint32_t sign)
{
return xflowf (sign, 0x1p-95f);
}
#if WANT_ERRNO_UFLOW
/* Underflows to zero in some non-nearest rounding mode, setting errno
is valid even if the result is non-zero, but in the subnormal range. */
float
__math_may_uflowf (uint32_t sign)
{
return xflowf (sign, 0x1.4p-75f);
}
#endif #endif
float
__math_oflowf (uint32_t sign)
{
return xflowf (sign, 0x1p97f);
}
float
__math_divzerof (uint32_t sign)
{
float y = opt_barrier_float (sign ? -1.0f : 1.0f) / 0.0f;
return with_errnof (y, ERANGE);
}
dontinstrument float
__math_invalidf (float x)
{
float y = (x - x) / (x - x);
return isnan (x) ? y : with_errnof (y, EDOM);
}
/* Check result and set errno if necessary. */
dontinstrument float
__math_check_uflowf (float y)
{
return y == 0.0f ? with_errnof (y, ERANGE) : y;
}
dontinstrument float
__math_check_oflowf (float y)
{
return isinf (y) ? with_errnof (y, ERANGE) : y;
}

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,11 +25,25 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/tinymath/atanf_data.internal.h" #include "libc/errno.h"
#include "libc/tinymath/arm.internal.h"
/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0]. #if WANT_ERRNO
*/ /* dontinline reduces code size and avoids making math functions non-leaf
const struct atanf_poly_data __atanf_poly_data = { when the error handling is inlined. */
.poly = {/* See atanf.sollya for details of how these were generated. */ dontinline static long double
-0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f, with_errnol (long double y, int e)
-0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f}}; {
errno = e;
return y;
}
#else
#define with_errnol(x, e) (x)
#endif
dontinstrument long double
__math_invalidl (long double x)
{
long double y = (x - x) / (x - x);
return isnan (x) ? y : with_errnol (y, EDOM);
}

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Musl Libc Optimized Routines
Copyright © 2005-2014 Rich Felker, et al. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,20 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/intrin/likely.h" #include "libc/tinymath/arm.internal.h"
#include "libc/math.h"
#include "libc/tinymath/exp_data.internal.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/pow_data.internal.h"
__static_yoink("arm_optimized_routines_notice"); __static_yoink("arm_optimized_routines_notice");
/*
* Double-precision x^y function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/* /*
Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53) Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma) relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma)
@ -53,79 +42,83 @@ ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
#define OFF 0x3fe6955500000000 #define OFF 0x3fe6955500000000
/* Top 12 bits of a double (sign and exponent bits). */ /* Top 12 bits of a double (sign and exponent bits). */
static inline uint32_t top12(double x) static inline uint32_t
top12 (double x)
{ {
return asuint64(x) >> 52; return asuint64 (x) >> 52;
} }
/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
additional 15 bits precision. IX is the bit representation of x, but additional 15 bits precision. IX is the bit representation of x, but
normalized in the subnormal range using the sign bit for the exponent. */ normalized in the subnormal range using the sign bit for the exponent. */
static inline double_t log_inline(uint64_t ix, double_t *tail) static inline double_t
log_inline (uint64_t ix, double_t *tail)
{ {
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p; double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
uint64_t iz, tmp; uint64_t iz, tmp;
int k, i; int k, i;
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact. /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals. The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */ The ith subinterval contains z and c is near its center. */
tmp = ix - OFF; tmp = ix - OFF;
i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N; i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
k = (int64_t)tmp >> 52; /* arithmetic shift */ k = (int64_t) tmp >> 52; /* arithmetic shift */
iz = ix - (tmp & 0xfffULL << 52); iz = ix - (tmp & 0xfffULL << 52);
z = asdouble(iz); z = asdouble (iz);
kd = (double_t)k; kd = (double_t) k;
/* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */ /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
invc = T[i].invc; invc = T[i].invc;
logc = T[i].logc; logc = T[i].logc;
logctail = T[i].logctail; logctail = T[i].logctail;
/* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
|z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. */ |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. */
#if __FP_FAST_FMA #if HAVE_FAST_FMA
r = __builtin_fma(z, invc, -1.0); r = fma (z, invc, -1.0);
#else #else
/* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */ /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
double_t zhi = asdouble((iz + (1ULL << 31)) & (-1ULL << 32)); double_t zhi = asdouble ((iz + (1ULL << 31)) & (-1ULL << 32));
double_t zlo = z - zhi; double_t zlo = z - zhi;
double_t rhi = zhi * invc - 1.0; double_t rhi = zhi * invc - 1.0;
double_t rlo = zlo * invc; double_t rlo = zlo * invc;
r = rhi + rlo; r = rhi + rlo;
#endif #endif
/* k*Ln2 + log(c) + r. */ /* k*Ln2 + log(c) + r. */
t1 = kd * Ln2hi + logc; t1 = kd * Ln2hi + logc;
t2 = t1 + r; t2 = t1 + r;
lo1 = kd * Ln2lo + logctail; lo1 = kd * Ln2lo + logctail;
lo2 = t1 - t2 + r; lo2 = t1 - t2 + r;
/* Evaluation is optimized assuming superscalar pipelined execution. */ /* Evaluation is optimized assuming superscalar pipelined execution. */
double_t ar, ar2, ar3, lo3, lo4; double_t ar, ar2, ar3, lo3, lo4;
ar = A[0] * r; /* A[0] = -0.5. */ ar = A[0] * r; /* A[0] = -0.5. */
ar2 = r * ar; ar2 = r * ar;
ar3 = r * ar2; ar3 = r * ar2;
/* k*Ln2 + log(c) + r + A[0]*r*r. */ /* k*Ln2 + log(c) + r + A[0]*r*r. */
#if __FP_FAST_FMA #if HAVE_FAST_FMA
hi = t2 + ar2; hi = t2 + ar2;
lo3 = __builtin_fma(ar, r, -ar2); lo3 = fma (ar, r, -ar2);
lo4 = t2 - hi + ar2; lo4 = t2 - hi + ar2;
#else #else
double_t arhi = A[0] * rhi; double_t arhi = A[0] * rhi;
double_t arhi2 = rhi * arhi; double_t arhi2 = rhi * arhi;
hi = t2 + arhi2; hi = t2 + arhi2;
lo3 = rlo * (ar + arhi); lo3 = rlo * (ar + arhi);
lo4 = t2 - hi + arhi2; lo4 = t2 - hi + arhi2;
#endif #endif
/* p = log1p(r) - r - A[0]*r*r. */ /* p = log1p(r) - r - A[0]*r*r. */
p = (ar3 * (A[1] + r * A[2] + #if POW_LOG_POLY_ORDER == 8
ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6])))); p = (ar3
lo = lo1 + lo2 + lo3 + lo4 + p; * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
y = hi + lo; #endif
*tail = hi - y + lo; lo = lo1 + lo2 + lo3 + lo4 + p;
return y; y = hi + lo;
*tail = hi - y + lo;
return y;
} }
#undef N #undef N
@ -149,232 +142,268 @@ static inline double_t log_inline(uint64_t ix, double_t *tail)
a double. (int32_t)KI is the k used in the argument reduction and exponent a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow. */ negative k means the result may underflow. */
forceinline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) static inline double
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
{ {
double_t scale, y; double_t scale, y;
if ((ki & 0x80000000) == 0) { if ((ki & 0x80000000) == 0)
/* k > 0, the exponent of scale might have overflowed by <= 460. */ {
sbits -= 1009ull << 52; /* k > 0, the exponent of scale might have overflowed by <= 460. */
scale = asdouble(sbits); sbits -= 1009ull << 52;
y = 0x1p1009 * (scale + scale * tmp); scale = asdouble (sbits);
return eval_as_double(y); y = 0x1p1009 * (scale + scale * tmp);
} return check_oflow (eval_as_double (y));
/* k < 0, need special care in the subnormal range. */ }
sbits += 1022ull << 52; /* k < 0, need special care in the subnormal range. */
/* Note: sbits is signed scale. */ sbits += 1022ull << 52;
scale = asdouble(sbits); /* Note: sbits is signed scale. */
y = scale + scale * tmp; scale = asdouble (sbits);
if (fabs(y) < 1.0) { y = scale + scale * tmp;
/* Round y to the right precision before scaling it into the subnormal if (fabs (y) < 1.0)
range to avoid double rounding that can cause 0.5+E/2 ulp error where {
E is the worst-case ulp error outside the subnormal range. So this /* Round y to the right precision before scaling it into the subnormal
is only useful if the goal is better than 1 ulp worst-case error. */ range to avoid double rounding that can cause 0.5+E/2 ulp error where
double_t hi, lo, one = 1.0; E is the worst-case ulp error outside the subnormal range. So this
if (y < 0.0) is only useful if the goal is better than 1 ulp worst-case error. */
one = -1.0; double_t hi, lo, one = 1.0;
lo = scale - y + scale * tmp; if (y < 0.0)
hi = one + y; one = -1.0;
lo = one - hi + y + lo; lo = scale - y + scale * tmp;
y = eval_as_double(hi + lo) - one; hi = one + y;
/* Fix the sign of 0. */ lo = one - hi + y + lo;
if (y == 0.0) y = eval_as_double (hi + lo) - one;
y = asdouble(sbits & 0x8000000000000000); /* Fix the sign of 0. */
/* The underflow exception needs to be signaled explicitly. */ if (y == 0.0)
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022); y = asdouble (sbits & 0x8000000000000000);
} /* The underflow exception needs to be signaled explicitly. */
y = 0x1p-1022 * y; force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
return eval_as_double(y); }
y = 0x1p-1022 * y;
return check_uflow (eval_as_double (y));
} }
#define SIGN_BIAS (0x800 << EXP_TABLE_BITS) #define SIGN_BIAS (0x800 << EXP_TABLE_BITS)
/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */ The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */
forceinline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias) static inline double
exp_inline (double_t x, double_t xtail, uint32_t sign_bias)
{ {
uint32_t abstop; uint32_t abstop;
uint64_t ki, idx, top, sbits; uint64_t ki, idx, top, sbits;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, z, r, r2, scale, tail, tmp; double_t kd, z, r, r2, scale, tail, tmp;
abstop = top12(x) & 0x7ff; abstop = top12 (x) & 0x7ff;
if (UNLIKELY(abstop - top12(0x1p-54) >= if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
top12(512.0) - top12(0x1p-54))) { {
if (abstop - top12(0x1p-54) >= 0x80000000) { if (abstop - top12 (0x1p-54) >= 0x80000000)
/* Avoid spurious underflow for tiny x. */ {
/* Note: 0 is common input. */ /* Avoid spurious underflow for tiny x. */
double_t one = WANT_ROUNDING ? 1.0 + x : 1.0; /* Note: 0 is common input. */
return sign_bias ? -one : one; double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
} return sign_bias ? -one : one;
if (abstop >= top12(1024.0)) {
/* Note: inf and nan are already handled. */
if (asuint64(x) >> 63)
return __math_uflow(sign_bias);
else
return __math_oflow(sign_bias);
}
/* Large x is special cased below. */
abstop = 0;
} }
if (abstop >= top12 (1024.0))
{
/* Note: inf and nan are already handled. */
if (asuint64 (x) >> 63)
return __math_uflow (sign_bias);
else
return __math_oflow (sign_bias);
}
/* Large x is special cased below. */
abstop = 0;
}
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
z = InvLn2N * x; z = InvLn2N * x;
#if TOINT_INTRINSICS #if TOINT_INTRINSICS
kd = roundtoint(z); kd = roundtoint (z);
ki = converttoint(z); ki = converttoint (z);
#elif EXP_USE_TOINT_NARROW #elif EXP_USE_TOINT_NARROW
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */ /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
kd = eval_as_double(z + Shift); kd = eval_as_double (z + Shift);
ki = asuint64(kd) >> 16; ki = asuint64 (kd) >> 16;
kd = (double_t)(int32_t)ki; kd = (double_t) (int32_t) ki;
#else #else
/* z - kd is in [-1, 1] in non-nearest rounding modes. */ /* z - kd is in [-1, 1] in non-nearest rounding modes. */
kd = eval_as_double(z + Shift); kd = eval_as_double (z + Shift);
ki = asuint64(kd); ki = asuint64 (kd);
kd -= Shift; kd -= Shift;
#endif #endif
r = x + kd * NegLn2hiN + kd * NegLn2loN; r = x + kd * NegLn2hiN + kd * NegLn2loN;
/* The code assumes 2^-200 < |xtail| < 2^-8/N. */ /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
r += xtail; r += xtail;
/* 2^(k/N) ~= scale * (1 + tail). */ /* 2^(k/N) ~= scale * (1 + tail). */
idx = 2 * (ki % N); idx = 2 * (ki % N);
top = (ki + sign_bias) << (52 - EXP_TABLE_BITS); top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
tail = asdouble(T[idx]); tail = asdouble (T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */ /* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top; sbits = T[idx + 1] + top;
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */ /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */ /* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r; r2 = r * r;
/* Without fma the worst case error is 0.25/N ulp larger. */ /* Without fma the worst case error is 0.25/N ulp larger. */
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */ /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); #if EXP_POLY_ORDER == 4
if (UNLIKELY(abstop == 0)) tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
return specialcase(tmp, sbits, ki); #elif EXP_POLY_ORDER == 5
scale = asdouble(sbits); tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there #elif EXP_POLY_ORDER == 6
is no spurious underflow here even without fma. */ tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
return eval_as_double(scale + scale * tmp); #endif
if (unlikely (abstop == 0))
return specialcase (tmp, sbits, ki);
scale = asdouble (sbits);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
is no spurious underflow here even without fma. */
return eval_as_double (scale + scale * tmp);
} }
/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
the bit representation of a non-zero finite floating-point value. */ the bit representation of a non-zero finite floating-point value. */
static inline int checkint(uint64_t iy) static inline int
checkint (uint64_t iy)
{ {
int e = iy >> 52 & 0x7ff; int e = iy >> 52 & 0x7ff;
if (e < 0x3ff) if (e < 0x3ff)
return 0; return 0;
if (e > 0x3ff + 52) if (e > 0x3ff + 52)
return 2; return 2;
if (iy & ((1ULL << (0x3ff + 52 - e)) - 1)) if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
return 0; return 0;
if (iy & (1ULL << (0x3ff + 52 - e))) if (iy & (1ULL << (0x3ff + 52 - e)))
return 1; return 1;
return 2; return 2;
} }
/* Returns 1 if input is the bit representation of 0, infinity or nan. */ /* Returns 1 if input is the bit representation of 0, infinity or nan. */
static inline int zeroinfnan(uint64_t i) static inline int
zeroinfnan (uint64_t i)
{ {
return 2 * i - 1 >= 2 * asuint64(INFINITY) - 1; return 2 * i - 1 >= 2 * asuint64 (INFINITY) - 1;
} }
/** /**
* Returns 𝑥^𝑦. * Returns 𝑥^𝑦.
* @note should take ~18ns *
* - Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
* - relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma)
* - ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
*
* @raise ERANGE on overflow or underflow
* @raise EDOM if x is negative and y is a finite non-integer
*/ */
double pow(double x, double y) double
pow (double x, double y)
{ {
uint32_t sign_bias = 0; uint32_t sign_bias = 0;
uint64_t ix, iy; uint64_t ix, iy;
uint32_t topx, topy; uint32_t topx, topy;
ix = asuint64(x); ix = asuint64 (x);
iy = asuint64(y); iy = asuint64 (y);
topx = top12(x); topx = top12 (x);
topy = top12(y); topy = top12 (y);
if (UNLIKELY(topx - 0x001 >= 0x7ff - 0x001 || if (unlikely (topx - 0x001 >= 0x7ff - 0x001
(topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)) { || (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be))
/* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0 {
and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */ /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
/* Special cases: (x < 0x1p-1022 or inf or nan) or and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
(|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */ /* Special cases: (x < 0x1p-1022 or inf or nan) or
if (UNLIKELY(zeroinfnan(iy))) { (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
if (2 * iy == 0) if (unlikely (zeroinfnan (iy)))
return issignaling_inline(x) ? x + y : 1.0; {
if (ix == asuint64(1.0)) if (2 * iy == 0)
return issignaling_inline(y) ? x + y : 1.0; return issignaling_inline (x) ? x + y : 1.0;
if (2 * ix > 2 * asuint64(INFINITY) || if (ix == asuint64 (1.0))
2 * iy > 2 * asuint64(INFINITY)) return issignaling_inline (y) ? x + y : 1.0;
return x + y; if (2 * ix > 2 * asuint64 (INFINITY)
if (2 * ix == 2 * asuint64(1.0)) || 2 * iy > 2 * asuint64 (INFINITY))
return 1.0; return x + y;
if ((2 * ix < 2 * asuint64(1.0)) == !(iy >> 63)) if (2 * ix == 2 * asuint64 (1.0))
return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */ return 1.0;
return y * y; if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63))
} return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
if (UNLIKELY(zeroinfnan(ix))) { return y * y;
double_t x2 = x * x;
if (ix >> 63 && checkint(iy) == 1)
x2 = -x2;
/* Without the barrier some versions of clang hoist the 1/x2 and
thus division by zero exception can be signaled spuriously. */
return iy >> 63 ? fp_barrier(1 / x2) : x2;
}
/* Here x and y are non-zero finite. */
if (ix >> 63) {
/* Finite x < 0. */
int yint = checkint(iy);
if (yint == 0)
return __math_invalid(x);
if (yint == 1)
sign_bias = SIGN_BIAS;
ix &= 0x7fffffffffffffff;
topx &= 0x7ff;
}
if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
/* Note: sign_bias == 0 here because y is not odd. */
if (ix == asuint64(1.0))
return 1.0;
if ((topy & 0x7ff) < 0x3be) {
/* |y| < 2^-65, x^y ~= 1 + y*log(x). */
if (WANT_ROUNDING)
return ix > asuint64(1.0) ? 1.0 + y :
1.0 - y;
else
return 1.0;
}
return (ix > asuint64(1.0)) == (topy < 0x800) ?
__math_oflow(0) :
__math_uflow(0);
}
if (topx == 0) {
/* Normalize subnormal x so exponent becomes negative. */
ix = asuint64(x * 0x1p52);
ix &= 0x7fffffffffffffff;
ix -= 52ULL << 52;
}
} }
if (unlikely (zeroinfnan (ix)))
{
double_t x2 = x * x;
if (ix >> 63 && checkint (iy) == 1)
{
x2 = -x2;
sign_bias = 1;
}
if (WANT_ERRNO && 2 * ix == 0 && iy >> 63)
return __math_divzero (sign_bias);
/* Without the barrier some versions of clang hoist the 1/x2 and
thus division by zero exception can be signaled spuriously. */
return iy >> 63 ? opt_barrier_double (1 / x2) : x2;
}
/* Here x and y are non-zero finite. */
if (ix >> 63)
{
/* Finite x < 0. */
int yint = checkint (iy);
if (yint == 0)
return __math_invalid (x);
if (yint == 1)
sign_bias = SIGN_BIAS;
ix &= 0x7fffffffffffffff;
topx &= 0x7ff;
}
if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)
{
/* Note: sign_bias == 0 here because y is not odd. */
if (ix == asuint64 (1.0))
return 1.0;
if ((topy & 0x7ff) < 0x3be)
{
/* |y| < 2^-65, x^y ~= 1 + y*log(x). */
if (WANT_ROUNDING)
return ix > asuint64 (1.0) ? 1.0 + y : 1.0 - y;
else
return 1.0;
}
return (ix > asuint64 (1.0)) == (topy < 0x800) ? __math_oflow (0)
: __math_uflow (0);
}
if (topx == 0)
{
/* Normalize subnormal x so exponent becomes negative. */
/* Without the barrier some versions of clang evaluate the mul
unconditionally causing spurious overflow exceptions. */
ix = asuint64 (opt_barrier_double (x) * 0x1p52);
ix &= 0x7fffffffffffffff;
ix -= 52ULL << 52;
}
}
double_t lo; double_t lo;
double_t hi = log_inline(ix, &lo); double_t hi = log_inline (ix, &lo);
double_t ehi, elo; double_t ehi, elo;
#if __FP_FAST_FMA #if HAVE_FAST_FMA
ehi = y * hi; ehi = y * hi;
elo = y * lo + __builtin_fma(y, hi, -ehi); elo = y * lo + fma (y, hi, -ehi);
#else #else
double_t yhi = asdouble(iy & -1ULL << 27); double_t yhi = asdouble (iy & -1ULL << 27);
double_t ylo = y - yhi; double_t ylo = y - yhi;
double_t lhi = asdouble(asuint64(hi) & -1ULL << 27); double_t lhi = asdouble (asuint64 (hi) & -1ULL << 27);
double_t llo = hi - lhi + lo; double_t llo = hi - lhi + lo;
ehi = yhi * lhi; ehi = yhi * lhi;
elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */ elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
#endif #endif
return exp_inline(ehi, elo, sign_bias); return exp_inline (ehi, elo, sign_bias);
} }
__weak_reference(pow, __pow_finite); #if USE_GLIBC_ABI
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 strong_alias (pow, __pow_finite)
__weak_reference(pow, powl); hidden_alias (pow, __ieee754_pow)
# if LDBL_MANT_DIG == 53
long double powl (long double x, long double y) { return pow (x, y); }
# endif
#endif #endif

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,22 +25,16 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/tinymath/pow_data.internal.h" #include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice"); __static_yoink("arm_optimized_routines_notice");
/*
* Data for the log part of pow.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define N (1 << POW_LOG_TABLE_BITS) #define N (1 << POW_LOG_TABLE_BITS)
const struct pow_log_data __pow_log_data = { const struct pow_log_data __pow_log_data = {
.ln2hi = 0x1.62e42fefa3800p-1, .ln2hi = 0x1.62e42fefa3800p-1,
.ln2lo = 0x1.ef35793c76730p-45, .ln2lo = 0x1.ef35793c76730p-45,
.poly = { .poly = {
#if N == 128 && POW_LOG_POLY_ORDER == 8
// relative error: 0x1.11922ap-70 // relative error: 0x1.11922ap-70
// in -0x1.6bp-8 0x1.6bp-8 // in -0x1.6bp-8 0x1.6bp-8
// Coefficients are scaled to match the scaling during evaluation. // Coefficients are scaled to match the scaling during evaluation.
@ -51,6 +45,7 @@ const struct pow_log_data __pow_log_data = {
-0x1.555555529a47ap-3 * 4, -0x1.555555529a47ap-3 * 4,
0x1.2495b9b4845e9p-3 * -8, 0x1.2495b9b4845e9p-3 * -8,
-0x1.0002b8b263fc3p-3 * -8, -0x1.0002b8b263fc3p-3 * -8,
#endif
}, },
/* Algorithm: /* Algorithm:
@ -75,6 +70,7 @@ the last few bits of logc are rounded away so k*ln2hi + logc has no rounding
error and the interval for z is selected such that near x == 1, where log(x) error and the interval for z is selected such that near x == 1, where log(x)
is tiny, large cancellation error is avoided in logc + poly(z/c - 1). */ is tiny, large cancellation error is avoided in logc + poly(z/c - 1). */
.tab = { .tab = {
#if N == 128
#define A(a, b, c) {a, 0, b, c}, #define A(a, b, c) {a, 0, b, c},
A(0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48) A(0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48)
A(0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46) A(0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46)
@ -204,5 +200,6 @@ A(0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45)
A(0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45) A(0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45)
A(0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46) A(0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46)
A(0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47) A(0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47)
#endif
}, },
}; };

View file

@ -1,20 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_POW_DATA_INTERNAL_H_
#define COSMOPOLITAN_LIBC_TINYMATH_POW_DATA_INTERNAL_H_
#define POW_LOG_TABLE_BITS 7
#define POW_LOG_POLY_ORDER 8
COSMOPOLITAN_C_START_
/* Lookup data for the log part of pow(). Only declared here; the
   object itself is defined in a separate translation unit. */
extern const struct pow_log_data {
/* ln(2) split in two parts: ln2hi carries the leading bits and ln2lo
   the small remainder. */
double ln2hi;
double ln2lo;
/* Polynomial coefficients; one slot fewer than POW_LOG_POLY_ORDER
   because the leading coefficient is not stored. */
double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
/* Note: the pad field is unused, but allows slightly faster indexing. */
/* One entry per subinterval, 1 << POW_LOG_TABLE_BITS entries in total.
   Presumably invc approximates 1/c and logc + logctail approximate
   log(c) for the subinterval's center c -- TODO confirm against the
   table generator. */
struct {
double invc, pad, logc, logctail;
} tab[1 << POW_LOG_TABLE_BITS];
} __pow_log_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_POW_DATA_INTERNAL_H_ */

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,19 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/intrin/likely.h" #include "libc/tinymath/arm.internal.h"
#include "libc/math.h"
#include "libc/tinymath/exp2f_data.internal.h"
#include "libc/tinymath/exp_data.internal.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/powf_data.internal.h"
__static_yoink("arm_optimized_routines_notice"); __static_yoink("arm_optimized_routines_notice");
/*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/* /*
POWF_LOG2_POLY_ORDER = 5 POWF_LOG2_POLY_ORDER = 5
EXP2F_TABLE_BITS = 5 EXP2F_TABLE_BITS = 5
@ -55,37 +45,39 @@ relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).)
/* Subnormal input is normalized so ix has negative biased exponent. /* Subnormal input is normalized so ix has negative biased exponent.
Output is multiplied by N (POWF_SCALE) if TOINT_INTRINSICS is set. */ Output is multiplied by N (POWF_SCALE) if TOINT_INTRINSICS is set. */
static inline double_t log2_inline(uint32_t ix) static inline double_t
log2_inline (uint32_t ix)
{ {
double_t z, r, r2, r4, p, q, y, y0, invc, logc; /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
uint32_t iz, top, tmp; double_t z, r, r2, r4, p, q, y, y0, invc, logc;
int k, i; uint32_t iz, top, tmp;
int k, i;
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact. /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
The range is split into N subintervals. The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */ The ith subinterval contains z and c is near its center. */
tmp = ix - OFF; tmp = ix - OFF;
i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N; i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N;
top = tmp & 0xff800000; top = tmp & 0xff800000;
iz = ix - top; iz = ix - top;
k = (int32_t)top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */ k = (int32_t) top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */
invc = T[i].invc; invc = T[i].invc;
logc = T[i].logc; logc = T[i].logc;
z = (double_t)asfloat(iz); z = (double_t) asfloat (iz);
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */ /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
r = z * invc - 1; r = z * invc - 1;
y0 = logc + (double_t)k; y0 = logc + (double_t) k;
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */ /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
r2 = r * r; r2 = r * r;
y = A[0] * r + A[1]; y = A[0] * r + A[1];
p = A[2] * r + A[3]; p = A[2] * r + A[3];
r4 = r2 * r2; r4 = r2 * r2;
q = A[4] * r + y0; q = A[4] * r + y0;
q = p * r2 + q; q = p * r2 + q;
y = y * r4 + q; y = y * r4 + q;
return y; return y;
} }
#undef N #undef N
@ -97,124 +89,164 @@ static inline double_t log2_inline(uint32_t ix)
/* The output of log2 and thus the input of exp2 is either scaled by N /* The output of log2 and thus the input of exp2 is either scaled by N
(in case of fast toint intrinsics) or not. The unscaled xd must be (in case of fast toint intrinsics) or not. The unscaled xd must be
in [-1021,1023], sign_bias sets the sign of the result. */ in [-1021,1023], sign_bias sets the sign of the result. */
static inline float exp2_inline(double_t xd, uint32_t sign_bias) static inline float
exp2_inline (double_t xd, uint32_t sign_bias)
{ {
uint64_t ki, ski, t; uint64_t ki, ski, t;
double_t kd, z, r, r2, y, s; /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, z, r, r2, y, s;
#if TOINT_INTRINSICS #if TOINT_INTRINSICS
#define C __exp2f_data.poly_scaled # define C __exp2f_data.poly_scaled
/* N*x = k + r with r in [-1/2, 1/2] */ /* N*x = k + r with r in [-1/2, 1/2] */
kd = roundtoint(xd); /* k */ kd = roundtoint (xd); /* k */
ki = converttoint(xd); ki = converttoint (xd);
#else #else
#define C __exp2f_data.poly # define C __exp2f_data.poly
#define SHIFT __exp2f_data.shift_scaled # define SHIFT __exp2f_data.shift_scaled
/* x = k/N + r with r in [-1/(2N), 1/(2N)] */ /* x = k/N + r with r in [-1/(2N), 1/(2N)] */
kd = eval_as_double(xd + SHIFT); kd = eval_as_double (xd + SHIFT);
ki = asuint64(kd); ki = asuint64 (kd);
kd -= SHIFT; /* k/N */ kd -= SHIFT; /* k/N */
#endif #endif
r = xd - kd; r = xd - kd;
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
t = T[ki % N]; t = T[ki % N];
ski = ki + sign_bias; ski = ki + sign_bias;
t += ski << (52 - EXP2F_TABLE_BITS); t += ski << (52 - EXP2F_TABLE_BITS);
s = asdouble(t); s = asdouble (t);
z = C[0] * r + C[1]; z = C[0] * r + C[1];
r2 = r * r; r2 = r * r;
y = C[2] * r + 1; y = C[2] * r + 1;
y = z * r2 + y; y = z * r2 + y;
y = y * s; y = y * s;
return eval_as_float(y); return eval_as_float (y);
} }
/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
the bit representation of a non-zero finite floating-point value. */ the bit representation of a non-zero finite floating-point value. */
static inline int checkint(uint32_t iy) static inline int
checkint (uint32_t iy)
{ {
int e = iy >> 23 & 0xff; int e = iy >> 23 & 0xff;
if (e < 0x7f) if (e < 0x7f)
return 0; return 0;
if (e > 0x7f + 23) if (e > 0x7f + 23)
return 2; return 2;
if (iy & ((1 << (0x7f + 23 - e)) - 1)) if (iy & ((1 << (0x7f + 23 - e)) - 1))
return 0; return 0;
if (iy & (1 << (0x7f + 23 - e))) if (iy & (1 << (0x7f + 23 - e)))
return 1; return 1;
return 2; return 2;
} }
static inline int zeroinfnan(uint32_t ix) static inline int
zeroinfnan (uint32_t ix)
{ {
return 2 * ix - 1 >= 2u * 0x7f800000 - 1; return 2 * ix - 1 >= 2u * 0x7f800000 - 1;
} }
/** /**
* Returns 𝑥^𝑦. * Returns 𝑥^𝑦.
* @note should take ~16ns *
* - ULP error: 0.82 (~ 0.5 + relerr*2^24)
* - relerr: 1.27 * 2^-26 (Relative error ~= 128*Ln2*relerr_log2 + relerr_exp2)
* - relerr_log2: 1.83 * 2^-33 (Relative error of logx.)
* - relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).)
*
* @raise ERANGE on overflow or underflow
* @raise EDOM if x is negative and y is a finite non-integer
*/ */
float powf(float x, float y) float
powf (float x, float y)
{ {
uint32_t sign_bias = 0; uint32_t sign_bias = 0;
uint32_t ix, iy; uint32_t ix, iy;
ix = asuint(x); ix = asuint (x);
iy = asuint(y); iy = asuint (y);
if (UNLIKELY(ix - 0x00800000 >= 0x7f800000 - 0x00800000 || if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000 || zeroinfnan (iy)))
zeroinfnan(iy))) { {
/* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */ /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
if (UNLIKELY(zeroinfnan(iy))) { if (unlikely (zeroinfnan (iy)))
if (2 * iy == 0) {
return issignalingf_inline(x) ? x + y : 1.0f; if (2 * iy == 0)
if (ix == 0x3f800000) return issignalingf_inline (x) ? x + y : 1.0f;
return issignalingf_inline(y) ? x + y : 1.0f; if (ix == 0x3f800000)
if (2 * ix > 2u * 0x7f800000 || return issignalingf_inline (y) ? x + y : 1.0f;
2 * iy > 2u * 0x7f800000) if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000)
return x + y; return x + y;
if (2 * ix == 2 * 0x3f800000) if (2 * ix == 2 * 0x3f800000)
return 1.0f; return 1.0f;
if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000)) if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */ return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
return y * y; return y * y;
}
if (UNLIKELY(zeroinfnan(ix))) {
float_t x2 = x * x;
if (ix & 0x80000000 && checkint(iy) == 1)
x2 = -x2;
/* Without the barrier some versions of clang hoist the 1/x2 and
thus division by zero exception can be signaled spuriously. */
return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2;
}
/* x and y are non-zero finite. */
if (ix & 0x80000000) {
/* Finite x < 0. */
int yint = checkint(iy);
if (yint == 0)
return __math_invalidf(x);
if (yint == 1)
sign_bias = SIGN_BIAS;
ix &= 0x7fffffff;
}
if (ix < 0x00800000) {
/* Normalize subnormal x so exponent becomes negative. */
ix = asuint(x * 0x1p23f);
ix &= 0x7fffffff;
ix -= 23 << 23;
}
} }
double_t logx = log2_inline(ix); if (unlikely (zeroinfnan (ix)))
double_t ylogx = y * logx; /* cannot overflow, y is single prec. */ {
if (UNLIKELY((asuint64(ylogx) >> 47 & 0xffff) >= float_t x2 = x * x;
asuint64(126.0 * POWF_SCALE) >> 47)) { if (ix & 0x80000000 && checkint (iy) == 1)
/* |y*log(x)| >= 126. */ {
if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE) x2 = -x2;
return __math_oflowf(sign_bias); sign_bias = 1;
if (ylogx <= -150.0 * POWF_SCALE) }
return __math_uflowf(sign_bias); #if WANT_ERRNO
if (2 * ix == 0 && iy & 0x80000000)
return __math_divzerof (sign_bias);
#endif
/* Without the barrier some versions of clang hoist the 1/x2 and
thus division by zero exception can be signaled spuriously. */
return iy & 0x80000000 ? opt_barrier_float (1 / x2) : x2;
} }
return exp2_inline(ylogx, sign_bias); /* x and y are non-zero finite. */
if (ix & 0x80000000)
{
/* Finite x < 0. */
int yint = checkint (iy);
if (yint == 0)
return __math_invalidf (x);
if (yint == 1)
sign_bias = SIGN_BIAS;
ix &= 0x7fffffff;
}
if (ix < 0x00800000)
{
/* Normalize subnormal x so exponent becomes negative. */
ix = asuint (x * 0x1p23f);
ix &= 0x7fffffff;
ix -= 23 << 23;
}
}
double_t logx = log2_inline (ix);
double_t ylogx = y * logx; /* Note: cannot overflow, y is single prec. */
if (unlikely ((asuint64 (ylogx) >> 47 & 0xffff)
>= asuint64 (126.0 * POWF_SCALE) >> 47))
{
/* |y*log(x)| >= 126. */
if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE)
/* |x^y| > 0x1.ffffffp127. */
return __math_oflowf (sign_bias);
if (WANT_ROUNDING && WANT_ERRNO
&& ylogx > 0x1.fffffffa3aae2p+6 * POWF_SCALE)
/* |x^y| > 0x1.fffffep127, check if we round away from 0. */
if ((!sign_bias
&& eval_as_float (1.0f + opt_barrier_float (0x1p-25f)) != 1.0f)
|| (sign_bias
&& eval_as_float (-1.0f - opt_barrier_float (0x1p-25f))
!= -1.0f))
return __math_oflowf (sign_bias);
if (ylogx <= -150.0 * POWF_SCALE)
return __math_uflowf (sign_bias);
#if WANT_ERRNO_UFLOW
if (ylogx < -149.0 * POWF_SCALE)
return __math_may_uflowf (sign_bias);
#endif
}
return exp2_inline (ylogx, sign_bias);
} }
__weak_reference(powf, __powf_finite); #if USE_GLIBC_ABI
strong_alias (powf, __powf_finite)
hidden_alias (powf, __ieee754_powf)
#endif

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,16 +25,9 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/tinymath/powf_data.internal.h" #include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice"); __static_yoink("arm_optimized_routines_notice");
/*
* Data definition for powf.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
const struct powf_log2_data __powf_log2_data = { const struct powf_log2_data __powf_log2_data = {
.tab = { .tab = {
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE }, { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE },

View file

@ -1,23 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_POWF_DATA_INTERNAL_H_
#define COSMOPOLITAN_LIBC_TINYMATH_POWF_DATA_INTERNAL_H_
#define POWF_LOG2_TABLE_BITS 4
#define POWF_LOG2_POLY_ORDER 5
#if TOINT_INTRINSICS
#define POWF_SCALE_BITS EXP2F_TABLE_BITS
#else
#define POWF_SCALE_BITS 0
#endif
#define POWF_SCALE ((double)(1 << POWF_SCALE_BITS))
COSMOPOLITAN_C_START_
/* Lookup data for the log2 part of powf(). Only declared here; the
   object itself is defined in a separate translation unit. */
extern const struct powf_log2_data {
/* One {invc, logc} pair per subinterval, 1 << POWF_LOG2_TABLE_BITS
   entries in total. Presumably invc approximates 1/c and logc
   approximates log2(c), pre-multiplied by POWF_SCALE -- TODO confirm
   against the table definition. */
struct {
double invc, logc;
} tab[1 << POWF_LOG2_TABLE_BITS];
/* Polynomial coefficients, POWF_LOG2_POLY_ORDER entries. */
double poly[POWF_LOG2_POLY_ORDER];
} __powf_log2_data;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_TINYMATH_POWF_DATA_INTERNAL_H_ */

View file

@ -1,120 +1,42 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for OpenBSD /usr/src/lib/libm/src/ld80/e_powl.c
any purpose with or without fee is hereby granted, provided that the /usr/src/lib/libm/src/ld128/e_powl.c
above copyright notice and this permission notice appear in all copies.
Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
Developed at SunPro, a Sun Microsystems, Inc. business.
Permission to use, copy, modify, and distribute this
software is freely granted, provided that this notice
is preserved.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/errno.h" #include "libc/errno.h"
#include "libc/math.h" #include "libc/math.h"
#include "libc/tinymath/internal.h" #include "libc/tinymath/internal.h"
#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) #include "libc/tinymath/freebsd.internal.h"
#ifdef __x86_64__
/**
 * Returns 𝑥^𝑦 for long double operands using x87 instructions.
 *
 * NaN, infinite and zero operands are resolved by the branch ladder
 * below; only finite non-zero x with finite non-zero y reaches the
 * inline assembly.
 *
 * @param x is the base
 * @param y is the exponent
 * @return x raised to the power y
 * @raise EDOM if x is finite and negative and y is a finite non-integer
 * @raise ERANGE if x is zero and y is finite and negative
 * @note errno is left untouched when __NO_MATH_ERRNO__ is defined
 * @note should take ~56ns
 */
long double powl(long double x, long double y) {
long double t, u;
/* Neither operand is NaN. */
if (!isunordered(x, y)) {
/* y is finite. */
if (!isinf(y)) {
/* x is finite. */
if (!isinf(x)) {
/* x is non-zero. */
if (x) {
/* y is non-zero. */
if (y) {
/* Negative base with a fractional exponent is a domain error. */
if (x < 0 && y != truncl(y)) {
#ifndef __NO_MATH_ERRNO__
errno = EDOM;
#endif
return NAN;
}
/* u = y * log2(|x|): fyl2x computes st(1) * log2(st(0)). */
asm("fyl2x" : "=t"(u) : "0"(fabsl(x)), "u"(y) : "st(1)");
/* t = remainder of u divided by 1.0, i.e. the fractional part of u.
   NOTE(review): fprem produces only a partial remainder when the
   operands' exponents are far apart and there is no retry loop
   here -- confirm this is acceptable for large |u|. */
asm("fprem" : "=t"(t) : "0"(u), "u"(1.L));
/* t = 2^t - 1. */
asm("f2xm1" : "=t"(t) : "0"(t));
/* t = (t + 1) * 2^trunc(u), which reassembles 2^u == |x|^y. */
asm("fscale" : "=t"(t) : "0"(t + 1), "u"(u));
if (signbit(x)) {
/* NOTE(review): this check looks unreachable, because a non-zero x
   with its sign bit set already returned above when y was not an
   integer -- confirm. */
if (y != truncl(y)) return -NAN;
/* An odd integer exponent keeps the sign of a negative base.
   NOTE(review): the int64_t conversion presumably assumes y fits in
   64 bits -- confirm the behavior for huge |y|. */
if ((int64_t)y & 1) t = -t;
}
return t;
} else {
/* y is zero: the result is 1 for any finite non-zero x. */
return 1;
}
/* From here on x is zero. */
} else if (y > 0) {
/* Zero to a positive power is zero; only a negative zero raised to
   an odd integer yields negative zero. */
if (signbit(x) && y == truncl(y) && ((int64_t)y & 1)) {
return -0.;
} else {
return 0;
}
} else if (!y) {
/* Zero to the power zero is 1. */
return 1;
} else {
/* Zero to a negative power: report ERANGE and return an infinity,
   signed like x only for an odd integer y. */
#ifndef __NO_MATH_ERRNO__
errno = ERANGE;
#endif
if (y == truncl(y) && ((int64_t)y & 1)) {
return copysignl(INFINITY, x);
} else {
return INFINITY;
}
}
/* x is negative infinity, y is finite. */
} else if (signbit(x)) {
if (!y) return 1;
/* The magnitude is 0 for negative y and infinity otherwise; the
   result is negated only when y is an odd integer. */
x = y < 0 ? 0 : INFINITY;
if (y == truncl(y) && ((int64_t)y & 1)) x = -x;
return x;
/* x is positive infinity, y is finite. */
} else if (y < 0) {
return 0;
} else if (y > 0) {
return INFINITY;
} else {
return 1;
}
} else {
/* y is infinite: the outcome depends only on how |x| compares
   with 1 and on the sign of y. */
x = fabsl(x);
if (x < 1) return signbit(y) ? INFINITY : 0;
if (x > 1) return signbit(y) ? 0 : INFINITY;
return 1;
}
/* At least one operand is NaN: the result is still 1 when y is zero
   or x is exactly 1. */
} else if (!y || x == 1) {
return 1;
} else {
return NAN;
}
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
__static_yoink("musl_libc_notice");
__static_yoink("openbsd_libm_notice"); __static_yoink("openbsd_libm_notice");
__static_yoink("musl_libc_notice");
__static_yoink("fdlibm_notice");
#if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_powl.c */
/*
* Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* powl.c /* powl.c
* *
* Power function, long double precision * Power function, long double precision
@ -606,35 +528,9 @@ static long double powil(long double x, int nn)
return y; return y;
} }
__weak_reference(powl, __powl_finite);
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 #elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
#include "libc/tinymath/freebsd.internal.h"
/*-
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/*
* Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* powl(x,y) return x**y /* powl(x,y) return x**y
* *
@ -1045,8 +941,6 @@ powl(long double x, long double y)
return s * z; return s * z;
} }
#endif /* __x86_64__ */
__weak_reference(powl, __powl_finite); __weak_reference(powl, __powl_finite);
#endif /* long double is long */ #endif /* __x86_64__ */

View file

@ -30,7 +30,6 @@
#include "libc/tinymath/internal.h" #include "libc/tinymath/internal.h"
__static_yoink("musl_libc_notice"); __static_yoink("musl_libc_notice");
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
#define EPS DBL_EPSILON #define EPS DBL_EPSILON
#elif FLT_EVAL_METHOD==2 #elif FLT_EVAL_METHOD==2

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines Optimized Routines
Copyright (c) 1999-2022, Arm Limited. Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the
@ -25,15 +25,19 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "libc/intrin/likely.h"
#include "libc/math.h"
#include "libc/tinymath/sincosf.internal.h" #include "libc/tinymath/sincosf.internal.h"
__static_yoink("arm_optimized_routines_notice"); __static_yoink("arm_optimized_routines_notice");
/* Fast sincosf implementation. Worst-case ULP is 0.5607, maximum relative /**
error is 0.5303 * 2^-23. A single-step range reduction is used for * Returns sine and cosine of y.
small values. Large inputs have their range reduced using fast integer *
arithmetic. */ * This is a fast sincosf implementation. Worst-case ULP is 0.5607,
* maximum relative error is 0.5303 * 2^-23. A single-step range
* reduction is used for small values. Large inputs have their range
* reduced using fast integer arithmetic.
*
* @raise EDOM if y is an infinity
*/
void void
sincosf (float y, float *sinp, float *cosp) sincosf (float y, float *sinp, float *cosp)
{ {
@ -46,11 +50,11 @@ sincosf (float y, float *sinp, float *cosp)
{ {
double x2 = x * x; double x2 = x * x;
if (UNLIKELY (abstop12 (y) < abstop12 (0x1p-12f))) if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
{ {
if (UNLIKELY (abstop12 (y) < abstop12 (0x1p-126f))) if (unlikely (abstop12 (y) < abstop12 (0x1p-126f)))
/* Force underflow for tiny y. */ /* Force underflow for tiny y. */
FORCE_EVAL (x2); force_eval_float (x2);
*sinp = y; *sinp = y;
*cosp = 1.0f; *cosp = 1.0f;
return; return;
@ -70,7 +74,7 @@ sincosf (float y, float *sinp, float *cosp)
sincosf_poly (x * s, x * x, p, n, sinp, cosp); sincosf_poly (x * s, x * x, p, n, sinp, cosp);
} }
else if (LIKELY (abstop12 (y) < abstop12 (INFINITY))) else if (likely (abstop12 (y) < abstop12 (INFINITY)))
{ {
uint32_t xi = asuint (y); uint32_t xi = asuint (y);
int sign = xi >> 31; int sign = xi >> 31;

Some files were not shown because too many files have changed in this diff Show more