mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-02-07 06:53:33 +00:00
Make quality improvements
- Write some more unit tests
- memcpy() on ARM is now faster
- Address the Musl complex math FIXME comments
- Some libm funcs like pow() now support setting errno
- Import the latest and greatest math functions from ARM
- Use more accurate atan2f() and log1pf() implementations
- atoi() and atol() will no longer saturate or clobber errno
This commit is contained in:
parent
af8f2bd19f
commit
592f6ebc20
122 changed files with 6305 additions and 3859 deletions
3
Makefile
3
Makefile
|
@ -206,7 +206,7 @@ endif
|
||||||
.UNVEIL += \
|
.UNVEIL += \
|
||||||
libc/integral \
|
libc/integral \
|
||||||
libc/stdbool.h \
|
libc/stdbool.h \
|
||||||
rwc:/dev/shm \
|
rwc:/dev/shm \
|
||||||
rx:.cosmocc \
|
rx:.cosmocc \
|
||||||
rx:build/bootstrap \
|
rx:build/bootstrap \
|
||||||
r:build/portcosmo.h \
|
r:build/portcosmo.h \
|
||||||
|
@ -297,6 +297,7 @@ include third_party/nsync/testing/BUILD.mk
|
||||||
include libc/testlib/BUILD.mk
|
include libc/testlib/BUILD.mk
|
||||||
include tool/viz/lib/BUILD.mk
|
include tool/viz/lib/BUILD.mk
|
||||||
include tool/args/BUILD.mk
|
include tool/args/BUILD.mk
|
||||||
|
include test/math/BUILD.mk
|
||||||
include test/posix/BUILD.mk
|
include test/posix/BUILD.mk
|
||||||
include test/libcxx/BUILD.mk
|
include test/libcxx/BUILD.mk
|
||||||
include test/tool/args/BUILD.mk
|
include test/tool/args/BUILD.mk
|
||||||
|
|
|
@ -95,7 +95,6 @@ DEFAULT_CCFLAGS += \
|
||||||
DEFAULT_COPTS ?= \
|
DEFAULT_COPTS ?= \
|
||||||
-fno-ident \
|
-fno-ident \
|
||||||
-fno-common \
|
-fno-common \
|
||||||
-fno-math-errno \
|
|
||||||
-fno-gnu-unique \
|
-fno-gnu-unique \
|
||||||
-fstrict-aliasing \
|
-fstrict-aliasing \
|
||||||
-fstrict-overflow \
|
-fstrict-overflow \
|
||||||
|
|
|
@ -16,44 +16,28 @@
|
||||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/errno.h"
|
|
||||||
#include "libc/fmt/conv.h"
|
#include "libc/fmt/conv.h"
|
||||||
#include "libc/limits.h"
|
|
||||||
#include "libc/stdckdint.h"
|
|
||||||
#include "libc/str/str.h"
|
#include "libc/str/str.h"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Decodes decimal integer from ASCII string.
|
* Turns string into int.
|
||||||
*
|
*
|
||||||
* atoi 10⁸ 22𝑐 7𝑛𝑠
|
* Decimal is the only radix supported. Leading whitespace (as specified
|
||||||
* strtol 10⁸ 37𝑐 12𝑛𝑠
|
* by the isspace() function) is skipped over. Unlike strtol(), the atoi
|
||||||
* strtoul 10⁸ 35𝑐 11𝑛𝑠
|
* function has undefined behavior on error and it never changes `errno`
|
||||||
* wcstol 10⁸ 30𝑐 10𝑛𝑠
|
|
||||||
* wcstoul 10⁸ 30𝑐 10𝑛𝑠
|
|
||||||
* strtoimax 10⁸ 80𝑐 26𝑛𝑠
|
|
||||||
* strtoumax 10⁸ 78𝑐 25𝑛𝑠
|
|
||||||
* wcstoimax 10⁸ 77𝑐 25𝑛𝑠
|
|
||||||
* wcstoumax 10⁸ 76𝑐 25𝑛𝑠
|
|
||||||
*
|
*
|
||||||
* @param s is a non-null nul-terminated string
|
* @param nptr is a non-null nul-terminated string
|
||||||
* @return the decoded signed saturated integer
|
* @return the decoded signed integer (no saturation on overflow)
|
||||||
* @raise ERANGE on overflow
|
|
||||||
*/
|
*/
|
||||||
int atoi(const char *s) {
|
int atoi(const char *nptr) {
|
||||||
int x, c, d;
|
int x, c, d;
|
||||||
do c = *s++;
|
do c = *nptr++;
|
||||||
while (c == ' ' || c == '\t');
|
while (isspace(c));
|
||||||
d = c == '-' ? -1 : 1;
|
d = c == '-' ? -1 : 1;
|
||||||
if (c == '-' || c == '+') c = *s++;
|
if (c == '-' || c == '+') c = *nptr++;
|
||||||
for (x = 0; isdigit(c); c = *s++) {
|
for (x = 0; isdigit(c); c = *nptr++) {
|
||||||
if (ckd_mul(&x, x, 10) || ckd_add(&x, x, (c - '0') * d)) {
|
x *= 10;
|
||||||
errno = ERANGE;
|
x += (c - '0') * d;
|
||||||
if (d > 0) {
|
|
||||||
return INT_MAX;
|
|
||||||
} else {
|
|
||||||
return INT_MIN;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,34 +16,29 @@
|
||||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/errno.h"
|
|
||||||
#include "libc/fmt/conv.h"
|
#include "libc/fmt/conv.h"
|
||||||
#include "libc/limits.h"
|
|
||||||
#include "libc/stdckdint.h"
|
|
||||||
#include "libc/str/str.h"
|
#include "libc/str/str.h"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Decodes decimal integer from ASCII string.
|
* Turns string into long.
|
||||||
*
|
*
|
||||||
* @param s is a non-null nul-terminated string
|
* Decimal is the only radix supported. Leading whitespace (as specified
|
||||||
|
* by the isspace() function) is skipped over. Unlike strtol(), the atol
|
||||||
|
* function has undefined behavior on error and it never changes `errno`
|
||||||
|
*
|
||||||
|
* @param nptr is a non-null nul-terminated string
|
||||||
* @return the decoded signed saturated integer
|
* @return the decoded signed integer (no saturation on overflow)
|
||||||
*/
|
*/
|
||||||
long atol(const char *s) {
|
long atol(const char *nptr) {
|
||||||
long x;
|
long x;
|
||||||
int c, d;
|
int c, d;
|
||||||
do c = *s++;
|
do c = *nptr++;
|
||||||
while (c == ' ' || c == '\t');
|
while (isspace(c));
|
||||||
d = c == '-' ? -1 : 1;
|
d = c == '-' ? -1 : 1;
|
||||||
if (c == '-' || c == '+') c = *s++;
|
if (c == '-' || c == '+') c = *nptr++;
|
||||||
for (x = 0; isdigit(c); c = *s++) {
|
for (x = 0; isdigit(c); c = *nptr++) {
|
||||||
if (ckd_mul(&x, x, 10) || ckd_add(&x, x, (c - '0') * d)) {
|
x *= 10;
|
||||||
errno = ERANGE;
|
x += (c - '0') * d;
|
||||||
if (d > 0) {
|
|
||||||
return LONG_MAX;
|
|
||||||
} else {
|
|
||||||
return LONG_MIN;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -80,11 +80,12 @@ ENTRY (__memcpy_aarch64_simd)
|
||||||
PTR_ARG (1)
|
PTR_ARG (1)
|
||||||
SIZE_ARG (2)
|
SIZE_ARG (2)
|
||||||
add srcend, src, count
|
add srcend, src, count
|
||||||
add dstend, dstin, count
|
|
||||||
cmp count, 128
|
cmp count, 128
|
||||||
b.hi L(copy_long)
|
b.hi L(copy_long)
|
||||||
|
add dstend, dstin, count
|
||||||
cmp count, 32
|
cmp count, 32
|
||||||
b.hi L(copy32_128)
|
b.hi L(copy32_128)
|
||||||
|
nop
|
||||||
|
|
||||||
/* Small copies: 0..32 bytes. */
|
/* Small copies: 0..32 bytes. */
|
||||||
cmp count, 16
|
cmp count, 16
|
||||||
|
@ -95,6 +96,18 @@ ENTRY (__memcpy_aarch64_simd)
|
||||||
str B_q, [dstend, -16]
|
str B_q, [dstend, -16]
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
.p2align 4
|
||||||
|
/* Medium copies: 33..128 bytes. */
|
||||||
|
L(copy32_128):
|
||||||
|
ldp A_q, B_q, [src]
|
||||||
|
ldp C_q, D_q, [srcend, -32]
|
||||||
|
cmp count, 64
|
||||||
|
b.hi L(copy128)
|
||||||
|
stp A_q, B_q, [dstin]
|
||||||
|
stp C_q, D_q, [dstend, -32]
|
||||||
|
ret
|
||||||
|
|
||||||
|
.p2align 4
|
||||||
/* Copy 8-15 bytes. */
|
/* Copy 8-15 bytes. */
|
||||||
L(copy16):
|
L(copy16):
|
||||||
tbz count, 3, L(copy8)
|
tbz count, 3, L(copy8)
|
||||||
|
@ -104,7 +117,6 @@ L(copy16):
|
||||||
str A_h, [dstend, -8]
|
str A_h, [dstend, -8]
|
||||||
ret
|
ret
|
||||||
|
|
||||||
.p2align 3
|
|
||||||
/* Copy 4-7 bytes. */
|
/* Copy 4-7 bytes. */
|
||||||
L(copy8):
|
L(copy8):
|
||||||
tbz count, 2, L(copy4)
|
tbz count, 2, L(copy4)
|
||||||
|
@ -114,6 +126,19 @@ L(copy8):
|
||||||
str B_lw, [dstend, -4]
|
str B_lw, [dstend, -4]
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
/* Copy 65..128 bytes. */
|
||||||
|
L(copy128):
|
||||||
|
ldp E_q, F_q, [src, 32]
|
||||||
|
cmp count, 96
|
||||||
|
b.ls L(copy96)
|
||||||
|
ldp G_q, H_q, [srcend, -64]
|
||||||
|
stp G_q, H_q, [dstend, -64]
|
||||||
|
L(copy96):
|
||||||
|
stp A_q, B_q, [dstin]
|
||||||
|
stp E_q, F_q, [dstin, 32]
|
||||||
|
stp C_q, D_q, [dstend, -32]
|
||||||
|
ret
|
||||||
|
|
||||||
/* Copy 0..3 bytes using a branchless sequence. */
|
/* Copy 0..3 bytes using a branchless sequence. */
|
||||||
L(copy4):
|
L(copy4):
|
||||||
cbz count, L(copy0)
|
cbz count, L(copy0)
|
||||||
|
@ -127,33 +152,11 @@ L(copy4):
|
||||||
L(copy0):
|
L(copy0):
|
||||||
ret
|
ret
|
||||||
|
|
||||||
.p2align 4
|
.p2align 3
|
||||||
/* Medium copies: 33..128 bytes. */
|
|
||||||
L(copy32_128):
|
|
||||||
ldp A_q, B_q, [src]
|
|
||||||
ldp C_q, D_q, [srcend, -32]
|
|
||||||
cmp count, 64
|
|
||||||
b.hi L(copy128)
|
|
||||||
stp A_q, B_q, [dstin]
|
|
||||||
stp C_q, D_q, [dstend, -32]
|
|
||||||
ret
|
|
||||||
|
|
||||||
.p2align 4
|
|
||||||
/* Copy 65..128 bytes. */
|
|
||||||
L(copy128):
|
|
||||||
ldp E_q, F_q, [src, 32]
|
|
||||||
cmp count, 96
|
|
||||||
b.ls L(copy96)
|
|
||||||
ldp G_q, H_q, [srcend, -64]
|
|
||||||
stp G_q, H_q, [dstend, -64]
|
|
||||||
L(copy96):
|
|
||||||
stp A_q, B_q, [dstin]
|
|
||||||
stp E_q, F_q, [dstin, 32]
|
|
||||||
stp C_q, D_q, [dstend, -32]
|
|
||||||
ret
|
|
||||||
|
|
||||||
/* Copy more than 128 bytes. */
|
/* Copy more than 128 bytes. */
|
||||||
L(copy_long):
|
L(copy_long):
|
||||||
|
add dstend, dstin, count
|
||||||
|
|
||||||
/* Use backwards copy if there is an overlap. */
|
/* Use backwards copy if there is an overlap. */
|
||||||
sub tmp1, dstin, src
|
sub tmp1, dstin, src
|
||||||
cmp tmp1, count
|
cmp tmp1, count
|
||||||
|
@ -190,6 +193,9 @@ L(copy64_from_end):
|
||||||
stp A_q, B_q, [dstend, -32]
|
stp A_q, B_q, [dstend, -32]
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
.p2align 4
|
||||||
|
nop
|
||||||
|
|
||||||
/* Large backwards copy for overlapping copies.
|
/* Large backwards copy for overlapping copies.
|
||||||
Copy 16 bytes and then align srcend to 16-byte alignment. */
|
Copy 16 bytes and then align srcend to 16-byte alignment. */
|
||||||
L(copy_long_backwards):
|
L(copy_long_backwards):
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
|
2
libc/intrin/fbclibm.c
Normal file
2
libc/intrin/fbclibm.c
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
__notice(freebsd_complex_notice, "FreeBSD Complex Math (BSD-2 License)\n\
|
||||||
|
Copyright (c) 2012 Stephen Montgomery-Smith <stephen@FreeBSD.ORG>");
|
65
libc/math.h
65
libc/math.h
|
@ -9,15 +9,47 @@
|
||||||
#define M_LOG10E 0.43429448190325182765 /* log₁₀𝑒 */
|
#define M_LOG10E 0.43429448190325182765 /* log₁₀𝑒 */
|
||||||
#define M_LN2 0.69314718055994530942 /* logₑ2 */
|
#define M_LN2 0.69314718055994530942 /* logₑ2 */
|
||||||
#define M_LN10 2.30258509299404568402 /* logₑ10 */
|
#define M_LN10 2.30258509299404568402 /* logₑ10 */
|
||||||
#define M_PI 3.14159265358979323846 /* pi */
|
#define M_PI 3.14159265358979323846 /* 𝜋 */
|
||||||
#define M_PI_2 1.57079632679489661923 /* pi/2 */
|
#define M_PI_2 1.57079632679489661923 /* 𝜋/2 */
|
||||||
#define M_PI_4 0.78539816339744830962 /* pi/4 */
|
#define M_PI_4 0.78539816339744830962 /* 𝜋/4 */
|
||||||
#define M_1_PI 0.31830988618379067154 /* 1/pi */
|
#define M_1_PI 0.31830988618379067154 /* 1/𝜋 */
|
||||||
#define M_2_PI 0.63661977236758134308 /* 2/pi */
|
#define M_2_PI 0.63661977236758134308 /* 2/𝜋 */
|
||||||
#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */
|
#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(𝜋) */
|
||||||
#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */
|
#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */
|
||||||
#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */
|
#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */
|
||||||
|
|
||||||
|
#if defined(_GNU_SOURCE) || defined(_COSMO_SOURCE)
|
||||||
|
#define M_Ef 2.7182818284590452354f /* 𝑒 */
|
||||||
|
#define M_LOG2Ef 1.4426950408889634074f /* log₂𝑒 */
|
||||||
|
#define M_LOG10Ef 0.43429448190325182765f /* log₁₀𝑒 */
|
||||||
|
#define M_LN2f 0.69314718055994530942f /* logₑ2 */
|
||||||
|
#define M_LN10f 2.30258509299404568402f /* logₑ10 */
|
||||||
|
#define M_PIf 3.14159265358979323846f /* 𝜋 */
|
||||||
|
#define M_PI_2f 1.57079632679489661923f /* 𝜋/2 */
|
||||||
|
#define M_PI_4f 0.78539816339744830962f /* 𝜋/4 */
|
||||||
|
#define M_1_PIf 0.31830988618379067154f /* 1/𝜋 */
|
||||||
|
#define M_2_PIf 0.63661977236758134308f /* 2/𝜋 */
|
||||||
|
#define M_2_SQRTPIf 1.12837916709551257390f /* 2/sqrt(𝜋) */
|
||||||
|
#define M_SQRT2f 1.41421356237309504880f /* sqrt(2) */
|
||||||
|
#define M_SQRT1_2f 0.70710678118654752440f /* 1/sqrt(2) */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(_GNU_SOURCE) || defined(_COSMO_SOURCE)
|
||||||
|
#define M_El 2.718281828459045235360287471352662498L /* 𝑒 */
|
||||||
|
#define M_LOG2El 1.442695040888963407359924681001892137L /* log₂𝑒 */
|
||||||
|
#define M_LOG10El 0.434294481903251827651128918916605082L /* log₁₀𝑒 */
|
||||||
|
#define M_LN2l 0.693147180559945309417232121458176568L /* logₑ2 */
|
||||||
|
#define M_LN10l 2.302585092994045684017991454684364208L /* logₑ10 */
|
||||||
|
#define M_PIl 3.141592653589793238462643383279502884L /* 𝜋 */
|
||||||
|
#define M_PI_2l 1.570796326794896619231321691639751442L /* 𝜋/2 */
|
||||||
|
#define M_PI_4l 0.785398163397448309615660845819875721L /* 𝜋/4 */
|
||||||
|
#define M_1_PIl 0.318309886183790671537767526745028724L /* 1/𝜋 */
|
||||||
|
#define M_2_PIl 0.636619772367581343075535053490057448L /* 2/𝜋 */
|
||||||
|
#define M_2_SQRTPIl 1.128379167095512573896158903121545172L /* 2/sqrt(𝜋) */
|
||||||
|
#define M_SQRT2l 1.414213562373095048801688724209698079L /* sqrt(2) */
|
||||||
|
#define M_SQRT1_2l 0.707106781186547524400844362104849039L /* 1/sqrt(2) */
|
||||||
|
#endif
|
||||||
|
|
||||||
#define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__
|
#define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__
|
||||||
#define DBL_DIG __DBL_DIG__
|
#define DBL_DIG __DBL_DIG__
|
||||||
#define DBL_EPSILON __DBL_EPSILON__
|
#define DBL_EPSILON __DBL_EPSILON__
|
||||||
|
@ -76,6 +108,27 @@
|
||||||
#define FP_ILOGB0 (-2147483647 - 1)
|
#define FP_ILOGB0 (-2147483647 - 1)
|
||||||
#define FP_ILOGBNAN (-2147483647 - 1)
|
#define FP_ILOGBNAN (-2147483647 - 1)
|
||||||
|
|
||||||
|
#define MATH_ERRNO 1
|
||||||
|
#define MATH_ERREXCEPT 2
|
||||||
|
|
||||||
|
#ifdef __FAST_MATH__
|
||||||
|
#define math_errhandling 0
|
||||||
|
#elif defined(__NO_MATH_ERRNO__)
|
||||||
|
#define math_errhandling (MATH_ERREXCEPT)
|
||||||
|
#else
|
||||||
|
#define math_errhandling (MATH_ERRNO | MATH_ERREXCEPT)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __FP_FAST_FMA
|
||||||
|
#define FP_FAST_FMA 1
|
||||||
|
#endif
|
||||||
|
#ifdef __FP_FAST_FMAF
|
||||||
|
#define FP_FAST_FMAF 1
|
||||||
|
#endif
|
||||||
|
#ifdef __FP_FAST_FMAL
|
||||||
|
#define FP_FAST_FMAL 1
|
||||||
|
#endif
|
||||||
|
|
||||||
COSMOPOLITAN_C_START_
|
COSMOPOLITAN_C_START_
|
||||||
|
|
||||||
#define NAN __builtin_nanf("")
|
#define NAN __builtin_nanf("")
|
||||||
|
|
|
@ -54,6 +54,7 @@ o/$(MODE)/libc/tinymath/loglq.o: private \
|
||||||
|
|
||||||
$(LIBC_TINYMATH_A_OBJS): private \
|
$(LIBC_TINYMATH_A_OBJS): private \
|
||||||
CFLAGS += \
|
CFLAGS += \
|
||||||
|
-fmath-errno \
|
||||||
-fsigned-zeros \
|
-fsigned-zeros \
|
||||||
-ftrapping-math \
|
-ftrapping-math \
|
||||||
-frounding-math \
|
-frounding-math \
|
||||||
|
|
|
@ -5,7 +5,7 @@ MIT OR Apache-2.0 WITH LLVM-exception
|
||||||
MIT License
|
MIT License
|
||||||
-----------
|
-----------
|
||||||
|
|
||||||
Copyright (c) 1999-2022, Arm Limited.
|
Copyright (c) 2018-2024, Arm Limited.
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
|
|
@ -1,33 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
||||||
│ │
|
|
||||||
│ Musl Libc │
|
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
|
||||||
│ │
|
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
|
||||||
│ The above copyright notice and this permission notice shall be │
|
|
||||||
│ included in all copies or substantial portions of the Software. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
|
||||||
│ │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
|
|
||||||
double __math_divzero(uint32_t sign)
|
|
||||||
{
|
|
||||||
return fp_barrier(sign ? -1.0 : 1.0) / 0.0;
|
|
||||||
}
|
|
|
@ -1,33 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
||||||
│ │
|
|
||||||
│ Musl Libc │
|
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
|
||||||
│ │
|
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
|
||||||
│ The above copyright notice and this permission notice shall be │
|
|
||||||
│ included in all copies or substantial portions of the Software. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
|
||||||
│ │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
|
|
||||||
float __math_divzerof(uint32_t sign)
|
|
||||||
{
|
|
||||||
return fp_barrierf(sign ? -1.0f : 1.0f) / 0.0f;
|
|
||||||
}
|
|
|
@ -1,33 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
||||||
│ │
|
|
||||||
│ Musl Libc │
|
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
|
||||||
│ │
|
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
|
||||||
│ The above copyright notice and this permission notice shall be │
|
|
||||||
│ included in all copies or substantial portions of the Software. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
|
||||||
│ │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
|
|
||||||
double __math_invalid(double x)
|
|
||||||
{
|
|
||||||
return (x - x) / (x - x);
|
|
||||||
}
|
|
|
@ -1,33 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
||||||
│ │
|
|
||||||
│ Musl Libc │
|
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
|
||||||
│ │
|
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
|
||||||
│ The above copyright notice and this permission notice shall be │
|
|
||||||
│ included in all copies or substantial portions of the Software. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
|
||||||
│ │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
|
|
||||||
/*
 * Error-handling helper for an invalid argument (single precision).
 *
 * Evaluates (x - x) / (x - x).  For a finite x both operands are zero,
 * so under IEEE 754 arithmetic the quotient is NaN.
 *
 * x: the offending argument.
 * Returns the quotient described above.
 */
float __math_invalidf(float x)
{
	return (x - x) / (x - x);
}
|
|
|
@ -1,33 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
||||||
│ │
|
|
||||||
│ Musl Libc │
|
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
|
||||||
│ │
|
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
|
||||||
│ The above copyright notice and this permission notice shall be │
|
|
||||||
│ included in all copies or substantial portions of the Software. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
|
||||||
│ │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
|
|
||||||
/*
 * Error-handling tail call for a double result that overflows.
 *
 * Delegates to __math_xflow() with the magnitude 0x1p769; that helper
 * multiplies the value by itself, and 2^1538 is far outside the range
 * of double.
 *
 * sign: non-zero requests a negative result.
 */
dontinstrument double __math_oflow(uint32_t sign)
{
	return __math_xflow(sign, 0x1p769);
}
|
|
|
@ -1,33 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
||||||
│ │
|
|
||||||
│ Musl Libc │
|
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
|
||||||
│ │
|
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
|
||||||
│ The above copyright notice and this permission notice shall be │
|
|
||||||
│ included in all copies or substantial portions of the Software. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
|
||||||
│ │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
|
|
||||||
/*
 * Error-handling tail call for a float result that overflows.
 *
 * Delegates to __math_xflowf() with the magnitude 0x1p97f; that helper
 * multiplies the value by itself, and 2^194 is far outside the range
 * of float.
 *
 * sign: non-zero requests a negative result.
 */
dontinstrument float __math_oflowf(uint32_t sign)
{
	return __math_xflowf(sign, 0x1p97f);
}
|
|
|
@ -1,33 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
||||||
│ │
|
|
||||||
│ Musl Libc │
|
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
|
||||||
│ │
|
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
|
||||||
│ The above copyright notice and this permission notice shall be │
|
|
||||||
│ included in all copies or substantial portions of the Software. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
|
||||||
│ │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
|
|
||||||
/*
 * Error-handling tail call for a double result that underflows.
 *
 * Delegates to __math_xflow() with the magnitude 0x1p-767; that helper
 * multiplies the value by itself, and 2^-1534 is far below the smallest
 * positive double.
 *
 * sign: non-zero requests a negative result.
 */
dontinstrument double __math_uflow(uint32_t sign)
{
	return __math_xflow(sign, 0x1p-767);
}
|
|
|
@ -1,33 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
||||||
│ │
|
|
||||||
│ Musl Libc │
|
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
|
||||||
│ │
|
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
|
||||||
│ The above copyright notice and this permission notice shall be │
|
|
||||||
│ included in all copies or substantial portions of the Software. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
|
||||||
│ │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
|
|
||||||
/*
 * Error-handling tail call for a float result that underflows.
 *
 * Delegates to __math_xflowf() with the magnitude 0x1p-95f; that helper
 * multiplies the value by itself, and 2^-190 is far below the smallest
 * positive float.
 *
 * sign: non-zero requests a negative result.
 */
dontinstrument float __math_uflowf(uint32_t sign)
{
	return __math_xflowf(sign, 0x1p-95f);
}
|
|
|
@ -1,33 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
||||||
│ │
|
|
||||||
│ Musl Libc │
|
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
|
||||||
│ │
|
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
|
||||||
│ The above copyright notice and this permission notice shall be │
|
|
||||||
│ included in all copies or substantial portions of the Software. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
|
||||||
│ │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
|
|
||||||
/*
 * Shared worker for the double overflow/underflow helpers.
 *
 * Computes (sign ? -y : y) * y, i.e. y squared with the requested sign.
 * The first factor is routed through fp_barrier() and the product
 * through eval_as_double(); both come from the tinymath internal header.
 * NOTE(review): presumably the barrier keeps the compiler from folding
 * the multiplication at compile time -- confirm against internal.h.
 *
 * sign: non-zero requests a negative result.
 * y:    magnitude to be squared.
 */
dontinstrument double __math_xflow(uint32_t sign, double y)
{
	return eval_as_double(fp_barrier(sign ? -y : y) * y);
}
|
|
|
@ -1,33 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
||||||
│ │
|
|
||||||
│ Musl Libc │
|
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
|
||||||
│ │
|
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
|
||||||
│ The above copyright notice and this permission notice shall be │
|
|
||||||
│ included in all copies or substantial portions of the Software. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
|
||||||
│ │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
|
|
||||||
/*
 * Shared worker for the float overflow/underflow helpers.
 *
 * Computes (sign ? -y : y) * y, i.e. y squared with the requested sign.
 * The first factor is routed through fp_barrierf() and the product
 * through eval_as_float(); both come from the tinymath internal header.
 * NOTE(review): presumably the barrier keeps the compiler from folding
 * the multiplication at compile time -- confirm against internal.h.
 *
 * sign: non-zero requests a negative result.
 * y:    magnitude to be squared.
 */
dontinstrument float __math_xflowf(uint32_t sign, float y)
{
	return eval_as_float(fp_barrierf(sign ? -y : y) * y);
}
|
|
509
libc/tinymath/arm.internal.h
Normal file
509
libc/tinymath/arm.internal.h
Normal file
|
@ -0,0 +1,509 @@
|
||||||
|
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ARM_H_
|
||||||
|
#define COSMOPOLITAN_LIBC_TINYMATH_ARM_H_
|
||||||
|
#include "libc/math.h"
|
||||||
|
COSMOPOLITAN_C_START_
|
||||||
|
|
||||||
|
#define USE_GLIBC_ABI 1
|
||||||
|
|
||||||
|
/* If defined to 1, return correct results for special cases in non-nearest
|
||||||
|
rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f).
|
||||||
|
This may be set to 0 if there is no fenv support or if math functions only
|
||||||
|
get called in round to nearest mode. */
|
||||||
|
#ifdef __ROUNDING_MATH__
|
||||||
|
#define WANT_ROUNDING 1
|
||||||
|
#else
|
||||||
|
#define WANT_ROUNDING 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* If defined to 1, set errno in math functions according to ISO C. Many math
|
||||||
|
libraries do not set errno; here it is 1 unless __NO_MATH_ERRNO__ is defined. It may need to be
|
||||||
|
set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0. */
|
||||||
|
#ifdef __NO_MATH_ERRNO__
|
||||||
|
#define WANT_ERRNO 0
|
||||||
|
#else
|
||||||
|
#define WANT_ERRNO 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*------------------------------------------------------------------------------*/
|
||||||
|
/* optimized-routines/math/math_config.h */
|
||||||
|
|
||||||
|
#ifndef WANT_ROUNDING
|
||||||
|
/* If defined to 1, return correct results for special cases in non-nearest
|
||||||
|
rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f).
|
||||||
|
This may be set to 0 if there is no fenv support or if math functions only
|
||||||
|
get called in round to nearest mode. */
|
||||||
|
# define WANT_ROUNDING 1
|
||||||
|
#endif
|
||||||
|
#ifndef WANT_ERRNO
|
||||||
|
/* If defined to 1, set errno in math functions according to ISO C. Many math
|
||||||
|
libraries do not set errno, so this is 0 by default. It may need to be
|
||||||
|
set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0. */
|
||||||
|
# define WANT_ERRNO 0
|
||||||
|
#endif
|
||||||
|
#ifndef WANT_ERRNO_UFLOW
|
||||||
|
/* Set errno to ERANGE if result underflows to 0 (in all rounding modes). */
|
||||||
|
# define WANT_ERRNO_UFLOW (WANT_ROUNDING && WANT_ERRNO)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Compiler can inline round as a single instruction. */
|
||||||
|
#ifndef HAVE_FAST_ROUND
|
||||||
|
# if __aarch64__
|
||||||
|
# define HAVE_FAST_ROUND 1
|
||||||
|
# else
|
||||||
|
# define HAVE_FAST_ROUND 0
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Compiler can inline lround, but not (long)round(x). */
|
||||||
|
#ifndef HAVE_FAST_LROUND
|
||||||
|
# if __aarch64__ && (100*__GNUC__ + __GNUC_MINOR__) >= 408 && __NO_MATH_ERRNO__
|
||||||
|
# define HAVE_FAST_LROUND 1
|
||||||
|
# else
|
||||||
|
# define HAVE_FAST_LROUND 0
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Compiler can inline fma as a single instruction. */
|
||||||
|
#ifndef HAVE_FAST_FMA
|
||||||
|
# if defined FP_FAST_FMA || __aarch64__
|
||||||
|
# define HAVE_FAST_FMA 1
|
||||||
|
# else
|
||||||
|
# define HAVE_FAST_FMA 0
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Provide *_finite symbols and some of the glibc hidden symbols
|
||||||
|
so libmathlib can be used with binaries compiled against glibc
|
||||||
|
to interpose math functions with both static and dynamic linking. */
|
||||||
|
#ifndef USE_GLIBC_ABI
|
||||||
|
# if __GNUC__
|
||||||
|
# define USE_GLIBC_ABI 1
|
||||||
|
# else
|
||||||
|
# define USE_GLIBC_ABI 0
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Optionally used extensions.  With a GNU-compatible compiler these expand
   to attributes and builtins; otherwise the attribute macros expand to
   nothing and the branch hints expand to the bare expression.  */
#ifdef __GNUC__
# define HIDDEN __attribute__ ((__visibility__ ("hidden")))
# define NOINLINE __attribute__ ((noinline))
# define UNUSED __attribute__ ((unused))
/* Branch hints.  Both normalize their operand with !! before handing it to
   __builtin_expect, so pointer operands and values wider than long are
   treated as plain truth values.  */
# define likely(x) __builtin_expect (!!(x), 1)
# define unlikely(x) __builtin_expect (!!(x), 0)
# if __GNUC__ >= 9
#  define attribute_copy(f) __attribute__ ((copy (f)))
# else
#  define attribute_copy(f)
# endif
/* Declare A as an alias for the symbol F.  */
# define strong_alias(f, a) \
  extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
/* Declare A as an alias for the symbol F with hidden visibility.  */
# define hidden_alias(f, a) \
  extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
  attribute_copy (f);
#else
# define HIDDEN
# define NOINLINE
# define UNUSED
# define likely(x) (x)
# define unlikely(x) (x)
#endif
|
||||||
|
|
||||||
|
/* Return ptr but hide its value from the compiler so accesses through it
   cannot be optimized based on the contents.  The pointer is copied into a
   local, passed through an empty asm statement as a read-write register
   operand, and the local is the value of the expression.  Relies on GNU
   statement expressions and __typeof.  */
#define ptr_barrier(ptr) \
  ({ \
    __typeof (ptr) __ptr = (ptr); \
    __asm("" : "+r"(__ptr)); \
    __ptr; \
  })
|
||||||
|
|
||||||
|
#if HAVE_FAST_ROUND
|
||||||
|
/* When set, the roundtoint and converttoint functions are provided with
|
||||||
|
the semantics documented below. */
|
||||||
|
# define TOINT_INTRINSICS 1
|
||||||
|
|
||||||
|
/* Round x to nearest int in all rounding modes, ties have to be rounded
|
||||||
|
consistently with converttoint so the results match. If the result
|
||||||
|
would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */
|
||||||
|
static inline double_t
|
||||||
|
roundtoint (double_t x)
|
||||||
|
{
|
||||||
|
return round (x);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Convert x to nearest int in all rounding modes, ties have to be rounded
|
||||||
|
consistently with roundtoint. If the result is not representible in an
|
||||||
|
int32_t then the semantics is unspecified. */
|
||||||
|
static inline int32_t
|
||||||
|
converttoint (double_t x)
|
||||||
|
{
|
||||||
|
# if HAVE_FAST_LROUND
|
||||||
|
return lround (x);
|
||||||
|
# else
|
||||||
|
return (long) round (x);
|
||||||
|
# endif
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Reinterpret the object representation of a float as a uint32_t by
   writing one union member and reading the other.  */
static inline uint32_t
asuint (float f)
{
  union
  {
    float f;
    uint32_t i;
  } u = {f};
  return u.i;
}
|
||||||
|
|
||||||
|
/* Reinterpret a uint32_t bit pattern as a float by writing one union member
   and reading the other.  Inverse of asuint.  */
static inline float
asfloat (uint32_t i)
{
  union
  {
    uint32_t i;
    float f;
  } u = {i};
  return u.f;
}
|
||||||
|
|
||||||
|
/* Reinterpret the object representation of a double as a uint64_t by
   writing one union member and reading the other.  */
static inline uint64_t
asuint64 (double f)
{
  union
  {
    double f;
    uint64_t i;
  } u = {f};
  return u.i;
}
|
||||||
|
|
||||||
|
/* Reinterpret a uint64_t bit pattern as a double by writing one union
   member and reading the other.  Inverse of asuint64.  */
static inline double
asdouble (uint64_t i)
{
  union
  {
    uint64_t i;
    double f;
  } u = {i};
  return u.f;
}
|
||||||
|
|
||||||
|
#ifndef IEEE_754_2008_SNAN
|
||||||
|
# define IEEE_754_2008_SNAN 1
|
||||||
|
#endif
|
||||||
|
static inline int
|
||||||
|
issignalingf_inline (float x)
|
||||||
|
{
|
||||||
|
uint32_t ix = asuint (x);
|
||||||
|
if (!IEEE_754_2008_SNAN)
|
||||||
|
return (ix & 0x7fc00000) == 0x7fc00000;
|
||||||
|
return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int
|
||||||
|
issignaling_inline (double x)
|
||||||
|
{
|
||||||
|
uint64_t ix = asuint64 (x);
|
||||||
|
if (!IEEE_754_2008_SNAN)
|
||||||
|
return (ix & 0x7ff8000000000000) == 0x7ff8000000000000;
|
||||||
|
return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if __aarch64__ && __GNUC__
|
||||||
|
/* Prevent the optimization of a floating-point expression. */
|
||||||
|
static inline float
|
||||||
|
opt_barrier_float (float x)
|
||||||
|
{
|
||||||
|
__asm__ __volatile__ ("" : "+w" (x));
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
static inline double
|
||||||
|
opt_barrier_double (double x)
|
||||||
|
{
|
||||||
|
__asm__ __volatile__ ("" : "+w" (x));
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
/* Force the evaluation of a floating-point expression for its side-effect. */
|
||||||
|
static inline void
|
||||||
|
force_eval_float (float x)
|
||||||
|
{
|
||||||
|
__asm__ __volatile__ ("" : "+w" (x));
|
||||||
|
}
|
||||||
|
static inline void
|
||||||
|
force_eval_double (double x)
|
||||||
|
{
|
||||||
|
__asm__ __volatile__ ("" : "+w" (x));
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
static inline float
|
||||||
|
opt_barrier_float (float x)
|
||||||
|
{
|
||||||
|
volatile float y = x;
|
||||||
|
return y;
|
||||||
|
}
|
||||||
|
static inline double
|
||||||
|
opt_barrier_double (double x)
|
||||||
|
{
|
||||||
|
volatile double y = x;
|
||||||
|
return y;
|
||||||
|
}
|
||||||
|
static inline void
|
||||||
|
force_eval_float (float x)
|
||||||
|
{
|
||||||
|
volatile float y UNUSED = x;
|
||||||
|
}
|
||||||
|
static inline void
|
||||||
|
force_eval_double (double x)
|
||||||
|
{
|
||||||
|
volatile double y UNUSED = x;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Evaluate an expression as the specified type, normally a type
   cast should be enough, but compilers implement non-standard
   excess-precision handling, so when FLT_EVAL_METHOD != 0 then
   these functions may need to be customized.  As written, the argument is
   converted to float by the call and returned unchanged.  */
static inline float
eval_as_float (float x)
{
  return x;
}
|
||||||
|
/* Double counterpart of eval_as_float: the argument is converted to double
   by the call and returned unchanged.  */
static inline double
eval_as_double (double x)
{
  return x;
}
|
||||||
|
|
||||||
|
/* Error handling tail calls for special cases, with a sign argument.
|
||||||
|
The sign of the return value is set if the argument is non-zero. */
|
||||||
|
|
||||||
|
/* The result overflows. */
|
||||||
|
HIDDEN float __math_oflowf (uint32_t);
|
||||||
|
/* The result underflows to 0 in nearest rounding mode. */
|
||||||
|
HIDDEN float __math_uflowf (uint32_t);
|
||||||
|
/* The result underflows to 0 in some directed rounding mode only. */
|
||||||
|
HIDDEN float __math_may_uflowf (uint32_t);
|
||||||
|
/* Division by zero. */
|
||||||
|
HIDDEN float __math_divzerof (uint32_t);
|
||||||
|
/* The result overflows. */
|
||||||
|
HIDDEN double __math_oflow (uint32_t);
|
||||||
|
/* The result underflows to 0 in nearest rounding mode. */
|
||||||
|
HIDDEN double __math_uflow (uint32_t);
|
||||||
|
/* The result underflows to 0 in some directed rounding mode only. */
|
||||||
|
HIDDEN double __math_may_uflow (uint32_t);
|
||||||
|
/* Division by zero. */
|
||||||
|
HIDDEN double __math_divzero (uint32_t);
|
||||||
|
|
||||||
|
/* Error handling using input checking. */
|
||||||
|
|
||||||
|
/* Invalid input unless it is a quiet NaN. */
|
||||||
|
HIDDEN float __math_invalidf (float);
|
||||||
|
/* Invalid input unless it is a quiet NaN. */
|
||||||
|
HIDDEN double __math_invalid (double);
|
||||||
|
/* Invalid input unless it is a quiet NaN. */
|
||||||
|
HIDDEN long double __math_invalidl (long double);
|
||||||
|
|
||||||
|
/* Error handling using output checking, only for errno setting. */
|
||||||
|
|
||||||
|
/* Check if the result overflowed to infinity. */
|
||||||
|
HIDDEN double __math_check_oflow (double);
|
||||||
|
/* Check if the result underflowed to 0. */
|
||||||
|
HIDDEN double __math_check_uflow (double);
|
||||||
|
|
||||||
|
/* Check if the result overflowed to infinity. */
|
||||||
|
static inline double
|
||||||
|
check_oflow (double x)
|
||||||
|
{
|
||||||
|
return WANT_ERRNO ? __math_check_oflow (x) : x;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check if the result underflowed to 0. */
|
||||||
|
static inline double
|
||||||
|
check_uflow (double x)
|
||||||
|
{
|
||||||
|
return WANT_ERRNO ? __math_check_uflow (x) : x;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check if the result overflowed to infinity. */
|
||||||
|
HIDDEN float __math_check_oflowf (float);
|
||||||
|
/* Check if the result underflowed to 0. */
|
||||||
|
HIDDEN float __math_check_uflowf (float);
|
||||||
|
|
||||||
|
/* Check if the result overflowed to infinity. */
|
||||||
|
static inline float
|
||||||
|
check_oflowf (float x)
|
||||||
|
{
|
||||||
|
return WANT_ERRNO ? __math_check_oflowf (x) : x;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check if the result underflowed to 0. */
|
||||||
|
static inline float
|
||||||
|
check_uflowf (float x)
|
||||||
|
{
|
||||||
|
return WANT_ERRNO ? __math_check_uflowf (x) : x;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Shared between expf, exp2f and powf. */
|
||||||
|
#define EXP2F_TABLE_BITS 5
|
||||||
|
#define EXP2F_POLY_ORDER 3
|
||||||
|
extern const struct exp2f_data
|
||||||
|
{
|
||||||
|
uint64_t tab[1 << EXP2F_TABLE_BITS];
|
||||||
|
double shift_scaled;
|
||||||
|
double poly[EXP2F_POLY_ORDER];
|
||||||
|
double shift;
|
||||||
|
double invln2_scaled;
|
||||||
|
double poly_scaled[EXP2F_POLY_ORDER];
|
||||||
|
} __exp2f_data HIDDEN;
|
||||||
|
|
||||||
|
#define LOGF_TABLE_BITS 4
|
||||||
|
#define LOGF_POLY_ORDER 4
|
||||||
|
extern const struct logf_data
|
||||||
|
{
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
double invc, logc;
|
||||||
|
} tab[1 << LOGF_TABLE_BITS];
|
||||||
|
double ln2;
|
||||||
|
double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */
|
||||||
|
} __logf_data HIDDEN;
|
||||||
|
|
||||||
|
#define LOG2F_TABLE_BITS 4
|
||||||
|
#define LOG2F_POLY_ORDER 4
|
||||||
|
extern const struct log2f_data
|
||||||
|
{
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
double invc, logc;
|
||||||
|
} tab[1 << LOG2F_TABLE_BITS];
|
||||||
|
double poly[LOG2F_POLY_ORDER];
|
||||||
|
} __log2f_data HIDDEN;
|
||||||
|
|
||||||
|
#define POWF_LOG2_TABLE_BITS 4
|
||||||
|
#define POWF_LOG2_POLY_ORDER 5
|
||||||
|
#if TOINT_INTRINSICS
|
||||||
|
# define POWF_SCALE_BITS EXP2F_TABLE_BITS
|
||||||
|
#else
|
||||||
|
# define POWF_SCALE_BITS 0
|
||||||
|
#endif
|
||||||
|
#define POWF_SCALE ((double) (1 << POWF_SCALE_BITS))
|
||||||
|
extern const struct powf_log2_data
|
||||||
|
{
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
double invc, logc;
|
||||||
|
} tab[1 << POWF_LOG2_TABLE_BITS];
|
||||||
|
double poly[POWF_LOG2_POLY_ORDER];
|
||||||
|
} __powf_log2_data HIDDEN;
|
||||||
|
|
||||||
|
|
||||||
|
#define EXP_TABLE_BITS 7
|
||||||
|
#define EXP_POLY_ORDER 5
|
||||||
|
/* Use polynomial that is optimized for a wider input range. This may be
|
||||||
|
needed for good precision in non-nearest rounding and !TOINT_INTRINSICS. */
|
||||||
|
#define EXP_POLY_WIDE 0
|
||||||
|
/* Use close to nearest rounding toint when !TOINT_INTRINSICS. This may be
|
||||||
|
needed for good precision in non-nearest rouning and !EXP_POLY_WIDE. */
|
||||||
|
#define EXP_USE_TOINT_NARROW 0
|
||||||
|
#define EXP2_POLY_ORDER 5
|
||||||
|
#define EXP2_POLY_WIDE 0
|
||||||
|
/* Wider exp10 polynomial necessary for good precision in non-nearest rounding
|
||||||
|
and !TOINT_INTRINSICS. */
|
||||||
|
#define EXP10_POLY_WIDE 0
|
||||||
|
extern const struct exp_data
|
||||||
|
{
|
||||||
|
double invln2N;
|
||||||
|
double invlog10_2N;
|
||||||
|
double shift;
|
||||||
|
double negln2hiN;
|
||||||
|
double negln2loN;
|
||||||
|
double neglog10_2hiN;
|
||||||
|
double neglog10_2loN;
|
||||||
|
double poly[4]; /* Last four coefficients. */
|
||||||
|
double exp2_shift;
|
||||||
|
double exp2_poly[EXP2_POLY_ORDER];
|
||||||
|
double exp10_poly[5];
|
||||||
|
uint64_t tab[2*(1 << EXP_TABLE_BITS)];
|
||||||
|
} __exp_data HIDDEN;
|
||||||
|
|
||||||
|
#define LOG_TABLE_BITS 7
|
||||||
|
#define LOG_POLY_ORDER 6
|
||||||
|
#define LOG_POLY1_ORDER 12
|
||||||
|
extern const struct log_data
|
||||||
|
{
|
||||||
|
double ln2hi;
|
||||||
|
double ln2lo;
|
||||||
|
double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
|
||||||
|
double poly1[LOG_POLY1_ORDER - 1];
|
||||||
|
struct {double invc, logc;} tab[1 << LOG_TABLE_BITS];
|
||||||
|
#if !HAVE_FAST_FMA
|
||||||
|
struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS];
|
||||||
|
#endif
|
||||||
|
} __log_data HIDDEN;
|
||||||
|
|
||||||
|
#define LOG2_TABLE_BITS 6
|
||||||
|
#define LOG2_POLY_ORDER 7
|
||||||
|
#define LOG2_POLY1_ORDER 11
|
||||||
|
extern const struct log2_data
|
||||||
|
{
|
||||||
|
double invln2hi;
|
||||||
|
double invln2lo;
|
||||||
|
double poly[LOG2_POLY_ORDER - 1];
|
||||||
|
double poly1[LOG2_POLY1_ORDER - 1];
|
||||||
|
struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS];
|
||||||
|
#if !HAVE_FAST_FMA
|
||||||
|
struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS];
|
||||||
|
#endif
|
||||||
|
} __log2_data HIDDEN;
|
||||||
|
|
||||||
|
#define POW_LOG_TABLE_BITS 7
|
||||||
|
#define POW_LOG_POLY_ORDER 8
|
||||||
|
extern const struct pow_log_data
|
||||||
|
{
|
||||||
|
double ln2hi;
|
||||||
|
double ln2lo;
|
||||||
|
double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
|
||||||
|
/* Note: the pad field is unused, but allows slightly faster indexing. */
|
||||||
|
struct {double invc, pad, logc, logctail;} tab[1 << POW_LOG_TABLE_BITS];
|
||||||
|
} __pow_log_data HIDDEN;
|
||||||
|
|
||||||
|
extern const struct erff_data
|
||||||
|
{
|
||||||
|
float erff_poly_A[6];
|
||||||
|
float erff_poly_B[7];
|
||||||
|
} __erff_data HIDDEN;
|
||||||
|
|
||||||
|
#define ERF_POLY_A_ORDER 19
|
||||||
|
#define ERF_POLY_A_NCOEFFS 10
|
||||||
|
#define ERFC_POLY_C_NCOEFFS 16
|
||||||
|
#define ERFC_POLY_D_NCOEFFS 18
|
||||||
|
#define ERFC_POLY_E_NCOEFFS 14
|
||||||
|
#define ERFC_POLY_F_NCOEFFS 17
|
||||||
|
extern const struct erf_data
|
||||||
|
{
|
||||||
|
double erf_poly_A[ERF_POLY_A_NCOEFFS];
|
||||||
|
double erf_ratio_N_A[5];
|
||||||
|
double erf_ratio_D_A[5];
|
||||||
|
double erf_ratio_N_B[7];
|
||||||
|
double erf_ratio_D_B[6];
|
||||||
|
double erfc_poly_C[ERFC_POLY_C_NCOEFFS];
|
||||||
|
double erfc_poly_D[ERFC_POLY_D_NCOEFFS];
|
||||||
|
double erfc_poly_E[ERFC_POLY_E_NCOEFFS];
|
||||||
|
double erfc_poly_F[ERFC_POLY_F_NCOEFFS];
|
||||||
|
} __erf_data HIDDEN;
|
||||||
|
|
||||||
|
#define V_EXP_TABLE_BITS 7
|
||||||
|
extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN;
|
||||||
|
|
||||||
|
#define V_LOG_TABLE_BITS 7
|
||||||
|
extern const struct v_log_data
|
||||||
|
{
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
double invc, logc;
|
||||||
|
} table[1 << V_LOG_TABLE_BITS];
|
||||||
|
} __v_log_data HIDDEN;
|
||||||
|
|
||||||
|
COSMOPOLITAN_C_END_
|
||||||
|
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ARM_H_ */
|
|
@ -5,13 +5,6 @@
|
||||||
│ FreeBSD lib/msun/src/s_asinhl.c │
|
│ FreeBSD lib/msun/src/s_asinhl.c │
|
||||||
│ Converted to ldbl by David Schultz <das@FreeBSD.ORG> and Bruce D. Evans. │
|
│ Converted to ldbl by David Schultz <das@FreeBSD.ORG> and Bruce D. Evans. │
|
||||||
│ │
|
│ │
|
||||||
│ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. │
|
|
||||||
│ │
|
|
||||||
│ Developed at SunPro, a Sun Microsystems, Inc. business. │
|
|
||||||
│ Permission to use, copy, modify, and distribute this │
|
|
||||||
│ software is freely granted, provided that this notice │
|
|
||||||
│ is preserved. │
|
|
||||||
│ │
|
|
||||||
│ Copyright (c) 1992-2023 The FreeBSD Project. │
|
│ Copyright (c) 1992-2023 The FreeBSD Project. │
|
||||||
│ │
|
│ │
|
||||||
│ Redistribution and use in source and binary forms, with or without │
|
│ Redistribution and use in source and binary forms, with or without │
|
||||||
|
@ -35,12 +28,17 @@
|
||||||
│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF │
|
│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF │
|
||||||
│ SUCH DAMAGE. │
|
│ SUCH DAMAGE. │
|
||||||
│ │
|
│ │
|
||||||
|
│ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. │
|
||||||
|
│ │
|
||||||
|
│ Developed at SunPro, a Sun Microsystems, Inc. business. │
|
||||||
|
│ Permission to use, copy, modify, and distribute this │
|
||||||
|
│ software is freely granted, provided that this notice │
|
||||||
|
│ is preserved. │
|
||||||
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/freebsd.internal.h"
|
#include "libc/tinymath/freebsd.internal.h"
|
||||||
|
|
||||||
__static_yoink("fdlibm_notice");
|
|
||||||
__static_yoink("freebsd_libm_notice");
|
__static_yoink("freebsd_libm_notice");
|
||||||
|
__static_yoink("fdlibm_notice");
|
||||||
|
|
||||||
#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
|
#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
|
||||||
|
|
||||||
|
|
|
@ -1,177 +1,120 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi │
|
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ FreeBSD lib/msun/src/s_asinhl.c │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Converted to ldbl by David Schultz <das@FreeBSD.ORG> and Bruce D. Evans. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. │
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
│ │
|
||||||
│ The above copyright notice and this permission notice shall be │
|
│ Developed at SunPro, a Sun Microsystems, Inc. business. │
|
||||||
│ included in all copies or substantial portions of the Software. │
|
│ Permission to use, copy, modify, and distribute this │
|
||||||
|
│ software is freely granted, provided that this notice │
|
||||||
|
│ is preserved. │
|
||||||
│ │
|
│ │
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
│ Copyright (c) 1992-2023 The FreeBSD Project. │
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
│ │
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
│ Redistribution and use in source and binary forms, with or without │
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
│ modification, are permitted provided that the following conditions │
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
│ are met: │
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
│ 1. Redistributions of source code must retain the above copyright │
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ notice, this list of conditions and the following disclaimer. │
|
||||||
|
│ 2. Redistributions in binary form must reproduce the above copyright │
|
||||||
|
│ notice, this list of conditions and the following disclaimer in the │
|
||||||
|
│ documentation and/or other materials provided with the distribution. │
|
||||||
|
│ │
|
||||||
|
│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND │
|
||||||
|
│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE │
|
||||||
|
│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE │
|
||||||
|
│ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE │
|
||||||
|
│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL │
|
||||||
|
│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS │
|
||||||
|
│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) │
|
||||||
|
│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT │
|
||||||
|
│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY │
|
||||||
|
│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF │
|
||||||
|
│ SUCH DAMAGE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/intrin/likely.h"
|
#include "libc/tinymath/freebsd.internal.h"
|
||||||
#include "libc/math.h"
|
__static_yoink("freebsd_libm_notice");
|
||||||
#include "libc/tinymath/atanf_common.internal.h"
|
__static_yoink("fdlibm_notice");
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
__static_yoink("arm_optimized_routines_notice");
|
|
||||||
|
|
||||||
#define Pi (0x1.921fb6p+1f)
|
static volatile float
|
||||||
#define PiOver2 (0x1.921fb6p+0f)
|
tiny = 1.0e-30;
|
||||||
#define PiOver4 (0x1.921fb6p-1f)
|
static const float
|
||||||
#define SignMask (0x80000000)
|
zero = 0.0,
|
||||||
|
pi_o_4 = 7.8539818525e-01, /* 0x3f490fdb */
|
||||||
|
pi_o_2 = 1.5707963705e+00, /* 0x3fc90fdb */
|
||||||
|
pi = 3.1415927410e+00; /* 0x40490fdb */
|
||||||
|
static volatile float
|
||||||
|
pi_lo = -8.7422776573e-08; /* 0xb3bbbd2e */
|
||||||
|
|
||||||
/* We calculate atan2f by P(n/d), where n and d are similar to the input
|
/**
|
||||||
arguments, and P is a polynomial. The polynomial may underflow.
|
* Returns arc tangent of 𝑦/𝑥.
|
||||||
POLY_UFLOW_BOUND is the lower bound of the difference in exponents of n and d
|
*/
|
||||||
for which P underflows, and is used to special-case such inputs. */
|
|
||||||
#define POLY_UFLOW_BOUND 24
|
|
||||||
|
|
||||||
static inline int32_t
|
|
||||||
biased_exponent (float f)
|
|
||||||
{
|
|
||||||
uint32_t fi = asuint (f);
|
|
||||||
int32_t ex = (int32_t) ((fi & 0x7f800000) >> 23);
|
|
||||||
if (UNLIKELY (ex == 0))
|
|
||||||
{
|
|
||||||
/* Subnormal case - we still need to get the exponent right for subnormal
|
|
||||||
numbers as division may take us back inside the normal range. */
|
|
||||||
return ex - __builtin_clz (fi << 9);
|
|
||||||
}
|
|
||||||
return ex;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Fast implementation of scalar atan2f. Largest observed error is
|
|
||||||
2.88ulps in [99.0, 101.0] x [99.0, 101.0]:
|
|
||||||
atan2f(0x1.9332d8p+6, 0x1.8cb6c4p+6) got 0x1.964646p-1
|
|
||||||
want 0x1.964640p-1. */
|
|
||||||
float
|
float
|
||||||
atan2f (float y, float x)
|
atan2f(float y, float x)
|
||||||
{
|
{
|
||||||
uint32_t ix = asuint (x);
|
float z;
|
||||||
uint32_t iy = asuint (y);
|
int32_t k,m,hx,hy,ix,iy;
|
||||||
|
|
||||||
uint32_t sign_x = ix & SignMask;
|
GET_FLOAT_WORD(hx,x);
|
||||||
uint32_t sign_y = iy & SignMask;
|
ix = hx&0x7fffffff;
|
||||||
|
GET_FLOAT_WORD(hy,y);
|
||||||
|
iy = hy&0x7fffffff;
|
||||||
|
if((ix>0x7f800000)||
|
||||||
|
(iy>0x7f800000)) /* x or y is NaN */
|
||||||
|
return nan_mix(x, y);
|
||||||
|
if(hx==0x3f800000) return atanf(y); /* x=1.0 */
|
||||||
|
m = ((hy>>31)&1)|((hx>>30)&2); /* 2*sign(x)+sign(y) */
|
||||||
|
|
||||||
uint32_t iax = ix & ~SignMask;
|
/* when y = 0 */
|
||||||
uint32_t iay = iy & ~SignMask;
|
if(iy==0) {
|
||||||
|
switch(m) {
|
||||||
/* x or y is NaN. */
|
case 0:
|
||||||
if ((iax > 0x7f800000) || (iay > 0x7f800000))
|
case 1: return y; /* atan(+-0,+anything)=+-0 */
|
||||||
return x + y;
|
case 2: return pi+tiny;/* atan(+0,-anything) = pi */
|
||||||
|
case 3: return -pi-tiny;/* atan(-0,-anything) =-pi */
|
||||||
/* m = 2 * sign(x) + sign(y). */
|
|
||||||
uint32_t m = ((iy >> 31) & 1) | ((ix >> 30) & 2);
|
|
||||||
|
|
||||||
/* The following follows glibc ieee754 implementation, except
|
|
||||||
that we do not use +-tiny shifts (non-nearest rounding mode). */
|
|
||||||
|
|
||||||
int32_t exp_diff = biased_exponent (x) - biased_exponent (y);
|
|
||||||
|
|
||||||
/* Special case for (x, y) either on or very close to the x axis. Either y =
|
|
||||||
0, or y is tiny and x is huge (difference in exponents >=
|
|
||||||
POLY_UFLOW_BOUND). In the second case, we only want to use this special
|
|
||||||
case when x is negative (i.e. quadrants 2 or 3). */
|
|
||||||
if (UNLIKELY (iay == 0 || (exp_diff >= POLY_UFLOW_BOUND && m >= 2)))
|
|
||||||
{
|
|
||||||
switch (m)
|
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
case 1:
|
|
||||||
return y; /* atan(+-0,+anything)=+-0. */
|
|
||||||
case 2:
|
|
||||||
return Pi; /* atan(+0,-anything) = pi. */
|
|
||||||
case 3:
|
|
||||||
return -Pi; /* atan(-0,-anything) =-pi. */
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* Special case for (x, y) either on or very close to the y axis. Either x =
|
|
||||||
0, or x is tiny and y is huge (difference in exponents >=
|
|
||||||
POLY_UFLOW_BOUND). */
|
|
||||||
if (UNLIKELY (iax == 0 || exp_diff <= -POLY_UFLOW_BOUND))
|
|
||||||
return sign_y ? -PiOver2 : PiOver2;
|
|
||||||
|
|
||||||
/* x is INF. */
|
|
||||||
if (iax == 0x7f800000)
|
|
||||||
{
|
|
||||||
if (iay == 0x7f800000)
|
|
||||||
{
|
|
||||||
switch (m)
|
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
return PiOver4; /* atan(+INF,+INF). */
|
|
||||||
case 1:
|
|
||||||
return -PiOver4; /* atan(-INF,+INF). */
|
|
||||||
case 2:
|
|
||||||
return 3.0f * PiOver4; /* atan(+INF,-INF). */
|
|
||||||
case 3:
|
|
||||||
return -3.0f * PiOver4; /* atan(-INF,-INF). */
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
/* when x = 0 */
|
||||||
{
|
if(ix==0) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny;
|
||||||
switch (m)
|
|
||||||
{
|
/* when x is INF */
|
||||||
case 0:
|
if(ix==0x7f800000) {
|
||||||
return 0.0f; /* atan(+...,+INF). */
|
if(iy==0x7f800000) {
|
||||||
case 1:
|
switch(m) {
|
||||||
return -0.0f; /* atan(-...,+INF). */
|
case 0: return pi_o_4+tiny;/* atan(+INF,+INF) */
|
||||||
case 2:
|
case 1: return -pi_o_4-tiny;/* atan(-INF,+INF) */
|
||||||
return Pi; /* atan(+...,-INF). */
|
case 2: return (float)3.0*pi_o_4+tiny;/*atan(+INF,-INF)*/
|
||||||
case 3:
|
case 3: return (float)-3.0*pi_o_4-tiny;/*atan(-INF,-INF)*/
|
||||||
return -Pi; /* atan(-...,-INF). */
|
}
|
||||||
|
} else {
|
||||||
|
switch(m) {
|
||||||
|
case 0: return zero ; /* atan(+...,+INF) */
|
||||||
|
case 1: return -zero ; /* atan(-...,+INF) */
|
||||||
|
case 2: return pi+tiny ; /* atan(+...,-INF) */
|
||||||
|
case 3: return -pi-tiny ; /* atan(-...,-INF) */
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
/* when y is INF */
|
||||||
/* y is INF. */
|
if(iy==0x7f800000) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny;
|
||||||
if (iay == 0x7f800000)
|
|
||||||
return sign_y ? -PiOver2 : PiOver2;
|
|
||||||
|
|
||||||
uint32_t sign_xy = sign_x ^ sign_y;
|
/* compute y/x */
|
||||||
|
k = (iy-ix)>>23;
|
||||||
float ax = asfloat (iax);
|
if(k > 26) { /* |y/x| > 2**26 */
|
||||||
float ay = asfloat (iay);
|
z=pi_o_2+(float)0.5*pi_lo;
|
||||||
|
m&=1;
|
||||||
bool pred_aygtax = (ay > ax);
|
}
|
||||||
|
else if(k<-26&&hx<0) z=0.0; /* 0 > |y|/x > -2**-26 */
|
||||||
/* Set up z for call to atanf. */
|
else z=atanf(fabsf(y/x)); /* safe to do y/x */
|
||||||
float n = pred_aygtax ? -ax : ay;
|
switch (m) {
|
||||||
float d = pred_aygtax ? ay : ax;
|
case 0: return z ; /* atan(+,+) */
|
||||||
float z = n / d;
|
case 1: return -z ; /* atan(-,+) */
|
||||||
|
case 2: return pi-(z-pi_lo);/* atan(+,-) */
|
||||||
float ret;
|
default: /* case 3 */
|
||||||
if (UNLIKELY (m < 2 && exp_diff >= POLY_UFLOW_BOUND))
|
return (z-pi_lo)-pi;/* atan(-,-) */
|
||||||
{
|
}
|
||||||
/* If (x, y) is very close to x axis and x is positive, the polynomial
|
|
||||||
will underflow and evaluate to z. */
|
|
||||||
ret = z;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* Work out the correct shift. */
|
|
||||||
float shift = sign_x ? -2.0f : 0.0f;
|
|
||||||
shift = pred_aygtax ? shift + 1.0f : shift;
|
|
||||||
shift *= PiOver2;
|
|
||||||
|
|
||||||
ret = eval_poly (z, z, shift);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Account for the sign of x and y. */
|
|
||||||
return asfloat (asuint (ret) ^ sign_xy);
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,54 +0,0 @@
|
||||||
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATAN_COMMON_H_
|
|
||||||
#define COSMOPOLITAN_LIBC_TINYMATH_ATAN_COMMON_H_
|
|
||||||
#include "libc/tinymath/atan_data.internal.h"
|
|
||||||
#include "libc/tinymath/estrin_wrap.internal.h"
|
|
||||||
#include "libc/tinymath/horner.internal.h"
|
|
||||||
COSMOPOLITAN_C_START_
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Double-precision polynomial evaluation function for scalar and vector atan(x)
|
|
||||||
* and atan2(y,x).
|
|
||||||
*
|
|
||||||
* Copyright (c) 2021-2023, Arm Limited.
|
|
||||||
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
|
|
||||||
*/
|
|
||||||
|
|
||||||
#if WANT_VMATH
|
|
||||||
|
|
||||||
#define DBL_T float64x2_t
|
|
||||||
#define P(i) v_f64 (__atan_poly_data.poly[i])
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#define DBL_T double
|
|
||||||
#define P(i) __atan_poly_data.poly[i]
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Polynomial used in fast atan(x) and atan2(y,x) implementations
|
|
||||||
The order 19 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2). */
|
|
||||||
static inline DBL_T
|
|
||||||
eval_poly (DBL_T z, DBL_T az, DBL_T shift)
|
|
||||||
{
|
|
||||||
/* Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of
|
|
||||||
full scheme to avoid underflow in x^16. */
|
|
||||||
DBL_T z2 = z * z;
|
|
||||||
DBL_T x2 = z2 * z2;
|
|
||||||
DBL_T x4 = x2 * x2;
|
|
||||||
DBL_T x8 = x4 * x4;
|
|
||||||
DBL_T y
|
|
||||||
= FMA (ESTRIN_11_ (z2, x2, x4, x8, P, 8), x8, ESTRIN_7 (z2, x2, x4, P));
|
|
||||||
|
|
||||||
/* Finalize. y = shift + z + z^3 * P(z^2). */
|
|
||||||
y = FMA (y, z2 * az, az);
|
|
||||||
y = y + shift;
|
|
||||||
|
|
||||||
return y;
|
|
||||||
}
|
|
||||||
|
|
||||||
#undef DBL_T
|
|
||||||
#undef FMA
|
|
||||||
#undef P
|
|
||||||
|
|
||||||
COSMOPOLITAN_C_END_
|
|
||||||
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATAN_COMMON_H_ */
|
|
|
@ -1,11 +0,0 @@
|
||||||
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATAN_DATA_H_
|
|
||||||
#define COSMOPOLITAN_LIBC_TINYMATH_ATAN_DATA_H_
|
|
||||||
COSMOPOLITAN_C_START_
|
|
||||||
|
|
||||||
#define ATAN_POLY_NCOEFFS 20
|
|
||||||
extern const struct atan_poly_data {
|
|
||||||
double poly[ATAN_POLY_NCOEFFS];
|
|
||||||
} __atan_poly_data;
|
|
||||||
|
|
||||||
COSMOPOLITAN_C_END_
|
|
||||||
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATAN_DATA_H_ */
|
|
|
@ -1,43 +0,0 @@
|
||||||
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATANF_COMMON_H_
|
|
||||||
#define COSMOPOLITAN_LIBC_TINYMATH_ATANF_COMMON_H_
|
|
||||||
#include "libc/tinymath/atanf_data.internal.h"
|
|
||||||
#include "libc/tinymath/estrin_wrap.internal.h"
|
|
||||||
#include "libc/tinymath/hornerf.internal.h"
|
|
||||||
COSMOPOLITAN_C_START_
|
|
||||||
|
|
||||||
#if WANT_VMATH
|
|
||||||
|
|
||||||
#define FLT_T float32x4_t
|
|
||||||
#define P(i) v_f32 (__atanf_poly_data.poly[i])
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#define FLT_T float
|
|
||||||
#define P(i) __atanf_poly_data.poly[i]
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Polynomial used in fast atanf(x) and atan2f(y,x) implementations
|
|
||||||
The order 7 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2). */
|
|
||||||
static inline FLT_T
|
|
||||||
eval_poly (FLT_T z, FLT_T az, FLT_T shift)
|
|
||||||
{
|
|
||||||
/* Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However,
|
|
||||||
a standard implementation using z8 creates spurious underflow
|
|
||||||
in the very last fma (when z^8 is small enough).
|
|
||||||
Therefore, we split the last fma into a mul and and an fma.
|
|
||||||
Horner and single-level Estrin have higher errors that exceed
|
|
||||||
threshold. */
|
|
||||||
FLT_T z2 = z * z;
|
|
||||||
FLT_T z4 = z2 * z2;
|
|
||||||
|
|
||||||
/* Then assemble polynomial. */
|
|
||||||
FLT_T y = FMA (z4, z4 * ESTRIN_3_ (z2, z4, P, 4), ESTRIN_3 (z2, z4, P));
|
|
||||||
|
|
||||||
/* Finalize:
|
|
||||||
y = shift + z * P(z^2). */
|
|
||||||
return FMA (y, z2 * az, az) + shift;
|
|
||||||
}
|
|
||||||
|
|
||||||
COSMOPOLITAN_C_END_
|
|
||||||
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATANF_COMMON_H_ */
|
|
|
@ -1,11 +0,0 @@
|
||||||
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATANF_DATA_H_
|
|
||||||
#define COSMOPOLITAN_LIBC_TINYMATH_ATANF_DATA_H_
|
|
||||||
COSMOPOLITAN_C_START_
|
|
||||||
|
|
||||||
#define ATANF_POLY_NCOEFFS 8
|
|
||||||
extern const struct atanf_poly_data {
|
|
||||||
float poly[ATANF_POLY_NCOEFFS];
|
|
||||||
} __atanf_poly_data;
|
|
||||||
|
|
||||||
COSMOPOLITAN_C_END_
|
|
||||||
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATANF_DATA_H_ */
|
|
|
@ -1,41 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
||||||
│ │
|
|
||||||
│ Musl Libc │
|
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
|
||||||
│ │
|
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
|
||||||
│ The above copyright notice and this permission notice shall be │
|
|
||||||
│ included in all copies or substantial portions of the Software. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
|
||||||
│ │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/complex.h"
|
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/complex.internal.h"
|
|
||||||
__static_yoink("musl_libc_notice");
|
|
||||||
|
|
||||||
// FIXME: Hull et al. "Implementing the complex arcsine and arccosine functions using exception handling" 1997
|
|
||||||
|
|
||||||
/* acos(z) = pi/2 - asin(z) */
|
|
||||||
|
|
||||||
double complex cacos(double complex z)
|
|
||||||
{
|
|
||||||
z = casin(z);
|
|
||||||
return CMPLX(M_PI_2 - creal(z), -cimag(z));
|
|
||||||
}
|
|
|
@ -1,39 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
||||||
│ │
|
|
||||||
│ Musl Libc │
|
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
|
||||||
│ │
|
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
|
||||||
│ The above copyright notice and this permission notice shall be │
|
|
||||||
│ included in all copies or substantial portions of the Software. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
|
||||||
│ │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/complex.h"
|
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/complex.internal.h"
|
|
||||||
__static_yoink("musl_libc_notice");
|
|
||||||
|
|
||||||
// FIXME
|
|
||||||
|
|
||||||
float complex cacosf(float complex z)
|
|
||||||
{
|
|
||||||
z = casinf(z);
|
|
||||||
return CMPLXF((float)M_PI_2 - crealf(z), -cimagf(z));
|
|
||||||
}
|
|
|
@ -1,44 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
||||||
│ │
|
|
||||||
│ Musl Libc │
|
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
|
||||||
│ │
|
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
|
||||||
│ The above copyright notice and this permission notice shall be │
|
|
||||||
│ included in all copies or substantial portions of the Software. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
|
||||||
│ │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/complex.h"
|
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/complex.internal.h"
|
|
||||||
__static_yoink("musl_libc_notice");
|
|
||||||
|
|
||||||
/* acosh(z) = i acos(z) */
|
|
||||||
double complex cacosh(double complex z)
|
|
||||||
{
|
|
||||||
int zineg = signbit(cimag(z));
|
|
||||||
z = cacos(z);
|
|
||||||
if (zineg) return CMPLX(cimag(z), -creal(z));
|
|
||||||
else return CMPLX(-cimag(z), creal(z));
|
|
||||||
}
|
|
||||||
|
|
||||||
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
|
|
||||||
__weak_reference(cacosh, cacoshl);
|
|
||||||
#endif
|
|
|
@ -1,39 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
||||||
│ │
|
|
||||||
│ Musl Libc │
|
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
|
||||||
│ │
|
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
|
||||||
│ The above copyright notice and this permission notice shall be │
|
|
||||||
│ included in all copies or substantial portions of the Software. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
|
||||||
│ │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/complex.h"
|
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/complex.internal.h"
|
|
||||||
__static_yoink("musl_libc_notice");
|
|
||||||
|
|
||||||
float complex cacoshf(float complex z)
|
|
||||||
{
|
|
||||||
int zineg = signbit(cimagf(z));
|
|
||||||
z = cacosf(z);
|
|
||||||
if (zineg) return CMPLXF(cimagf(z), -crealf(z));
|
|
||||||
else return CMPLXF(-cimagf(z), crealf(z));
|
|
||||||
}
|
|
|
@ -1,45 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
||||||
│ │
|
|
||||||
│ Musl Libc │
|
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
|
||||||
│ │
|
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
|
||||||
│ The above copyright notice and this permission notice shall be │
|
|
||||||
│ included in all copies or substantial portions of the Software. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
|
||||||
│ │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/complex.h"
|
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/complex.internal.h"
|
|
||||||
__static_yoink("musl_libc_notice");
|
|
||||||
|
|
||||||
// FIXME
|
|
||||||
|
|
||||||
float complex casinf(float complex z)
|
|
||||||
{
|
|
||||||
float complex w;
|
|
||||||
float x, y;
|
|
||||||
|
|
||||||
x = crealf(z);
|
|
||||||
y = cimagf(z);
|
|
||||||
w = CMPLXF(1.0 - (x - y)*(x + y), -2.0*x*y);
|
|
||||||
float complex r = clogf(CMPLXF(-y, x) + csqrtf(w));
|
|
||||||
return CMPLXF(cimagf(r), -crealf(r));
|
|
||||||
}
|
|
|
@ -1,39 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
||||||
│ │
|
|
||||||
│ Musl Libc │
|
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
|
||||||
│ │
|
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
|
||||||
│ The above copyright notice and this permission notice shall be │
|
|
||||||
│ included in all copies or substantial portions of the Software. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
|
||||||
│ │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/complex.h"
|
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/complex.internal.h"
|
|
||||||
__static_yoink("musl_libc_notice");
|
|
||||||
|
|
||||||
/* asinh(z) = -i asin(i z) */
|
|
||||||
|
|
||||||
double complex casinh(double complex z)
|
|
||||||
{
|
|
||||||
z = casin(CMPLX(-cimag(z), creal(z)));
|
|
||||||
return CMPLX(cimag(z), -creal(z));
|
|
||||||
}
|
|
|
@ -1,37 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
||||||
│ │
|
|
||||||
│ Musl Libc │
|
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
|
||||||
│ │
|
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
|
||||||
│ The above copyright notice and this permission notice shall be │
|
|
||||||
│ included in all copies or substantial portions of the Software. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
|
||||||
│ │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/complex.h"
|
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/complex.internal.h"
|
|
||||||
__static_yoink("musl_libc_notice");
|
|
||||||
|
|
||||||
float complex casinhf(float complex z)
|
|
||||||
{
|
|
||||||
z = casinf(CMPLXF(-cimagf(z), crealf(z)));
|
|
||||||
return CMPLXF(cimagf(z), -crealf(z));
|
|
||||||
}
|
|
|
@ -1,142 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
||||||
│ │
|
|
||||||
│ Musl Libc │
|
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
|
||||||
│ │
|
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
|
||||||
│ The above copyright notice and this permission notice shall be │
|
|
||||||
│ included in all copies or substantial portions of the Software. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
|
||||||
│ │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/complex.h"
|
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/complex.internal.h"
|
|
||||||
__static_yoink("musl_libc_notice");
|
|
||||||
__static_yoink("openbsd_libm_notice");
|
|
||||||
|
|
||||||
/* origin: OpenBSD /usr/src/lib/libm/src/s_catan.c */
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
|
|
||||||
*
|
|
||||||
* Permission to use, copy, modify, and distribute this software for any
|
|
||||||
* purpose with or without fee is hereby granted, provided that the above
|
|
||||||
* copyright notice and this permission notice appear in all copies.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
||||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
||||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
||||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
||||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
||||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
||||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
||||||
*/
|
|
||||||
/*
|
|
||||||
* Complex circular arc tangent
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* SYNOPSIS:
|
|
||||||
*
|
|
||||||
* double complex catan();
|
|
||||||
* double complex z, w;
|
|
||||||
*
|
|
||||||
* w = catan (z);
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* DESCRIPTION:
|
|
||||||
*
|
|
||||||
* If
|
|
||||||
* z = x + iy,
|
|
||||||
*
|
|
||||||
* then
|
|
||||||
*            1        (     2x      )
*     Re w = - arctan (-------------) + k PI
*            2        (     2     2 )
*                     (1 - x  -  y  )
*
*  i.e.  Re w = (1/2) * atan2(2x, 1 - x^2 - y^2) + k PI
*
*            1      ( 2         2 )
*     Im w = - log  (x  + (y+1)   )
*            4      (-------------)
*                   ( 2         2 )
*                   (x  + (y-1)   )
*
*  i.e.  Im w = (1/4) * log((x^2 + (y+1)^2) / (x^2 + (y-1)^2))
|
|
||||||
*
|
|
||||||
* Where k is an arbitrary integer.
|
|
||||||
*
|
|
||||||
* catan(z) = -i catanh(iz).
|
|
||||||
*
|
|
||||||
* ACCURACY:
|
|
||||||
*
|
|
||||||
* Relative error:
|
|
||||||
* arithmetic domain # trials peak rms
|
|
||||||
* DEC -10,+10 5900 1.3e-16 7.8e-18
|
|
||||||
* IEEE -10,+10 30000 2.3e-15 8.5e-17
|
|
||||||
* The check catan( ctan(z) ) = z, with |x| and |y| < PI/2,
|
|
||||||
* had peak relative error 1.5e-16, rms relative error
|
|
||||||
* 2.9e-17. See also clog().
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define MAXNUM 1.0e308
|
|
||||||
|
|
||||||
static const double DP1 = 3.14159265160560607910E0;
|
|
||||||
static const double DP2 = 1.98418714791870343106E-9;
|
|
||||||
static const double DP3 = 1.14423774522196636802E-17;
|
|
||||||
|
|
||||||
static double _redupi(double x)
|
|
||||||
{
|
|
||||||
double t;
|
|
||||||
long i;
|
|
||||||
|
|
||||||
t = x/M_PI;
|
|
||||||
if (t >= 0.0)
|
|
||||||
t += 0.5;
|
|
||||||
else
|
|
||||||
t -= 0.5;
|
|
||||||
|
|
||||||
i = t; /* the multiple */
|
|
||||||
t = i;
|
|
||||||
t = ((x - t * DP1) - t * DP2) - t * DP3;
|
|
||||||
return t;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Complex circular arc tangent of z = x + iy:
 *   real part:      _redupi(0.5 * atan2(2x, 1 - x^2 - y^2))
 *   imaginary part: 0.25 * log((x^2 + (y+1)^2) / (x^2 + (y-1)^2))
 */
double complex catan(double complex z)
{
	const double re = creal(z);
	const double im = cimag(z);
	const double re2 = re * re;
	const double below = im - 1.0;
	const double above = im + 1.0;
	double angle, ratio;

	/* real part, with the multiple of pi removed by _redupi() */
	angle = _redupi(0.5 * atan2(2.0 * re, 1.0 - re2 - (im * im)));

	/* squared distance to -i over squared distance to +i */
	ratio = (re2 + above * above)/(re2 + (below * below));

	return CMPLX(angle, 0.25 * log(ratio));
}
|
|
||||||
|
|
||||||
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
|
|
||||||
__weak_reference(catan, catanl);
|
|
||||||
#endif
|
|
|
@ -1,135 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
||||||
│ │
|
|
||||||
│ Musl Libc │
|
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
|
||||||
│ │
|
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
|
||||||
│ The above copyright notice and this permission notice shall be │
|
|
||||||
│ included in all copies or substantial portions of the Software. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
|
||||||
│ │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/complex.h"
|
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/complex.internal.h"
|
|
||||||
__static_yoink("musl_libc_notice");
|
|
||||||
__static_yoink("openbsd_libm_notice");
|
|
||||||
|
|
||||||
/* origin: OpenBSD /usr/src/lib/libm/src/s_catanf.c */
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
|
|
||||||
*
|
|
||||||
* Permission to use, copy, modify, and distribute this software for any
|
|
||||||
* purpose with or without fee is hereby granted, provided that the above
|
|
||||||
* copyright notice and this permission notice appear in all copies.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
||||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
||||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
||||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
||||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
||||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
||||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
||||||
*/
|
|
||||||
/*
|
|
||||||
* Complex circular arc tangent
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* SYNOPSIS:
|
|
||||||
*
|
|
||||||
* float complex catanf();
|
|
||||||
* float complex z, w;
|
|
||||||
*
|
|
||||||
* w = catanf( z );
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* DESCRIPTION:
|
|
||||||
*
|
|
||||||
* If
|
|
||||||
* z = x + iy,
|
|
||||||
*
|
|
||||||
* then
|
|
||||||
*            1        (     2x      )
*     Re w = - arctan (-------------) + k PI
*            2        (     2     2 )
*                     (1 - x  -  y  )
*
*  i.e.  Re w = (1/2) * atan2(2x, 1 - x^2 - y^2) + k PI
*
*            1      ( 2         2 )
*     Im w = - log  (x  + (y+1)   )
*            4      (-------------)
*                   ( 2         2 )
*                   (x  + (y-1)   )
*
*  i.e.  Im w = (1/4) * log((x^2 + (y+1)^2) / (x^2 + (y-1)^2))
|
|
||||||
*
|
|
||||||
* Where k is an arbitrary integer.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* ACCURACY:
|
|
||||||
*
|
|
||||||
* Relative error:
|
|
||||||
* arithmetic domain # trials peak rms
|
|
||||||
* IEEE -10,+10 30000 2.3e-6 5.2e-8
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#define MAXNUMF 1.0e38F
|
|
||||||
|
|
||||||
static const double DP1 = 3.140625;
|
|
||||||
static const double DP2 = 9.67502593994140625E-4;
|
|
||||||
static const double DP3 = 1.509957990978376432E-7;
|
|
||||||
|
|
||||||
static float _redupif(float xx)
|
|
||||||
{
|
|
||||||
float x, t;
|
|
||||||
long i;
|
|
||||||
|
|
||||||
x = xx;
|
|
||||||
t = x/(float)M_PI;
|
|
||||||
if (t >= 0.0f)
|
|
||||||
t += 0.5f;
|
|
||||||
else
|
|
||||||
t -= 0.5f;
|
|
||||||
|
|
||||||
i = t; /* the multiple */
|
|
||||||
t = i;
|
|
||||||
t = ((x - t * DP1) - t * DP2) - t * DP3;
|
|
||||||
return t;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Single-precision complex circular arc tangent of z = x + iy:
 *   real part:      _redupif(0.5 * atan2f(2x, 1 - x^2 - y^2))
 *   imaginary part: 0.25 * logf((x^2 + (y+1)^2) / (x^2 + (y-1)^2))
 */
float complex catanf(float complex z)
{
	const float re = crealf(z);
	const float im = cimagf(z);
	const float re2 = re * re;
	const float below = im - 1.0f;
	const float above = im + 1.0f;
	float angle, ratio;

	/* real part, with the multiple of pi removed by _redupif() */
	angle = _redupif(0.5f * atan2f(2.0f * re, 1.0f - re2 - (im * im)));

	/* squared distance to -i over squared distance to +i */
	ratio = (re2 + (above * above))/(re2 + (below * below));

	return CMPLXF(angle, 0.25f * logf(ratio));
}
|
|
|
@ -1,39 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
||||||
│ │
|
|
||||||
│ Musl Libc │
|
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
|
||||||
│ │
|
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
|
||||||
│ The above copyright notice and this permission notice shall be │
|
|
||||||
│ included in all copies or substantial portions of the Software. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
|
||||||
│ │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/complex.h"
|
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/complex.internal.h"
|
|
||||||
__static_yoink("musl_libc_notice");
|
|
||||||
|
|
||||||
/* atanh = -i atan(i z) */
|
|
||||||
|
|
||||||
double complex catanh(double complex z)
|
|
||||||
{
|
|
||||||
z = catan(CMPLX(-cimag(z), creal(z)));
|
|
||||||
return CMPLX(cimag(z), -creal(z));
|
|
||||||
}
|
|
|
@ -1,37 +0,0 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
|
||||||
│ │
|
|
||||||
│ Musl Libc │
|
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
|
||||||
│ │
|
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
|
||||||
│ a copy of this software and associated documentation files (the │
|
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
|
||||||
│ the following conditions: │
|
|
||||||
│ │
|
|
||||||
│ The above copyright notice and this permission notice shall be │
|
|
||||||
│ included in all copies or substantial portions of the Software. │
|
|
||||||
│ │
|
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
|
||||||
│ │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
||||||
#include "libc/complex.h"
|
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/complex.internal.h"
|
|
||||||
__static_yoink("musl_libc_notice");
|
|
||||||
|
|
||||||
float complex catanhf(float complex z)
|
|
||||||
{
|
|
||||||
z = catanf(CMPLXF(-cimagf(z), crealf(z)));
|
|
||||||
return CMPLXF(cimagf(z), -crealf(z));
|
|
||||||
}
|
|
651
libc/tinymath/catrig.c
Normal file
651
libc/tinymath/catrig.c
Normal file
|
@ -0,0 +1,651 @@
|
||||||
|
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
||||||
|
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
||||||
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
|
│ │
|
||||||
|
│ FreeBSD lib/msun/src/catrig.c │
|
||||||
|
│ Copyright (c) 2012 Stephen Montgomery-Smith <stephen@FreeBSD.ORG> │
|
||||||
|
│ │
|
||||||
|
│ Redistribution and use in source and binary forms, with or without │
|
||||||
|
│ modification, are permitted provided that the following conditions │
|
||||||
|
│ are met: │
|
||||||
|
│ 1. Redistributions of source code must retain the above copyright │
|
||||||
|
│ notice, this list of conditions and the following disclaimer. │
|
||||||
|
│ 2. Redistributions in binary form must reproduce the above copyright │
|
||||||
|
│ notice, this list of conditions and the following disclaimer in the │
|
||||||
|
│ documentation and/or other materials provided with the distribution. │
|
||||||
|
│ │
|
||||||
|
│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND │
|
||||||
|
│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE │
|
||||||
|
│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE │
|
||||||
|
│ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE │
|
||||||
|
│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL │
|
||||||
|
│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS │
|
||||||
|
│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) │
|
||||||
|
│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT │
|
||||||
|
│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY │
|
||||||
|
│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF │
|
||||||
|
│ SUCH DAMAGE. │
|
||||||
|
│ │
|
||||||
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
|
#include "libc/tinymath/freebsd.internal.h"
|
||||||
|
__static_yoink("freebsd_complex_notice");
|
||||||
|
|
||||||
|
/*
 * Local replacements for the classification macros, written as plain
 * expressions on the argument.
 */
#undef isinf
#define isinf(x) (fabs(x) == INFINITY)
#undef isnan
#define isnan(x) ((x) != (x))
#undef signbit
#define signbit(x) (__builtin_signbit(x))

/*
 * Evaluate 1 + tiny into a volatile temporary.  `tiny` is itself a
 * volatile object, so the addition is performed where the macro is used.
 */
#define raise_inexact()                        \
	do {                                   \
		volatile float _j = 1 + tiny;  \
		(void)_j;                      \
	} while(0)

/* We need that DBL_EPSILON^2/128 is larger than FOUR_SQRT_MIN. */
static const double A_crossover = 10;		/* Hull et al suggest 1.5, but 10 works better */
static const double B_crossover = 0.6417;	/* suggested by Hull et al */
static const double FOUR_SQRT_MIN = 0x1p-509;	/* >= 4 * sqrt(DBL_MIN) */
static const double QUARTER_SQRT_MAX = 0x1p509;	/* <= sqrt(DBL_MAX) / 4 */
static const double m_e = 2.7182818284590452e0;	/* 0x15bf0a8b145769.0p-51 */
static const double m_ln2 = 6.9314718055994531e-1;	/* 0x162e42fefa39ef.0p-53 */
static const double pio2_hi = 1.5707963267948966e0;	/* 0x1921fb54442d18.0p-52 */
static const double RECIP_EPSILON = 1 / DBL_EPSILON;
static const double SQRT_3_EPSILON = 2.5809568279517849e-8;	/* 0x1bb67ae8584caa.0p-78 */
static const double SQRT_6_EPSILON = 3.6500241499888571e-8;	/* 0x13988e1409212e.0p-77 */
static const double SQRT_MIN = 0x1p-511;	/* >= sqrt(DBL_MIN) */

/*
 * These two are volatile objects - presumably so that expressions using
 * them are evaluated at run time instead of being folded by the compiler.
 */
static const volatile double pio2_lo = 6.1232339957367659e-17;	/* 0x11a62633145c07.0p-106 */
static const volatile float tiny = 0x1p-100;

/* Defined near the end of the file; declared here for casinh() and cacos(). */
static double complex clog_for_large_values(double complex z);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Testing indicates that all these functions are accurate up to 4 ULP.
|
||||||
|
* The functions casin(h) and cacos(h) are about 2.5 times slower than asinh.
|
||||||
|
* The functions catan(h) are a little under 2 times slower than atanh.
|
||||||
|
*
|
||||||
|
* The code for casinh, casin, cacos, and cacosh comes first. The code is
|
||||||
|
* rather complicated, and the four functions are highly interdependent.
|
||||||
|
*
|
||||||
|
* The code for catanh and catan comes at the end. It is much simpler than
|
||||||
|
* the other functions, and the code for these can be disconnected from the
|
||||||
|
* rest of the code.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ================================
|
||||||
|
* | casinh, casin, cacos, cacosh |
|
||||||
|
* ================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The algorithm is very close to that in "Implementing the complex arcsine
|
||||||
|
* and arccosine functions using exception handling" by T. E. Hull, Thomas F.
|
||||||
|
* Fairgrieve, and Ping Tak Peter Tang, published in ACM Transactions on
|
||||||
|
* Mathematical Software, Volume 23 Issue 3, 1997, Pages 299-335,
|
||||||
|
* http://dl.acm.org/citation.cfm?id=275324.
|
||||||
|
*
|
||||||
|
* Throughout we use the convention z = x + I*y.
|
||||||
|
*
|
||||||
|
* casinh(z) = sign(x)*log(A+sqrt(A*A-1)) + I*asin(B)
|
||||||
|
* where
|
||||||
|
* A = (|z+I| + |z-I|) / 2
|
||||||
|
* B = (|z+I| - |z-I|) / 2 = y/A
|
||||||
|
*
|
||||||
|
* These formulas become numerically unstable:
|
||||||
|
* (a) for Re(casinh(z)) when z is close to the line segment [-I, I] (that
|
||||||
|
* is, Re(casinh(z)) is close to 0);
|
||||||
|
* (b) for Im(casinh(z)) when z is close to either of the intervals
|
||||||
|
* [I, I*infinity) or (-I*infinity, -I] (that is, |Im(casinh(z))| is
|
||||||
|
* close to PI/2).
|
||||||
|
*
|
||||||
|
* These numerical problems are overcome by defining
|
||||||
|
* f(a, b) = (hypot(a, b) - b) / 2 = a*a / (hypot(a, b) + b) / 2
|
||||||
|
* Then if A < A_crossover, we use
|
||||||
|
* log(A + sqrt(A*A-1)) = log1p((A-1) + sqrt((A-1)*(A+1)))
|
||||||
|
* A-1 = f(x, 1+y) + f(x, 1-y)
|
||||||
|
* and if B > B_crossover, we use
|
||||||
|
* asin(B) = atan2(y, sqrt(A*A - y*y)) = atan2(y, sqrt((A+y)*(A-y)))
|
||||||
|
* A-y = f(x, y+1) + f(x, y-1)
|
||||||
|
* where without loss of generality we have assumed that x and y are
|
||||||
|
* non-negative.
|
||||||
|
*
|
||||||
|
* Much of the difficulty comes because the intermediate computations may
|
||||||
|
* produce overflows or underflows. This is dealt with in the paper by Hull
|
||||||
|
* et al by using exception handling. We do this by detecting when
|
||||||
|
* computations risk underflow or overflow. The hardest part is handling the
|
||||||
|
* underflows when computing f(a, b).
|
||||||
|
*
|
||||||
|
* Note that the function f(a, b) does not appear explicitly in the paper by
|
||||||
|
* Hull et al, but the idea may be found on pages 308 and 309. Introducing the
|
||||||
|
* function f(a, b) allows us to concentrate many of the clever tricks in this
|
||||||
|
* paper into one function.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * f(a, b, hypot_a_b) = (hypot(a, b) - b) / 2, evaluated in whichever
 * algebraically equivalent form suits the sign of b:
 *   b <  0:  (hypot_a_b - b) / 2
 *   b == 0:  a / 2
 *   b >  0:  a*a / (hypot_a_b + b) / 2
 * The caller passes hypot(a, b) as the third argument.
 */
static inline double
f(double a, double b, double hypot_a_b)
{
	if (b == 0)
		return (a / 2);
	return (b < 0 ? (hypot_a_b - b) / 2 : a * a / (hypot_a_b + b) / 2);
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* All the hard work is contained in this function.
|
||||||
|
* x and y are assumed positive or zero, and less than RECIP_EPSILON.
|
||||||
|
* Upon return:
|
||||||
|
* rx = Re(casinh(z)) = -Im(cacos(y + I*x)).
|
||||||
|
* B_is_usable is set to 1 if the value of B is usable.
|
||||||
|
* If B_is_usable is set to 0, sqrt_A2my2 = sqrt(A*A - y*y), and new_y = y.
|
||||||
|
* If returning sqrt_A2my2 has potential to result in an underflow, it is
|
||||||
|
* rescaled, and new_y is similarly rescaled.
|
||||||
|
*/
|
||||||
|
static inline void
|
||||||
|
do_hard_work(double x, double y, double *rx, int *B_is_usable, double *B,
|
||||||
|
double *sqrt_A2my2, double *new_y)
|
||||||
|
{
|
||||||
|
double R, S, A; /* A, B, R, and S are as in Hull et al. */
|
||||||
|
double Am1, Amy; /* A-1, A-y. */
|
||||||
|
|
||||||
|
R = hypot(x, y + 1); /* |z+I| */
|
||||||
|
S = hypot(x, y - 1); /* |z-I| */
|
||||||
|
|
||||||
|
/* A = (|z+I| + |z-I|) / 2 */
|
||||||
|
A = (R + S) / 2;
|
||||||
|
/*
|
||||||
|
* Mathematically A >= 1. There is a small chance that this will not
|
||||||
|
* be so because of rounding errors. So we will make certain it is
|
||||||
|
* so.
|
||||||
|
*/
|
||||||
|
if (A < 1)
|
||||||
|
A = 1;
|
||||||
|
|
||||||
|
if (A < A_crossover) {
|
||||||
|
/*
|
||||||
|
* Am1 = fp + fm, where fp = f(x, 1+y), and fm = f(x, 1-y).
|
||||||
|
* rx = log1p(Am1 + sqrt(Am1*(A+1)))
|
||||||
|
*/
|
||||||
|
if (y == 1 && x < DBL_EPSILON * DBL_EPSILON / 128) {
|
||||||
|
/*
|
||||||
|
* fp is of order x^2, and fm = x/2.
|
||||||
|
* A = 1 (inexactly).
|
||||||
|
*/
|
||||||
|
*rx = sqrt(x);
|
||||||
|
} else if (x >= DBL_EPSILON * fabs(y - 1)) {
|
||||||
|
/*
|
||||||
|
* Underflow will not occur because
|
||||||
|
* x >= DBL_EPSILON^2/128 >= FOUR_SQRT_MIN
|
||||||
|
*/
|
||||||
|
Am1 = f(x, 1 + y, R) + f(x, 1 - y, S);
|
||||||
|
*rx = log1p(Am1 + sqrt(Am1 * (A + 1)));
|
||||||
|
} else if (y < 1) {
|
||||||
|
/*
|
||||||
|
* fp = x*x/(1+y)/4, fm = x*x/(1-y)/4, and
|
||||||
|
* A = 1 (inexactly).
|
||||||
|
*/
|
||||||
|
*rx = x / sqrt((1 - y) * (1 + y));
|
||||||
|
} else { /* if (y > 1) */
|
||||||
|
/*
|
||||||
|
* A-1 = y-1 (inexactly).
|
||||||
|
*/
|
||||||
|
*rx = log1p((y - 1) + sqrt((y - 1) * (y + 1)));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
*rx = log(A + sqrt(A * A - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
*new_y = y;
|
||||||
|
|
||||||
|
if (y < FOUR_SQRT_MIN) {
|
||||||
|
/*
|
||||||
|
* Avoid a possible underflow caused by y/A. For casinh this
|
||||||
|
* would be legitimate, but will be picked up by invoking atan2
|
||||||
|
* later on. For cacos this would not be legitimate.
|
||||||
|
*/
|
||||||
|
*B_is_usable = 0;
|
||||||
|
*sqrt_A2my2 = A * (2 / DBL_EPSILON);
|
||||||
|
*new_y = y * (2 / DBL_EPSILON);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* B = (|z+I| - |z-I|) / 2 = y/A */
|
||||||
|
*B = y / A;
|
||||||
|
*B_is_usable = 1;
|
||||||
|
|
||||||
|
if (*B > B_crossover) {
|
||||||
|
*B_is_usable = 0;
|
||||||
|
/*
|
||||||
|
* Amy = fp + fm, where fp = f(x, y+1), and fm = f(x, y-1).
|
||||||
|
* sqrt_A2my2 = sqrt(Amy*(A+y))
|
||||||
|
*/
|
||||||
|
if (y == 1 && x < DBL_EPSILON / 128) {
|
||||||
|
/*
|
||||||
|
* fp is of order x^2, and fm = x/2.
|
||||||
|
* A = 1 (inexactly).
|
||||||
|
*/
|
||||||
|
*sqrt_A2my2 = sqrt(x) * sqrt((A + y) / 2);
|
||||||
|
} else if (x >= DBL_EPSILON * fabs(y - 1)) {
|
||||||
|
/*
|
||||||
|
* Underflow will not occur because
|
||||||
|
* x >= DBL_EPSILON/128 >= FOUR_SQRT_MIN
|
||||||
|
* and
|
||||||
|
* x >= DBL_EPSILON^2 >= FOUR_SQRT_MIN
|
||||||
|
*/
|
||||||
|
Amy = f(x, y + 1, R) + f(x, y - 1, S);
|
||||||
|
*sqrt_A2my2 = sqrt(Amy * (A + y));
|
||||||
|
} else if (y > 1) {
|
||||||
|
/*
|
||||||
|
* fp = x*x/(y+1)/4, fm = x*x/(y-1)/4, and
|
||||||
|
* A = y (inexactly).
|
||||||
|
*
|
||||||
|
* y < RECIP_EPSILON. So the following
|
||||||
|
* scaling should avoid any underflow problems.
|
||||||
|
*/
|
||||||
|
*sqrt_A2my2 = x * (4 / DBL_EPSILON / DBL_EPSILON) * y /
|
||||||
|
sqrt((y + 1) * (y - 1));
|
||||||
|
*new_y = y * (4 / DBL_EPSILON / DBL_EPSILON);
|
||||||
|
} else { /* if (y < 1) */
|
||||||
|
/*
|
||||||
|
* fm = 1-y >= DBL_EPSILON, fp is of order x^2, and
|
||||||
|
* A = 1 (inexactly).
|
||||||
|
*/
|
||||||
|
*sqrt_A2my2 = sqrt((1 - y) * (1 + y));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* casinh(z) = z + O(z^3) as z -> 0
|
||||||
|
*
|
||||||
|
* casinh(z) = sign(x)*clog(sign(x)*z) + O(1/z^2) as z -> infinity
|
||||||
|
* The above formula works for the imaginary part as well, because
|
||||||
|
* Im(casinh(z)) = sign(x)*atan2(sign(x)*y, fabs(x)) + O(y/z^3)
|
||||||
|
* as z -> infinity, uniformly in y
|
||||||
|
*/
|
||||||
|
double complex
|
||||||
|
casinh(double complex z)
|
||||||
|
{
|
||||||
|
double x, y, ax, ay, rx, ry, B, sqrt_A2my2, new_y;
|
||||||
|
int B_is_usable;
|
||||||
|
double complex w;
|
||||||
|
|
||||||
|
x = creal(z);
|
||||||
|
y = cimag(z);
|
||||||
|
ax = fabs(x);
|
||||||
|
ay = fabs(y);
|
||||||
|
|
||||||
|
if (isnan(x) || isnan(y)) {
|
||||||
|
/* casinh(+-Inf + I*NaN) = +-Inf + I*NaN */
|
||||||
|
if (isinf(x))
|
||||||
|
return (CMPLX(x, y + y));
|
||||||
|
/* casinh(NaN + I*+-Inf) = opt(+-)Inf + I*NaN */
|
||||||
|
if (isinf(y))
|
||||||
|
return (CMPLX(y, x + x));
|
||||||
|
/* casinh(NaN + I*0) = NaN + I*0 */
|
||||||
|
if (y == 0)
|
||||||
|
return (CMPLX(x + x, y));
|
||||||
|
/*
|
||||||
|
* All other cases involving NaN return NaN + I*NaN.
|
||||||
|
* C99 leaves it optional whether to raise invalid if one of
|
||||||
|
* the arguments is not NaN, so we opt not to raise it.
|
||||||
|
*/
|
||||||
|
return (CMPLX(nan_mix(x, y), nan_mix(x, y)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) {
|
||||||
|
/* clog...() will raise inexact unless x or y is infinite. */
|
||||||
|
if (signbit(x) == 0)
|
||||||
|
w = clog_for_large_values(z) + m_ln2;
|
||||||
|
else
|
||||||
|
w = clog_for_large_values(-z) + m_ln2;
|
||||||
|
return (CMPLX(copysign(creal(w), x), copysign(cimag(w), y)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Avoid spuriously raising inexact for z = 0. */
|
||||||
|
if (x == 0 && y == 0)
|
||||||
|
return (z);
|
||||||
|
|
||||||
|
/* All remaining cases are inexact. */
|
||||||
|
raise_inexact();
|
||||||
|
|
||||||
|
if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4)
|
||||||
|
return (z);
|
||||||
|
|
||||||
|
do_hard_work(ax, ay, &rx, &B_is_usable, &B, &sqrt_A2my2, &new_y);
|
||||||
|
if (B_is_usable)
|
||||||
|
ry = asin(B);
|
||||||
|
else
|
||||||
|
ry = atan2(new_y, sqrt_A2my2);
|
||||||
|
return (CMPLX(copysign(rx, x), copysign(ry, y)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* casin(z) = reverse(casinh(reverse(z)))
|
||||||
|
* where reverse(x + I*y) = y + I*x = I*conj(z).
|
||||||
|
*/
|
||||||
|
double complex
|
||||||
|
casin(double complex z)
|
||||||
|
{
|
||||||
|
double complex w = casinh(CMPLX(cimag(z), creal(z)));
|
||||||
|
|
||||||
|
return (CMPLX(cimag(w), creal(w)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* cacos(z) = PI/2 - casin(z)
|
||||||
|
* but do the computation carefully so cacos(z) is accurate when z is
|
||||||
|
* close to 1.
|
||||||
|
*
|
||||||
|
* cacos(z) = PI/2 - z + O(z^3) as z -> 0
|
||||||
|
*
|
||||||
|
* cacos(z) = -sign(y)*I*clog(z) + O(1/z^2) as z -> infinity
|
||||||
|
* The above formula works for the real part as well, because
|
||||||
|
* Re(cacos(z)) = atan2(fabs(y), x) + O(y/z^3)
|
||||||
|
* as z -> infinity, uniformly in y
|
||||||
|
*/
|
||||||
|
double complex
|
||||||
|
cacos(double complex z)
|
||||||
|
{
|
||||||
|
double x, y, ax, ay, rx, ry, B, sqrt_A2mx2, new_x;
|
||||||
|
int sx, sy;
|
||||||
|
int B_is_usable;
|
||||||
|
double complex w;
|
||||||
|
|
||||||
|
x = creal(z);
|
||||||
|
y = cimag(z);
|
||||||
|
sx = signbit(x);
|
||||||
|
sy = signbit(y);
|
||||||
|
ax = fabs(x);
|
||||||
|
ay = fabs(y);
|
||||||
|
|
||||||
|
if (isnan(x) || isnan(y)) {
|
||||||
|
/* cacos(+-Inf + I*NaN) = NaN + I*opt(-)Inf */
|
||||||
|
if (isinf(x))
|
||||||
|
return (CMPLX(y + y, -INFINITY));
|
||||||
|
/* cacos(NaN + I*+-Inf) = NaN + I*-+Inf */
|
||||||
|
if (isinf(y))
|
||||||
|
return (CMPLX(x + x, -y));
|
||||||
|
/* cacos(0 + I*NaN) = PI/2 + I*NaN with inexact */
|
||||||
|
if (x == 0)
|
||||||
|
return (CMPLX(pio2_hi + pio2_lo, y + y));
|
||||||
|
/*
|
||||||
|
* All other cases involving NaN return NaN + I*NaN.
|
||||||
|
* C99 leaves it optional whether to raise invalid if one of
|
||||||
|
* the arguments is not NaN, so we opt not to raise it.
|
||||||
|
*/
|
||||||
|
return (CMPLX(nan_mix(x, y), nan_mix(x, y)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) {
|
||||||
|
/* clog...() will raise inexact unless x or y is infinite. */
|
||||||
|
w = clog_for_large_values(z);
|
||||||
|
rx = fabs(cimag(w));
|
||||||
|
ry = creal(w) + m_ln2;
|
||||||
|
if (sy == 0)
|
||||||
|
ry = -ry;
|
||||||
|
return (CMPLX(rx, ry));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Avoid spuriously raising inexact for z = 1. */
|
||||||
|
if (x == 1 && y == 0)
|
||||||
|
return (CMPLX(0, -y));
|
||||||
|
|
||||||
|
/* All remaining cases are inexact. */
|
||||||
|
raise_inexact();
|
||||||
|
|
||||||
|
if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4)
|
||||||
|
return (CMPLX(pio2_hi - (x - pio2_lo), -y));
|
||||||
|
|
||||||
|
do_hard_work(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x);
|
||||||
|
if (B_is_usable) {
|
||||||
|
if (sx == 0)
|
||||||
|
rx = acos(B);
|
||||||
|
else
|
||||||
|
rx = acos(-B);
|
||||||
|
} else {
|
||||||
|
if (sx == 0)
|
||||||
|
rx = atan2(sqrt_A2mx2, new_x);
|
||||||
|
else
|
||||||
|
rx = atan2(sqrt_A2mx2, -new_x);
|
||||||
|
}
|
||||||
|
if (sy == 0)
|
||||||
|
ry = -ry;
|
||||||
|
return (CMPLX(rx, ry));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* cacosh(z) = I*cacos(z) or -I*cacos(z)
|
||||||
|
* where the sign is chosen so Re(cacosh(z)) >= 0.
|
||||||
|
*/
|
||||||
|
double complex
|
||||||
|
cacosh(double complex z)
|
||||||
|
{
|
||||||
|
double complex w;
|
||||||
|
double rx, ry;
|
||||||
|
|
||||||
|
w = cacos(z);
|
||||||
|
rx = creal(w);
|
||||||
|
ry = cimag(w);
|
||||||
|
/* cacosh(NaN + I*NaN) = NaN + I*NaN */
|
||||||
|
if (isnan(rx) && isnan(ry))
|
||||||
|
return (CMPLX(ry, rx));
|
||||||
|
/* cacosh(NaN + I*+-Inf) = +Inf + I*NaN */
|
||||||
|
/* cacosh(+-Inf + I*NaN) = +Inf + I*NaN */
|
||||||
|
if (isnan(rx))
|
||||||
|
return (CMPLX(fabs(ry), rx));
|
||||||
|
/* cacosh(0 + I*NaN) = NaN + I*NaN */
|
||||||
|
if (isnan(ry))
|
||||||
|
return (CMPLX(ry, ry));
|
||||||
|
return (CMPLX(fabs(ry), copysign(rx, cimag(z))));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Optimized version of clog() for |z| finite and larger than ~RECIP_EPSILON.
|
||||||
|
*/
|
||||||
|
static double complex
|
||||||
|
clog_for_large_values(double complex z)
|
||||||
|
{
|
||||||
|
double x, y;
|
||||||
|
double ax, ay, t;
|
||||||
|
|
||||||
|
x = creal(z);
|
||||||
|
y = cimag(z);
|
||||||
|
ax = fabs(x);
|
||||||
|
ay = fabs(y);
|
||||||
|
if (ax < ay) {
|
||||||
|
t = ax;
|
||||||
|
ax = ay;
|
||||||
|
ay = t;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Avoid overflow in hypot() when x and y are both very large.
|
||||||
|
* Divide x and y by E, and then add 1 to the logarithm. This
|
||||||
|
* depends on E being larger than sqrt(2), since the return value of
|
||||||
|
* hypot cannot overflow if neither argument is greater in magnitude
|
||||||
|
* than 1/sqrt(2) of the maximum value of the return type. Likewise
|
||||||
|
* this determines the necessary threshold for using this method
|
||||||
|
* (however, actually use 1/2 instead as it is simpler).
|
||||||
|
*
|
||||||
|
* Dividing by E causes an insignificant loss of accuracy; however
|
||||||
|
* this method is still poor since it is unnecessarily slow.
|
||||||
|
*/
|
||||||
|
if (ax > DBL_MAX / 2)
|
||||||
|
return (CMPLX(log(hypot(x / m_e, y / m_e)) + 1, atan2(y, x)));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Avoid overflow when x or y is large. Avoid underflow when x or
|
||||||
|
* y is small.
|
||||||
|
*/
|
||||||
|
if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN)
|
||||||
|
return (CMPLX(log(hypot(x, y)), atan2(y, x)));
|
||||||
|
|
||||||
|
return (CMPLX(log(ax * ax + ay * ay) / 2, atan2(y, x)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* =================
|
||||||
|
* | catanh, catan |
|
||||||
|
* =================
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* sum_squares(x,y) = x*x + y*y (or just x*x if y*y would underflow).
|
||||||
|
* Assumes x*x and y*y will not overflow.
|
||||||
|
* Assumes x and y are finite.
|
||||||
|
* Assumes y is non-negative.
|
||||||
|
* Assumes fabs(x) >= DBL_EPSILON.
|
||||||
|
*/
|
||||||
|
static inline double
|
||||||
|
sum_squares(double x, double y)
|
||||||
|
{
|
||||||
|
|
||||||
|
/* Avoid underflow when y is small. */
|
||||||
|
if (y < SQRT_MIN)
|
||||||
|
return (x * x);
|
||||||
|
|
||||||
|
return (x * x + y * y);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* real_part_reciprocal(x, y) = Re(1/(x+I*y)) = x/(x*x + y*y).
|
||||||
|
* Assumes x and y are not NaN, and one of x and y is larger than
|
||||||
|
* RECIP_EPSILON. We avoid unwarranted underflow. It is important to not use
|
||||||
|
* the code creal(1/z), because the imaginary part may produce an unwanted
|
||||||
|
* underflow.
|
||||||
|
* This is only called in a context where inexact is always raised before
|
||||||
|
* the call, so no effort is made to avoid or force inexact.
|
||||||
|
*/
|
||||||
|
static inline double
|
||||||
|
real_part_reciprocal(double x, double y)
|
||||||
|
{
|
||||||
|
double scale;
|
||||||
|
uint32_t hx, hy;
|
||||||
|
int32_t ix, iy;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This code is inspired by the C99 document n1124.pdf, Section G.5.1,
|
||||||
|
* example 2.
|
||||||
|
*/
|
||||||
|
GET_HIGH_WORD(hx, x);
|
||||||
|
ix = hx & 0x7ff00000;
|
||||||
|
GET_HIGH_WORD(hy, y);
|
||||||
|
iy = hy & 0x7ff00000;
|
||||||
|
#undef BIAS
|
||||||
|
#define BIAS (DBL_MAX_EXP - 1)
|
||||||
|
/* XXX more guard digits are useful iff there is extra precision. */
|
||||||
|
#define CUTOFF (DBL_MANT_DIG / 2 + 1) /* just half or 1 guard digit */
|
||||||
|
if (ix - iy >= CUTOFF << 20 || isinf(x))
|
||||||
|
return (1 / x); /* +-Inf -> +-0 is special */
|
||||||
|
if (iy - ix >= CUTOFF << 20)
|
||||||
|
return (x / y / y); /* should avoid double div, but hard */
|
||||||
|
if (ix <= (BIAS + DBL_MAX_EXP / 2 - CUTOFF) << 20)
|
||||||
|
return (x / (x * x + y * y));
|
||||||
|
scale = 1;
|
||||||
|
SET_HIGH_WORD(scale, 0x7ff00000 - ix); /* 2**(1-ilogb(x)) */
|
||||||
|
x *= scale;
|
||||||
|
y *= scale;
|
||||||
|
return (x / (x * x + y * y) * scale);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* catanh(z) = log((1+z)/(1-z)) / 2
|
||||||
|
* = log1p(4*x / |z-1|^2) / 4
|
||||||
|
* + I * atan2(2*y, (1-x)*(1+x)-y*y) / 2
|
||||||
|
*
|
||||||
|
* catanh(z) = z + O(z^3) as z -> 0
|
||||||
|
*
|
||||||
|
* catanh(z) = 1/z + sign(y)*I*PI/2 + O(1/z^3) as z -> infinity
|
||||||
|
* The above formula works for the real part as well, because
|
||||||
|
* Re(catanh(z)) = x/|z|^2 + O(x/z^4)
|
||||||
|
* as z -> infinity, uniformly in x
|
||||||
|
*/
|
||||||
|
double complex
|
||||||
|
catanh(double complex z)
|
||||||
|
{
|
||||||
|
double x, y, ax, ay, rx, ry;
|
||||||
|
|
||||||
|
x = creal(z);
|
||||||
|
y = cimag(z);
|
||||||
|
ax = fabs(x);
|
||||||
|
ay = fabs(y);
|
||||||
|
|
||||||
|
/* This helps handle many cases. */
|
||||||
|
if (y == 0 && ax <= 1)
|
||||||
|
return (CMPLX(atanh(x), y));
|
||||||
|
|
||||||
|
/* To ensure the same accuracy as atan(), and to filter out z = 0. */
|
||||||
|
if (x == 0)
|
||||||
|
return (CMPLX(x, atan(y)));
|
||||||
|
|
||||||
|
if (isnan(x) || isnan(y)) {
|
||||||
|
/* catanh(+-Inf + I*NaN) = +-0 + I*NaN */
|
||||||
|
if (isinf(x))
|
||||||
|
return (CMPLX(copysign(0, x), y + y));
|
||||||
|
/* catanh(NaN + I*+-Inf) = sign(NaN)0 + I*+-PI/2 */
|
||||||
|
if (isinf(y))
|
||||||
|
return (CMPLX(copysign(0, x),
|
||||||
|
copysign(pio2_hi + pio2_lo, y)));
|
||||||
|
/*
|
||||||
|
* All other cases involving NaN return NaN + I*NaN.
|
||||||
|
* C99 leaves it optional whether to raise invalid if one of
|
||||||
|
* the arguments is not NaN, so we opt not to raise it.
|
||||||
|
*/
|
||||||
|
return (CMPLX(nan_mix(x, y), nan_mix(x, y)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON)
|
||||||
|
return (CMPLX(real_part_reciprocal(x, y),
|
||||||
|
copysign(pio2_hi + pio2_lo, y)));
|
||||||
|
|
||||||
|
if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) {
|
||||||
|
/*
|
||||||
|
* z = 0 was filtered out above. All other cases must raise
|
||||||
|
* inexact, but this is the only case that needs to do it
|
||||||
|
* explicitly.
|
||||||
|
*/
|
||||||
|
raise_inexact();
|
||||||
|
return (z);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ax == 1 && ay < DBL_EPSILON)
|
||||||
|
rx = (m_ln2 - log(ay)) / 2;
|
||||||
|
else
|
||||||
|
rx = log1p(4 * ax / sum_squares(ax - 1, ay)) / 4;
|
||||||
|
|
||||||
|
if (ax == 1)
|
||||||
|
ry = atan2(2, -ay) / 2;
|
||||||
|
else if (ay < DBL_EPSILON)
|
||||||
|
ry = atan2(2 * ay, (1 - ax) * (1 + ax)) / 2;
|
||||||
|
else
|
||||||
|
ry = atan2(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2;
|
||||||
|
|
||||||
|
return (CMPLX(copysign(rx, x), copysign(ry, y)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* catan(z) = reverse(catanh(reverse(z)))
|
||||||
|
* where reverse(x + I*y) = y + I*x = I*conj(z).
|
||||||
|
*/
|
||||||
|
double complex
|
||||||
|
catan(double complex z)
|
||||||
|
{
|
||||||
|
double complex w = catanh(CMPLX(cimag(z), creal(z)));
|
||||||
|
|
||||||
|
return (CMPLX(cimag(w), creal(w)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#if LDBL_MANT_DIG == 53
|
||||||
|
__weak_reference(cacosh, cacoshl);
|
||||||
|
__weak_reference(cacos, cacosl);
|
||||||
|
__weak_reference(casinh, casinhl);
|
||||||
|
__weak_reference(casin, casinl);
|
||||||
|
__weak_reference(catanh, catanhl);
|
||||||
|
__weak_reference(catan, catanl);
|
||||||
|
#endif
|
377
libc/tinymath/catrigf.c
Normal file
377
libc/tinymath/catrigf.c
Normal file
|
@ -0,0 +1,377 @@
|
||||||
|
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
||||||
|
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
||||||
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
|
│ │
|
||||||
|
│ FreeBSD lib/msun/src/catrigf.c │
|
||||||
|
│ Copyright (c) 2012 Stephen Montgomery-Smith <stephen@FreeBSD.ORG> │
|
||||||
|
│ │
|
||||||
|
│ Redistribution and use in source and binary forms, with or without │
|
||||||
|
│ modification, are permitted provided that the following conditions │
|
||||||
|
│ are met: │
|
||||||
|
│ 1. Redistributions of source code must retain the above copyright │
|
||||||
|
│ notice, this list of conditions and the following disclaimer. │
|
||||||
|
│ 2. Redistributions in binary form must reproduce the above copyright │
|
||||||
|
│ notice, this list of conditions and the following disclaimer in the │
|
||||||
|
│ documentation and/or other materials provided with the distribution. │
|
||||||
|
│ │
|
||||||
|
│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND │
|
||||||
|
│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE │
|
||||||
|
│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE │
|
||||||
|
│ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE │
|
||||||
|
│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL │
|
||||||
|
│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS │
|
||||||
|
│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) │
|
||||||
|
│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT │
|
||||||
|
│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY │
|
||||||
|
│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF │
|
||||||
|
│ SUCH DAMAGE. │
|
||||||
|
│ │
|
||||||
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
|
#include "libc/tinymath/freebsd.internal.h"
|
||||||
|
__static_yoink("freebsd_complex_notice");
|
||||||
|
|
||||||
|
#undef isinf
|
||||||
|
#define isinf(x) (fabsf(x) == INFINITY)
|
||||||
|
#undef isnan
|
||||||
|
#define isnan(x) ((x) != (x))
|
||||||
|
#define raise_inexact() do { volatile float _j = 1 + tiny; (void)_j; } while(0)
|
||||||
|
#undef signbit
|
||||||
|
#define signbit(x) (__builtin_signbitf(x))
|
||||||
|
|
||||||
|
static const float
|
||||||
|
A_crossover = 10,
|
||||||
|
B_crossover = 0.6417,
|
||||||
|
FOUR_SQRT_MIN = 0x1p-61,
|
||||||
|
QUARTER_SQRT_MAX = 0x1p61,
|
||||||
|
m_e = 2.7182818285e0, /* 0xadf854.0p-22 */
|
||||||
|
m_ln2 = 6.9314718056e-1, /* 0xb17218.0p-24 */
|
||||||
|
pio2_hi = 1.5707962513e0, /* 0xc90fda.0p-23 */
|
||||||
|
RECIP_EPSILON = 1 / FLT_EPSILON,
|
||||||
|
SQRT_3_EPSILON = 5.9801995673e-4, /* 0x9cc471.0p-34 */
|
||||||
|
SQRT_6_EPSILON = 8.4572793338e-4, /* 0xddb3d7.0p-34 */
|
||||||
|
SQRT_MIN = 0x1p-63;
|
||||||
|
|
||||||
|
static const volatile float
|
||||||
|
pio2_lo = 7.5497899549e-8, /* 0xa22169.0p-47 */
|
||||||
|
tiny = 0x1p-100;
|
||||||
|
|
||||||
|
static float complex clog_for_large_values(float complex z);
|
||||||
|
|
||||||
|
static inline float
|
||||||
|
f(float a, float b, float hypot_a_b)
|
||||||
|
{
|
||||||
|
if (b < 0)
|
||||||
|
return ((hypot_a_b - b) / 2);
|
||||||
|
if (b == 0)
|
||||||
|
return (a / 2);
|
||||||
|
return (a * a / (hypot_a_b + b) / 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
do_hard_work(float x, float y, float *rx, int *B_is_usable, float *B,
|
||||||
|
float *sqrt_A2my2, float *new_y)
|
||||||
|
{
|
||||||
|
float R, S, A;
|
||||||
|
float Am1, Amy;
|
||||||
|
|
||||||
|
R = hypotf(x, y + 1);
|
||||||
|
S = hypotf(x, y - 1);
|
||||||
|
|
||||||
|
A = (R + S) / 2;
|
||||||
|
if (A < 1)
|
||||||
|
A = 1;
|
||||||
|
|
||||||
|
if (A < A_crossover) {
|
||||||
|
if (y == 1 && x < FLT_EPSILON * FLT_EPSILON / 128) {
|
||||||
|
*rx = sqrtf(x);
|
||||||
|
} else if (x >= FLT_EPSILON * fabsf(y - 1)) {
|
||||||
|
Am1 = f(x, 1 + y, R) + f(x, 1 - y, S);
|
||||||
|
*rx = log1pf(Am1 + sqrtf(Am1 * (A + 1)));
|
||||||
|
} else if (y < 1) {
|
||||||
|
*rx = x / sqrtf((1 - y) * (1 + y));
|
||||||
|
} else {
|
||||||
|
*rx = log1pf((y - 1) + sqrtf((y - 1) * (y + 1)));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
*rx = logf(A + sqrtf(A * A - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
*new_y = y;
|
||||||
|
|
||||||
|
if (y < FOUR_SQRT_MIN) {
|
||||||
|
*B_is_usable = 0;
|
||||||
|
*sqrt_A2my2 = A * (2 / FLT_EPSILON);
|
||||||
|
*new_y = y * (2 / FLT_EPSILON);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
*B = y / A;
|
||||||
|
*B_is_usable = 1;
|
||||||
|
|
||||||
|
if (*B > B_crossover) {
|
||||||
|
*B_is_usable = 0;
|
||||||
|
if (y == 1 && x < FLT_EPSILON / 128) {
|
||||||
|
*sqrt_A2my2 = sqrtf(x) * sqrtf((A + y) / 2);
|
||||||
|
} else if (x >= FLT_EPSILON * fabsf(y - 1)) {
|
||||||
|
Amy = f(x, y + 1, R) + f(x, y - 1, S);
|
||||||
|
*sqrt_A2my2 = sqrtf(Amy * (A + y));
|
||||||
|
} else if (y > 1) {
|
||||||
|
*sqrt_A2my2 = x * (4 / FLT_EPSILON / FLT_EPSILON) * y /
|
||||||
|
sqrtf((y + 1) * (y - 1));
|
||||||
|
*new_y = y * (4 / FLT_EPSILON / FLT_EPSILON);
|
||||||
|
} else {
|
||||||
|
*sqrt_A2my2 = sqrtf((1 - y) * (1 + y));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
float complex
|
||||||
|
casinhf(float complex z)
|
||||||
|
{
|
||||||
|
float x, y, ax, ay, rx, ry, B, sqrt_A2my2, new_y;
|
||||||
|
int B_is_usable;
|
||||||
|
float complex w;
|
||||||
|
|
||||||
|
x = crealf(z);
|
||||||
|
y = cimagf(z);
|
||||||
|
ax = fabsf(x);
|
||||||
|
ay = fabsf(y);
|
||||||
|
|
||||||
|
if (isnan(x) || isnan(y)) {
|
||||||
|
if (isinf(x))
|
||||||
|
return (CMPLXF(x, y + y));
|
||||||
|
if (isinf(y))
|
||||||
|
return (CMPLXF(y, x + x));
|
||||||
|
if (y == 0)
|
||||||
|
return (CMPLXF(x + x, y));
|
||||||
|
return (CMPLXF(nan_mix(x, y), nan_mix(x, y)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) {
|
||||||
|
if (signbit(x) == 0)
|
||||||
|
w = clog_for_large_values(z) + m_ln2;
|
||||||
|
else
|
||||||
|
w = clog_for_large_values(-z) + m_ln2;
|
||||||
|
return (CMPLXF(copysignf(crealf(w), x),
|
||||||
|
copysignf(cimagf(w), y)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (x == 0 && y == 0)
|
||||||
|
return (z);
|
||||||
|
|
||||||
|
raise_inexact();
|
||||||
|
|
||||||
|
if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4)
|
||||||
|
return (z);
|
||||||
|
|
||||||
|
do_hard_work(ax, ay, &rx, &B_is_usable, &B, &sqrt_A2my2, &new_y);
|
||||||
|
if (B_is_usable)
|
||||||
|
ry = asinf(B);
|
||||||
|
else
|
||||||
|
ry = atan2f(new_y, sqrt_A2my2);
|
||||||
|
return (CMPLXF(copysignf(rx, x), copysignf(ry, y)));
|
||||||
|
}
|
||||||
|
|
||||||
|
float complex
|
||||||
|
casinf(float complex z)
|
||||||
|
{
|
||||||
|
float complex w = casinhf(CMPLXF(cimagf(z), crealf(z)));
|
||||||
|
|
||||||
|
return (CMPLXF(cimagf(w), crealf(w)));
|
||||||
|
}
|
||||||
|
|
||||||
|
float complex
|
||||||
|
cacosf(float complex z)
|
||||||
|
{
|
||||||
|
float x, y, ax, ay, rx, ry, B, sqrt_A2mx2, new_x;
|
||||||
|
int sx, sy;
|
||||||
|
int B_is_usable;
|
||||||
|
float complex w;
|
||||||
|
|
||||||
|
x = crealf(z);
|
||||||
|
y = cimagf(z);
|
||||||
|
sx = signbit(x);
|
||||||
|
sy = signbit(y);
|
||||||
|
ax = fabsf(x);
|
||||||
|
ay = fabsf(y);
|
||||||
|
|
||||||
|
if (isnan(x) || isnan(y)) {
|
||||||
|
if (isinf(x))
|
||||||
|
return (CMPLXF(y + y, -INFINITY));
|
||||||
|
if (isinf(y))
|
||||||
|
return (CMPLXF(x + x, -y));
|
||||||
|
if (x == 0)
|
||||||
|
return (CMPLXF(pio2_hi + pio2_lo, y + y));
|
||||||
|
return (CMPLXF(nan_mix(x, y), nan_mix(x, y)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) {
|
||||||
|
w = clog_for_large_values(z);
|
||||||
|
rx = fabsf(cimagf(w));
|
||||||
|
ry = crealf(w) + m_ln2;
|
||||||
|
if (sy == 0)
|
||||||
|
ry = -ry;
|
||||||
|
return (CMPLXF(rx, ry));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (x == 1 && y == 0)
|
||||||
|
return (CMPLXF(0, -y));
|
||||||
|
|
||||||
|
raise_inexact();
|
||||||
|
|
||||||
|
if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4)
|
||||||
|
return (CMPLXF(pio2_hi - (x - pio2_lo), -y));
|
||||||
|
|
||||||
|
do_hard_work(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x);
|
||||||
|
if (B_is_usable) {
|
||||||
|
if (sx == 0)
|
||||||
|
rx = acosf(B);
|
||||||
|
else
|
||||||
|
rx = acosf(-B);
|
||||||
|
} else {
|
||||||
|
if (sx == 0)
|
||||||
|
rx = atan2f(sqrt_A2mx2, new_x);
|
||||||
|
else
|
||||||
|
rx = atan2f(sqrt_A2mx2, -new_x);
|
||||||
|
}
|
||||||
|
if (sy == 0)
|
||||||
|
ry = -ry;
|
||||||
|
return (CMPLXF(rx, ry));
|
||||||
|
}
|
||||||
|
|
||||||
|
float complex
|
||||||
|
cacoshf(float complex z)
|
||||||
|
{
|
||||||
|
float complex w;
|
||||||
|
float rx, ry;
|
||||||
|
|
||||||
|
w = cacosf(z);
|
||||||
|
rx = crealf(w);
|
||||||
|
ry = cimagf(w);
|
||||||
|
if (isnan(rx) && isnan(ry))
|
||||||
|
return (CMPLXF(ry, rx));
|
||||||
|
if (isnan(rx))
|
||||||
|
return (CMPLXF(fabsf(ry), rx));
|
||||||
|
if (isnan(ry))
|
||||||
|
return (CMPLXF(ry, ry));
|
||||||
|
return (CMPLXF(fabsf(ry), copysignf(rx, cimagf(z))));
|
||||||
|
}
|
||||||
|
|
||||||
|
static float complex
|
||||||
|
clog_for_large_values(float complex z)
|
||||||
|
{
|
||||||
|
float x, y;
|
||||||
|
float ax, ay, t;
|
||||||
|
|
||||||
|
x = crealf(z);
|
||||||
|
y = cimagf(z);
|
||||||
|
ax = fabsf(x);
|
||||||
|
ay = fabsf(y);
|
||||||
|
if (ax < ay) {
|
||||||
|
t = ax;
|
||||||
|
ax = ay;
|
||||||
|
ay = t;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ax > FLT_MAX / 2)
|
||||||
|
return (CMPLXF(logf(hypotf(x / m_e, y / m_e)) + 1,
|
||||||
|
atan2f(y, x)));
|
||||||
|
|
||||||
|
if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN)
|
||||||
|
return (CMPLXF(logf(hypotf(x, y)), atan2f(y, x)));
|
||||||
|
|
||||||
|
return (CMPLXF(logf(ax * ax + ay * ay) / 2, atan2f(y, x)));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline float
|
||||||
|
sum_squares(float x, float y)
|
||||||
|
{
|
||||||
|
|
||||||
|
if (y < SQRT_MIN)
|
||||||
|
return (x * x);
|
||||||
|
|
||||||
|
return (x * x + y * y);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline float
|
||||||
|
real_part_reciprocal(float x, float y)
|
||||||
|
{
|
||||||
|
float scale;
|
||||||
|
uint32_t hx, hy;
|
||||||
|
int32_t ix, iy;
|
||||||
|
|
||||||
|
GET_FLOAT_WORD(hx, x);
|
||||||
|
ix = hx & 0x7f800000;
|
||||||
|
GET_FLOAT_WORD(hy, y);
|
||||||
|
iy = hy & 0x7f800000;
|
||||||
|
#undef BIAS
|
||||||
|
#define BIAS (FLT_MAX_EXP - 1)
|
||||||
|
#define CUTOFF (FLT_MANT_DIG / 2 + 1)
|
||||||
|
if (ix - iy >= CUTOFF << 23 || isinf(x))
|
||||||
|
return (1 / x);
|
||||||
|
if (iy - ix >= CUTOFF << 23)
|
||||||
|
return (x / y / y);
|
||||||
|
if (ix <= (BIAS + FLT_MAX_EXP / 2 - CUTOFF) << 23)
|
||||||
|
return (x / (x * x + y * y));
|
||||||
|
SET_FLOAT_WORD(scale, 0x7f800000 - ix);
|
||||||
|
x *= scale;
|
||||||
|
y *= scale;
|
||||||
|
return (x / (x * x + y * y) * scale);
|
||||||
|
}
|
||||||
|
|
||||||
|
float complex
|
||||||
|
catanhf(float complex z)
|
||||||
|
{
|
||||||
|
float x, y, ax, ay, rx, ry;
|
||||||
|
|
||||||
|
x = crealf(z);
|
||||||
|
y = cimagf(z);
|
||||||
|
ax = fabsf(x);
|
||||||
|
ay = fabsf(y);
|
||||||
|
|
||||||
|
if (y == 0 && ax <= 1)
|
||||||
|
return (CMPLXF(atanhf(x), y));
|
||||||
|
|
||||||
|
if (x == 0)
|
||||||
|
return (CMPLXF(x, atanf(y)));
|
||||||
|
|
||||||
|
if (isnan(x) || isnan(y)) {
|
||||||
|
if (isinf(x))
|
||||||
|
return (CMPLXF(copysignf(0, x), y + y));
|
||||||
|
if (isinf(y))
|
||||||
|
return (CMPLXF(copysignf(0, x),
|
||||||
|
copysignf(pio2_hi + pio2_lo, y)));
|
||||||
|
return (CMPLXF(nan_mix(x, y), nan_mix(x, y)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON)
|
||||||
|
return (CMPLXF(real_part_reciprocal(x, y),
|
||||||
|
copysignf(pio2_hi + pio2_lo, y)));
|
||||||
|
|
||||||
|
if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) {
|
||||||
|
raise_inexact();
|
||||||
|
return (z);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ax == 1 && ay < FLT_EPSILON)
|
||||||
|
rx = (m_ln2 - logf(ay)) / 2;
|
||||||
|
else
|
||||||
|
rx = log1pf(4 * ax / sum_squares(ax - 1, ay)) / 4;
|
||||||
|
|
||||||
|
if (ax == 1)
|
||||||
|
ry = atan2f(2, -ay) / 2;
|
||||||
|
else if (ay < FLT_EPSILON)
|
||||||
|
ry = atan2f(2 * ay, (1 - ax) * (1 + ax)) / 2;
|
||||||
|
else
|
||||||
|
ry = atan2f(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2;
|
||||||
|
|
||||||
|
return (CMPLXF(copysignf(rx, x), copysignf(ry, y)));
|
||||||
|
}
|
||||||
|
|
||||||
|
float complex
|
||||||
|
catanf(float complex z)
|
||||||
|
{
|
||||||
|
float complex w = catanhf(CMPLXF(cimagf(z), crealf(z)));
|
||||||
|
|
||||||
|
return (CMPLXF(cimagf(w), crealf(w)));
|
||||||
|
}
|
|
@ -78,7 +78,6 @@ __static_yoink("fdlibm_notice");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns cosine of 𝑥.
|
* Returns cosine of 𝑥.
|
||||||
* @note should take ~5ns
|
|
||||||
*/
|
*/
|
||||||
double cos(double x)
|
double cos(double x)
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Musl Libc │
|
│ Optimized Routines │
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,92 +25,63 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/math.h"
|
#include "libc/tinymath/sincosf.internal.h"
|
||||||
#include "libc/tinymath/complex.internal.h"
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
#include "libc/tinymath/feval.internal.h"
|
|
||||||
#include "libc/tinymath/kernel.internal.h"
|
|
||||||
__static_yoink("freebsd_libm_notice");
|
|
||||||
__static_yoink("musl_libc_notice");
|
|
||||||
__static_yoink("fdlibm_notice");
|
|
||||||
|
|
||||||
|
|
||||||
/* origin: FreeBSD /usr/src/lib/msun/src/s_cosf.c */
|
|
||||||
/*
|
|
||||||
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
|
|
||||||
* Optimized by Bruce D. Evans.
|
|
||||||
*/
|
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Small multiples of pi/2 rounded to double precision. */
|
|
||||||
static const double
|
|
||||||
c1pio2 = 1*M_PI_2, /* 0x3FF921FB, 0x54442D18 */
|
|
||||||
c2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */
|
|
||||||
c3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */
|
|
||||||
c4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns cosine of 𝑥.
|
* Returns cosine of y.
|
||||||
* @note should take about 5ns
|
*
|
||||||
|
* This is a fast cosf implementation. The worst-case ULP is 0.5607, and
|
||||||
|
* the maximum relative error is 0.5303 * 2^-23. A single-step range
|
||||||
|
* reduction is used for small values. Large inputs have their range
|
||||||
|
* reduced using fast integer arithmetic.
|
||||||
|
*
|
||||||
|
* @raise EDOM and FE_INVALID if y is an infinity
|
||||||
*/
|
*/
|
||||||
float cosf(float x)
|
float
|
||||||
|
cosf (float y)
|
||||||
{
|
{
|
||||||
double y;
|
double x = y;
|
||||||
uint32_t ix;
|
double s;
|
||||||
unsigned n, sign;
|
int n;
|
||||||
|
const sincos_t *p = &__sincosf_table[0];
|
||||||
|
|
||||||
GET_FLOAT_WORD(ix, x);
|
if (abstop12 (y) < abstop12 (pio4f))
|
||||||
sign = ix >> 31;
|
{
|
||||||
ix &= 0x7fffffff;
|
double x2 = x * x;
|
||||||
|
|
||||||
if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
|
if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
|
||||||
if (ix < 0x39800000) { /* |x| < 2**-12 */
|
return 1.0f;
|
||||||
/* raise inexact if x != 0 */
|
|
||||||
FORCE_EVAL(x + 0x1p120f);
|
|
||||||
return 1.0f;
|
|
||||||
}
|
|
||||||
return __cosdf(x);
|
|
||||||
}
|
|
||||||
if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
|
|
||||||
if (ix > 0x4016cbe3) /* |x| ~> 3*pi/4 */
|
|
||||||
return -__cosdf(sign ? x+c2pio2 : x-c2pio2);
|
|
||||||
else {
|
|
||||||
if (sign)
|
|
||||||
return __sindf(x + c1pio2);
|
|
||||||
else
|
|
||||||
return __sindf(c1pio2 - x);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
|
|
||||||
if (ix > 0x40afeddf) /* |x| ~> 7*pi/4 */
|
|
||||||
return __cosdf(sign ? x+c4pio2 : x-c4pio2);
|
|
||||||
else {
|
|
||||||
if (sign)
|
|
||||||
return __sindf(-x - c3pio2);
|
|
||||||
else
|
|
||||||
return __sindf(x - c3pio2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* cos(Inf or NaN) is NaN */
|
return sinf_poly (x, x2, p, 1);
|
||||||
if (ix >= 0x7f800000)
|
}
|
||||||
return x-x;
|
else if (likely (abstop12 (y) < abstop12 (120.0f)))
|
||||||
|
{
|
||||||
|
x = reduce_fast (x, p, &n);
|
||||||
|
|
||||||
/* general argument reduction needed */
|
/* Setup the signs for sin and cos. */
|
||||||
n = __rem_pio2f(x,&y);
|
s = p->sign[n & 3];
|
||||||
switch (n&3) {
|
|
||||||
case 0: return __cosdf(y);
|
if (n & 2)
|
||||||
case 1: return __sindf(-y);
|
p = &__sincosf_table[1];
|
||||||
case 2: return -__cosdf(y);
|
|
||||||
default:
|
return sinf_poly (x * s, x * x, p, n ^ 1);
|
||||||
return __sindf(y);
|
}
|
||||||
}
|
else if (abstop12 (y) < abstop12 (INFINITY))
|
||||||
|
{
|
||||||
|
uint32_t xi = asuint (y);
|
||||||
|
int sign = xi >> 31;
|
||||||
|
|
||||||
|
x = reduce_large (xi, &n);
|
||||||
|
|
||||||
|
/* Setup signs for sin and cos - include original sign. */
|
||||||
|
s = p->sign[(n + sign) & 3];
|
||||||
|
|
||||||
|
if ((n + sign) & 2)
|
||||||
|
p = &__sincosf_table[1];
|
||||||
|
|
||||||
|
return sinf_poly (x * s, x * x, p, n ^ 1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return __math_invalidf (y);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Musl Libc │
|
│ Optimized Routines │
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,310 +25,247 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/math.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
__static_yoink("musl_libc_notice");
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
__static_yoink("fdlibm_notice");
|
|
||||||
|
|
||||||
/* origin: FreeBSD /usr/src/lib/msun/src/s_erf.c */
|
#define TwoOverSqrtPiMinusOne 0x1.06eba8214db69p-3
|
||||||
/*
|
#define C 0x1.b0ac16p-1
|
||||||
* ====================================================
|
#define PA __erf_data.erf_poly_A
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
#define NA __erf_data.erf_ratio_N_A
|
||||||
*
|
#define DA __erf_data.erf_ratio_D_A
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
#define NB __erf_data.erf_ratio_N_B
|
||||||
* Permission to use, copy, modify, and distribute this
|
#define DB __erf_data.erf_ratio_D_B
|
||||||
* software is freely granted, provided that this notice
|
#define PC __erf_data.erfc_poly_C
|
||||||
* is preserved.
|
#define PD __erf_data.erfc_poly_D
|
||||||
* ====================================================
|
#define PE __erf_data.erfc_poly_E
|
||||||
*/
|
#define PF __erf_data.erfc_poly_F
|
||||||
/* double erf(double x)
|
|
||||||
* double erfc(double x)
|
|
||||||
* x
|
|
||||||
* 2 |\
|
|
||||||
* erf(x) = --------- | exp(-t*t)dt
|
|
||||||
* sqrt(pi) \|
|
|
||||||
* 0
|
|
||||||
*
|
|
||||||
* erfc(x) = 1-erf(x)
|
|
||||||
* Note that
|
|
||||||
* erf(-x) = -erf(x)
|
|
||||||
* erfc(-x) = 2 - erfc(x)
|
|
||||||
*
|
|
||||||
* Method:
|
|
||||||
* 1. For |x| in [0, 0.84375]
|
|
||||||
* erf(x) = x + x*R(x^2)
|
|
||||||
* erfc(x) = 1 - erf(x) if x in [-.84375,0.25]
|
|
||||||
* = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375]
|
|
||||||
* where R = P/Q where P is an odd poly of degree 8 and
|
|
||||||
* Q is an odd poly of degree 10.
|
|
||||||
* -57.90
|
|
||||||
* | R - (erf(x)-x)/x | <= 2
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* Remark. The formula is derived by noting
|
|
||||||
* erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
|
|
||||||
* and that
|
|
||||||
* 2/sqrt(pi) = 1.128379167095512573896158903121545171688
|
|
||||||
* is close to one. The interval is chosen because the fix
|
|
||||||
* point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
|
|
||||||
* near 0.6174), and by some experiment, 0.84375 is chosen to
|
|
||||||
* guarantee the error is less than one ulp for erf.
|
|
||||||
*
|
|
||||||
* 2. For |x| in [0.84375,1.25], let s = |x| - 1, and
|
|
||||||
* c = 0.84506291151 rounded to single (24 bits)
|
|
||||||
* erf(x) = sign(x) * (c + P1(s)/Q1(s))
|
|
||||||
* erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0
|
|
||||||
* 1+(c+P1(s)/Q1(s)) if x < 0
|
|
||||||
* |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06
|
|
||||||
* Remark: here we use the taylor series expansion at x=1.
|
|
||||||
* erf(1+s) = erf(1) + s*Poly(s)
|
|
||||||
* = 0.845.. + P1(s)/Q1(s)
|
|
||||||
* That is, we use rational approximation to approximate
|
|
||||||
* erf(1+s) - (c = (single)0.84506291151)
|
|
||||||
* Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
|
|
||||||
* where
|
|
||||||
* P1(s) = degree 6 poly in s
|
|
||||||
* Q1(s) = degree 6 poly in s
|
|
||||||
*
|
|
||||||
* 3. For x in [1.25,1/0.35(~2.857143)],
|
|
||||||
* erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1)
|
|
||||||
* erf(x) = 1 - erfc(x)
|
|
||||||
* where
|
|
||||||
* R1(z) = degree 7 poly in z, (z=1/x^2)
|
|
||||||
* S1(z) = degree 8 poly in z
|
|
||||||
*
|
|
||||||
* 4. For x in [1/0.35,28]
|
|
||||||
* erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
|
|
||||||
* = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0
|
|
||||||
* = 2.0 - tiny (if x <= -6)
|
|
||||||
* erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6, else
|
|
||||||
* erf(x) = sign(x)*(1.0 - tiny)
|
|
||||||
* where
|
|
||||||
* R2(z) = degree 6 poly in z, (z=1/x^2)
|
|
||||||
* S2(z) = degree 7 poly in z
|
|
||||||
*
|
|
||||||
* Note1:
|
|
||||||
* To compute exp(-x*x-0.5625+R/S), let s be a single
|
|
||||||
* precision number and s := x; then
|
|
||||||
* -x*x = -s*s + (s-x)*(s+x)
|
|
||||||
* exp(-x*x-0.5625+R/S) =
|
|
||||||
* exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
|
|
||||||
* Note2:
|
|
||||||
* Here 4 and 5 make use of the asymptotic series
|
|
||||||
* exp(-x*x)
|
|
||||||
* erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
|
|
||||||
* x*sqrt(pi)
|
|
||||||
* We use rational approximation to approximate
|
|
||||||
* g(s)=f(1/x^2) = log(erfc(x)*x) - x*x + 0.5625
|
|
||||||
* Here is the error bound for R1/S1 and R2/S2
|
|
||||||
* |R1/S1 - f(x)| < 2**(-62.57)
|
|
||||||
* |R2/S2 - f(x)| < 2**(-61.52)
|
|
||||||
*
|
|
||||||
* 5. For inf > x >= 28
|
|
||||||
* erf(x) = sign(x) *(1 - tiny) (raise inexact)
|
|
||||||
* erfc(x) = tiny*tiny (raise underflow) if x > 0
|
|
||||||
* = 2 - tiny if x<0
|
|
||||||
*
|
|
||||||
* 7. Special case:
|
|
||||||
* erf(0) = 0, erf(inf) = 1, erf(-inf) = -1,
|
|
||||||
* erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
|
|
||||||
* erfc/erf(NaN) is NaN
|
|
||||||
*/
|
|
||||||
|
|
||||||
static const double
|
/* Top 32 bits of a double. */
|
||||||
erx = 8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */
|
static inline uint32_t
|
||||||
/*
|
top32 (double x)
|
||||||
* Coefficients for approximation to erf on [0,0.84375]
|
|
||||||
*/
|
|
||||||
efx8 = 1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */
|
|
||||||
pp0 = 1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */
|
|
||||||
pp1 = -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */
|
|
||||||
pp2 = -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */
|
|
||||||
pp3 = -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */
|
|
||||||
pp4 = -2.37630166566501626084e-05, /* 0xBEF8EAD6, 0x120016AC */
|
|
||||||
qq1 = 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */
|
|
||||||
qq2 = 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */
|
|
||||||
qq3 = 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */
|
|
||||||
qq4 = 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */
|
|
||||||
qq5 = -3.96022827877536812320e-06, /* 0xBED09C43, 0x42A26120 */
|
|
||||||
/*
|
|
||||||
* Coefficients for approximation to erf in [0.84375,1.25]
|
|
||||||
*/
|
|
||||||
pa0 = -2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */
|
|
||||||
pa1 = 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */
|
|
||||||
pa2 = -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */
|
|
||||||
pa3 = 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */
|
|
||||||
pa4 = -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */
|
|
||||||
pa5 = 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */
|
|
||||||
pa6 = -2.16637559486879084300e-03, /* 0xBF61BF38, 0x0A96073F */
|
|
||||||
qa1 = 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */
|
|
||||||
qa2 = 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */
|
|
||||||
qa3 = 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */
|
|
||||||
qa4 = 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */
|
|
||||||
qa5 = 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */
|
|
||||||
qa6 = 1.19844998467991074170e-02, /* 0x3F888B54, 0x5735151D */
|
|
||||||
/*
|
|
||||||
* Coefficients for approximation to erfc in [1.25,1/0.35]
|
|
||||||
*/
|
|
||||||
ra0 = -9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */
|
|
||||||
ra1 = -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */
|
|
||||||
ra2 = -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */
|
|
||||||
ra3 = -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */
|
|
||||||
ra4 = -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */
|
|
||||||
ra5 = -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */
|
|
||||||
ra6 = -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */
|
|
||||||
ra7 = -9.81432934416914548592e+00, /* 0xC023A0EF, 0xC69AC25C */
|
|
||||||
sa1 = 1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */
|
|
||||||
sa2 = 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */
|
|
||||||
sa3 = 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */
|
|
||||||
sa4 = 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */
|
|
||||||
sa5 = 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */
|
|
||||||
sa6 = 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */
|
|
||||||
sa7 = 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */
|
|
||||||
sa8 = -6.04244152148580987438e-02, /* 0xBFAEEFF2, 0xEE749A62 */
|
|
||||||
/*
|
|
||||||
* Coefficients for approximation to erfc in [1/.35,28]
|
|
||||||
*/
|
|
||||||
rb0 = -9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */
|
|
||||||
rb1 = -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */
|
|
||||||
rb2 = -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */
|
|
||||||
rb3 = -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */
|
|
||||||
rb4 = -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */
|
|
||||||
rb5 = -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */
|
|
||||||
rb6 = -4.83519191608651397019e+02, /* 0xC07E384E, 0x9BDC383F */
|
|
||||||
sb1 = 3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */
|
|
||||||
sb2 = 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */
|
|
||||||
sb3 = 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */
|
|
||||||
sb4 = 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */
|
|
||||||
sb5 = 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */
|
|
||||||
sb6 = 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */
|
|
||||||
sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */
|
|
||||||
|
|
||||||
#define asuint(f) ((union{float _f; uint32_t _i;}){f})._i
|
|
||||||
#define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f
|
|
||||||
#define asuint64(f) ((union{double _f; uint64_t _i;}){f})._i
|
|
||||||
#define asdouble(i) ((union{uint64_t _i; double _f;}){i})._f
|
|
||||||
#define INSERT_WORDS(d,hi,lo) \
|
|
||||||
do { \
|
|
||||||
(d) = asdouble(((uint64_t)(hi)<<32) | (uint32_t)(lo)); \
|
|
||||||
} while (0)
|
|
||||||
#define GET_HIGH_WORD(hi,d) \
|
|
||||||
do { \
|
|
||||||
(hi) = asuint64(d) >> 32; \
|
|
||||||
} while (0)
|
|
||||||
#define GET_LOW_WORD(lo,d) \
|
|
||||||
do { \
|
|
||||||
(lo) = (uint32_t)asuint64(d); \
|
|
||||||
} while (0)
|
|
||||||
#define SET_HIGH_WORD(d,hi) \
|
|
||||||
INSERT_WORDS(d, hi, (uint32_t)asuint64(d))
|
|
||||||
#define SET_LOW_WORD(d,lo) \
|
|
||||||
INSERT_WORDS(d, asuint64(d)>>32, lo)
|
|
||||||
|
|
||||||
static double erfc1(double x)
|
|
||||||
{
|
{
|
||||||
double_t s,P,Q;
|
return asuint64 (x) >> 32;
|
||||||
|
|
||||||
s = fabs(x) - 1;
|
|
||||||
P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
|
|
||||||
Q = 1+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
|
|
||||||
return 1 - erx - P/Q;
|
|
||||||
}
|
|
||||||
|
|
||||||
static double erfc2(uint32_t ix, double x)
|
|
||||||
{
|
|
||||||
double_t s,R,S;
|
|
||||||
double z;
|
|
||||||
|
|
||||||
if (ix < 0x3ff40000) /* |x| < 1.25 */
|
|
||||||
return erfc1(x);
|
|
||||||
|
|
||||||
x = fabs(x);
|
|
||||||
s = 1/(x*x);
|
|
||||||
if (ix < 0x4006db6d) { /* |x| < 1/.35 ~ 2.85714 */
|
|
||||||
R = ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
|
|
||||||
ra5+s*(ra6+s*ra7))))));
|
|
||||||
S = 1.0+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
|
|
||||||
sa5+s*(sa6+s*(sa7+s*sa8)))))));
|
|
||||||
} else { /* |x| > 1/.35 */
|
|
||||||
R = rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
|
|
||||||
rb5+s*rb6)))));
|
|
||||||
S = 1.0+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
|
|
||||||
sb5+s*(sb6+s*sb7))))));
|
|
||||||
}
|
|
||||||
z = x;
|
|
||||||
SET_LOW_WORD(z,0);
|
|
||||||
return exp(-z*z-0.5625)*exp((z-x)*(z+x)+R/S)/x;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns error function of 𝑥.
|
* Returns error function of x.
|
||||||
|
*
|
||||||
|
* Highest measured error is 1.01 ULPs at 0x1.39956ac43382fp+0.
|
||||||
|
*
|
||||||
|
* @raise ERANGE on underflow
|
||||||
*/
|
*/
|
||||||
double erf(double x)
|
double
|
||||||
|
erf (double x)
|
||||||
{
|
{
|
||||||
double r,s,z,y;
|
/* Get top word and sign. */
|
||||||
uint32_t ix;
|
uint32_t ix = top32 (x);
|
||||||
int sign;
|
uint32_t ia = ix & 0x7fffffff;
|
||||||
|
uint32_t sign = ix >> 31;
|
||||||
|
|
||||||
GET_HIGH_WORD(ix, x);
|
/* Normalized and subnormal cases */
|
||||||
sign = ix>>31;
|
if (ia < 0x3feb0000)
|
||||||
ix &= 0x7fffffff;
|
{ /* a = |x| < 0.84375. */
|
||||||
if (ix >= 0x7ff00000) {
|
|
||||||
/* erf(nan)=nan, erf(+-inf)=+-1 */
|
if (ia < 0x3e300000)
|
||||||
return 1-2*sign + 1/x;
|
{ /* a < 2^(-28). */
|
||||||
|
if (ia < 0x00800000)
|
||||||
|
{ /* a < 2^(-1015). */
|
||||||
|
double y = fma (TwoOverSqrtPiMinusOne, x, x);
|
||||||
|
return check_uflow (y);
|
||||||
|
}
|
||||||
|
return x + TwoOverSqrtPiMinusOne * x;
|
||||||
}
|
}
|
||||||
if (ix < 0x3feb0000) { /* |x| < 0.84375 */
|
|
||||||
if (ix < 0x3e300000) { /* |x| < 2**-28 */
|
double x2 = x * x;
|
||||||
/* avoid underflow */
|
|
||||||
return 0.125*(8*x + efx8*x);
|
if (ia < 0x3fe00000)
|
||||||
}
|
{ /* a < 0.5 - Use polynomial approximation. */
|
||||||
z = x*x;
|
double r1 = fma (x2, PA[1], PA[0]);
|
||||||
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
|
double r2 = fma (x2, PA[3], PA[2]);
|
||||||
s = 1.0+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
|
double r3 = fma (x2, PA[5], PA[4]);
|
||||||
y = r/s;
|
double r4 = fma (x2, PA[7], PA[6]);
|
||||||
return x + x*y;
|
double r5 = fma (x2, PA[9], PA[8]);
|
||||||
|
double x4 = x2 * x2;
|
||||||
|
double r = r5;
|
||||||
|
r = fma (x4, r, r4);
|
||||||
|
r = fma (x4, r, r3);
|
||||||
|
r = fma (x4, r, r2);
|
||||||
|
r = fma (x4, r, r1);
|
||||||
|
return fma (r, x, x); /* This fma is crucial for accuracy. */
|
||||||
}
|
}
|
||||||
if (ix < 0x40180000) /* 0.84375 <= |x| < 6 */
|
else
|
||||||
y = 1 - erfc2(ix,x);
|
{ /* 0.5 <= a < 0.84375 - Use rational approximation. */
|
||||||
else
|
double x4, x8, r1n, r2n, r1d, r2d, r3d;
|
||||||
y = 1 - 0x1p-1022;
|
|
||||||
return sign ? -y : y;
|
r1n = fma (x2, NA[1], NA[0]);
|
||||||
|
x4 = x2 * x2;
|
||||||
|
r2n = fma (x2, NA[3], NA[2]);
|
||||||
|
x8 = x4 * x4;
|
||||||
|
r1d = fma (x2, DA[0], 1.0);
|
||||||
|
r2d = fma (x2, DA[2], DA[1]);
|
||||||
|
r3d = fma (x2, DA[4], DA[3]);
|
||||||
|
double P = r1n + x4 * r2n + x8 * NA[4];
|
||||||
|
double Q = r1d + x4 * r2d + x8 * r3d;
|
||||||
|
return fma (P / Q, x, x);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (ia < 0x3ff40000)
|
||||||
|
{ /* 0.84375 <= |x| < 1.25. */
|
||||||
|
double a2, a4, a6, r1n, r2n, r3n, r4n, r1d, r2d, r3d, r4d;
|
||||||
|
double a = fabs (x) - 1.0;
|
||||||
|
r1n = fma (a, NB[1], NB[0]);
|
||||||
|
a2 = a * a;
|
||||||
|
r1d = fma (a, DB[0], 1.0);
|
||||||
|
a4 = a2 * a2;
|
||||||
|
r2n = fma (a, NB[3], NB[2]);
|
||||||
|
a6 = a4 * a2;
|
||||||
|
r2d = fma (a, DB[2], DB[1]);
|
||||||
|
r3n = fma (a, NB[5], NB[4]);
|
||||||
|
r3d = fma (a, DB[4], DB[3]);
|
||||||
|
r4n = NB[6];
|
||||||
|
r4d = DB[5];
|
||||||
|
double P = r1n + a2 * r2n + a4 * r3n + a6 * r4n;
|
||||||
|
double Q = r1d + a2 * r2d + a4 * r3d + a6 * r4d;
|
||||||
|
if (sign)
|
||||||
|
return -C - P / Q;
|
||||||
|
else
|
||||||
|
return C + P / Q;
|
||||||
|
}
|
||||||
|
else if (ia < 0x40000000)
|
||||||
|
{ /* 1.25 <= |x| < 2.0. */
|
||||||
|
double a = fabs (x);
|
||||||
|
a = a - 1.25;
|
||||||
|
|
||||||
|
double r1 = fma (a, PC[1], PC[0]);
|
||||||
|
double r2 = fma (a, PC[3], PC[2]);
|
||||||
|
double r3 = fma (a, PC[5], PC[4]);
|
||||||
|
double r4 = fma (a, PC[7], PC[6]);
|
||||||
|
double r5 = fma (a, PC[9], PC[8]);
|
||||||
|
double r6 = fma (a, PC[11], PC[10]);
|
||||||
|
double r7 = fma (a, PC[13], PC[12]);
|
||||||
|
double r8 = fma (a, PC[15], PC[14]);
|
||||||
|
|
||||||
|
double a2 = a * a;
|
||||||
|
|
||||||
|
double r = r8;
|
||||||
|
r = fma (a2, r, r7);
|
||||||
|
r = fma (a2, r, r6);
|
||||||
|
r = fma (a2, r, r5);
|
||||||
|
r = fma (a2, r, r4);
|
||||||
|
r = fma (a2, r, r3);
|
||||||
|
r = fma (a2, r, r2);
|
||||||
|
r = fma (a2, r, r1);
|
||||||
|
|
||||||
|
if (sign)
|
||||||
|
return -1.0 + r;
|
||||||
|
else
|
||||||
|
return 1.0 - r;
|
||||||
|
}
|
||||||
|
else if (ia < 0x400a0000)
|
||||||
|
{ /* 2 <= |x| < 3.25. */
|
||||||
|
double a = fabs (x);
|
||||||
|
a = fma (0.5, a, -1.0);
|
||||||
|
|
||||||
|
double r1 = fma (a, PD[1], PD[0]);
|
||||||
|
double r2 = fma (a, PD[3], PD[2]);
|
||||||
|
double r3 = fma (a, PD[5], PD[4]);
|
||||||
|
double r4 = fma (a, PD[7], PD[6]);
|
||||||
|
double r5 = fma (a, PD[9], PD[8]);
|
||||||
|
double r6 = fma (a, PD[11], PD[10]);
|
||||||
|
double r7 = fma (a, PD[13], PD[12]);
|
||||||
|
double r8 = fma (a, PD[15], PD[14]);
|
||||||
|
double r9 = fma (a, PD[17], PD[16]);
|
||||||
|
|
||||||
|
double a2 = a * a;
|
||||||
|
|
||||||
|
double r = r9;
|
||||||
|
r = fma (a2, r, r8);
|
||||||
|
r = fma (a2, r, r7);
|
||||||
|
r = fma (a2, r, r6);
|
||||||
|
r = fma (a2, r, r5);
|
||||||
|
r = fma (a2, r, r4);
|
||||||
|
r = fma (a2, r, r3);
|
||||||
|
r = fma (a2, r, r2);
|
||||||
|
r = fma (a2, r, r1);
|
||||||
|
|
||||||
|
if (sign)
|
||||||
|
return -1.0 + r;
|
||||||
|
else
|
||||||
|
return 1.0 - r;
|
||||||
|
}
|
||||||
|
else if (ia < 0x40100000)
|
||||||
|
{ /* 3.25 <= |x| < 4.0. */
|
||||||
|
double a = fabs (x);
|
||||||
|
a = a - 3.25;
|
||||||
|
|
||||||
|
double r1 = fma (a, PE[1], PE[0]);
|
||||||
|
double r2 = fma (a, PE[3], PE[2]);
|
||||||
|
double r3 = fma (a, PE[5], PE[4]);
|
||||||
|
double r4 = fma (a, PE[7], PE[6]);
|
||||||
|
double r5 = fma (a, PE[9], PE[8]);
|
||||||
|
double r6 = fma (a, PE[11], PE[10]);
|
||||||
|
double r7 = fma (a, PE[13], PE[12]);
|
||||||
|
|
||||||
|
double a2 = a * a;
|
||||||
|
|
||||||
|
double r = r7;
|
||||||
|
r = fma (a2, r, r6);
|
||||||
|
r = fma (a2, r, r5);
|
||||||
|
r = fma (a2, r, r4);
|
||||||
|
r = fma (a2, r, r3);
|
||||||
|
r = fma (a2, r, r2);
|
||||||
|
r = fma (a2, r, r1);
|
||||||
|
|
||||||
|
if (sign)
|
||||||
|
return -1.0 + r;
|
||||||
|
else
|
||||||
|
return 1.0 - r;
|
||||||
|
}
|
||||||
|
else if (ia < 0x4017a000)
|
||||||
|
{ /* 4 <= |x| < 5.90625. */
|
||||||
|
double a = fabs (x);
|
||||||
|
a = fma (0.5, a, -2.0);
|
||||||
|
|
||||||
|
double r1 = fma (a, PF[1], PF[0]);
|
||||||
|
double r2 = fma (a, PF[3], PF[2]);
|
||||||
|
double r3 = fma (a, PF[5], PF[4]);
|
||||||
|
double r4 = fma (a, PF[7], PF[6]);
|
||||||
|
double r5 = fma (a, PF[9], PF[8]);
|
||||||
|
double r6 = fma (a, PF[11], PF[10]);
|
||||||
|
double r7 = fma (a, PF[13], PF[12]);
|
||||||
|
double r8 = fma (a, PF[15], PF[14]);
|
||||||
|
double r9 = PF[16];
|
||||||
|
|
||||||
|
double a2 = a * a;
|
||||||
|
|
||||||
|
double r = r9;
|
||||||
|
r = fma (a2, r, r8);
|
||||||
|
r = fma (a2, r, r7);
|
||||||
|
r = fma (a2, r, r6);
|
||||||
|
r = fma (a2, r, r5);
|
||||||
|
r = fma (a2, r, r4);
|
||||||
|
r = fma (a2, r, r3);
|
||||||
|
r = fma (a2, r, r2);
|
||||||
|
r = fma (a2, r, r1);
|
||||||
|
|
||||||
|
if (sign)
|
||||||
|
return -1.0 + r;
|
||||||
|
else
|
||||||
|
return 1.0 - r;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Special cases : erf(nan)=nan, erf(+inf)=+1 and erf(-inf)=-1. */
|
||||||
|
if (unlikely (ia >= 0x7ff00000))
|
||||||
|
return (double) (1.0 - (sign << 1)) + 1.0 / x;
|
||||||
|
|
||||||
|
if (sign)
|
||||||
|
return -1.0;
|
||||||
|
else
|
||||||
|
return 1.0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
#if LDBL_MANT_DIG == 53
|
||||||
* Returns complementary error function of 𝑥.
|
|
||||||
*/
|
|
||||||
double erfc(double x)
|
|
||||||
{
|
|
||||||
double r,s,z,y;
|
|
||||||
uint32_t ix;
|
|
||||||
int sign;
|
|
||||||
|
|
||||||
GET_HIGH_WORD(ix, x);
|
|
||||||
sign = ix>>31;
|
|
||||||
ix &= 0x7fffffff;
|
|
||||||
if (ix >= 0x7ff00000) {
|
|
||||||
/* erfc(nan)=nan, erfc(+-inf)=0,2 */
|
|
||||||
return 2*sign + 1/x;
|
|
||||||
}
|
|
||||||
if (ix < 0x3feb0000) { /* |x| < 0.84375 */
|
|
||||||
if (ix < 0x3c700000) /* |x| < 2**-56 */
|
|
||||||
return 1.0 - x;
|
|
||||||
z = x*x;
|
|
||||||
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
|
|
||||||
s = 1.0+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
|
|
||||||
y = r/s;
|
|
||||||
if (sign || ix < 0x3fd00000) { /* x < 1/4 */
|
|
||||||
return 1.0 - (x+x*y);
|
|
||||||
}
|
|
||||||
return 0.5 - (x - 0.5 + x*y);
|
|
||||||
}
|
|
||||||
if (ix < 0x403c0000) { /* 0.84375 <= |x| < 28 */
|
|
||||||
return sign ? 2 - erfc2(ix,x) : erfc2(ix,x);
|
|
||||||
}
|
|
||||||
return sign ? 2 - 0x1p-1022 : 0x1p-1022*0x1p-1022;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
|
|
||||||
__weak_reference(erf, erfl);
|
__weak_reference(erf, erfl);
|
||||||
__weak_reference(erfc, erfcl);
|
|
||||||
#endif
|
#endif
|
||||||
|
|
105
libc/tinymath/erf_data.c
Normal file
105
libc/tinymath/erf_data.c
Normal file
|
@ -0,0 +1,105 @@
|
||||||
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
|
│ │
|
||||||
|
│ Optimized Routines │
|
||||||
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
|
│ │
|
||||||
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
│ "Software"), to deal in the Software without restriction, including │
|
||||||
|
│ without limitation the rights to use, copy, modify, merge, publish, │
|
||||||
|
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
||||||
|
│ permit persons to whom the Software is furnished to do so, subject to │
|
||||||
|
│ the following conditions: │
|
||||||
|
│ │
|
||||||
|
│ The above copyright notice and this permission notice shall be │
|
||||||
|
│ included in all copies or substantial portions of the Software. │
|
||||||
|
│ │
|
||||||
|
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
||||||
|
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
||||||
|
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
||||||
|
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
||||||
|
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
||||||
|
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
||||||
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
|
│ │
|
||||||
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
|
#include "libc/tinymath/arm.internal.h"
|
||||||
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
|
|
||||||
|
/*
|
||||||
|
Minimax approximation of erf
|
||||||
|
*/
|
||||||
|
const struct erf_data __erf_data = {
|
||||||
|
.erf_poly_A = {
|
||||||
|
#if ERF_POLY_A_NCOEFFS == 10
|
||||||
|
0x1.06eba8214db68p-3, -0x1.812746b037948p-2, 0x1.ce2f21a03872p-4,
|
||||||
|
-0x1.b82ce30e6548p-6, 0x1.565bcc360a2f2p-8, -0x1.c02d812bc979ap-11,
|
||||||
|
0x1.f99bddfc1ebe9p-14, -0x1.f42c457cee912p-17, 0x1.b0e414ec20ee9p-20,
|
||||||
|
-0x1.18c47fd143c5ep-23
|
||||||
|
#endif
|
||||||
|
},
|
||||||
|
/* Rational approximation on [0x1p-28, 0.84375] */
|
||||||
|
.erf_ratio_N_A = {
|
||||||
|
0x1.06eba8214db68p-3, -0x1.4cd7d691cb913p-2, -0x1.d2a51dbd7194fp-6,
|
||||||
|
-0x1.7a291236668e4p-8, -0x1.8ead6120016acp-16
|
||||||
|
},
|
||||||
|
.erf_ratio_D_A = {
|
||||||
|
0x1.97779cddadc09p-2, 0x1.0a54c5536cebap-4, 0x1.4d022c4d36b0fp-8,
|
||||||
|
0x1.15dc9221c1a1p-13, -0x1.09c4342a2612p-18
|
||||||
|
},
|
||||||
|
/* Rational approximation on [0.84375, 1.25] */
|
||||||
|
.erf_ratio_N_B = {
|
||||||
|
-0x1.359b8bef77538p-9, 0x1.a8d00ad92b34dp-2, -0x1.7d240fbb8c3f1p-2,
|
||||||
|
0x1.45fca805120e4p-2, -0x1.c63983d3e28ecp-4, 0x1.22a36599795ebp-5,
|
||||||
|
-0x1.1bf380a96073fp-9
|
||||||
|
},
|
||||||
|
.erf_ratio_D_B = {
|
||||||
|
0x1.b3e6618eee323p-4, 0x1.14af092eb6f33p-1, 0x1.2635cd99fe9a7p-4,
|
||||||
|
0x1.02660e763351fp-3, 0x1.bedc26b51dd1cp-7, 0x1.88b545735151dp-7
|
||||||
|
},
|
||||||
|
.erfc_poly_C = {
|
||||||
|
#if ERFC_POLY_C_NCOEFFS == 16
|
||||||
|
/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=15 a=1.25 b=2 c=1 d=1.25 */
|
||||||
|
0x1.3bcd133aa0ffcp-4, -0x1.e4652fadcb702p-3, 0x1.2ebf3dcca0446p-2,
|
||||||
|
-0x1.571d01c62d66p-3, 0x1.93a9a8f5b3413p-8, 0x1.8281cbcc2cd52p-5,
|
||||||
|
-0x1.5cffd86b4de16p-6, -0x1.db4ccf595053ep-9, 0x1.757cbf8684edap-8,
|
||||||
|
-0x1.ce7dfd2a9e56ap-11, -0x1.99ee3bc5a3263p-11, 0x1.3c57cf9213f5fp-12,
|
||||||
|
0x1.60692996bf254p-14, -0x1.6e44cb7c1fa2ap-14, 0x1.9d4484ac482b2p-16,
|
||||||
|
-0x1.578c9e375d37p-19
|
||||||
|
#endif
|
||||||
|
},
|
||||||
|
.erfc_poly_D = {
|
||||||
|
#if ERFC_POLY_D_NCOEFFS == 18
|
||||||
|
/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=17 a=2 b=3.25 c=2 d=2 */
|
||||||
|
0x1.328f5ec350e5p-8, -0x1.529b9e8cf8e99p-5, 0x1.529b9e8cd9e71p-3,
|
||||||
|
-0x1.8b0ae3a023bf2p-2, 0x1.1a2c592599d82p-1, -0x1.ace732477e494p-2,
|
||||||
|
-0x1.e1a06a27920ffp-6, 0x1.bae92a6d27af6p-2, -0x1.a15470fcf5ce7p-2,
|
||||||
|
0x1.bafe45d18e213p-6, 0x1.0d950680d199ap-2, -0x1.8c9481e8f22e3p-3,
|
||||||
|
-0x1.158450ed5c899p-4, 0x1.c01f2973b44p-3, -0x1.73ed2827546a7p-3,
|
||||||
|
0x1.47733687d1ff7p-4, -0x1.2dec70d00b8e1p-6, 0x1.a947ab83cd4fp-10
|
||||||
|
#endif
|
||||||
|
},
|
||||||
|
.erfc_poly_E = {
|
||||||
|
#if ERFC_POLY_E_NCOEFFS == 14
|
||||||
|
/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=13 a=3.25 b=4 c=1 d=3.25 */
|
||||||
|
0x1.20c13035539e4p-18, -0x1.e9b5e8d16df7ep-16, 0x1.8de3cd4733bf9p-14,
|
||||||
|
-0x1.9aa48beb8382fp-13, 0x1.2c7d713370a9fp-12, -0x1.490b12110b9e2p-12,
|
||||||
|
0x1.1459c5d989d23p-12, -0x1.64b28e9f1269p-13, 0x1.57c76d9d05cf8p-14,
|
||||||
|
-0x1.bf271d9951cf8p-16, 0x1.db7ea4d4535c9p-19, 0x1.91c2e102d5e49p-20,
|
||||||
|
-0x1.e9f0826c2149ep-21, 0x1.60eebaea236e1p-23
|
||||||
|
#endif
|
||||||
|
},
|
||||||
|
.erfc_poly_F = {
|
||||||
|
#if ERFC_POLY_F_NCOEFFS == 17
|
||||||
|
/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=16 a=4 b=5.90625 c=2 d=4 */
|
||||||
|
0x1.08ddd130d1fa6p-26, -0x1.10b146f59ff06p-22, 0x1.10b135328b7b2p-19,
|
||||||
|
-0x1.6039988e7575fp-17, 0x1.497d365e19367p-15, -0x1.da48d9afac83ep-14,
|
||||||
|
0x1.1024c9b1fbb48p-12, -0x1.fc962e7066272p-12, 0x1.87297282d4651p-11,
|
||||||
|
-0x1.f057b255f8c59p-11, 0x1.0228d0eee063p-10, -0x1.b1b21b84ec41cp-11,
|
||||||
|
0x1.1ead8ae9e1253p-11, -0x1.1e708fba37fccp-12, 0x1.9559363991edap-14,
|
||||||
|
-0x1.68c827b783d9cp-16, 0x1.2ec4adeccf4a2p-19
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
};
|
279
libc/tinymath/erfc.c
Normal file
279
libc/tinymath/erfc.c
Normal file
|
@ -0,0 +1,279 @@
|
||||||
|
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
||||||
|
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
||||||
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
|
│ │
|
||||||
|
│ Copyright (c) 1992-2024 The FreeBSD Project │
|
||||||
|
│ │
|
||||||
|
│ Redistribution and use in source and binary forms, with or without │
|
||||||
|
│ modification, are permitted provided that the following conditions │
|
||||||
|
│ are met: │
|
||||||
|
│ 1. Redistributions of source code must retain the above copyright │
|
||||||
|
│ notice, this list of conditions and the following disclaimer. │
|
||||||
|
│ 2. Redistributions in binary form must reproduce the above copyright │
|
||||||
|
│ notice, this list of conditions and the following disclaimer in the │
|
||||||
|
│ documentation and/or other materials provided with the distribution. │
|
||||||
|
│ │
|
||||||
|
│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND │
|
||||||
|
│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE │
|
||||||
|
│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE │
|
||||||
|
│ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE │
|
||||||
|
│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL │
|
||||||
|
│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS │
|
||||||
|
│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) │
|
||||||
|
│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT │
|
||||||
|
│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY │
|
||||||
|
│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF │
|
||||||
|
│ SUCH DAMAGE. │
|
||||||
|
│ │
|
||||||
|
│ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. │
|
||||||
|
│ │
|
||||||
|
│ Developed at SunPro, a Sun Microsystems, Inc. business. │
|
||||||
|
│ Permission to use, copy, modify, and distribute this │
|
||||||
|
│ software is freely granted, provided that this notice │
|
||||||
|
│ is preserved. │
|
||||||
|
│ │
|
||||||
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
|
#include "libc/tinymath/freebsd.internal.h"
|
||||||
|
__static_yoink("freebsd_libm_notice");
|
||||||
|
__static_yoink("fdlibm_notice");
|
||||||
|
|
||||||
|
/* double erf(double x)
|
||||||
|
* double erfc(double x)
|
||||||
|
* x
|
||||||
|
* 2 |\
|
||||||
|
* erf(x) = --------- | exp(-t*t)dt
|
||||||
|
* sqrt(pi) \|
|
||||||
|
* 0
|
||||||
|
*
|
||||||
|
* erfc(x) = 1-erf(x)
|
||||||
|
* Note that
|
||||||
|
* erf(-x) = -erf(x)
|
||||||
|
* erfc(-x) = 2 - erfc(x)
|
||||||
|
*
|
||||||
|
* Method:
|
||||||
|
* 1. For |x| in [0, 0.84375]
|
||||||
|
* erf(x) = x + x*R(x^2)
|
||||||
|
* erfc(x) = 1 - erf(x) if x in [-.84375,0.25]
|
||||||
|
* = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375]
|
||||||
|
* where R = P/Q where P is an even poly of degree 8 and
|
||||||
|
* Q is an even poly of degree 10.
|
||||||
|
* -57.90
|
||||||
|
* | R - (erf(x)-x)/x | <= 2
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* Remark. The formula is derived by noting
|
||||||
|
* erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
|
||||||
|
* and that
|
||||||
|
* 2/sqrt(pi) = 1.128379167095512573896158903121545171688
|
||||||
|
* is close to one. The interval is chosen because the fixed
|
||||||
|
* point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
|
||||||
|
* near 0.6174), and by some experiment, 0.84375 is chosen to
|
||||||
|
* guarantee the error is less than one ulp for erf.
|
||||||
|
*
|
||||||
|
* 2. For |x| in [0.84375,1.25], let s = |x| - 1, and
|
||||||
|
* c = 0.84506291151 rounded to single (24 bits)
|
||||||
|
* erf(x) = sign(x) * (c + P1(s)/Q1(s))
|
||||||
|
* erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0
|
||||||
|
* 1+(c+P1(s)/Q1(s)) if x < 0
|
||||||
|
* |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06
|
||||||
|
* Remark: here we use the Taylor series expansion at x=1.
|
||||||
|
* erf(1+s) = erf(1) + s*Poly(s)
|
||||||
|
* = 0.845.. + P1(s)/Q1(s)
|
||||||
|
* That is, we use rational approximation to approximate
|
||||||
|
* erf(1+s) - (c = (single)0.84506291151)
|
||||||
|
* Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
|
||||||
|
* where
|
||||||
|
* P1(s) = degree 6 poly in s
|
||||||
|
* Q1(s) = degree 6 poly in s
|
||||||
|
*
|
||||||
|
* 3. For x in [1.25,1/0.35(~2.857143)],
|
||||||
|
* erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1)
|
||||||
|
* erf(x) = 1 - erfc(x)
|
||||||
|
* where
|
||||||
|
* R1(z) = degree 7 poly in z, (z=1/x^2)
|
||||||
|
* S1(z) = degree 8 poly in z
|
||||||
|
*
|
||||||
|
* 4. For x in [1/0.35,28]
|
||||||
|
* erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
|
||||||
|
* = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0
|
||||||
|
* = 2.0 - tiny (if x <= -6)
|
||||||
|
* erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6, else
|
||||||
|
* erf(x) = sign(x)*(1.0 - tiny)
|
||||||
|
* where
|
||||||
|
* R2(z) = degree 6 poly in z, (z=1/x^2)
|
||||||
|
* S2(z) = degree 7 poly in z
|
||||||
|
*
|
||||||
|
* Note1:
|
||||||
|
* To compute exp(-x*x-0.5625+R/S), let s be a single
|
||||||
|
* precision number and s := x; then
|
||||||
|
* -x*x = -s*s + (s-x)*(s+x)
|
||||||
|
* exp(-x*x-0.5625+R/S) =
|
||||||
|
* exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
|
||||||
|
* Note2:
|
||||||
|
* Here 3 and 4 make use of the asymptotic series
|
||||||
|
* exp(-x*x)
|
||||||
|
* erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
|
||||||
|
* x*sqrt(pi)
|
||||||
|
* We use rational approximation to approximate
|
||||||
|
* g(s)=f(1/x^2) = log(erfc(x)*x) + x*x + 0.5625
|
||||||
|
* Here is the error bound for R1/S1 and R2/S2
|
||||||
|
* |R1/S1 - f(x)| < 2**(-62.57)
|
||||||
|
* |R2/S2 - f(x)| < 2**(-61.52)
|
||||||
|
*
|
||||||
|
* 5. For inf > x >= 28
|
||||||
|
* erf(x) = sign(x) *(1 - tiny) (raise inexact)
|
||||||
|
* erfc(x) = tiny*tiny (raise underflow) if x > 0
|
||||||
|
* = 2 - tiny if x<0
|
||||||
|
*
|
||||||
|
* 6. Special case:
|
||||||
|
* erf(0) = 0, erf(inf) = 1, erf(-inf) = -1,
|
||||||
|
* erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
|
||||||
|
* erfc/erf(NaN) is NaN
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* XXX Prevent compilers from erroneously constant folding: */
|
||||||
|
static const volatile double tiny= 1e-300;
|
||||||
|
|
||||||
|
static const double
|
||||||
|
half= 0.5,
|
||||||
|
one = 1,
|
||||||
|
two = 2,
|
||||||
|
/* c = (float)0.84506291151 */
|
||||||
|
erx = 8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */
|
||||||
|
/*
|
||||||
|
* In the domain [0, 2**-28], only the first term in the power series
|
||||||
|
* expansion of erf(x) is used. The magnitude of the first neglected
|
||||||
|
* terms is less than 2**-84.
|
||||||
|
*/
|
||||||
|
efx = 1.28379167095512586316e-01, /* 0x3FC06EBA, 0x8214DB69 */
|
||||||
|
efx8= 1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */
|
||||||
|
/*
|
||||||
|
* Coefficients for approximation to erf on [0,0.84375]
|
||||||
|
*/
|
||||||
|
pp0 = 1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */
|
||||||
|
pp1 = -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */
|
||||||
|
pp2 = -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */
|
||||||
|
pp3 = -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */
|
||||||
|
pp4 = -2.37630166566501626084e-05, /* 0xBEF8EAD6, 0x120016AC */
|
||||||
|
qq1 = 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */
|
||||||
|
qq2 = 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */
|
||||||
|
qq3 = 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */
|
||||||
|
qq4 = 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */
|
||||||
|
qq5 = -3.96022827877536812320e-06, /* 0xBED09C43, 0x42A26120 */
|
||||||
|
/*
|
||||||
|
* Coefficients for approximation to erf in [0.84375,1.25]
|
||||||
|
*/
|
||||||
|
pa0 = -2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */
|
||||||
|
pa1 = 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */
|
||||||
|
pa2 = -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */
|
||||||
|
pa3 = 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */
|
||||||
|
pa4 = -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */
|
||||||
|
pa5 = 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */
|
||||||
|
pa6 = -2.16637559486879084300e-03, /* 0xBF61BF38, 0x0A96073F */
|
||||||
|
qa1 = 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */
|
||||||
|
qa2 = 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */
|
||||||
|
qa3 = 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */
|
||||||
|
qa4 = 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */
|
||||||
|
qa5 = 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */
|
||||||
|
qa6 = 1.19844998467991074170e-02, /* 0x3F888B54, 0x5735151D */
|
||||||
|
/*
|
||||||
|
* Coefficients for approximation to erfc in [1.25,1/0.35]
|
||||||
|
*/
|
||||||
|
ra0 = -9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */
|
||||||
|
ra1 = -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */
|
||||||
|
ra2 = -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */
|
||||||
|
ra3 = -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */
|
||||||
|
ra4 = -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */
|
||||||
|
ra5 = -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */
|
||||||
|
ra6 = -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */
|
||||||
|
ra7 = -9.81432934416914548592e+00, /* 0xC023A0EF, 0xC69AC25C */
|
||||||
|
sa1 = 1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */
|
||||||
|
sa2 = 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */
|
||||||
|
sa3 = 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */
|
||||||
|
sa4 = 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */
|
||||||
|
sa5 = 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */
|
||||||
|
sa6 = 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */
|
||||||
|
sa7 = 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */
|
||||||
|
sa8 = -6.04244152148580987438e-02, /* 0xBFAEEFF2, 0xEE749A62 */
|
||||||
|
/*
|
||||||
|
* Coefficients for approximation to erfc in [1/.35,28]
|
||||||
|
*/
|
||||||
|
rb0 = -9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */
|
||||||
|
rb1 = -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */
|
||||||
|
rb2 = -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */
|
||||||
|
rb3 = -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */
|
||||||
|
rb4 = -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */
|
||||||
|
rb5 = -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */
|
||||||
|
rb6 = -4.83519191608651397019e+02, /* 0xC07E384E, 0x9BDC383F */
|
||||||
|
sb1 = 3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */
|
||||||
|
sb2 = 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */
|
||||||
|
sb3 = 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */
|
||||||
|
sb4 = 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */
|
||||||
|
sb5 = 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */
|
||||||
|
sb6 = 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */
|
||||||
|
sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns complementary error function of x, i.e. 1.0 - erf(x).
|
||||||
|
*/
|
||||||
|
double
|
||||||
|
erfc(double x)
|
||||||
|
{
|
||||||
|
int32_t hx,ix;
|
||||||
|
double R,S,P,Q,s,y,z,r;
|
||||||
|
GET_HIGH_WORD(hx,x);
|
||||||
|
ix = hx&0x7fffffff;
|
||||||
|
if(ix>=0x7ff00000) { /* erfc(nan)=nan */
|
||||||
|
/* erfc(+-inf)=0,2 */
|
||||||
|
return (double)(((uint32_t)hx>>31)<<1)+one/x;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(ix < 0x3feb0000) { /* |x|<0.84375 */
|
||||||
|
if(ix < 0x3c700000) /* |x|<2**-56 */
|
||||||
|
return one-x;
|
||||||
|
z = x*x;
|
||||||
|
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
|
||||||
|
s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
|
||||||
|
y = r/s;
|
||||||
|
if(hx < 0x3fd00000) { /* x<1/4 */
|
||||||
|
return one-(x+x*y);
|
||||||
|
} else {
|
||||||
|
r = x*y;
|
||||||
|
r += (x-half);
|
||||||
|
return half - r ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(ix < 0x3ff40000) { /* 0.84375 <= |x| < 1.25 */
|
||||||
|
s = fabs(x)-one;
|
||||||
|
P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
|
||||||
|
Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
|
||||||
|
if(hx>=0) {
|
||||||
|
z = one-erx; return z - P/Q;
|
||||||
|
} else {
|
||||||
|
z = erx+P/Q; return one+z;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ix < 0x403c0000) { /* |x|<28 */
|
||||||
|
x = fabs(x);
|
||||||
|
s = one/(x*x);
|
||||||
|
if(ix< 0x4006DB6D) { /* |x| < 1/.35 ~ 2.857143*/
|
||||||
|
R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*ra7))))));
|
||||||
|
S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+
|
||||||
|
s*sa8)))))));
|
||||||
|
} else { /* |x| >= 1/.35 ~ 2.857143 */
|
||||||
|
if(hx<0&&ix>=0x40180000) return two-tiny;/* x < -6 */
|
||||||
|
R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*rb6)))));
|
||||||
|
S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7))))));
|
||||||
|
}
|
||||||
|
z = x;
|
||||||
|
SET_LOW_WORD(z,0);
|
||||||
|
r = exp(-z*z-0.5625)*exp((z-x)*(z+x)+R/S);
|
||||||
|
if(hx>0) return r/x; else return two-r/x;
|
||||||
|
} else {
|
||||||
|
if(hx>0) return tiny*tiny; else return two-tiny;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#if LDBL_MANT_DIG == 53
|
||||||
|
__weak_reference(erfc, erfcl);
|
||||||
|
#endif
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Musl Libc │
|
│ Optimized Routines │
|
||||||
│ Copyright © 2005-2020 Rich Felker, et al. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,189 +25,99 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/math.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
__static_yoink("freebsd_libm_notice");
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
__static_yoink("musl_libc_notice");
|
|
||||||
__static_yoink("fdlibm_notice");
|
|
||||||
|
|
||||||
/* origin: FreeBSD /usr/src/lib/msun/src/s_erff.c */
|
#define TwoOverSqrtPiMinusOne 0x1.06eba8p-3f
|
||||||
/*
|
#define A __erff_data.erff_poly_A
|
||||||
* ====================================================
|
#define B __erff_data.erff_poly_B
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define asuint(f) ((union{float _f; uint32_t _i;}){f})._i
|
/* Top 12 bits of a float. */
|
||||||
#define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f
|
static inline uint32_t
|
||||||
|
top12 (float x)
|
||||||
static const float
|
|
||||||
erx = 8.4506291151e-01, /* 0x3f58560b */
|
|
||||||
/*
|
|
||||||
* Coefficients for approximation to erf on [0,0.84375]
|
|
||||||
*/
|
|
||||||
efx8 = 1.0270333290e+00, /* 0x3f8375d4 */
|
|
||||||
pp0 = 1.2837916613e-01, /* 0x3e0375d4 */
|
|
||||||
pp1 = -3.2504209876e-01, /* 0xbea66beb */
|
|
||||||
pp2 = -2.8481749818e-02, /* 0xbce9528f */
|
|
||||||
pp3 = -5.7702702470e-03, /* 0xbbbd1489 */
|
|
||||||
pp4 = -2.3763017452e-05, /* 0xb7c756b1 */
|
|
||||||
qq1 = 3.9791721106e-01, /* 0x3ecbbbce */
|
|
||||||
qq2 = 6.5022252500e-02, /* 0x3d852a63 */
|
|
||||||
qq3 = 5.0813062117e-03, /* 0x3ba68116 */
|
|
||||||
qq4 = 1.3249473704e-04, /* 0x390aee49 */
|
|
||||||
qq5 = -3.9602282413e-06, /* 0xb684e21a */
|
|
||||||
/*
|
|
||||||
* Coefficients for approximation to erf in [0.84375,1.25]
|
|
||||||
*/
|
|
||||||
pa0 = -2.3621185683e-03, /* 0xbb1acdc6 */
|
|
||||||
pa1 = 4.1485610604e-01, /* 0x3ed46805 */
|
|
||||||
pa2 = -3.7220788002e-01, /* 0xbebe9208 */
|
|
||||||
pa3 = 3.1834661961e-01, /* 0x3ea2fe54 */
|
|
||||||
pa4 = -1.1089469492e-01, /* 0xbde31cc2 */
|
|
||||||
pa5 = 3.5478305072e-02, /* 0x3d1151b3 */
|
|
||||||
pa6 = -2.1663755178e-03, /* 0xbb0df9c0 */
|
|
||||||
qa1 = 1.0642088205e-01, /* 0x3dd9f331 */
|
|
||||||
qa2 = 5.4039794207e-01, /* 0x3f0a5785 */
|
|
||||||
qa3 = 7.1828655899e-02, /* 0x3d931ae7 */
|
|
||||||
qa4 = 1.2617121637e-01, /* 0x3e013307 */
|
|
||||||
qa5 = 1.3637083583e-02, /* 0x3c5f6e13 */
|
|
||||||
qa6 = 1.1984500103e-02, /* 0x3c445aa3 */
|
|
||||||
/*
|
|
||||||
* Coefficients for approximation to erfc in [1.25,1/0.35]
|
|
||||||
*/
|
|
||||||
ra0 = -9.8649440333e-03, /* 0xbc21a093 */
|
|
||||||
ra1 = -6.9385856390e-01, /* 0xbf31a0b7 */
|
|
||||||
ra2 = -1.0558626175e+01, /* 0xc128f022 */
|
|
||||||
ra3 = -6.2375331879e+01, /* 0xc2798057 */
|
|
||||||
ra4 = -1.6239666748e+02, /* 0xc322658c */
|
|
||||||
ra5 = -1.8460508728e+02, /* 0xc3389ae7 */
|
|
||||||
ra6 = -8.1287437439e+01, /* 0xc2a2932b */
|
|
||||||
ra7 = -9.8143291473e+00, /* 0xc11d077e */
|
|
||||||
sa1 = 1.9651271820e+01, /* 0x419d35ce */
|
|
||||||
sa2 = 1.3765776062e+02, /* 0x4309a863 */
|
|
||||||
sa3 = 4.3456588745e+02, /* 0x43d9486f */
|
|
||||||
sa4 = 6.4538726807e+02, /* 0x442158c9 */
|
|
||||||
sa5 = 4.2900814819e+02, /* 0x43d6810b */
|
|
||||||
sa6 = 1.0863500214e+02, /* 0x42d9451f */
|
|
||||||
sa7 = 6.5702495575e+00, /* 0x40d23f7c */
|
|
||||||
sa8 = -6.0424413532e-02, /* 0xbd777f97 */
|
|
||||||
/*
|
|
||||||
* Coefficients for approximation to erfc in [1/.35,28]
|
|
||||||
*/
|
|
||||||
rb0 = -9.8649431020e-03, /* 0xbc21a092 */
|
|
||||||
rb1 = -7.9928326607e-01, /* 0xbf4c9dd4 */
|
|
||||||
rb2 = -1.7757955551e+01, /* 0xc18e104b */
|
|
||||||
rb3 = -1.6063638306e+02, /* 0xc320a2ea */
|
|
||||||
rb4 = -6.3756646729e+02, /* 0xc41f6441 */
|
|
||||||
rb5 = -1.0250950928e+03, /* 0xc480230b */
|
|
||||||
rb6 = -4.8351919556e+02, /* 0xc3f1c275 */
|
|
||||||
sb1 = 3.0338060379e+01, /* 0x41f2b459 */
|
|
||||||
sb2 = 3.2579251099e+02, /* 0x43a2e571 */
|
|
||||||
sb3 = 1.5367296143e+03, /* 0x44c01759 */
|
|
||||||
sb4 = 3.1998581543e+03, /* 0x4547fdbb */
|
|
||||||
sb5 = 2.5530502930e+03, /* 0x451f90ce */
|
|
||||||
sb6 = 4.7452853394e+02, /* 0x43ed43a7 */
|
|
||||||
sb7 = -2.2440952301e+01; /* 0xc1b38712 */
|
|
||||||
|
|
||||||
static float erfc1(float x)
|
|
||||||
{
|
{
|
||||||
float_t s,P,Q;
|
return asuint (x) >> 20;
|
||||||
|
|
||||||
s = fabsf(x) - 1;
|
|
||||||
P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
|
|
||||||
Q = 1+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
|
|
||||||
return 1 - erx - P/Q;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static float erfc2(uint32_t ix, float x)
|
/* Efficient implementation of erff
|
||||||
|
using either a pure polynomial approximation or
|
||||||
|
the exponential of a polynomial.
|
||||||
|
Worst-case error is 1.09ulps at 0x1.c111acp-1. */
|
||||||
|
float
|
||||||
|
erff (float x)
|
||||||
{
|
{
|
||||||
float_t s,R,S;
|
float r, x2, u;
|
||||||
float z;
|
|
||||||
|
|
||||||
if (ix < 0x3fa00000) /* |x| < 1.25 */
|
/* Get top word. */
|
||||||
return erfc1(x);
|
uint32_t ix = asuint (x);
|
||||||
|
uint32_t sign = ix >> 31;
|
||||||
|
uint32_t ia12 = top12 (x) & 0x7ff;
|
||||||
|
|
||||||
x = fabsf(x);
|
/* Limit of both intervals is 0.875 for performance reasons but coefficients
|
||||||
s = 1/(x*x);
|
computed on [0.0, 0.921875] and [0.921875, 4.0], which brought accuracy
|
||||||
if (ix < 0x4036db6d) { /* |x| < 1/0.35 */
|
from 0.94 to 1.1ulps. */
|
||||||
R = ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
|
if (ia12 < 0x3f6)
|
||||||
ra5+s*(ra6+s*ra7))))));
|
{ /* a = |x| < 0.875. */
|
||||||
S = 1.0f+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
|
|
||||||
sa5+s*(sa6+s*(sa7+s*sa8)))))));
|
/* Tiny and subnormal cases. */
|
||||||
} else { /* |x| >= 1/0.35 */
|
if (unlikely (ia12 < 0x318))
|
||||||
R = rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
|
{ /* |x| < 2^(-28). */
|
||||||
rb5+s*rb6)))));
|
if (unlikely (ia12 < 0x040))
|
||||||
S = 1.0f+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
|
{ /* |x| < 2^(-119). */
|
||||||
sb5+s*(sb6+s*sb7))))));
|
float y = fmaf (TwoOverSqrtPiMinusOne, x, x);
|
||||||
|
return check_uflowf (y);
|
||||||
|
}
|
||||||
|
return x + TwoOverSqrtPiMinusOne * x;
|
||||||
}
|
}
|
||||||
ix = asuint(x);
|
|
||||||
z = asfloat(ix&0xffffe000);
|
x2 = x * x;
|
||||||
return expf(-z*z - 0.5625f) * expf((z-x)*(z+x) + R/S)/x;
|
|
||||||
}
|
/* Normalized cases (|x| < 0.921875). Use Horner scheme for x+x*P(x^2). */
|
||||||
|
r = A[5];
|
||||||
float erff(float x)
|
r = fmaf (r, x2, A[4]);
|
||||||
{
|
r = fmaf (r, x2, A[3]);
|
||||||
float r,s,z,y;
|
r = fmaf (r, x2, A[2]);
|
||||||
uint32_t ix;
|
r = fmaf (r, x2, A[1]);
|
||||||
int sign;
|
r = fmaf (r, x2, A[0]);
|
||||||
|
r = fmaf (r, x, x);
|
||||||
ix = asuint(x);
|
}
|
||||||
sign = ix>>31;
|
else if (ia12 < 0x408)
|
||||||
ix &= 0x7fffffff;
|
{ /* |x| < 4.0 - Use a custom Estrin scheme. */
|
||||||
if (ix >= 0x7f800000) {
|
|
||||||
/* erf(nan)=nan, erf(+-inf)=+-1 */
|
float a = fabsf (x);
|
||||||
return 1-2*sign + 1/x;
|
/* Start with Estrin scheme on high order (small magnitude) coefficients. */
|
||||||
}
|
r = fmaf (B[6], a, B[5]);
|
||||||
if (ix < 0x3f580000) { /* |x| < 0.84375 */
|
u = fmaf (B[4], a, B[3]);
|
||||||
if (ix < 0x31800000) { /* |x| < 2**-28 */
|
x2 = x * x;
|
||||||
/*avoid underflow */
|
r = fmaf (r, x2, u);
|
||||||
return 0.125f*(8*x + efx8*x);
|
/* Then switch to pure Horner scheme. */
|
||||||
}
|
r = fmaf (r, a, B[2]);
|
||||||
z = x*x;
|
r = fmaf (r, a, B[1]);
|
||||||
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
|
r = fmaf (r, a, B[0]);
|
||||||
s = 1+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
|
r = fmaf (r, a, a);
|
||||||
y = r/s;
|
/* Single precision exponential with ~0.5ulps,
|
||||||
return x + x*y;
|
ensures erff has max. rel. error
|
||||||
}
|
< 1ulp on [0.921875, 4.0],
|
||||||
if (ix < 0x40c00000) /* |x| < 6 */
|
< 1.1ulps on [0.875, 4.0]. */
|
||||||
y = 1 - erfc2(ix,x);
|
r = expf (-r);
|
||||||
else
|
/* Explicit copysign (calling copysignf increases latency). */
|
||||||
y = 1 - 0x1p-120f;
|
if (sign)
|
||||||
return sign ? -y : y;
|
r = -1.0f + r;
|
||||||
}
|
else
|
||||||
|
r = 1.0f - r;
|
||||||
float erfcf(float x)
|
}
|
||||||
{
|
else
|
||||||
float r,s,z,y;
|
{ /* |x| >= 4.0. */
|
||||||
uint32_t ix;
|
|
||||||
int sign;
|
/* Special cases : erff(nan)=nan, erff(+inf)=+1 and erff(-inf)=-1. */
|
||||||
|
if (unlikely (ia12 >= 0x7f8))
|
||||||
ix = asuint(x);
|
return (1.f - (float) ((ix >> 31) << 1)) + 1.f / x;
|
||||||
sign = ix>>31;
|
|
||||||
ix &= 0x7fffffff;
|
/* Explicit copysign (calling copysignf increases latency). */
|
||||||
if (ix >= 0x7f800000) {
|
if (sign)
|
||||||
/* erfc(nan)=nan, erfc(+-inf)=0,2 */
|
r = -1.0f;
|
||||||
return 2*sign + 1/x;
|
else
|
||||||
}
|
r = 1.0f;
|
||||||
|
}
|
||||||
if (ix < 0x3f580000) { /* |x| < 0.84375 */
|
return r;
|
||||||
if (ix < 0x23800000) /* |x| < 2**-56 */
|
|
||||||
return 1.0f - x;
|
|
||||||
z = x*x;
|
|
||||||
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
|
|
||||||
s = 1.0f+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
|
|
||||||
y = r/s;
|
|
||||||
if (sign || ix < 0x3e800000) /* x < 1/4 */
|
|
||||||
return 1.0f - (x+x*y);
|
|
||||||
return 0.5f - (x - 0.5f + x*y);
|
|
||||||
}
|
|
||||||
if (ix < 0x41e00000) { /* |x| < 28 */
|
|
||||||
return sign ? 2 - erfc2(ix,x) : erfc2(ix,x);
|
|
||||||
}
|
|
||||||
return sign ? 2 - 0x1p-120f : 0x1p-120f*0x1p-120f;
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,12 +25,19 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/tinymath/log1pf_data.internal.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
__static_yoink("arm_optimized_routines_notice");
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
|
|
||||||
/* Polynomial coefficients generated using floating-point minimax
|
/* Minimax approximation of erff. */
|
||||||
algorithm, see tools/log1pf.sollya for details. */
|
const struct erff_data __erff_data = {
|
||||||
const struct log1pf_data __log1pf_data
|
.erff_poly_A = {
|
||||||
= {.coeffs = {-0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f,
|
0x1.06eba6p-03f, -0x1.8126e0p-02f, 0x1.ce1a46p-04f,
|
||||||
-0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, 0x1.abcb6p-4f,
|
-0x1.b68bd2p-06f, 0x1.473f48p-08f, -0x1.3a1a82p-11f
|
||||||
-0x1.6f0d5ep-5f}};
|
},
|
||||||
|
.erff_poly_B = {
|
||||||
|
0x1.079d0cp-3f, 0x1.450aa0p-1f, 0x1.b55cb0p-4f,
|
||||||
|
-0x1.8d6300p-6f, 0x1.fd1336p-9f, -0x1.91d2ccp-12f,
|
||||||
|
0x1.222900p-16f
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,19 +25,9 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/intrin/likely.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/exp_data.internal.h"
|
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
__static_yoink("arm_optimized_routines_notice");
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
|
|
||||||
/*
|
|
||||||
* Double-precision e^x function.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2018, Arm Limited.
|
|
||||||
* SPDX-License-Identifier: MIT
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define N (1 << EXP_TABLE_BITS)
|
#define N (1 << EXP_TABLE_BITS)
|
||||||
#define InvLn2N __exp_data.invln2N
|
#define InvLn2N __exp_data.invln2N
|
||||||
#define NegLn2hiN __exp_data.negln2hiN
|
#define NegLn2hiN __exp_data.negln2hiN
|
||||||
|
@ -48,6 +38,7 @@ __static_yoink("arm_optimized_routines_notice");
|
||||||
#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
|
#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
|
||||||
#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
|
#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
|
||||||
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
|
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
|
||||||
|
#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
|
||||||
|
|
||||||
/* Handle cases that may overflow or underflow when computing the result that
|
/* Handle cases that may overflow or underflow when computing the result that
|
||||||
is scale*(1+TMP) without intermediate rounding. The bit representation of
|
is scale*(1+TMP) without intermediate rounding. The bit representation of
|
||||||
|
@ -56,114 +47,154 @@ __static_yoink("arm_optimized_routines_notice");
|
||||||
a double. (int32_t)KI is the k used in the argument reduction and exponent
|
a double. (int32_t)KI is the k used in the argument reduction and exponent
|
||||||
adjustment of scale, positive k here means the result may overflow and
|
adjustment of scale, positive k here means the result may overflow and
|
||||||
negative k means the result may underflow. */
|
negative k means the result may underflow. */
|
||||||
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
|
static inline double
|
||||||
|
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
|
||||||
{
|
{
|
||||||
double_t scale, y;
|
double_t scale, y;
|
||||||
|
|
||||||
if ((ki & 0x80000000) == 0) {
|
if ((ki & 0x80000000) == 0)
|
||||||
/* k > 0, the exponent of scale might have overflowed by <= 460. */
|
{
|
||||||
sbits -= 1009ull << 52;
|
/* k > 0, the exponent of scale might have overflowed by <= 460. */
|
||||||
scale = asdouble(sbits);
|
sbits -= 1009ull << 52;
|
||||||
y = 0x1p1009 * (scale + scale * tmp);
|
scale = asdouble (sbits);
|
||||||
return eval_as_double(y);
|
y = 0x1p1009 * (scale + scale * tmp);
|
||||||
}
|
return check_oflow (eval_as_double (y));
|
||||||
/* k < 0, need special care in the subnormal range. */
|
}
|
||||||
sbits += 1022ull << 52;
|
/* k < 0, need special care in the subnormal range. */
|
||||||
scale = asdouble(sbits);
|
sbits += 1022ull << 52;
|
||||||
y = scale + scale * tmp;
|
scale = asdouble (sbits);
|
||||||
if (y < 1.0) {
|
y = scale + scale * tmp;
|
||||||
/* Round y to the right precision before scaling it into the subnormal
|
if (y < 1.0)
|
||||||
range to avoid double rounding that can cause 0.5+E/2 ulp error where
|
{
|
||||||
E is the worst-case ulp error outside the subnormal range. So this
|
/* Round y to the right precision before scaling it into the subnormal
|
||||||
is only useful if the goal is better than 1 ulp worst-case error. */
|
range to avoid double rounding that can cause 0.5+E/2 ulp error where
|
||||||
double_t hi, lo;
|
E is the worst-case ulp error outside the subnormal range. So this
|
||||||
lo = scale - y + scale * tmp;
|
is only useful if the goal is better than 1 ulp worst-case error. */
|
||||||
hi = 1.0 + y;
|
double_t hi, lo;
|
||||||
lo = 1.0 - hi + y + lo;
|
lo = scale - y + scale * tmp;
|
||||||
y = eval_as_double(hi + lo) - 1.0;
|
hi = 1.0 + y;
|
||||||
/* Avoid -0.0 with downward rounding. */
|
lo = 1.0 - hi + y + lo;
|
||||||
if (WANT_ROUNDING && y == 0.0)
|
y = eval_as_double (hi + lo) - 1.0;
|
||||||
y = 0.0;
|
/* Avoid -0.0 with downward rounding. */
|
||||||
/* The underflow exception needs to be signaled explicitly. */
|
if (WANT_ROUNDING && y == 0.0)
|
||||||
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
|
y = 0.0;
|
||||||
}
|
/* The underflow exception needs to be signaled explicitly. */
|
||||||
y = 0x1p-1022 * y;
|
force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
|
||||||
return eval_as_double(y);
|
}
|
||||||
|
y = 0x1p-1022 * y;
|
||||||
|
return check_uflow (eval_as_double (y));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Top 12 bits of a double (sign and exponent bits). */
|
/* Top 12 bits of a double (sign and exponent bits). */
|
||||||
static inline uint32_t top12(double x)
|
static inline uint32_t
|
||||||
|
top12 (double x)
|
||||||
{
|
{
|
||||||
return asuint64(x) >> 52;
|
return asuint64 (x) >> 52;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
|
||||||
|
If hastail is 0 then xtail is assumed to be 0 too. */
|
||||||
|
static inline double
|
||||||
|
exp_inline (double x, double xtail, int hastail)
|
||||||
|
{
|
||||||
|
uint32_t abstop;
|
||||||
|
uint64_t ki, idx, top, sbits;
|
||||||
|
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||||
|
double_t kd, z, r, r2, scale, tail, tmp;
|
||||||
|
|
||||||
|
abstop = top12 (x) & 0x7ff;
|
||||||
|
if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
|
||||||
|
{
|
||||||
|
if (abstop - top12 (0x1p-54) >= 0x80000000)
|
||||||
|
/* Avoid spurious underflow for tiny x. */
|
||||||
|
/* Note: 0 is common input. */
|
||||||
|
return WANT_ROUNDING ? 1.0 + x : 1.0;
|
||||||
|
if (abstop >= top12 (1024.0))
|
||||||
|
{
|
||||||
|
if (asuint64 (x) == asuint64 (-INFINITY))
|
||||||
|
return 0.0;
|
||||||
|
if (abstop >= top12 (INFINITY))
|
||||||
|
return 1.0 + x;
|
||||||
|
if (asuint64 (x) >> 63)
|
||||||
|
return __math_uflow (0);
|
||||||
|
else
|
||||||
|
return __math_oflow (0);
|
||||||
|
}
|
||||||
|
/* Large x is special cased below. */
|
||||||
|
abstop = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
|
||||||
|
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
|
||||||
|
z = InvLn2N * x;
|
||||||
|
#if TOINT_INTRINSICS
|
||||||
|
kd = roundtoint (z);
|
||||||
|
ki = converttoint (z);
|
||||||
|
#elif EXP_USE_TOINT_NARROW
|
||||||
|
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
|
||||||
|
kd = eval_as_double (z + Shift);
|
||||||
|
ki = asuint64 (kd) >> 16;
|
||||||
|
kd = (double_t) (int32_t) ki;
|
||||||
|
#else
|
||||||
|
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
|
||||||
|
kd = eval_as_double (z + Shift);
|
||||||
|
ki = asuint64 (kd);
|
||||||
|
kd -= Shift;
|
||||||
|
#endif
|
||||||
|
r = x + kd * NegLn2hiN + kd * NegLn2loN;
|
||||||
|
/* The code assumes 2^-200 < |xtail| < 2^-8/N. */
|
||||||
|
if (hastail)
|
||||||
|
r += xtail;
|
||||||
|
/* 2^(k/N) ~= scale * (1 + tail). */
|
||||||
|
idx = 2 * (ki % N);
|
||||||
|
top = ki << (52 - EXP_TABLE_BITS);
|
||||||
|
tail = asdouble (T[idx]);
|
||||||
|
/* This is only a valid scale when -1023*N < k < 1024*N. */
|
||||||
|
sbits = T[idx + 1] + top;
|
||||||
|
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
|
||||||
|
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
||||||
|
r2 = r * r;
|
||||||
|
/* Without fma the worst case error is 0.25/N ulp larger. */
|
||||||
|
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
|
||||||
|
#if EXP_POLY_ORDER == 4
|
||||||
|
tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
|
||||||
|
#elif EXP_POLY_ORDER == 5
|
||||||
|
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
|
||||||
|
#elif EXP_POLY_ORDER == 6
|
||||||
|
tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
|
||||||
|
#endif
|
||||||
|
if (unlikely (abstop == 0))
|
||||||
|
return specialcase (tmp, sbits, ki);
|
||||||
|
scale = asdouble (sbits);
|
||||||
|
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
|
||||||
|
is no spurious underflow here even without fma. */
|
||||||
|
return eval_as_double (scale + scale * tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns 𝑒^x.
|
* Returns 𝑒^x.
|
||||||
|
*
|
||||||
|
* @raise ERANGE on overflow or underflow
|
||||||
*/
|
*/
|
||||||
double exp(double x)
|
double
|
||||||
|
exp (double x)
|
||||||
{
|
{
|
||||||
uint32_t abstop;
|
return exp_inline (x, 0, 0);
|
||||||
uint64_t ki, idx, top, sbits;
|
|
||||||
double_t kd, z, r, r2, scale, tail, tmp;
|
|
||||||
|
|
||||||
abstop = top12(x) & 0x7ff;
|
|
||||||
if (UNLIKELY(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
|
|
||||||
if (abstop - top12(0x1p-54) >= 0x80000000)
|
|
||||||
/* Avoid spurious underflow for tiny x. */
|
|
||||||
/* Note: 0 is common input. */
|
|
||||||
return WANT_ROUNDING ? 1.0 + x : 1.0;
|
|
||||||
if (abstop >= top12(1024.0)) {
|
|
||||||
if (asuint64(x) == asuint64(-INFINITY))
|
|
||||||
return 0.0;
|
|
||||||
if (abstop >= top12(INFINITY))
|
|
||||||
return 1.0 + x;
|
|
||||||
if (asuint64(x) >> 63)
|
|
||||||
return __math_uflow(0);
|
|
||||||
else
|
|
||||||
return __math_oflow(0);
|
|
||||||
}
|
|
||||||
/* Large x is special cased below. */
|
|
||||||
abstop = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
|
|
||||||
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
|
|
||||||
z = InvLn2N * x;
|
|
||||||
#if TOINT_INTRINSICS
|
|
||||||
kd = roundtoint(z);
|
|
||||||
ki = converttoint(z);
|
|
||||||
#elif EXP_USE_TOINT_NARROW
|
|
||||||
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
|
|
||||||
kd = eval_as_double(z + Shift);
|
|
||||||
ki = asuint64(kd) >> 16;
|
|
||||||
kd = (double_t)(int32_t)ki;
|
|
||||||
#else
|
|
||||||
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
|
|
||||||
kd = eval_as_double(z + Shift);
|
|
||||||
ki = asuint64(kd);
|
|
||||||
kd -= Shift;
|
|
||||||
#endif
|
|
||||||
r = x + kd * NegLn2hiN + kd * NegLn2loN;
|
|
||||||
/* 2^(k/N) ~= scale * (1 + tail). */
|
|
||||||
idx = 2 * (ki % N);
|
|
||||||
top = ki << (52 - EXP_TABLE_BITS);
|
|
||||||
tail = asdouble(T[idx]);
|
|
||||||
/* This is only a valid scale when -1023*N < k < 1024*N. */
|
|
||||||
sbits = T[idx + 1] + top;
|
|
||||||
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
|
|
||||||
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
|
||||||
r2 = r * r;
|
|
||||||
/* Without fma the worst case error is 0.25/N ulp larger. */
|
|
||||||
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
|
|
||||||
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
|
|
||||||
if (UNLIKELY(abstop == 0))
|
|
||||||
return specialcase(tmp, sbits, ki);
|
|
||||||
scale = asdouble(sbits);
|
|
||||||
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
|
|
||||||
is no spurious underflow here even without fma. */
|
|
||||||
return eval_as_double(scale + scale * tmp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
|
/* May be useful for implementing pow where more than double
|
||||||
__weak_reference(exp, expl);
|
precision input is needed. */
|
||||||
|
double
|
||||||
|
__exp_dd (double x, double xtail)
|
||||||
|
{
|
||||||
|
return exp_inline (x, xtail, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if USE_GLIBC_ABI
|
||||||
|
strong_alias (exp, __exp_finite)
|
||||||
|
hidden_alias (exp, __ieee754_exp)
|
||||||
|
hidden_alias (__exp_dd, __exp1)
|
||||||
|
# if LDBL_MANT_DIG == 53
|
||||||
|
long double expl (long double x) { return exp (x); }
|
||||||
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Musl Libc │
|
│ Optimized Routines │
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,33 +25,135 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/math.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
__static_yoink("musl_libc_notice");
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
|
|
||||||
|
#define N (1 << EXP_TABLE_BITS)
|
||||||
|
#define IndexMask (N - 1)
|
||||||
|
#define OFlowBound 0x1.34413509f79ffp8 /* log10(DBL_MAX). */
|
||||||
|
#define UFlowBound -0x1.5ep+8 /* -350. */
|
||||||
|
#define SmallTop 0x3c6 /* top12(0x1p-57). */
|
||||||
|
#define BigTop 0x407 /* top12(0x1p8). */
|
||||||
|
#define Thresh 0x41 /* BigTop - SmallTop. */
|
||||||
|
#define Shift __exp_data.shift
|
||||||
|
#define C(i) __exp_data.exp10_poly[i]
|
||||||
|
|
||||||
|
static double
|
||||||
|
special_case (uint64_t sbits, double_t tmp, uint64_t ki)
|
||||||
|
{
|
||||||
|
double_t scale, y;
|
||||||
|
|
||||||
|
if (ki - (1ull << 16) < 0x80000000)
|
||||||
|
{
|
||||||
|
/* The exponent of scale might have overflowed by 1. */
|
||||||
|
sbits -= 1ull << 52;
|
||||||
|
scale = asdouble (sbits);
|
||||||
|
y = 2 * (scale + scale * tmp);
|
||||||
|
return check_oflow (eval_as_double (y));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* n < 0, need special care in the subnormal range. */
|
||||||
|
sbits += 1022ull << 52;
|
||||||
|
scale = asdouble (sbits);
|
||||||
|
y = scale + scale * tmp;
|
||||||
|
|
||||||
|
if (y < 1.0)
|
||||||
|
{
|
||||||
|
/* Round y to the right precision before scaling it into the subnormal
|
||||||
|
range to avoid double rounding that can cause 0.5+E/2 ulp error where
|
||||||
|
E is the worst-case ulp error outside the subnormal range. So this
|
||||||
|
is only useful if the goal is better than 1 ulp worst-case error. */
|
||||||
|
double_t lo = scale - y + scale * tmp;
|
||||||
|
double_t hi = 1.0 + y;
|
||||||
|
lo = 1.0 - hi + y + lo;
|
||||||
|
y = eval_as_double (hi + lo) - 1.0;
|
||||||
|
/* Avoid -0.0 with downward rounding. */
|
||||||
|
if (WANT_ROUNDING && y == 0.0)
|
||||||
|
y = 0.0;
|
||||||
|
/* The underflow exception needs to be signaled explicitly. */
|
||||||
|
force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
|
||||||
|
}
|
||||||
|
y = 0x1p-1022 * y;
|
||||||
|
|
||||||
|
return check_uflow (y);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns 10ˣ.
|
* Returns 10ˣ.
|
||||||
|
*
|
||||||
|
* The largest observed error is ~0.513 ULP.
|
||||||
*/
|
*/
|
||||||
double exp10(double x)
|
double
|
||||||
|
exp10 (double x)
|
||||||
{
|
{
|
||||||
static const double p10[] = {
|
uint64_t ix = asuint64 (x);
|
||||||
1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10,
|
uint32_t abstop = (ix >> 52) & 0x7ff;
|
||||||
1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1,
|
|
||||||
1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
|
if (unlikely (abstop - SmallTop >= Thresh))
|
||||||
1e10, 1e11, 1e12, 1e13, 1e14, 1e15
|
{
|
||||||
};
|
if (abstop - SmallTop >= 0x80000000)
|
||||||
double n, y = modf(x, &n);
|
/* Avoid spurious underflow for tiny x.
|
||||||
union {double f; uint64_t i;} u = {n};
|
Note: 0 is common input. */
|
||||||
/* fabs(n) < 16 without raising invalid on nan */
|
return x + 1;
|
||||||
if ((u.i>>52 & 0x7ff) < 0x3ff+4) {
|
if (abstop == 0x7ff)
|
||||||
if (!y) return p10[(int)n+15];
|
return ix == asuint64 (-INFINITY) ? 0.0 : x + 1.0;
|
||||||
y = exp2(3.32192809488736234787031942948939 * y);
|
if (x >= OFlowBound)
|
||||||
return y * p10[(int)n+15];
|
return __math_oflow (0);
|
||||||
}
|
if (x < UFlowBound)
|
||||||
return pow(10.0, x);
|
return __math_uflow (0);
|
||||||
|
|
||||||
|
/* Large x is special-cased below. */
|
||||||
|
abstop = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Reduce x: z = x * N / log10(2), k = round(z). */
|
||||||
|
double_t z = __exp_data.invlog10_2N * x;
|
||||||
|
double_t kd;
|
||||||
|
int64_t ki;
|
||||||
|
#if TOINT_INTRINSICS
|
||||||
|
kd = roundtoint (z);
|
||||||
|
ki = converttoint (z);
|
||||||
|
#else
|
||||||
|
kd = eval_as_double (z + Shift);
|
||||||
|
kd -= Shift;
|
||||||
|
ki = kd;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* r = x - k * log10(2), r in [-0.5, 0.5]. */
|
||||||
|
double_t r = x;
|
||||||
|
r = __exp_data.neglog10_2hiN * kd + r;
|
||||||
|
r = __exp_data.neglog10_2loN * kd + r;
|
||||||
|
|
||||||
|
/* exp10(x) = 2^(k/N) * 2^(r/N).
|
||||||
|
Approximate the two components separately. */
|
||||||
|
|
||||||
|
/* s = 2^(k/N), using lookup table. */
|
||||||
|
uint64_t e = ki << (52 - EXP_TABLE_BITS);
|
||||||
|
uint64_t i = (ki & IndexMask) * 2;
|
||||||
|
uint64_t u = __exp_data.tab[i + 1];
|
||||||
|
uint64_t sbits = u + e;
|
||||||
|
|
||||||
|
double_t tail = asdouble (__exp_data.tab[i]);
|
||||||
|
|
||||||
|
/* 2^(r/N) ~= 1 + r * Poly(r). */
|
||||||
|
double_t r2 = r * r;
|
||||||
|
double_t p = C (0) + r * C (1);
|
||||||
|
double_t y = C (2) + r * C (3);
|
||||||
|
y = y + r2 * C (4);
|
||||||
|
y = p + r2 * y;
|
||||||
|
y = tail + y * r;
|
||||||
|
|
||||||
|
if (unlikely (abstop == 0))
|
||||||
|
return special_case (sbits, y, ki);
|
||||||
|
|
||||||
|
/* Assemble components:
|
||||||
|
y = 2^(r/N) * 2^(k/N)
|
||||||
|
~= (y + 1) * s. */
|
||||||
|
double_t s = asdouble (sbits);
|
||||||
|
return eval_as_double (s * y + s);
|
||||||
}
|
}
|
||||||
|
|
||||||
__strong_reference(exp10, pow10);
|
__strong_reference(exp10, pow10);
|
||||||
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
|
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
|
||||||
__weak_reference(exp10, exp10l);
|
__weak_reference(exp10, exp10l);
|
||||||
__weak_reference(exp10, pow10l);
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Musl Libc │
|
│ Optimized Routines │
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,19 +25,9 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/intrin/likely.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/exp_data.internal.h"
|
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
__static_yoink("arm_optimized_routines_notice");
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
|
|
||||||
/*
|
|
||||||
* Double-precision 2^x function.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2018, Arm Limited.
|
|
||||||
* SPDX-License-Identifier: MIT
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define N (1 << EXP_TABLE_BITS)
|
#define N (1 << EXP_TABLE_BITS)
|
||||||
#define Shift __exp_data.exp2_shift
|
#define Shift __exp_data.exp2_shift
|
||||||
#define T __exp_data.tab
|
#define T __exp_data.tab
|
||||||
|
@ -46,6 +36,7 @@ __static_yoink("arm_optimized_routines_notice");
|
||||||
#define C3 __exp_data.exp2_poly[2]
|
#define C3 __exp_data.exp2_poly[2]
|
||||||
#define C4 __exp_data.exp2_poly[3]
|
#define C4 __exp_data.exp2_poly[3]
|
||||||
#define C5 __exp_data.exp2_poly[4]
|
#define C5 __exp_data.exp2_poly[4]
|
||||||
|
#define C6 __exp_data.exp2_poly[5]
|
||||||
|
|
||||||
/* Handle cases that may overflow or underflow when computing the result that
|
/* Handle cases that may overflow or underflow when computing the result that
|
||||||
is scale*(1+TMP) without intermediate rounding. The bit representation of
|
is scale*(1+TMP) without intermediate rounding. The bit representation of
|
||||||
|
@ -54,103 +45,121 @@ __static_yoink("arm_optimized_routines_notice");
|
||||||
a double. (int32_t)KI is the k used in the argument reduction and exponent
|
a double. (int32_t)KI is the k used in the argument reduction and exponent
|
||||||
adjustment of scale, positive k here means the result may overflow and
|
adjustment of scale, positive k here means the result may overflow and
|
||||||
negative k means the result may underflow. */
|
negative k means the result may underflow. */
|
||||||
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
|
static inline double
|
||||||
|
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
|
||||||
{
|
{
|
||||||
double_t scale, y;
|
double_t scale, y;
|
||||||
|
|
||||||
if ((ki & 0x80000000) == 0) {
|
if ((ki & 0x80000000) == 0)
|
||||||
/* k > 0, the exponent of scale might have overflowed by 1. */
|
{
|
||||||
sbits -= 1ull << 52;
|
/* k > 0, the exponent of scale might have overflowed by 1. */
|
||||||
scale = asdouble(sbits);
|
sbits -= 1ull << 52;
|
||||||
y = 2 * (scale + scale * tmp);
|
scale = asdouble (sbits);
|
||||||
return eval_as_double(y);
|
y = 2 * (scale + scale * tmp);
|
||||||
}
|
return check_oflow (eval_as_double (y));
|
||||||
/* k < 0, need special care in the subnormal range. */
|
}
|
||||||
sbits += 1022ull << 52;
|
/* k < 0, need special care in the subnormal range. */
|
||||||
scale = asdouble(sbits);
|
sbits += 1022ull << 52;
|
||||||
y = scale + scale * tmp;
|
scale = asdouble (sbits);
|
||||||
if (y < 1.0) {
|
y = scale + scale * tmp;
|
||||||
/* Round y to the right precision before scaling it into the subnormal
|
if (y < 1.0)
|
||||||
range to avoid double rounding that can cause 0.5+E/2 ulp error where
|
{
|
||||||
E is the worst-case ulp error outside the subnormal range. So this
|
/* Round y to the right precision before scaling it into the subnormal
|
||||||
is only useful if the goal is better than 1 ulp worst-case error. */
|
range to avoid double rounding that can cause 0.5+E/2 ulp error where
|
||||||
double_t hi, lo;
|
E is the worst-case ulp error outside the subnormal range. So this
|
||||||
lo = scale - y + scale * tmp;
|
is only useful if the goal is better than 1 ulp worst-case error. */
|
||||||
hi = 1.0 + y;
|
double_t hi, lo;
|
||||||
lo = 1.0 - hi + y + lo;
|
lo = scale - y + scale * tmp;
|
||||||
y = eval_as_double(hi + lo) - 1.0;
|
hi = 1.0 + y;
|
||||||
/* Avoid -0.0 with downward rounding. */
|
lo = 1.0 - hi + y + lo;
|
||||||
if (WANT_ROUNDING && y == 0.0)
|
y = eval_as_double (hi + lo) - 1.0;
|
||||||
y = 0.0;
|
/* Avoid -0.0 with downward rounding. */
|
||||||
/* The underflow exception needs to be signaled explicitly. */
|
if (WANT_ROUNDING && y == 0.0)
|
||||||
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
|
y = 0.0;
|
||||||
}
|
/* The underflow exception needs to be signaled explicitly. */
|
||||||
y = 0x1p-1022 * y;
|
force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
|
||||||
return eval_as_double(y);
|
}
|
||||||
|
y = 0x1p-1022 * y;
|
||||||
|
return check_uflow (eval_as_double (y));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Top 12 bits of a double (sign and exponent bits). */
|
/* Top 12 bits of a double (sign and exponent bits). */
|
||||||
static inline uint32_t top12(double x)
|
static inline uint32_t
|
||||||
|
top12 (double x)
|
||||||
{
|
{
|
||||||
return asuint64(x) >> 52;
|
return asuint64 (x) >> 52;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns 2^𝑥.
|
* Returns 2^𝑥.
|
||||||
*/
|
*/
|
||||||
double exp2(double x)
|
double
|
||||||
|
exp2 (double x)
|
||||||
{
|
{
|
||||||
uint32_t abstop;
|
uint32_t abstop;
|
||||||
uint64_t ki, idx, top, sbits;
|
uint64_t ki, idx, top, sbits;
|
||||||
double_t kd, r, r2, scale, tail, tmp;
|
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||||
|
double_t kd, r, r2, scale, tail, tmp;
|
||||||
|
|
||||||
abstop = top12(x) & 0x7ff;
|
abstop = top12 (x) & 0x7ff;
|
||||||
if (UNLIKELY(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
|
if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
|
||||||
if (abstop - top12(0x1p-54) >= 0x80000000)
|
{
|
||||||
/* Avoid spurious underflow for tiny x. */
|
if (abstop - top12 (0x1p-54) >= 0x80000000)
|
||||||
/* Note: 0 is common input. */
|
/* Avoid spurious underflow for tiny x. */
|
||||||
return WANT_ROUNDING ? 1.0 + x : 1.0;
|
/* Note: 0 is common input. */
|
||||||
if (abstop >= top12(1024.0)) {
|
return WANT_ROUNDING ? 1.0 + x : 1.0;
|
||||||
if (asuint64(x) == asuint64(-INFINITY))
|
if (abstop >= top12 (1024.0))
|
||||||
return 0.0;
|
{
|
||||||
if (abstop >= top12(INFINITY))
|
if (asuint64 (x) == asuint64 (-INFINITY))
|
||||||
return 1.0 + x;
|
return 0.0;
|
||||||
if (!(asuint64(x) >> 63))
|
if (abstop >= top12 (INFINITY))
|
||||||
return __math_oflow(0);
|
return 1.0 + x;
|
||||||
else if (asuint64(x) >= asuint64(-1075.0))
|
if (!(asuint64 (x) >> 63))
|
||||||
return __math_uflow(0);
|
return __math_oflow (0);
|
||||||
}
|
else if (asuint64 (x) >= asuint64 (-1075.0))
|
||||||
if (2 * asuint64(x) > 2 * asuint64(928.0))
|
return __math_uflow (0);
|
||||||
/* Large x is special cased below. */
|
|
||||||
abstop = 0;
|
|
||||||
}
|
}
|
||||||
|
if (2 * asuint64 (x) > 2 * asuint64 (928.0))
|
||||||
|
/* Large x is special cased below. */
|
||||||
|
abstop = 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
|
/* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
|
||||||
/* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
|
/* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
|
||||||
kd = eval_as_double(x + Shift);
|
kd = eval_as_double (x + Shift);
|
||||||
ki = asuint64(kd); /* k. */
|
ki = asuint64 (kd); /* k. */
|
||||||
kd -= Shift; /* k/N for int k. */
|
kd -= Shift; /* k/N for int k. */
|
||||||
r = x - kd;
|
r = x - kd;
|
||||||
/* 2^(k/N) ~= scale * (1 + tail). */
|
/* 2^(k/N) ~= scale * (1 + tail). */
|
||||||
idx = 2 * (ki % N);
|
idx = 2 * (ki % N);
|
||||||
top = ki << (52 - EXP_TABLE_BITS);
|
top = ki << (52 - EXP_TABLE_BITS);
|
||||||
tail = asdouble(T[idx]);
|
tail = asdouble (T[idx]);
|
||||||
/* This is only a valid scale when -1023*N < k < 1024*N. */
|
/* This is only a valid scale when -1023*N < k < 1024*N. */
|
||||||
sbits = T[idx + 1] + top;
|
sbits = T[idx + 1] + top;
|
||||||
/* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
|
/* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
|
||||||
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
||||||
r2 = r * r;
|
r2 = r * r;
|
||||||
/* Without fma the worst case error is 0.5/N ulp larger. */
|
/* Without fma the worst case error is 0.5/N ulp larger. */
|
||||||
/* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
|
/* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
|
||||||
tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
|
#if EXP2_POLY_ORDER == 4
|
||||||
if (UNLIKELY(abstop == 0))
|
tmp = tail + r * C1 + r2 * C2 + r * r2 * (C3 + r * C4);
|
||||||
return specialcase(tmp, sbits, ki);
|
#elif EXP2_POLY_ORDER == 5
|
||||||
scale = asdouble(sbits);
|
tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
|
||||||
/* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
|
#elif EXP2_POLY_ORDER == 6
|
||||||
is no spurious underflow here even without fma. */
|
tmp = tail + r * C1 + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
|
||||||
return eval_as_double(scale + scale * tmp);
|
#endif
|
||||||
|
if (unlikely (abstop == 0))
|
||||||
|
return specialcase (tmp, sbits, ki);
|
||||||
|
scale = asdouble (sbits);
|
||||||
|
/* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
|
||||||
|
is no spurious underflow here even without fma. */
|
||||||
|
return eval_as_double (scale + scale * tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
|
#if USE_GLIBC_ABI
|
||||||
__weak_reference(exp2, exp2l);
|
strong_alias (exp2, __exp2_finite)
|
||||||
|
hidden_alias (exp2, __ieee754_exp2)
|
||||||
|
# if LDBL_MANT_DIG == 53
|
||||||
|
long double exp2l (long double x) { return exp2 (x); }
|
||||||
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,19 +25,9 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/intrin/likely.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/exp2f_data.internal.h"
|
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
__static_yoink("arm_optimized_routines_notice");
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
|
|
||||||
/*
|
|
||||||
* Single-precision 2^x function.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2017-2018, Arm Limited.
|
|
||||||
* SPDX-License-Identifier: MIT
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
EXP2F_TABLE_BITS = 5
|
EXP2F_TABLE_BITS = 5
|
||||||
EXP2F_POLY_ORDER = 3
|
EXP2F_POLY_ORDER = 3
|
||||||
|
@ -53,48 +43,66 @@ Non-nearest ULP error: 1 (rounded ULP error)
|
||||||
#define C __exp2f_data.poly
|
#define C __exp2f_data.poly
|
||||||
#define SHIFT __exp2f_data.shift_scaled
|
#define SHIFT __exp2f_data.shift_scaled
|
||||||
|
|
||||||
static inline uint32_t top12(float x)
|
static inline uint32_t
|
||||||
|
top12 (float x)
|
||||||
{
|
{
|
||||||
return asuint(x) >> 20;
|
return asuint (x) >> 20;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns 2^𝑥.
|
* Returns 2^𝑥.
|
||||||
|
*
|
||||||
|
* - ULP error: 0.502 (nearest rounding.)
|
||||||
|
* - Relative error: 1.69 * 2^-34 in [-1/64, 1/64] (before rounding.)
|
||||||
|
* - Wrong count: 168353 (all nearest rounding wrong results with fma.)
|
||||||
|
* - Non-nearest ULP error: 1 (rounded ULP error)
|
||||||
*/
|
*/
|
||||||
float exp2f(float x)
|
float
|
||||||
|
exp2f (float x)
|
||||||
{
|
{
|
||||||
uint32_t abstop;
|
uint32_t abstop;
|
||||||
uint64_t ki, t;
|
uint64_t ki, t;
|
||||||
double_t kd, xd, z, r, r2, y, s;
|
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||||
|
double_t kd, xd, z, r, r2, y, s;
|
||||||
|
|
||||||
xd = (double_t)x;
|
xd = (double_t) x;
|
||||||
abstop = top12(x) & 0x7ff;
|
abstop = top12 (x) & 0x7ff;
|
||||||
if (UNLIKELY(abstop >= top12(128.0f))) {
|
if (unlikely (abstop >= top12 (128.0f)))
|
||||||
/* |x| >= 128 or x is nan. */
|
{
|
||||||
if (asuint(x) == asuint(-INFINITY))
|
/* |x| >= 128 or x is nan. */
|
||||||
return 0.0f;
|
if (asuint (x) == asuint (-INFINITY))
|
||||||
if (abstop >= top12(INFINITY))
|
return 0.0f;
|
||||||
return x + x;
|
if (abstop >= top12 (INFINITY))
|
||||||
if (x > 0.0f)
|
return x + x;
|
||||||
return __math_oflowf(0);
|
if (x > 0.0f)
|
||||||
if (x <= -150.0f)
|
return __math_oflowf (0);
|
||||||
return __math_uflowf(0);
|
if (x <= -150.0f)
|
||||||
}
|
return __math_uflowf (0);
|
||||||
|
#if WANT_ERRNO_UFLOW
|
||||||
|
if (x < -149.0f)
|
||||||
|
return __math_may_uflowf (0);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k. */
|
/* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k. */
|
||||||
kd = eval_as_double(xd + SHIFT);
|
kd = eval_as_double (xd + SHIFT);
|
||||||
ki = asuint64(kd);
|
ki = asuint64 (kd);
|
||||||
kd -= SHIFT; /* k/N for int k. */
|
kd -= SHIFT; /* k/N for int k. */
|
||||||
r = xd - kd;
|
r = xd - kd;
|
||||||
|
|
||||||
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
|
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
|
||||||
t = T[ki % N];
|
t = T[ki % N];
|
||||||
t += ki << (52 - EXP2F_TABLE_BITS);
|
t += ki << (52 - EXP2F_TABLE_BITS);
|
||||||
s = asdouble(t);
|
s = asdouble (t);
|
||||||
z = C[0] * r + C[1];
|
z = C[0] * r + C[1];
|
||||||
r2 = r * r;
|
r2 = r * r;
|
||||||
y = C[2] * r + 1;
|
y = C[2] * r + 1;
|
||||||
y = z * r2 + y;
|
y = z * r2 + y;
|
||||||
y = y * s;
|
y = y * s;
|
||||||
return eval_as_float(y);
|
return eval_as_float (y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if USE_GLIBC_ABI
|
||||||
|
strong_alias (exp2f, __exp2f_finite)
|
||||||
|
hidden_alias (exp2f, __ieee754_exp2f)
|
||||||
|
#endif
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,16 +25,9 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/tinymath/exp2f_data.internal.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
__static_yoink("arm_optimized_routines_notice");
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
|
|
||||||
/*
|
|
||||||
* Shared data between expf, exp2f and powf.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2017-2018, Arm Limited.
|
|
||||||
* SPDX-License-Identifier: MIT
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define N (1 << EXP2F_TABLE_BITS)
|
#define N (1 << EXP2F_TABLE_BITS)
|
||||||
|
|
||||||
const struct exp2f_data __exp2f_data = {
|
const struct exp2f_data __exp2f_data = {
|
||||||
|
@ -42,6 +35,15 @@ const struct exp2f_data __exp2f_data = {
|
||||||
used for computing 2^(k/N) for an int |k| < 150 N as
|
used for computing 2^(k/N) for an int |k| < 150 N as
|
||||||
double(tab[k%N] + (k << 52-BITS)) */
|
double(tab[k%N] + (k << 52-BITS)) */
|
||||||
.tab = {
|
.tab = {
|
||||||
|
#if N == 8
|
||||||
|
0x3ff0000000000000, 0x3fef72b83c7d517b, 0x3fef06fe0a31b715, 0x3feebfdad5362a27,
|
||||||
|
0x3feea09e667f3bcd, 0x3feeace5422aa0db, 0x3feee89f995ad3ad, 0x3fef5818dcfba487,
|
||||||
|
#elif N == 16
|
||||||
|
0x3ff0000000000000, 0x3fefb5586cf9890f, 0x3fef72b83c7d517b, 0x3fef387a6e756238,
|
||||||
|
0x3fef06fe0a31b715, 0x3feedea64c123422, 0x3feebfdad5362a27, 0x3feeab07dd485429,
|
||||||
|
0x3feea09e667f3bcd, 0x3feea11473eb0187, 0x3feeace5422aa0db, 0x3feec49182a3f090,
|
||||||
|
0x3feee89f995ad3ad, 0x3fef199bdd85529c, 0x3fef5818dcfba487, 0x3fefa4afa2a490da,
|
||||||
|
#elif N == 32
|
||||||
0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
|
0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
|
||||||
0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
|
0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
|
||||||
0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
|
0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
|
||||||
|
@ -50,14 +52,48 @@ const struct exp2f_data __exp2f_data = {
|
||||||
0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
|
0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
|
||||||
0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
|
0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
|
||||||
0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
|
0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
|
||||||
|
#elif N == 64
|
||||||
|
0x3ff0000000000000, 0x3fefec9a3e778061, 0x3fefd9b0d3158574, 0x3fefc74518759bc8,
|
||||||
|
0x3fefb5586cf9890f, 0x3fefa3ec32d3d1a2, 0x3fef9301d0125b51, 0x3fef829aaea92de0,
|
||||||
|
0x3fef72b83c7d517b, 0x3fef635beb6fcb75, 0x3fef54873168b9aa, 0x3fef463b88628cd6,
|
||||||
|
0x3fef387a6e756238, 0x3fef2b4565e27cdd, 0x3fef1e9df51fdee1, 0x3fef1285a6e4030b,
|
||||||
|
0x3fef06fe0a31b715, 0x3feefc08b26416ff, 0x3feef1a7373aa9cb, 0x3feee7db34e59ff7,
|
||||||
|
0x3feedea64c123422, 0x3feed60a21f72e2a, 0x3feece086061892d, 0x3feec6a2b5c13cd0,
|
||||||
|
0x3feebfdad5362a27, 0x3feeb9b2769d2ca7, 0x3feeb42b569d4f82, 0x3feeaf4736b527da,
|
||||||
|
0x3feeab07dd485429, 0x3feea76f15ad2148, 0x3feea47eb03a5585, 0x3feea23882552225,
|
||||||
|
0x3feea09e667f3bcd, 0x3fee9fb23c651a2f, 0x3fee9f75e8ec5f74, 0x3fee9feb564267c9,
|
||||||
|
0x3feea11473eb0187, 0x3feea2f336cf4e62, 0x3feea589994cce13, 0x3feea8d99b4492ed,
|
||||||
|
0x3feeace5422aa0db, 0x3feeb1ae99157736, 0x3feeb737b0cdc5e5, 0x3feebd829fde4e50,
|
||||||
|
0x3feec49182a3f090, 0x3feecc667b5de565, 0x3feed503b23e255d, 0x3feede6b5579fdbf,
|
||||||
|
0x3feee89f995ad3ad, 0x3feef3a2b84f15fb, 0x3feeff76f2fb5e47, 0x3fef0c1e904bc1d2,
|
||||||
|
0x3fef199bdd85529c, 0x3fef27f12e57d14b, 0x3fef3720dcef9069, 0x3fef472d4a07897c,
|
||||||
|
0x3fef5818dcfba487, 0x3fef69e603db3285, 0x3fef7c97337b9b5f, 0x3fef902ee78b3ff6,
|
||||||
|
0x3fefa4afa2a490da, 0x3fefba1bee615a27, 0x3fefd0765b6e4540, 0x3fefe7c1819e90d8,
|
||||||
|
#endif
|
||||||
},
|
},
|
||||||
.shift_scaled = 0x1.8p+52 / N,
|
.shift_scaled = 0x1.8p+52 / N,
|
||||||
.poly = {
|
.poly = {
|
||||||
|
#if N == 8
|
||||||
|
0x1.c6a00335106e2p-5, 0x1.ec0c313449f55p-3, 0x1.62e431111f69fp-1,
|
||||||
|
#elif N == 16
|
||||||
|
0x1.c6ac6aa313963p-5, 0x1.ebfff4532d9bap-3, 0x1.62e43001bc49fp-1,
|
||||||
|
#elif N == 32
|
||||||
0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1,
|
0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1,
|
||||||
|
#elif N == 64
|
||||||
|
0x1.c6b04b4221b2ap-5, 0x1.ebfc213e184d7p-3, 0x1.62e42fefb5b7fp-1,
|
||||||
|
#endif
|
||||||
},
|
},
|
||||||
.shift = 0x1.8p+52,
|
.shift = 0x1.8p+52,
|
||||||
.invln2_scaled = 0x1.71547652b82fep+0 * N,
|
.invln2_scaled = 0x1.71547652b82fep+0 * N,
|
||||||
.poly_scaled = {
|
.poly_scaled = {
|
||||||
|
#if N == 8
|
||||||
|
0x1.c6a00335106e2p-5/N/N/N, 0x1.ec0c313449f55p-3/N/N, 0x1.62e431111f69fp-1/N,
|
||||||
|
#elif N == 16
|
||||||
|
0x1.c6ac6aa313963p-5/N/N/N, 0x1.ebfff4532d9bap-3/N/N, 0x1.62e43001bc49fp-1/N,
|
||||||
|
#elif N == 32
|
||||||
0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N,
|
0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N,
|
||||||
|
#elif N == 64
|
||||||
|
0x1.c6b04b4221b2ap-5/N/N/N, 0x1.ebfc213e184d7p-3/N/N, 0x1.62e42fefb5b7fp-1/N,
|
||||||
|
#endif
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,19 +0,0 @@
|
||||||
#ifndef COSMOPOLITAN_LIBC_TINYMATH_EXP2F_DATA_INTERNAL_H_
|
|
||||||
#define COSMOPOLITAN_LIBC_TINYMATH_EXP2F_DATA_INTERNAL_H_
|
|
||||||
|
|
||||||
#define EXP2F_TABLE_BITS 5
|
|
||||||
#define EXP2F_POLY_ORDER 3
|
|
||||||
|
|
||||||
COSMOPOLITAN_C_START_
|
|
||||||
|
|
||||||
extern const struct exp2f_data {
|
|
||||||
uint64_t tab[1 << EXP2F_TABLE_BITS];
|
|
||||||
double shift_scaled;
|
|
||||||
double poly[EXP2F_POLY_ORDER];
|
|
||||||
double shift;
|
|
||||||
double invln2_scaled;
|
|
||||||
double poly_scaled[EXP2F_POLY_ORDER];
|
|
||||||
} __exp2f_data;
|
|
||||||
|
|
||||||
COSMOPOLITAN_C_END_
|
|
||||||
#endif /* COSMOPOLITAN_LIBC_TINYMATH_EXP2F_DATA_INTERNAL_H_ */
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,23 +0,0 @@
|
||||||
#ifndef COSMOPOLITAN_LIBC_TINYMATH_EXP_DATA_H_
|
|
||||||
#define COSMOPOLITAN_LIBC_TINYMATH_EXP_DATA_H_
|
|
||||||
|
|
||||||
#define EXP_TABLE_BITS 7
|
|
||||||
#define EXP_POLY_ORDER 5
|
|
||||||
#define EXP_USE_TOINT_NARROW 0
|
|
||||||
#define EXP2_POLY_ORDER 5
|
|
||||||
|
|
||||||
COSMOPOLITAN_C_START_
|
|
||||||
|
|
||||||
extern const struct exp_data {
|
|
||||||
double invln2N;
|
|
||||||
double shift;
|
|
||||||
double negln2hiN;
|
|
||||||
double negln2loN;
|
|
||||||
double poly[4]; /* Last four coefficients. */
|
|
||||||
double exp2_shift;
|
|
||||||
double exp2_poly[EXP2_POLY_ORDER];
|
|
||||||
uint64_t tab[2 * (1 << EXP_TABLE_BITS)];
|
|
||||||
} __exp_data;
|
|
||||||
|
|
||||||
COSMOPOLITAN_C_END_
|
|
||||||
#endif /* COSMOPOLITAN_LIBC_TINYMATH_EXP_DATA_H_ */
|
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,19 +25,9 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/intrin/likely.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/exp2f_data.internal.h"
|
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
__static_yoink("arm_optimized_routines_notice");
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
|
|
||||||
/*
|
|
||||||
* Single-precision e^x function.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2017-2018, Arm Limited.
|
|
||||||
* SPDX-License-Identifier: MIT
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
EXP2F_TABLE_BITS = 5
|
EXP2F_TABLE_BITS = 5
|
||||||
EXP2F_POLY_ORDER = 3
|
EXP2F_POLY_ORDER = 3
|
||||||
|
@ -53,59 +43,79 @@ Non-nearest ULP error: 1 (rounded ULP error)
|
||||||
#define T __exp2f_data.tab
|
#define T __exp2f_data.tab
|
||||||
#define C __exp2f_data.poly_scaled
|
#define C __exp2f_data.poly_scaled
|
||||||
|
|
||||||
static inline uint32_t top12(float x)
|
static inline uint32_t
|
||||||
|
top12 (float x)
|
||||||
{
|
{
|
||||||
return asuint(x) >> 20;
|
return asuint (x) >> 20;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns 𝑒^x.
|
* Returns 𝑒^x.
|
||||||
|
*
|
||||||
|
* - ULP error: 0.502 (nearest rounding.)
|
||||||
|
* - Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.)
|
||||||
|
* - Wrong count: 170635 (all nearest rounding wrong results with fma.)
|
||||||
|
* - Non-nearest ULP error: 1 (rounded ULP error)
|
||||||
|
*
|
||||||
|
* @raise ERANGE on overflow or underflow
|
||||||
*/
|
*/
|
||||||
float expf(float x)
|
float
|
||||||
|
expf (float x)
|
||||||
{
|
{
|
||||||
uint32_t abstop;
|
uint32_t abstop;
|
||||||
uint64_t ki, t;
|
uint64_t ki, t;
|
||||||
double_t kd, xd, z, r, r2, y, s;
|
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||||
|
double_t kd, xd, z, r, r2, y, s;
|
||||||
|
|
||||||
xd = (double_t)x;
|
xd = (double_t) x;
|
||||||
abstop = top12(x) & 0x7ff;
|
abstop = top12 (x) & 0x7ff;
|
||||||
if (UNLIKELY(abstop >= top12(88.0f))) {
|
if (unlikely (abstop >= top12 (88.0f)))
|
||||||
/* |x| >= 88 or x is nan. */
|
{
|
||||||
if (asuint(x) == asuint(-INFINITY))
|
/* |x| >= 88 or x is nan. */
|
||||||
return 0.0f;
|
if (asuint (x) == asuint (-INFINITY))
|
||||||
if (abstop >= top12(INFINITY))
|
return 0.0f;
|
||||||
return x + x;
|
if (abstop >= top12 (INFINITY))
|
||||||
if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
|
return x + x;
|
||||||
return __math_oflowf(0);
|
if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
|
||||||
if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
|
return __math_oflowf (0);
|
||||||
return __math_uflowf(0);
|
if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
|
||||||
}
|
return __math_uflowf (0);
|
||||||
|
#if WANT_ERRNO_UFLOW
|
||||||
|
if (x < -0x1.9d1d9ep6f) /* x < log(0x1p-149) ~= -103.28 */
|
||||||
|
return __math_may_uflowf (0);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */
|
/* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */
|
||||||
z = InvLn2N * xd;
|
z = InvLn2N * xd;
|
||||||
|
|
||||||
/* Round and convert z to int, the result is in [-150*N, 128*N] and
|
/* Round and convert z to int, the result is in [-150*N, 128*N] and
|
||||||
ideally ties-to-even rule is used, otherwise the magnitude of r
|
ideally nearest int is used, otherwise the magnitude of r can be
|
||||||
can be bigger which gives larger approximation error. */
|
bigger which gives larger approximation error. */
|
||||||
#if TOINT_INTRINSICS
|
#if TOINT_INTRINSICS
|
||||||
kd = roundtoint(z);
|
kd = roundtoint (z);
|
||||||
ki = converttoint(z);
|
ki = converttoint (z);
|
||||||
#else
|
#else
|
||||||
# define SHIFT __exp2f_data.shift
|
# define SHIFT __exp2f_data.shift
|
||||||
kd = eval_as_double(z + SHIFT);
|
kd = eval_as_double (z + SHIFT);
|
||||||
ki = asuint64(kd);
|
ki = asuint64 (kd);
|
||||||
kd -= SHIFT;
|
kd -= SHIFT;
|
||||||
#endif
|
#endif
|
||||||
r = z - kd;
|
r = z - kd;
|
||||||
|
|
||||||
/* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
|
/* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
|
||||||
t = T[ki % N];
|
t = T[ki % N];
|
||||||
t += ki << (52 - EXP2F_TABLE_BITS);
|
t += ki << (52 - EXP2F_TABLE_BITS);
|
||||||
s = asdouble(t);
|
s = asdouble (t);
|
||||||
z = C[0] * r + C[1];
|
z = C[0] * r + C[1];
|
||||||
r2 = r * r;
|
r2 = r * r;
|
||||||
y = C[2] * r + 1;
|
y = C[2] * r + 1;
|
||||||
y = z * r2 + y;
|
y = z * r2 + y;
|
||||||
y = y * s;
|
y = y * s;
|
||||||
return eval_as_float(y);
|
return eval_as_float (y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if USE_GLIBC_ABI
|
||||||
|
strong_alias (expf, __expf_finite)
|
||||||
|
hidden_alias (expf, __ieee754_expf)
|
||||||
|
#endif
|
||||||
|
|
|
@ -29,7 +29,6 @@
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/math.h"
|
#include "libc/math.h"
|
||||||
#include "libc/tinymath/freebsd.internal.h"
|
#include "libc/tinymath/freebsd.internal.h"
|
||||||
|
|
||||||
__static_yoink("freebsd_libm_notice");
|
__static_yoink("freebsd_libm_notice");
|
||||||
|
|
||||||
#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
|
#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
|
||||||
|
|
|
@ -28,7 +28,6 @@
|
||||||
#include "libc/math.h"
|
#include "libc/math.h"
|
||||||
__static_yoink("musl_libc_notice");
|
__static_yoink("musl_libc_notice");
|
||||||
|
|
||||||
|
|
||||||
#if FLT_EVAL_METHOD > 1U && LDBL_MANT_DIG == 64
|
#if FLT_EVAL_METHOD > 1U && LDBL_MANT_DIG == 64
|
||||||
#define SPLIT (0x1p32 + 1)
|
#define SPLIT (0x1p32 + 1)
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -28,7 +28,11 @@
|
||||||
#include "libc/math.h"
|
#include "libc/math.h"
|
||||||
__static_yoink("musl_libc_notice");
|
__static_yoink("musl_libc_notice");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns euclidean distance.
|
||||||
|
*
|
||||||
|
* Max observed error is 1 ulp.
|
||||||
|
*/
|
||||||
float hypotf(float x, float y)
|
float hypotf(float x, float y)
|
||||||
{
|
{
|
||||||
union {float f; uint32_t i;} ux = {x}, uy = {y}, ut;
|
union {float f; uint32_t i;} ux = {x}, uy = {y}, ut;
|
||||||
|
|
175
libc/tinymath/hypotf2.c
Normal file
175
libc/tinymath/hypotf2.c
Normal file
|
@ -0,0 +1,175 @@
|
||||||
|
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
||||||
|
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
||||||
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
|
│ │
|
||||||
|
│ FreeBSD lib/msun/src/e_hypotf.c │
|
||||||
|
│ Copyright (c) 1992-2023 The FreeBSD Project. │
|
||||||
|
│ │
|
||||||
|
│ Redistribution and use in source and binary forms, with or without │
|
||||||
|
│ modification, are permitted provided that the following conditions │
|
||||||
|
│ are met: │
|
||||||
|
│ 1. Redistributions of source code must retain the above copyright │
|
||||||
|
│ notice, this list of conditions and the following disclaimer. │
|
||||||
|
│ 2. Redistributions in binary form must reproduce the above copyright │
|
||||||
|
│ notice, this list of conditions and the following disclaimer in the │
|
||||||
|
│ documentation and/or other materials provided with the distribution. │
|
||||||
|
│ │
|
||||||
|
│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND │
|
||||||
|
│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE │
|
||||||
|
│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE │
|
||||||
|
│ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE │
|
||||||
|
│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL │
|
||||||
|
│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS │
|
||||||
|
│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) │
|
||||||
|
│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT │
|
||||||
|
│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY │
|
||||||
|
│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF │
|
||||||
|
│ SUCH DAMAGE. │
|
||||||
|
│ │
|
||||||
|
│ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. │
|
||||||
|
│ │
|
||||||
|
│ Developed at SunPro, a Sun Microsystems, Inc. business. │
|
||||||
|
│ Permission to use, copy, modify, and distribute this │
|
||||||
|
│ software is freely granted, provided that this notice │
|
||||||
|
│ is preserved. │
|
||||||
|
│ │
|
||||||
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
|
#include "libc/tinymath/freebsd.internal.h"
|
||||||
|
__static_yoink("freebsd_libm_notice");
|
||||||
|
__static_yoink("fdlibm_notice");
|
||||||
|
|
||||||
|
static const float one = 1.0, tiny = 1.0e-30;

/**
 * Returns square root of x, generated one result bit at a time.
 *
 * The argument is unpacked into an integer significand and an unbiased
 * exponent, the exponent is made even and halved, and the significand
 * root is built with a restoring shift-and-subtract loop. A pair of
 * floating-point additions then probes the rounding direction to settle
 * the last bit.
 *
 * Special cases handled up front:
 *
 * - sqrt(NaN) is NaN and sqrt(+inf) is +inf
 * - sqrt(+-0) is +-0
 * - any other negative argument (including -inf) yields NaN
 */
float
sqrtf2(float x)
{
	const int32_t signbit = (int)0x80000000;
	int32_t bits,expo,root,accum,trial,shift;
	uint32_t bit;
	float res;

	GET_FLOAT_WORD(bits,x);

	/* Inf and NaN: x*x+x keeps NaN, keeps +inf, turns -inf into NaN */
	if((bits&0x7f800000)==0x7f800000)
		return x*x+x;

	/* zero and negative arguments */
	if(bits<=0) {
		if((bits&(~signbit))==0)
			return x;		/* sqrt(+-0) = +-0 */
		if(bits<0)
			return (x-x)/(x-x);	/* sqrt(-ve) = NaN */
	}

	/* split off the biased exponent; subnormals are shifted up
	   until the implicit-one position is occupied */
	expo = (bits>>23);
	if(expo==0) {
		shift = 0;
		while((bits&0x00800000)==0) {
			bits <<= 1;
			shift++;
		}
		expo -= shift-1;
	}
	expo -= 127;				/* unbias exponent */
	bits = (bits&0x007fffff)|0x00800000;
	if(expo&1)				/* odd exponent: double the */
		bits += bits;			/* significand to make it even */
	expo >>= 1;				/* expo = [expo/2] */

	/* generate sqrt(x) bit by bit: root accumulates the result,
	   bit is the candidate bit moving from high to low */
	bits += bits;
	root = accum = 0;
	for(bit=0x01000000;bit!=0;bit>>=1) {
		trial = accum+bit;
		if(trial<=bits) {
			accum = trial+bit;
			bits -= trial;
			root += bit;
		}
		bits += bits;
	}

	/* a nonzero remainder means the result is inexact; use floating
	   adds to find out the rounding direction */
	if(bits!=0) {
		res = one-tiny;			/* trigger inexact flag */
		if(res>=one) {
			res = one+tiny;
			root += (res>one) ? 2 : (root&1);
		}
	}

	/* reassemble: halved significand root plus the halved exponent */
	bits = (root>>1)+0x3f000000;
	bits += ((uint32_t)expo<<23);
	SET_FLOAT_WORD(res,bits);
	return res;
}
|
||||||
|
|
||||||
|
/**
 * Returns euclidean distance, i.e. sqrt(x*x + y*y).
 *
 * Error is less than 1 ULP.
 *
 * The arguments are ordered by magnitude and, when very large or very
 * small, rescaled by a power of two so the squares neither overflow nor
 * underflow; the scaling is undone on the result. The sum of squares is
 * formed from split (high/low) parts and handed to sqrtf2().
 *
 * If either argument is infinite the result is +inf, even when the
 * other argument is NaN; otherwise a NaN argument yields NaN.
 */
float
hypotf2(float x, float y)
{
	float big,small,hi1,lo1,hi2,lo2,res;
	int32_t swap,scale,hbig,hsmall;

	GET_FLOAT_WORD(hbig,x);
	hbig &= 0x7fffffff;
	GET_FLOAT_WORD(hsmall,y);
	hsmall &= 0x7fffffff;

	/* order the operands so that |big| >= |small| */
	if(hsmall > hbig) {
		big = y;
		small = x;
		swap = hbig;
		hbig = hsmall;
		hsmall = swap;
	} else {
		big = x;
		small = y;
	}
	big = fabsf(big);
	small = fabsf(small);

	/* x/y > 2**30: the smaller operand cannot affect the result */
	if((hbig-hsmall)>0xf000000)
		return big+small;

	scale = 0;
	if(hbig > 0x58800000) {			/* big > 2**50 */
		if(hbig >= 0x7f800000) {	/* Inf or NaN */
			/* Use original arg order iff result is NaN; quieten sNaNs. */
			res = fabsl(x+0.0L)-fabsf(y+0);
			if(hbig == 0x7f800000) res = big;
			if(hsmall == 0x7f800000) res = small;
			return res;
		}
		/* scale big and small by 2**-68 */
		hbig -= 0x22000000;
		hsmall -= 0x22000000;
		scale += 68;
		SET_FLOAT_WORD(big,hbig);
		SET_FLOAT_WORD(small,hsmall);
	}
	if(hsmall < 0x26800000) {		/* small < 2**-50 */
		if(hsmall <= 0x007fffff) {	/* subnormal small or 0 */
			if(hsmall==0) return big;
			SET_FLOAT_WORD(hi1,0x7e800000);	/* hi1=2^126 */
			small *= hi1;
			big *= hi1;
			scale -= 126;
		} else {			/* scale big and small by 2^68 */
			hbig += 0x22000000;	/* big *= 2^68 */
			hsmall += 0x22000000;	/* small *= 2^68 */
			scale -= 68;
			SET_FLOAT_WORD(big,hbig);
			SET_FLOAT_WORD(small,hsmall);
		}
	}

	/* medium size big and small */
	res = big-small;
	if (res>small) {
		SET_FLOAT_WORD(hi1,hbig&0xfffff000);
		lo1 = big-hi1;
		res = sqrtf2(hi1*hi1-(small*(-small)-lo1*(big+hi1)));
	} else {
		big = big+big;
		SET_FLOAT_WORD(hi2,hsmall&0xfffff000);
		lo2 = small - hi2;
		SET_FLOAT_WORD(hi1,(hbig+0x00800000)&0xfffff000);
		lo1 = big - hi1;
		res = sqrtf2(hi1*hi2-(res*(-res)-(hi1*lo2+lo1*small)));
	}

	/* undo the power-of-two scaling, if any was applied */
	if(scale==0)
		return res;
	SET_FLOAT_WORD(hi1,(127+scale)<<23);
	return hi1*res;
}
|
|
@ -30,7 +30,6 @@
|
||||||
#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
|
#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
|
||||||
__static_yoink("musl_libc_notice");
|
__static_yoink("musl_libc_notice");
|
||||||
|
|
||||||
|
|
||||||
#if LDBL_MANT_DIG == 64
|
#if LDBL_MANT_DIG == 64
|
||||||
#define SPLIT (0x1p32L+1)
|
#define SPLIT (0x1p32L+1)
|
||||||
#elif LDBL_MANT_DIG == 113
|
#elif LDBL_MANT_DIG == 113
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,19 +25,9 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/intrin/likely.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
#include "libc/tinymath/log_data.internal.h"
|
|
||||||
__static_yoink("arm_optimized_routines_notice");
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
|
|
||||||
/*
|
|
||||||
* Double-precision log(x) function.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2018, Arm Limited.
|
|
||||||
* SPDX-License-Identifier: MIT
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define T __log_data.tab
|
#define T __log_data.tab
|
||||||
#define T2 __log_data.tab2
|
#define T2 __log_data.tab2
|
||||||
#define B __log_data.poly1
|
#define B __log_data.poly1
|
||||||
|
@ -47,95 +37,151 @@ __static_yoink("arm_optimized_routines_notice");
|
||||||
#define N (1 << LOG_TABLE_BITS)
|
#define N (1 << LOG_TABLE_BITS)
|
||||||
#define OFF 0x3fe6000000000000
|
#define OFF 0x3fe6000000000000
|
||||||
|
|
||||||
/**
|
/* Top 16 bits of a double. */
|
||||||
* Returns natural logarithm of 𝑥.
|
static inline uint32_t
|
||||||
*/
|
top16 (double x)
|
||||||
double log(double x)
|
|
||||||
{
|
{
|
||||||
double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
|
return asuint64 (x) >> 48;
|
||||||
uint64_t ix, iz, tmp;
|
|
||||||
uint32_t top;
|
|
||||||
int k, i;
|
|
||||||
|
|
||||||
ix = asuint64(x);
|
|
||||||
top = ix >> 48;
|
|
||||||
#define LO asuint64(1.0 - 0x1p-4)
|
|
||||||
#define HI asuint64(1.0 + 0x1.09p-4)
|
|
||||||
if (UNLIKELY(ix - LO < HI - LO)) {
|
|
||||||
/* Handle close to 1.0 inputs separately. */
|
|
||||||
/* Fix sign of zero with downward rounding when x==1. */
|
|
||||||
if (WANT_ROUNDING && UNLIKELY(ix == asuint64(1.0)))
|
|
||||||
return 0;
|
|
||||||
r = x - 1.0;
|
|
||||||
r2 = r * r;
|
|
||||||
r3 = r * r2;
|
|
||||||
y = r3 *
|
|
||||||
(B[1] + r * B[2] + r2 * B[3] +
|
|
||||||
r3 * (B[4] + r * B[5] + r2 * B[6] +
|
|
||||||
r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
|
|
||||||
/* Worst-case error is around 0.507 ULP. */
|
|
||||||
w = r * 0x1p27;
|
|
||||||
double_t rhi = r + w - w;
|
|
||||||
double_t rlo = r - rhi;
|
|
||||||
w = rhi * rhi * B[0]; /* B[0] == -0.5. */
|
|
||||||
hi = r + w;
|
|
||||||
lo = r - hi + w;
|
|
||||||
lo += B[0] * rlo * (rhi + r);
|
|
||||||
y += lo;
|
|
||||||
y += hi;
|
|
||||||
return eval_as_double(y);
|
|
||||||
}
|
|
||||||
if (UNLIKELY(top - 0x0010 >= 0x7ff0 - 0x0010)) {
|
|
||||||
/* x < 0x1p-1022 or inf or nan. */
|
|
||||||
if (ix * 2 == 0)
|
|
||||||
return __math_divzero(1);
|
|
||||||
if (ix == asuint64(INFINITY)) /* log(inf) == inf. */
|
|
||||||
return x;
|
|
||||||
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
|
|
||||||
return __math_invalid(x);
|
|
||||||
/* x is subnormal, normalize it. */
|
|
||||||
ix = asuint64(x * 0x1p52);
|
|
||||||
ix -= 52ULL << 52;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
|
|
||||||
The range is split into N subintervals.
|
|
||||||
The ith subinterval contains z and c is near its center. */
|
|
||||||
tmp = ix - OFF;
|
|
||||||
i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
|
|
||||||
k = (int64_t)tmp >> 52; /* arithmetic shift */
|
|
||||||
iz = ix - (tmp & 0xfffULL << 52);
|
|
||||||
invc = T[i].invc;
|
|
||||||
logc = T[i].logc;
|
|
||||||
z = asdouble(iz);
|
|
||||||
|
|
||||||
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
|
|
||||||
/* r ~= z/c - 1, |r| < 1/(2*N). */
|
|
||||||
#if __FP_FAST_FMA
|
|
||||||
/* rounding error: 0x1p-55/N. */
|
|
||||||
r = __builtin_fma(z, invc, -1.0);
|
|
||||||
#else
|
|
||||||
/* rounding error: 0x1p-55/N + 0x1p-66. */
|
|
||||||
r = (z - T2[i].chi - T2[i].clo) * invc;
|
|
||||||
#endif
|
|
||||||
kd = (double_t)k;
|
|
||||||
|
|
||||||
/* hi + lo = r + log(c) + k*Ln2. */
|
|
||||||
w = kd * Ln2hi + logc;
|
|
||||||
hi = w + r;
|
|
||||||
lo = w - hi + r + kd * Ln2lo;
|
|
||||||
|
|
||||||
/* log(x) = lo + (log1p(r) - r) + hi. */
|
|
||||||
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
|
|
||||||
/* Worst case error if |y| > 0x1p-5:
|
|
||||||
0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
|
|
||||||
Worst case error if |y| > 0x1p-4:
|
|
||||||
0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
|
|
||||||
y = lo + r2 * A[0] +
|
|
||||||
r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
|
|
||||||
return eval_as_double(y);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
|
/**
|
||||||
__weak_reference(log, logl);
|
* Returns natural logarithm of 𝑥.
|
||||||
|
*
|
||||||
|
* @raise EDOM and FE_INVALID if x is negative
|
||||||
|
* @raise ERANGE and FE_DIVBYZERO if x is zero
|
||||||
|
*/
|
||||||
|
double
|
||||||
|
log (double x)
|
||||||
|
{
|
||||||
|
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||||
|
double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
|
||||||
|
uint64_t ix, iz, tmp;
|
||||||
|
uint32_t top;
|
||||||
|
int k, i;
|
||||||
|
|
||||||
|
ix = asuint64 (x);
|
||||||
|
top = top16 (x);
|
||||||
|
|
||||||
|
#if LOG_POLY1_ORDER == 10 || LOG_POLY1_ORDER == 11
|
||||||
|
# define LO asuint64 (1.0 - 0x1p-5)
|
||||||
|
# define HI asuint64 (1.0 + 0x1.1p-5)
|
||||||
|
#elif LOG_POLY1_ORDER == 12
|
||||||
|
# define LO asuint64 (1.0 - 0x1p-4)
|
||||||
|
# define HI asuint64 (1.0 + 0x1.09p-4)
|
||||||
|
#endif
|
||||||
|
if (unlikely (ix - LO < HI - LO))
|
||||||
|
{
|
||||||
|
/* Handle close to 1.0 inputs separately. */
|
||||||
|
/* Fix sign of zero with downward rounding when x==1. */
|
||||||
|
if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
|
||||||
|
return 0;
|
||||||
|
r = x - 1.0;
|
||||||
|
r2 = r * r;
|
||||||
|
r3 = r * r2;
|
||||||
|
#if LOG_POLY1_ORDER == 10
|
||||||
|
/* Worst-case error is around 0.516 ULP. */
|
||||||
|
y = r3 * (B[1] + r * B[2] + r2 * B[3]
|
||||||
|
+ r3 * (B[4] + r * B[5] + r2 * B[6] + r3 * (B[7] + r * B[8])));
|
||||||
|
w = B[0] * r2; /* B[0] == -0.5. */
|
||||||
|
hi = r + w;
|
||||||
|
y += r - hi + w;
|
||||||
|
y += hi;
|
||||||
|
#elif LOG_POLY1_ORDER == 11
|
||||||
|
/* Worst-case error is around 0.516 ULP. */
|
||||||
|
y = r3 * (B[1] + r * B[2]
|
||||||
|
+ r2 * (B[3] + r * B[4] + r2 * B[5]
|
||||||
|
+ r3 * (B[6] + r * B[7] + r2 * B[8] + r3 * B[9])));
|
||||||
|
w = B[0] * r2; /* B[0] == -0.5. */
|
||||||
|
hi = r + w;
|
||||||
|
y += r - hi + w;
|
||||||
|
y += hi;
|
||||||
|
#elif LOG_POLY1_ORDER == 12
|
||||||
|
y = r3 * (B[1] + r * B[2] + r2 * B[3]
|
||||||
|
+ r3 * (B[4] + r * B[5] + r2 * B[6]
|
||||||
|
+ r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
|
||||||
|
# if N <= 64
|
||||||
|
/* Worst-case error is around 0.532 ULP. */
|
||||||
|
w = B[0] * r2; /* B[0] == -0.5. */
|
||||||
|
hi = r + w;
|
||||||
|
y += r - hi + w;
|
||||||
|
y += hi;
|
||||||
|
# else
|
||||||
|
/* Worst-case error is around 0.507 ULP. */
|
||||||
|
w = r * 0x1p27;
|
||||||
|
double_t rhi = r + w - w;
|
||||||
|
double_t rlo = r - rhi;
|
||||||
|
w = rhi * rhi * B[0]; /* B[0] == -0.5. */
|
||||||
|
hi = r + w;
|
||||||
|
lo = r - hi + w;
|
||||||
|
lo += B[0] * rlo * (rhi + r);
|
||||||
|
y += lo;
|
||||||
|
y += hi;
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
return eval_as_double (y);
|
||||||
|
}
|
||||||
|
if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
|
||||||
|
{
|
||||||
|
/* x < 0x1p-1022 or inf or nan. */
|
||||||
|
if (ix * 2 == 0)
|
||||||
|
return __math_divzero (1);
|
||||||
|
if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */
|
||||||
|
return x;
|
||||||
|
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
|
||||||
|
return __math_invalid (x);
|
||||||
|
/* x is subnormal, normalize it. */
|
||||||
|
ix = asuint64 (x * 0x1p52);
|
||||||
|
ix -= 52ULL << 52;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
|
||||||
|
The range is split into N subintervals.
|
||||||
|
The ith subinterval contains z and c is near its center. */
|
||||||
|
tmp = ix - OFF;
|
||||||
|
i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
|
||||||
|
k = (int64_t) tmp >> 52; /* arithmetic shift */
|
||||||
|
iz = ix - (tmp & 0xfffULL << 52);
|
||||||
|
invc = T[i].invc;
|
||||||
|
logc = T[i].logc;
|
||||||
|
z = asdouble (iz);
|
||||||
|
|
||||||
|
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
|
||||||
|
/* r ~= z/c - 1, |r| < 1/(2*N). */
|
||||||
|
#if HAVE_FAST_FMA
|
||||||
|
/* rounding error: 0x1p-55/N. */
|
||||||
|
r = fma (z, invc, -1.0);
|
||||||
|
#else
|
||||||
|
/* rounding error: 0x1p-55/N + 0x1p-66. */
|
||||||
|
r = (z - T2[i].chi - T2[i].clo) * invc;
|
||||||
|
#endif
|
||||||
|
kd = (double_t) k;
|
||||||
|
|
||||||
|
/* hi + lo = r + log(c) + k*Ln2. */
|
||||||
|
w = kd * Ln2hi + logc;
|
||||||
|
hi = w + r;
|
||||||
|
lo = w - hi + r + kd * Ln2lo;
|
||||||
|
|
||||||
|
/* log(x) = lo + (log1p(r) - r) + hi. */
|
||||||
|
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
|
||||||
|
/* Worst case error if |y| > 0x1p-5:
|
||||||
|
0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
|
||||||
|
Worst case error if |y| > 0x1p-4:
|
||||||
|
0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
|
||||||
|
#if LOG_POLY_ORDER == 6
|
||||||
|
y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
|
||||||
|
#elif LOG_POLY_ORDER == 7
|
||||||
|
y = lo
|
||||||
|
+ r2 * (A[0] + r * A[1] + r2 * (A[2] + r * A[3])
|
||||||
|
+ r2 * r2 * (A[4] + r * A[5]))
|
||||||
|
+ hi;
|
||||||
|
#endif
|
||||||
|
return eval_as_double (y);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if USE_GLIBC_ABI
|
||||||
|
strong_alias (log, __log_finite)
|
||||||
|
hidden_alias (log, __ieee754_log)
|
||||||
|
# if LDBL_MANT_DIG == 53
|
||||||
|
long double logl (long double x) { return log (x); }
|
||||||
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -29,11 +29,9 @@
|
||||||
#include "libc/math.h"
|
#include "libc/math.h"
|
||||||
#include "libc/tinymath/complex.internal.h"
|
#include "libc/tinymath/complex.internal.h"
|
||||||
#include "libc/tinymath/internal.h"
|
#include "libc/tinymath/internal.h"
|
||||||
#include "libc/tinymath/log2_data.internal.h"
|
|
||||||
__static_yoink("musl_libc_notice");
|
__static_yoink("musl_libc_notice");
|
||||||
__static_yoink("fdlibm_notice");
|
__static_yoink("fdlibm_notice");
|
||||||
|
|
||||||
|
|
||||||
/* origin: FreeBSD /usr/src/lib/msun/src/e_log10.c */
|
/* origin: FreeBSD /usr/src/lib/msun/src/e_log10.c */
|
||||||
/*
|
/*
|
||||||
* ====================================================
|
* ====================================================
|
||||||
|
|
|
@ -2,74 +2,84 @@
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Musl Libc │
|
│ Copyright (c) 1992-2024 The FreeBSD Project │
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
│ Copyright (c) 1993 Sun Microsystems, Inc. │
|
||||||
|
│ All rights reserved. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Redistribution and use in source and binary forms, with or without │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ modification, are permitted provided that the following conditions │
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
│ are met: │
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
│ 1. Redistributions of source code must retain the above copyright │
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
│ notice, this list of conditions and the following disclaimer. │
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
│ 2. Redistributions in binary form must reproduce the above copyright │
|
||||||
│ the following conditions: │
|
│ notice, this list of conditions and the following disclaimer in the │
|
||||||
|
│ documentation and/or other materials provided with the distribution. │
|
||||||
│ │
|
│ │
|
||||||
│ The above copyright notice and this permission notice shall be │
|
│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND │
|
||||||
│ included in all copies or substantial portions of the Software. │
|
│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE │
|
||||||
│ │
|
│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE │
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
│ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE │
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL │
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS │
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) │
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT │
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY │
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF │
|
||||||
|
│ SUCH DAMAGE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/intrin/likely.h"
|
#include "libc/tinymath/freebsd.internal.h"
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
#include "libc/tinymath/log_data.internal.h"
|
|
||||||
__static_yoink("freebsd_libm_notice");
|
__static_yoink("freebsd_libm_notice");
|
||||||
__static_yoink("fdlibm_notice");
|
__static_yoink("fdlibm_notice");
|
||||||
|
|
||||||
/* origin: FreeBSD /usr/src/lib/msun/src/s_log1p.c */
|
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
/* double log1p(double x)
|
/* double log1p(double x)
|
||||||
* Return the natural logarithm of 1+x.
|
|
||||||
*
|
*
|
||||||
* Method :
|
* Method :
|
||||||
* 1. Argument Reduction: find k and f such that
|
* 1. Argument Reduction: find k and f such that
|
||||||
* 1+x = 2^k * (1+f),
|
* 1+x = 2^k * (1+f),
|
||||||
* where sqrt(2)/2 < 1+f < sqrt(2) .
|
* where sqrt(2)/2 < 1+f < sqrt(2) .
|
||||||
*
|
*
|
||||||
* Note. If k=0, then f=x is exact. However, if k!=0, then f
|
* Note. If k=0, then f=x is exact. However, if k!=0, then f
|
||||||
* may not be representable exactly. In that case, a correction
|
* may not be representable exactly. In that case, a correction
|
||||||
* term is needed. Let u=1+x rounded. Let c = (1+x)-u, then
|
* term is needed. Let u=1+x rounded. Let c = (1+x)-u, then
|
||||||
* log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
|
* log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
|
||||||
* and add back the correction term c/u.
|
* and add back the correction term c/u.
|
||||||
* (Note: when x > 2**53, one can simply return log(x))
|
* (Note: when x > 2**53, one can simply return log(x))
|
||||||
*
|
*
|
||||||
* 2. Approximation of log(1+f): See log.c
|
* 2. Approximation of log1p(f).
|
||||||
|
* Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
|
||||||
|
* = 2s + 2/3 s**3 + 2/5 s**5 + .....,
|
||||||
|
* = 2s + s*R
|
||||||
|
* We use a special Remez algorithm on [0,0.1716] to generate
|
||||||
|
* a polynomial of degree 14 to approximate R. The maximum error
|
||||||
|
* of this polynomial approximation is bounded by 2**-58.45. In
|
||||||
|
* other words,
|
||||||
|
* 2 4 6 8 10 12 14
|
||||||
|
* R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s +Lp6*s +Lp7*s
|
||||||
|
* (the values of Lp1 to Lp7 are listed in the program)
|
||||||
|
* and
|
||||||
|
* | 2 14 | -58.45
|
||||||
|
* | Lp1*s +...+Lp7*s - R(z) | <= 2
|
||||||
|
* | |
|
||||||
|
* Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
|
||||||
|
* In order to guarantee error in log below 1ulp, we compute log
|
||||||
|
* by
|
||||||
|
* log1p(f) = f - (hfsq - s*(hfsq+R)).
|
||||||
*
|
*
|
||||||
* 3. Finally, log1p(x) = k*ln2 + log(1+f) + c/u. See log.c
|
* 3. Finally, log1p(x) = k*ln2 + log1p(f).
|
||||||
|
* = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
|
||||||
|
* Here ln2 is split into two floating point numbers:
|
||||||
|
* ln2_hi + ln2_lo,
|
||||||
|
* where n*ln2_hi is always exact for |n| < 2000.
|
||||||
*
|
*
|
||||||
* Special cases:
|
* Special cases:
|
||||||
* log1p(x) is NaN with signal if x < -1 (including -INF) ;
|
* log1p(x) is NaN with signal if x < -1 (including -INF) ;
|
||||||
* log1p(+INF) is +INF; log1p(-1) is -INF with signal;
|
* log1p(+INF) is +INF; log1p(-1) is -INF with signal;
|
||||||
* log1p(NaN) is that NaN with no signal.
|
* log1p(NaN) is that NaN with no signal.
|
||||||
*
|
*
|
||||||
* Accuracy:
|
* Accuracy:
|
||||||
* according to an error analysis, the error is always less than
|
* according to an error analysis, the error is always less than
|
||||||
* 1 ulp (unit in the last place).
|
* 1 ulp (unit in the last place).
|
||||||
*
|
*
|
||||||
* Constants:
|
* Constants:
|
||||||
* The hexadecimal values are the intended ones for the following
|
* The hexadecimal values are the intended ones for the following
|
||||||
|
@ -78,84 +88,110 @@ __static_yoink("fdlibm_notice");
|
||||||
* to produce the hexadecimal values shown.
|
* to produce the hexadecimal values shown.
|
||||||
*
|
*
|
||||||
* Note: Assuming log() returns an accurate answer, the following
|
* Note: Assuming log() returns an accurate answer, the following
|
||||||
* algorithm can be used to compute log1p(x) to within a few ULP:
|
* algorithm can be used to compute log1p(x) to within a few ULP:
|
||||||
*
|
*
|
||||||
* u = 1+x;
|
* u = 1+x;
|
||||||
* if(u==1.0) return x ; else
|
* if(u==1.0) return x ; else
|
||||||
* return log(u)*(x/(u-1.0));
|
* return log(u)*(x/(u-1.0));
|
||||||
*
|
*
|
||||||
* See HP-15C Advanced Functions Handbook, p.193.
|
* See HP-15C Advanced Functions Handbook, p.193.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static const double
|
static const double
|
||||||
ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
|
ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
|
||||||
ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
|
ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
|
||||||
Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
|
two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */
|
||||||
Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
|
Lp1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
|
||||||
Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
|
Lp2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
|
||||||
Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
|
Lp3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
|
||||||
Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
|
Lp4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
|
||||||
Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
|
Lp5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
|
||||||
Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
|
Lp6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
|
||||||
|
Lp7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
|
||||||
|
|
||||||
|
static const double zero = 0.0;
|
||||||
|
static volatile double vzero = 0.0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns log(𝟷+𝑥).
|
* Returns log(1 + x).
|
||||||
*/
|
*/
|
||||||
double log1p(double x)
|
double
|
||||||
|
log1p(double x)
|
||||||
{
|
{
|
||||||
union {double f; uint64_t i;} u = {x};
|
double hfsq,f,c,s,z,R,u;
|
||||||
double_t hfsq,f,c,s,z,R,w,t1,t2,dk;
|
int32_t k,hx,hu,ax;
|
||||||
uint32_t hx,hu;
|
|
||||||
int k;
|
GET_HIGH_WORD(hx,x);
|
||||||
|
ax = hx&0x7fffffff;
|
||||||
|
|
||||||
hx = u.i>>32;
|
|
||||||
k = 1;
|
k = 1;
|
||||||
if (hx < 0x3fda827a || hx>>31) { /* 1+x < sqrt(2)+ */
|
if (hx < 0x3FDA827A) { /* 1+x < sqrt(2)+ */
|
||||||
if (hx >= 0xbff00000) { /* x <= -1.0 */
|
if(ax>=0x3ff00000) { /* x <= -1.0 */
|
||||||
if (x == -1)
|
if(x==-1.0) return -two54/vzero; /* log1p(-1)=-inf */
|
||||||
return x/0.0; /* log1p(-1) = -inf */
|
else return (x-x)/(x-x); /* log1p(x<-1)=NaN */
|
||||||
return (x-x)/0.0; /* log1p(x<-1) = NaN */
|
}
|
||||||
}
|
if(ax<0x3e200000) { /* |x| < 2**-29 */
|
||||||
if (hx<<1 < 0x3ca00000<<1) { /* |x| < 2**-53 */
|
if(two54+x>zero /* raise inexact */
|
||||||
/* underflow if subnormal */
|
&&ax<0x3c900000) /* |x| < 2**-54 */
|
||||||
if ((hx&0x7ff00000) == 0)
|
return x;
|
||||||
FORCE_EVAL((float)x);
|
else
|
||||||
return x;
|
return x - x*x*0.5;
|
||||||
}
|
}
|
||||||
if (hx <= 0xbfd2bec4) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
|
if(hx>0||hx<=((int32_t)0xbfd2bec4)) {
|
||||||
k = 0;
|
k=0;f=x;hu=1;} /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
|
||||||
c = 0;
|
|
||||||
f = x;
|
|
||||||
}
|
|
||||||
} else if (hx >= 0x7ff00000)
|
|
||||||
return x;
|
|
||||||
if (k) {
|
|
||||||
u.f = 1 + x;
|
|
||||||
hu = u.i>>32;
|
|
||||||
hu += 0x3ff00000 - 0x3fe6a09e;
|
|
||||||
k = (int)(hu>>20) - 0x3ff;
|
|
||||||
/* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
|
|
||||||
if (k < 54) {
|
|
||||||
c = k >= 2 ? 1-(u.f-x) : x-(u.f-1);
|
|
||||||
c /= u.f;
|
|
||||||
} else
|
|
||||||
c = 0;
|
|
||||||
/* reduce u into [sqrt(2)/2, sqrt(2)] */
|
|
||||||
hu = (hu&0x000fffff) + 0x3fe6a09e;
|
|
||||||
u.i = (uint64_t)hu<<32 | (u.i&0xffffffff);
|
|
||||||
f = u.f - 1;
|
|
||||||
}
|
}
|
||||||
hfsq = 0.5*f*f;
|
if (hx >= 0x7ff00000) return x+x;
|
||||||
s = f/(2.0+f);
|
if(k!=0) {
|
||||||
|
if(hx<0x43400000) {
|
||||||
|
STRICT_ASSIGN(double,u,1.0+x);
|
||||||
|
GET_HIGH_WORD(hu,u);
|
||||||
|
k = (hu>>20)-1023;
|
||||||
|
c = (k>0)? 1.0-(u-x):x-(u-1.0);/* correction term */
|
||||||
|
c /= u;
|
||||||
|
} else {
|
||||||
|
u = x;
|
||||||
|
GET_HIGH_WORD(hu,u);
|
||||||
|
k = (hu>>20)-1023;
|
||||||
|
c = 0;
|
||||||
|
}
|
||||||
|
hu &= 0x000fffff;
|
||||||
|
/*
|
||||||
|
* The approximation to sqrt(2) used in thresholds is not
|
||||||
|
* critical. However, the ones used above must give less
|
||||||
|
* strict bounds than the one here so that the k==0 case is
|
||||||
|
* never reached from here, since here we have committed to
|
||||||
|
* using the correction term but don't use it if k==0.
|
||||||
|
*/
|
||||||
|
if(hu<0x6a09e) { /* u ~< sqrt(2) */
|
||||||
|
SET_HIGH_WORD(u,hu|0x3ff00000); /* normalize u */
|
||||||
|
} else {
|
||||||
|
k += 1;
|
||||||
|
SET_HIGH_WORD(u,hu|0x3fe00000); /* normalize u/2 */
|
||||||
|
hu = (0x00100000-hu)>>2;
|
||||||
|
}
|
||||||
|
f = u-1.0;
|
||||||
|
}
|
||||||
|
hfsq=0.5*f*f;
|
||||||
|
if(hu==0) { /* |f| < 2**-20 */
|
||||||
|
if(f==zero) {
|
||||||
|
if(k==0) {
|
||||||
|
return zero;
|
||||||
|
} else {
|
||||||
|
c += k*ln2_lo;
|
||||||
|
return k*ln2_hi+c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
R = hfsq*(1.0-0.66666666666666666*f);
|
||||||
|
if(k==0) return f-R; else
|
||||||
|
return k*ln2_hi-((R-(k*ln2_lo+c))-f);
|
||||||
|
}
|
||||||
|
s = f/(2.0+f);
|
||||||
z = s*s;
|
z = s*s;
|
||||||
w = z*z;
|
R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7))))));
|
||||||
t1 = w*(Lg2+w*(Lg4+w*Lg6));
|
if(k==0) return f-(hfsq-s*(hfsq+R)); else
|
||||||
t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
|
return k*ln2_hi-((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f);
|
||||||
R = t2 + t1;
|
|
||||||
dk = k;
|
|
||||||
return s*(hfsq+R) + (dk*ln2_lo+c) - hfsq + f + dk*ln2_hi;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
|
#if (LDBL_MANT_DIG == 53)
|
||||||
__weak_reference(log1p, log1pl);
|
__weak_reference(log1p, log1pl);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,175 +1,133 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi │
|
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Copyright (c) 1992-2024 The FreeBSD Project │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 1993 Sun Microsystems, Inc. │
|
||||||
|
│ All rights reserved. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Redistribution and use in source and binary forms, with or without │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ modification, are permitted provided that the following conditions │
|
||||||
│ "Software"), to deal in the Software without restriction, including │
|
│ are met: │
|
||||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
│ 1. Redistributions of source code must retain the above copyright │
|
||||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
│ notice, this list of conditions and the following disclaimer. │
|
||||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
│ 2. Redistributions in binary form must reproduce the above copyright │
|
||||||
│ the following conditions: │
|
│ notice, this list of conditions and the following disclaimer in the │
|
||||||
|
│ documentation and/or other materials provided with the distribution. │
|
||||||
│ │
|
│ │
|
||||||
│ The above copyright notice and this permission notice shall be │
|
│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND │
|
||||||
│ included in all copies or substantial portions of the Software. │
|
│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE │
|
||||||
│ │
|
│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE │
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
│ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE │
|
||||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL │
|
||||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS │
|
||||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) │
|
||||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT │
|
||||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY │
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF │
|
||||||
|
│ SUCH DAMAGE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/intrin/likely.h"
|
#include "libc/tinymath/freebsd.internal.h"
|
||||||
#include "libc/math.h"
|
__static_yoink("freebsd_libm_notice");
|
||||||
#include "libc/tinymath/internal.h"
|
__static_yoink("fdlibm_notice");
|
||||||
#include "libc/tinymath/log1pf_data.internal.h"
|
|
||||||
__static_yoink("arm_optimized_routines_notice");
|
|
||||||
|
|
||||||
#define Ln2 (0x1.62e43p-1f)
|
/* s_log1pf.c -- float version of s_log1p.c.
|
||||||
#define SignMask (0x80000000)
|
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
|
||||||
|
*/
|
||||||
|
|
||||||
/* Biased exponent of the largest float m for which m^8 underflows. */
|
static const float
|
||||||
#define M8UFLOW_BOUND_BEXP 112
|
ln2_hi = 6.9313812256e-01, /* 0x3f317180 */
|
||||||
/* Biased exponent of the largest float for which we just return x. */
|
ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */
|
||||||
#define TINY_BOUND_BEXP 103
|
two25 = 3.355443200e+07, /* 0x4c000000 */
|
||||||
|
Lp1 = 6.6666668653e-01, /* 3F2AAAAB */
|
||||||
|
Lp2 = 4.0000000596e-01, /* 3ECCCCCD */
|
||||||
|
Lp3 = 2.8571429849e-01, /* 3E924925 */
|
||||||
|
Lp4 = 2.2222198546e-01, /* 3E638E29 */
|
||||||
|
Lp5 = 1.8183572590e-01, /* 3E3A3325 */
|
||||||
|
Lp6 = 1.5313838422e-01, /* 3E1CD04F */
|
||||||
|
Lp7 = 1.4798198640e-01; /* 3E178897 */
|
||||||
|
|
||||||
#define C(i) __log1pf_data.coeffs[i]
|
static const float zero = 0.0;
|
||||||
|
static volatile float vzero = 0.0;
|
||||||
|
|
||||||
static inline float
|
/**
|
||||||
eval_poly (float m, uint32_t e)
|
* Returns log(1 + x).
|
||||||
{
|
*/
|
||||||
#ifdef LOG1PF_2U5
|
|
||||||
|
|
||||||
/* 2.5 ulp variant. Approximate log(1+m) on [-0.25, 0.5] using
|
|
||||||
slightly modified Estrin scheme (no x^0 term, and x term is just x). */
|
|
||||||
float p_12 = fmaf (m, C (1), C (0));
|
|
||||||
float p_34 = fmaf (m, C (3), C (2));
|
|
||||||
float p_56 = fmaf (m, C (5), C (4));
|
|
||||||
float p_78 = fmaf (m, C (7), C (6));
|
|
||||||
|
|
||||||
float m2 = m * m;
|
|
||||||
float p_02 = fmaf (m2, p_12, m);
|
|
||||||
float p_36 = fmaf (m2, p_56, p_34);
|
|
||||||
float p_79 = fmaf (m2, C (8), p_78);
|
|
||||||
|
|
||||||
float m4 = m2 * m2;
|
|
||||||
float p_06 = fmaf (m4, p_36, p_02);
|
|
||||||
|
|
||||||
if (UNLIKELY (e < M8UFLOW_BOUND_BEXP))
|
|
||||||
return p_06;
|
|
||||||
|
|
||||||
float m8 = m4 * m4;
|
|
||||||
return fmaf (m8, p_79, p_06);
|
|
||||||
|
|
||||||
#elif defined(LOG1PF_1U3)
|
|
||||||
|
|
||||||
/* 1.3 ulp variant. Approximate log(1+m) on [-0.25, 0.5] using Horner
|
|
||||||
scheme. Our polynomial approximation for log1p has the form
|
|
||||||
x + C1 * x^2 + C2 * x^3 + C3 * x^4 + ...
|
|
||||||
Hence approximation has the form m + m^2 * P(m)
|
|
||||||
where P(x) = C1 + C2 * x + C3 * x^2 + ... . */
|
|
||||||
return fmaf (m, m * HORNER_8 (m, C), m);
|
|
||||||
|
|
||||||
#else
|
|
||||||
#error No log1pf approximation exists with the requested precision. Options are 13 or 25.
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline uint32_t
|
|
||||||
biased_exponent (uint32_t ix)
|
|
||||||
{
|
|
||||||
return (ix & 0x7f800000) >> 23;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* log1pf approximation using polynomial on reduced interval. Worst-case error
|
|
||||||
when using Estrin is roughly 2.02 ULP:
|
|
||||||
log1pf(0x1.21e13ap-2) got 0x1.fe8028p-3 want 0x1.fe802cp-3. */
|
|
||||||
float
|
float
|
||||||
log1pf (float x)
|
log1pf(float x)
|
||||||
{
|
{
|
||||||
uint32_t ix = asuint (x);
|
float hfsq,f,c,s,z,R,u;
|
||||||
uint32_t ia = ix & ~SignMask;
|
int32_t k,hx,hu,ax;
|
||||||
uint32_t ia12 = ia >> 20;
|
|
||||||
uint32_t e = biased_exponent (ix);
|
|
||||||
|
|
||||||
/* Handle special cases first. */
|
GET_FLOAT_WORD(hx,x);
|
||||||
if (UNLIKELY (ia12 >= 0x7f8 || ix >= 0xbf800000 || ix == 0x80000000
|
ax = hx&0x7fffffff;
|
||||||
|| e <= TINY_BOUND_BEXP))
|
|
||||||
{
|
k = 1;
|
||||||
if (ix == 0xff800000)
|
if (hx < 0x3ed413d0) { /* 1+x < sqrt(2)+ */
|
||||||
{
|
if(ax>=0x3f800000) { /* x <= -1.0 */
|
||||||
/* x == -Inf => log1pf(x) = NaN. */
|
if(x==(float)-1.0) return -two25/vzero; /* log1p(-1)=-inf */
|
||||||
return NAN;
|
else return (x-x)/(x-x); /* log1p(x<-1)=NaN */
|
||||||
|
}
|
||||||
|
if(ax<0x38000000) { /* |x| < 2**-15 */
|
||||||
|
if(two25+x>zero /* raise inexact */
|
||||||
|
&&ax<0x33800000) /* |x| < 2**-24 */
|
||||||
|
return x;
|
||||||
|
else
|
||||||
|
return x - x*x*(float)0.5;
|
||||||
|
}
|
||||||
|
if(hx>0||hx<=((int32_t)0xbe95f619)) {
|
||||||
|
k=0;f=x;hu=1;} /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
|
||||||
}
|
}
|
||||||
if ((ix == 0x7f800000 || e <= TINY_BOUND_BEXP) && ia12 <= 0x7f8)
|
if (hx >= 0x7f800000) return x+x;
|
||||||
{
|
if(k!=0) {
|
||||||
/* |x| < TinyBound => log1p(x) = x.
|
if(hx<0x5a000000) {
|
||||||
x == Inf => log1pf(x) = Inf. */
|
STRICT_ASSIGN(float,u,(float)1.0+x);
|
||||||
return x;
|
GET_FLOAT_WORD(hu,u);
|
||||||
|
k = (hu>>23)-127;
|
||||||
|
/* correction term */
|
||||||
|
c = (k>0)? (float)1.0-(u-x):x-(u-(float)1.0);
|
||||||
|
c /= u;
|
||||||
|
} else {
|
||||||
|
u = x;
|
||||||
|
GET_FLOAT_WORD(hu,u);
|
||||||
|
k = (hu>>23)-127;
|
||||||
|
c = 0;
|
||||||
|
}
|
||||||
|
hu &= 0x007fffff;
|
||||||
|
/*
|
||||||
|
* The approximation to sqrt(2) used in thresholds is not
|
||||||
|
* critical. However, the ones used above must give less
|
||||||
|
* strict bounds than the one here so that the k==0 case is
|
||||||
|
* never reached from here, since here we have committed to
|
||||||
|
* using the correction term but don't use it if k==0.
|
||||||
|
*/
|
||||||
|
if(hu<0x3504f4) { /* u < sqrt(2) */
|
||||||
|
SET_FLOAT_WORD(u,hu|0x3f800000);/* normalize u */
|
||||||
|
} else {
|
||||||
|
k += 1;
|
||||||
|
SET_FLOAT_WORD(u,hu|0x3f000000); /* normalize u/2 */
|
||||||
|
hu = (0x00800000-hu)>>2;
|
||||||
|
}
|
||||||
|
f = u-(float)1.0;
|
||||||
}
|
}
|
||||||
if (ix == 0xbf800000)
|
hfsq=(float)0.5*f*f;
|
||||||
{
|
if(hu==0) { /* |f| < 2**-20 */
|
||||||
/* x == -1.0 => log1pf(x) = -Inf. */
|
if(f==zero) {
|
||||||
return __math_divzerof (-1);
|
if(k==0) {
|
||||||
|
return zero;
|
||||||
|
} else {
|
||||||
|
c += k*ln2_lo;
|
||||||
|
return k*ln2_hi+c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
R = hfsq*((float)1.0-(float)0.66666666666666666*f);
|
||||||
|
if(k==0) return f-R; else
|
||||||
|
return k*ln2_hi-((R-(k*ln2_lo+c))-f);
|
||||||
}
|
}
|
||||||
if (ia12 >= 0x7f8)
|
s = f/((float)2.0+f);
|
||||||
{
|
z = s*s;
|
||||||
/* x == +/-NaN => log1pf(x) = NaN. */
|
R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7))))));
|
||||||
return __math_invalidf (asfloat (ia));
|
if(k==0) return f-(hfsq-s*(hfsq+R)); else
|
||||||
}
|
return k*ln2_hi-((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f);
|
||||||
/* x < -1.0 => log1pf(x) = NaN. */
|
|
||||||
return __math_invalidf (x);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
|
|
||||||
is in [-0.25, 0.5]):
|
|
||||||
log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
|
|
||||||
|
|
||||||
We approximate log1p(m) with a polynomial, then scale by
|
|
||||||
k*log(2). Instead of doing this directly, we use an intermediate
|
|
||||||
scale factor s = 4*k*log(2) to ensure the scale is representable
|
|
||||||
as a normalised fp32 number. */
|
|
||||||
|
|
||||||
if (ix <= 0x3f000000 || ia <= 0x3e800000)
|
|
||||||
{
|
|
||||||
/* If x is in [-0.25, 0.5] then we can shortcut all the logic
|
|
||||||
below, as k = 0 and m = x. All we need is to return the
|
|
||||||
polynomial. */
|
|
||||||
return eval_poly (x, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
float m = x + 1.0f;
|
|
||||||
|
|
||||||
/* k is used to scale the input. 0x3f400000 is chosen as we are trying to
|
|
||||||
reduce x to the range [-0.25, 0.5]. Inside this range, k is 0.
|
|
||||||
Outside this range, if k is reinterpreted as (NOT CONVERTED TO) float:
|
|
||||||
let k = sign * 2^p where sign = -1 if x < 0
|
|
||||||
1 otherwise
|
|
||||||
and p is a negative integer whose magnitude increases with the
|
|
||||||
magnitude of x. */
|
|
||||||
int k = (asuint (m) - 0x3f400000) & 0xff800000;
|
|
||||||
|
|
||||||
/* By using integer arithmetic, we obtain the necessary scaling by
|
|
||||||
subtracting the unbiased exponent of k from the exponent of x. */
|
|
||||||
float m_scale = asfloat (asuint (x) - k);
|
|
||||||
|
|
||||||
/* Scale up to ensure that the scale factor is representable as a normalised
|
|
||||||
fp32 number (s in [2**-126,2**26]), and scale m down accordingly. */
|
|
||||||
float s = asfloat (asuint (4.0f) - k);
|
|
||||||
m_scale = m_scale + fmaf (0.25f, s, -1.0f);
|
|
||||||
|
|
||||||
float p = eval_poly (m_scale, biased_exponent (asuint (m_scale)));
|
|
||||||
|
|
||||||
/* The scale factor to be applied back at the end - by multiplying float(k)
|
|
||||||
by 2^-23 we get the unbiased exponent of k. */
|
|
||||||
float scale_back = (float) k * 0x1.0p-23f;
|
|
||||||
|
|
||||||
/* Apply the scaling back. */
|
|
||||||
return fmaf (scale_back, Ln2, p);
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,13 +0,0 @@
|
||||||
#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG1PF_DATA_H_
|
|
||||||
#define COSMOPOLITAN_LIBC_TINYMATH_LOG1PF_DATA_H_
|
|
||||||
COSMOPOLITAN_C_START_
|
|
||||||
|
|
||||||
#define LOG1PF_2U5
|
|
||||||
#define V_LOG1PF_2U5
|
|
||||||
#define LOG1PF_NCOEFFS 9
|
|
||||||
extern const struct log1pf_data {
|
|
||||||
float coeffs[LOG1PF_NCOEFFS]; //
|
|
||||||
} __log1pf_data;
|
|
||||||
|
|
||||||
COSMOPOLITAN_C_END_
|
|
||||||
#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG1PF_DATA_H_ */
|
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,20 +25,9 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/intrin/likely.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/complex.internal.h"
|
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
#include "libc/tinymath/log2_data.internal.h"
|
|
||||||
__static_yoink("arm_optimized_routines_notice");
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
|
|
||||||
/*
|
|
||||||
* Double-precision log2(x) function.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2018, Arm Limited.
|
|
||||||
* SPDX-License-Identifier: MIT
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define T __log2_data.tab
|
#define T __log2_data.tab
|
||||||
#define T2 __log2_data.tab2
|
#define T2 __log2_data.tab2
|
||||||
#define B __log2_data.poly1
|
#define B __log2_data.poly1
|
||||||
|
@ -49,110 +38,126 @@ __static_yoink("arm_optimized_routines_notice");
|
||||||
#define OFF 0x3fe6000000000000
|
#define OFF 0x3fe6000000000000
|
||||||
|
|
||||||
/* Top 16 bits of a double. */
|
/* Top 16 bits of a double. */
|
||||||
static inline uint32_t top16(double x)
|
static inline uint32_t
|
||||||
|
top16 (double x)
|
||||||
{
|
{
|
||||||
return asuint64(x) >> 48;
|
return asuint64 (x) >> 48;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculates log₂𝑥.
|
* Returns base 2 logarithm of x.
|
||||||
*/
|
*/
|
||||||
double log2(double x)
|
double
|
||||||
|
log2 (double x)
|
||||||
{
|
{
|
||||||
double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
|
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||||
uint64_t ix, iz, tmp;
|
double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
|
||||||
uint32_t top;
|
uint64_t ix, iz, tmp;
|
||||||
int k, i;
|
uint32_t top;
|
||||||
|
int k, i;
|
||||||
|
|
||||||
ix = asuint64(x);
|
ix = asuint64 (x);
|
||||||
top = top16(x);
|
top = top16 (x);
|
||||||
#define LO asuint64(1.0 - 0x1.5b51p-5)
|
|
||||||
#define HI asuint64(1.0 + 0x1.6ab2p-5)
|
#if LOG2_POLY1_ORDER == 11
|
||||||
if (UNLIKELY(ix - LO < HI - LO)) {
|
# define LO asuint64 (1.0 - 0x1.5b51p-5)
|
||||||
/* Handle close to 1.0 inputs separately. */
|
# define HI asuint64 (1.0 + 0x1.6ab2p-5)
|
||||||
/* Fix sign of zero with downward rounding when x==1. */
|
|
||||||
if (WANT_ROUNDING && UNLIKELY(ix == asuint64(1.0)))
|
|
||||||
return 0;
|
|
||||||
r = x - 1.0;
|
|
||||||
#if __FP_FAST_FMA
|
|
||||||
hi = r * InvLn2hi;
|
|
||||||
lo = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -hi);
|
|
||||||
#else
|
|
||||||
double_t rhi, rlo;
|
|
||||||
rhi = asdouble(asuint64(r) & -1ULL << 32);
|
|
||||||
rlo = r - rhi;
|
|
||||||
hi = rhi * InvLn2hi;
|
|
||||||
lo = rlo * InvLn2hi + r * InvLn2lo;
|
|
||||||
#endif
|
#endif
|
||||||
r2 = r * r; /* rounding error: 0x1p-62. */
|
if (unlikely (ix - LO < HI - LO))
|
||||||
r4 = r2 * r2;
|
{
|
||||||
/* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */
|
/* Handle close to 1.0 inputs separately. */
|
||||||
p = r2 * (B[0] + r * B[1]);
|
/* Fix sign of zero with downward rounding when x==1. */
|
||||||
y = hi + p;
|
if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
|
||||||
lo += hi - y + p;
|
return 0;
|
||||||
lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) +
|
r = x - 1.0;
|
||||||
r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
|
#if HAVE_FAST_FMA
|
||||||
y += lo;
|
hi = r * InvLn2hi;
|
||||||
return eval_as_double(y);
|
lo = r * InvLn2lo + fma (r, InvLn2hi, -hi);
|
||||||
}
|
|
||||||
if (UNLIKELY(top - 0x0010 >= 0x7ff0 - 0x0010)) {
|
|
||||||
/* x < 0x1p-1022 or inf or nan. */
|
|
||||||
if (ix * 2 == 0)
|
|
||||||
return __math_divzero(1);
|
|
||||||
if (ix == asuint64(INFINITY)) /* log(inf) == inf. */
|
|
||||||
return x;
|
|
||||||
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
|
|
||||||
return __math_invalid(x);
|
|
||||||
/* x is subnormal, normalize it. */
|
|
||||||
ix = asuint64(x * 0x1p52);
|
|
||||||
ix -= 52ULL << 52;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
|
|
||||||
The range is split into N subintervals.
|
|
||||||
The ith subinterval contains z and c is near its center. */
|
|
||||||
tmp = ix - OFF;
|
|
||||||
i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
|
|
||||||
k = (int64_t)tmp >> 52; /* arithmetic shift */
|
|
||||||
iz = ix - (tmp & 0xfffULL << 52);
|
|
||||||
invc = T[i].invc;
|
|
||||||
logc = T[i].logc;
|
|
||||||
z = asdouble(iz);
|
|
||||||
kd = (double_t)k;
|
|
||||||
|
|
||||||
/* log2(x) = log2(z/c) + log2(c) + k. */
|
|
||||||
/* r ~= z/c - 1, |r| < 1/(2*N). */
|
|
||||||
#if __FP_FAST_FMA
|
|
||||||
/* rounding error: 0x1p-55/N. */
|
|
||||||
r = __builtin_fma(z, invc, -1.0);
|
|
||||||
t1 = r * InvLn2hi;
|
|
||||||
t2 = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -t1);
|
|
||||||
#else
|
#else
|
||||||
double_t rhi, rlo;
|
double_t rhi, rlo;
|
||||||
/* rounding error: 0x1p-55/N + 0x1p-65. */
|
rhi = asdouble (asuint64 (r) & -1ULL << 32);
|
||||||
r = (z - T2[i].chi - T2[i].clo) * invc;
|
rlo = r - rhi;
|
||||||
rhi = asdouble(asuint64(r) & -1ULL << 32);
|
hi = rhi * InvLn2hi;
|
||||||
rlo = r - rhi;
|
lo = rlo * InvLn2hi + r * InvLn2lo;
|
||||||
t1 = rhi * InvLn2hi;
|
#endif
|
||||||
t2 = rlo * InvLn2hi + r * InvLn2lo;
|
r2 = r * r; /* rounding error: 0x1p-62. */
|
||||||
|
r4 = r2 * r2;
|
||||||
|
#if LOG2_POLY1_ORDER == 11
|
||||||
|
/* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */
|
||||||
|
p = r2 * (B[0] + r * B[1]);
|
||||||
|
y = hi + p;
|
||||||
|
lo += hi - y + p;
|
||||||
|
lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5])
|
||||||
|
+ r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
|
||||||
|
y += lo;
|
||||||
|
#endif
|
||||||
|
return eval_as_double (y);
|
||||||
|
}
|
||||||
|
if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
|
||||||
|
{
|
||||||
|
/* x < 0x1p-1022 or inf or nan. */
|
||||||
|
if (ix * 2 == 0)
|
||||||
|
return __math_divzero (1);
|
||||||
|
if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */
|
||||||
|
return x;
|
||||||
|
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
|
||||||
|
return __math_invalid (x);
|
||||||
|
/* x is subnormal, normalize it. */
|
||||||
|
ix = asuint64 (x * 0x1p52);
|
||||||
|
ix -= 52ULL << 52;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
|
||||||
|
The range is split into N subintervals.
|
||||||
|
The ith subinterval contains z and c is near its center. */
|
||||||
|
tmp = ix - OFF;
|
||||||
|
i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
|
||||||
|
k = (int64_t) tmp >> 52; /* arithmetic shift */
|
||||||
|
iz = ix - (tmp & 0xfffULL << 52);
|
||||||
|
invc = T[i].invc;
|
||||||
|
logc = T[i].logc;
|
||||||
|
z = asdouble (iz);
|
||||||
|
kd = (double_t) k;
|
||||||
|
|
||||||
|
/* log2(x) = log2(z/c) + log2(c) + k. */
|
||||||
|
/* r ~= z/c - 1, |r| < 1/(2*N). */
|
||||||
|
#if HAVE_FAST_FMA
|
||||||
|
/* rounding error: 0x1p-55/N. */
|
||||||
|
r = fma (z, invc, -1.0);
|
||||||
|
t1 = r * InvLn2hi;
|
||||||
|
t2 = r * InvLn2lo + fma (r, InvLn2hi, -t1);
|
||||||
|
#else
|
||||||
|
double_t rhi, rlo;
|
||||||
|
/* rounding error: 0x1p-55/N + 0x1p-65. */
|
||||||
|
r = (z - T2[i].chi - T2[i].clo) * invc;
|
||||||
|
rhi = asdouble (asuint64 (r) & -1ULL << 32);
|
||||||
|
rlo = r - rhi;
|
||||||
|
t1 = rhi * InvLn2hi;
|
||||||
|
t2 = rlo * InvLn2hi + r * InvLn2lo;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* hi + lo = r/ln2 + log2(c) + k. */
|
/* hi + lo = r/ln2 + log2(c) + k. */
|
||||||
t3 = kd + logc;
|
t3 = kd + logc;
|
||||||
hi = t3 + t1;
|
hi = t3 + t1;
|
||||||
lo = t3 - hi + t1 + t2;
|
lo = t3 - hi + t1 + t2;
|
||||||
|
|
||||||
/* log2(r+1) = r/ln2 + r^2*poly(r). */
|
/* log2(r+1) = r/ln2 + r^2*poly(r). */
|
||||||
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
||||||
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
|
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
|
||||||
r4 = r2 * r2;
|
r4 = r2 * r2;
|
||||||
/* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
|
#if LOG2_POLY_ORDER == 7
|
||||||
~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */
|
/* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
|
||||||
p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
|
~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */
|
||||||
y = lo + r2 * p + hi;
|
p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
|
||||||
return eval_as_double(y);
|
y = lo + r2 * p + hi;
|
||||||
|
#endif
|
||||||
|
return eval_as_double (y);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
|
#if USE_GLIBC_ABI
|
||||||
__weak_reference(log2, log2l);
|
strong_alias (log2, __log2_finite)
|
||||||
|
hidden_alias (log2, __ieee754_log2)
|
||||||
|
# if LDBL_MANT_DIG == 53
|
||||||
|
long double log2l (long double x) { return log2 (x); }
|
||||||
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Musl Libc │
|
│ Optimized Routines │
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,14 +25,8 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/tinymath/log2_data.internal.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
/*
|
|
||||||
* Data for log2.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2018, Arm Limited.
|
|
||||||
* SPDX-License-Identifier: MIT
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define N (1 << LOG2_TABLE_BITS)
|
#define N (1 << LOG2_TABLE_BITS)
|
||||||
|
|
||||||
|
@ -41,6 +35,7 @@ const struct log2_data __log2_data = {
|
||||||
.invln2hi = 0x1.7154765200000p+0,
|
.invln2hi = 0x1.7154765200000p+0,
|
||||||
.invln2lo = 0x1.705fc2eefa200p-33,
|
.invln2lo = 0x1.705fc2eefa200p-33,
|
||||||
.poly1 = {
|
.poly1 = {
|
||||||
|
#if LOG2_POLY1_ORDER == 11
|
||||||
// relative error: 0x1.2fad8188p-63
|
// relative error: 0x1.2fad8188p-63
|
||||||
// in -0x1.5b51p-5 0x1.6ab2p-5
|
// in -0x1.5b51p-5 0x1.6ab2p-5
|
||||||
-0x1.71547652b82fep-1,
|
-0x1.71547652b82fep-1,
|
||||||
|
@ -53,8 +48,10 @@ const struct log2_data __log2_data = {
|
||||||
0x1.484d154f01b4ap-3,
|
0x1.484d154f01b4ap-3,
|
||||||
-0x1.289e4a72c383cp-3,
|
-0x1.289e4a72c383cp-3,
|
||||||
0x1.0b32f285aee66p-3,
|
0x1.0b32f285aee66p-3,
|
||||||
|
#endif
|
||||||
},
|
},
|
||||||
.poly = {
|
.poly = {
|
||||||
|
#if N == 64 && LOG2_POLY_ORDER == 7
|
||||||
// relative error: 0x1.a72c2bf8p-58
|
// relative error: 0x1.a72c2bf8p-58
|
||||||
// abs error: 0x1.67a552c8p-66
|
// abs error: 0x1.67a552c8p-66
|
||||||
// in -0x1.f45p-8 0x1.f45p-8
|
// in -0x1.f45p-8 0x1.f45p-8
|
||||||
|
@ -64,6 +61,7 @@ const struct log2_data __log2_data = {
|
||||||
0x1.2776c50034c48p-2,
|
0x1.2776c50034c48p-2,
|
||||||
-0x1.ec7b328ea92bcp-3,
|
-0x1.ec7b328ea92bcp-3,
|
||||||
0x1.a6225e117f92ep-3,
|
0x1.a6225e117f92ep-3,
|
||||||
|
#endif
|
||||||
},
|
},
|
||||||
/* Algorithm:
|
/* Algorithm:
|
||||||
|
|
||||||
|
@ -92,6 +90,7 @@ single rounding error when there is no fast fma for z*invc - 1, 3) ensures
|
||||||
that logc + poly(z/c - 1) has small error, however near x == 1 when
|
that logc + poly(z/c - 1) has small error, however near x == 1 when
|
||||||
|log2(x)| < 0x1p-4, this is not enough so that is special cased. */
|
|log2(x)| < 0x1p-4, this is not enough so that is special cased. */
|
||||||
.tab = {
|
.tab = {
|
||||||
|
#if N == 64
|
||||||
{0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
|
{0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
|
||||||
{0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
|
{0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
|
||||||
{0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
|
{0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
|
||||||
|
@ -156,9 +155,11 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when
|
||||||
{0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
|
{0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
|
||||||
{0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
|
{0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
|
||||||
{0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2},
|
{0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2},
|
||||||
|
#endif
|
||||||
},
|
},
|
||||||
#if !__FP_FAST_FMA
|
#if !HAVE_FAST_FMA
|
||||||
.tab2 = {
|
.tab2 = {
|
||||||
|
# if N == 64
|
||||||
{0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
|
{0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
|
||||||
{0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
|
{0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
|
||||||
{0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
|
{0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
|
||||||
|
@ -223,6 +224,7 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when
|
||||||
{0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
|
{0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
|
||||||
{0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
|
{0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
|
||||||
{0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54},
|
{0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54},
|
||||||
|
# endif
|
||||||
},
|
},
|
||||||
#endif
|
#endif /* !HAVE_FAST_FMA */
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,26 +0,0 @@
|
||||||
#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG2_DATA_INTERNAL_H_
|
|
||||||
#define COSMOPOLITAN_LIBC_TINYMATH_LOG2_DATA_INTERNAL_H_
|
|
||||||
|
|
||||||
#define LOG2_TABLE_BITS 6
|
|
||||||
#define LOG2_POLY_ORDER 7
|
|
||||||
#define LOG2_POLY1_ORDER 11
|
|
||||||
|
|
||||||
COSMOPOLITAN_C_START_
|
|
||||||
|
|
||||||
extern const struct log2_data {
|
|
||||||
double invln2hi;
|
|
||||||
double invln2lo;
|
|
||||||
double poly[LOG2_POLY_ORDER - 1];
|
|
||||||
double poly1[LOG2_POLY1_ORDER - 1];
|
|
||||||
struct {
|
|
||||||
double invc, logc;
|
|
||||||
} tab[1 << LOG2_TABLE_BITS];
|
|
||||||
#if !__FP_FAST_FMA
|
|
||||||
struct {
|
|
||||||
double chi, clo;
|
|
||||||
} tab2[1 << LOG2_TABLE_BITS];
|
|
||||||
#endif
|
|
||||||
} __log2_data;
|
|
||||||
|
|
||||||
COSMOPOLITAN_C_END_
|
|
||||||
#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG2_DATA_INTERNAL_H_ */
|
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,20 +25,9 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/intrin/likely.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/complex.internal.h"
|
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
#include "libc/tinymath/log2f_data.internal.h"
|
|
||||||
__static_yoink("arm_optimized_routines_notice");
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
|
|
||||||
/*
|
|
||||||
* Single-precision log2 function.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2017-2018, Arm Limited.
|
|
||||||
* SPDX-License-Identifier: MIT
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
LOG2F_TABLE_BITS = 4
|
LOG2F_TABLE_BITS = 4
|
||||||
LOG2F_POLY_ORDER = 4
|
LOG2F_POLY_ORDER = 4
|
||||||
|
@ -53,52 +42,65 @@ Relative error: 1.9 * 2^-26 (before rounding.)
|
||||||
#define OFF 0x3f330000
|
#define OFF 0x3f330000
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculates log₂𝑥.
|
* Returns base-2 logarithm of x.
|
||||||
|
*
|
||||||
|
* - ULP error: 0.752 (nearest rounding.)
|
||||||
|
* - Relative error: 1.9 * 2^-26 (before rounding.)
|
||||||
*/
|
*/
|
||||||
float log2f(float x)
|
float
|
||||||
|
log2f (float x)
|
||||||
{
|
{
|
||||||
double_t z, r, r2, p, y, y0, invc, logc;
|
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||||
uint32_t ix, iz, top, tmp;
|
double_t z, r, r2, p, y, y0, invc, logc;
|
||||||
int k, i;
|
uint32_t ix, iz, top, tmp;
|
||||||
|
int k, i;
|
||||||
|
|
||||||
ix = asuint(x);
|
ix = asuint (x);
|
||||||
/* Fix sign of zero with downward rounding when x==1. */
|
#if WANT_ROUNDING
|
||||||
if (WANT_ROUNDING && UNLIKELY(ix == 0x3f800000))
|
/* Fix sign of zero with downward rounding when x==1. */
|
||||||
return 0;
|
if (unlikely (ix == 0x3f800000))
|
||||||
if (UNLIKELY(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
|
return 0;
|
||||||
/* x < 0x1p-126 or inf or nan. */
|
#endif
|
||||||
if (ix * 2 == 0)
|
if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
|
||||||
return __math_divzerof(1);
|
{
|
||||||
if (ix == 0x7f800000) /* log2(inf) == inf. */
|
/* x < 0x1p-126 or inf or nan. */
|
||||||
return x;
|
if (ix * 2 == 0)
|
||||||
if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
|
return __math_divzerof (1);
|
||||||
return __math_invalidf(x);
|
if (ix == 0x7f800000) /* log2(inf) == inf. */
|
||||||
/* x is subnormal, normalize it. */
|
return x;
|
||||||
ix = asuint(x * 0x1p23f);
|
if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
|
||||||
ix -= 23 << 23;
|
return __math_invalidf (x);
|
||||||
}
|
/* x is subnormal, normalize it. */
|
||||||
|
ix = asuint (x * 0x1p23f);
|
||||||
|
ix -= 23 << 23;
|
||||||
|
}
|
||||||
|
|
||||||
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
|
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
|
||||||
The range is split into N subintervals.
|
The range is split into N subintervals.
|
||||||
The ith subinterval contains z and c is near its center. */
|
The ith subinterval contains z and c is near its center. */
|
||||||
tmp = ix - OFF;
|
tmp = ix - OFF;
|
||||||
i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N;
|
i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N;
|
||||||
top = tmp & 0xff800000;
|
top = tmp & 0xff800000;
|
||||||
iz = ix - top;
|
iz = ix - top;
|
||||||
k = (int32_t)tmp >> 23; /* arithmetic shift */
|
k = (int32_t) tmp >> 23; /* arithmetic shift */
|
||||||
invc = T[i].invc;
|
invc = T[i].invc;
|
||||||
logc = T[i].logc;
|
logc = T[i].logc;
|
||||||
z = (double_t)asfloat(iz);
|
z = (double_t) asfloat (iz);
|
||||||
|
|
||||||
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
|
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
|
||||||
r = z * invc - 1;
|
r = z * invc - 1;
|
||||||
y0 = logc + (double_t)k;
|
y0 = logc + (double_t) k;
|
||||||
|
|
||||||
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
|
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
|
||||||
r2 = r * r;
|
r2 = r * r;
|
||||||
y = A[1] * r + A[2];
|
y = A[1] * r + A[2];
|
||||||
y = A[0] * r2 + y;
|
y = A[0] * r2 + y;
|
||||||
p = A[3] * r + y0;
|
p = A[3] * r + y0;
|
||||||
y = y * r2 + p;
|
y = y * r2 + p;
|
||||||
return eval_as_float(y);
|
return eval_as_float (y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if USE_GLIBC_ABI
|
||||||
|
strong_alias (log2f, __log2f_finite)
|
||||||
|
hidden_alias (log2f, __ieee754_log2f)
|
||||||
|
#endif
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,16 +25,9 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/tinymath/log2f_data.internal.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
__static_yoink("arm_optimized_routines_notice");
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
|
|
||||||
/*
|
|
||||||
* Data definition for log2f.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2017-2018, Arm Limited.
|
|
||||||
* SPDX-License-Identifier: MIT
|
|
||||||
*/
|
|
||||||
|
|
||||||
const struct log2f_data __log2f_data = {
|
const struct log2f_data __log2f_data = {
|
||||||
.tab = {
|
.tab = {
|
||||||
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
|
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
|
||||||
|
|
|
@ -1,17 +0,0 @@
|
||||||
#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG2F_DATA_INTERNAL_H_
|
|
||||||
#define COSMOPOLITAN_LIBC_TINYMATH_LOG2F_DATA_INTERNAL_H_
|
|
||||||
|
|
||||||
#define LOG2F_TABLE_BITS 4
|
|
||||||
#define LOG2F_POLY_ORDER 4
|
|
||||||
|
|
||||||
COSMOPOLITAN_C_START_
|
|
||||||
|
|
||||||
extern const struct log2f_data {
|
|
||||||
struct {
|
|
||||||
double invc, logc;
|
|
||||||
} tab[1 << LOG2F_TABLE_BITS];
|
|
||||||
double poly[LOG2F_POLY_ORDER];
|
|
||||||
} __log2f_data;
|
|
||||||
|
|
||||||
COSMOPOLITAN_C_END_
|
|
||||||
#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG2F_DATA_INTERNAL_H_ */
|
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,22 +25,41 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/tinymath/log_data.internal.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
__static_yoink("arm_optimized_routines_notice");
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
|
|
||||||
/*
|
|
||||||
* Data for log.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2018, Arm Limited.
|
|
||||||
* SPDX-License-Identifier: MIT
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define N (1 << LOG_TABLE_BITS)
|
#define N (1 << LOG_TABLE_BITS)
|
||||||
|
|
||||||
const struct log_data __log_data = {
|
const struct log_data __log_data = {
|
||||||
.ln2hi = 0x1.62e42fefa3800p-1,
|
.ln2hi = 0x1.62e42fefa3800p-1,
|
||||||
.ln2lo = 0x1.ef35793c76730p-45,
|
.ln2lo = 0x1.ef35793c76730p-45,
|
||||||
.poly1 = {
|
.poly1 = {
|
||||||
|
#if LOG_POLY1_ORDER == 10
|
||||||
|
// relative error: 0x1.32eccc6p-62
|
||||||
|
// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
|
||||||
|
-0x1p-1,
|
||||||
|
0x1.55555555554e5p-2,
|
||||||
|
-0x1.0000000000af2p-2,
|
||||||
|
0x1.9999999bbe436p-3,
|
||||||
|
-0x1.55555537f9cdep-3,
|
||||||
|
0x1.24922fc8127cfp-3,
|
||||||
|
-0x1.0000b7d6bb612p-3,
|
||||||
|
0x1.c806ee1ddbcafp-4,
|
||||||
|
-0x1.972335a9c2d6ep-4,
|
||||||
|
#elif LOG_POLY1_ORDER == 11
|
||||||
|
// relative error: 0x1.52c8b708p-68
|
||||||
|
// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
|
||||||
|
-0x1p-1,
|
||||||
|
0x1.5555555555555p-2,
|
||||||
|
-0x1.ffffffffffea9p-3,
|
||||||
|
0x1.999999999c4d4p-3,
|
||||||
|
-0x1.55555557f5541p-3,
|
||||||
|
0x1.249248fbe33e4p-3,
|
||||||
|
-0x1.ffffc9a3c825bp-4,
|
||||||
|
0x1.c71e1f204435dp-4,
|
||||||
|
-0x1.9a7f26377d06ep-4,
|
||||||
|
0x1.71c30cf8f7364p-4,
|
||||||
|
#elif LOG_POLY1_ORDER == 12
|
||||||
// relative error: 0x1.c04d76cp-63
|
// relative error: 0x1.c04d76cp-63
|
||||||
// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
|
// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
|
||||||
-0x1p-1,
|
-0x1p-1,
|
||||||
|
@ -54,8 +73,20 @@ const struct log_data __log_data = {
|
||||||
-0x1.999eb43b068ffp-4,
|
-0x1.999eb43b068ffp-4,
|
||||||
0x1.78182f7afd085p-4,
|
0x1.78182f7afd085p-4,
|
||||||
-0x1.5521375d145cdp-4,
|
-0x1.5521375d145cdp-4,
|
||||||
|
#endif
|
||||||
},
|
},
|
||||||
.poly = {
|
.poly = {
|
||||||
|
#if N == 64 && LOG_POLY_ORDER == 7
|
||||||
|
// relative error: 0x1.906eb8ap-58
|
||||||
|
// abs error: 0x1.d2cad5a8p-67
|
||||||
|
// in -0x1.fp-8 0x1.fp-8
|
||||||
|
-0x1.0000000000027p-1,
|
||||||
|
0x1.555555555556ap-2,
|
||||||
|
-0x1.fffffff0440bap-3,
|
||||||
|
0x1.99999991906c3p-3,
|
||||||
|
-0x1.555c8d7e8201ep-3,
|
||||||
|
0x1.24978c59151fap-3,
|
||||||
|
#elif N == 128 && LOG_POLY_ORDER == 6
|
||||||
// relative error: 0x1.926199e8p-56
|
// relative error: 0x1.926199e8p-56
|
||||||
// abs error: 0x1.882ff33p-65
|
// abs error: 0x1.882ff33p-65
|
||||||
// in -0x1.fp-9 0x1.fp-9
|
// in -0x1.fp-9 0x1.fp-9
|
||||||
|
@ -64,6 +95,17 @@ const struct log_data __log_data = {
|
||||||
-0x1.fffffffeb459p-3,
|
-0x1.fffffffeb459p-3,
|
||||||
0x1.999b324f10111p-3,
|
0x1.999b324f10111p-3,
|
||||||
-0x1.55575e506c89fp-3,
|
-0x1.55575e506c89fp-3,
|
||||||
|
#elif N == 128 && LOG_POLY_ORDER == 7
|
||||||
|
// relative error: 0x1.649fc4bp-64
|
||||||
|
// abs error: 0x1.c3b5769p-74
|
||||||
|
// in -0x1.fp-9 0x1.fp-9
|
||||||
|
-0x1.0000000000001p-1,
|
||||||
|
0x1.5555555555556p-2,
|
||||||
|
-0x1.fffffffea1a8p-3,
|
||||||
|
0x1.99999998e9139p-3,
|
||||||
|
-0x1.555776801b968p-3,
|
||||||
|
0x1.2493c29331a5cp-3,
|
||||||
|
#endif
|
||||||
},
|
},
|
||||||
/* Algorithm:
|
/* Algorithm:
|
||||||
|
|
||||||
|
@ -92,6 +134,72 @@ a single rounding error when there is no fast fma for z*invc - 1, 3) ensures
|
||||||
that logc + poly(z/c - 1) has small error, however near x == 1 when
|
that logc + poly(z/c - 1) has small error, however near x == 1 when
|
||||||
|log(x)| < 0x1p-4, this is not enough so that is special cased. */
|
|log(x)| < 0x1p-4, this is not enough so that is special cased. */
|
||||||
.tab = {
|
.tab = {
|
||||||
|
#if N == 64
|
||||||
|
{0x1.7242886495cd8p+0, -0x1.79e267bdfe000p-2},
|
||||||
|
{0x1.6e1f769340dc9p+0, -0x1.6e60ee0ecb000p-2},
|
||||||
|
{0x1.6a13ccc8f195cp+0, -0x1.63002fdbf6000p-2},
|
||||||
|
{0x1.661ec72e86f3ap+0, -0x1.57bf76c597000p-2},
|
||||||
|
{0x1.623fa6c447b16p+0, -0x1.4c9e07f0d2000p-2},
|
||||||
|
{0x1.5e75bbca31702p+0, -0x1.419b42f027000p-2},
|
||||||
|
{0x1.5ac05655adb10p+0, -0x1.36b67660e6000p-2},
|
||||||
|
{0x1.571ed3e940191p+0, -0x1.2bef0839e4800p-2},
|
||||||
|
{0x1.539094ac0fbbfp+0, -0x1.21445727cb000p-2},
|
||||||
|
{0x1.5015007e7fc42p+0, -0x1.16b5ca3c3d000p-2},
|
||||||
|
{0x1.4cab877c31cf9p+0, -0x1.0c42d3805f800p-2},
|
||||||
|
{0x1.49539e76a88d3p+0, -0x1.01eae61b60800p-2},
|
||||||
|
{0x1.460cbc12211dap+0, -0x1.ef5adb9fb0000p-3},
|
||||||
|
{0x1.42d6624debe3ap+0, -0x1.db13daab99000p-3},
|
||||||
|
{0x1.3fb0144f0d462p+0, -0x1.c6ffbe896e000p-3},
|
||||||
|
{0x1.3c995a1f9a9b4p+0, -0x1.b31d84722d000p-3},
|
||||||
|
{0x1.3991c23952500p+0, -0x1.9f6c3cf6eb000p-3},
|
||||||
|
{0x1.3698df35eaa14p+0, -0x1.8beafe7f13000p-3},
|
||||||
|
{0x1.33ae463091760p+0, -0x1.7898db878d000p-3},
|
||||||
|
{0x1.30d190aae3d72p+0, -0x1.6574efe4ec000p-3},
|
||||||
|
{0x1.2e025c9203c89p+0, -0x1.527e620845000p-3},
|
||||||
|
{0x1.2b404a7244988p+0, -0x1.3fb457d798000p-3},
|
||||||
|
{0x1.288b01dc19544p+0, -0x1.2d1615a077000p-3},
|
||||||
|
{0x1.25e2268085f69p+0, -0x1.1aa2b431e5000p-3},
|
||||||
|
{0x1.23456812abb74p+0, -0x1.08598f1d2b000p-3},
|
||||||
|
{0x1.20b4703174157p+0, -0x1.ec738fee40000p-4},
|
||||||
|
{0x1.1e2ef308b4e9bp+0, -0x1.c885768862000p-4},
|
||||||
|
{0x1.1bb4a36b70a3fp+0, -0x1.a4e75b6a46000p-4},
|
||||||
|
{0x1.194538e960658p+0, -0x1.8197efba9a000p-4},
|
||||||
|
{0x1.16e0692a10ac8p+0, -0x1.5e95ad734e000p-4},
|
||||||
|
{0x1.1485f1ba1568bp+0, -0x1.3bdf67117c000p-4},
|
||||||
|
{0x1.12358e123ed6fp+0, -0x1.1973b744f0000p-4},
|
||||||
|
{0x1.0fef01de37c8dp+0, -0x1.eea33446bc000p-5},
|
||||||
|
{0x1.0db20b82be414p+0, -0x1.aaef4ab304000p-5},
|
||||||
|
{0x1.0b7e6f67f69b3p+0, -0x1.67c962fd2c000p-5},
|
||||||
|
{0x1.0953f342fc108p+0, -0x1.252f29acf8000p-5},
|
||||||
|
{0x1.0732604ec956bp+0, -0x1.c63d19e9c0000p-6},
|
||||||
|
{0x1.051980117f9b0p+0, -0x1.432ab6a388000p-6},
|
||||||
|
{0x1.03091aa6810f1p+0, -0x1.8244357f50000p-7},
|
||||||
|
{0x1.01010152cf066p+0, -0x1.0080a711c0000p-8},
|
||||||
|
{0x1.fc07ef6b6e30bp-1, 0x1.fe03018e80000p-8},
|
||||||
|
{0x1.f4465aa1024afp-1, 0x1.7b91986450000p-6},
|
||||||
|
{0x1.ecc07a8fd3f5ep-1, 0x1.39e88608c8000p-5},
|
||||||
|
{0x1.e573ad856b537p-1, 0x1.b42dc6e624000p-5},
|
||||||
|
{0x1.de5d6dc7b8057p-1, 0x1.165372ec20000p-4},
|
||||||
|
{0x1.d77b6498bddf7p-1, 0x1.51b07a0170000p-4},
|
||||||
|
{0x1.d0cb580315c0fp-1, 0x1.8c3465c7ea000p-4},
|
||||||
|
{0x1.ca4b30d1cf449p-1, 0x1.c5e544a290000p-4},
|
||||||
|
{0x1.c3f8ef4810d8ep-1, 0x1.fec91aa0a6000p-4},
|
||||||
|
{0x1.bdd2b8b311f44p-1, 0x1.1b72acdc5c000p-3},
|
||||||
|
{0x1.b7d6c2eeac054p-1, 0x1.371fc65a98000p-3},
|
||||||
|
{0x1.b20363474c8f5p-1, 0x1.526e61c1aa000p-3},
|
||||||
|
{0x1.ac570165eeab1p-1, 0x1.6d60ffc240000p-3},
|
||||||
|
{0x1.a6d019f331df4p-1, 0x1.87fa08a013000p-3},
|
||||||
|
{0x1.a16d3ebc9e3c3p-1, 0x1.a23bc630c3000p-3},
|
||||||
|
{0x1.9c2d14567ef45p-1, 0x1.bc286a3512000p-3},
|
||||||
|
{0x1.970e4efae9169p-1, 0x1.d5c2195697000p-3},
|
||||||
|
{0x1.920fb3bd0b802p-1, 0x1.ef0ae132d3000p-3},
|
||||||
|
{0x1.8d3018b58699ap-1, 0x1.040259974e000p-2},
|
||||||
|
{0x1.886e5ff170ee6p-1, 0x1.1058bd40e2000p-2},
|
||||||
|
{0x1.83c977ad35d27p-1, 0x1.1c898c1137800p-2},
|
||||||
|
{0x1.7f405ed16c520p-1, 0x1.2895a3e65b000p-2},
|
||||||
|
{0x1.7ad220d0335c4p-1, 0x1.347dd8f6bd000p-2},
|
||||||
|
{0x1.767dce53474fdp-1, 0x1.4043083cb3800p-2},
|
||||||
|
#elif N == 128
|
||||||
{0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
|
{0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
|
||||||
{0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
|
{0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
|
||||||
{0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
|
{0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
|
||||||
|
@ -220,9 +328,76 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when
|
||||||
{0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
|
{0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
|
||||||
{0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
|
{0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
|
||||||
{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
|
{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
|
||||||
|
#endif
|
||||||
},
|
},
|
||||||
#if !__FP_FAST_FMA
|
#if !HAVE_FAST_FMA
|
||||||
.tab2 = {
|
.tab2 = {
|
||||||
|
# if N == 64
|
||||||
|
{0x1.61ffff94c4fecp-1, -0x1.9fe4fc998f325p-56},
|
||||||
|
{0x1.66000020377ddp-1, 0x1.e804c7a9519f2p-55},
|
||||||
|
{0x1.6a00004c41678p-1, 0x1.902c675d9ecfep-55},
|
||||||
|
{0x1.6dffff7384f87p-1, -0x1.2fd6b95e55043p-56},
|
||||||
|
{0x1.720000b37216ep-1, 0x1.802bc8d437043p-55},
|
||||||
|
{0x1.75ffffbeb3c9dp-1, 0x1.6047ad0a0d4e4p-57},
|
||||||
|
{0x1.7a0000628daep-1, -0x1.e00434b49313dp-56},
|
||||||
|
{0x1.7dffffd7abd1ap-1, -0x1.6015f8a083576p-56},
|
||||||
|
{0x1.81ffffdf40c54p-1, 0x1.7f54bf76a42c9p-57},
|
||||||
|
{0x1.860000f334e11p-1, 0x1.60054cb5344d7p-56},
|
||||||
|
{0x1.8a0001238aca7p-1, 0x1.c03c9bd132f55p-57},
|
||||||
|
{0x1.8dffffb81d212p-1, -0x1.001e519f2764fp-55},
|
||||||
|
{0x1.92000086adc7cp-1, 0x1.1fe40f88f49c6p-55},
|
||||||
|
{0x1.960000135d8eap-1, -0x1.f832268dc3095p-55},
|
||||||
|
{0x1.99ffff9435acp-1, 0x1.7031d8b835edcp-56},
|
||||||
|
{0x1.9e00003478565p-1, -0x1.0030b221ce3eep-58},
|
||||||
|
{0x1.a20000b592948p-1, 0x1.8fd2f1dbd4639p-55},
|
||||||
|
{0x1.a600000ad0bcfp-1, 0x1.901d6a974e6bep-55},
|
||||||
|
{0x1.a9ffff55953a5p-1, 0x1.a07556192db98p-57},
|
||||||
|
{0x1.adffff29ce03dp-1, -0x1.fff0717ec71c2p-56},
|
||||||
|
{0x1.b1ffff34f3ac8p-1, 0x1.8005573de89d1p-57},
|
||||||
|
{0x1.b60000894c55bp-1, -0x1.ff2fb51b044c7p-57},
|
||||||
|
{0x1.b9fffef45ec7dp-1, -0x1.9ff7c4e8730fp-56},
|
||||||
|
{0x1.be0000cda7b2ap-1, 0x1.57d058dbf3c1dp-55},
|
||||||
|
{0x1.c1ffff2c57917p-1, 0x1.7e66d7e48dbc9p-58},
|
||||||
|
{0x1.c60000ea5b82ap-1, -0x1.47f5e132ed4bep-55},
|
||||||
|
{0x1.ca0001121ae98p-1, -0x1.40958c8d5e00ap-58},
|
||||||
|
{0x1.ce0000f9241cbp-1, -0x1.7da063caa81c8p-59},
|
||||||
|
{0x1.d1fffe8be95a4p-1, -0x1.82e3a411afcd9p-59},
|
||||||
|
{0x1.d5ffff035932bp-1, -0x1.00f901b3fe87dp-58},
|
||||||
|
{0x1.d9fffe8b54ba7p-1, 0x1.ffef55d6e3a4p-55},
|
||||||
|
{0x1.de0000ad95d19p-1, 0x1.5feb2efd4c7c7p-55},
|
||||||
|
{0x1.e1fffe925ce47p-1, 0x1.c8085484eaf08p-55},
|
||||||
|
{0x1.e5fffe3ddf853p-1, -0x1.fd5ed02c5cadp-60},
|
||||||
|
{0x1.e9fffed0a0e5fp-1, -0x1.a80aaef411586p-55},
|
||||||
|
{0x1.ee00008f82eep-1, -0x1.b000aeaf97276p-55},
|
||||||
|
{0x1.f20000a22d2f4p-1, -0x1.8f8906e13eba3p-56},
|
||||||
|
{0x1.f5fffee35b57dp-1, 0x1.1fdd33b2d3714p-57},
|
||||||
|
{0x1.fa00014eec3a6p-1, -0x1.3ee0b7a18c1a5p-58},
|
||||||
|
{0x1.fdffff5daa89fp-1, -0x1.c1e24c8e3b503p-58},
|
||||||
|
{0x1.0200005b93349p+0, -0x1.50197fe6bedcap-54},
|
||||||
|
{0x1.05ffff9d597acp+0, 0x1.20160d062d0dcp-55},
|
||||||
|
{0x1.0a00005687a63p+0, -0x1.27f3f9307696ep-54},
|
||||||
|
{0x1.0dffff779164ep+0, 0x1.b7eb40bb9c4f4p-54},
|
||||||
|
{0x1.12000044a0aa8p+0, 0x1.efbc914d512c4p-55},
|
||||||
|
{0x1.16000069685bcp+0, -0x1.c0bea3eb2d82cp-57},
|
||||||
|
{0x1.1a000093f0d78p+0, 0x1.1fecbf1e8c52p-54},
|
||||||
|
{0x1.1dffffb2b1457p+0, -0x1.3fc91365637d6p-55},
|
||||||
|
{0x1.2200008824a1p+0, -0x1.dff7e9feb578ap-54},
|
||||||
|
{0x1.25ffffeef953p+0, -0x1.b00a61ec912f7p-55},
|
||||||
|
{0x1.2a0000a1e7783p+0, 0x1.60048318b0483p-56},
|
||||||
|
{0x1.2e0000853d4c7p+0, -0x1.77fbedf2c8cf3p-54},
|
||||||
|
{0x1.320000324c55bp+0, 0x1.f81983997354fp-54},
|
||||||
|
{0x1.360000594f796p+0, -0x1.cfe4beff900a9p-54},
|
||||||
|
{0x1.3a0000a4c1c0fp+0, 0x1.07dbb2e268d0ep-54},
|
||||||
|
{0x1.3e0000751c61bp+0, 0x1.80583ed1c566ep-56},
|
||||||
|
{0x1.42000069e8a9fp+0, 0x1.f01f1edf82045p-54},
|
||||||
|
{0x1.460000b5a1e34p+0, -0x1.dfdf0cf45c14ap-55},
|
||||||
|
{0x1.4a0000187e513p+0, 0x1.401306b83a98dp-55},
|
||||||
|
{0x1.4dffff3ba420bp+0, 0x1.9fc6539a6454ep-56},
|
||||||
|
{0x1.51fffffe391c9p+0, -0x1.601ef3353ac83p-54},
|
||||||
|
{0x1.560000e342455p+0, 0x1.3fb7fac8ac151p-55},
|
||||||
|
{0x1.59ffffc39676fp+0, 0x1.4fe7dd6659cc2p-55},
|
||||||
|
{0x1.5dfffff10ef42p+0, -0x1.48154cb592bcbp-54},
|
||||||
|
# elif N == 128
|
||||||
{0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
|
{0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
|
||||||
{0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
|
{0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
|
||||||
{0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
|
{0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
|
||||||
|
@ -351,6 +526,7 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when
|
||||||
{0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
|
{0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
|
||||||
{0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
|
{0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
|
||||||
{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
|
{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
|
||||||
},
|
|
||||||
#endif
|
#endif
|
||||||
|
},
|
||||||
|
#endif /* !HAVE_FAST_FMA */
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,26 +0,0 @@
|
||||||
#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG_DATA_H_
|
|
||||||
#define COSMOPOLITAN_LIBC_TINYMATH_LOG_DATA_H_
|
|
||||||
|
|
||||||
#define LOG_TABLE_BITS 7
|
|
||||||
#define LOG_POLY_ORDER 6
|
|
||||||
#define LOG_POLY1_ORDER 12
|
|
||||||
|
|
||||||
COSMOPOLITAN_C_START_
|
|
||||||
|
|
||||||
extern const struct log_data {
|
|
||||||
double ln2hi;
|
|
||||||
double ln2lo;
|
|
||||||
double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
|
|
||||||
double poly1[LOG_POLY1_ORDER - 1];
|
|
||||||
struct {
|
|
||||||
double invc, logc;
|
|
||||||
} tab[1 << LOG_TABLE_BITS];
|
|
||||||
#if !__FP_FAST_FMA
|
|
||||||
struct {
|
|
||||||
double chi, clo;
|
|
||||||
} tab2[1 << LOG_TABLE_BITS];
|
|
||||||
#endif
|
|
||||||
} __log_data;
|
|
||||||
|
|
||||||
COSMOPOLITAN_C_END_
|
|
||||||
#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG_DATA_H_ */
|
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,19 +25,9 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/intrin/likely.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
#include "libc/tinymath/logf_data.internal.h"
|
|
||||||
__static_yoink("arm_optimized_routines_notice");
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
|
|
||||||
/*
|
|
||||||
* Single-precision log function.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2017-2018, Arm Limited.
|
|
||||||
* SPDX-License-Identifier: MIT
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
LOGF_TABLE_BITS = 4
|
LOGF_TABLE_BITS = 4
|
||||||
LOGF_POLY_ORDER = 4
|
LOGF_POLY_ORDER = 4
|
||||||
|
@ -53,50 +43,63 @@ Relative error: 1.957 * 2^-26 (before rounding.)
|
||||||
#define OFF 0x3f330000
|
#define OFF 0x3f330000
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns natural logarithm of 𝑥.
|
* Returns natural logarithm of x.
|
||||||
|
*
|
||||||
|
* - ULP error: 0.818 (nearest rounding.)
|
||||||
|
* - Relative error: 1.957 * 2^-26 (before rounding.)
|
||||||
*/
|
*/
|
||||||
float logf(float x)
|
float
|
||||||
|
logf (float x)
|
||||||
{
|
{
|
||||||
double_t z, r, r2, y, y0, invc, logc;
|
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||||
uint32_t ix, iz, tmp;
|
double_t z, r, r2, y, y0, invc, logc;
|
||||||
int k, i;
|
uint32_t ix, iz, tmp;
|
||||||
|
int k, i;
|
||||||
|
|
||||||
ix = asuint(x);
|
ix = asuint (x);
|
||||||
/* Fix sign of zero with downward rounding when x==1. */
|
#if WANT_ROUNDING
|
||||||
if (WANT_ROUNDING && UNLIKELY(ix == 0x3f800000))
|
/* Fix sign of zero with downward rounding when x==1. */
|
||||||
return 0;
|
if (unlikely (ix == 0x3f800000))
|
||||||
if (UNLIKELY(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
|
return 0;
|
||||||
/* x < 0x1p-126 or inf or nan. */
|
#endif
|
||||||
if (ix * 2 == 0)
|
if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
|
||||||
return __math_divzerof(1);
|
{
|
||||||
if (ix == 0x7f800000) /* log(inf) == inf. */
|
/* x < 0x1p-126 or inf or nan. */
|
||||||
return x;
|
if (ix * 2 == 0)
|
||||||
if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
|
return __math_divzerof (1);
|
||||||
return __math_invalidf(x);
|
if (ix == 0x7f800000) /* log(inf) == inf. */
|
||||||
/* x is subnormal, normalize it. */
|
return x;
|
||||||
ix = asuint(x * 0x1p23f);
|
if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
|
||||||
ix -= 23 << 23;
|
return __math_invalidf (x);
|
||||||
}
|
/* x is subnormal, normalize it. */
|
||||||
|
ix = asuint (x * 0x1p23f);
|
||||||
|
ix -= 23 << 23;
|
||||||
|
}
|
||||||
|
|
||||||
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
|
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
|
||||||
The range is split into N subintervals.
|
The range is split into N subintervals.
|
||||||
The ith subinterval contains z and c is near its center. */
|
The ith subinterval contains z and c is near its center. */
|
||||||
tmp = ix - OFF;
|
tmp = ix - OFF;
|
||||||
i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
|
i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
|
||||||
k = (int32_t)tmp >> 23; /* arithmetic shift */
|
k = (int32_t) tmp >> 23; /* arithmetic shift */
|
||||||
iz = ix - (tmp & 0xff800000);
|
iz = ix - (tmp & 0xff800000);
|
||||||
invc = T[i].invc;
|
invc = T[i].invc;
|
||||||
logc = T[i].logc;
|
logc = T[i].logc;
|
||||||
z = (double_t)asfloat(iz);
|
z = (double_t) asfloat (iz);
|
||||||
|
|
||||||
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
|
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
|
||||||
r = z * invc - 1;
|
r = z * invc - 1;
|
||||||
y0 = logc + (double_t)k * Ln2;
|
y0 = logc + (double_t) k * Ln2;
|
||||||
|
|
||||||
/* Pipelined polynomial evaluation to approximate log1p(r). */
|
/* Pipelined polynomial evaluation to approximate log1p(r). */
|
||||||
r2 = r * r;
|
r2 = r * r;
|
||||||
y = A[1] * r + A[2];
|
y = A[1] * r + A[2];
|
||||||
y = A[0] * r2 + y;
|
y = A[0] * r2 + y;
|
||||||
y = y * r2 + (y0 + r);
|
y = y * r2 + (y0 + r);
|
||||||
return eval_as_float(y);
|
return eval_as_float (y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if USE_GLIBC_ABI
|
||||||
|
strong_alias (logf, __logf_finite)
|
||||||
|
hidden_alias (logf, __ieee754_logf)
|
||||||
|
#endif
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,16 +25,9 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/tinymath/logf_data.internal.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
__static_yoink("arm_optimized_routines_notice");
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
|
|
||||||
/*
|
|
||||||
* Data definition for logf.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2017-2018, Arm Limited.
|
|
||||||
* SPDX-License-Identifier: MIT
|
|
||||||
*/
|
|
||||||
|
|
||||||
const struct logf_data __logf_data = {
|
const struct logf_data __logf_data = {
|
||||||
.tab = {
|
.tab = {
|
||||||
{ 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
|
{ 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
|
||||||
|
|
|
@ -1,18 +0,0 @@
|
||||||
#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOGF_DATA_INTERNAL_H_
|
|
||||||
#define COSMOPOLITAN_LIBC_TINYMATH_LOGF_DATA_INTERNAL_H_
|
|
||||||
|
|
||||||
#define LOGF_TABLE_BITS 4
|
|
||||||
#define LOGF_POLY_ORDER 4
|
|
||||||
|
|
||||||
COSMOPOLITAN_C_START_
|
|
||||||
|
|
||||||
extern const struct logf_data {
|
|
||||||
struct {
|
|
||||||
double invc, logc;
|
|
||||||
} tab[1 << LOGF_TABLE_BITS];
|
|
||||||
double ln2;
|
|
||||||
double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */
|
|
||||||
} __logf_data;
|
|
||||||
|
|
||||||
COSMOPOLITAN_C_END_
|
|
||||||
#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOGF_DATA_INTERNAL_H_ */
|
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,16 +25,76 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/tinymath/atan_data.internal.h"
|
#include "libc/errno.h"
|
||||||
|
#include "libc/tinymath/arm.internal.h"
|
||||||
|
|
||||||
const struct atan_poly_data __atan_poly_data = {
|
#if WANT_ERRNO
|
||||||
.poly = {/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
|
/* dontinline reduces code size and avoids making math functions non-leaf
|
||||||
[2**-1022, 1.0]. See atan.sollya for details of how these were
|
when the error handling is inlined. */
|
||||||
generated. */
|
dontinline static double
|
||||||
-0x1.5555555555555p-2, 0x1.99999999996c1p-3, -0x1.2492492478f88p-3,
|
with_errno (double y, int e)
|
||||||
0x1.c71c71bc3951cp-4, -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4,
|
{
|
||||||
-0x1.11100ee084227p-4, 0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5,
|
errno = e;
|
||||||
0x1.842dbe9b0d916p-5, -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5,
|
return y;
|
||||||
-0x1.00e6eece7de8p-5, 0x1.860897b29e5efp-6, -0x1.0051381722a59p-6,
|
}
|
||||||
0x1.14e9dc19a4a4ep-7, -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10,
|
#else
|
||||||
-0x1.ab24da7be7402p-13, 0x1.358851160a528p-16}};
|
#define with_errno(x, e) (x)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* dontinline reduces code size. */
|
||||||
|
dontinline static double
|
||||||
|
xflow (uint32_t sign, double y)
|
||||||
|
{
|
||||||
|
y = eval_as_double (opt_barrier_double (sign ? -y : y) * y);
|
||||||
|
return with_errno (y, ERANGE);
|
||||||
|
}
|
||||||
|
|
||||||
|
double
|
||||||
|
__math_uflow (uint32_t sign)
|
||||||
|
{
|
||||||
|
return xflow (sign, 0x1p-767);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if WANT_ERRNO_UFLOW
|
||||||
|
/* The result may underflow to zero in some non-nearest rounding modes;
|
||||||
|
setting errno is valid even if the result is non-zero but subnormal. */
|
||||||
|
double
|
||||||
|
__math_may_uflow (uint32_t sign)
|
||||||
|
{
|
||||||
|
return xflow (sign, 0x1.8p-538);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
double
|
||||||
|
__math_oflow (uint32_t sign)
|
||||||
|
{
|
||||||
|
return xflow (sign, 0x1p769);
|
||||||
|
}
|
||||||
|
|
||||||
|
double
|
||||||
|
__math_divzero (uint32_t sign)
|
||||||
|
{
|
||||||
|
double y = opt_barrier_double (sign ? -1.0 : 1.0) / 0.0;
|
||||||
|
return with_errno (y, ERANGE);
|
||||||
|
}
|
||||||
|
|
||||||
|
dontinstrument double
|
||||||
|
__math_invalid (double x)
|
||||||
|
{
|
||||||
|
double y = (x - x) / (x - x);
|
||||||
|
return isnan (x) ? y : with_errno (y, EDOM);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check result and set errno if necessary. */
|
||||||
|
|
||||||
|
dontinstrument double
|
||||||
|
__math_check_uflow (double y)
|
||||||
|
{
|
||||||
|
return y == 0.0 ? with_errno (y, ERANGE) : y;
|
||||||
|
}
|
||||||
|
|
||||||
|
dontinstrument double
|
||||||
|
__math_check_oflow (double y)
|
||||||
|
{
|
||||||
|
return isinf (y) ? with_errno (y, ERANGE) : y;
|
||||||
|
}
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Musl Libc │
|
│ Optimized Routines │
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,27 +25,76 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/complex.h"
|
#include "libc/errno.h"
|
||||||
#include "libc/math.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
#include "libc/tinymath/complex.internal.h"
|
|
||||||
__static_yoink("musl_libc_notice");
|
|
||||||
|
|
||||||
// FIXME
|
#if WANT_ERRNO
|
||||||
|
/* dontinline reduces code size and avoids making math functions non-leaf
|
||||||
/* asin(z) = -i log(i z + sqrt(1 - z*z)) */
|
when the error handling is inlined. */
|
||||||
|
dontinline static float
|
||||||
double complex casin(double complex z)
|
with_errnof (float y, int e)
|
||||||
{
|
{
|
||||||
double complex w;
|
errno = e;
|
||||||
double x, y;
|
return y;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
#define with_errnof(x, e) (x)
|
||||||
|
#endif
|
||||||
|
|
||||||
x = creal(z);
|
/* dontinline reduces code size. */
|
||||||
y = cimag(z);
|
dontinline static float
|
||||||
w = CMPLX(1.0 - (x - y)*(x + y), -2.0*x*y);
|
xflowf (uint32_t sign, float y)
|
||||||
double complex r = clog(CMPLX(-y, x) + csqrt(w));
|
{
|
||||||
return CMPLX(cimag(r), -creal(r));
|
y = eval_as_float (opt_barrier_float (sign ? -y : y) * y);
|
||||||
|
return with_errnof (y, ERANGE);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
|
float
|
||||||
__weak_reference(casin, casinl);
|
__math_uflowf (uint32_t sign)
|
||||||
|
{
|
||||||
|
return xflowf (sign, 0x1p-95f);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if WANT_ERRNO_UFLOW
|
||||||
|
/* The result may underflow to zero in some non-nearest rounding modes;
|
||||||
|
setting errno is valid even if the result is non-zero but subnormal. */
|
||||||
|
float
|
||||||
|
__math_may_uflowf (uint32_t sign)
|
||||||
|
{
|
||||||
|
return xflowf (sign, 0x1.4p-75f);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
float
|
||||||
|
__math_oflowf (uint32_t sign)
|
||||||
|
{
|
||||||
|
return xflowf (sign, 0x1p97f);
|
||||||
|
}
|
||||||
|
|
||||||
|
float
|
||||||
|
__math_divzerof (uint32_t sign)
|
||||||
|
{
|
||||||
|
float y = opt_barrier_float (sign ? -1.0f : 1.0f) / 0.0f;
|
||||||
|
return with_errnof (y, ERANGE);
|
||||||
|
}
|
||||||
|
|
||||||
|
dontinstrument float
|
||||||
|
__math_invalidf (float x)
|
||||||
|
{
|
||||||
|
float y = (x - x) / (x - x);
|
||||||
|
return isnan (x) ? y : with_errnof (y, EDOM);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check result and set errno if necessary. */
|
||||||
|
|
||||||
|
dontinstrument float
|
||||||
|
__math_check_uflowf (float y)
|
||||||
|
{
|
||||||
|
return y == 0.0f ? with_errnof (y, ERANGE) : y;
|
||||||
|
}
|
||||||
|
|
||||||
|
dontinstrument float
|
||||||
|
__math_check_oflowf (float y)
|
||||||
|
{
|
||||||
|
return isinf (y) ? with_errnof (y, ERANGE) : y;
|
||||||
|
}
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,11 +25,25 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/tinymath/atanf_data.internal.h"
|
#include "libc/errno.h"
|
||||||
|
#include "libc/tinymath/arm.internal.h"
|
||||||
|
|
||||||
/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0].
|
#if WANT_ERRNO
|
||||||
*/
|
/* dontinline reduces code size and avoids making math functions non-leaf
|
||||||
const struct atanf_poly_data __atanf_poly_data = {
|
when the error handling is inlined. */
|
||||||
.poly = {/* See atanf.sollya for details of how these were generated. */
|
dontinline static long double
|
||||||
-0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f,
|
with_errnol (long double y, int e)
|
||||||
-0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f}};
|
{
|
||||||
|
errno = e;
|
||||||
|
return y;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
#define with_errnol(x, e) (x)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
dontinstrument long double
|
||||||
|
__math_invalidl (long double x)
|
||||||
|
{
|
||||||
|
long double y = (x - x) / (x - x);
|
||||||
|
return isnan (x) ? y : with_errnol (y, EDOM);
|
||||||
|
}
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Musl Libc │
|
│ Optimized Routines │
|
||||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,20 +25,9 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/intrin/likely.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/exp_data.internal.h"
|
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
#include "libc/tinymath/pow_data.internal.h"
|
|
||||||
__static_yoink("arm_optimized_routines_notice");
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
|
|
||||||
/*
|
|
||||||
* Double-precision x^y function.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2018, Arm Limited.
|
|
||||||
* SPDX-License-Identifier: MIT
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
|
Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
|
||||||
relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma)
|
relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma)
|
||||||
|
@ -53,79 +42,83 @@ ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
|
||||||
#define OFF 0x3fe6955500000000
|
#define OFF 0x3fe6955500000000
|
||||||
|
|
||||||
/* Top 12 bits of a double (sign and exponent bits). */
|
/* Top 12 bits of a double (sign and exponent bits). */
|
||||||
static inline uint32_t top12(double x)
|
static inline uint32_t
|
||||||
|
top12 (double x)
|
||||||
{
|
{
|
||||||
return asuint64(x) >> 52;
|
return asuint64 (x) >> 52;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
|
/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
|
||||||
additional 15 bits precision. IX is the bit representation of x, but
|
additional 15 bits precision. IX is the bit representation of x, but
|
||||||
normalized in the subnormal range using the sign bit for the exponent. */
|
normalized in the subnormal range using the sign bit for the exponent. */
|
||||||
static inline double_t log_inline(uint64_t ix, double_t *tail)
|
static inline double_t
|
||||||
|
log_inline (uint64_t ix, double_t *tail)
|
||||||
{
|
{
|
||||||
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||||
double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
|
double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
|
||||||
uint64_t iz, tmp;
|
uint64_t iz, tmp;
|
||||||
int k, i;
|
int k, i;
|
||||||
|
|
||||||
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
|
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
|
||||||
The range is split into N subintervals.
|
The range is split into N subintervals.
|
||||||
The ith subinterval contains z and c is near its center. */
|
The ith subinterval contains z and c is near its center. */
|
||||||
tmp = ix - OFF;
|
tmp = ix - OFF;
|
||||||
i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
|
i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
|
||||||
k = (int64_t)tmp >> 52; /* arithmetic shift */
|
k = (int64_t) tmp >> 52; /* arithmetic shift */
|
||||||
iz = ix - (tmp & 0xfffULL << 52);
|
iz = ix - (tmp & 0xfffULL << 52);
|
||||||
z = asdouble(iz);
|
z = asdouble (iz);
|
||||||
kd = (double_t)k;
|
kd = (double_t) k;
|
||||||
|
|
||||||
/* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
|
/* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
|
||||||
invc = T[i].invc;
|
invc = T[i].invc;
|
||||||
logc = T[i].logc;
|
logc = T[i].logc;
|
||||||
logctail = T[i].logctail;
|
logctail = T[i].logctail;
|
||||||
|
|
||||||
/* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
|
/* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
|
||||||
|z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. */
|
|z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. */
|
||||||
#if __FP_FAST_FMA
|
#if HAVE_FAST_FMA
|
||||||
r = __builtin_fma(z, invc, -1.0);
|
r = fma (z, invc, -1.0);
|
||||||
#else
|
#else
|
||||||
/* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
|
/* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
|
||||||
double_t zhi = asdouble((iz + (1ULL << 31)) & (-1ULL << 32));
|
double_t zhi = asdouble ((iz + (1ULL << 31)) & (-1ULL << 32));
|
||||||
double_t zlo = z - zhi;
|
double_t zlo = z - zhi;
|
||||||
double_t rhi = zhi * invc - 1.0;
|
double_t rhi = zhi * invc - 1.0;
|
||||||
double_t rlo = zlo * invc;
|
double_t rlo = zlo * invc;
|
||||||
r = rhi + rlo;
|
r = rhi + rlo;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* k*Ln2 + log(c) + r. */
|
/* k*Ln2 + log(c) + r. */
|
||||||
t1 = kd * Ln2hi + logc;
|
t1 = kd * Ln2hi + logc;
|
||||||
t2 = t1 + r;
|
t2 = t1 + r;
|
||||||
lo1 = kd * Ln2lo + logctail;
|
lo1 = kd * Ln2lo + logctail;
|
||||||
lo2 = t1 - t2 + r;
|
lo2 = t1 - t2 + r;
|
||||||
|
|
||||||
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
||||||
double_t ar, ar2, ar3, lo3, lo4;
|
double_t ar, ar2, ar3, lo3, lo4;
|
||||||
ar = A[0] * r; /* A[0] = -0.5. */
|
ar = A[0] * r; /* A[0] = -0.5. */
|
||||||
ar2 = r * ar;
|
ar2 = r * ar;
|
||||||
ar3 = r * ar2;
|
ar3 = r * ar2;
|
||||||
/* k*Ln2 + log(c) + r + A[0]*r*r. */
|
/* k*Ln2 + log(c) + r + A[0]*r*r. */
|
||||||
#if __FP_FAST_FMA
|
#if HAVE_FAST_FMA
|
||||||
hi = t2 + ar2;
|
hi = t2 + ar2;
|
||||||
lo3 = __builtin_fma(ar, r, -ar2);
|
lo3 = fma (ar, r, -ar2);
|
||||||
lo4 = t2 - hi + ar2;
|
lo4 = t2 - hi + ar2;
|
||||||
#else
|
#else
|
||||||
double_t arhi = A[0] * rhi;
|
double_t arhi = A[0] * rhi;
|
||||||
double_t arhi2 = rhi * arhi;
|
double_t arhi2 = rhi * arhi;
|
||||||
hi = t2 + arhi2;
|
hi = t2 + arhi2;
|
||||||
lo3 = rlo * (ar + arhi);
|
lo3 = rlo * (ar + arhi);
|
||||||
lo4 = t2 - hi + arhi2;
|
lo4 = t2 - hi + arhi2;
|
||||||
#endif
|
#endif
|
||||||
/* p = log1p(r) - r - A[0]*r*r. */
|
/* p = log1p(r) - r - A[0]*r*r. */
|
||||||
p = (ar3 * (A[1] + r * A[2] +
|
#if POW_LOG_POLY_ORDER == 8
|
||||||
ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
|
p = (ar3
|
||||||
lo = lo1 + lo2 + lo3 + lo4 + p;
|
* (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
|
||||||
y = hi + lo;
|
#endif
|
||||||
*tail = hi - y + lo;
|
lo = lo1 + lo2 + lo3 + lo4 + p;
|
||||||
return y;
|
y = hi + lo;
|
||||||
|
*tail = hi - y + lo;
|
||||||
|
return y;
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef N
|
#undef N
|
||||||
|
@ -149,232 +142,268 @@ static inline double_t log_inline(uint64_t ix, double_t *tail)
|
||||||
a double. (int32_t)KI is the k used in the argument reduction and exponent
|
a double. (int32_t)KI is the k used in the argument reduction and exponent
|
||||||
adjustment of scale, positive k here means the result may overflow and
|
adjustment of scale, positive k here means the result may overflow and
|
||||||
negative k means the result may underflow. */
|
negative k means the result may underflow. */
|
||||||
forceinline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
|
static inline double
|
||||||
|
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
|
||||||
{
|
{
|
||||||
double_t scale, y;
|
double_t scale, y;
|
||||||
|
|
||||||
if ((ki & 0x80000000) == 0) {
|
if ((ki & 0x80000000) == 0)
|
||||||
/* k > 0, the exponent of scale might have overflowed by <= 460. */
|
{
|
||||||
sbits -= 1009ull << 52;
|
/* k > 0, the exponent of scale might have overflowed by <= 460. */
|
||||||
scale = asdouble(sbits);
|
sbits -= 1009ull << 52;
|
||||||
y = 0x1p1009 * (scale + scale * tmp);
|
scale = asdouble (sbits);
|
||||||
return eval_as_double(y);
|
y = 0x1p1009 * (scale + scale * tmp);
|
||||||
}
|
return check_oflow (eval_as_double (y));
|
||||||
/* k < 0, need special care in the subnormal range. */
|
}
|
||||||
sbits += 1022ull << 52;
|
/* k < 0, need special care in the subnormal range. */
|
||||||
/* Note: sbits is signed scale. */
|
sbits += 1022ull << 52;
|
||||||
scale = asdouble(sbits);
|
/* Note: sbits is signed scale. */
|
||||||
y = scale + scale * tmp;
|
scale = asdouble (sbits);
|
||||||
if (fabs(y) < 1.0) {
|
y = scale + scale * tmp;
|
||||||
/* Round y to the right precision before scaling it into the subnormal
|
if (fabs (y) < 1.0)
|
||||||
range to avoid double rounding that can cause 0.5+E/2 ulp error where
|
{
|
||||||
E is the worst-case ulp error outside the subnormal range. So this
|
/* Round y to the right precision before scaling it into the subnormal
|
||||||
is only useful if the goal is better than 1 ulp worst-case error. */
|
range to avoid double rounding that can cause 0.5+E/2 ulp error where
|
||||||
double_t hi, lo, one = 1.0;
|
E is the worst-case ulp error outside the subnormal range. So this
|
||||||
if (y < 0.0)
|
is only useful if the goal is better than 1 ulp worst-case error. */
|
||||||
one = -1.0;
|
double_t hi, lo, one = 1.0;
|
||||||
lo = scale - y + scale * tmp;
|
if (y < 0.0)
|
||||||
hi = one + y;
|
one = -1.0;
|
||||||
lo = one - hi + y + lo;
|
lo = scale - y + scale * tmp;
|
||||||
y = eval_as_double(hi + lo) - one;
|
hi = one + y;
|
||||||
/* Fix the sign of 0. */
|
lo = one - hi + y + lo;
|
||||||
if (y == 0.0)
|
y = eval_as_double (hi + lo) - one;
|
||||||
y = asdouble(sbits & 0x8000000000000000);
|
/* Fix the sign of 0. */
|
||||||
/* The underflow exception needs to be signaled explicitly. */
|
if (y == 0.0)
|
||||||
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
|
y = asdouble (sbits & 0x8000000000000000);
|
||||||
}
|
/* The underflow exception needs to be signaled explicitly. */
|
||||||
y = 0x1p-1022 * y;
|
force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
|
||||||
return eval_as_double(y);
|
}
|
||||||
|
y = 0x1p-1022 * y;
|
||||||
|
return check_uflow (eval_as_double (y));
|
||||||
}
|
}
|
||||||
|
|
||||||
#define SIGN_BIAS (0x800 << EXP_TABLE_BITS)
|
#define SIGN_BIAS (0x800 << EXP_TABLE_BITS)
|
||||||
|
|
||||||
/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
|
/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
|
||||||
The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */
|
The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */
|
||||||
forceinline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias)
|
static inline double
|
||||||
|
exp_inline (double_t x, double_t xtail, uint32_t sign_bias)
|
||||||
{
|
{
|
||||||
uint32_t abstop;
|
uint32_t abstop;
|
||||||
uint64_t ki, idx, top, sbits;
|
uint64_t ki, idx, top, sbits;
|
||||||
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||||
double_t kd, z, r, r2, scale, tail, tmp;
|
double_t kd, z, r, r2, scale, tail, tmp;
|
||||||
|
|
||||||
abstop = top12(x) & 0x7ff;
|
abstop = top12 (x) & 0x7ff;
|
||||||
if (UNLIKELY(abstop - top12(0x1p-54) >=
|
if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
|
||||||
top12(512.0) - top12(0x1p-54))) {
|
{
|
||||||
if (abstop - top12(0x1p-54) >= 0x80000000) {
|
if (abstop - top12 (0x1p-54) >= 0x80000000)
|
||||||
/* Avoid spurious underflow for tiny x. */
|
{
|
||||||
/* Note: 0 is common input. */
|
/* Avoid spurious underflow for tiny x. */
|
||||||
double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
|
/* Note: 0 is common input. */
|
||||||
return sign_bias ? -one : one;
|
double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
|
||||||
}
|
return sign_bias ? -one : one;
|
||||||
if (abstop >= top12(1024.0)) {
|
|
||||||
/* Note: inf and nan are already handled. */
|
|
||||||
if (asuint64(x) >> 63)
|
|
||||||
return __math_uflow(sign_bias);
|
|
||||||
else
|
|
||||||
return __math_oflow(sign_bias);
|
|
||||||
}
|
|
||||||
/* Large x is special cased below. */
|
|
||||||
abstop = 0;
|
|
||||||
}
|
}
|
||||||
|
if (abstop >= top12 (1024.0))
|
||||||
|
{
|
||||||
|
/* Note: inf and nan are already handled. */
|
||||||
|
if (asuint64 (x) >> 63)
|
||||||
|
return __math_uflow (sign_bias);
|
||||||
|
else
|
||||||
|
return __math_oflow (sign_bias);
|
||||||
|
}
|
||||||
|
/* Large x is special cased below. */
|
||||||
|
abstop = 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
|
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
|
||||||
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
|
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
|
||||||
z = InvLn2N * x;
|
z = InvLn2N * x;
|
||||||
#if TOINT_INTRINSICS
|
#if TOINT_INTRINSICS
|
||||||
kd = roundtoint(z);
|
kd = roundtoint (z);
|
||||||
ki = converttoint(z);
|
ki = converttoint (z);
|
||||||
#elif EXP_USE_TOINT_NARROW
|
#elif EXP_USE_TOINT_NARROW
|
||||||
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
|
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
|
||||||
kd = eval_as_double(z + Shift);
|
kd = eval_as_double (z + Shift);
|
||||||
ki = asuint64(kd) >> 16;
|
ki = asuint64 (kd) >> 16;
|
||||||
kd = (double_t)(int32_t)ki;
|
kd = (double_t) (int32_t) ki;
|
||||||
#else
|
#else
|
||||||
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
|
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
|
||||||
kd = eval_as_double(z + Shift);
|
kd = eval_as_double (z + Shift);
|
||||||
ki = asuint64(kd);
|
ki = asuint64 (kd);
|
||||||
kd -= Shift;
|
kd -= Shift;
|
||||||
#endif
|
#endif
|
||||||
r = x + kd * NegLn2hiN + kd * NegLn2loN;
|
r = x + kd * NegLn2hiN + kd * NegLn2loN;
|
||||||
/* The code assumes 2^-200 < |xtail| < 2^-8/N. */
|
/* The code assumes 2^-200 < |xtail| < 2^-8/N. */
|
||||||
r += xtail;
|
r += xtail;
|
||||||
/* 2^(k/N) ~= scale * (1 + tail). */
|
/* 2^(k/N) ~= scale * (1 + tail). */
|
||||||
idx = 2 * (ki % N);
|
idx = 2 * (ki % N);
|
||||||
top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
|
top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
|
||||||
tail = asdouble(T[idx]);
|
tail = asdouble (T[idx]);
|
||||||
/* This is only a valid scale when -1023*N < k < 1024*N. */
|
/* This is only a valid scale when -1023*N < k < 1024*N. */
|
||||||
sbits = T[idx + 1] + top;
|
sbits = T[idx + 1] + top;
|
||||||
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
|
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
|
||||||
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
||||||
r2 = r * r;
|
r2 = r * r;
|
||||||
/* Without fma the worst case error is 0.25/N ulp larger. */
|
/* Without fma the worst case error is 0.25/N ulp larger. */
|
||||||
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
|
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
|
||||||
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
|
#if EXP_POLY_ORDER == 4
|
||||||
if (UNLIKELY(abstop == 0))
|
tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
|
||||||
return specialcase(tmp, sbits, ki);
|
#elif EXP_POLY_ORDER == 5
|
||||||
scale = asdouble(sbits);
|
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
|
||||||
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
|
#elif EXP_POLY_ORDER == 6
|
||||||
is no spurious underflow here even without fma. */
|
tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
|
||||||
return eval_as_double(scale + scale * tmp);
|
#endif
|
||||||
|
if (unlikely (abstop == 0))
|
||||||
|
return specialcase (tmp, sbits, ki);
|
||||||
|
scale = asdouble (sbits);
|
||||||
|
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
|
||||||
|
is no spurious underflow here even without fma. */
|
||||||
|
return eval_as_double (scale + scale * tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
|
/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
|
||||||
the bit representation of a non-zero finite floating-point value. */
|
the bit representation of a non-zero finite floating-point value. */
|
||||||
static inline int checkint(uint64_t iy)
|
static inline int
|
||||||
|
checkint (uint64_t iy)
|
||||||
{
|
{
|
||||||
int e = iy >> 52 & 0x7ff;
|
int e = iy >> 52 & 0x7ff;
|
||||||
if (e < 0x3ff)
|
if (e < 0x3ff)
|
||||||
return 0;
|
return 0;
|
||||||
if (e > 0x3ff + 52)
|
if (e > 0x3ff + 52)
|
||||||
return 2;
|
return 2;
|
||||||
if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
|
if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
|
||||||
return 0;
|
return 0;
|
||||||
if (iy & (1ULL << (0x3ff + 52 - e)))
|
if (iy & (1ULL << (0x3ff + 52 - e)))
|
||||||
return 1;
|
return 1;
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Returns 1 if input is the bit representation of 0, infinity or nan. */
|
/* Returns 1 if input is the bit representation of 0, infinity or nan. */
|
||||||
static inline int zeroinfnan(uint64_t i)
|
static inline int
|
||||||
|
zeroinfnan (uint64_t i)
|
||||||
{
|
{
|
||||||
return 2 * i - 1 >= 2 * asuint64(INFINITY) - 1;
|
return 2 * i - 1 >= 2 * asuint64 (INFINITY) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns 𝑥^𝑦.
|
* Returns 𝑥^𝑦.
|
||||||
* @note should take ~18ns
|
*
|
||||||
|
* - Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
|
||||||
|
* - relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma)
|
||||||
|
* - ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
|
||||||
|
*
|
||||||
|
* @raise ERANGE on overflow or underflow
|
||||||
|
* @raise EDOM if x is negative and y is a finite non-integer
|
||||||
*/
|
*/
|
||||||
double pow(double x, double y)
|
double
|
||||||
|
pow (double x, double y)
|
||||||
{
|
{
|
||||||
uint32_t sign_bias = 0;
|
uint32_t sign_bias = 0;
|
||||||
uint64_t ix, iy;
|
uint64_t ix, iy;
|
||||||
uint32_t topx, topy;
|
uint32_t topx, topy;
|
||||||
|
|
||||||
ix = asuint64(x);
|
ix = asuint64 (x);
|
||||||
iy = asuint64(y);
|
iy = asuint64 (y);
|
||||||
topx = top12(x);
|
topx = top12 (x);
|
||||||
topy = top12(y);
|
topy = top12 (y);
|
||||||
if (UNLIKELY(topx - 0x001 >= 0x7ff - 0x001 ||
|
if (unlikely (topx - 0x001 >= 0x7ff - 0x001
|
||||||
(topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)) {
|
|| (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be))
|
||||||
/* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
|
{
|
||||||
and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
|
/* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
|
||||||
/* Special cases: (x < 0x1p-126 or inf or nan) or
|
and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
|
||||||
(|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
|
/* Special cases: (x < 0x1p-126 or inf or nan) or
|
||||||
if (UNLIKELY(zeroinfnan(iy))) {
|
(|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
|
||||||
if (2 * iy == 0)
|
if (unlikely (zeroinfnan (iy)))
|
||||||
return issignaling_inline(x) ? x + y : 1.0;
|
{
|
||||||
if (ix == asuint64(1.0))
|
if (2 * iy == 0)
|
||||||
return issignaling_inline(y) ? x + y : 1.0;
|
return issignaling_inline (x) ? x + y : 1.0;
|
||||||
if (2 * ix > 2 * asuint64(INFINITY) ||
|
if (ix == asuint64 (1.0))
|
||||||
2 * iy > 2 * asuint64(INFINITY))
|
return issignaling_inline (y) ? x + y : 1.0;
|
||||||
return x + y;
|
if (2 * ix > 2 * asuint64 (INFINITY)
|
||||||
if (2 * ix == 2 * asuint64(1.0))
|
|| 2 * iy > 2 * asuint64 (INFINITY))
|
||||||
return 1.0;
|
return x + y;
|
||||||
if ((2 * ix < 2 * asuint64(1.0)) == !(iy >> 63))
|
if (2 * ix == 2 * asuint64 (1.0))
|
||||||
return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
|
return 1.0;
|
||||||
return y * y;
|
if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63))
|
||||||
}
|
return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
|
||||||
if (UNLIKELY(zeroinfnan(ix))) {
|
return y * y;
|
||||||
double_t x2 = x * x;
|
|
||||||
if (ix >> 63 && checkint(iy) == 1)
|
|
||||||
x2 = -x2;
|
|
||||||
/* Without the barrier some versions of clang hoist the 1/x2 and
|
|
||||||
thus division by zero exception can be signaled spuriously. */
|
|
||||||
return iy >> 63 ? fp_barrier(1 / x2) : x2;
|
|
||||||
}
|
|
||||||
/* Here x and y are non-zero finite. */
|
|
||||||
if (ix >> 63) {
|
|
||||||
/* Finite x < 0. */
|
|
||||||
int yint = checkint(iy);
|
|
||||||
if (yint == 0)
|
|
||||||
return __math_invalid(x);
|
|
||||||
if (yint == 1)
|
|
||||||
sign_bias = SIGN_BIAS;
|
|
||||||
ix &= 0x7fffffffffffffff;
|
|
||||||
topx &= 0x7ff;
|
|
||||||
}
|
|
||||||
if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
|
|
||||||
/* Note: sign_bias == 0 here because y is not odd. */
|
|
||||||
if (ix == asuint64(1.0))
|
|
||||||
return 1.0;
|
|
||||||
if ((topy & 0x7ff) < 0x3be) {
|
|
||||||
/* |y| < 2^-65, x^y ~= 1 + y*log(x). */
|
|
||||||
if (WANT_ROUNDING)
|
|
||||||
return ix > asuint64(1.0) ? 1.0 + y :
|
|
||||||
1.0 - y;
|
|
||||||
else
|
|
||||||
return 1.0;
|
|
||||||
}
|
|
||||||
return (ix > asuint64(1.0)) == (topy < 0x800) ?
|
|
||||||
__math_oflow(0) :
|
|
||||||
__math_uflow(0);
|
|
||||||
}
|
|
||||||
if (topx == 0) {
|
|
||||||
/* Normalize subnormal x so exponent becomes negative. */
|
|
||||||
ix = asuint64(x * 0x1p52);
|
|
||||||
ix &= 0x7fffffffffffffff;
|
|
||||||
ix -= 52ULL << 52;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
if (unlikely (zeroinfnan (ix)))
|
||||||
|
{
|
||||||
|
double_t x2 = x * x;
|
||||||
|
if (ix >> 63 && checkint (iy) == 1)
|
||||||
|
{
|
||||||
|
x2 = -x2;
|
||||||
|
sign_bias = 1;
|
||||||
|
}
|
||||||
|
if (WANT_ERRNO && 2 * ix == 0 && iy >> 63)
|
||||||
|
return __math_divzero (sign_bias);
|
||||||
|
/* Without the barrier some versions of clang hoist the 1/x2 and
|
||||||
|
thus division by zero exception can be signaled spuriously. */
|
||||||
|
return iy >> 63 ? opt_barrier_double (1 / x2) : x2;
|
||||||
|
}
|
||||||
|
/* Here x and y are non-zero finite. */
|
||||||
|
if (ix >> 63)
|
||||||
|
{
|
||||||
|
/* Finite x < 0. */
|
||||||
|
int yint = checkint (iy);
|
||||||
|
if (yint == 0)
|
||||||
|
return __math_invalid (x);
|
||||||
|
if (yint == 1)
|
||||||
|
sign_bias = SIGN_BIAS;
|
||||||
|
ix &= 0x7fffffffffffffff;
|
||||||
|
topx &= 0x7ff;
|
||||||
|
}
|
||||||
|
if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)
|
||||||
|
{
|
||||||
|
/* Note: sign_bias == 0 here because y is not odd. */
|
||||||
|
if (ix == asuint64 (1.0))
|
||||||
|
return 1.0;
|
||||||
|
if ((topy & 0x7ff) < 0x3be)
|
||||||
|
{
|
||||||
|
/* |y| < 2^-65, x^y ~= 1 + y*log(x). */
|
||||||
|
if (WANT_ROUNDING)
|
||||||
|
return ix > asuint64 (1.0) ? 1.0 + y : 1.0 - y;
|
||||||
|
else
|
||||||
|
return 1.0;
|
||||||
|
}
|
||||||
|
return (ix > asuint64 (1.0)) == (topy < 0x800) ? __math_oflow (0)
|
||||||
|
: __math_uflow (0);
|
||||||
|
}
|
||||||
|
if (topx == 0)
|
||||||
|
{
|
||||||
|
/* Normalize subnormal x so exponent becomes negative. */
|
||||||
|
/* Without the barrier some versions of clang evaluate the mul
|
||||||
|
unconditionally causing spurious overflow exceptions. */
|
||||||
|
ix = asuint64 (opt_barrier_double (x) * 0x1p52);
|
||||||
|
ix &= 0x7fffffffffffffff;
|
||||||
|
ix -= 52ULL << 52;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
double_t lo;
|
double_t lo;
|
||||||
double_t hi = log_inline(ix, &lo);
|
double_t hi = log_inline (ix, &lo);
|
||||||
double_t ehi, elo;
|
double_t ehi, elo;
|
||||||
#if __FP_FAST_FMA
|
#if HAVE_FAST_FMA
|
||||||
ehi = y * hi;
|
ehi = y * hi;
|
||||||
elo = y * lo + __builtin_fma(y, hi, -ehi);
|
elo = y * lo + fma (y, hi, -ehi);
|
||||||
#else
|
#else
|
||||||
double_t yhi = asdouble(iy & -1ULL << 27);
|
double_t yhi = asdouble (iy & -1ULL << 27);
|
||||||
double_t ylo = y - yhi;
|
double_t ylo = y - yhi;
|
||||||
double_t lhi = asdouble(asuint64(hi) & -1ULL << 27);
|
double_t lhi = asdouble (asuint64 (hi) & -1ULL << 27);
|
||||||
double_t llo = hi - lhi + lo;
|
double_t llo = hi - lhi + lo;
|
||||||
ehi = yhi * lhi;
|
ehi = yhi * lhi;
|
||||||
elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
|
elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
|
||||||
#endif
|
#endif
|
||||||
return exp_inline(ehi, elo, sign_bias);
|
return exp_inline (ehi, elo, sign_bias);
|
||||||
}
|
}
|
||||||
|
|
||||||
__weak_reference(pow, __pow_finite);
|
#if USE_GLIBC_ABI
|
||||||
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
|
strong_alias (pow, __pow_finite)
|
||||||
__weak_reference(pow, powl);
|
hidden_alias (pow, __ieee754_pow)
|
||||||
|
# if LDBL_MANT_DIG == 53
|
||||||
|
long double powl (long double x, long double y) { return pow (x, y); }
|
||||||
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,22 +25,16 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/tinymath/pow_data.internal.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
__static_yoink("arm_optimized_routines_notice");
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
|
|
||||||
/*
|
|
||||||
* Data for the log part of pow.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2018, Arm Limited.
|
|
||||||
* SPDX-License-Identifier: MIT
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define N (1 << POW_LOG_TABLE_BITS)
|
#define N (1 << POW_LOG_TABLE_BITS)
|
||||||
|
|
||||||
const struct pow_log_data __pow_log_data = {
|
const struct pow_log_data __pow_log_data = {
|
||||||
.ln2hi = 0x1.62e42fefa3800p-1,
|
.ln2hi = 0x1.62e42fefa3800p-1,
|
||||||
.ln2lo = 0x1.ef35793c76730p-45,
|
.ln2lo = 0x1.ef35793c76730p-45,
|
||||||
.poly = {
|
.poly = {
|
||||||
|
#if N == 128 && POW_LOG_POLY_ORDER == 8
|
||||||
// relative error: 0x1.11922ap-70
|
// relative error: 0x1.11922ap-70
|
||||||
// in -0x1.6bp-8 0x1.6bp-8
|
// in -0x1.6bp-8 0x1.6bp-8
|
||||||
// Coefficients are scaled to match the scaling during evaluation.
|
// Coefficients are scaled to match the scaling during evaluation.
|
||||||
|
@ -51,6 +45,7 @@ const struct pow_log_data __pow_log_data = {
|
||||||
-0x1.555555529a47ap-3 * 4,
|
-0x1.555555529a47ap-3 * 4,
|
||||||
0x1.2495b9b4845e9p-3 * -8,
|
0x1.2495b9b4845e9p-3 * -8,
|
||||||
-0x1.0002b8b263fc3p-3 * -8,
|
-0x1.0002b8b263fc3p-3 * -8,
|
||||||
|
#endif
|
||||||
},
|
},
|
||||||
/* Algorithm:
|
/* Algorithm:
|
||||||
|
|
||||||
|
@ -75,6 +70,7 @@ the last few bits of logc are rounded away so k*ln2hi + logc has no rounding
|
||||||
error and the interval for z is selected such that near x == 1, where log(x)
|
error and the interval for z is selected such that near x == 1, where log(x)
|
||||||
is tiny, large cancellation error is avoided in logc + poly(z/c - 1). */
|
is tiny, large cancellation error is avoided in logc + poly(z/c - 1). */
|
||||||
.tab = {
|
.tab = {
|
||||||
|
#if N == 128
|
||||||
#define A(a, b, c) {a, 0, b, c},
|
#define A(a, b, c) {a, 0, b, c},
|
||||||
A(0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48)
|
A(0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48)
|
||||||
A(0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46)
|
A(0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46)
|
||||||
|
@ -204,5 +200,6 @@ A(0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45)
|
||||||
A(0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45)
|
A(0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45)
|
||||||
A(0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46)
|
A(0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46)
|
||||||
A(0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47)
|
A(0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47)
|
||||||
|
#endif
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,20 +0,0 @@
|
||||||
#ifndef COSMOPOLITAN_LIBC_TINYMATH_POW_DATA_INTERNAL_H_
|
|
||||||
#define COSMOPOLITAN_LIBC_TINYMATH_POW_DATA_INTERNAL_H_
|
|
||||||
|
|
||||||
#define POW_LOG_TABLE_BITS 7
|
|
||||||
#define POW_LOG_POLY_ORDER 8
|
|
||||||
|
|
||||||
COSMOPOLITAN_C_START_
|
|
||||||
|
|
||||||
extern const struct pow_log_data {
|
|
||||||
double ln2hi;
|
|
||||||
double ln2lo;
|
|
||||||
double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
|
|
||||||
/* Note: the pad field is unused, but allows slightly faster indexing. */
|
|
||||||
struct {
|
|
||||||
double invc, pad, logc, logctail;
|
|
||||||
} tab[1 << POW_LOG_TABLE_BITS];
|
|
||||||
} __pow_log_data;
|
|
||||||
|
|
||||||
COSMOPOLITAN_C_END_
|
|
||||||
#endif /* COSMOPOLITAN_LIBC_TINYMATH_POW_DATA_INTERNAL_H_ */
|
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,19 +25,9 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/intrin/likely.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/exp2f_data.internal.h"
|
|
||||||
#include "libc/tinymath/exp_data.internal.h"
|
|
||||||
#include "libc/tinymath/internal.h"
|
|
||||||
#include "libc/tinymath/powf_data.internal.h"
|
|
||||||
__static_yoink("arm_optimized_routines_notice");
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2017-2018, Arm Limited.
|
|
||||||
* SPDX-License-Identifier: MIT
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
POWF_LOG2_POLY_ORDER = 5
|
POWF_LOG2_POLY_ORDER = 5
|
||||||
EXP2F_TABLE_BITS = 5
|
EXP2F_TABLE_BITS = 5
|
||||||
|
@ -55,37 +45,39 @@ relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).)
|
||||||
|
|
||||||
/* Subnormal input is normalized so ix has negative biased exponent.
|
/* Subnormal input is normalized so ix has negative biased exponent.
|
||||||
Output is multiplied by N (POWF_SCALE) if TOINT_INTRINSICS is set. */
|
Output is multiplied by N (POWF_SCALE) if TOINT_INTRINSICS is set. */
|
||||||
static inline double_t log2_inline(uint32_t ix)
|
static inline double_t
|
||||||
|
log2_inline (uint32_t ix)
|
||||||
{
|
{
|
||||||
double_t z, r, r2, r4, p, q, y, y0, invc, logc;
|
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||||
uint32_t iz, top, tmp;
|
double_t z, r, r2, r4, p, q, y, y0, invc, logc;
|
||||||
int k, i;
|
uint32_t iz, top, tmp;
|
||||||
|
int k, i;
|
||||||
|
|
||||||
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
|
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
|
||||||
The range is split into N subintervals.
|
The range is split into N subintervals.
|
||||||
The ith subinterval contains z and c is near its center. */
|
The ith subinterval contains z and c is near its center. */
|
||||||
tmp = ix - OFF;
|
tmp = ix - OFF;
|
||||||
i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N;
|
i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N;
|
||||||
top = tmp & 0xff800000;
|
top = tmp & 0xff800000;
|
||||||
iz = ix - top;
|
iz = ix - top;
|
||||||
k = (int32_t)top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */
|
k = (int32_t) top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */
|
||||||
invc = T[i].invc;
|
invc = T[i].invc;
|
||||||
logc = T[i].logc;
|
logc = T[i].logc;
|
||||||
z = (double_t)asfloat(iz);
|
z = (double_t) asfloat (iz);
|
||||||
|
|
||||||
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
|
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
|
||||||
r = z * invc - 1;
|
r = z * invc - 1;
|
||||||
y0 = logc + (double_t)k;
|
y0 = logc + (double_t) k;
|
||||||
|
|
||||||
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
|
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
|
||||||
r2 = r * r;
|
r2 = r * r;
|
||||||
y = A[0] * r + A[1];
|
y = A[0] * r + A[1];
|
||||||
p = A[2] * r + A[3];
|
p = A[2] * r + A[3];
|
||||||
r4 = r2 * r2;
|
r4 = r2 * r2;
|
||||||
q = A[4] * r + y0;
|
q = A[4] * r + y0;
|
||||||
q = p * r2 + q;
|
q = p * r2 + q;
|
||||||
y = y * r4 + q;
|
y = y * r4 + q;
|
||||||
return y;
|
return y;
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef N
|
#undef N
|
||||||
|
@ -97,124 +89,164 @@ static inline double_t log2_inline(uint32_t ix)
|
||||||
/* The output of log2 and thus the input of exp2 is either scaled by N
|
/* The output of log2 and thus the input of exp2 is either scaled by N
|
||||||
(in case of fast toint intrinsics) or not. The unscaled xd must be
|
(in case of fast toint intrinsics) or not. The unscaled xd must be
|
||||||
in [-1021,1023], sign_bias sets the sign of the result. */
|
in [-1021,1023], sign_bias sets the sign of the result. */
|
||||||
static inline float exp2_inline(double_t xd, uint32_t sign_bias)
|
static inline float
|
||||||
|
exp2_inline (double_t xd, uint32_t sign_bias)
|
||||||
{
|
{
|
||||||
uint64_t ki, ski, t;
|
uint64_t ki, ski, t;
|
||||||
double_t kd, z, r, r2, y, s;
|
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||||
|
double_t kd, z, r, r2, y, s;
|
||||||
|
|
||||||
#if TOINT_INTRINSICS
|
#if TOINT_INTRINSICS
|
||||||
#define C __exp2f_data.poly_scaled
|
# define C __exp2f_data.poly_scaled
|
||||||
/* N*x = k + r with r in [-1/2, 1/2] */
|
/* N*x = k + r with r in [-1/2, 1/2] */
|
||||||
kd = roundtoint(xd); /* k */
|
kd = roundtoint (xd); /* k */
|
||||||
ki = converttoint(xd);
|
ki = converttoint (xd);
|
||||||
#else
|
#else
|
||||||
#define C __exp2f_data.poly
|
# define C __exp2f_data.poly
|
||||||
#define SHIFT __exp2f_data.shift_scaled
|
# define SHIFT __exp2f_data.shift_scaled
|
||||||
/* x = k/N + r with r in [-1/(2N), 1/(2N)] */
|
/* x = k/N + r with r in [-1/(2N), 1/(2N)] */
|
||||||
kd = eval_as_double(xd + SHIFT);
|
kd = eval_as_double (xd + SHIFT);
|
||||||
ki = asuint64(kd);
|
ki = asuint64 (kd);
|
||||||
kd -= SHIFT; /* k/N */
|
kd -= SHIFT; /* k/N */
|
||||||
#endif
|
#endif
|
||||||
r = xd - kd;
|
r = xd - kd;
|
||||||
|
|
||||||
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
|
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
|
||||||
t = T[ki % N];
|
t = T[ki % N];
|
||||||
ski = ki + sign_bias;
|
ski = ki + sign_bias;
|
||||||
t += ski << (52 - EXP2F_TABLE_BITS);
|
t += ski << (52 - EXP2F_TABLE_BITS);
|
||||||
s = asdouble(t);
|
s = asdouble (t);
|
||||||
z = C[0] * r + C[1];
|
z = C[0] * r + C[1];
|
||||||
r2 = r * r;
|
r2 = r * r;
|
||||||
y = C[2] * r + 1;
|
y = C[2] * r + 1;
|
||||||
y = z * r2 + y;
|
y = z * r2 + y;
|
||||||
y = y * s;
|
y = y * s;
|
||||||
return eval_as_float(y);
|
return eval_as_float (y);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
|
/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
|
||||||
the bit representation of a non-zero finite floating-point value. */
|
the bit representation of a non-zero finite floating-point value. */
|
||||||
static inline int checkint(uint32_t iy)
|
static inline int
|
||||||
|
checkint (uint32_t iy)
|
||||||
{
|
{
|
||||||
int e = iy >> 23 & 0xff;
|
int e = iy >> 23 & 0xff;
|
||||||
if (e < 0x7f)
|
if (e < 0x7f)
|
||||||
return 0;
|
return 0;
|
||||||
if (e > 0x7f + 23)
|
if (e > 0x7f + 23)
|
||||||
return 2;
|
return 2;
|
||||||
if (iy & ((1 << (0x7f + 23 - e)) - 1))
|
if (iy & ((1 << (0x7f + 23 - e)) - 1))
|
||||||
return 0;
|
return 0;
|
||||||
if (iy & (1 << (0x7f + 23 - e)))
|
if (iy & (1 << (0x7f + 23 - e)))
|
||||||
return 1;
|
return 1;
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int zeroinfnan(uint32_t ix)
|
static inline int
|
||||||
|
zeroinfnan (uint32_t ix)
|
||||||
{
|
{
|
||||||
return 2 * ix - 1 >= 2u * 0x7f800000 - 1;
|
return 2 * ix - 1 >= 2u * 0x7f800000 - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns 𝑥^𝑦.
|
* Returns 𝑥^𝑦.
|
||||||
* @note should take ~16ns
|
*
|
||||||
|
* - ULP error: 0.82 (~ 0.5 + relerr*2^24)
|
||||||
|
* - relerr: 1.27 * 2^-26 (Relative error ~= 128*Ln2*relerr_log2 + relerr_exp2)
|
||||||
|
* - relerr_log2: 1.83 * 2^-33 (Relative error of logx.)
|
||||||
|
* - relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).)
|
||||||
|
*
|
||||||
|
* @raise ERANGE on overflow or underflow
|
||||||
|
* @raise EDOM if x is negative and y is a finite non-integer
|
||||||
*/
|
*/
|
||||||
float powf(float x, float y)
|
float
|
||||||
|
powf (float x, float y)
|
||||||
{
|
{
|
||||||
uint32_t sign_bias = 0;
|
uint32_t sign_bias = 0;
|
||||||
uint32_t ix, iy;
|
uint32_t ix, iy;
|
||||||
|
|
||||||
ix = asuint(x);
|
ix = asuint (x);
|
||||||
iy = asuint(y);
|
iy = asuint (y);
|
||||||
if (UNLIKELY(ix - 0x00800000 >= 0x7f800000 - 0x00800000 ||
|
if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000 || zeroinfnan (iy)))
|
||||||
zeroinfnan(iy))) {
|
{
|
||||||
/* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
|
/* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
|
||||||
if (UNLIKELY(zeroinfnan(iy))) {
|
if (unlikely (zeroinfnan (iy)))
|
||||||
if (2 * iy == 0)
|
{
|
||||||
return issignalingf_inline(x) ? x + y : 1.0f;
|
if (2 * iy == 0)
|
||||||
if (ix == 0x3f800000)
|
return issignalingf_inline (x) ? x + y : 1.0f;
|
||||||
return issignalingf_inline(y) ? x + y : 1.0f;
|
if (ix == 0x3f800000)
|
||||||
if (2 * ix > 2u * 0x7f800000 ||
|
return issignalingf_inline (y) ? x + y : 1.0f;
|
||||||
2 * iy > 2u * 0x7f800000)
|
if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000)
|
||||||
return x + y;
|
return x + y;
|
||||||
if (2 * ix == 2 * 0x3f800000)
|
if (2 * ix == 2 * 0x3f800000)
|
||||||
return 1.0f;
|
return 1.0f;
|
||||||
if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
|
if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
|
||||||
return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
|
return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
|
||||||
return y * y;
|
return y * y;
|
||||||
}
|
|
||||||
if (UNLIKELY(zeroinfnan(ix))) {
|
|
||||||
float_t x2 = x * x;
|
|
||||||
if (ix & 0x80000000 && checkint(iy) == 1)
|
|
||||||
x2 = -x2;
|
|
||||||
/* Without the barrier some versions of clang hoist the 1/x2 and
|
|
||||||
thus division by zero exception can be signaled spuriously. */
|
|
||||||
return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2;
|
|
||||||
}
|
|
||||||
/* x and y are non-zero finite. */
|
|
||||||
if (ix & 0x80000000) {
|
|
||||||
/* Finite x < 0. */
|
|
||||||
int yint = checkint(iy);
|
|
||||||
if (yint == 0)
|
|
||||||
return __math_invalidf(x);
|
|
||||||
if (yint == 1)
|
|
||||||
sign_bias = SIGN_BIAS;
|
|
||||||
ix &= 0x7fffffff;
|
|
||||||
}
|
|
||||||
if (ix < 0x00800000) {
|
|
||||||
/* Normalize subnormal x so exponent becomes negative. */
|
|
||||||
ix = asuint(x * 0x1p23f);
|
|
||||||
ix &= 0x7fffffff;
|
|
||||||
ix -= 23 << 23;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
double_t logx = log2_inline(ix);
|
if (unlikely (zeroinfnan (ix)))
|
||||||
double_t ylogx = y * logx; /* cannot overflow, y is single prec. */
|
{
|
||||||
if (UNLIKELY((asuint64(ylogx) >> 47 & 0xffff) >=
|
float_t x2 = x * x;
|
||||||
asuint64(126.0 * POWF_SCALE) >> 47)) {
|
if (ix & 0x80000000 && checkint (iy) == 1)
|
||||||
/* |y*log(x)| >= 126. */
|
{
|
||||||
if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE)
|
x2 = -x2;
|
||||||
return __math_oflowf(sign_bias);
|
sign_bias = 1;
|
||||||
if (ylogx <= -150.0 * POWF_SCALE)
|
}
|
||||||
return __math_uflowf(sign_bias);
|
#if WANT_ERRNO
|
||||||
|
if (2 * ix == 0 && iy & 0x80000000)
|
||||||
|
return __math_divzerof (sign_bias);
|
||||||
|
#endif
|
||||||
|
/* Without the barrier some versions of clang hoist the 1/x2 and
|
||||||
|
thus division by zero exception can be signaled spuriously. */
|
||||||
|
return iy & 0x80000000 ? opt_barrier_float (1 / x2) : x2;
|
||||||
}
|
}
|
||||||
return exp2_inline(ylogx, sign_bias);
|
/* x and y are non-zero finite. */
|
||||||
|
if (ix & 0x80000000)
|
||||||
|
{
|
||||||
|
/* Finite x < 0. */
|
||||||
|
int yint = checkint (iy);
|
||||||
|
if (yint == 0)
|
||||||
|
return __math_invalidf (x);
|
||||||
|
if (yint == 1)
|
||||||
|
sign_bias = SIGN_BIAS;
|
||||||
|
ix &= 0x7fffffff;
|
||||||
|
}
|
||||||
|
if (ix < 0x00800000)
|
||||||
|
{
|
||||||
|
/* Normalize subnormal x so exponent becomes negative. */
|
||||||
|
ix = asuint (x * 0x1p23f);
|
||||||
|
ix &= 0x7fffffff;
|
||||||
|
ix -= 23 << 23;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
double_t logx = log2_inline (ix);
|
||||||
|
double_t ylogx = y * logx; /* Note: cannot overflow, y is single prec. */
|
||||||
|
if (unlikely ((asuint64 (ylogx) >> 47 & 0xffff)
|
||||||
|
>= asuint64 (126.0 * POWF_SCALE) >> 47))
|
||||||
|
{
|
||||||
|
/* |y*log(x)| >= 126. */
|
||||||
|
if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE)
|
||||||
|
/* |x^y| > 0x1.ffffffp127. */
|
||||||
|
return __math_oflowf (sign_bias);
|
||||||
|
if (WANT_ROUNDING && WANT_ERRNO
|
||||||
|
&& ylogx > 0x1.fffffffa3aae2p+6 * POWF_SCALE)
|
||||||
|
/* |x^y| > 0x1.fffffep127, check if we round away from 0. */
|
||||||
|
if ((!sign_bias
|
||||||
|
&& eval_as_float (1.0f + opt_barrier_float (0x1p-25f)) != 1.0f)
|
||||||
|
|| (sign_bias
|
||||||
|
&& eval_as_float (-1.0f - opt_barrier_float (0x1p-25f))
|
||||||
|
!= -1.0f))
|
||||||
|
return __math_oflowf (sign_bias);
|
||||||
|
if (ylogx <= -150.0 * POWF_SCALE)
|
||||||
|
return __math_uflowf (sign_bias);
|
||||||
|
#if WANT_ERRNO_UFLOW
|
||||||
|
if (ylogx < -149.0 * POWF_SCALE)
|
||||||
|
return __math_may_uflowf (sign_bias);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
return exp2_inline (ylogx, sign_bias);
|
||||||
}
|
}
|
||||||
|
|
||||||
__weak_reference(powf, __powf_finite);
|
#if USE_GLIBC_ABI
|
||||||
|
strong_alias (powf, __powf_finite)
|
||||||
|
hidden_alias (powf, __ieee754_powf)
|
||||||
|
#endif
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,16 +25,9 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/tinymath/powf_data.internal.h"
|
#include "libc/tinymath/arm.internal.h"
|
||||||
__static_yoink("arm_optimized_routines_notice");
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
|
|
||||||
/*
|
|
||||||
* Data definition for powf.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2017-2018, Arm Limited.
|
|
||||||
* SPDX-License-Identifier: MIT
|
|
||||||
*/
|
|
||||||
|
|
||||||
const struct powf_log2_data __powf_log2_data = {
|
const struct powf_log2_data __powf_log2_data = {
|
||||||
.tab = {
|
.tab = {
|
||||||
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE },
|
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE },
|
||||||
|
|
|
@ -1,23 +0,0 @@
|
||||||
#ifndef COSMOPOLITAN_LIBC_TINYMATH_POWF_DATA_INTERNAL_H_
|
|
||||||
#define COSMOPOLITAN_LIBC_TINYMATH_POWF_DATA_INTERNAL_H_
|
|
||||||
|
|
||||||
#define POWF_LOG2_TABLE_BITS 4
|
|
||||||
#define POWF_LOG2_POLY_ORDER 5
|
|
||||||
#if TOINT_INTRINSICS
|
|
||||||
#define POWF_SCALE_BITS EXP2F_TABLE_BITS
|
|
||||||
#else
|
|
||||||
#define POWF_SCALE_BITS 0
|
|
||||||
#endif
|
|
||||||
#define POWF_SCALE ((double)(1 << POWF_SCALE_BITS))
|
|
||||||
|
|
||||||
COSMOPOLITAN_C_START_
|
|
||||||
|
|
||||||
extern const struct powf_log2_data {
|
|
||||||
struct {
|
|
||||||
double invc, logc;
|
|
||||||
} tab[1 << POWF_LOG2_TABLE_BITS];
|
|
||||||
double poly[POWF_LOG2_POLY_ORDER];
|
|
||||||
} __powf_log2_data;
|
|
||||||
|
|
||||||
COSMOPOLITAN_C_END_
|
|
||||||
#endif /* COSMOPOLITAN_LIBC_TINYMATH_POWF_DATA_INTERNAL_H_ */
|
|
|
@ -1,120 +1,42 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi │
|
│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │
|
||||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
|
||||||
│ │
|
│ │
|
||||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
│ OpenBSD /usr/src/lib/libm/src/ld80/e_powl.c │
|
||||||
│ any purpose with or without fee is hereby granted, provided that the │
|
│ /usr/src/lib/libm/src/ld128/e_powl.c │
|
||||||
│ above copyright notice and this permission notice appear in all copies. │
|
│ │
|
||||||
|
│ Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net> │
|
||||||
|
│ │
|
||||||
|
│ Permission to use, copy, modify, and distribute this software for any │
|
||||||
|
│ purpose with or without fee is hereby granted, provided that the above │
|
||||||
|
│ copyright notice and this permission notice appear in all copies. │
|
||||||
|
│ │
|
||||||
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES │
|
||||||
|
│ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF │
|
||||||
|
│ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR │
|
||||||
|
│ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES │
|
||||||
|
│ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN │
|
||||||
|
│ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF │
|
||||||
|
│ │
|
||||||
|
│ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. │
|
||||||
|
│ │
|
||||||
|
│ Developed at SunPro, a Sun Microsystems, Inc. business. │
|
||||||
|
│ Permission to use, copy, modify, and distribute this │
|
||||||
|
│ software is freely granted, provided that this notice │
|
||||||
|
│ is preserved. │
|
||||||
│ │
|
│ │
|
||||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
|
||||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
|
||||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
|
||||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
|
||||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
|
||||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
|
||||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
|
||||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/errno.h"
|
#include "libc/errno.h"
|
||||||
#include "libc/math.h"
|
#include "libc/math.h"
|
||||||
#include "libc/tinymath/internal.h"
|
#include "libc/tinymath/internal.h"
|
||||||
#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
|
#include "libc/tinymath/freebsd.internal.h"
|
||||||
|
|
||||||
#ifdef __x86_64__
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns 𝑥^𝑦.
|
|
||||||
* @note should take ~56ns
|
|
||||||
*/
|
|
||||||
long double powl(long double x, long double y) {
|
|
||||||
long double t, u;
|
|
||||||
if (!isunordered(x, y)) {
|
|
||||||
if (!isinf(y)) {
|
|
||||||
if (!isinf(x)) {
|
|
||||||
if (x) {
|
|
||||||
if (y) {
|
|
||||||
if (x < 0 && y != truncl(y)) {
|
|
||||||
#ifndef __NO_MATH_ERRNO__
|
|
||||||
errno = EDOM;
|
|
||||||
#endif
|
|
||||||
return NAN;
|
|
||||||
}
|
|
||||||
asm("fyl2x" : "=t"(u) : "0"(fabsl(x)), "u"(y) : "st(1)");
|
|
||||||
asm("fprem" : "=t"(t) : "0"(u), "u"(1.L));
|
|
||||||
asm("f2xm1" : "=t"(t) : "0"(t));
|
|
||||||
asm("fscale" : "=t"(t) : "0"(t + 1), "u"(u));
|
|
||||||
if (signbit(x)) {
|
|
||||||
if (y != truncl(y)) return -NAN;
|
|
||||||
if ((int64_t)y & 1) t = -t;
|
|
||||||
}
|
|
||||||
return t;
|
|
||||||
} else {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
} else if (y > 0) {
|
|
||||||
if (signbit(x) && y == truncl(y) && ((int64_t)y & 1)) {
|
|
||||||
return -0.;
|
|
||||||
} else {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
} else if (!y) {
|
|
||||||
return 1;
|
|
||||||
} else {
|
|
||||||
#ifndef __NO_MATH_ERRNO__
|
|
||||||
errno = ERANGE;
|
|
||||||
#endif
|
|
||||||
if (y == truncl(y) && ((int64_t)y & 1)) {
|
|
||||||
return copysignl(INFINITY, x);
|
|
||||||
} else {
|
|
||||||
return INFINITY;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (signbit(x)) {
|
|
||||||
if (!y) return 1;
|
|
||||||
x = y < 0 ? 0 : INFINITY;
|
|
||||||
if (y == truncl(y) && ((int64_t)y & 1)) x = -x;
|
|
||||||
return x;
|
|
||||||
} else if (y < 0) {
|
|
||||||
return 0;
|
|
||||||
} else if (y > 0) {
|
|
||||||
return INFINITY;
|
|
||||||
} else {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
x = fabsl(x);
|
|
||||||
if (x < 1) return signbit(y) ? INFINITY : 0;
|
|
||||||
if (x > 1) return signbit(y) ? 0 : INFINITY;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
} else if (!y || x == 1) {
|
|
||||||
return 1;
|
|
||||||
} else {
|
|
||||||
return NAN;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
|
|
||||||
__static_yoink("musl_libc_notice");
|
|
||||||
__static_yoink("openbsd_libm_notice");
|
__static_yoink("openbsd_libm_notice");
|
||||||
|
__static_yoink("musl_libc_notice");
|
||||||
|
__static_yoink("fdlibm_notice");
|
||||||
|
|
||||||
|
#if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
|
||||||
|
|
||||||
/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_powl.c */
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
|
|
||||||
*
|
|
||||||
* Permission to use, copy, modify, and distribute this software for any
|
|
||||||
* purpose with or without fee is hereby granted, provided that the above
|
|
||||||
* copyright notice and this permission notice appear in all copies.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
||||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
||||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
||||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
||||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
||||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
||||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
||||||
*/
|
|
||||||
/* powl.c
|
/* powl.c
|
||||||
*
|
*
|
||||||
* Power function, long double precision
|
* Power function, long double precision
|
||||||
|
@ -606,35 +528,9 @@ static long double powil(long double x, int nn)
|
||||||
return y;
|
return y;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__weak_reference(powl, __powl_finite);
|
||||||
|
|
||||||
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
|
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
|
||||||
#include "libc/tinymath/freebsd.internal.h"
|
|
||||||
|
|
||||||
/*-
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
|
|
||||||
*
|
|
||||||
* Permission to use, copy, modify, and distribute this software for any
|
|
||||||
* purpose with or without fee is hereby granted, provided that the above
|
|
||||||
* copyright notice and this permission notice appear in all copies.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
||||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
||||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
||||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
||||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
||||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
||||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* powl(x,y) return x**y
|
/* powl(x,y) return x**y
|
||||||
*
|
*
|
||||||
|
@ -1045,8 +941,6 @@ powl(long double x, long double y)
|
||||||
return s * z;
|
return s * z;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* __x86_64__ */
|
|
||||||
|
|
||||||
__weak_reference(powl, __powl_finite);
|
__weak_reference(powl, __powl_finite);
|
||||||
|
|
||||||
#endif /* long double is long */
|
#endif /* __x86_64__ */
|
||||||
|
|
|
@ -30,7 +30,6 @@
|
||||||
#include "libc/tinymath/internal.h"
|
#include "libc/tinymath/internal.h"
|
||||||
__static_yoink("musl_libc_notice");
|
__static_yoink("musl_libc_notice");
|
||||||
|
|
||||||
|
|
||||||
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
|
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
|
||||||
#define EPS DBL_EPSILON
|
#define EPS DBL_EPSILON
|
||||||
#elif FLT_EVAL_METHOD==2
|
#elif FLT_EVAL_METHOD==2
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||||
│ vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi │
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||||
│ │
|
│ │
|
||||||
│ Optimized Routines │
|
│ Optimized Routines │
|
||||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
│ Copyright (c) 2018-2024, Arm Limited. │
|
||||||
│ │
|
│ │
|
||||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||||
│ a copy of this software and associated documentation files (the │
|
│ a copy of this software and associated documentation files (the │
|
||||||
|
@ -25,15 +25,19 @@
|
||||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||||
│ │
|
│ │
|
||||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||||
#include "libc/intrin/likely.h"
|
|
||||||
#include "libc/math.h"
|
|
||||||
#include "libc/tinymath/sincosf.internal.h"
|
#include "libc/tinymath/sincosf.internal.h"
|
||||||
__static_yoink("arm_optimized_routines_notice");
|
__static_yoink("arm_optimized_routines_notice");
|
||||||
|
|
||||||
/* Fast sincosf implementation. Worst-case ULP is 0.5607, maximum relative
|
/**
|
||||||
error is 0.5303 * 2^-23. A single-step range reduction is used for
|
* Returns sine and cosine of y.
|
||||||
small values. Large inputs have their range reduced using fast integer
|
*
|
||||||
arithmetic. */
|
* This is a fast sincosf implementation. Worst-case ULP is 0.5607,
|
||||||
|
* maximum relative error is 0.5303 * 2^-23. A single-step range
|
||||||
|
* reduction is used for small values. Large inputs have their range
|
||||||
|
* reduced using fast integer arithmetic.
|
||||||
|
*
|
||||||
|
* @raise EDOM if y is an infinity
|
||||||
|
*/
|
||||||
void
|
void
|
||||||
sincosf (float y, float *sinp, float *cosp)
|
sincosf (float y, float *sinp, float *cosp)
|
||||||
{
|
{
|
||||||
|
@ -46,11 +50,11 @@ sincosf (float y, float *sinp, float *cosp)
|
||||||
{
|
{
|
||||||
double x2 = x * x;
|
double x2 = x * x;
|
||||||
|
|
||||||
if (UNLIKELY (abstop12 (y) < abstop12 (0x1p-12f)))
|
if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
|
||||||
{
|
{
|
||||||
if (UNLIKELY (abstop12 (y) < abstop12 (0x1p-126f)))
|
if (unlikely (abstop12 (y) < abstop12 (0x1p-126f)))
|
||||||
/* Force underflow for tiny y. */
|
/* Force underflow for tiny y. */
|
||||||
FORCE_EVAL (x2);
|
force_eval_float (x2);
|
||||||
*sinp = y;
|
*sinp = y;
|
||||||
*cosp = 1.0f;
|
*cosp = 1.0f;
|
||||||
return;
|
return;
|
||||||
|
@ -70,7 +74,7 @@ sincosf (float y, float *sinp, float *cosp)
|
||||||
|
|
||||||
sincosf_poly (x * s, x * x, p, n, sinp, cosp);
|
sincosf_poly (x * s, x * x, p, n, sinp, cosp);
|
||||||
}
|
}
|
||||||
else if (LIKELY (abstop12 (y) < abstop12 (INFINITY)))
|
else if (likely (abstop12 (y) < abstop12 (INFINITY)))
|
||||||
{
|
{
|
||||||
uint32_t xi = asuint (y);
|
uint32_t xi = asuint (y);
|
||||||
int sign = xi >> 31;
|
int sign = xi >> 31;
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue