Port a lot more code to AARCH64

- Introduce epoll_pwait()
- Rewrite -ftrapv and ffs() libraries in C code
- Use more FreeBSD code in math function library
- Get significantly more tests passing on qemu-aarch64
- Fix many Musl long double functions that were broken on AARCH64
This commit is contained in:
Justine Tunney 2023-05-14 09:32:15 -07:00
parent 91791e9f38
commit 550b52abf6
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
158 changed files with 6018 additions and 3499 deletions

View file

@ -2,60 +2,108 @@
vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
FreeBSD lib/msun/src/e_acoshl.c
Converted to ldbl by David Schultz <das@FreeBSD.ORG> and Bruce D. Evans.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
Copyright (c) 1992-2023 The FreeBSD Project.
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
Developed at SunPro, a Sun Microsystems, Inc. business.
Permission to use, copy, modify, and distribute this
software is freely granted, provided that this notice
is preserved.
*/
#include "libc/math.h"
#include "libc/tinymath/ldshape.internal.h"
#include "libc/tinymath/freebsd.internal.h"
asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
FreeBSD libm (BSD-2 License)\\n\
Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
asm(".ident\t\"\\n\\n\
fdlibm (fdlibm license)\\n\
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
asm(".include \"libc/disclaimer.inc\"");
// clang-format off
/* EXP_LARGE is the threshold above which we use acosh(x) ~= log(2x). */
#if LDBL_MANT_DIG == 64
#define EXP_LARGE 34
#elif LDBL_MANT_DIG == 113
#define EXP_LARGE 58
#else
#error "Unsupported long double format"
#endif
#if LDBL_MAX_EXP != 0x4000
/* We also require the usual expsign encoding. */
#error "Unsupported long double format"
#endif
#define BIAS (LDBL_MAX_EXP - 1)
static const double
one = 1.0;
#if LDBL_MANT_DIG == 64
static const union IEEEl2bits
u_ln2 = LD80C(0xb17217f7d1cf79ac, -1, 6.93147180559945309417e-1L);
#define ln2 u_ln2.e
#elif LDBL_MANT_DIG == 113
static const long double
ln2 = 6.93147180559945309417232121458176568e-1L; /* 0x162e42fefa39ef35793c7673007e6.0p-113 */
#else
#error "Unsupported long double format"
#endif
/**
* Returns inverse hyperbolic cosine of 𝑥.
* @define acosh(x) = log(x + sqrt(x*x-1))
*/
long double acoshl(long double x) {
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
return acosh(x);
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
union ldshape u = {x};
int e = u.i.se & 0x7fff;
long double
acoshl(long double x)
{
long double t;
int16_t hx;
if (e < 0x3fff + 1)
/* |x| < 2, invalid if x < 1 or nan */
return log1pl(x-1 + sqrtl((x-1)*(x-1)+2*(x-1)));
if (e < 0x3fff + 32)
/* |x| < 0x1p32 */
return logl(2*x - 1/(x+sqrtl(x*x-1)));
return logl(x) + 0.693147180559945309417232121458176568L;
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
// TODO: broken implementation to make things compile
return acosh(x);
#else
#error "architecture unsupported"
#endif
ENTERI();
GET_LDBL_EXPSIGN(hx, x);
if (hx < 0x3fff) { /* x < 1, or misnormal */
RETURNI((x-x)/(x-x));
} else if (hx >= BIAS + EXP_LARGE) { /* x >= LARGE */
if (hx >= 0x7fff) { /* x is inf, NaN or misnormal */
RETURNI(x+x);
} else {
RETURNI(logl(x)+ln2); /* acosh(huge)=log(2x), or misnormal */
}
} else if (hx == 0x3fff && x == 1) {
RETURNI(0.0); /* acosh(1) = 0 */
} else if (hx >= 0x4000) { /* LARGE > x >= 2, or misnormal */
t=x*x;
RETURNI(logl(2.0*x-one/(x+sqrtl(t-one))));
} else { /* 1<x<2 */
t = x-one;
RETURNI(log1pl(t+sqrtl(2.0*t+t*t)));
}
}

View file

@ -32,7 +32,7 @@ asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
// clang-format off
/**
* Returns inverse hyperbolic sine of 𝑥.

View file

@ -2,74 +2,110 @@
vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
FreeBSD lib/msun/src/s_asinhl.c
Converted to ldbl by David Schultz <das@FreeBSD.ORG> and Bruce D. Evans.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
Copyright (c) 1992-2023 The FreeBSD Project.
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
Developed at SunPro, a Sun Microsystems, Inc. business.
Permission to use, copy, modify, and distribute this
software is freely granted, provided that this notice
is preserved.
*/
#include "libc/math.h"
#include "libc/tinymath/feval.internal.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/ldshape.internal.h"
#include "libc/tinymath/freebsd.internal.h"
asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
FreeBSD libm (BSD-2 License)\\n\
Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
asm(".ident\t\"\\n\\n\
fdlibm (fdlibm license)\\n\
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
asm(".include \"libc/disclaimer.inc\"");
// clang-format off
/* EXP_LARGE is the threshold above which we use asinh(x) ~= log(2x). */
/* EXP_TINY is the threshold below which we use asinh(x) ~= x. */
#if LDBL_MANT_DIG == 64
#define EXP_LARGE 34
#define EXP_TINY -34
#elif LDBL_MANT_DIG == 113
#define EXP_LARGE 58
#define EXP_TINY -58
#else
#error "Unsupported long double format"
#endif
#if LDBL_MAX_EXP != 0x4000
/* We also require the usual expsign encoding. */
#error "Unsupported long double format"
#endif
#define BIAS (LDBL_MAX_EXP - 1)
static const double
one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
huge= 1.00000000000000000000e+300;
#if LDBL_MANT_DIG == 64
static const union IEEEl2bits
u_ln2 = LD80C(0xb17217f7d1cf79ac, -1, 6.93147180559945309417e-1L);
#define ln2 u_ln2.e
#elif LDBL_MANT_DIG == 113
static const long double
ln2 = 6.93147180559945309417232121458176568e-1L; /* 0x162e42fefa39ef35793c7673007e6.0p-113 */
#else
#error "Unsupported long double format"
#endif
/**
* Returns inverse hyperbolic sine of 𝑥.
* @define asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5)
*/
long double asinhl(long double x) {
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
return asinh(x);
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
union ldshape u = {x};
unsigned e = u.i.se & 0x7fff;
unsigned s = u.i.se >> 15;
long double
asinhl(long double x)
{
long double t, w;
uint16_t hx, ix;
/* |x| */
u.i.se = e;
x = u.f;
if (e >= 0x3fff + 32) {
/* |x| >= 0x1p32 or inf or nan */
x = logl(x) + 0.693147180559945309417232121458176568L;
} else if (e >= 0x3fff + 1) {
/* |x| >= 2 */
x = logl(2*x + 1/(sqrtl(x*x+1)+x));
} else if (e >= 0x3fff - 32) {
/* |x| >= 0x1p-32 */
x = log1pl(x + x*x/(sqrtl(x*x+1)+1));
} else {
/* |x| < 0x1p-32, raise inexact if x!=0 */
FORCE_EVAL(x + 0x1p120f);
ENTERI();
GET_LDBL_EXPSIGN(hx, x);
ix = hx & 0x7fff;
if (ix >= 0x7fff) RETURNI(x+x); /* x is inf, NaN or misnormal */
if (ix < BIAS + EXP_TINY) { /* |x| < TINY, or misnormal */
if (huge + x > one) RETURNI(x); /* return x inexact except 0 */
}
return s ? -x : x;
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
// TODO: broken implementation to make things compile
return asinh(x);
#else
#error "architecture unsupported"
#endif
if (ix >= BIAS + EXP_LARGE) { /* |x| >= LARGE, or misnormal */
w = logl(fabsl(x))+ln2;
} else if (ix >= 0x4000) { /* LARGE > |x| >= 2.0, or misnormal */
t = fabsl(x);
w = logl(2.0*t+one/(sqrtl(x*x+one)+t));
} else { /* 2.0 > |x| >= TINY, or misnormal */
t = x*x;
w =log1pl(fabsl(x)+t/(one+sqrtl(one+t)));
}
RETURNI((hx & 0x8000) == 0 ? w : -w);
}

View file

@ -1,174 +1,154 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
FreeBSD lib/msun/src/e_atan2.c
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
Copyright (c) 1992-2023 The FreeBSD Project.
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
Developed at SunPro, a Sun Microsystems, Inc. business.
Permission to use, copy, modify, and distribute this
software is freely granted, provided that this notice
is preserved.
*/
#include "libc/intrin/likely.h"
#include "libc/math.h"
#include "libc/tinymath/atan_common.internal.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/freebsd.internal.h"
asm(".ident\t\"\\n\\n\
Optimized Routines (MIT License)\\n\
Copyright 2022 ARM Limited\"");
FreeBSD libm (BSD-2 License)\\n\
Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
asm(".ident\t\"\\n\\n\
fdlibm (fdlibm license)\\n\
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
// clang-format off
#define Pi (0x1.921fb54442d18p+1)
#define PiOver2 (0x1.921fb54442d18p+0)
#define PiOver4 (0x1.921fb54442d18p-1)
#define SignMask (0x8000000000000000)
#define ExpMask (0x7ff0000000000000)
/* atan2(y,x)
* Method :
* 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
* 2. Reduce x to positive by (if x and y are unexceptional):
* ARG (x+iy) = arctan(y/x) ... if x > 0,
* ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0,
*
* Special cases:
*
* ATAN2((anything), NaN ) is NaN;
* ATAN2(NAN , (anything) ) is NaN;
* ATAN2(+-0, +(anything but NaN)) is +-0 ;
* ATAN2(+-0, -(anything but NaN)) is +-pi ;
* ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2;
* ATAN2(+-(anything but INF and NaN), +INF) is +-0 ;
* ATAN2(+-(anything but INF and NaN), -INF) is +-pi;
* ATAN2(+-INF,+INF ) is +-pi/4 ;
* ATAN2(+-INF,-INF ) is +-3pi/4;
* ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2;
*
* Constants:
* The hexadecimal values are the intended ones for the following
* constants. The decimal values may be used, provided that the
* compiler will convert from decimal to binary accurately enough
* to produce the hexadecimal values shown.
*/
/* We calculate atan2 by P(n/d), where n and d are similar to the input
arguments, and P is a polynomial. Evaluating P(x) requires calculating x^8,
which may underflow if n and d have very different magnitude.
POW8_EXP_UFLOW_BOUND is the lower bound of the difference in exponents of n
and d for which P underflows, and is used to special-case such inputs. */
#define POW8_EXP_UFLOW_BOUND 62
static volatile double
tiny = 1.0e-300;
static const double
zero = 0.0,
pi_o_4 = 7.8539816339744827900E-01, /* 0x3FE921FB, 0x54442D18 */
pi_o_2 = 1.5707963267948965580E+00, /* 0x3FF921FB, 0x54442D18 */
pi = 3.1415926535897931160E+00; /* 0x400921FB, 0x54442D18 */
static volatile double
pi_lo = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
static inline int64_t
biased_exponent (double f)
{
uint64_t fi = asuint64 (f);
return (fi & ExpMask) >> 52;
}
/* Fast implementation of scalar atan2. Largest errors are when y and x are
close together. The greatest observed error is 2.28 ULP:
atan2(-0x1.5915b1498e82fp+732, 0x1.54d11ef838826p+732)
got -0x1.954f42f1fa841p-1 want -0x1.954f42f1fa843p-1. */
/**
* Returns arc tangent of 𝑦/𝑥.
*/
double
atan2 (double y, double x)
atan2(double y, double x)
{
uint64_t ix = asuint64 (x);
uint64_t iy = asuint64 (y);
double z;
int32_t k,m,hx,hy,ix,iy;
uint32_t lx,ly;
uint64_t sign_x = ix & SignMask;
uint64_t sign_y = iy & SignMask;
EXTRACT_WORDS(hx,lx,x);
ix = hx&0x7fffffff;
EXTRACT_WORDS(hy,ly,y);
iy = hy&0x7fffffff;
if(((ix|((lx|-lx)>>31))>0x7ff00000)||
((iy|((ly|-ly)>>31))>0x7ff00000)) /* x or y is NaN */
return nan_mix(x, y);
if(hx==0x3ff00000&&lx==0) return atan(y); /* x=1.0 */
m = ((hy>>31)&1)|((hx>>30)&2); /* 2*sign(x)+sign(y) */
uint64_t iax = ix & ~SignMask;
uint64_t iay = iy & ~SignMask;
bool xisnan = isnan (x);
if (UNLIKELY (isnan (y) && !xisnan))
return __math_invalid (y);
if (UNLIKELY (xisnan))
return __math_invalid (x);
/* m = 2 * sign(x) + sign(y). */
uint32_t m = ((iy >> 63) & 1) | ((ix >> 62) & 2);
int64_t exp_diff = biased_exponent (x) - biased_exponent (y);
/* y = 0. */
if (iay == 0)
{
switch (m)
{
case 0:
case 1:
return y; /* atan(+-0,+anything)=+-0. */
case 2:
return Pi; /* atan(+0,-anything) = pi. */
case 3:
return -Pi; /* atan(-0,-anything) =-pi. */
}
}
/* Special case for (x, y) either on or very close to the y axis. Either x =
0, or y is much larger than x (difference in exponents >=
POW8_EXP_UFLOW_BOUND). */
if (UNLIKELY (iax == 0 || exp_diff <= -POW8_EXP_UFLOW_BOUND))
return sign_y ? -PiOver2 : PiOver2;
/* Special case for either x is INF or (x, y) is very close to x axis and x is
negative. */
if (UNLIKELY (iax == 0x7ff0000000000000
|| (exp_diff >= POW8_EXP_UFLOW_BOUND && m >= 2)))
{
if (iay == 0x7ff0000000000000)
{
switch (m)
{
case 0:
return PiOver4; /* atan(+INF,+INF). */
case 1:
return -PiOver4; /* atan(-INF,+INF). */
case 2:
return 3.0 * PiOver4; /* atan(+INF,-INF). */
case 3:
return -3.0 * PiOver4; /* atan(-INF,-INF). */
/* when y = 0 */
if((iy|ly)==0) {
switch(m) {
case 0:
case 1: return y; /* atan(+-0,+anything)=+-0 */
case 2: return pi+tiny;/* atan(+0,-anything) = pi */
case 3: return -pi-tiny;/* atan(-0,-anything) =-pi */
}
}
else
{
switch (m)
{
case 0:
return 0.0; /* atan(+...,+INF). */
case 1:
return -0.0; /* atan(-...,+INF). */
case 2:
return Pi; /* atan(+...,-INF). */
case 3:
return -Pi; /* atan(-...,-INF). */
/* when x = 0 */
if((ix|lx)==0) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny;
/* when x is INF */
if(ix==0x7ff00000) {
if(iy==0x7ff00000) {
switch(m) {
case 0: return pi_o_4+tiny;/* atan(+INF,+INF) */
case 1: return -pi_o_4-tiny;/* atan(-INF,+INF) */
case 2: return 3.0*pi_o_4+tiny;/*atan(+INF,-INF)*/
case 3: return -3.0*pi_o_4-tiny;/*atan(-INF,-INF)*/
}
} else {
switch(m) {
case 0: return zero ; /* atan(+...,+INF) */
case 1: return -zero ; /* atan(-...,+INF) */
case 2: return pi+tiny ; /* atan(+...,-INF) */
case 3: return -pi-tiny ; /* atan(-...,-INF) */
}
}
}
}
/* y is INF. */
if (iay == 0x7ff0000000000000)
return sign_y ? -PiOver2 : PiOver2;
/* when y is INF */
if(iy==0x7ff00000) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny;
uint64_t sign_xy = sign_x ^ sign_y;
double ax = asdouble (iax);
double ay = asdouble (iay);
uint64_t pred_aygtax = (ay > ax);
/* Set up z for call to atan. */
double n = pred_aygtax ? -ax : ay;
double d = pred_aygtax ? ay : ax;
double z = n / d;
double ret;
if (UNLIKELY (m < 2 && exp_diff >= POW8_EXP_UFLOW_BOUND))
{
/* If (x, y) is very close to x axis and x is positive, the polynomial
will underflow and evaluate to z. */
ret = z;
}
else
{
/* Work out the correct shift. */
double shift = sign_x ? -2.0 : 0.0;
shift = pred_aygtax ? shift + 1.0 : shift;
shift *= PiOver2;
ret = eval_poly (z, z, shift);
}
/* Account for the sign of x and y. */
return asdouble (asuint64 (ret) ^ sign_xy);
/* compute y/x */
k = (iy-ix)>>20;
if(k > 60) { /* |y/x| > 2**60 */
z=pi_o_2+0.5*pi_lo;
m&=1;
}
else if(hx<0&&k<-60) z=0.0; /* 0 > |y|/x > -2**-60 */
else z=atan(fabs(y/x)); /* safe to do y/x */
switch (m) {
case 0: return z ; /* atan(+,+) */
case 1: return -z ; /* atan(-,+) */
case 2: return pi-(z-pi_lo);/* atan(+,-) */
default: /* case 3 */
return (z-pi_lo)-pi;/* atan(-,-) */
}
}

View file

@ -59,20 +59,21 @@ asm(".include \"libc/disclaimer.inc\"");
/**
* Returns arc tangent of 𝑦/𝑥.
*/
long double atan2l(long double y, long double x) {
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
return atan2(y, x);
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
long double atan2l(long double y, long double x)
{
#ifdef __x86__
long double res;
asm("fpatan"
: "=t" (res)
: "0"(x), "u"(y)
asm("fpatan"
: "=t"(x)
: "0"(x), "u"(y)
: "st(1)");
return res;
return x;
#else
#elif LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
return atan2(y, x);
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
union ldshape ux, uy;
long double z;
@ -129,7 +130,6 @@ long double atan2l(long double y, long double x) {
return (z-2*pio2_lo)-2*pio2_hi; /* atan(-,-) */
}
#endif /* __x86__ */
#else
#error "architecture unsupported"
#endif

View file

@ -30,7 +30,7 @@
#include "libc/tinymath/complex.internal.h"
asm(".ident\t\"\\n\\n\
OpenBSD libm (MIT License)\\n\
OpenBSD libm (ISC License)\\n\
Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\

View file

@ -30,7 +30,7 @@
#include "libc/tinymath/complex.internal.h"
asm(".ident\t\"\\n\\n\
OpenBSD libm (MIT License)\\n\
OpenBSD libm (ISC License)\\n\
Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\

View file

@ -30,7 +30,7 @@
#include "libc/tinymath/complex.internal.h"
asm(".ident\t\"\\n\\n\
OpenBSD libm (MIT License)\\n\
OpenBSD libm (ISC License)\\n\
Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\

View file

@ -57,19 +57,15 @@ asm(".include \"libc/disclaimer.inc\"");
* and David A. Schultz.
*/
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double cbrtl(long double x)
{
return cbrt(x);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
static const unsigned B1 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */
/**
* Returns cube root of 𝑥.
*/
long double cbrtl(long double x)
{
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
return cbrt(x);
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
static const unsigned B1 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */
union ldshape u = {x}, v;
union {float f; uint32_t i;} uft;
long double r, s, t, w;
@ -163,8 +159,8 @@ long double cbrtl(long double x)
t *= v.f;
return t;
}
#else
#error "architecture unsupported"
#endif
}

View file

@ -36,8 +36,7 @@ asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
// clang-format off
/* origin: FreeBSD /usr/src/lib/msun/src/s_ccoshf.c */
/*-

View file

@ -2,39 +2,102 @@
vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
FreeBSD lib/msun/src/s_tanhf.c
Converted to long double by Bruce D. Evans.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
Copyright (c) 1992-2023 The FreeBSD Project.
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
Developed at SunPro, a Sun Microsystems, Inc. business.
Permission to use, copy, modify, and distribute this
software is freely granted, provided that this notice
is preserved.
*/
#include "libc/math.h"
#include "libc/tinymath/expo.internal.h"
#include "libc/tinymath/feval.internal.h"
#include "libc/tinymath/ldshape.internal.h"
#include "libc/tinymath/freebsd.internal.h"
asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
FreeBSD libm (BSD-2 License)\\n\
Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
asm(".ident\t\"\\n\\n\
fdlibm (fdlibm license)\\n\
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
// clang-format off
#if LDBL_MAX_EXP != 0x4000
/* We also require the usual expsign encoding. */
#error "Unsupported long double format"
#endif
#define BIAS (LDBL_MAX_EXP - 1)
static const volatile long double huge = 0x1p10000L, tiny = 0x1p-10000L;
#if LDBL_MANT_DIG == 64
/*
* Domain [-1, 1], range ~[-1.8211e-21, 1.8211e-21]:
* |cosh(x) - c(x)| < 2**-68.8
*/
static const union IEEEl2bits
C4u = LD80C(0xaaaaaaaaaaaaac78, -5, 4.16666666666666682297e-2L);
#define C4 C4u.e
static const double
C2 = 0.5,
C6 = 1.3888888888888616e-3, /* 0x16c16c16c16b99.0p-62 */
C8 = 2.4801587301767953e-5, /* 0x1a01a01a027061.0p-68 */
C10 = 2.7557319163300398e-7, /* 0x127e4fb6c9b55f.0p-74 */
C12 = 2.0876768371393075e-9, /* 0x11eed99406a3f4.0p-81 */
C14 = 1.1469537039374480e-11, /* 0x1938c67cd18c48.0p-89 */
C16 = 4.8473490896852041e-14; /* 0x1b49c429701e45.0p-97 */
#elif LDBL_MANT_DIG == 113
/*
* Domain [-1, 1], range ~[-2.3194e-37, 2.3194e-37]:
* |cosh(x) - c(x)| < 2**-121.69
*/
static const long double
C4 = 4.16666666666666666666666666666666225e-2L, /* 0x1555555555555555555555555554e.0p-117L */
C6 = 1.38888888888888888888888888889434831e-3L, /* 0x16c16c16c16c16c16c16c16c1dd7a.0p-122L */
C8 = 2.48015873015873015873015871870962089e-5L, /* 0x1a01a01a01a01a01a01a017af2756.0p-128L */
C10 = 2.75573192239858906525574318600800201e-7L, /* 0x127e4fb7789f5c72ef01c8a040640.0p-134L */
C12 = 2.08767569878680989791444691755468269e-9L, /* 0x11eed8eff8d897b543d0679607399.0p-141L */
C14= 1.14707455977297247387801189650495351e-11L, /* 0x193974a8c07c9d24ae169a7fa9b54.0p-149L */
C16 = 4.77947733238737883626416876486279985e-14L; /* 0x1ae7f3e733b814d4e1b90f5727fe4.0p-157L */
static const double
C2 = 0.5,
C18 = 1.5619206968597871e-16, /* 0x16827863b9900b.0p-105 */
C20 = 4.1103176218528049e-19, /* 0x1e542ba3d3c269.0p-114 */
C22 = 8.8967926401641701e-22, /* 0x10ce399542a014.0p-122 */
C24 = 1.6116681626523904e-24, /* 0x1f2c981d1f0cb7.0p-132 */
C26 = 2.5022374732804632e-27; /* 0x18c7ecf8b2c4a0.0p-141 */
#else
#error "Unsupported long double format"
#endif /* LDBL_MANT_DIG == 64 */
/* log(2**16385 - 0.5) rounded up: */
static const float
o_threshold = 1.13572168e4; /* 0xb174de.0p-10 */
/**
* Returns hyperbolic cosine of 𝑥.
@ -43,43 +106,51 @@ asm(".include \"libc/disclaimer.inc\"");
* = 1 + 0.5*(exp(x)-1)*(exp(x)-1)/exp(x)
* = 1 + x*x/2 + o(x^4)
*/
long double coshl(long double x) {
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
return cosh(x);
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
union ldshape u = {x};
unsigned ex = u.i.se & 0x7fff;
uint32_t w;
long double t;
/* |x| */
u.i.se = ex;
x = u.f;
w = u.i.m >> 32;
/* |x| < log(2) */
if (ex < 0x3fff-1 || (ex == 0x3fff-1 && w < 0xb17217f7)) {
if (ex < 0x3fff-32) {
fevalf(x + 0x1p120f);
return 1;
}
t = expm1l(x);
return 1 + t*t/(2*(1+t));
}
/* |x| < log(LDBL_MAX) */
if (ex < 0x3fff+13 || (ex == 0x3fff+13 && w < 0xb17217f7)) {
t = expl(x);
return 0.5*(t + 1/t);
}
/* |x| > log(LDBL_MAX) or nan */
t = expl(0.5*x);
return 0.5*t*t;
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
// TODO: broken implementation to make things compile
return cosh(x);
#else
#error "architecture unsupported"
long double
coshl(long double x)
{
long double hi,lo,x2,x4;
#if LDBL_MANT_DIG == 113
double dx2;
#endif
uint16_t ix;
GET_LDBL_EXPSIGN(ix,x);
ix &= 0x7fff;
/* x is INF or NaN */
if(ix>=0x7fff) return x*x;
ENTERI();
/* |x| < 1, return 1 or c(x) */
if(ix<0x3fff) {
if (ix<BIAS-(LDBL_MANT_DIG+1)/2) /* |x| < TINY */
RETURNI(1+tiny); /* cosh(tiny) = 1(+) with inexact */
x2 = x*x;
#if LDBL_MANT_DIG == 64
x4 = x2*x2;
RETURNI(((C16*x2 + C14)*x4 + (C12*x2 + C10))*(x4*x4*x2) +
((C8*x2 + C6)*x2 + C4)*x4 + C2*x2 + 1);
#elif LDBL_MANT_DIG == 113
dx2 = x2;
RETURNI((((((((((((C26*dx2 + C24)*dx2 + C22)*dx2 +
C20)*x2 + C18)*x2 +
C16)*x2 + C14)*x2 + C12)*x2 + C10)*x2 + C8)*x2 + C6)*x2 +
C4)*(x2*x2) + C2*x2 + 1);
#endif
}
/* |x| in [1, 64), return accurate exp(|x|)/2+1/exp(|x|)/2 */
if (ix < 0x4005) {
k_hexpl(fabsl(x), &hi, &lo);
RETURNI(lo + 0.25/(hi + lo) + hi);
}
/* |x| in [64, o_threshold], return correctly-overflowing exp(|x|)/2 */
if (fabsl(x) <= o_threshold)
RETURNI(hexpl(fabsl(x)));
/* |x| > o_threshold, cosh(x) overflow */
RETURNI(huge*huge);
}

View file

@ -33,9 +33,7 @@ asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
// clang-format off
/* pow(z, c) = exp(c log(z)), See C99 G.6.4.1 */

View file

@ -36,7 +36,7 @@ asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
// clang-format off
/* origin: FreeBSD /usr/src/lib/msun/src/s_csinh.c */

View file

@ -34,7 +34,7 @@ asm(".ident\t\"\\n\\n\
Double-precision math functions (MIT License)\\n\
Copyright 2018 ARM Limited\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
// clang-format off
/*
* Double-precision e^x function.

View file

@ -1,35 +1,34 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2023 Justine Alexandra Roberts Tunney
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/math.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double expl(long double x) {
return exp(x);
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
#include "libc/tinymath/internal.h"
asm(".ident\t\"\\n\\n\
OpenBSD libm (MIT License)\\n\
OpenBSD libm (ISC License)\\n\
Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
@ -53,6 +52,7 @@ asm(".include \"libc/disclaimer.inc\"");
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/*
* Exponential function, long double precision
*
@ -104,13 +104,6 @@ asm(".include \"libc/disclaimer.inc\"");
*
*/
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double expl(long double x)
{
return exp(x);
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
static const long double P[3] = {
1.2617719307481059087798E-4L,
3.0299440770744196129956E-2L,
@ -156,12 +149,336 @@ long double expl(long double x)
x = 1.0 + 2.0 * x;
return scalbnl(x, k);
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
// TODO: broken implementation to make things compile
long double expl(long double x)
#include "libc/tinymath/freebsd.internal.h"
asm(".ident\t\"\\n\\n\
FreeBSD libm (BSD-2 License)\\n\
Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
asm(".ident\t\"\\n\\n\
fdlibm (fdlibm license)\\n\
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
asm(".include \"libc/disclaimer.inc\"");
// clang-format off
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2009-2013 Steven G. Kargl
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Optimized by Bruce D. Evans.
*/
/*
* ld128 version of s_expl.c. See ../ld80/s_expl.c for most comments.
*/
/* XXX Prevent compilers from erroneously constant folding these: */
static const volatile long double
huge = 0x1p10000L,
tiny = 0x1p-10000L;
static const long double
twom10000 = 0x1p-10000L;
static const long double
/* log(2**16384 - 0.5) rounded towards zero: */
/* log(2**16384 - 0.5 + 1) rounded towards zero for expm1l() is the same: */
o_threshold = 11356.523406294143949491931077970763428L,
/* log(2**(-16381-64-1)) rounded towards zero: */
u_threshold = -11433.462743336297878837243843452621503L;
long double
expl(long double x)
{
return exp(x);
union IEEEl2bits u;
long double hi, lo, t, twopk;
int k;
uint16_t hx, ix;
DOPRINT_START(&x);
/* Filter out exceptional cases. */
u.e = x;
hx = u.xbits.expsign;
ix = hx & 0x7fff;
if (ix >= BIAS + 13) { /* |x| >= 8192 or x is NaN */
if (ix == BIAS + LDBL_MAX_EXP) {
if (hx & 0x8000) /* x is -Inf or -NaN */
RETURNP(-1 / x);
RETURNP(x + x); /* x is +Inf or +NaN */
}
if (x > o_threshold)
RETURNP(huge * huge);
if (x < u_threshold)
RETURNP(tiny * tiny);
} else if (ix < BIAS - 114) { /* |x| < 0x1p-114 */
RETURN2P(1, x); /* 1 with inexact iff x != 0 */
}
ENTERI();
twopk = 1;
__k_expl(x, &hi, &lo, &k);
t = SUM2P(hi, lo);
/* Scale by 2**k. */
/*
* XXX sparc64 multiplication was so slow that scalbnl() is faster,
* but performance on aarch64 and riscv hasn't yet been quantified.
*/
if (k >= LDBL_MIN_EXP) {
if (k == LDBL_MAX_EXP)
RETURNI(t * 2 * 0x1p16383L);
SET_LDBL_EXPSIGN(twopk, BIAS + k);
RETURNI(t * twopk);
} else {
SET_LDBL_EXPSIGN(twopk, BIAS + k + 10000);
RETURNI(t * twopk * twom10000);
}
}
/*
* Our T1 and T2 are chosen to be approximately the points where method
* A and method B have the same accuracy. Tang's T1 and T2 are the
* points where method A's accuracy changes by a full bit. For Tang,
* this drop in accuracy makes method A immediately less accurate than
* method B, but our larger INTERVALS makes method A 2 bits more
* accurate so it remains the most accurate method significantly
* closer to the origin despite losing the full bit in our extended
* range for it.
*
* Split the interval [T1, T2] into two intervals [T1, T3] and [T3, T2].
* Setting T3 to 0 would require the |x| < 0x1p-113 condition to appear
* in both subintervals, so set T3 = 2**-5, which places the condition
* into the [T1, T3] interval.
*
* XXX we now do this more to (partially) balance the number of terms
* in the C and D polys than to avoid checking the condition in both
* intervals.
*
* XXX these micro-optimizations are excessive.
*/
static const double
T1 = -0.1659, /* ~-30.625/128 * log(2) */
T2 = 0.1659, /* ~30.625/128 * log(2) */
T3 = 0.03125;
/*
* Domain [-0.1659, 0.03125], range ~[2.9134e-44, 1.8404e-37]:
* |(exp(x)-1-x-x**2/2)/x - p(x)| < 2**-122.03
*
* XXX none of the long double C or D coeffs except C10 is correctly printed.
* If you re-print their values in %.35Le format, the result is always
* different. For example, the last 2 digits in C3 should be 59, not 67.
* 67 is apparently from rounding an extra-precision value to 36 decimal
* places.
*/
static const long double
C3 = 1.66666666666666666666666666666666667e-1L,
C4 = 4.16666666666666666666666666666666645e-2L,
C5 = 8.33333333333333333333333333333371638e-3L,
C6 = 1.38888888888888888888888888891188658e-3L,
C7 = 1.98412698412698412698412697235950394e-4L,
C8 = 2.48015873015873015873015112487849040e-5L,
C9 = 2.75573192239858906525606685484412005e-6L,
C10 = 2.75573192239858906612966093057020362e-7L,
C11 = 2.50521083854417203619031960151253944e-8L,
C12 = 2.08767569878679576457272282566520649e-9L,
C13 = 1.60590438367252471783548748824255707e-10L;
/*
* XXX this has 1 more coeff than needed.
* XXX can start the double coeffs but not the double mults at C10.
* With my coeffs (C10-C17 double; s = best_s):
* Domain [-0.1659, 0.03125], range ~[-1.1976e-37, 1.1976e-37]:
* |(exp(x)-1-x-x**2/2)/x - p(x)| ~< 2**-122.65
*/
static const double
C14 = 1.1470745580491932e-11, /* 0x1.93974a81dae30p-37 */
C15 = 7.6471620181090468e-13, /* 0x1.ae7f3820adab1p-41 */
C16 = 4.7793721460260450e-14, /* 0x1.ae7cd18a18eacp-45 */
C17 = 2.8074757356658877e-15, /* 0x1.949992a1937d9p-49 */
C18 = 1.4760610323699476e-16; /* 0x1.545b43aabfbcdp-53 */
/*
* Domain [0.03125, 0.1659], range ~[-2.7676e-37, -1.0367e-38]:
* |(exp(x)-1-x-x**2/2)/x - p(x)| < 2**-121.44
*/
static const long double
D3 = 1.66666666666666666666666666666682245e-1L,
D4 = 4.16666666666666666666666666634228324e-2L,
D5 = 8.33333333333333333333333364022244481e-3L,
D6 = 1.38888888888888888888887138722762072e-3L,
D7 = 1.98412698412698412699085805424661471e-4L,
D8 = 2.48015873015873015687993712101479612e-5L,
D9 = 2.75573192239858944101036288338208042e-6L,
D10 = 2.75573192239853161148064676533754048e-7L,
D11 = 2.50521083855084570046480450935267433e-8L,
D12 = 2.08767569819738524488686318024854942e-9L,
D13 = 1.60590442297008495301927448122499313e-10L;
/*
* XXX this has 1 more coeff than needed.
* XXX can start the double coeffs but not the double mults at D11.
* With my coeffs (D11-D16 double):
* Domain [0.03125, 0.1659], range ~[-1.1980e-37, 1.1980e-37]:
* |(exp(x)-1-x-x**2/2)/x - p(x)| ~< 2**-122.65
*/
static const double
D14 = 1.1470726176204336e-11, /* 0x1.93971dc395d9ep-37 */
D15 = 7.6478532249581686e-13, /* 0x1.ae892e3D16fcep-41 */
D16 = 4.7628892832607741e-14, /* 0x1.ad00Dfe41feccp-45 */
D17 = 3.0524857220358650e-15; /* 0x1.D7e8d886Df921p-49 */
long double
expm1l(long double x)
{
union IEEEl2bits u, v;
long double hx2_hi, hx2_lo, q, r, r1, t, twomk, twopk, x_hi;
long double x_lo, x2;
double dr, dx, fn, r2;
int k, n, n2;
uint16_t hx, ix;
DOPRINT_START(&x);
/* Filter out exceptional cases. */
u.e = x;
hx = u.xbits.expsign;
ix = hx & 0x7fff;
if (ix >= BIAS + 7) { /* |x| >= 128 or x is NaN */
if (ix == BIAS + LDBL_MAX_EXP) {
if (hx & 0x8000) /* x is -Inf or -NaN */
RETURNP(-1 / x - 1);
RETURNP(x + x); /* x is +Inf or +NaN */
}
if (x > o_threshold)
RETURNP(huge * huge);
/*
* expm1l() never underflows, but it must avoid
* unrepresentable large negative exponents. We used a
* much smaller threshold for large |x| above than in
* expl() so as to handle not so large negative exponents
* in the same way as large ones here.
*/
if (hx & 0x8000) /* x <= -128 */
RETURN2P(tiny, -1); /* good for x < -114ln2 - eps */
}
ENTERI();
if (T1 < x && x < T2) {
x2 = x * x;
dx = x;
if (x < T3) {
if (ix < BIAS - 113) { /* |x| < 0x1p-113 */
/* x (rounded) with inexact if x != 0: */
RETURNPI(x == 0 ? x :
(0x1p200 * x + fabsl(x)) * 0x1p-200);
}
q = x * x2 * C3 + x2 * x2 * (C4 + x * (C5 + x * (C6 +
x * (C7 + x * (C8 + x * (C9 + x * (C10 +
x * (C11 + x * (C12 + x * (C13 +
dx * (C14 + dx * (C15 + dx * (C16 +
dx * (C17 + dx * C18))))))))))))));
} else {
q = x * x2 * D3 + x2 * x2 * (D4 + x * (D5 + x * (D6 +
x * (D7 + x * (D8 + x * (D9 + x * (D10 +
x * (D11 + x * (D12 + x * (D13 +
dx * (D14 + dx * (D15 + dx * (D16 +
dx * D17)))))))))))));
}
x_hi = (float)x;
x_lo = x - x_hi;
hx2_hi = x_hi * x_hi / 2;
hx2_lo = x_lo * (x + x_hi) / 2;
if (ix >= BIAS - 7)
RETURN2PI(hx2_hi + x_hi, hx2_lo + x_lo + q);
else
RETURN2PI(x, hx2_lo + q + hx2_hi);
}
/* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */
fn = rnint((double)x * INV_L);
n = irint(fn);
n2 = (unsigned)n % INTERVALS;
k = n >> LOG2_INTERVALS;
r1 = x - fn * L1;
r2 = fn * -L2;
r = r1 + r2;
/* Prepare scale factor. */
v.e = 1;
v.xbits.expsign = BIAS + k;
twopk = v.e;
/*
* Evaluate lower terms of
* expl(endpoint[n2] + r1 + r2) = tbl[n2] * expl(r1 + r2).
*/
dr = r;
q = r2 + r * r * (A2 + r * (A3 + r * (A4 + r * (A5 + r * (A6 +
dr * (A7 + dr * (A8 + dr * (A9 + dr * A10))))))));
t = tbl[n2].lo + tbl[n2].hi;
if (k == 0) {
t = SUM2P(tbl[n2].hi - 1, tbl[n2].lo * (r1 + 1) + t * q +
tbl[n2].hi * r1);
RETURNI(t);
}
if (k == -1) {
t = SUM2P(tbl[n2].hi - 2, tbl[n2].lo * (r1 + 1) + t * q +
tbl[n2].hi * r1);
RETURNI(t / 2);
}
if (k < -7) {
t = SUM2P(tbl[n2].hi, tbl[n2].lo + t * (q + r1));
RETURNI(t * twopk - 1);
}
if (k > 2 * LDBL_MANT_DIG - 1) {
t = SUM2P(tbl[n2].hi, tbl[n2].lo + t * (q + r1));
if (k == LDBL_MAX_EXP)
RETURNI(t * 2 * 0x1p16383L - 1);
RETURNI(t * twopk - 1);
}
v.xbits.expsign = BIAS - k;
twomk = v.e;
if (k > LDBL_MANT_DIG - 1)
t = SUM2P(tbl[n2].hi, tbl[n2].lo - twomk + t * (q + r1));
else
t = SUM2P(tbl[n2].hi - twomk, tbl[n2].lo + t * (q + r1));
RETURNI(t * twopk);
}
#else
#error "architecture unsupported"
#endif

View file

@ -1,103 +1,152 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
FreeBSD lib/msun/src/s_expm1f.c
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
Copyright (c) 1992-2023 The FreeBSD Project.
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
Developed at SunPro, a Sun Microsystems, Inc. business.
Permission to use, copy, modify, and distribute this
software is freely granted, provided that this notice
is preserved.
*/
#include "libc/math.h"
#include "libc/tinymath/hornerf.internal.h"
#include "libc/tinymath/internal.h"
#include "third_party/libcxx/math.h"
#include "libc/tinymath/freebsd.internal.h"
asm(".ident\t\"\\n\\n\
Optimized Routines (MIT License)\\n\
Copyright 2022 ARM Limited\"");
FreeBSD libm (BSD-2 License)\\n\
Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
asm(".ident\t\"\\n\\n\
fdlibm (fdlibm license)\\n\
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
asm(".include \"libc/disclaimer.inc\"");
// clang-format off
#define Shift (0x1.8p23f)
#define InvLn2 (0x1.715476p+0f)
#define Ln2hi (0x1.62e4p-1f)
#define Ln2lo (0x1.7f7d1cp-20f)
#define AbsMask (0x7fffffff)
#define InfLimit \
(0x1.644716p6) /* Smallest value of x for which expm1(x) overflows. */
#define NegLimit \
(-0x1.9bbabcp+6) /* Largest value of x for which expm1(x) rounds to 1. */
static const float
one = 1.0,
tiny = 1.0e-30,
o_threshold = 8.8721679688e+01,/* 0x42b17180 */
ln2_hi = 6.9313812256e-01,/* 0x3f317180 */
ln2_lo = 9.0580006145e-06,/* 0x3717f7d1 */
invln2 = 1.4426950216e+00,/* 0x3fb8aa3b */
/*
* Domain [-0.34568, 0.34568], range ~[-6.694e-10, 6.696e-10]:
* |6 / x * (1 + 2 * (1 / (exp(x) - 1) - 1 / x)) - q(x)| < 2**-30.04
* Scaled coefficients: Qn_here = 2**n * Qn_for_q (see s_expm1.c):
*/
Q1 = -3.3333212137e-2, /* -0x888868.0p-28 */
Q2 = 1.5807170421e-3; /* 0xcf3010.0p-33 */
#define C(i) __expm1f_poly[i]
static volatile float huge = 1.0e+30;
/* Generated using fpminimax, see tools/expm1f.sollya for details. */
const float __expm1f_poly[] = {0x1.fffffep-2, 0x1.5554aep-3, 0x1.555736p-5,
0x1.12287cp-7, 0x1.6b55a2p-10};
/* Approximation for exp(x) - 1 using polynomial on a reduced interval.
The maximum error is 1.51 ULP:
expm1f(0x1.8baa96p-2) got 0x1.e2fb9p-2
want 0x1.e2fb94p-2. */
/**
* Returns 𝑒^𝑥-𝟷.
*/
float
expm1f (float x)
expm1f(float x)
{
uint32_t ix = asuint (x);
uint32_t ax = ix & AbsMask;
float y,hi,lo,c,t,e,hxs,hfx,r1,twopk;
int32_t k,xsb;
uint32_t hx;
/* Tiny: |x| < 0x1p-23. expm1(x) is closely approximated by x.
Inf: x == +Inf => expm1(x) = x. */
if (ax <= 0x34000000 || (ix == 0x7f800000))
return x;
GET_FLOAT_WORD(hx,x);
xsb = hx&0x80000000; /* sign bit of x */
hx &= 0x7fffffff; /* high word of |x| */
/* +/-NaN. */
if (ax > 0x7f800000)
return __math_invalidf (x);
/* filter out huge and non-finite argument */
if(hx >= 0x4195b844) { /* if |x|>=27*ln2 */
if(hx >= 0x42b17218) { /* if |x|>=88.721... */
if(hx>0x7f800000)
return x+x; /* NaN */
if(hx==0x7f800000)
return (xsb==0)? x:-1.0;/* exp(+-inf)={inf,-1} */
if(x > o_threshold) return huge*huge; /* overflow */
}
if(xsb!=0) { /* x < -27*ln2, return -1.0 with inexact */
if(x+tiny<(float)0.0) /* raise inexact */
return tiny-one; /* return -1 */
}
}
if (x >= InfLimit)
return __math_oflowf (0);
/* argument reduction */
if(hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */
if(hx < 0x3F851592) { /* and |x| < 1.5 ln2 */
if(xsb==0)
{hi = x - ln2_hi; lo = ln2_lo; k = 1;}
else
{hi = x + ln2_hi; lo = -ln2_lo; k = -1;}
} else {
k = invln2*x+((xsb==0)?(float)0.5:(float)-0.5);
t = k;
hi = x - t*ln2_hi; /* t*ln2_hi is exact here */
lo = t*ln2_lo;
}
STRICT_ASSIGN(float, x, hi - lo);
c = (hi-x)-lo;
}
else if(hx < 0x33000000) { /* when |x|<2**-25, return x */
t = huge+x; /* return x with inexact flags when x!=0 */
return x - (t-(huge+x));
}
else k = 0;
if (x <= NegLimit || ix == 0xff800000)
return -1;
/* Reduce argument to smaller range:
Let i = round(x / ln2)
and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
where 2^i is exact because i is an integer. */
float j = fmaf (InvLn2, x, Shift) - Shift;
int32_t i = j;
float f = fmaf (j, -Ln2hi, x);
f = fmaf (j, -Ln2lo, f);
/* Approximate expm1(f) using polynomial.
Taylor expansion for expm1(x) has the form:
x + ax^2 + bx^3 + cx^4 ....
So we calculate the polynomial P(f) = a + bf + cf^2 + ...
and assemble the approximation expm1(f) ~= f + f^2 * P(f). */
float p = fmaf (f * f, HORNER_4 (f, C), f);
/* Assemble the result, using a slight rearrangement to achieve acceptable
accuracy.
expm1(x) ~= 2^i * (p + 1) - 1
Let t = 2^(i - 1). */
float t = ldexpf (0.5f, i);
/* expm1(x) ~= 2 * (p * t + (t - 1/2)). */
return 2 * fmaf (p, t, t - 0.5f);
/* x is now in primary range */
hfx = (float)0.5*x;
hxs = x*hfx;
r1 = one+hxs*(Q1+hxs*Q2);
t = (float)3.0-r1*hfx;
e = hxs*((r1-t)/((float)6.0 - x*t));
if(k==0) return x - (x*e-hxs); /* c is 0 */
else {
SET_FLOAT_WORD(twopk,((uint32_t)(0x7f+k))<<23); /* 2^k */
e = (x*(e-c)-c);
e -= hxs;
if(k== -1) return (float)0.5*(x-e)-(float)0.5;
if(k==1) {
if(x < (float)-0.25) return -(float)2.0*(e-(x+(float)0.5));
else return one+(float)2.0*(x-e);
}
if (k <= -2 || k>56) { /* suffice to return exp(x)-1 */
y = one-(e-x);
if (k == 128) y = y*2.0F*0x1p127F;
else y = y*twopk;
return y-one;
}
t = one;
if(k<23) {
SET_FLOAT_WORD(t,0x3f800000 - (0x1000000>>k)); /* t=1-2^-k */
y = t-(e-x);
y = y*twopk;
} else {
SET_FLOAT_WORD(t,((0x7f-k)<<23)); /* 2^-k */
y = x-(e+t);
y += one;
y = y*twopk;
}
}
return y;
}

View file

@ -1,41 +1,19 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/math.h"
#include "libc/tinymath/internal.h"
// clang-format off
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double expm1l(long double x) {
return expm1(x);
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
asm(".ident\t\"\\n\\n\
OpenBSD libm (MIT License)\\n\
OpenBSD libm (ISC License)\\n\
Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
asm(".include \"libc/disclaimer.inc\"");
// clang-format off
/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_expm1l.c */
/*
@ -53,6 +31,7 @@ asm(".include \"libc/disclaimer.inc\"");
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/*
* Exponential function, minus 1
* Long double precision
@ -86,13 +65,6 @@ asm(".include \"libc/disclaimer.inc\"");
* IEEE -45,+maxarg 200,000 1.2e-19 2.5e-20
*/
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double expm1l(long double x)
{
return expm1(x);
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
/* exp(x) - 1 = x + 0.5 x^2 + x^3 P(x)/Q(x)
-.5 ln 2 < x < .5 ln 2
Theoretical peak relative error = 3.4e-22 */
@ -151,12 +123,11 @@ long double expm1l(long double x)
x = px * qx + (px - 1.0);
return x;
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
// TODO: broken implementation to make things compile
long double expm1l(long double x)
{
return expm1(x);
}
// see expl.c
#else
#error "architecture unsupported"
#endif

View file

@ -38,9 +38,12 @@ asm(".include \"libc/disclaimer.inc\"");
/**
* Returns largest integral value not greater than 𝑥.
*/
long double floorl(long double x) {
long double floorl(long double x)
{
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
return floor(x);
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
static const long double toint = 1/LDBL_EPSILON;
@ -63,6 +66,7 @@ long double floorl(long double x) {
if (y > 0)
return x + y - 1;
return x + y;
#else
#error "architecture unsupported"
#endif

File diff suppressed because it is too large Load diff

View file

@ -2,63 +2,65 @@
vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/
#include "libc/math.h"
#include "libc/tinymath/ldshape.internal.h"
#include "libc/tinymath/freebsd.internal.h"
asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
FreeBSD libm (BSD-2 License)\\n\
Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
asm(".include \"libc/disclaimer.inc\"");
// clang-format off
/**
* Splits number normalized fraction and exponent.
*/
long double frexpl(long double x, int *e) {
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
return frexp(x, e);
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
union ldshape u = {x};
int ee = u.i.se & 0x7fff;
long double
frexpl(long double x, int *ex)
{
union IEEEl2bits u;
if (!ee) {
if (x) {
x = frexpl(x*0x1p120, e);
*e -= 120;
} else *e = 0;
return x;
} else if (ee == 0x7fff) {
return x;
u.e = x;
switch (u.bits.exp) {
case 0: /* 0 or subnormal */
if ((u.bits.manl | u.bits.manh) == 0) {
*ex = 0;
} else {
u.e *= 0x1.0p514;
*ex = u.bits.exp - 0x4200;
u.bits.exp = 0x3ffe;
}
break;
case 0x7fff: /* infinity or NaN; value of *ex is unspecified */
break;
default: /* normal */
*ex = u.bits.exp - 0x3ffe;
u.bits.exp = 0x3ffe;
break;
}
*e = ee - 0x3ffe;
u.i.se &= 0x8000;
u.i.se |= 0x3ffe;
return u.f;
#else
#error "architecture unsupported"
#endif
return (u.e);
}

View file

@ -142,6 +142,9 @@ S02 = 1.16926784663337450260e-04, /* 0x3F1EA6D2, 0xDD57DBF4 */
S03 = 5.13546550207318111446e-07, /* 0x3EA13B54, 0xCE84D5A9 */
S04 = 1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */
/**
* Returns Bessel function of 𝑥 of first kind of order 0.
*/
double j0(double x)
{
double z,r,s;
@ -190,6 +193,9 @@ v02 = 7.60068627350353253702e-05, /* 0x3F13ECBB, 0xF578C6C1 */
v03 = 2.59150851840457805467e-07, /* 0x3E91642D, 0x7FF202FD */
v04 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */
/**
* Returns Bessel function of 𝑥 of second kind of order 0.
*/
double y0(double x)
{
double z,u,v;

View file

@ -144,6 +144,9 @@ s03 = 1.17718464042623683263e-06, /* 0x3EB3BFF8, 0x333F8498 */
s04 = 5.04636257076217042715e-09, /* 0x3E35AC88, 0xC97DFF2C */
s05 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */
/**
* Returns Bessel function of 𝑥 of first kind of order 1.
*/
double j1(double x)
{
double z,r,s;
@ -183,6 +186,9 @@ static const double V0[5] = {
1.66559246207992079114e-11, /* 0x3DB25039, 0xDACA772A */
};
/**
* Returns Bessel function of 𝑥 of second kind of order 1.
*/
double y1(double x)
{
double z,u,v;

View file

@ -72,6 +72,9 @@ asm(".include \"libc/disclaimer.inc\"");
static const double invsqrtpi = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */
/**
* Returns Bessel function of 𝑥 of first kind of order 𝑛.
*/
double jn(int n, double x)
{
uint32_t ix, lx;
@ -245,7 +248,9 @@ double jn(int n, double x)
return sign ? -b : b;
}
/**
* Returns Bessel function of 𝑥 of second kind of order 𝑛.
*/
double yn(int n, double x)
{
uint32_t ix, lx, ib;

View file

@ -49,6 +49,9 @@ asm(".include \"libc/disclaimer.inc\"");
* ====================================================
*/
/**
* Returns Bessel function of 𝑥 of first kind of order 𝑛.
*/
float jnf(int n, float x)
{
uint32_t ix;
@ -192,6 +195,9 @@ float jnf(int n, float x)
return sign ? -b : b;
}
/**
* Returns Bessel function of 𝑥 of second kind of order 𝑛.
*/
float ynf(int n, float x)
{
uint32_t ix, ib;

View file

@ -35,8 +35,8 @@ asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
asm(".include \"libc/disclaimer.inc\"");
// clang-format off
/* clang-format off */
/* origin: FreeBSD /usr/src/lib/msun/src/k_sin.c */
/*
* ====================================================

View file

@ -35,8 +35,8 @@ asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
asm(".include \"libc/disclaimer.inc\"");
// clang-format off
/* clang-format off */
/* origin: FreeBSD /usr/src/lib/msun/src/k_tan.c */
/*
* ====================================================

View file

@ -35,7 +35,7 @@ asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
// clang-format off
/* origin: FreeBSD /usr/src/lib/msun/src/e_lgamma_r.c */
/*

View file

@ -2,8 +2,8 @@
vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -31,10 +31,10 @@
#include "libc/tinymath/log_data.internal.h"
asm(".ident\t\"\\n\\n\
Double-precision math functions (MIT License)\\n\
Copyright 2018 ARM Limited\"");
Optimized Routines (MIT License)\\n\
Copyright 2022 ARM Limited\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
// clang-format off
/*
* Double-precision log(x) function.
@ -52,12 +52,6 @@ asm(".include \"libc/disclaimer.inc\"");
#define N (1 << LOG_TABLE_BITS)
#define OFF 0x3fe6000000000000
/* Top 16 bits of a double. */
static inline uint32_t top16(double x)
{
return asuint64(x) >> 48;
}
/**
* Returns natural logarithm of 𝑥.
*/
@ -69,7 +63,7 @@ double log(double x)
int k, i;
ix = asuint64(x);
top = top16(x);
top = ix >> 48;
#define LO asuint64(1.0 - 0x1p-4)
#define HI asuint64(1.0 + 0x1.09p-4)
if (UNLIKELY(ix - LO < HI - LO)) {

View file

@ -38,7 +38,7 @@ asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
// clang-format off
/* origin: FreeBSD /usr/src/lib/msun/src/e_log10.c */
/*

View file

@ -29,7 +29,7 @@
#include "libc/tinymath/internal.h"
asm(".ident\t\"\\n\\n\
OpenBSD libm (MIT License)\\n\
OpenBSD libm (ISC License)\\n\
Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
@ -96,12 +96,6 @@ asm(".include \"libc/disclaimer.inc\"");
* log domain: x < 0; returns MINLOG
*/
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double log10l(long double x)
{
return log10(x);
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
/* Coefficients for log(1+x) = x - x**2/2 + x**3 P(x)/Q(x)
* 1/sqrt(2) <= x < sqrt(2)
* Theoretical peak relative error = 6.2e-22
@ -159,6 +153,8 @@ long double log10l(long double x)
{
#ifdef __x86__
// asm improves performance 41ns → 21ns
// measurement made on an intel core i9
long double lg2;
asm("fldlg2" : "=t"(lg2));
asm("fyl2x"
@ -167,7 +163,11 @@ long double log10l(long double x)
: "st(1)");
return x;
#else
#elif LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
return log10(x);
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
long double y, z;
int e;
@ -234,15 +234,12 @@ done:
z += e * (L102A);
return z;
#endif /* __x86__ */
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
// TODO: broken implementation to make things compile
long double log10l(long double x)
{
return log10(x);
}
long double __log10lq(long double);
return __log10lq(x);
#else
#error "architecture unsupported"
#endif
}

View file

@ -34,7 +34,7 @@ asm(".ident\t\"\\n\\n\
Double-precision math functions (MIT License)\\n\
Copyright 2018 ARM Limited\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
// clang-format off
/* origin: FreeBSD /usr/src/lib/msun/src/s_log1p.c */
/*

View file

@ -30,7 +30,7 @@
#include "libc/tinymath/complex.internal.h"
asm(".ident\t\"\\n\\n\
OpenBSD libm (MIT License)\\n\
OpenBSD libm (ISC License)\\n\
Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
@ -88,12 +88,6 @@ asm(".include \"libc/disclaimer.inc\"");
* IEEE -1.0, 9.0 100000 8.2e-20 2.5e-20
*/
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double log1pl(long double x)
{
return log1p(x);
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
/* Coefficients for log(1+x) = x - x^2 / 2 + x^3 P(x)/Q(x)
* 1/sqrt(2) <= x < sqrt(2)
* Theoretical peak relative error = 2.32e-20
@ -144,6 +138,12 @@ static const long double C2 = 1.4286068203094172321215E-6L;
*/
long double log1pl(long double xm1)
{
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
return log1p(xm1);
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
long double x, y, z;
int e;
@ -208,13 +208,13 @@ long double log1pl(long double xm1)
z = z + x;
z = z + e * C1;
return z;
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
// TODO: broken implementation to make things compile
long double log1pl(long double x)
{
return log1p(x);
}
long double __log1plq(long double);
return __log1plq(xm1);
#else
#error "architecture unsupported"
#endif
}

View file

@ -30,7 +30,7 @@
#include "libc/tinymath/complex.internal.h"
asm(".ident\t\"\\n\\n\
OpenBSD libm (MIT License)\\n\
OpenBSD libm (ISC License)\\n\
Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
@ -92,12 +92,6 @@ asm(".include \"libc/disclaimer.inc\"");
* [-10000, +10000].
*/
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double log2l(long double x)
{
return log2(x);
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
/* Coefficients for ln(1+x) = x - x**2/2 + x**3 P(x)/Q(x)
* 1/sqrt(2) <= x < sqrt(2)
* Theoretical peak relative error = 6.2e-22
@ -144,8 +138,29 @@ static const long double S[4] = {
#define SQRTH 0.70710678118654752440L
/**
* Calculates log𝑥.
*/
long double log2l(long double x)
{
#ifdef __x86__
// asm improves performance 39ns → 21ns
// measurement made on an intel core i9
long double one;
asm("fld1" : "=t"(one));
asm("fyl2x"
: "=t"(x)
: "0"(x), "u"(one)
: "st(1)");
return x;
#elif LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
return log2(x);
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
long double y, z;
int e;
@ -210,13 +225,13 @@ done:
z += x;
z += e;
return z;
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
// TODO: broken implementation to make things compile
long double log2l(long double x)
{
return log2(x);
}
long double __log2lq(long double);
return __log2lq(x);
#else
#error "architecture unsupported"
#endif
}

View file

@ -34,7 +34,7 @@ asm(".ident\t\"\\n\\n\
Optimized Routines (MIT License)\\n\
Copyright 2022 ARM Limited\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
// clang-format off
/*
* Single-precision log function.
@ -57,6 +57,9 @@ Relative error: 1.957 * 2^-26 (before rounding.)
#define N (1 << LOGF_TABLE_BITS)
#define OFF 0x3f330000
/**
* Returns natural logarithm of 𝑥.
*/
float logf(float x)
{
double_t z, r, r2, y, y0, invc, logc;

View file

@ -29,13 +29,13 @@
#include "libc/tinymath/internal.h"
asm(".ident\t\"\\n\\n\
OpenBSD libm (MIT License)\\n\
OpenBSD libm (ISC License)\\n\
Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
// clang-format off
/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_logl.c */
/*
@ -91,12 +91,6 @@ asm(".include \"libc/disclaimer.inc\"");
* [-10000, +10000].
*/
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double logl(long double x)
{
return log(x);
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
/* Coefficients for log(1+x) = x - x**2/2 + x**3 P(x)/Q(x)
* 1/sqrt(2) <= x < sqrt(2)
* Theoretical peak relative error = 2.32e-20
@ -142,8 +136,27 @@ static const long double C2 = 1.4286068203094172321215E-6L;
#define SQRTH 0.70710678118654752440L
/**
* Returns natural logarithm of 𝑥.
*/
long double logl(long double x)
{
#ifdef __x86__
long double ln2;
asm("fldln2" : "=t"(ln2));
asm("fyl2x"
: "=t"(x)
: "0"(x), "u"(ln2)
: "st(1)");
return x;
#elif LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
return log(x);
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
long double y, z;
int e;
@ -202,13 +215,13 @@ long double logl(long double x)
z = z + x;
z = z + e * C1; /* This sum has an error of 1/2 lsb. */
return z;
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
// TODO: broken implementation to make things compile
long double logl(long double x)
{
return log(x);
}
long double __loglq(long double);
return __loglq(x);
#else
#error "architecture unsupported"
#endif
}

741
libc/tinymath/loglq.c Normal file
View file

@ -0,0 +1,741 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi
Copyright (c) 2007-2013 Bruce D. Evans
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice unmodified, this list of conditions, and the following
disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "libc/math.h"
#include "libc/tinymath/freebsd.internal.h"
#if LDBL_MANT_DIG == 113
asm(".ident\t\"\\n\\n\
FreeBSD libm (BSD-2 License)\\n\
Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
asm(".include \"libc/disclaimer.inc\"");
// clang-format off
/**
* Implementation of the natural logarithm of x for 128-bit format.
*
* First decompose x into its base 2 representation:
*
* log(x) = log(X * 2**k), where X is in [1, 2)
* = log(X) + k * log(2).
*
* Let X = X_i + e, where X_i is the center of one of the intervals
* [-1.0/256, 1.0/256), [1.0/256, 3.0/256), .... [2.0-1.0/256, 2.0+1.0/256)
* and X is in this interval. Then
*
* log(X) = log(X_i + e)
* = log(X_i * (1 + e / X_i))
* = log(X_i) + log(1 + e / X_i).
*
* The values log(X_i) are tabulated below. Let d = e / X_i and use
*
* log(1 + d) = p(d)
*
* where p(d) = d - 0.5*d*d + ... is a special minimax polynomial of
* suitably high degree.
*
* To get sufficiently small roundoff errors, k * log(2), log(X_i), and
* sometimes (if |k| is not large) the first term in p(d) must be evaluated
* and added up in extra precision. Extra precision is not needed for the
* rest of p(d). In the worst case when k = 0 and log(X_i) is 0, the final
* error is controlled mainly by the error in the second term in p(d). The
* error in this term itself is at most 0.5 ulps from the d*d operation in
* it. The error in this term relative to the first term is thus at most
* 0.5 * |-0.5| * |d| < 1.0/1024 ulps. We aim for an accumulated error of
* at most twice this at the point of the final rounding step. Thus the
* final error should be at most 0.5 + 1.0/512 = 0.5020 ulps. Exhaustive
* testing of a float variant of this function showed a maximum final error
* of 0.5008 ulps. Non-exhaustive testing of a double variant of this
* function showed a maximum final error of 0.5078 ulps (near 1+1.0/256).
*
* We made the maximum of |d| (and thus the total relative error and the
* degree of p(d)) small by using a large number of intervals. Using
* centers of intervals instead of endpoints reduces this maximum by a
* factor of 2 for a given number of intervals. p(d) is special only
* in beginning with the Taylor coefficients 0 + 1*d, which tends to happen
* naturally. The most accurate minimax polynomial of a given degree might
* be different, but then we wouldn't want it since we would have to do
* extra work to avoid roundoff error (especially for P0*d instead of d).
*/
#ifndef NO_STRUCT_RETURN
#define STRUCT_RETURN
#endif
#if !defined(NO_UTAB) && !defined(NO_UTABL)
#define USE_UTAB
#endif
/*
* Domain [-0.005280, 0.004838], range ~[-1.1577e-37, 1.1582e-37]:
* |log(1 + d)/d - p(d)| < 2**-122.7
*/
static const long double
P2 = -0.5L,
P3 = 3.33333333333333333333333333333233795e-1L, /* 0x15555555555555555555555554d42.0p-114L */
P4 = -2.49999999999999999999999999941139296e-1L, /* -0x1ffffffffffffffffffffffdab14e.0p-115L */
P5 = 2.00000000000000000000000085468039943e-1L, /* 0x19999999999999999999a6d3567f4.0p-115L */
P6 = -1.66666666666666666666696142372698408e-1L, /* -0x15555555555555555567267a58e13.0p-115L */
P7 = 1.42857142857142857119522943477166120e-1L, /* 0x1249249249249248ed79a0ae434de.0p-115L */
P8 = -1.24999999999999994863289015033581301e-1L; /* -0x1fffffffffffffa13e91765e46140.0p-116L */
/* Double precision gives ~ 53 + log2(P9 * max(|d|)**8) ~= 120 bits. */
static const double
P9 = 1.1111111111111401e-1, /* 0x1c71c71c71c7ed.0p-56 */
P10 = -1.0000000000040135e-1, /* -0x199999999a0a92.0p-56 */
P11 = 9.0909090728136258e-2, /* 0x1745d173962111.0p-56 */
P12 = -8.3333318851855284e-2, /* -0x1555551722c7a3.0p-56 */
P13 = 7.6928634666404178e-2, /* 0x13b1985204a4ae.0p-56 */
P14 = -7.1626810078462499e-2; /* -0x12562276cdc5d0.0p-56 */
static volatile const double zero = 0;
#define INTERVALS 128
#define LOG2_INTERVALS 7
#define TSIZE (INTERVALS + 1)
#define G(i) (T[(i)].G)
#define F_hi(i) (T[(i)].F_hi)
#define F_lo(i) (T[(i)].F_lo)
#define ln2_hi F_hi(TSIZE - 1)
#define ln2_lo F_lo(TSIZE - 1)
#define E(i) (U[(i)].E)
#define H(i) (U[(i)].H)
static const struct {
float G; /* 1/(1 + i/128) rounded to 8/9 bits */
float F_hi; /* log(1 / G_i) rounded (see below) */
/* The compiler will insert 8 bytes of padding here. */
long double F_lo; /* next 113 bits for log(1 / G_i) */
} T[TSIZE] = {
/*
* ln2_hi and each F_hi(i) are rounded to a number of bits that
* makes F_hi(i) + dk*ln2_hi exact for all i and all dk.
*
* The last entry (for X just below 2) is used to define ln2_hi
* and ln2_lo, to ensure that F_hi(i) and F_lo(i) cancel exactly
* with dk*ln2_hi and dk*ln2_lo, respectively, when dk = -1.
* This is needed for accuracy when x is just below 1. (To avoid
* special cases, such x are "reduced" strangely to X just below
* 2 and dk = -1, and then the exact cancellation is needed
* because any the error from any non-exactness would be too
* large).
*
* The relevant range of dk is [-16445, 16383]. The maximum number
* of bits in F_hi(i) that works is very dependent on i but has
* a minimum of 93. We only need about 12 bits in F_hi(i) for
* it to provide enough extra precision.
*
* We round F_hi(i) to 24 bits so that it can have type float,
* mainly to minimize the size of the table. Using all 24 bits
* in a float for it automatically satisfies the above constraints.
*/
{0x800000.0p-23, 0, 0},
{0xfe0000.0p-24, 0x8080ac.0p-30, -0x14ee431dae6674afa0c4bfe16e8fd.0p-144L},
{0xfc0000.0p-24, 0x8102b3.0p-29, -0x1db29ee2d83717be918e1119642ab.0p-144L},
{0xfa0000.0p-24, 0xc24929.0p-29, 0x1191957d173697cf302cc9476f561.0p-143L},
{0xf80000.0p-24, 0x820aec.0p-28, 0x13ce8888e02e78eba9b1113bc1c18.0p-142L},
{0xf60000.0p-24, 0xa33577.0p-28, -0x17a4382ce6eb7bfa509bec8da5f22.0p-142L},
{0xf48000.0p-24, 0xbc42cb.0p-28, -0x172a21161a107674986dcdca6709c.0p-143L},
{0xf30000.0p-24, 0xd57797.0p-28, -0x1e09de07cb958897a3ea46e84abb3.0p-142L},
{0xf10000.0p-24, 0xf7518e.0p-28, 0x1ae1eec1b036c484993c549c4bf40.0p-151L},
{0xef0000.0p-24, 0x8cb9df.0p-27, -0x1d7355325d560d9e9ab3d6ebab580.0p-141L},
{0xed8000.0p-24, 0x999ec0.0p-27, -0x1f9f02d256d5037108f4ec21e48cd.0p-142L},
{0xec0000.0p-24, 0xa6988b.0p-27, -0x16fc0a9d12c17a70f7a684c596b12.0p-143L},
{0xea0000.0p-24, 0xb80698.0p-27, 0x15d581c1e8da99ded322fb08b8462.0p-141L},
{0xe80000.0p-24, 0xc99af3.0p-27, -0x1535b3ba8f150ae09996d7bb4653e.0p-143L},
{0xe70000.0p-24, 0xd273b2.0p-27, 0x163786f5251aefe0ded34c8318f52.0p-145L},
{0xe50000.0p-24, 0xe442c0.0p-27, 0x1bc4b2368e32d56699c1799a244d4.0p-144L},
{0xe38000.0p-24, 0xf1b83f.0p-27, 0x1c6090f684e6766abceccab1d7174.0p-141L},
{0xe20000.0p-24, 0xff448a.0p-27, -0x1890aa69ac9f4215f93936b709efb.0p-142L},
{0xe08000.0p-24, 0x8673f6.0p-26, 0x1b9985194b6affd511b534b72a28e.0p-140L},
{0xdf0000.0p-24, 0x8d515c.0p-26, -0x1dc08d61c6ef1d9b2ef7e68680598.0p-143L},
{0xdd8000.0p-24, 0x943a9e.0p-26, -0x1f72a2dac729b3f46662238a9425a.0p-142L},
{0xdc0000.0p-24, 0x9b2fe6.0p-26, -0x1fd4dfd3a0afb9691aed4d5e3df94.0p-140L},
{0xda8000.0p-24, 0xa2315d.0p-26, -0x11b26121629c46c186384993e1c93.0p-142L},
{0xd90000.0p-24, 0xa93f2f.0p-26, 0x1286d633e8e5697dc6a402a56fce1.0p-141L},
{0xd78000.0p-24, 0xb05988.0p-26, 0x16128eba9367707ebfa540e45350c.0p-144L},
{0xd60000.0p-24, 0xb78094.0p-26, 0x16ead577390d31ef0f4c9d43f79b2.0p-140L},
{0xd50000.0p-24, 0xbc4c6c.0p-26, 0x151131ccf7c7b75e7d900b521c48d.0p-141L},
{0xd38000.0p-24, 0xc3890a.0p-26, -0x115e2cd714bd06508aeb00d2ae3e9.0p-140L},
{0xd20000.0p-24, 0xcad2d7.0p-26, -0x1847f406ebd3af80485c2f409633c.0p-142L},
{0xd10000.0p-24, 0xcfb620.0p-26, 0x1c2259904d686581799fbce0b5f19.0p-141L},
{0xcf8000.0p-24, 0xd71653.0p-26, 0x1ece57a8d5ae54f550444ecf8b995.0p-140L},
{0xce0000.0p-24, 0xde843a.0p-26, -0x1f109d4bc4595412b5d2517aaac13.0p-141L},
{0xcd0000.0p-24, 0xe37fde.0p-26, 0x1bc03dc271a74d3a85b5b43c0e727.0p-141L},
{0xcb8000.0p-24, 0xeb050c.0p-26, -0x1bf2badc0df841a71b79dd5645b46.0p-145L},
{0xca0000.0p-24, 0xf29878.0p-26, -0x18efededd89fbe0bcfbe6d6db9f66.0p-147L},
{0xc90000.0p-24, 0xf7ad6f.0p-26, 0x1373ff977baa6911c7bafcb4d84fb.0p-141L},
{0xc80000.0p-24, 0xfcc8e3.0p-26, 0x196766f2fb328337cc050c6d83b22.0p-140L},
{0xc68000.0p-24, 0x823f30.0p-25, 0x19bd076f7c434e5fcf1a212e2a91e.0p-139L},
{0xc58000.0p-24, 0x84d52c.0p-25, -0x1a327257af0f465e5ecab5f2a6f81.0p-139L},
{0xc40000.0p-24, 0x88bc74.0p-25, 0x113f23def19c5a0fe396f40f1dda9.0p-141L},
{0xc30000.0p-24, 0x8b5ae6.0p-25, 0x1759f6e6b37de945a049a962e66c6.0p-139L},
{0xc20000.0p-24, 0x8dfccb.0p-25, 0x1ad35ca6ed5147bdb6ddcaf59c425.0p-141L},
{0xc10000.0p-24, 0x90a22b.0p-25, 0x1a1d71a87deba46bae9827221dc98.0p-139L},
{0xbf8000.0p-24, 0x94a0d8.0p-25, -0x139e5210c2b730e28aba001a9b5e0.0p-140L},
{0xbe8000.0p-24, 0x974f16.0p-25, -0x18f6ebcff3ed72e23e13431adc4a5.0p-141L},
{0xbd8000.0p-24, 0x9a00f1.0p-25, -0x1aa268be39aab7148e8d80caa10b7.0p-139L},
{0xbc8000.0p-24, 0x9cb672.0p-25, -0x14c8815839c5663663d15faed7771.0p-139L},
{0xbb0000.0p-24, 0xa0cda1.0p-25, 0x1eaf46390dbb2438273918db7df5c.0p-141L},
{0xba0000.0p-24, 0xa38c6e.0p-25, 0x138e20d831f698298adddd7f32686.0p-141L},
{0xb90000.0p-24, 0xa64f05.0p-25, -0x1e8d3c41123615b147a5d47bc208f.0p-142L},
{0xb80000.0p-24, 0xa91570.0p-25, 0x1ce28f5f3840b263acb4351104631.0p-140L},
{0xb70000.0p-24, 0xabdfbb.0p-25, -0x186e5c0a42423457e22d8c650b355.0p-139L},
{0xb60000.0p-24, 0xaeadef.0p-25, -0x14d41a0b2a08a465dc513b13f567d.0p-143L},
{0xb50000.0p-24, 0xb18018.0p-25, 0x16755892770633947ffe651e7352f.0p-139L},
{0xb40000.0p-24, 0xb45642.0p-25, -0x16395ebe59b15228bfe8798d10ff0.0p-142L},
{0xb30000.0p-24, 0xb73077.0p-25, 0x1abc65c8595f088b61a335f5b688c.0p-140L},
{0xb20000.0p-24, 0xba0ec4.0p-25, -0x1273089d3dad88e7d353e9967d548.0p-139L},
{0xb10000.0p-24, 0xbcf133.0p-25, 0x10f9f67b1f4bbf45de06ecebfaf6d.0p-139L},
{0xb00000.0p-24, 0xbfd7d2.0p-25, -0x109fab904864092b34edda19a831e.0p-140L},
{0xaf0000.0p-24, 0xc2c2ac.0p-25, -0x1124680aa43333221d8a9b475a6ba.0p-139L},
{0xae8000.0p-24, 0xc439b3.0p-25, -0x1f360cc4710fbfe24b633f4e8d84d.0p-140L},
{0xad8000.0p-24, 0xc72afd.0p-25, -0x132d91f21d89c89c45003fc5d7807.0p-140L},
{0xac8000.0p-24, 0xca20a2.0p-25, -0x16bf9b4d1f8da8002f2449e174504.0p-139L},
{0xab8000.0p-24, 0xcd1aae.0p-25, 0x19deb5ce6a6a8717d5626e16acc7d.0p-141L},
{0xaa8000.0p-24, 0xd0192f.0p-25, 0x1a29fb48f7d3ca87dabf351aa41f4.0p-139L},
{0xaa0000.0p-24, 0xd19a20.0p-25, 0x1127d3c6457f9d79f51dcc73014c9.0p-141L},
{0xa90000.0p-24, 0xd49f6a.0p-25, -0x1ba930e486a0ac42d1bf9199188e7.0p-141L},
{0xa80000.0p-24, 0xd7a94b.0p-25, -0x1b6e645f31549dd1160bcc45c7e2c.0p-139L},
{0xa70000.0p-24, 0xdab7d0.0p-25, 0x1118a425494b610665377f15625b6.0p-140L},
{0xa68000.0p-24, 0xdc40d5.0p-25, 0x1966f24d29d3a2d1b2176010478be.0p-140L},
{0xa58000.0p-24, 0xdf566d.0p-25, -0x1d8e52eb2248f0c95dd83626d7333.0p-142L},
{0xa48000.0p-24, 0xe270ce.0p-25, -0x1ee370f96e6b67ccb006a5b9890ea.0p-140L},
{0xa40000.0p-24, 0xe3ffce.0p-25, 0x1d155324911f56db28da4d629d00a.0p-140L},
{0xa30000.0p-24, 0xe72179.0p-25, -0x1fe6e2f2f867d8f4d60c713346641.0p-140L},
{0xa20000.0p-24, 0xea4812.0p-25, 0x1b7be9add7f4d3b3d406b6cbf3ce5.0p-140L},
{0xa18000.0p-24, 0xebdd3d.0p-25, 0x1b3cfb3f7511dd73692609040ccc2.0p-139L},
{0xa08000.0p-24, 0xef0b5b.0p-25, -0x1220de1f7301901b8ad85c25afd09.0p-139L},
{0xa00000.0p-24, 0xf0a451.0p-25, -0x176364c9ac81cc8a4dfb804de6867.0p-140L},
{0x9f0000.0p-24, 0xf3da16.0p-25, 0x1eed6b9aafac8d42f78d3e65d3727.0p-141L},
{0x9e8000.0p-24, 0xf576e9.0p-25, 0x1d593218675af269647b783d88999.0p-139L},
{0x9d8000.0p-24, 0xf8b47c.0p-25, -0x13e8eb7da053e063714615f7cc91d.0p-144L},
{0x9d0000.0p-24, 0xfa553f.0p-25, 0x1c063259bcade02951686d5373aec.0p-139L},
{0x9c0000.0p-24, 0xfd9ac5.0p-25, 0x1ef491085fa3c1649349630531502.0p-139L},
{0x9b8000.0p-24, 0xff3f8c.0p-25, 0x1d607a7c2b8c5320619fb9433d841.0p-139L},
{0x9a8000.0p-24, 0x814697.0p-24, -0x12ad3817004f3f0bdff99f932b273.0p-138L},
{0x9a0000.0p-24, 0x821b06.0p-24, -0x189fc53117f9e54e78103a2bc1767.0p-141L},
{0x990000.0p-24, 0x83c5f8.0p-24, 0x14cf15a048907b7d7f47ddb45c5a3.0p-139L},
{0x988000.0p-24, 0x849c7d.0p-24, 0x1cbb1d35fb82873b04a9af1dd692c.0p-138L},
{0x978000.0p-24, 0x864ba6.0p-24, 0x1128639b814f9b9770d8cb6573540.0p-138L},
{0x970000.0p-24, 0x87244c.0p-24, 0x184733853300f002e836dfd47bd41.0p-139L},
{0x968000.0p-24, 0x87fdaa.0p-24, 0x109d23aef77dd5cd7cc94306fb3ff.0p-140L},
{0x958000.0p-24, 0x89b293.0p-24, -0x1a81ef367a59de2b41eeebd550702.0p-138L},
{0x950000.0p-24, 0x8a8e20.0p-24, -0x121ad3dbb2f45275c917a30df4ac9.0p-138L},
{0x948000.0p-24, 0x8b6a6a.0p-24, -0x1cfb981628af71a89df4e6df2e93b.0p-139L},
{0x938000.0p-24, 0x8d253a.0p-24, -0x1d21730ea76cfdec367828734cae5.0p-139L},
{0x930000.0p-24, 0x8e03c2.0p-24, 0x135cc00e566f76b87333891e0dec4.0p-138L},
{0x928000.0p-24, 0x8ee30d.0p-24, -0x10fcb5df257a263e3bf446c6e3f69.0p-140L},
{0x918000.0p-24, 0x90a3ee.0p-24, -0x16e171b15433d723a4c7380a448d8.0p-139L},
{0x910000.0p-24, 0x918587.0p-24, -0x1d050da07f3236f330972da2a7a87.0p-139L},
{0x908000.0p-24, 0x9267e7.0p-24, 0x1be03669a5268d21148c6002becd3.0p-139L},
{0x8f8000.0p-24, 0x942f04.0p-24, 0x10b28e0e26c336af90e00533323ba.0p-139L},
{0x8f0000.0p-24, 0x9513c3.0p-24, 0x1a1d820da57cf2f105a89060046aa.0p-138L},
{0x8e8000.0p-24, 0x95f950.0p-24, -0x19ef8f13ae3cf162409d8ea99d4c0.0p-139L},
{0x8e0000.0p-24, 0x96dfab.0p-24, -0x109e417a6e507b9dc10dac743ad7a.0p-138L},
{0x8d0000.0p-24, 0x98aed2.0p-24, 0x10d01a2c5b0e97c4990b23d9ac1f5.0p-139L},
{0x8c8000.0p-24, 0x9997a2.0p-24, -0x1d6a50d4b61ea74540bdd2aa99a42.0p-138L},
{0x8c0000.0p-24, 0x9a8145.0p-24, 0x1b3b190b83f9527e6aba8f2d783c1.0p-138L},
{0x8b8000.0p-24, 0x9b6bbf.0p-24, 0x13a69fad7e7abe7ba81c664c107e0.0p-138L},
{0x8b0000.0p-24, 0x9c5711.0p-24, -0x11cd12316f576aad348ae79867223.0p-138L},
{0x8a8000.0p-24, 0x9d433b.0p-24, 0x1c95c444b807a246726b304ccae56.0p-139L},
{0x898000.0p-24, 0x9f1e22.0p-24, -0x1b9c224ea698c2f9b47466d6123fe.0p-139L},
{0x890000.0p-24, 0xa00ce1.0p-24, 0x125ca93186cf0f38b4619a2483399.0p-141L},
{0x888000.0p-24, 0xa0fc80.0p-24, -0x1ee38a7bc228b3597043be78eaf49.0p-139L},
{0x880000.0p-24, 0xa1ed00.0p-24, -0x1a0db876613d204147dc69a07a649.0p-138L},
{0x878000.0p-24, 0xa2de62.0p-24, 0x193224e8516c008d3602a7b41c6e8.0p-139L},
{0x870000.0p-24, 0xa3d0a9.0p-24, 0x1fa28b4d2541aca7d5844606b2421.0p-139L},
{0x868000.0p-24, 0xa4c3d6.0p-24, 0x1c1b5760fb4571acbcfb03f16daf4.0p-138L},
{0x858000.0p-24, 0xa6acea.0p-24, 0x1fed5d0f65949c0a345ad743ae1ae.0p-140L},
{0x850000.0p-24, 0xa7a2d4.0p-24, 0x1ad270c9d749362382a7688479e24.0p-140L},
{0x848000.0p-24, 0xa899ab.0p-24, 0x199ff15ce532661ea9643a3a2d378.0p-139L},
{0x840000.0p-24, 0xa99171.0p-24, 0x1a19e15ccc45d257530a682b80490.0p-139L},
{0x838000.0p-24, 0xaa8a28.0p-24, -0x121a14ec532b35ba3e1f868fd0b5e.0p-140L},
{0x830000.0p-24, 0xab83d1.0p-24, 0x1aee319980bff3303dd481779df69.0p-139L},
{0x828000.0p-24, 0xac7e6f.0p-24, -0x18ffd9e3900345a85d2d86161742e.0p-140L},
{0x820000.0p-24, 0xad7a03.0p-24, -0x1e4db102ce29f79b026b64b42caa1.0p-140L},
{0x818000.0p-24, 0xae768f.0p-24, 0x17c35c55a04a82ab19f77652d977a.0p-141L},
{0x810000.0p-24, 0xaf7415.0p-24, 0x1448324047019b48d7b98c1cf7234.0p-138L},
{0x808000.0p-24, 0xb07298.0p-24, -0x1750ee3915a197e9c7359dd94152f.0p-138L},
{0x800000.0p-24, 0xb17218.0p-24, -0x105c610ca86c3898cff81a12a17e2.0p-141L},
};
#ifdef USE_UTAB
static const struct {
float H; /* 1 + i/INTERVALS (exact) */
float E; /* H(i) * G(i) - 1 (exact) */
} U[TSIZE] = {
{0x800000.0p-23, 0},
{0x810000.0p-23, -0x800000.0p-37},
{0x820000.0p-23, -0x800000.0p-35},
{0x830000.0p-23, -0x900000.0p-34},
{0x840000.0p-23, -0x800000.0p-33},
{0x850000.0p-23, -0xc80000.0p-33},
{0x860000.0p-23, -0xa00000.0p-36},
{0x870000.0p-23, 0x940000.0p-33},
{0x880000.0p-23, 0x800000.0p-35},
{0x890000.0p-23, -0xc80000.0p-34},
{0x8a0000.0p-23, 0xe00000.0p-36},
{0x8b0000.0p-23, 0x900000.0p-33},
{0x8c0000.0p-23, -0x800000.0p-35},
{0x8d0000.0p-23, -0xe00000.0p-33},
{0x8e0000.0p-23, 0x880000.0p-33},
{0x8f0000.0p-23, -0xa80000.0p-34},
{0x900000.0p-23, -0x800000.0p-35},
{0x910000.0p-23, 0x800000.0p-37},
{0x920000.0p-23, 0x900000.0p-35},
{0x930000.0p-23, 0xd00000.0p-35},
{0x940000.0p-23, 0xe00000.0p-35},
{0x950000.0p-23, 0xc00000.0p-35},
{0x960000.0p-23, 0xe00000.0p-36},
{0x970000.0p-23, -0x800000.0p-38},
{0x980000.0p-23, -0xc00000.0p-35},
{0x990000.0p-23, -0xd00000.0p-34},
{0x9a0000.0p-23, 0x880000.0p-33},
{0x9b0000.0p-23, 0xe80000.0p-35},
{0x9c0000.0p-23, -0x800000.0p-35},
{0x9d0000.0p-23, 0xb40000.0p-33},
{0x9e0000.0p-23, 0x880000.0p-34},
{0x9f0000.0p-23, -0xe00000.0p-35},
{0xa00000.0p-23, 0x800000.0p-33},
{0xa10000.0p-23, -0x900000.0p-36},
{0xa20000.0p-23, -0xb00000.0p-33},
{0xa30000.0p-23, -0xa00000.0p-36},
{0xa40000.0p-23, 0x800000.0p-33},
{0xa50000.0p-23, -0xf80000.0p-35},
{0xa60000.0p-23, 0x880000.0p-34},
{0xa70000.0p-23, -0x900000.0p-33},
{0xa80000.0p-23, -0x800000.0p-35},
{0xa90000.0p-23, 0x900000.0p-34},
{0xaa0000.0p-23, 0xa80000.0p-33},
{0xab0000.0p-23, -0xac0000.0p-34},
{0xac0000.0p-23, -0x800000.0p-37},
{0xad0000.0p-23, 0xf80000.0p-35},
{0xae0000.0p-23, 0xf80000.0p-34},
{0xaf0000.0p-23, -0xac0000.0p-33},
{0xb00000.0p-23, -0x800000.0p-33},
{0xb10000.0p-23, -0xb80000.0p-34},
{0xb20000.0p-23, -0x800000.0p-34},
{0xb30000.0p-23, -0xb00000.0p-35},
{0xb40000.0p-23, -0x800000.0p-35},
{0xb50000.0p-23, -0xe00000.0p-36},
{0xb60000.0p-23, -0x800000.0p-35},
{0xb70000.0p-23, -0xb00000.0p-35},
{0xb80000.0p-23, -0x800000.0p-34},
{0xb90000.0p-23, -0xb80000.0p-34},
{0xba0000.0p-23, -0x800000.0p-33},
{0xbb0000.0p-23, -0xac0000.0p-33},
{0xbc0000.0p-23, 0x980000.0p-33},
{0xbd0000.0p-23, 0xbc0000.0p-34},
{0xbe0000.0p-23, 0xe00000.0p-36},
{0xbf0000.0p-23, -0xb80000.0p-35},
{0xc00000.0p-23, -0x800000.0p-33},
{0xc10000.0p-23, 0xa80000.0p-33},
{0xc20000.0p-23, 0x900000.0p-34},
{0xc30000.0p-23, -0x800000.0p-35},
{0xc40000.0p-23, -0x900000.0p-33},
{0xc50000.0p-23, 0x820000.0p-33},
{0xc60000.0p-23, 0x800000.0p-38},
{0xc70000.0p-23, -0x820000.0p-33},
{0xc80000.0p-23, 0x800000.0p-33},
{0xc90000.0p-23, -0xa00000.0p-36},
{0xca0000.0p-23, -0xb00000.0p-33},
{0xcb0000.0p-23, 0x840000.0p-34},
{0xcc0000.0p-23, -0xd00000.0p-34},
{0xcd0000.0p-23, 0x800000.0p-33},
{0xce0000.0p-23, -0xe00000.0p-35},
{0xcf0000.0p-23, 0xa60000.0p-33},
{0xd00000.0p-23, -0x800000.0p-35},
{0xd10000.0p-23, 0xb40000.0p-33},
{0xd20000.0p-23, -0x800000.0p-35},
{0xd30000.0p-23, 0xaa0000.0p-33},
{0xd40000.0p-23, -0xe00000.0p-35},
{0xd50000.0p-23, 0x880000.0p-33},
{0xd60000.0p-23, -0xd00000.0p-34},
{0xd70000.0p-23, 0x9c0000.0p-34},
{0xd80000.0p-23, -0xb00000.0p-33},
{0xd90000.0p-23, -0x800000.0p-38},
{0xda0000.0p-23, 0xa40000.0p-33},
{0xdb0000.0p-23, -0xdc0000.0p-34},
{0xdc0000.0p-23, 0xc00000.0p-35},
{0xdd0000.0p-23, 0xca0000.0p-33},
{0xde0000.0p-23, -0xb80000.0p-34},
{0xdf0000.0p-23, 0xd00000.0p-35},
{0xe00000.0p-23, 0xc00000.0p-33},
{0xe10000.0p-23, -0xf40000.0p-34},
{0xe20000.0p-23, 0x800000.0p-37},
{0xe30000.0p-23, 0x860000.0p-33},
{0xe40000.0p-23, -0xc80000.0p-33},
{0xe50000.0p-23, -0xa80000.0p-34},
{0xe60000.0p-23, 0xe00000.0p-36},
{0xe70000.0p-23, 0x880000.0p-33},
{0xe80000.0p-23, -0xe00000.0p-33},
{0xe90000.0p-23, -0xfc0000.0p-34},
{0xea0000.0p-23, -0x800000.0p-35},
{0xeb0000.0p-23, 0xe80000.0p-35},
{0xec0000.0p-23, 0x900000.0p-33},
{0xed0000.0p-23, 0xe20000.0p-33},
{0xee0000.0p-23, -0xac0000.0p-33},
{0xef0000.0p-23, -0xc80000.0p-34},
{0xf00000.0p-23, -0x800000.0p-35},
{0xf10000.0p-23, 0x800000.0p-35},
{0xf20000.0p-23, 0xb80000.0p-34},
{0xf30000.0p-23, 0x940000.0p-33},
{0xf40000.0p-23, 0xc80000.0p-33},
{0xf50000.0p-23, -0xf20000.0p-33},
{0xf60000.0p-23, -0xc80000.0p-33},
{0xf70000.0p-23, -0xa20000.0p-33},
{0xf80000.0p-23, -0x800000.0p-33},
{0xf90000.0p-23, -0xc40000.0p-34},
{0xfa0000.0p-23, -0x900000.0p-34},
{0xfb0000.0p-23, -0xc80000.0p-35},
{0xfc0000.0p-23, -0x800000.0p-35},
{0xfd0000.0p-23, -0x900000.0p-36},
{0xfe0000.0p-23, -0x800000.0p-37},
{0xff0000.0p-23, -0x800000.0p-39},
{0x800000.0p-22, 0},
};
#endif /* USE_UTAB */
#ifdef STRUCT_RETURN
#define RETURN1(rp, v) do { \
(rp)->hi = (v); \
(rp)->lo_set = 0; \
return; \
} while (0)
#define RETURN2(rp, h, l) do { \
(rp)->hi = (h); \
(rp)->lo = (l); \
(rp)->lo_set = 1; \
return; \
} while (0)
struct ld {
long double hi;
long double lo;
int lo_set;
};
#else
#define RETURN1(rp, v) RETURNF(v)
#define RETURN2(rp, h, l) RETURNI((h) + (l))
#endif
#ifdef STRUCT_RETURN
forceinline void
k_logl(long double x, struct ld *rp)
#else
long double
logl(long double x)
#endif
{
long double d, val_hi, val_lo;
double dd, dk;
uint64_t lx, llx;
int i, k;
uint16_t hx;
EXTRACT_LDBL128_WORDS(hx, lx, llx, x);
k = -16383;
#if 0 /* Hard to do efficiently. Don't do it until we support all modes. */
if (x == 1)
RETURN1(rp, 0); /* log(1) = +0 in all rounding modes */
#endif
if (hx == 0 || hx >= 0x8000) { /* zero, negative or subnormal? */
if (((hx & 0x7fff) | lx | llx) == 0)
RETURN1(rp, -1 / zero); /* log(+-0) = -Inf */
if (hx != 0)
/* log(neg or NaN) = qNaN: */
RETURN1(rp, (x - x) / zero);
x *= 0x1.0p113; /* subnormal; scale up x */
EXTRACT_LDBL128_WORDS(hx, lx, llx, x);
k = -16383 - 113;
} else if (hx >= 0x7fff)
RETURN1(rp, x + x); /* log(Inf or NaN) = Inf or qNaN */
#ifndef STRUCT_RETURN
ENTERI();
#endif
k += hx;
dk = k;
/* Scale x to be in [1, 2). */
SET_LDBL_EXPSIGN(x, 0x3fff);
/* 0 <= i <= INTERVALS: */
#define L2I (49 - LOG2_INTERVALS)
i = (lx + (1LL << (L2I - 2))) >> (L2I - 1);
/*
* -0.005280 < d < 0.004838. In particular, the infinite-
* precision |d| is <= 2**-7. Rounding of G(i) to 8 bits
* ensures that d is representable without extra precision for
* this bound on |d| (since when this calculation is expressed
* as x*G(i)-1, the multiplication needs as many extra bits as
* G(i) has and the subtraction cancels 8 bits). But for
* most i (107 cases out of 129), the infinite-precision |d|
* is <= 2**-8. G(i) is rounded to 9 bits for such i to give
* better accuracy (this works by improving the bound on |d|,
* which in turn allows rounding to 9 bits in more cases).
* This is only important when the original x is near 1 -- it
* lets us avoid using a special method to give the desired
* accuracy for such x.
*/
if (0)
d = x * G(i) - 1;
else {
#ifdef USE_UTAB
d = (x - H(i)) * G(i) + E(i);
#else
long double x_hi;
double x_lo;
/*
* Split x into x_hi + x_lo to calculate x*G(i)-1 exactly.
* G(i) has at most 9 bits, so the splitting point is not
* critical.
*/
INSERT_LDBL128_WORDS(x_hi, 0x3fff, lx,
llx & 0xffffffffff000000ULL);
x_lo = x - x_hi;
d = x_hi * G(i) - 1 + x_lo * G(i);
#endif
}
/*
* Our algorithm depends on exact cancellation of F_lo(i) and
* F_hi(i) with dk*ln_2_lo and dk*ln2_hi when k is -1 and i is
* at the end of the table. This and other technical complications
* make it difficult to avoid the double scaling in (dk*ln2) *
* log(base) for base != e without losing more accuracy and/or
* efficiency than is gained.
*/
/*
* Use double precision operations wherever possible, since
* long double operations are emulated and were very slow on
* the old sparc64 and unknown on the newer aarch64 and riscv
* machines. Also, don't try to improve parallelism by
* increasing the number of operations, since any parallelism
* on such machines is needed for the emulation. Horner's
* method is good for this, and is also good for accuracy.
* Horner's method doesn't handle the `lo' term well, either
* for efficiency or accuracy. However, for accuracy we
* evaluate d * d * P2 separately to take advantage of by P2
* being exact, and this gives a good place to sum the 'lo'
* term too.
*/
dd = (double)d;
val_lo = d * d * d * (P3 +
d * (P4 + d * (P5 + d * (P6 + d * (P7 + d * (P8 +
dd * (P9 + dd * (P10 + dd * (P11 + dd * (P12 + dd * (P13 +
dd * P14))))))))))) + (F_lo(i) + dk * ln2_lo) + d * d * P2;
val_hi = d;
#ifdef DEBUG
if (fetestexcept(FE_UNDERFLOW))
breakpoint();
#endif
_3sumF(val_hi, val_lo, F_hi(i) + dk * ln2_hi);
RETURN2(rp, val_hi, val_lo);
}
long double
__log1plq(long double x)
{
long double d, d_hi, f_lo, val_hi, val_lo;
long double f_hi, twopminusk;
double d_lo, dd, dk;
uint64_t lx, llx;
int i, k;
int16_t ax, hx;
DOPRINT_START(&x);
EXTRACT_LDBL128_WORDS(hx, lx, llx, x);
if (hx < 0x3fff) { /* x < 1, or x neg NaN */
ax = hx & 0x7fff;
if (ax >= 0x3fff) { /* x <= -1, or x neg NaN */
if (ax == 0x3fff && (lx | llx) == 0)
RETURNP(-1 / zero); /* log1p(-1) = -Inf */
/* log1p(x < 1, or x NaN) = qNaN: */
RETURNP((x - x) / (x - x));
}
if (ax <= 0x3f8d) { /* |x| < 2**-113 */
if ((int)x == 0)
RETURNP(x); /* x with inexact if x != 0 */
}
f_hi = 1;
f_lo = x;
} else if (hx >= 0x7fff) { /* x +Inf or non-neg NaN */
RETURNP(x + x); /* log1p(Inf or NaN) = Inf or qNaN */
} else if (hx < 0x40e1) { /* 1 <= x < 2**226 */
f_hi = x;
f_lo = 1;
} else { /* 2**226 <= x < +Inf */
f_hi = x;
f_lo = 0; /* avoid underflow of the P3 term */
}
ENTERI();
x = f_hi + f_lo;
f_lo = (f_hi - x) + f_lo;
EXTRACT_LDBL128_WORDS(hx, lx, llx, x);
k = -16383;
k += hx;
dk = k;
SET_LDBL_EXPSIGN(x, 0x3fff);
twopminusk = 1;
SET_LDBL_EXPSIGN(twopminusk, 0x7ffe - (hx & 0x7fff));
f_lo *= twopminusk;
i = (lx + (1LL << (L2I - 2))) >> (L2I - 1);
/*
* x*G(i)-1 (with a reduced x) can be represented exactly, as
* above, but now we need to evaluate the polynomial on d =
* (x+f_lo)*G(i)-1 and extra precision is needed for that.
* Since x+x_lo is a hi+lo decomposition and subtracting 1
* doesn't lose too many bits, an inexact calculation for
* f_lo*G(i) is good enough.
*/
if (0)
d_hi = x * G(i) - 1;
else {
#ifdef USE_UTAB
d_hi = (x - H(i)) * G(i) + E(i);
#else
long double x_hi;
double x_lo;
INSERT_LDBL128_WORDS(x_hi, 0x3fff, lx,
llx & 0xffffffffff000000ULL);
x_lo = x - x_hi;
d_hi = x_hi * G(i) - 1 + x_lo * G(i);
#endif
}
d_lo = f_lo * G(i);
/*
* This is _2sumF(d_hi, d_lo) inlined. The condition
* (d_hi == 0 || |d_hi| >= |d_lo|) for using _2sumF() is not
* always satisifed, so it is not clear that this works, but
* it works in practice. It works even if it gives a wrong
* normalized d_lo, since |d_lo| > |d_hi| implies that i is
* nonzero and d is tiny, so the F(i) term dominates d_lo.
* In float precision:
* (By exhaustive testing, the worst case is d_hi = 0x1.bp-25.
* And if d is only a little tinier than that, we would have
* another underflow problem for the P3 term; this is also ruled
* out by exhaustive testing.)
*/
d = d_hi + d_lo;
d_lo = d_hi - d + d_lo;
d_hi = d;
dd = (double)d;
val_lo = d * d * d * (P3 +
d * (P4 + d * (P5 + d * (P6 + d * (P7 + d * (P8 +
dd * (P9 + dd * (P10 + dd * (P11 + dd * (P12 + dd * (P13 +
dd * P14))))))))))) + (F_lo(i) + dk * ln2_lo + d_lo) + d * d * P2;
val_hi = d_hi;
#ifdef DEBUG
if (fetestexcept(FE_UNDERFLOW))
breakpoint();
#endif
_3sumF(val_hi, val_lo, F_hi(i) + dk * ln2_hi);
RETURN2PI(val_hi, val_lo);
}
#ifdef STRUCT_RETURN
long double
__loglq(long double x)
{
struct ld r;
ENTERI();
DOPRINT_START(&x);
k_logl(x, &r);
RETURNSPI(&r);
}
/*
* 29+113 bit decompositions. The bits are distributed so that the products
* of the hi terms are exact in double precision. The types are chosen so
* that the products of the hi terms are done in at least double precision,
* without any explicit conversions. More natural choices would require a
* slow long double precision multiplication.
*/
static const double
invln10_hi = 4.3429448176175356e-1, /* 0x1bcb7b15000000.0p-54 */
invln2_hi = 1.4426950402557850e0; /* 0x17154765000000.0p-52 */
static const long double
invln10_lo = 1.41498268538580090791605082294397000e-10L, /* 0x137287195355baaafad33dc323ee3.0p-145L */
invln2_lo = 6.33178418956604368501892137426645911e-10L, /* 0x15c17f0bbbe87fed0691d3e88eb57.0p-143L */
invln10_lo_plus_hi = invln10_lo + invln10_hi,
invln2_lo_plus_hi = invln2_lo + invln2_hi;
long double
__log10lq(long double x)
{
struct ld r;
long double hi, lo;
ENTERI();
DOPRINT_START(&x);
k_logl(x, &r);
if (!r.lo_set)
RETURNPI(r.hi);
_2sumF(r.hi, r.lo);
hi = (float)r.hi;
lo = r.lo + (r.hi - hi);
RETURN2PI(invln10_hi * hi,
invln10_lo_plus_hi * lo + invln10_lo * hi);
}
long double
__log2lq(long double x)
{
struct ld r;
long double hi, lo;
ENTERI();
DOPRINT_START(&x);
k_logl(x, &r);
if (!r.lo_set)
RETURNPI(r.hi);
_2sumF(r.hi, r.lo);
hi = (float)r.hi;
lo = r.lo + (r.hi - hi);
RETURN2PI(invln2_hi * hi,
invln2_lo_plus_hi * lo + invln2_lo * hi);
}
#endif /* STRUCT_RETURN */
#endif /* LDBL_MANT_DIG == 113 */

View file

@ -31,7 +31,7 @@ asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
asm(".ident\t\"\\n\\n\
OpenBSD libm (MIT License)\\n\
OpenBSD libm (ISC License)\\n\
Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */

View file

@ -96,7 +96,7 @@ long double powl(long double x, long double y) {
#else
asm(".ident\t\"\\n\\n\
OpenBSD libm (MIT License)\\n\
OpenBSD libm (ISC License)\\n\
Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
@ -618,11 +618,448 @@ static long double powil(long double x, int nn)
return y;
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
// TODO: broken implementation to make things compile
long double powl(long double x, long double y)
#include "libc/tinymath/freebsd.internal.h"
asm(".ident\t\"\\n\\n\
OpenBSD libm (ISC License)\\n\
Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
/*-
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/*
* Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* powl(x,y) return x**y
*
* n
* Method: Let x = 2 * (1+f)
* 1. Compute and return log2(x) in two pieces:
* log2(x) = w1 + w2,
* where w1 has 113-53 = 60 bit trailing zeros.
* 2. Perform y*log2(x) = n+y' by simulating multi-precision
* arithmetic, where |y'|<=0.5.
* 3. Return x**y = 2**n*exp(y'*log2)
*
* Special cases:
* 1. (anything) ** 0 is 1
* 2. (anything) ** 1 is itself
* 3. (anything) ** NAN is NAN
* 4. NAN ** (anything except 0) is NAN
* 5. +-(|x| > 1) ** +INF is +INF
* 6. +-(|x| > 1) ** -INF is +0
* 7. +-(|x| < 1) ** +INF is +0
* 8. +-(|x| < 1) ** -INF is +INF
* 9. +-1 ** +-INF is NAN
* 10. +0 ** (+anything except 0, NAN) is +0
* 11. -0 ** (+anything except 0, NAN, odd integer) is +0
* 12. +0 ** (-anything except 0, NAN) is +INF
* 13. -0 ** (-anything except 0, NAN, odd integer) is +INF
* 14. -0 ** (odd integer) = -( +0 ** (odd integer) )
* 15. +INF ** (+anything except 0,NAN) is +INF
* 16. +INF ** (-anything except 0,NAN) is +0
* 17. -INF ** (anything) = -0 ** (-anything)
* 18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
* 19. (-anything except 0 and inf) ** (non-integer) is NAN
*
*/
static const long double bp[] = {
1.0L,
1.5L,
};
/* log_2(1.5) */
static const long double dp_h[] = {
0.0,
5.8496250072115607565592654282227158546448E-1L
};
/* Low part of log_2(1.5) */
static const long double dp_l[] = {
0.0,
1.0579781240112554492329533686862998106046E-16L
};
static const long double zero = 0.0L,
one = 1.0L,
two = 2.0L,
two113 = 1.0384593717069655257060992658440192E34L,
huge = 1.0e3000L,
tiny = 1.0e-3000L;
/* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
z = (x-1)/(x+1)
1 <= x <= 1.25
Peak relative error 2.3e-37 */
static const long double LN[] =
{
return pow(x, y);
-3.0779177200290054398792536829702930623200E1L,
6.5135778082209159921251824580292116201640E1L,
-4.6312921812152436921591152809994014413540E1L,
1.2510208195629420304615674658258363295208E1L,
-9.9266909031921425609179910128531667336670E-1L
};
static const long double LD[] =
{
-5.129862866715009066465422805058933131960E1L,
1.452015077564081884387441590064272782044E2L,
-1.524043275549860505277434040464085593165E2L,
7.236063513651544224319663428634139768808E1L,
-1.494198912340228235853027849917095580053E1L
/* 1.0E0 */
};
/* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
0 <= x <= 0.5
Peak relative error 5.7e-38 */
static const long double PN[] =
{
5.081801691915377692446852383385968225675E8L,
9.360895299872484512023336636427675327355E6L,
4.213701282274196030811629773097579432957E4L,
5.201006511142748908655720086041570288182E1L,
9.088368420359444263703202925095675982530E-3L,
};
static const long double PD[] =
{
3.049081015149226615468111430031590411682E9L,
1.069833887183886839966085436512368982758E8L,
8.259257717868875207333991924545445705394E5L,
1.872583833284143212651746812884298360922E3L,
/* 1.0E0 */
};
static const long double
/* ln 2 */
lg2 = 6.9314718055994530941723212145817656807550E-1L,
lg2_h = 6.9314718055994528622676398299518041312695E-1L,
lg2_l = 2.3190468138462996154948554638754786504121E-17L,
ovt = 8.0085662595372944372e-0017L,
/* 2/(3*log(2)) */
cp = 9.6179669392597560490661645400126142495110E-1L,
cp_h = 9.6179669392597555432899980587535537779331E-1L,
cp_l = 5.0577616648125906047157785230014751039424E-17L;
long double
powl(long double x, long double y)
{
long double z, ax, z_h, z_l, p_h, p_l;
long double yy1, t1, t2, r, s, t, u, v, w;
long double s2, s_h, s_l, t_h, t_l;
int32_t i, j, k, yisint, n;
uint32_t ix, iy;
int32_t hx, hy;
ieee_quad_shape_type o, p, q;
p.value = x;
hx = p.parts32.mswhi;
ix = hx & 0x7fffffff;
q.value = y;
hy = q.parts32.mswhi;
iy = hy & 0x7fffffff;
/* y==zero: x**0 = 1 */
if ((iy | q.parts32.mswlo | q.parts32.lswhi | q.parts32.lswlo) == 0)
return one;
/* 1.0**y = 1; -1.0**+-Inf = 1 */
if (x == one)
return one;
if (x == -1.0L && iy == 0x7fff0000
&& (q.parts32.mswlo | q.parts32.lswhi | q.parts32.lswlo) == 0)
return one;
/* +-NaN return x+y */
if ((ix > 0x7fff0000)
|| ((ix == 0x7fff0000)
&& ((p.parts32.mswlo | p.parts32.lswhi | p.parts32.lswlo) != 0))
|| (iy > 0x7fff0000)
|| ((iy == 0x7fff0000)
&& ((q.parts32.mswlo | q.parts32.lswhi | q.parts32.lswlo) != 0)))
return nan_mix(x, y);
/* determine if y is an odd int when x < 0
* yisint = 0 ... y is not an integer
* yisint = 1 ... y is an odd int
* yisint = 2 ... y is an even int
*/
yisint = 0;
if (hx < 0)
{
if (iy >= 0x40700000) /* 2^113 */
yisint = 2; /* even integer y */
else if (iy >= 0x3fff0000) /* 1.0 */
{
if (floorl (y) == y)
{
z = 0.5 * y;
if (floorl (z) == z)
yisint = 2;
else
yisint = 1;
}
}
}
/* special value of y */
if ((q.parts32.mswlo | q.parts32.lswhi | q.parts32.lswlo) == 0)
{
if (iy == 0x7fff0000) /* y is +-inf */
{
if (((ix - 0x3fff0000) | p.parts32.mswlo | p.parts32.lswhi |
p.parts32.lswlo) == 0)
return y - y; /* +-1**inf is NaN */
else if (ix >= 0x3fff0000) /* (|x|>1)**+-inf = inf,0 */
return (hy >= 0) ? y : zero;
else /* (|x|<1)**-,+inf = inf,0 */
return (hy < 0) ? -y : zero;
}
if (iy == 0x3fff0000)
{ /* y is +-1 */
if (hy < 0)
return one / x;
else
return x;
}
if (hy == 0x40000000)
return x * x; /* y is 2 */
if (hy == 0x3ffe0000)
{ /* y is 0.5 */
if (hx >= 0) /* x >= +0 */
return sqrtl (x);
}
}
ax = fabsl (x);
/* special value of x */
if ((p.parts32.mswlo | p.parts32.lswhi | p.parts32.lswlo) == 0)
{
if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000)
{
z = ax; /*x is +-0,+-inf,+-1 */
if (hy < 0)
z = one / z; /* z = (1/|x|) */
if (hx < 0)
{
if (((ix - 0x3fff0000) | yisint) == 0)
{
z = (z - z) / (z - z); /* (-1)**non-int is NaN */
}
else if (yisint == 1)
z = -z; /* (x<0)**odd = -(|x|**odd) */
}
return z;
}
}
/* (x<0)**(non-int) is NaN */
if (((((uint32_t) hx >> 31) - 1) | yisint) == 0)
return (x - x) / (x - x);
/* |y| is huge.
2^-16495 = 1/2 of smallest representable value.
If (1 - 1/131072)^y underflows, y > 1.4986e9 */
if (iy > 0x401d654b)
{
/* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
if (iy > 0x407d654b)
{
if (ix <= 0x3ffeffff)
return (hy < 0) ? huge * huge : tiny * tiny;
if (ix >= 0x3fff0000)
return (hy > 0) ? huge * huge : tiny * tiny;
}
/* over/underflow if x is not close to one */
if (ix < 0x3ffeffff)
return (hy < 0) ? huge * huge : tiny * tiny;
if (ix > 0x3fff0000)
return (hy > 0) ? huge * huge : tiny * tiny;
}
n = 0;
/* take care subnormal number */
if (ix < 0x00010000)
{
ax *= two113;
n -= 113;
o.value = ax;
ix = o.parts32.mswhi;
}
n += ((ix) >> 16) - 0x3fff;
j = ix & 0x0000ffff;
/* determine interval */
ix = j | 0x3fff0000; /* normalize ix */
if (j <= 0x3988)
k = 0; /* |x|<sqrt(3/2) */
else if (j < 0xbb67)
k = 1; /* |x|<sqrt(3) */
else
{
k = 0;
n += 1;
ix -= 0x00010000;
}
o.value = ax;
o.parts32.mswhi = ix;
ax = o.value;
/* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */
v = one / (ax + bp[k]);
s = u * v;
s_h = s;
o.value = s_h;
o.parts32.lswlo = 0;
o.parts32.lswhi &= 0xf8000000;
s_h = o.value;
/* t_h=ax+bp[k] High */
t_h = ax + bp[k];
o.value = t_h;
o.parts32.lswlo = 0;
o.parts32.lswhi &= 0xf8000000;
t_h = o.value;
t_l = ax - (t_h - bp[k]);
s_l = v * ((u - s_h * t_h) - s_h * t_l);
/* compute log(ax) */
s2 = s * s;
u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
r = s2 * s2 * u / v;
r += s_l * (s_h + s);
s2 = s_h * s_h;
t_h = 3.0 + s2 + r;
o.value = t_h;
o.parts32.lswlo = 0;
o.parts32.lswhi &= 0xf8000000;
t_h = o.value;
t_l = r - ((t_h - 3.0) - s2);
/* u+v = s*(1+...) */
u = s_h * t_h;
v = s_l * t_h + t_l * s;
/* 2/(3log2)*(s+...) */
p_h = u + v;
o.value = p_h;
o.parts32.lswlo = 0;
o.parts32.lswhi &= 0xf8000000;
p_h = o.value;
p_l = v - (p_h - u);
z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */
z_l = cp_l * p_h + p_l * cp + dp_l[k];
/* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
t = (long double) n;
t1 = (((z_h + z_l) + dp_h[k]) + t);
o.value = t1;
o.parts32.lswlo = 0;
o.parts32.lswhi &= 0xf8000000;
t1 = o.value;
t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
/* s (sign of result -ve**odd) = -1 else = 1 */
s = one;
if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
s = -one; /* (-ve)**(odd int) */
/* split up y into yy1+y2 and compute (yy1+y2)*(t1+t2) */
yy1 = y;
o.value = yy1;
o.parts32.lswlo = 0;
o.parts32.lswhi &= 0xf8000000;
yy1 = o.value;
p_l = (y - yy1) * t1 + y * t2;
p_h = yy1 * t1;
z = p_l + p_h;
o.value = z;
j = o.parts32.mswhi;
if (j >= 0x400d0000) /* z >= 16384 */
{
/* if z > 16384 */
if (((j - 0x400d0000) | o.parts32.mswlo | o.parts32.lswhi |
o.parts32.lswlo) != 0)
return s * huge * huge; /* overflow */
else
{
if (p_l + ovt > z - p_h)
return s * huge * huge; /* overflow */
}
}
else if ((j & 0x7fffffff) >= 0x400d01b9) /* z <= -16495 */
{
/* z < -16495 */
if (((j - 0xc00d01bc) | o.parts32.mswlo | o.parts32.lswhi |
o.parts32.lswlo)
!= 0)
return s * tiny * tiny; /* underflow */
else
{
if (p_l <= z - p_h)
return s * tiny * tiny; /* underflow */
}
}
/* compute 2**(p_h+p_l) */
i = j & 0x7fffffff;
k = (i >> 16) - 0x3fff;
n = 0;
if (i > 0x3ffe0000)
{ /* if |z| > 0.5, set n = [z+0.5] */
n = floorl (z + 0.5L);
t = n;
p_h -= t;
}
t = p_l + p_h;
o.value = t;
o.parts32.lswlo = 0;
o.parts32.lswhi &= 0xf8000000;
t = o.value;
u = t * lg2_h;
v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
z = u + v;
w = v - (z - u);
/* exp(z) */
t = z * z;
u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
t1 = z - t * u / v;
r = (z * t1) / (t1 - two) - (w + z * w);
z = one - (r - z);
o.value = z;
j = o.parts32.mswhi;
j += (n << 16);
if ((j >> 16) <= 0)
z = scalbnl (z, n); /* subnormal output */
else
{
o.parts32.mswhi = j;
z = o.value;
}
return s * z;
}
#else
#error "architecture unsupported"
#endif

View file

@ -39,13 +39,17 @@ asm(".include \"libc/disclaimer.inc\"");
/**
* Returns sine and cosine of 𝑥.
*/
void sincosl(long double x, long double *sin, long double *cos) {
void sincosl(long double x, long double *sin, long double *cos)
{
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
double sind, cosd;
sincos(x, &sind, &cosd);
*sin = sind;
*cos = cosd;
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
union ldshape u = {x};
unsigned n;
long double y[2], s, c;
@ -90,6 +94,7 @@ void sincosl(long double x, long double *sin, long double *cos) {
*cos = s;
break;
}
#else
#error "architecture unsupported"
#endif

View file

@ -2,79 +2,154 @@
vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
FreeBSD lib/msun/src/e_sinhl.c
Converted to long double by Bruce D. Evans
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
Copyright (c) 1992-2023 The FreeBSD Project.
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
Developed at SunPro, a Sun Microsystems, Inc. business.
Permission to use, copy, modify, and distribute this
software is freely granted, provided that this notice
is preserved.
*/
#include "libc/intrin/likely.h"
#include "libc/math.h"
#include "libc/tinymath/expo.internal.h"
#include "libc/tinymath/ldshape.internal.h"
#include "libc/tinymath/freebsd.internal.h"
asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
FreeBSD libm (BSD-2 License)\\n\
Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
asm(".ident\t\"\\n\\n\
fdlibm (fdlibm license)\\n\
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
// clang-format off
#if LDBL_MAX_EXP != 0x4000
/* We also require the usual expsign encoding. */
#error "Unsupported long double format"
#endif
#define BIAS (LDBL_MAX_EXP - 1)
static const long double shuge = 0x1p16383L;
#if LDBL_MANT_DIG == 64
/*
* Domain [-1, 1], range ~[-6.6749e-22, 6.6749e-22]:
* |sinh(x)/x - s(x)| < 2**-70.3
*/
static const union IEEEl2bits
S3u = LD80C(0xaaaaaaaaaaaaaaaa, -3, 1.66666666666666666658e-1L);
#define S3 S3u.e
static const double
S5 = 8.3333333333333332e-3, /* 0x11111111111111.0p-59 */
S7 = 1.9841269841270074e-4, /* 0x1a01a01a01a070.0p-65 */
S9 = 2.7557319223873889e-6, /* 0x171de3a5565fe6.0p-71 */
S11 = 2.5052108406704084e-8, /* 0x1ae6456857530f.0p-78 */
S13 = 1.6059042748655297e-10, /* 0x161245fa910697.0p-85 */
S15 = 7.6470006914396920e-13, /* 0x1ae7ce4eff2792.0p-93 */
S17 = 2.8346142308424267e-15; /* 0x19882ce789ffc6.0p-101 */
#elif LDBL_MANT_DIG == 113
/*
* Domain [-1, 1], range ~[-2.9673e-36, 2.9673e-36]:
* |sinh(x)/x - s(x)| < 2**-118.0
*/
static const long double
S3 = 1.66666666666666666666666666666666033e-1L, /* 0x1555555555555555555555555553b.0p-115L */
S5 = 8.33333333333333333333333333337643193e-3L, /* 0x111111111111111111111111180f5.0p-119L */
S7 = 1.98412698412698412698412697391263199e-4L, /* 0x1a01a01a01a01a01a01a0176aad11.0p-125L */
S9 = 2.75573192239858906525574406205464218e-6L, /* 0x171de3a556c7338faac243aaa9592.0p-131L */
S11 = 2.50521083854417187749675637460977997e-8L, /* 0x1ae64567f544e38fe59b3380d7413.0p-138L */
S13 = 1.60590438368216146368737762431552702e-10L, /* 0x16124613a86d098059c7620850fc2.0p-145L */
S15 = 7.64716373181980539786802470969096440e-13L, /* 0x1ae7f3e733b814193af09ce723043.0p-153L */
S17 = 2.81145725434775409870584280722701574e-15L; /* 0x1952c77030c36898c3fd0b6dfc562.0p-161L */
static const double
S19= 8.2206352435411005e-18, /* 0x12f49b4662b86d.0p-109 */
S21= 1.9572943931418891e-20, /* 0x171b8f2fab9628.0p-118 */
S23 = 3.8679983530666939e-23, /* 0x17617002b73afc.0p-127 */
S25 = 6.5067867911512749e-26; /* 0x1423352626048a.0p-136 */
#else
#error "Unsupported long double format"
#endif /* LDBL_MANT_DIG == 64 */
/* log(2**16385 - 0.5) rounded up: */
static const float
o_threshold = 1.13572168e4; /* 0xb174de.0p-10 */
/**
* Returns hyperbolic sine of 𝑥.
*
* sinh(x) = (exp(x) - 1/exp(x))/2
* = (exp(x)-1 + (exp(x)-1)/exp(x))/2
* = x + x^3/6 + o(x^5)
*/
long double sinhl(long double x) {
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
return sinh(x);
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
union ldshape u = {x};
unsigned ex = u.i.se & 0x7fff;
long double h, t, absx;
long double
sinhl(long double x)
{
long double hi,lo,x2,x4;
#if LDBL_MANT_DIG == 113
double dx2;
#endif
double s;
int16_t ix,jx;
h = 0.5;
if (u.i.se & 0x8000)
h = -h;
/* |x| */
u.i.se = ex;
absx = u.f;
GET_LDBL_EXPSIGN(jx,x);
ix = jx&0x7fff;
/* |x| < log(LDBL_MAX) */
if (ex < 0x3fff+13 || (ex == 0x3fff+13 && u.i.m>>32 < 0xb17217f7)) {
t = expm1l(absx);
if (ex < 0x3fff) {
if (ex < 0x3fff-32)
return x;
return h*(2*t - t*t/(1+t));
}
return h*(t + t/(t+1));
/* x is INF or NaN */
if(ix>=0x7fff) return x+x;
ENTERI();
s = 1;
if (jx<0) s = -1;
/* |x| < 64, return x, s(x), or accurate s*(exp(|x|)/2-1/exp(|x|)/2) */
if (ix<0x4005) { /* |x|<64 */
if (ix<BIAS-(LDBL_MANT_DIG+1)/2) /* |x|<TINY */
if(shuge+x>1) RETURNI(x); /* sinh(tiny) = tiny with inexact */
if (ix<0x3fff) { /* |x|<1 */
x2 = x*x;
#if LDBL_MANT_DIG == 64
x4 = x2*x2;
RETURNI(((S17*x2 + S15)*x4 + (S13*x2 + S11))*(x2*x*x4*x4) +
((S9*x2 + S7)*x2 + S5)*(x2*x*x2) + S3*(x2*x) + x);
#elif LDBL_MANT_DIG == 113
dx2 = x2;
RETURNI(((((((((((S25*dx2 + S23)*dx2 +
S21)*x2 + S19)*x2 +
S17)*x2 + S15)*x2 + S13)*x2 + S11)*x2 + S9)*x2 + S7)*x2 +
S5)* (x2*x*x2) +
S3*(x2*x) + x);
#endif
}
k_hexpl(fabsl(x), &hi, &lo);
RETURNI(s*(lo - 0.25/(hi + lo) + hi));
}
/* |x| > log(LDBL_MAX) or nan */
t = expl(0.5*absx);
return h*t*t;
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
// TODO: broken implementation to make things compile
return sinh(x);
#else
#error "architecture unsupported"
#endif
/* |x| in [64, o_threshold], return correctly-overflowing s*exp(|x|)/2 */
if (fabsl(x) <= o_threshold)
RETURNI(s*hexpl(fabsl(x)));
/* |x| > o_threshold, sinh(x) overflow */
return x*shuge;
}

View file

@ -32,14 +32,10 @@ asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
// clang-format off
/**
* Returns hyperbolic tangent of 𝑥.
*
* tanh(x) = (exp(x) - exp(-x))/(exp(x) + exp(-x))
* = (exp(2*x) - 1)/(exp(2*x) - 1 + 2)
* = (1 - exp(-2*x))/(exp(-2*x) - 1 + 2)
*/
double tanh(double x)
{

View file

@ -2,79 +2,87 @@
vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
FreeBSD lib/msun/src/s_tanhf.c
Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
Copyright (c) 1992-2023 The FreeBSD Project.
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
Developed at SunPro, a Sun Microsystems, Inc. business.
Permission to use, copy, modify, and distribute this
software is freely granted, provided that this notice
is preserved.
*/
#include "libc/math.h"
#include "libc/tinymath/feval.internal.h"
#include "libc/tinymath/freebsd.internal.h"
asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
FreeBSD libm (BSD-2 License)\\n\
Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
asm(".ident\t\"\\n\\n\
fdlibm (fdlibm license)\\n\
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
// clang-format off
static const volatile float tiny = 1.0e-30;
static const float one=1.0, two=2.0, huge = 1.0e30;
/**
* Returns hyperbolic tangent of 𝑥.
*
* tanh(x) = (exp(x) - exp(-x))/(exp(x) + exp(-x))
* = (exp(2*x) - 1)/(exp(2*x) - 1 + 2)
* = (1 - exp(-2*x))/(exp(-2*x) - 1 + 2)
*/
float tanhf(float x)
float
tanhf(float x)
{
union {float f; uint32_t i;} u = {.f = x};
uint32_t w;
int sign;
float t;
float t,z;
int32_t jx,ix;
/* x = |x| */
sign = u.i >> 31;
u.i &= 0x7fffffff;
x = u.f;
w = u.i;
GET_FLOAT_WORD(jx,x);
ix = jx&0x7fffffff;
if (w > 0x3f0c9f54) {
/* |x| > log(3)/2 ~= 0.5493 or nan */
if (w > 0x41200000) {
/* |x| > 10 */
t = 1 + 0/x;
} else {
t = expm1f(2*x);
t = 1 - 2/(t+2);
}
} else if (w > 0x3e82c578) {
/* |x| > log(5/3)/2 ~= 0.2554 */
t = expm1f(2*x);
t = t/(t+2);
} else if (w >= 0x00800000) {
/* |x| >= 0x1p-126 */
t = expm1f(-2*x);
t = -t/(t+2);
} else {
/* |x| is subnormal */
fevalf(x*x);
t = x;
/* x is INF or NaN */
if(ix>=0x7f800000) {
if (jx>=0) return one/x+one; /* tanh(+-inf)=+-1 */
else return one/x-one; /* tanh(NaN) = NaN */
}
return sign ? -t : t;
/* |x| < 9 */
if (ix < 0x41100000) { /* |x|<9 */
if (ix<0x39800000) { /* |x|<2**-12 */
if(huge+x>one) return x; /* tanh(tiny) = tiny with inexact */
}
if (ix>=0x3f800000) { /* |x|>=1 */
t = expm1f(two*fabsf(x));
z = one - two/(t+two);
} else {
t = expm1f(-two*fabsf(x));
z= -t/(t+two);
}
/* |x| >= 9, return +-1 */
} else {
z = one - tiny; /* raise inexact flag */
}
return (jx>=0)? z: -z;
}

View file

@ -2,83 +2,185 @@
vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
FreeBSD lib/msun/src/s_tanhl.c
Converted to long double by Bruce D. Evans
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
Copyright (c) 1992-2023 The FreeBSD Project.
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
Developed at SunPro, a Sun Microsystems, Inc. business.
Permission to use, copy, modify, and distribute this
software is freely granted, provided that this notice
is preserved.
*/
#include "libc/intrin/likely.h"
#include "libc/math.h"
#include "libc/tinymath/ldshape.internal.h"
#include "libc/tinymath/freebsd.internal.h"
asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
FreeBSD libm (BSD-2 License)\\n\
Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
asm(".ident\t\"\\n\\n\
fdlibm (fdlibm license)\\n\
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
// clang-format off
#if LDBL_MAX_EXP != 0x4000
/* We also require the usual expsign encoding. */
#error "Unsupported long double format"
#endif
#define BIAS (LDBL_MAX_EXP - 1)
static const volatile double tiny = 1.0e-300;
static const double one = 1.0;
#if LDBL_MANT_DIG == 64
/*
* Domain [-0.25, 0.25], range ~[-1.6304e-22, 1.6304e-22]:
* |tanh(x)/x - t(x)| < 2**-72.3
*/
static const union IEEEl2bits
T3u = LD80C(0xaaaaaaaaaaaaaa9f, -2, -3.33333333333333333017e-1L);
#define T3 T3u.e
static const double
T5 = 1.3333333333333314e-1, /* 0x1111111111110a.0p-55 */
T7 = -5.3968253968210485e-2, /* -0x1ba1ba1ba1a1a1.0p-57 */
T9 = 2.1869488531393817e-2, /* 0x1664f488172022.0p-58 */
T11 = -8.8632352345964591e-3, /* -0x1226e34bc138d5.0p-59 */
T13 = 3.5921169709993771e-3, /* 0x1d6d371d3e400f.0p-61 */
T15 = -1.4555786415756001e-3, /* -0x17d923aa63814d.0p-62 */
T17 = 5.8645267876296793e-4, /* 0x13378589b85aa7.0p-63 */
T19 = -2.1121033571392224e-4; /* -0x1baf0af80c4090.0p-65 */
#elif LDBL_MANT_DIG == 113
/*
* Domain [-0.25, 0.25], range ~[-2.4211e-37, 2.4211e-37]:
* |tanh(x)/x - t(x)| < 2**121.6
*/
static const long double
T3 = -3.33333333333333333333333333333332980e-1L, /* -0x1555555555555555555555555554e.0p-114L */
T5 = 1.33333333333333333333333333332707260e-1L, /* 0x1111111111111111111111110ab7b.0p-115L */
T7 = -5.39682539682539682539682535723482314e-2L, /* -0x1ba1ba1ba1ba1ba1ba1ba17b5fc98.0p-117L */
T9 = 2.18694885361552028218693591149061717e-2L, /* 0x1664f4882c10f9f32d6b1a12a25e5.0p-118L */
T11 = -8.86323552990219656883762347736381851e-3L, /* -0x1226e355e6c23c8f5a5a0f386cb4d.0p-119L */
T13 = 3.59212803657248101358314398220822722e-3L, /* 0x1d6d3d0e157ddfb403ad3637442c6.0p-121L */
T15 = -1.45583438705131796512568010348874662e-3L; /* -0x17da36452b75e150c44cc34253b34.0p-122L */
static const double
T17 = 5.9002744094556621e-4, /* 0x1355824803668e.0p-63 */
T19 = -2.3912911424260516e-4, /* -0x1f57d7734c8dde.0p-65 */
T21 = 9.6915379535512898e-5, /* 0x1967e18ad6a6ca.0p-66 */
T23 = -3.9278322983156353e-5, /* -0x1497d8e6b75729.0p-67 */
T25 = 1.5918887220143869e-5, /* 0x10b1319998cafa.0p-68 */
T27 = -6.4514295231630956e-6, /* -0x1b0f2b71b218eb.0p-70 */
T29 = 2.6120754043964365e-6, /* 0x15e963a3cf3a39.0p-71 */
T31 = -1.0407567231003314e-6, /* -0x1176041e656869.0p-72 */
T33 = 3.4744117554063574e-7; /* 0x1750fe732cab9c.0p-74 */
#endif /* LDBL_MANT_DIG == 64 */
static inline long double
divl(long double a, long double b, long double c, long double d,
long double e, long double f)
{
long double inv, r;
float fr, fw;
_2sumF(a, c);
b = b + c;
_2sumF(d, f);
e = e + f;
inv = 1 / (d + e);
r = (a + b) * inv;
fr = r;
r = fr;
fw = d + e;
e = d - fw + e;
d = fw;
r = r + (a - d * r + b - e * r) * inv;
return r;
}
/**
* Returns hyperbolic tangent of 𝑥.
*
* tanh(x) = (exp(x) - exp(-x))/(exp(x) + exp(-x))
* = (exp(2*x) - 1)/(exp(2*x) - 1 + 2)
* = (1 - exp(-2*x))/(exp(-2*x) - 1 + 2)
*/
long double tanhl(long double x) {
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
return tanh(x);
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
union ldshape u = {x};
unsigned ex = u.i.se & 0x7fff;
unsigned sign = u.i.se & 0x8000;
uint32_t w;
long double t;
/* x = |x| */
u.i.se = ex;
x = u.f;
w = u.i.m >> 32;
if (ex > 0x3ffe || (ex == 0x3ffe && w > 0x8c9f53d5)) {
/* |x| > log(3)/2 ~= 0.5493 or nan */
if (ex >= 0x3fff+5) {
/* |x| >= 32 */
t = 1 + 0/(x + 0x1p-120f);
} else {
t = expm1l(2*x);
t = 1 - 2/(t+2);
}
} else if (ex > 0x3ffd || (ex == 0x3ffd && w > 0x82c577d4)) {
/* |x| > log(5/3)/2 ~= 0.2554 */
t = expm1l(2*x);
t = t/(t+2);
} else {
/* |x| is small */
t = expm1l(-2*x);
t = -t/(t+2);
}
return sign ? -t : t;
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
// TODO: broken implementation to make things compile
return tanh(x);
#else
#error "architecture unsupported"
long double
tanhl(long double x)
{
long double hi,lo,s,x2,x4,z;
#if LDBL_MANT_DIG == 113
double dx2;
#endif
int16_t jx,ix;
GET_LDBL_EXPSIGN(jx,x);
ix = jx&0x7fff;
/* x is INF or NaN */
if(ix>=0x7fff) {
if (jx>=0) return one/x+one; /* tanh(+-inf)=+-1 */
else return one/x-one; /* tanh(NaN) = NaN */
}
ENTERI();
/* |x| < 40 */
if (ix < 0x4004 || fabsl(x) < 40) { /* |x|<40 */
if (UNLIKELY(ix<BIAS-(LDBL_MANT_DIG+1)/2)) { /* |x|<TINY */
/* tanh(+-0) = +0; tanh(tiny) = tiny(-+) with inexact: */
return (x == 0 ? x : (0x1p200 * x - x) * 0x1p-200);
}
if (ix<0x3ffd) { /* |x|<0.25 */
x2 = x*x;
#if LDBL_MANT_DIG == 64
x4 = x2*x2;
RETURNI(((T19*x2 + T17)*x4 + (T15*x2 + T13))*(x2*x*x2*x4*x4) +
((T11*x2 + T9)*x4 + (T7*x2 + T5))*(x2*x*x2) +
T3*(x2*x) + x);
#elif LDBL_MANT_DIG == 113
dx2 = x2;
long double q = ((((((((((((((T33*dx2 + T31)*dx2 + T29)*dx2 + T27)*dx2 +
T25)*x2 + T23)*x2 + T21)*x2 + T19)*x2 + T17)*x2 +
T15)*x2 + T13)*x2 + T11)*x2 + T9)*x2 + T7)*x2 + T5)*
(x2*x*x2);
RETURNI(q + T3*(x2*x) + x);
#endif
}
k_hexpl(2*fabsl(x), &hi, &lo);
if (ix<0x4001 && fabsl(x) < 1.5) /* |x|<1.5 */
z = divl(hi, lo, -0.5, hi, lo, 0.5);
else
z = one - one/(lo+0.5+hi);
/* |x| >= 40, return +-1 */
} else {
z = one - tiny; /* raise inexact flag */
}
s = 1;
if (jx<0) s = -1;
RETURNI(s*z);
}

View file

@ -36,10 +36,17 @@ Copyright 2005-2014 Rich Felker, et. al.\"");
asm(".include \"libc/disclaimer.inc\"");
// clang-format off
long double tanl(long double x) {
/**
* Returns tangent of x.
*/
long double tanl(long double x)
{
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
return tan(x);
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
union ldshape u = {x};
long double y[2];
unsigned n;
@ -57,6 +64,7 @@ long double tanl(long double x) {
}
n = __rem_pio2l(x, y);
return __tanl(y[0], y[1], n&1);
#else
#error "architecture unsupported"
#endif

View file

@ -45,16 +45,21 @@ $(LIBC_TINYMATH_A).pkg: \
o/$(MODE)/libc/tinymath/cpow.o \
o/$(MODE)/libc/tinymath/cpowf.o \
o/$(MODE)/libc/tinymath/cpowl.o \
o/$(MODE)/libc/tinymath/powfin.o : private \
o/$(MODE)/libc/tinymath/powfin.o: private \
OVERRIDE_CFLAGS += \
-ffast-math
o/$(MODE)/libc/tinymath/lround.o \
o/$(MODE)/libc/tinymath/lroundf.o \
o/$(MODE)/libc/tinymath/lroundl.o : private \
o/$(MODE)/libc/tinymath/lroundl.o: private \
OVERRIDE_CFLAGS += \
-fno-builtin
o/$(MODE)/libc/tinymath/expl.o \
o/$(MODE)/libc/tinymath/loglq.o: private \
OVERRIDE_CFLAGS += \
-ffunction-sections
LIBC_TINYMATH_LIBS = $(foreach x,$(LIBC_TINYMATH_ARTIFACTS),$($(x)))
LIBC_TINYMATH_HDRS = $(foreach x,$(LIBC_TINYMATH_ARTIFACTS),$($(x)_HDRS))
LIBC_TINYMATH_SRCS = $(foreach x,$(LIBC_TINYMATH_ARTIFACTS),$($(x)_SRCS))

View file

@ -35,9 +35,15 @@ Copyright 2005-2014 Rich Felker, et. al.\"");
asm(".include \"libc/disclaimer.inc\"");
// clang-format off
long double truncl(long double x) {
/**
* Rounds to integer, towards zero.
*/
long double truncl(long double x)
{
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
return trunc(x);
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
static const long double toint = 1/LDBL_EPSILON;
@ -60,6 +66,7 @@ long double truncl(long double x) {
y -= 1;
x += y;
return s ? -x : x;
#else
#error "architecture unsupported"
#endif