Make more code aarch64 friendly

This commit is contained in:
Justine Tunney 2023-05-02 13:38:16 -07:00
parent ca2860947f
commit 2b73e72d59
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
568 changed files with 2197 additions and 1061 deletions

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2020 Rich Felker, et al.
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,124 +25,150 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/intrin/likely.h"
#include "libc/math.h"
#include "libc/tinymath/complex.internal.h"
#include "libc/tinymath/atan_common.internal.h"
#include "libc/tinymath/internal.h"
asm(".ident\t\"\\n\\n\
fdlibm (fdlibm license)\\n\
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
Optimized Routines (MIT License)\\n\
Copyright 2022 ARM Limited\"");
asm(".include \"libc/disclaimer.inc\"");
// clang-format off
/* clang-format off */
/* origin: FreeBSD /usr/src/lib/msun/src/e_atan2.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
*/
/* atan2(y,x)
* Method :
* 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
* 2. Reduce x to positive by (if x and y are unexceptional):
* ARG (x+iy) = arctan(y/x) ... if x > 0,
* ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0,
*
* Special cases:
*
* ATAN2((anything), NaN ) is NaN;
* ATAN2(NAN , (anything) ) is NaN;
* ATAN2(+-0, +(anything but NaN)) is +-0 ;
* ATAN2(+-0, -(anything but NaN)) is +-pi ;
* ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2;
* ATAN2(+-(anything but INF and NaN), +INF) is +-0 ;
* ATAN2(+-(anything but INF and NaN), -INF) is +-pi;
* ATAN2(+-INF,+INF ) is +-pi/4 ;
* ATAN2(+-INF,-INF ) is +-3pi/4;
* ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2;
*
* Constants:
* The hexadecimal values are the intended ones for the following
* constants. The decimal values may be used, provided that the
* compiler will convert from decimal to binary accurately enough
* to produce the hexadecimal values shown.
*/
#define Pi (0x1.921fb54442d18p+1)
#define PiOver2 (0x1.921fb54442d18p+0)
#define PiOver4 (0x1.921fb54442d18p-1)
#define SignMask (0x8000000000000000)
#define ExpMask (0x7ff0000000000000)
static const double
pi = 3.1415926535897931160E+00, /* 0x400921FB, 0x54442D18 */
pi_lo = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
/* We calculate atan2 by P(n/d), where n and d are similar to the input
arguments, and P is a polynomial. Evaluating P(x) requires calculating x^8,
which may underflow if n and d have very different magnitude.
POW8_EXP_UFLOW_BOUND is the lower bound of the difference in exponents of n
and d for which P underflows, and is used to special-case such inputs. */
#define POW8_EXP_UFLOW_BOUND 62
/**
* Returns arc tangent of 𝑦/𝑥.
* @note the greatest of all libm functions
*/
double atan2(double y, double x)
static inline int64_t
biased_exponent (double f)
{
double z;
uint32_t m,lx,ly,ix,iy;
if (isnan(x) || isnan(y))
return x+y;
EXTRACT_WORDS(ix, lx, x);
EXTRACT_WORDS(iy, ly, y);
if ((ix-0x3ff00000 | lx) == 0) /* x = 1.0 */
return atan(y);
m = ((iy>>31)&1) | ((ix>>30)&2); /* 2*sign(x)+sign(y) */
ix = ix & 0x7fffffff;
iy = iy & 0x7fffffff;
/* when y = 0 */
if ((iy|ly) == 0) {
switch(m) {
case 0:
case 1: return y; /* atan(+-0,+anything)=+-0 */
case 2: return pi; /* atan(+0,-anything) = pi */
case 3: return -pi; /* atan(-0,-anything) =-pi */
}
}
/* when x = 0 */
if ((ix|lx) == 0)
return m&1 ? -pi/2 : pi/2;
/* when x is INF */
if (ix == 0x7ff00000) {
if (iy == 0x7ff00000) {
switch(m) {
case 0: return pi/4; /* atan(+INF,+INF) */
case 1: return -pi/4; /* atan(-INF,+INF) */
case 2: return 3*pi/4; /* atan(+INF,-INF) */
case 3: return -3*pi/4; /* atan(-INF,-INF) */
}
} else {
switch(m) {
case 0: return 0.0; /* atan(+...,+INF) */
case 1: return -0.0; /* atan(-...,+INF) */
case 2: return pi; /* atan(+...,-INF) */
case 3: return -pi; /* atan(-...,-INF) */
}
}
}
/* |y/x| > 0x1p64 */
if (ix+(64<<20) < iy || iy == 0x7ff00000)
return m&1 ? -pi/2 : pi/2;
/* z = atan(|y/x|) without spurious underflow */
if ((m&2) && iy+(64<<20) < ix) /* |y/x| < 0x1p-64, x<0 */
z = 0;
else
z = atan(fabs(y/x));
switch (m) {
case 0: return z; /* atan(+,+) */
case 1: return -z; /* atan(-,+) */
case 2: return pi - (z-pi_lo); /* atan(+,-) */
default: /* case 3 */
return (z-pi_lo) - pi; /* atan(-,-) */
}
uint64_t fi = asuint64 (f);
return (fi & ExpMask) >> 52;
}
/* Fast implementation of scalar atan2. Largest errors are when y and x are
close together. The greatest observed error is 2.28 ULP:
atan2(-0x1.5915b1498e82fp+732, 0x1.54d11ef838826p+732)
got -0x1.954f42f1fa841p-1 want -0x1.954f42f1fa843p-1. */
double
atan2 (double y, double x)
{
uint64_t ix = asuint64 (x);
uint64_t iy = asuint64 (y);
uint64_t sign_x = ix & SignMask;
uint64_t sign_y = iy & SignMask;
uint64_t iax = ix & ~SignMask;
uint64_t iay = iy & ~SignMask;
bool xisnan = isnan (x);
if (UNLIKELY (isnan (y) && !xisnan))
return __math_invalid (y);
if (UNLIKELY (xisnan))
return __math_invalid (x);
/* m = 2 * sign(x) + sign(y). */
uint32_t m = ((iy >> 63) & 1) | ((ix >> 62) & 2);
int64_t exp_diff = biased_exponent (x) - biased_exponent (y);
/* y = 0. */
if (iay == 0)
{
switch (m)
{
case 0:
case 1:
return y; /* atan(+-0,+anything)=+-0. */
case 2:
return Pi; /* atan(+0,-anything) = pi. */
case 3:
return -Pi; /* atan(-0,-anything) =-pi. */
}
}
/* Special case for (x, y) either on or very close to the y axis. Either x =
0, or y is much larger than x (difference in exponents >=
POW8_EXP_UFLOW_BOUND). */
if (UNLIKELY (iax == 0 || exp_diff <= -POW8_EXP_UFLOW_BOUND))
return sign_y ? -PiOver2 : PiOver2;
/* Special case for either x is INF or (x, y) is very close to x axis and x is
negative. */
if (UNLIKELY (iax == 0x7ff0000000000000
|| (exp_diff >= POW8_EXP_UFLOW_BOUND && m >= 2)))
{
if (iay == 0x7ff0000000000000)
{
switch (m)
{
case 0:
return PiOver4; /* atan(+INF,+INF). */
case 1:
return -PiOver4; /* atan(-INF,+INF). */
case 2:
return 3.0 * PiOver4; /* atan(+INF,-INF). */
case 3:
return -3.0 * PiOver4; /* atan(-INF,-INF). */
}
}
else
{
switch (m)
{
case 0:
return 0.0; /* atan(+...,+INF). */
case 1:
return -0.0; /* atan(-...,+INF). */
case 2:
return Pi; /* atan(+...,-INF). */
case 3:
return -Pi; /* atan(-...,-INF). */
}
}
}
/* y is INF. */
if (iay == 0x7ff0000000000000)
return sign_y ? -PiOver2 : PiOver2;
uint64_t sign_xy = sign_x ^ sign_y;
double ax = asdouble (iax);
double ay = asdouble (iay);
uint64_t pred_aygtax = (ay > ax);
/* Set up z for call to atan. */
double n = pred_aygtax ? -ax : ay;
double d = pred_aygtax ? ay : ax;
double z = n / d;
double ret;
if (UNLIKELY (m < 2 && exp_diff >= POW8_EXP_UFLOW_BOUND))
{
/* If (x, y) is very close to x axis and x is positive, the polynomial
will underflow and evaluate to z. */
ret = z;
}
else
{
/* Work out the correct shift. */
double shift = sign_x ? -2.0 : 0.0;
shift = pred_aygtax ? shift + 1.0 : shift;
shift *= PiOver2;
ret = eval_poly (z, z, shift);
}
/* Account for the sign of x and y. */
return asdouble (asuint64 (ret) ^ sign_xy);
}

View file

@ -1,28 +1,182 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/likely.h"
#include "libc/math.h"
#include "libc/tinymath/atanf_common.internal.h"
#include "libc/tinymath/internal.h"
/**
* Returns arc tangent of 𝑦/𝑥.
*/
float atan2f(float y, float x) {
long double st;
asm("fpatan" : "=t"(st) : "0"((long double)x), "u"((long double)y) : "st(1)");
return st;
asm(".ident\t\"\\n\\n\
Optimized Routines (MIT License)\\n\
Copyright 2022 ARM Limited\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
#define Pi (0x1.921fb6p+1f)
#define PiOver2 (0x1.921fb6p+0f)
#define PiOver4 (0x1.921fb6p-1f)
#define SignMask (0x80000000)
/* We calculate atan2f by P(n/d), where n and d are similar to the input
arguments, and P is a polynomial. The polynomial may underflow.
POLY_UFLOW_BOUND is the lower bound of the difference in exponents of n and d
for which P underflows, and is used to special-case such inputs. */
#define POLY_UFLOW_BOUND 24
static inline int32_t
biased_exponent (float f)
{
uint32_t fi = asuint (f);
int32_t ex = (int32_t) ((fi & 0x7f800000) >> 23);
if (UNLIKELY (ex == 0))
{
/* Subnormal case - we still need to get the exponent right for subnormal
numbers as division may take us back inside the normal range. */
return ex - __builtin_clz (fi << 9);
}
return ex;
}
/* Fast implementation of scalar atan2f. Largest observed error is
2.88ulps in [99.0, 101.0] x [99.0, 101.0]:
atan2f(0x1.9332d8p+6, 0x1.8cb6c4p+6) got 0x1.964646p-1
want 0x1.964640p-1. */
float
atan2f (float y, float x)
{
uint32_t ix = asuint (x);
uint32_t iy = asuint (y);
uint32_t sign_x = ix & SignMask;
uint32_t sign_y = iy & SignMask;
uint32_t iax = ix & ~SignMask;
uint32_t iay = iy & ~SignMask;
/* x or y is NaN. */
if ((iax > 0x7f800000) || (iay > 0x7f800000))
return x + y;
/* m = 2 * sign(x) + sign(y). */
uint32_t m = ((iy >> 31) & 1) | ((ix >> 30) & 2);
/* The following follows glibc ieee754 implementation, except
that we do not use +-tiny shifts (non-nearest rounding mode). */
int32_t exp_diff = biased_exponent (x) - biased_exponent (y);
/* Special case for (x, y) either on or very close to the x axis. Either y =
0, or y is tiny and x is huge (difference in exponents >=
POLY_UFLOW_BOUND). In the second case, we only want to use this special
case when x is negative (i.e. quadrants 2 or 3). */
if (UNLIKELY (iay == 0 || (exp_diff >= POLY_UFLOW_BOUND && m >= 2)))
{
switch (m)
{
case 0:
case 1:
return y; /* atan(+-0,+anything)=+-0. */
case 2:
return Pi; /* atan(+0,-anything) = pi. */
case 3:
return -Pi; /* atan(-0,-anything) =-pi. */
}
}
/* Special case for (x, y) either on or very close to the y axis. Either x =
0, or x is tiny and y is huge (difference in exponents >=
POLY_UFLOW_BOUND). */
if (UNLIKELY (iax == 0 || exp_diff <= -POLY_UFLOW_BOUND))
return sign_y ? -PiOver2 : PiOver2;
/* x is INF. */
if (iax == 0x7f800000)
{
if (iay == 0x7f800000)
{
switch (m)
{
case 0:
return PiOver4; /* atan(+INF,+INF). */
case 1:
return -PiOver4; /* atan(-INF,+INF). */
case 2:
return 3.0f * PiOver4; /* atan(+INF,-INF). */
case 3:
return -3.0f * PiOver4; /* atan(-INF,-INF). */
}
}
else
{
switch (m)
{
case 0:
return 0.0f; /* atan(+...,+INF). */
case 1:
return -0.0f; /* atan(-...,+INF). */
case 2:
return Pi; /* atan(+...,-INF). */
case 3:
return -Pi; /* atan(-...,-INF). */
}
}
}
/* y is INF. */
if (iay == 0x7f800000)
return sign_y ? -PiOver2 : PiOver2;
uint32_t sign_xy = sign_x ^ sign_y;
float ax = asfloat (iax);
float ay = asfloat (iay);
bool pred_aygtax = (ay > ax);
/* Set up z for call to atanf. */
float n = pred_aygtax ? -ax : ay;
float d = pred_aygtax ? ay : ax;
float z = n / d;
float ret;
if (UNLIKELY (m < 2 && exp_diff >= POLY_UFLOW_BOUND))
{
/* If (x, y) is very close to x axis and x is positive, the polynomial
will underflow and evaluate to z. */
ret = z;
}
else
{
/* Work out the correct shift. */
float shift = sign_x ? -2.0f : 0.0f;
shift = pred_aygtax ? shift + 1.0f : shift;
shift *= PiOver2;
ret = eval_poly (z, z, shift);
}
/* Account for the sign of x and y. */
return asfloat (asuint (ret) ^ sign_xy);
}

View file

@ -0,0 +1,57 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATAN_COMMON_H_
#define COSMOPOLITAN_LIBC_TINYMATH_ATAN_COMMON_H_
#include "libc/tinymath/atan_data.internal.h"
#include "libc/tinymath/estrin_wrap.internal.h"
#include "libc/tinymath/horner.internal.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
// clang-format off
/*
* Double-precision polynomial evaluation function for scalar and vector atan(x)
* and atan2(y,x).
*
* Copyright (c) 2021-2023, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#if WANT_VMATH
#define DBL_T float64x2_t
#define P(i) v_f64 (__atan_poly_data.poly[i])
#else
#define DBL_T double
#define P(i) __atan_poly_data.poly[i]
#endif
/* Polynomial used in fast atan(x) and atan2(y,x) implementations
The order 19 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2). */
static inline DBL_T
eval_poly (DBL_T z, DBL_T az, DBL_T shift)
{
/* Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of
full scheme to avoid underflow in x^16. */
DBL_T z2 = z * z;
DBL_T x2 = z2 * z2;
DBL_T x4 = x2 * x2;
DBL_T x8 = x4 * x4;
DBL_T y
= FMA (ESTRIN_11_ (z2, x2, x4, x8, P, 8), x8, ESTRIN_7 (z2, x2, x4, P));
/* Finalize. y = shift + z + z^3 * P(z^2). */
y = FMA (y, z2 * az, az);
y = y + shift;
return y;
}
#undef DBL_T
#undef FMA
#undef P
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATAN_COMMON_H_ */

46
libc/tinymath/atan_data.c Normal file
View file

@ -0,0 +1,46 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/atan_data.internal.h"
asm(".ident\t\"\\n\\n\
Optimized Routines (MIT License)\\n\
Copyright 2022 ARM Limited\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
const struct atan_poly_data __atan_poly_data = {
.poly = {/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
[2**-1022, 1.0]. See atan.sollya for details of how these were
generated. */
-0x1.5555555555555p-2, 0x1.99999999996c1p-3, -0x1.2492492478f88p-3,
0x1.c71c71bc3951cp-4, -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4,
-0x1.11100ee084227p-4, 0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5,
0x1.842dbe9b0d916p-5, -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5,
-0x1.00e6eece7de8p-5, 0x1.860897b29e5efp-6, -0x1.0051381722a59p-6,
0x1.14e9dc19a4a4ep-7, -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10,
-0x1.ab24da7be7402p-13, 0x1.358851160a528p-16}};

View file

@ -0,0 +1,13 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATAN_DATA_H_
#define COSMOPOLITAN_LIBC_TINYMATH_ATAN_DATA_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
#define ATAN_POLY_NCOEFFS 20
extern const struct atan_poly_data {
double poly[ATAN_POLY_NCOEFFS];
} __atan_poly_data _Hide;
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATAN_DATA_H_ */

View file

@ -0,0 +1,46 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATANF_COMMON_H_
#define COSMOPOLITAN_LIBC_TINYMATH_ATANF_COMMON_H_
#include "libc/tinymath/atanf_data.internal.h"
#include "libc/tinymath/estrin_wrap.internal.h"
#include "libc/tinymath/hornerf.internal.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
// clang-format off
#if WANT_VMATH
#define FLT_T float32x4_t
#define P(i) v_f32 (__atanf_poly_data.poly[i])
#else
#define FLT_T float
#define P(i) __atanf_poly_data.poly[i]
#endif
/* Polynomial used in fast atanf(x) and atan2f(y,x) implementations
The order 7 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2). */
static inline FLT_T
eval_poly (FLT_T z, FLT_T az, FLT_T shift)
{
/* Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However,
a standard implementation using z8 creates spurious underflow
in the very last fma (when z^8 is small enough).
Therefore, we split the last fma into a mul and and an fma.
Horner and single-level Estrin have higher errors that exceed
threshold. */
FLT_T z2 = z * z;
FLT_T z4 = z2 * z2;
/* Then assemble polynomial. */
FLT_T y = FMA (z4, z4 * ESTRIN_3_ (z2, z4, P, 4), ESTRIN_3 (z2, z4, P));
/* Finalize:
y = shift + z * P(z^2). */
return FMA (y, z2 * az, az) + shift;
}
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATANF_COMMON_H_ */

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,44 +25,17 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/math.h"
#include "libc/tinymath/feval.internal.h"
#include "libc/tinymath/kernel.internal.h"
#include "libc/tinymath/atanf_data.internal.h"
asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
Optimized Routines (MIT License)\\n\
Copyright 2022 ARM Limited\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
/**
* Returns 𝑥 × 2ʸ.
/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0].
*/
float ldexpf(float x, int n)
{
union {float f; uint32_t i;} u;
float_t y = x;
if (n > 127) {
y *= 0x1p127f;
n -= 127;
if (n > 127) {
y *= 0x1p127f;
n -= 127;
if (n > 127)
n = 127;
}
} else if (n < -126) {
y *= 0x1p-126f * 0x1p24f;
n += 126 - 24;
if (n < -126) {
y *= 0x1p-126f * 0x1p24f;
n += 126 - 24;
if (n < -126)
n = -126;
}
}
u.i = (uint32_t)(0x7f+n)<<23;
x = y * u.f;
return x;
}
const struct atanf_poly_data __atanf_poly_data = {
.poly = {/* See atanf.sollya for details of how these were generated. */
-0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f,
-0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f}};

View file

@ -0,0 +1,13 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATANF_DATA_H_
#define COSMOPOLITAN_LIBC_TINYMATH_ATANF_DATA_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
#define ATANF_POLY_NCOEFFS 8
extern const struct atanf_poly_data {
float poly[ATANF_POLY_NCOEFFS];
} __atanf_poly_data _Hide;
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATANF_DATA_H_ */

View file

@ -0,0 +1,53 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ESTRIN_WRAP_INTERNAL_H_
#define COSMOPOLITAN_LIBC_TINYMATH_ESTRIN_WRAP_INTERNAL_H_
/*
* Helper macros for double-precision Estrin polynomial evaluation.
*
* Copyright (c) 2022-2023, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
// clang-format off
#define ESTRIN_1_(x, c, i) FMA(x, c(1 + i), c(i))
#define ESTRIN_2_(x, x2, c, i) FMA(x2, c(2 + i), ESTRIN_1_(x, c, i))
#define ESTRIN_3_(x, x2, c, i) FMA(x2, ESTRIN_1_(x, c, 2 + i), ESTRIN_1_(x, c, i))
#define ESTRIN_4_(x, x2, x4, c, i) FMA(x4, c(4 + i), ESTRIN_3_(x, x2, c, i))
#define ESTRIN_5_(x, x2, x4, c, i) FMA(x4, ESTRIN_1_(x, c, 4 + i), ESTRIN_3_(x, x2, c, i))
#define ESTRIN_6_(x, x2, x4, c, i) FMA(x4, ESTRIN_2_(x, x2, c, 4 + i), ESTRIN_3_(x, x2, c, i))
#define ESTRIN_7_(x, x2, x4, c, i) FMA(x4, ESTRIN_3_(x, x2, c, 4 + i), ESTRIN_3_(x, x2, c, i))
#define ESTRIN_8_(x, x2, x4, x8, c, i) FMA(x8, c(8 + i), ESTRIN_7_(x, x2, x4, c, i))
#define ESTRIN_9_(x, x2, x4, x8, c, i) FMA(x8, ESTRIN_1_(x, c, 8 + i), ESTRIN_7_(x, x2, x4, c, i))
#define ESTRIN_10_(x, x2, x4, x8, c, i) FMA(x8, ESTRIN_2_(x, x2, c, 8 + i), ESTRIN_7_(x, x2, x4, c, i))
#define ESTRIN_11_(x, x2, x4, x8, c, i) FMA(x8, ESTRIN_3_(x, x2, c, 8 + i), ESTRIN_7_(x, x2, x4, c, i))
#define ESTRIN_12_(x, x2, x4, x8, c, i) FMA(x8, ESTRIN_4_(x, x2, x4, c, 8 + i), ESTRIN_7_(x, x2, x4, c, i))
#define ESTRIN_13_(x, x2, x4, x8, c, i) FMA(x8, ESTRIN_5_(x, x2, x4, c, 8 + i), ESTRIN_7_(x, x2, x4, c, i))
#define ESTRIN_14_(x, x2, x4, x8, c, i) FMA(x8, ESTRIN_6_(x, x2, x4, c, 8 + i), ESTRIN_7_(x, x2, x4, c, i))
#define ESTRIN_15_(x, x2, x4, x8, c, i) FMA(x8, ESTRIN_7_(x, x2, x4, c, 8 + i), ESTRIN_7_(x, x2, x4, c, i))
#define ESTRIN_16_(x, x2, x4, x8, x16, c, i) FMA(x16, c(16 + i), ESTRIN_15_(x, x2, x4, x8, c, i))
#define ESTRIN_17_(x, x2, x4, x8, x16, c, i) FMA(x16, ESTRIN_1_(x, c, 16 + i), ESTRIN_15_(x, x2, x4, x8, c, i))
#define ESTRIN_18_(x, x2, x4, x8, x16, c, i) FMA(x16, ESTRIN_2_(x, x2, c, 16 + i), ESTRIN_15_(x, x2, x4, x8, c, i))
#define ESTRIN_19_(x, x2, x4, x8, x16, c, i) FMA(x16, ESTRIN_3_(x, x2, c, 16 + i), ESTRIN_15_(x, x2, x4, x8, c, i))
#define ESTRIN_1(x, c) ESTRIN_1_(x, c, 0)
#define ESTRIN_2(x, x2, c) ESTRIN_2_(x, x2, c, 0)
#define ESTRIN_3(x, x2, c) ESTRIN_3_(x, x2, c, 0)
#define ESTRIN_4(x, x2, x4, c) ESTRIN_4_(x, x2, x4, c, 0)
#define ESTRIN_5(x, x2, x4, c) ESTRIN_5_(x, x2, x4, c, 0)
#define ESTRIN_6(x, x2, x4, c) ESTRIN_6_(x, x2, x4, c, 0)
#define ESTRIN_7(x, x2, x4, c) ESTRIN_7_(x, x2, x4, c, 0)
#define ESTRIN_8(x, x2, x4, x8, c) ESTRIN_8_(x, x2, x4, x8, c, 0)
#define ESTRIN_9(x, x2, x4, x8, c) ESTRIN_9_(x, x2, x4, x8, c, 0)
#define ESTRIN_10(x, x2, x4, x8, c) ESTRIN_10_(x, x2, x4, x8, c, 0)
#define ESTRIN_11(x, x2, x4, x8, c) ESTRIN_11_(x, x2, x4, x8, c, 0)
#define ESTRIN_12(x, x2, x4, x8, c) ESTRIN_12_(x, x2, x4, x8, c, 0)
#define ESTRIN_13(x, x2, x4, x8, c) ESTRIN_13_(x, x2, x4, x8, c, 0)
#define ESTRIN_14(x, x2, x4, x8, c) ESTRIN_14_(x, x2, x4, x8, c, 0)
#define ESTRIN_15(x, x2, x4, x8, c) ESTRIN_15_(x, x2, x4, x8, c, 0)
#define ESTRIN_16(x, x2, x4, x8, x16, c) ESTRIN_16_(x, x2, x4, x8, x16, c, 0)
#define ESTRIN_17(x, x2, x4, x8, x16, c) ESTRIN_17_(x, x2, x4, x8, x16, c, 0)
#define ESTRIN_18(x, x2, x4, x8, x16, c) ESTRIN_18_(x, x2, x4, x8, x16, c, 0)
#define ESTRIN_19(x, x2, x4, x8, x16, c) ESTRIN_19_(x, x2, x4, x8, x16, c, 0)
// clang-format on
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ESTRIN_WRAP_INTERNAL_H_ */

View file

@ -1,35 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/math.h"
/**
* Returns maximum of two doubles.
*
* If one argument is NAN then the other is returned.
* This function is designed to do the right thing with
* signed zeroes.
*/
double fmax(double x, double y) {
if (__builtin_isnan(x)) return y;
if (__builtin_isnan(y)) return x;
if (__builtin_signbit(x) != __builtin_signbit(y)) {
return __builtin_signbit(x) ? y : x; /* C99 Annex F.9.9.2 */
}
return x < y ? y : x;
}

View file

@ -1,35 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/math.h"
/**
* Returns maximum of two floats.
*
* If one argument is NAN then the other is returned.
* This function is designed to do the right thing with
* signed zeroes.
*/
float fmaxf(float x, float y) {
if (__builtin_isnan(x)) return y;
if (__builtin_isnan(y)) return x;
if (__builtin_signbitf(x) != __builtin_signbitf(y)) {
return __builtin_signbitf(x) ? y : x; /* C99 Annex F.9.9.2 */
}
return x < y ? y : x;
}

View file

@ -1,35 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/math.h"
/**
* Returns maximum of two long doubles.
*
* If one argument is NAN then the other is returned.
* This function is designed to do the right thing with
* signed zeroes.
*/
long double fmaxl(long double x, long double y) {
if (__builtin_isnan(x)) return y;
if (__builtin_isnan(y)) return x;
if (__builtin_signbitl(x) != __builtin_signbitl(y)) {
return __builtin_signbitl(x) ? y : x; /* C99 Annex F.9.9.2 */
}
return x < y ? y : x;
}

View file

@ -0,0 +1,17 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_HORNER_INTERNAL_H_
#define COSMOPOLITAN_LIBC_TINYMATH_HORNER_INTERNAL_H_
#include "libc/math.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
#if WANT_VMATH
#define FMA(x, y, z) vfmaq_f64(z, x, y)
#else
#define FMA fma
#endif
#include "libc/tinymath/horner_wrap.internal.h"
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_TINYMATH_HORNER_INTERNAL_H_ */

View file

@ -1,16 +1,9 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_HORNERF_INTERNAL_H_
#define COSMOPOLITAN_LIBC_TINYMATH_HORNERF_INTERNAL_H_
#include "third_party/libcxx/math.h"
#include "libc/math.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/*
* Helper macros for double-precision Horner polynomial evaluation.
*
* Copyright (c) 2022-2023, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#if WANT_VMATH
#define FMA(x, y, z) vfmaq_f32(z, x, y)
#else
@ -18,6 +11,7 @@ COSMOPOLITAN_C_START_
#endif
#include "libc/tinymath/horner_wrap.internal.h"
#include "third_party/libcxx/math.h"
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */

View file

@ -1,37 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Returns logx exponent part of long double.
//
// @param 𝑥 is long double passed on stack
// @return result in %eax
// @note needs sse3
ilogbl: push %rbp
mov %rsp,%rbp
.profilable
fldt 16(%rbp)
fxtract
fstp %st
push %rax
fisttpl (%rsp)
mov (%rsp),%eax
leave
ret
.endfn ilogbl,globl

View file

@ -1,38 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Returns 𝑥 × 2ʸ.
//
// @param 𝑥 is long double passed on stack
// @param 𝑦 is exponent via %edi
// @return result in %st0
ldexpl: push %rbp
mov %rsp,%rbp
.profilable
push %rdi
fildl (%rsp)
fldt 16(%rbp)
fscale
fstp %st(1)
leave
ret
.endfn ldexpl,globl
.alias ldexpl,scalbnl
.alias ldexpl,scalblnl

View file

@ -1,57 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Returns log(𝟷+𝑥).
//
// @param 𝑥 is float scalar in low quarter of %xmm0
// @return float scalar in low quarter of %xmm0
log1pf: push %rbp
mov %rsp,%rbp
.profilable
push %rax
movss %xmm0,-4(%rbp)
flds -4(%rbp)
fld %st
fabs
fldt .Lnnan(%rip)
fxch
fcomip %st(1),%st
fstp %st
jnb 2f
fldln2
fxch
fyl2xp1
1: fstps -4(%rbp)
movss -4(%rbp),%xmm0
leave
ret
2: fld1
faddp %st,%st(1)
fldln2
fxch
fyl2x
jmp 1b
.endfn log1pf,globl
.rodata.cst16
.Lnnan: .long 0x0c4336f8
.long 0x95f61998
.long 0x3ffd
.long 0

180
libc/tinymath/log1pf.c Normal file
View file

@ -0,0 +1,180 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/intrin/likely.h"
#include "libc/math.h"
#include "libc/tinymath/internal.h"
#include "libc/tinymath/log1pf_data.internal.h"
asm(".ident\t\"\\n\\n\
Optimized Routines (MIT License)\\n\
Copyright 2022 ARM Limited\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
#define Ln2 (0x1.62e43p-1f)
#define SignMask (0x80000000)
/* Biased exponent of the largest float m for which m^8 underflows. */
#define M8UFLOW_BOUND_BEXP 112
/* Biased exponent of the largest float for which we just return x. */
#define TINY_BOUND_BEXP 103
#define C(i) __log1pf_data.coeffs[i]
static inline float
eval_poly (float m, uint32_t e)
{
#ifdef LOG1PF_2U5
/* 2.5 ulp variant. Approximate log(1+m) on [-0.25, 0.5] using
slightly modified Estrin scheme (no x^0 term, and x term is just x). */
float p_12 = fmaf (m, C (1), C (0));
float p_34 = fmaf (m, C (3), C (2));
float p_56 = fmaf (m, C (5), C (4));
float p_78 = fmaf (m, C (7), C (6));
float m2 = m * m;
float p_02 = fmaf (m2, p_12, m);
float p_36 = fmaf (m2, p_56, p_34);
float p_79 = fmaf (m2, C (8), p_78);
float m4 = m2 * m2;
float p_06 = fmaf (m4, p_36, p_02);
if (UNLIKELY (e < M8UFLOW_BOUND_BEXP))
return p_06;
float m8 = m4 * m4;
return fmaf (m8, p_79, p_06);
#elif defined(LOG1PF_1U3)
/* 1.3 ulp variant. Approximate log(1+m) on [-0.25, 0.5] using Horner
scheme. Our polynomial approximation for log1p has the form
x + C1 * x^2 + C2 * x^3 + C3 * x^4 + ...
Hence approximation has the form m + m^2 * P(m)
where P(x) = C1 + C2 * x + C3 * x^2 + ... . */
return fmaf (m, m * HORNER_8 (m, C), m);
#else
#error No log1pf approximation exists with the requested precision. Options are 13 or 25.
#endif
}
static inline uint32_t
biased_exponent (uint32_t ix)
{
return (ix & 0x7f800000) >> 23;
}
/* log1pf approximation using polynomial on reduced interval. Worst-case error
when using Estrin is roughly 2.02 ULP:
log1pf(0x1.21e13ap-2) got 0x1.fe8028p-3 want 0x1.fe802cp-3. */
float
log1pf (float x)
{
uint32_t ix = asuint (x);
uint32_t ia = ix & ~SignMask;
uint32_t ia12 = ia >> 20;
uint32_t e = biased_exponent (ix);
/* Handle special cases first. */
if (UNLIKELY (ia12 >= 0x7f8 || ix >= 0xbf800000 || ix == 0x80000000
|| e <= TINY_BOUND_BEXP))
{
if (ix == 0xff800000)
{
/* x == -Inf => log1pf(x) = NaN. */
return NAN;
}
if ((ix == 0x7f800000 || e <= TINY_BOUND_BEXP) && ia12 <= 0x7f8)
{
/* |x| < TinyBound => log1p(x) = x.
x == Inf => log1pf(x) = Inf. */
return x;
}
if (ix == 0xbf800000)
{
/* x == -1.0 => log1pf(x) = -Inf. */
return __math_divzerof (-1);
}
if (ia12 >= 0x7f8)
{
/* x == +/-NaN => log1pf(x) = NaN. */
return __math_invalidf (asfloat (ia));
}
/* x < -1.0 => log1pf(x) = NaN. */
return __math_invalidf (x);
}
/* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
is in [-0.25, 0.5]):
log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
We approximate log1p(m) with a polynomial, then scale by
k*log(2). Instead of doing this directly, we use an intermediate
scale factor s = 4*k*log(2) to ensure the scale is representable
as a normalised fp32 number. */
if (ix <= 0x3f000000 || ia <= 0x3e800000)
{
/* If x is in [-0.25, 0.5] then we can shortcut all the logic
below, as k = 0 and m = x. All we need is to return the
polynomial. */
return eval_poly (x, e);
}
float m = x + 1.0f;
/* k is used scale the input. 0x3f400000 is chosen as we are trying to
reduce x to the range [-0.25, 0.5]. Inside this range, k is 0.
Outside this range, if k is reinterpreted as (NOT CONVERTED TO) float:
let k = sign * 2^p where sign = -1 if x < 0
1 otherwise
and p is a negative integer whose magnitude increases with the
magnitude of x. */
int k = (asuint (m) - 0x3f400000) & 0xff800000;
/* By using integer arithmetic, we obtain the necessary scaling by
subtracting the unbiased exponent of k from the exponent of x. */
float m_scale = asfloat (asuint (x) - k);
/* Scale up to ensure that the scale factor is representable as normalised
fp32 number (s in [2**-126,2**26]), and scale m down accordingly. */
float s = asfloat (asuint (4.0f) - k);
m_scale = m_scale + fmaf (0.25f, s, -1.0f);
float p = eval_poly (m_scale, biased_exponent (asuint (m_scale)));
/* The scale factor to be applied back at the end - by multiplying float(k)
by 2^-23 we get the unbiased exponent of k. */
float scale_back = (float) k * 0x1.0p-23f;
/* Apply the scaling back. */
return fmaf (scale_back, Ln2, p);
}

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,46 +25,17 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/math.h"
#include "libc/tinymath/feval.internal.h"
#include "libc/tinymath/kernel.internal.h"
#include "libc/tinymath/log1pf_data.internal.h"
asm(".ident\t\"\\n\\n\
Musl libc (MIT License)\\n\
Copyright 2005-2014 Rich Felker, et. al.\"");
Optimized Routines (MIT License)\\n\
Copyright 2022 ARM Limited\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
/**
* Returns 𝑥 × 2ʸ.
*/
double ldexp(double x, int n)
{
union {double f; uint64_t i;} u;
double_t y = x;
if (n > 1023) {
y *= 0x1p1023;
n -= 1023;
if (n > 1023) {
y *= 0x1p1023;
n -= 1023;
if (n > 1023)
n = 1023;
}
} else if (n < -1022) {
/* make sure final n < -53 to avoid double
rounding in the subnormal range */
y *= 0x1p-1022 * 0x1p53;
n += 1022 - 53;
if (n < -1022) {
y *= 0x1p-1022 * 0x1p53;
n += 1022 - 53;
if (n < -1022)
n = -1022;
}
}
u.i = (uint64_t)(0x3ff+n)<<52;
x = y * u.f;
return x;
}
/* Polynomial coefficients generated using floating-point minimax
algorithm, see tools/log1pf.sollya for details. */
const struct log1pf_data __log1pf_data
= {.coeffs = {-0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f,
-0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, 0x1.abcb6p-4f,
-0x1.6f0d5ep-5f}};

View file

@ -0,0 +1,15 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG1PF_DATA_H_
#define COSMOPOLITAN_LIBC_TINYMATH_LOG1PF_DATA_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
#define LOG1PF_2U5
#define V_LOG1PF_2U5
#define LOG1PF_NCOEFFS 9
extern const struct log1pf_data {
float coeffs[LOG1PF_NCOEFFS];
} __log1pf_data _Hide;
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG1PF_DATA_H_ */

View file

@ -1,33 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Returns log exponent part of long double.
//
// @param 𝑥 is long double passed on stack
// @return result in %st0
logbl: push %rbp
mov %rsp,%rbp
.profilable
fldt 16(%rbp)
fxtract
fstp %st
pop %rbp
ret
.endfn logbl,globl

View file

@ -18,12 +18,14 @@
*/
#include "libc/errno.h"
#include "libc/math.h"
#include "third_party/libcxx/math.h"
/**
* Returns 𝑥^𝑦.
* @note should take ~56ns
*/
long double powl(long double x, long double y) {
#ifdef __x86_64__
long double t, u;
if (!isunordered(x, y)) {
if (!isinf(y)) {
@ -85,4 +87,7 @@ long double powl(long double x, long double y) {
} else {
return NAN;
}
#else
return pow(x, y);
#endif
}

View file

@ -1,26 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/math.h"
/**
* Returns 𝑥 × 2ʸ.
*/
double scalbn(double x, int n) {
return ldexp(x, n);
}

View file

@ -1,26 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/math.h"
/**
* Returns 𝑥 × 2ʸ.
*/
float scalbnf(float x, int n) {
return ldexpf(x, n);
}

View file

@ -27,7 +27,8 @@ LIBC_TINYMATH_A_DIRECTDEPS = \
LIBC_INTRIN \
LIBC_STUBS \
LIBC_NEXGEN32E \
LIBC_SYSV
LIBC_SYSV \
THIRD_PARTY_COMPILER_RT
LIBC_TINYMATH_A_DEPS := \
$(call uniq,$(foreach x,$(LIBC_TINYMATH_A_DIRECTDEPS),$($(x))))
@ -57,4 +58,6 @@ LIBC_TINYMATH_CHECKS = $(LIBC_TINYMATH_HDRS:%=o/$(MODE)/%.ok)
$(LIBC_TINYMATH_OBJS): $(BUILD_FILES) libc/tinymath/tinymath.mk
.PHONY: o/$(MODE)/libc/tinymath
o/$(MODE)/libc/tinymath: $(LIBC_TINYMATH_CHECKS)
o/$(MODE)/libc/tinymath: \
$(LIBC_TINYMATH_CHECKS) \
o/$(MODE)/libc/tinymath/tinymath.a