mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-22 21:32:31 +00:00
Make more code aarch64 friendly
This commit is contained in:
parent
ca2860947f
commit
2b73e72d59
568 changed files with 2197 additions and 1061 deletions
|
@ -1,9 +1,9 @@
|
|||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi│
|
||||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||
│ │
|
||||
│ Musl Libc │
|
||||
│ Copyright © 2005-2020 Rich Felker, et al. │
|
||||
│ Optimized Routines │
|
||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
||||
│ │
|
||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||
│ a copy of this software and associated documentation files (the │
|
||||
|
@ -25,124 +25,150 @@
|
|||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||
│ │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/likely.h"
|
||||
#include "libc/math.h"
|
||||
#include "libc/tinymath/complex.internal.h"
|
||||
#include "libc/tinymath/atan_common.internal.h"
|
||||
#include "libc/tinymath/internal.h"
|
||||
|
||||
asm(".ident\t\"\\n\\n\
|
||||
fdlibm (fdlibm license)\\n\
|
||||
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
|
||||
asm(".ident\t\"\\n\\n\
|
||||
Musl libc (MIT License)\\n\
|
||||
Copyright 2005-2014 Rich Felker, et. al.\"");
|
||||
Optimized Routines (MIT License)\\n\
|
||||
Copyright 2022 ARM Limited\"");
|
||||
asm(".include \"libc/disclaimer.inc\"");
|
||||
// clang-format off
|
||||
/* clang-format off */
|
||||
|
||||
/* origin: FreeBSD /usr/src/lib/msun/src/e_atan2.c */
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunSoft, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*
|
||||
*/
|
||||
/* atan2(y,x)
|
||||
* Method :
|
||||
* 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
|
||||
* 2. Reduce x to positive by (if x and y are unexceptional):
|
||||
* ARG (x+iy) = arctan(y/x) ... if x > 0,
|
||||
* ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0,
|
||||
*
|
||||
* Special cases:
|
||||
*
|
||||
* ATAN2((anything), NaN ) is NaN;
|
||||
* ATAN2(NAN , (anything) ) is NaN;
|
||||
* ATAN2(+-0, +(anything but NaN)) is +-0 ;
|
||||
* ATAN2(+-0, -(anything but NaN)) is +-pi ;
|
||||
* ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2;
|
||||
* ATAN2(+-(anything but INF and NaN), +INF) is +-0 ;
|
||||
* ATAN2(+-(anything but INF and NaN), -INF) is +-pi;
|
||||
* ATAN2(+-INF,+INF ) is +-pi/4 ;
|
||||
* ATAN2(+-INF,-INF ) is +-3pi/4;
|
||||
* ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2;
|
||||
*
|
||||
* Constants:
|
||||
* The hexadecimal values are the intended ones for the following
|
||||
* constants. The decimal values may be used, provided that the
|
||||
* compiler will convert from decimal to binary accurately enough
|
||||
* to produce the hexadecimal values shown.
|
||||
*/
|
||||
#define Pi (0x1.921fb54442d18p+1)
|
||||
#define PiOver2 (0x1.921fb54442d18p+0)
|
||||
#define PiOver4 (0x1.921fb54442d18p-1)
|
||||
#define SignMask (0x8000000000000000)
|
||||
#define ExpMask (0x7ff0000000000000)
|
||||
|
||||
static const double
|
||||
pi = 3.1415926535897931160E+00, /* 0x400921FB, 0x54442D18 */
|
||||
pi_lo = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
|
||||
/* We calculate atan2 by P(n/d), where n and d are similar to the input
|
||||
arguments, and P is a polynomial. Evaluating P(x) requires calculating x^8,
|
||||
which may underflow if n and d have very different magnitude.
|
||||
POW8_EXP_UFLOW_BOUND is the lower bound of the difference in exponents of n
|
||||
and d for which P underflows, and is used to special-case such inputs. */
|
||||
#define POW8_EXP_UFLOW_BOUND 62
|
||||
|
||||
/**
|
||||
* Returns arc tangent of 𝑦/𝑥.
|
||||
* @note the greatest of all libm functions
|
||||
*/
|
||||
double atan2(double y, double x)
|
||||
static inline int64_t
|
||||
biased_exponent (double f)
|
||||
{
|
||||
double z;
|
||||
uint32_t m,lx,ly,ix,iy;
|
||||
|
||||
if (isnan(x) || isnan(y))
|
||||
return x+y;
|
||||
EXTRACT_WORDS(ix, lx, x);
|
||||
EXTRACT_WORDS(iy, ly, y);
|
||||
if ((ix-0x3ff00000 | lx) == 0) /* x = 1.0 */
|
||||
return atan(y);
|
||||
m = ((iy>>31)&1) | ((ix>>30)&2); /* 2*sign(x)+sign(y) */
|
||||
ix = ix & 0x7fffffff;
|
||||
iy = iy & 0x7fffffff;
|
||||
|
||||
/* when y = 0 */
|
||||
if ((iy|ly) == 0) {
|
||||
switch(m) {
|
||||
case 0:
|
||||
case 1: return y; /* atan(+-0,+anything)=+-0 */
|
||||
case 2: return pi; /* atan(+0,-anything) = pi */
|
||||
case 3: return -pi; /* atan(-0,-anything) =-pi */
|
||||
}
|
||||
}
|
||||
/* when x = 0 */
|
||||
if ((ix|lx) == 0)
|
||||
return m&1 ? -pi/2 : pi/2;
|
||||
/* when x is INF */
|
||||
if (ix == 0x7ff00000) {
|
||||
if (iy == 0x7ff00000) {
|
||||
switch(m) {
|
||||
case 0: return pi/4; /* atan(+INF,+INF) */
|
||||
case 1: return -pi/4; /* atan(-INF,+INF) */
|
||||
case 2: return 3*pi/4; /* atan(+INF,-INF) */
|
||||
case 3: return -3*pi/4; /* atan(-INF,-INF) */
|
||||
}
|
||||
} else {
|
||||
switch(m) {
|
||||
case 0: return 0.0; /* atan(+...,+INF) */
|
||||
case 1: return -0.0; /* atan(-...,+INF) */
|
||||
case 2: return pi; /* atan(+...,-INF) */
|
||||
case 3: return -pi; /* atan(-...,-INF) */
|
||||
}
|
||||
}
|
||||
}
|
||||
/* |y/x| > 0x1p64 */
|
||||
if (ix+(64<<20) < iy || iy == 0x7ff00000)
|
||||
return m&1 ? -pi/2 : pi/2;
|
||||
|
||||
/* z = atan(|y/x|) without spurious underflow */
|
||||
if ((m&2) && iy+(64<<20) < ix) /* |y/x| < 0x1p-64, x<0 */
|
||||
z = 0;
|
||||
else
|
||||
z = atan(fabs(y/x));
|
||||
switch (m) {
|
||||
case 0: return z; /* atan(+,+) */
|
||||
case 1: return -z; /* atan(-,+) */
|
||||
case 2: return pi - (z-pi_lo); /* atan(+,-) */
|
||||
default: /* case 3 */
|
||||
return (z-pi_lo) - pi; /* atan(-,-) */
|
||||
}
|
||||
uint64_t fi = asuint64 (f);
|
||||
return (fi & ExpMask) >> 52;
|
||||
}
|
||||
|
||||
/* Fast implementation of scalar atan2. Largest errors are when y and x are
|
||||
close together. The greatest observed error is 2.28 ULP:
|
||||
atan2(-0x1.5915b1498e82fp+732, 0x1.54d11ef838826p+732)
|
||||
got -0x1.954f42f1fa841p-1 want -0x1.954f42f1fa843p-1. */
|
||||
double
|
||||
atan2 (double y, double x)
|
||||
{
|
||||
uint64_t ix = asuint64 (x);
|
||||
uint64_t iy = asuint64 (y);
|
||||
|
||||
uint64_t sign_x = ix & SignMask;
|
||||
uint64_t sign_y = iy & SignMask;
|
||||
|
||||
uint64_t iax = ix & ~SignMask;
|
||||
uint64_t iay = iy & ~SignMask;
|
||||
|
||||
bool xisnan = isnan (x);
|
||||
if (UNLIKELY (isnan (y) && !xisnan))
|
||||
return __math_invalid (y);
|
||||
if (UNLIKELY (xisnan))
|
||||
return __math_invalid (x);
|
||||
|
||||
/* m = 2 * sign(x) + sign(y). */
|
||||
uint32_t m = ((iy >> 63) & 1) | ((ix >> 62) & 2);
|
||||
|
||||
int64_t exp_diff = biased_exponent (x) - biased_exponent (y);
|
||||
|
||||
/* y = 0. */
|
||||
if (iay == 0)
|
||||
{
|
||||
switch (m)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
return y; /* atan(+-0,+anything)=+-0. */
|
||||
case 2:
|
||||
return Pi; /* atan(+0,-anything) = pi. */
|
||||
case 3:
|
||||
return -Pi; /* atan(-0,-anything) =-pi. */
|
||||
}
|
||||
}
|
||||
/* Special case for (x, y) either on or very close to the y axis. Either x =
|
||||
0, or y is much larger than x (difference in exponents >=
|
||||
POW8_EXP_UFLOW_BOUND). */
|
||||
if (UNLIKELY (iax == 0 || exp_diff <= -POW8_EXP_UFLOW_BOUND))
|
||||
return sign_y ? -PiOver2 : PiOver2;
|
||||
|
||||
/* Special case for either x is INF or (x, y) is very close to x axis and x is
|
||||
negative. */
|
||||
if (UNLIKELY (iax == 0x7ff0000000000000
|
||||
|| (exp_diff >= POW8_EXP_UFLOW_BOUND && m >= 2)))
|
||||
{
|
||||
if (iay == 0x7ff0000000000000)
|
||||
{
|
||||
switch (m)
|
||||
{
|
||||
case 0:
|
||||
return PiOver4; /* atan(+INF,+INF). */
|
||||
case 1:
|
||||
return -PiOver4; /* atan(-INF,+INF). */
|
||||
case 2:
|
||||
return 3.0 * PiOver4; /* atan(+INF,-INF). */
|
||||
case 3:
|
||||
return -3.0 * PiOver4; /* atan(-INF,-INF). */
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (m)
|
||||
{
|
||||
case 0:
|
||||
return 0.0; /* atan(+...,+INF). */
|
||||
case 1:
|
||||
return -0.0; /* atan(-...,+INF). */
|
||||
case 2:
|
||||
return Pi; /* atan(+...,-INF). */
|
||||
case 3:
|
||||
return -Pi; /* atan(-...,-INF). */
|
||||
}
|
||||
}
|
||||
}
|
||||
/* y is INF. */
|
||||
if (iay == 0x7ff0000000000000)
|
||||
return sign_y ? -PiOver2 : PiOver2;
|
||||
|
||||
uint64_t sign_xy = sign_x ^ sign_y;
|
||||
|
||||
double ax = asdouble (iax);
|
||||
double ay = asdouble (iay);
|
||||
uint64_t pred_aygtax = (ay > ax);
|
||||
|
||||
/* Set up z for call to atan. */
|
||||
double n = pred_aygtax ? -ax : ay;
|
||||
double d = pred_aygtax ? ay : ax;
|
||||
double z = n / d;
|
||||
|
||||
double ret;
|
||||
if (UNLIKELY (m < 2 && exp_diff >= POW8_EXP_UFLOW_BOUND))
|
||||
{
|
||||
/* If (x, y) is very close to x axis and x is positive, the polynomial
|
||||
will underflow and evaluate to z. */
|
||||
ret = z;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Work out the correct shift. */
|
||||
double shift = sign_x ? -2.0 : 0.0;
|
||||
shift = pred_aygtax ? shift + 1.0 : shift;
|
||||
shift *= PiOver2;
|
||||
|
||||
ret = eval_poly (z, z, shift);
|
||||
}
|
||||
|
||||
/* Account for the sign of x and y. */
|
||||
return asdouble (asuint64 (ret) ^ sign_xy);
|
||||
}
|
||||
|
|
|
@ -1,28 +1,182 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ Optimized Routines │
|
||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
||||
│ │
|
||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||
│ a copy of this software and associated documentation files (the │
|
||||
│ "Software"), to deal in the Software without restriction, including │
|
||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
||||
│ the following conditions: │
|
||||
│ │
|
||||
│ The above copyright notice and this permission notice shall be │
|
||||
│ included in all copies or substantial portions of the Software. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/likely.h"
|
||||
#include "libc/math.h"
|
||||
#include "libc/tinymath/atanf_common.internal.h"
|
||||
#include "libc/tinymath/internal.h"
|
||||
|
||||
/**
|
||||
* Returns arc tangent of 𝑦/𝑥.
|
||||
*/
|
||||
float atan2f(float y, float x) {
|
||||
long double st;
|
||||
asm("fpatan" : "=t"(st) : "0"((long double)x), "u"((long double)y) : "st(1)");
|
||||
return st;
|
||||
asm(".ident\t\"\\n\\n\
|
||||
Optimized Routines (MIT License)\\n\
|
||||
Copyright 2022 ARM Limited\"");
|
||||
asm(".include \"libc/disclaimer.inc\"");
|
||||
/* clang-format off */
|
||||
|
||||
#define Pi (0x1.921fb6p+1f)
|
||||
#define PiOver2 (0x1.921fb6p+0f)
|
||||
#define PiOver4 (0x1.921fb6p-1f)
|
||||
#define SignMask (0x80000000)
|
||||
|
||||
/* We calculate atan2f by P(n/d), where n and d are similar to the input
|
||||
arguments, and P is a polynomial. The polynomial may underflow.
|
||||
POLY_UFLOW_BOUND is the lower bound of the difference in exponents of n and d
|
||||
for which P underflows, and is used to special-case such inputs. */
|
||||
#define POLY_UFLOW_BOUND 24
|
||||
|
||||
static inline int32_t
|
||||
biased_exponent (float f)
|
||||
{
|
||||
uint32_t fi = asuint (f);
|
||||
int32_t ex = (int32_t) ((fi & 0x7f800000) >> 23);
|
||||
if (UNLIKELY (ex == 0))
|
||||
{
|
||||
/* Subnormal case - we still need to get the exponent right for subnormal
|
||||
numbers as division may take us back inside the normal range. */
|
||||
return ex - __builtin_clz (fi << 9);
|
||||
}
|
||||
return ex;
|
||||
}
|
||||
|
||||
/* Fast implementation of scalar atan2f. Largest observed error is
|
||||
2.88ulps in [99.0, 101.0] x [99.0, 101.0]:
|
||||
atan2f(0x1.9332d8p+6, 0x1.8cb6c4p+6) got 0x1.964646p-1
|
||||
want 0x1.964640p-1. */
|
||||
float
|
||||
atan2f (float y, float x)
|
||||
{
|
||||
uint32_t ix = asuint (x);
|
||||
uint32_t iy = asuint (y);
|
||||
|
||||
uint32_t sign_x = ix & SignMask;
|
||||
uint32_t sign_y = iy & SignMask;
|
||||
|
||||
uint32_t iax = ix & ~SignMask;
|
||||
uint32_t iay = iy & ~SignMask;
|
||||
|
||||
/* x or y is NaN. */
|
||||
if ((iax > 0x7f800000) || (iay > 0x7f800000))
|
||||
return x + y;
|
||||
|
||||
/* m = 2 * sign(x) + sign(y). */
|
||||
uint32_t m = ((iy >> 31) & 1) | ((ix >> 30) & 2);
|
||||
|
||||
/* The following follows glibc ieee754 implementation, except
|
||||
that we do not use +-tiny shifts (non-nearest rounding mode). */
|
||||
|
||||
int32_t exp_diff = biased_exponent (x) - biased_exponent (y);
|
||||
|
||||
/* Special case for (x, y) either on or very close to the x axis. Either y =
|
||||
0, or y is tiny and x is huge (difference in exponents >=
|
||||
POLY_UFLOW_BOUND). In the second case, we only want to use this special
|
||||
case when x is negative (i.e. quadrants 2 or 3). */
|
||||
if (UNLIKELY (iay == 0 || (exp_diff >= POLY_UFLOW_BOUND && m >= 2)))
|
||||
{
|
||||
switch (m)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
return y; /* atan(+-0,+anything)=+-0. */
|
||||
case 2:
|
||||
return Pi; /* atan(+0,-anything) = pi. */
|
||||
case 3:
|
||||
return -Pi; /* atan(-0,-anything) =-pi. */
|
||||
}
|
||||
}
|
||||
/* Special case for (x, y) either on or very close to the y axis. Either x =
|
||||
0, or x is tiny and y is huge (difference in exponents >=
|
||||
POLY_UFLOW_BOUND). */
|
||||
if (UNLIKELY (iax == 0 || exp_diff <= -POLY_UFLOW_BOUND))
|
||||
return sign_y ? -PiOver2 : PiOver2;
|
||||
|
||||
/* x is INF. */
|
||||
if (iax == 0x7f800000)
|
||||
{
|
||||
if (iay == 0x7f800000)
|
||||
{
|
||||
switch (m)
|
||||
{
|
||||
case 0:
|
||||
return PiOver4; /* atan(+INF,+INF). */
|
||||
case 1:
|
||||
return -PiOver4; /* atan(-INF,+INF). */
|
||||
case 2:
|
||||
return 3.0f * PiOver4; /* atan(+INF,-INF). */
|
||||
case 3:
|
||||
return -3.0f * PiOver4; /* atan(-INF,-INF). */
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (m)
|
||||
{
|
||||
case 0:
|
||||
return 0.0f; /* atan(+...,+INF). */
|
||||
case 1:
|
||||
return -0.0f; /* atan(-...,+INF). */
|
||||
case 2:
|
||||
return Pi; /* atan(+...,-INF). */
|
||||
case 3:
|
||||
return -Pi; /* atan(-...,-INF). */
|
||||
}
|
||||
}
|
||||
}
|
||||
/* y is INF. */
|
||||
if (iay == 0x7f800000)
|
||||
return sign_y ? -PiOver2 : PiOver2;
|
||||
|
||||
uint32_t sign_xy = sign_x ^ sign_y;
|
||||
|
||||
float ax = asfloat (iax);
|
||||
float ay = asfloat (iay);
|
||||
|
||||
bool pred_aygtax = (ay > ax);
|
||||
|
||||
/* Set up z for call to atanf. */
|
||||
float n = pred_aygtax ? -ax : ay;
|
||||
float d = pred_aygtax ? ay : ax;
|
||||
float z = n / d;
|
||||
|
||||
float ret;
|
||||
if (UNLIKELY (m < 2 && exp_diff >= POLY_UFLOW_BOUND))
|
||||
{
|
||||
/* If (x, y) is very close to x axis and x is positive, the polynomial
|
||||
will underflow and evaluate to z. */
|
||||
ret = z;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Work out the correct shift. */
|
||||
float shift = sign_x ? -2.0f : 0.0f;
|
||||
shift = pred_aygtax ? shift + 1.0f : shift;
|
||||
shift *= PiOver2;
|
||||
|
||||
ret = eval_poly (z, z, shift);
|
||||
}
|
||||
|
||||
/* Account for the sign of x and y. */
|
||||
return asfloat (asuint (ret) ^ sign_xy);
|
||||
}
|
||||
|
|
57
libc/tinymath/atan_common.internal.h
Normal file
57
libc/tinymath/atan_common.internal.h
Normal file
|
@ -0,0 +1,57 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATAN_COMMON_H_
|
||||
#define COSMOPOLITAN_LIBC_TINYMATH_ATAN_COMMON_H_
|
||||
#include "libc/tinymath/atan_data.internal.h"
|
||||
#include "libc/tinymath/estrin_wrap.internal.h"
|
||||
#include "libc/tinymath/horner.internal.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
// clang-format off
|
||||
|
||||
/*
|
||||
* Double-precision polynomial evaluation function for scalar and vector atan(x)
|
||||
* and atan2(y,x).
|
||||
*
|
||||
* Copyright (c) 2021-2023, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
|
||||
*/
|
||||
|
||||
#if WANT_VMATH
|
||||
|
||||
#define DBL_T float64x2_t
|
||||
#define P(i) v_f64 (__atan_poly_data.poly[i])
|
||||
|
||||
#else
|
||||
|
||||
#define DBL_T double
|
||||
#define P(i) __atan_poly_data.poly[i]
|
||||
|
||||
#endif
|
||||
|
||||
/* Polynomial used in fast atan(x) and atan2(y,x) implementations
|
||||
The order 19 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2). */
|
||||
static inline DBL_T
|
||||
eval_poly (DBL_T z, DBL_T az, DBL_T shift)
|
||||
{
|
||||
/* Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of
|
||||
full scheme to avoid underflow in x^16. */
|
||||
DBL_T z2 = z * z;
|
||||
DBL_T x2 = z2 * z2;
|
||||
DBL_T x4 = x2 * x2;
|
||||
DBL_T x8 = x4 * x4;
|
||||
DBL_T y
|
||||
= FMA (ESTRIN_11_ (z2, x2, x4, x8, P, 8), x8, ESTRIN_7 (z2, x2, x4, P));
|
||||
|
||||
/* Finalize. y = shift + z + z^3 * P(z^2). */
|
||||
y = FMA (y, z2 * az, az);
|
||||
y = y + shift;
|
||||
|
||||
return y;
|
||||
}
|
||||
|
||||
#undef DBL_T
|
||||
#undef FMA
|
||||
#undef P
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATAN_COMMON_H_ */
|
46
libc/tinymath/atan_data.c
Normal file
46
libc/tinymath/atan_data.c
Normal file
|
@ -0,0 +1,46 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||
│ │
|
||||
│ Optimized Routines │
|
||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
||||
│ │
|
||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||
│ a copy of this software and associated documentation files (the │
|
||||
│ "Software"), to deal in the Software without restriction, including │
|
||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
||||
│ the following conditions: │
|
||||
│ │
|
||||
│ The above copyright notice and this permission notice shall be │
|
||||
│ included in all copies or substantial portions of the Software. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||
│ │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/tinymath/atan_data.internal.h"
|
||||
|
||||
asm(".ident\t\"\\n\\n\
|
||||
Optimized Routines (MIT License)\\n\
|
||||
Copyright 2022 ARM Limited\"");
|
||||
asm(".include \"libc/disclaimer.inc\"");
|
||||
/* clang-format off */
|
||||
|
||||
const struct atan_poly_data __atan_poly_data = {
|
||||
.poly = {/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
|
||||
[2**-1022, 1.0]. See atan.sollya for details of how these were
|
||||
generated. */
|
||||
-0x1.5555555555555p-2, 0x1.99999999996c1p-3, -0x1.2492492478f88p-3,
|
||||
0x1.c71c71bc3951cp-4, -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4,
|
||||
-0x1.11100ee084227p-4, 0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5,
|
||||
0x1.842dbe9b0d916p-5, -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5,
|
||||
-0x1.00e6eece7de8p-5, 0x1.860897b29e5efp-6, -0x1.0051381722a59p-6,
|
||||
0x1.14e9dc19a4a4ep-7, -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10,
|
||||
-0x1.ab24da7be7402p-13, 0x1.358851160a528p-16}};
|
13
libc/tinymath/atan_data.internal.h
Normal file
13
libc/tinymath/atan_data.internal.h
Normal file
|
@ -0,0 +1,13 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATAN_DATA_H_
|
||||
#define COSMOPOLITAN_LIBC_TINYMATH_ATAN_DATA_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
#define ATAN_POLY_NCOEFFS 20
|
||||
extern const struct atan_poly_data {
|
||||
double poly[ATAN_POLY_NCOEFFS];
|
||||
} __atan_poly_data _Hide;
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATAN_DATA_H_ */
|
46
libc/tinymath/atanf_common.internal.h
Normal file
46
libc/tinymath/atanf_common.internal.h
Normal file
|
@ -0,0 +1,46 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATANF_COMMON_H_
|
||||
#define COSMOPOLITAN_LIBC_TINYMATH_ATANF_COMMON_H_
|
||||
#include "libc/tinymath/atanf_data.internal.h"
|
||||
#include "libc/tinymath/estrin_wrap.internal.h"
|
||||
#include "libc/tinymath/hornerf.internal.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
// clang-format off
|
||||
|
||||
#if WANT_VMATH
|
||||
|
||||
#define FLT_T float32x4_t
|
||||
#define P(i) v_f32 (__atanf_poly_data.poly[i])
|
||||
|
||||
#else
|
||||
|
||||
#define FLT_T float
|
||||
#define P(i) __atanf_poly_data.poly[i]
|
||||
|
||||
#endif
|
||||
|
||||
/* Polynomial used in fast atanf(x) and atan2f(y,x) implementations
|
||||
The order 7 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2). */
|
||||
static inline FLT_T
|
||||
eval_poly (FLT_T z, FLT_T az, FLT_T shift)
|
||||
{
|
||||
/* Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However,
|
||||
a standard implementation using z8 creates spurious underflow
|
||||
in the very last fma (when z^8 is small enough).
|
||||
Therefore, we split the last fma into a mul and and an fma.
|
||||
Horner and single-level Estrin have higher errors that exceed
|
||||
threshold. */
|
||||
FLT_T z2 = z * z;
|
||||
FLT_T z4 = z2 * z2;
|
||||
|
||||
/* Then assemble polynomial. */
|
||||
FLT_T y = FMA (z4, z4 * ESTRIN_3_ (z2, z4, P, 4), ESTRIN_3 (z2, z4, P));
|
||||
|
||||
/* Finalize:
|
||||
y = shift + z * P(z^2). */
|
||||
return FMA (y, z2 * az, az) + shift;
|
||||
}
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATANF_COMMON_H_ */
|
|
@ -1,9 +1,9 @@
|
|||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi│
|
||||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||
│ │
|
||||
│ Musl Libc │
|
||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
||||
│ Optimized Routines │
|
||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
||||
│ │
|
||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||
│ a copy of this software and associated documentation files (the │
|
||||
|
@ -25,44 +25,17 @@
|
|||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||
│ │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/math.h"
|
||||
#include "libc/tinymath/feval.internal.h"
|
||||
#include "libc/tinymath/kernel.internal.h"
|
||||
#include "libc/tinymath/atanf_data.internal.h"
|
||||
|
||||
asm(".ident\t\"\\n\\n\
|
||||
Musl libc (MIT License)\\n\
|
||||
Copyright 2005-2014 Rich Felker, et. al.\"");
|
||||
Optimized Routines (MIT License)\\n\
|
||||
Copyright 2022 ARM Limited\"");
|
||||
asm(".include \"libc/disclaimer.inc\"");
|
||||
/* clang-format off */
|
||||
|
||||
/**
|
||||
* Returns 𝑥 × 2ʸ.
|
||||
/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0].
|
||||
*/
|
||||
float ldexpf(float x, int n)
|
||||
{
|
||||
union {float f; uint32_t i;} u;
|
||||
float_t y = x;
|
||||
|
||||
if (n > 127) {
|
||||
y *= 0x1p127f;
|
||||
n -= 127;
|
||||
if (n > 127) {
|
||||
y *= 0x1p127f;
|
||||
n -= 127;
|
||||
if (n > 127)
|
||||
n = 127;
|
||||
}
|
||||
} else if (n < -126) {
|
||||
y *= 0x1p-126f * 0x1p24f;
|
||||
n += 126 - 24;
|
||||
if (n < -126) {
|
||||
y *= 0x1p-126f * 0x1p24f;
|
||||
n += 126 - 24;
|
||||
if (n < -126)
|
||||
n = -126;
|
||||
}
|
||||
}
|
||||
u.i = (uint32_t)(0x7f+n)<<23;
|
||||
x = y * u.f;
|
||||
return x;
|
||||
}
|
||||
const struct atanf_poly_data __atanf_poly_data = {
|
||||
.poly = {/* See atanf.sollya for details of how these were generated. */
|
||||
-0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f,
|
||||
-0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f}};
|
13
libc/tinymath/atanf_data.internal.h
Normal file
13
libc/tinymath/atanf_data.internal.h
Normal file
|
@ -0,0 +1,13 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATANF_DATA_H_
|
||||
#define COSMOPOLITAN_LIBC_TINYMATH_ATANF_DATA_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
#define ATANF_POLY_NCOEFFS 8
|
||||
extern const struct atanf_poly_data {
|
||||
float poly[ATANF_POLY_NCOEFFS];
|
||||
} __atanf_poly_data _Hide;
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATANF_DATA_H_ */
|
53
libc/tinymath/estrin_wrap.internal.h
Normal file
53
libc/tinymath/estrin_wrap.internal.h
Normal file
|
@ -0,0 +1,53 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_TINYMATH_ESTRIN_WRAP_INTERNAL_H_
|
||||
#define COSMOPOLITAN_LIBC_TINYMATH_ESTRIN_WRAP_INTERNAL_H_
|
||||
|
||||
/*
|
||||
* Helper macros for double-precision Estrin polynomial evaluation.
|
||||
*
|
||||
* Copyright (c) 2022-2023, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
|
||||
*/
|
||||
|
||||
// clang-format off
|
||||
#define ESTRIN_1_(x, c, i) FMA(x, c(1 + i), c(i))
|
||||
#define ESTRIN_2_(x, x2, c, i) FMA(x2, c(2 + i), ESTRIN_1_(x, c, i))
|
||||
#define ESTRIN_3_(x, x2, c, i) FMA(x2, ESTRIN_1_(x, c, 2 + i), ESTRIN_1_(x, c, i))
|
||||
#define ESTRIN_4_(x, x2, x4, c, i) FMA(x4, c(4 + i), ESTRIN_3_(x, x2, c, i))
|
||||
#define ESTRIN_5_(x, x2, x4, c, i) FMA(x4, ESTRIN_1_(x, c, 4 + i), ESTRIN_3_(x, x2, c, i))
|
||||
#define ESTRIN_6_(x, x2, x4, c, i) FMA(x4, ESTRIN_2_(x, x2, c, 4 + i), ESTRIN_3_(x, x2, c, i))
|
||||
#define ESTRIN_7_(x, x2, x4, c, i) FMA(x4, ESTRIN_3_(x, x2, c, 4 + i), ESTRIN_3_(x, x2, c, i))
|
||||
#define ESTRIN_8_(x, x2, x4, x8, c, i) FMA(x8, c(8 + i), ESTRIN_7_(x, x2, x4, c, i))
|
||||
#define ESTRIN_9_(x, x2, x4, x8, c, i) FMA(x8, ESTRIN_1_(x, c, 8 + i), ESTRIN_7_(x, x2, x4, c, i))
|
||||
#define ESTRIN_10_(x, x2, x4, x8, c, i) FMA(x8, ESTRIN_2_(x, x2, c, 8 + i), ESTRIN_7_(x, x2, x4, c, i))
|
||||
#define ESTRIN_11_(x, x2, x4, x8, c, i) FMA(x8, ESTRIN_3_(x, x2, c, 8 + i), ESTRIN_7_(x, x2, x4, c, i))
|
||||
#define ESTRIN_12_(x, x2, x4, x8, c, i) FMA(x8, ESTRIN_4_(x, x2, x4, c, 8 + i), ESTRIN_7_(x, x2, x4, c, i))
|
||||
#define ESTRIN_13_(x, x2, x4, x8, c, i) FMA(x8, ESTRIN_5_(x, x2, x4, c, 8 + i), ESTRIN_7_(x, x2, x4, c, i))
|
||||
#define ESTRIN_14_(x, x2, x4, x8, c, i) FMA(x8, ESTRIN_6_(x, x2, x4, c, 8 + i), ESTRIN_7_(x, x2, x4, c, i))
|
||||
#define ESTRIN_15_(x, x2, x4, x8, c, i) FMA(x8, ESTRIN_7_(x, x2, x4, c, 8 + i), ESTRIN_7_(x, x2, x4, c, i))
|
||||
#define ESTRIN_16_(x, x2, x4, x8, x16, c, i) FMA(x16, c(16 + i), ESTRIN_15_(x, x2, x4, x8, c, i))
|
||||
#define ESTRIN_17_(x, x2, x4, x8, x16, c, i) FMA(x16, ESTRIN_1_(x, c, 16 + i), ESTRIN_15_(x, x2, x4, x8, c, i))
|
||||
#define ESTRIN_18_(x, x2, x4, x8, x16, c, i) FMA(x16, ESTRIN_2_(x, x2, c, 16 + i), ESTRIN_15_(x, x2, x4, x8, c, i))
|
||||
#define ESTRIN_19_(x, x2, x4, x8, x16, c, i) FMA(x16, ESTRIN_3_(x, x2, c, 16 + i), ESTRIN_15_(x, x2, x4, x8, c, i))
|
||||
|
||||
#define ESTRIN_1(x, c) ESTRIN_1_(x, c, 0)
|
||||
#define ESTRIN_2(x, x2, c) ESTRIN_2_(x, x2, c, 0)
|
||||
#define ESTRIN_3(x, x2, c) ESTRIN_3_(x, x2, c, 0)
|
||||
#define ESTRIN_4(x, x2, x4, c) ESTRIN_4_(x, x2, x4, c, 0)
|
||||
#define ESTRIN_5(x, x2, x4, c) ESTRIN_5_(x, x2, x4, c, 0)
|
||||
#define ESTRIN_6(x, x2, x4, c) ESTRIN_6_(x, x2, x4, c, 0)
|
||||
#define ESTRIN_7(x, x2, x4, c) ESTRIN_7_(x, x2, x4, c, 0)
|
||||
#define ESTRIN_8(x, x2, x4, x8, c) ESTRIN_8_(x, x2, x4, x8, c, 0)
|
||||
#define ESTRIN_9(x, x2, x4, x8, c) ESTRIN_9_(x, x2, x4, x8, c, 0)
|
||||
#define ESTRIN_10(x, x2, x4, x8, c) ESTRIN_10_(x, x2, x4, x8, c, 0)
|
||||
#define ESTRIN_11(x, x2, x4, x8, c) ESTRIN_11_(x, x2, x4, x8, c, 0)
|
||||
#define ESTRIN_12(x, x2, x4, x8, c) ESTRIN_12_(x, x2, x4, x8, c, 0)
|
||||
#define ESTRIN_13(x, x2, x4, x8, c) ESTRIN_13_(x, x2, x4, x8, c, 0)
|
||||
#define ESTRIN_14(x, x2, x4, x8, c) ESTRIN_14_(x, x2, x4, x8, c, 0)
|
||||
#define ESTRIN_15(x, x2, x4, x8, c) ESTRIN_15_(x, x2, x4, x8, c, 0)
|
||||
#define ESTRIN_16(x, x2, x4, x8, x16, c) ESTRIN_16_(x, x2, x4, x8, x16, c, 0)
|
||||
#define ESTRIN_17(x, x2, x4, x8, x16, c) ESTRIN_17_(x, x2, x4, x8, x16, c, 0)
|
||||
#define ESTRIN_18(x, x2, x4, x8, x16, c) ESTRIN_18_(x, x2, x4, x8, x16, c, 0)
|
||||
#define ESTRIN_19(x, x2, x4, x8, x16, c) ESTRIN_19_(x, x2, x4, x8, x16, c, 0)
|
||||
// clang-format on
|
||||
|
||||
#endif /* COSMOPOLITAN_LIBC_TINYMATH_ESTRIN_WRAP_INTERNAL_H_ */
|
|
@ -1,35 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/math.h"
|
||||
|
||||
/**
|
||||
* Returns maximum of two doubles.
|
||||
*
|
||||
* If one argument is NAN then the other is returned.
|
||||
* This function is designed to do the right thing with
|
||||
* signed zeroes.
|
||||
*/
|
||||
double fmax(double x, double y) {
|
||||
if (__builtin_isnan(x)) return y;
|
||||
if (__builtin_isnan(y)) return x;
|
||||
if (__builtin_signbit(x) != __builtin_signbit(y)) {
|
||||
return __builtin_signbit(x) ? y : x; /* C99 Annex F.9.9.2 */
|
||||
}
|
||||
return x < y ? y : x;
|
||||
}
|
|
@ -1,35 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/math.h"
|
||||
|
||||
/**
|
||||
* Returns maximum of two floats.
|
||||
*
|
||||
* If one argument is NAN then the other is returned.
|
||||
* This function is designed to do the right thing with
|
||||
* signed zeroes.
|
||||
*/
|
||||
float fmaxf(float x, float y) {
|
||||
if (__builtin_isnan(x)) return y;
|
||||
if (__builtin_isnan(y)) return x;
|
||||
if (__builtin_signbitf(x) != __builtin_signbitf(y)) {
|
||||
return __builtin_signbitf(x) ? y : x; /* C99 Annex F.9.9.2 */
|
||||
}
|
||||
return x < y ? y : x;
|
||||
}
|
|
@ -1,35 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/math.h"
|
||||
|
||||
/**
|
||||
* Returns maximum of two long doubles.
|
||||
*
|
||||
* If one argument is NAN then the other is returned.
|
||||
* This function is designed to do the right thing with
|
||||
* signed zeroes.
|
||||
*/
|
||||
long double fmaxl(long double x, long double y) {
|
||||
if (__builtin_isnan(x)) return y;
|
||||
if (__builtin_isnan(y)) return x;
|
||||
if (__builtin_signbitl(x) != __builtin_signbitl(y)) {
|
||||
return __builtin_signbitl(x) ? y : x; /* C99 Annex F.9.9.2 */
|
||||
}
|
||||
return x < y ? y : x;
|
||||
}
|
17
libc/tinymath/horner.internal.h
Normal file
17
libc/tinymath/horner.internal.h
Normal file
|
@ -0,0 +1,17 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_TINYMATH_HORNER_INTERNAL_H_
|
||||
#define COSMOPOLITAN_LIBC_TINYMATH_HORNER_INTERNAL_H_
|
||||
#include "libc/math.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
#if WANT_VMATH
|
||||
#define FMA(x, y, z) vfmaq_f64(z, x, y)
|
||||
#else
|
||||
#define FMA fma
|
||||
#endif
|
||||
|
||||
#include "libc/tinymath/horner_wrap.internal.h"
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_TINYMATH_HORNER_INTERNAL_H_ */
|
|
@ -1,16 +1,9 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_TINYMATH_HORNERF_INTERNAL_H_
|
||||
#define COSMOPOLITAN_LIBC_TINYMATH_HORNERF_INTERNAL_H_
|
||||
#include "third_party/libcxx/math.h"
|
||||
#include "libc/math.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/*
|
||||
* Helper macros for double-precision Horner polynomial evaluation.
|
||||
*
|
||||
* Copyright (c) 2022-2023, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
|
||||
*/
|
||||
|
||||
#if WANT_VMATH
|
||||
#define FMA(x, y, z) vfmaq_f32(z, x, y)
|
||||
#else
|
||||
|
@ -18,6 +11,7 @@ COSMOPOLITAN_C_START_
|
|||
#endif
|
||||
|
||||
#include "libc/tinymath/horner_wrap.internal.h"
|
||||
#include "third_party/libcxx/math.h"
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
|
|
|
@ -1,37 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Returns log₂x exponent part of long double.
|
||||
//
|
||||
// @param 𝑥 is long double passed on stack
|
||||
// @return result in %eax
|
||||
// @note needs sse3
|
||||
ilogbl: push %rbp
|
||||
mov %rsp,%rbp
|
||||
.profilable
|
||||
fldt 16(%rbp)
|
||||
fxtract
|
||||
fstp %st
|
||||
push %rax
|
||||
fisttpl (%rsp)
|
||||
mov (%rsp),%eax
|
||||
leave
|
||||
ret
|
||||
.endfn ilogbl,globl
|
|
@ -1,38 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Returns 𝑥 × 2ʸ.
|
||||
//
|
||||
// @param 𝑥 is long double passed on stack
|
||||
// @param 𝑦 is exponent via %edi
|
||||
// @return result in %st0
|
||||
ldexpl: push %rbp
|
||||
mov %rsp,%rbp
|
||||
.profilable
|
||||
push %rdi
|
||||
fildl (%rsp)
|
||||
fldt 16(%rbp)
|
||||
fscale
|
||||
fstp %st(1)
|
||||
leave
|
||||
ret
|
||||
.endfn ldexpl,globl
|
||||
.alias ldexpl,scalbnl
|
||||
.alias ldexpl,scalblnl
|
|
@ -1,57 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Returns log(𝟷+𝑥).
|
||||
//
|
||||
// @param 𝑥 is float scalar in low quarter of %xmm0
|
||||
// @return float scalar in low quarter of %xmm0
|
||||
log1pf: push %rbp
|
||||
mov %rsp,%rbp
|
||||
.profilable
|
||||
push %rax
|
||||
movss %xmm0,-4(%rbp)
|
||||
flds -4(%rbp)
|
||||
fld %st
|
||||
fabs
|
||||
fldt .Lnnan(%rip)
|
||||
fxch
|
||||
fcomip %st(1),%st
|
||||
fstp %st
|
||||
jnb 2f
|
||||
fldln2
|
||||
fxch
|
||||
fyl2xp1
|
||||
1: fstps -4(%rbp)
|
||||
movss -4(%rbp),%xmm0
|
||||
leave
|
||||
ret
|
||||
2: fld1
|
||||
faddp %st,%st(1)
|
||||
fldln2
|
||||
fxch
|
||||
fyl2x
|
||||
jmp 1b
|
||||
.endfn log1pf,globl
|
||||
|
||||
.rodata.cst16
|
||||
.Lnnan: .long 0x0c4336f8
|
||||
.long 0x95f61998
|
||||
.long 0x3ffd
|
||||
.long 0
|
180
libc/tinymath/log1pf.c
Normal file
180
libc/tinymath/log1pf.c
Normal file
|
@ -0,0 +1,180 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||
│ │
|
||||
│ Optimized Routines │
|
||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
||||
│ │
|
||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||
│ a copy of this software and associated documentation files (the │
|
||||
│ "Software"), to deal in the Software without restriction, including │
|
||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
||||
│ the following conditions: │
|
||||
│ │
|
||||
│ The above copyright notice and this permission notice shall be │
|
||||
│ included in all copies or substantial portions of the Software. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||
│ │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/likely.h"
|
||||
#include "libc/math.h"
|
||||
#include "libc/tinymath/internal.h"
|
||||
#include "libc/tinymath/log1pf_data.internal.h"
|
||||
|
||||
asm(".ident\t\"\\n\\n\
|
||||
Optimized Routines (MIT License)\\n\
|
||||
Copyright 2022 ARM Limited\"");
|
||||
asm(".include \"libc/disclaimer.inc\"");
|
||||
/* clang-format off */
|
||||
|
||||
#define Ln2 (0x1.62e43p-1f)
|
||||
#define SignMask (0x80000000)
|
||||
|
||||
/* Biased exponent of the largest float m for which m^8 underflows. */
|
||||
#define M8UFLOW_BOUND_BEXP 112
|
||||
/* Biased exponent of the largest float for which we just return x. */
|
||||
#define TINY_BOUND_BEXP 103
|
||||
|
||||
#define C(i) __log1pf_data.coeffs[i]
|
||||
|
||||
static inline float
|
||||
eval_poly (float m, uint32_t e)
|
||||
{
|
||||
#ifdef LOG1PF_2U5
|
||||
|
||||
/* 2.5 ulp variant. Approximate log(1+m) on [-0.25, 0.5] using
|
||||
slightly modified Estrin scheme (no x^0 term, and x term is just x). */
|
||||
float p_12 = fmaf (m, C (1), C (0));
|
||||
float p_34 = fmaf (m, C (3), C (2));
|
||||
float p_56 = fmaf (m, C (5), C (4));
|
||||
float p_78 = fmaf (m, C (7), C (6));
|
||||
|
||||
float m2 = m * m;
|
||||
float p_02 = fmaf (m2, p_12, m);
|
||||
float p_36 = fmaf (m2, p_56, p_34);
|
||||
float p_79 = fmaf (m2, C (8), p_78);
|
||||
|
||||
float m4 = m2 * m2;
|
||||
float p_06 = fmaf (m4, p_36, p_02);
|
||||
|
||||
if (UNLIKELY (e < M8UFLOW_BOUND_BEXP))
|
||||
return p_06;
|
||||
|
||||
float m8 = m4 * m4;
|
||||
return fmaf (m8, p_79, p_06);
|
||||
|
||||
#elif defined(LOG1PF_1U3)
|
||||
|
||||
/* 1.3 ulp variant. Approximate log(1+m) on [-0.25, 0.5] using Horner
|
||||
scheme. Our polynomial approximation for log1p has the form
|
||||
x + C1 * x^2 + C2 * x^3 + C3 * x^4 + ...
|
||||
Hence approximation has the form m + m^2 * P(m)
|
||||
where P(x) = C1 + C2 * x + C3 * x^2 + ... . */
|
||||
return fmaf (m, m * HORNER_8 (m, C), m);
|
||||
|
||||
#else
|
||||
#error No log1pf approximation exists with the requested precision. Options are 13 or 25.
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
biased_exponent (uint32_t ix)
|
||||
{
|
||||
return (ix & 0x7f800000) >> 23;
|
||||
}
|
||||
|
||||
/* log1pf approximation using polynomial on reduced interval. Worst-case error
|
||||
when using Estrin is roughly 2.02 ULP:
|
||||
log1pf(0x1.21e13ap-2) got 0x1.fe8028p-3 want 0x1.fe802cp-3. */
|
||||
float
|
||||
log1pf (float x)
|
||||
{
|
||||
uint32_t ix = asuint (x);
|
||||
uint32_t ia = ix & ~SignMask;
|
||||
uint32_t ia12 = ia >> 20;
|
||||
uint32_t e = biased_exponent (ix);
|
||||
|
||||
/* Handle special cases first. */
|
||||
if (UNLIKELY (ia12 >= 0x7f8 || ix >= 0xbf800000 || ix == 0x80000000
|
||||
|| e <= TINY_BOUND_BEXP))
|
||||
{
|
||||
if (ix == 0xff800000)
|
||||
{
|
||||
/* x == -Inf => log1pf(x) = NaN. */
|
||||
return NAN;
|
||||
}
|
||||
if ((ix == 0x7f800000 || e <= TINY_BOUND_BEXP) && ia12 <= 0x7f8)
|
||||
{
|
||||
/* |x| < TinyBound => log1p(x) = x.
|
||||
x == Inf => log1pf(x) = Inf. */
|
||||
return x;
|
||||
}
|
||||
if (ix == 0xbf800000)
|
||||
{
|
||||
/* x == -1.0 => log1pf(x) = -Inf. */
|
||||
return __math_divzerof (-1);
|
||||
}
|
||||
if (ia12 >= 0x7f8)
|
||||
{
|
||||
/* x == +/-NaN => log1pf(x) = NaN. */
|
||||
return __math_invalidf (asfloat (ia));
|
||||
}
|
||||
/* x < -1.0 => log1pf(x) = NaN. */
|
||||
return __math_invalidf (x);
|
||||
}
|
||||
|
||||
/* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
|
||||
is in [-0.25, 0.5]):
|
||||
log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
|
||||
|
||||
We approximate log1p(m) with a polynomial, then scale by
|
||||
k*log(2). Instead of doing this directly, we use an intermediate
|
||||
scale factor s = 4*k*log(2) to ensure the scale is representable
|
||||
as a normalised fp32 number. */
|
||||
|
||||
if (ix <= 0x3f000000 || ia <= 0x3e800000)
|
||||
{
|
||||
/* If x is in [-0.25, 0.5] then we can shortcut all the logic
|
||||
below, as k = 0 and m = x. All we need is to return the
|
||||
polynomial. */
|
||||
return eval_poly (x, e);
|
||||
}
|
||||
|
||||
float m = x + 1.0f;
|
||||
|
||||
/* k is used scale the input. 0x3f400000 is chosen as we are trying to
|
||||
reduce x to the range [-0.25, 0.5]. Inside this range, k is 0.
|
||||
Outside this range, if k is reinterpreted as (NOT CONVERTED TO) float:
|
||||
let k = sign * 2^p where sign = -1 if x < 0
|
||||
1 otherwise
|
||||
and p is a negative integer whose magnitude increases with the
|
||||
magnitude of x. */
|
||||
int k = (asuint (m) - 0x3f400000) & 0xff800000;
|
||||
|
||||
/* By using integer arithmetic, we obtain the necessary scaling by
|
||||
subtracting the unbiased exponent of k from the exponent of x. */
|
||||
float m_scale = asfloat (asuint (x) - k);
|
||||
|
||||
/* Scale up to ensure that the scale factor is representable as normalised
|
||||
fp32 number (s in [2**-126,2**26]), and scale m down accordingly. */
|
||||
float s = asfloat (asuint (4.0f) - k);
|
||||
m_scale = m_scale + fmaf (0.25f, s, -1.0f);
|
||||
|
||||
float p = eval_poly (m_scale, biased_exponent (asuint (m_scale)));
|
||||
|
||||
/* The scale factor to be applied back at the end - by multiplying float(k)
|
||||
by 2^-23 we get the unbiased exponent of k. */
|
||||
float scale_back = (float) k * 0x1.0p-23f;
|
||||
|
||||
/* Apply the scaling back. */
|
||||
return fmaf (scale_back, Ln2, p);
|
||||
}
|
|
@ -1,9 +1,9 @@
|
|||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi│
|
||||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||
│ │
|
||||
│ Musl Libc │
|
||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
||||
│ Optimized Routines │
|
||||
│ Copyright (c) 1999-2022, Arm Limited. │
|
||||
│ │
|
||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||
│ a copy of this software and associated documentation files (the │
|
||||
|
@ -25,46 +25,17 @@
|
|||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||
│ │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/math.h"
|
||||
#include "libc/tinymath/feval.internal.h"
|
||||
#include "libc/tinymath/kernel.internal.h"
|
||||
#include "libc/tinymath/log1pf_data.internal.h"
|
||||
|
||||
asm(".ident\t\"\\n\\n\
|
||||
Musl libc (MIT License)\\n\
|
||||
Copyright 2005-2014 Rich Felker, et. al.\"");
|
||||
Optimized Routines (MIT License)\\n\
|
||||
Copyright 2022 ARM Limited\"");
|
||||
asm(".include \"libc/disclaimer.inc\"");
|
||||
/* clang-format off */
|
||||
|
||||
/**
|
||||
* Returns 𝑥 × 2ʸ.
|
||||
*/
|
||||
double ldexp(double x, int n)
|
||||
{
|
||||
union {double f; uint64_t i;} u;
|
||||
double_t y = x;
|
||||
|
||||
if (n > 1023) {
|
||||
y *= 0x1p1023;
|
||||
n -= 1023;
|
||||
if (n > 1023) {
|
||||
y *= 0x1p1023;
|
||||
n -= 1023;
|
||||
if (n > 1023)
|
||||
n = 1023;
|
||||
}
|
||||
} else if (n < -1022) {
|
||||
/* make sure final n < -53 to avoid double
|
||||
rounding in the subnormal range */
|
||||
y *= 0x1p-1022 * 0x1p53;
|
||||
n += 1022 - 53;
|
||||
if (n < -1022) {
|
||||
y *= 0x1p-1022 * 0x1p53;
|
||||
n += 1022 - 53;
|
||||
if (n < -1022)
|
||||
n = -1022;
|
||||
}
|
||||
}
|
||||
u.i = (uint64_t)(0x3ff+n)<<52;
|
||||
x = y * u.f;
|
||||
return x;
|
||||
}
|
||||
/* Polynomial coefficients generated using floating-point minimax
|
||||
algorithm, see tools/log1pf.sollya for details. */
|
||||
const struct log1pf_data __log1pf_data
|
||||
= {.coeffs = {-0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f,
|
||||
-0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, 0x1.abcb6p-4f,
|
||||
-0x1.6f0d5ep-5f}};
|
15
libc/tinymath/log1pf_data.internal.h
Normal file
15
libc/tinymath/log1pf_data.internal.h
Normal file
|
@ -0,0 +1,15 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG1PF_DATA_H_
|
||||
#define COSMOPOLITAN_LIBC_TINYMATH_LOG1PF_DATA_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
#define LOG1PF_2U5
|
||||
#define V_LOG1PF_2U5
|
||||
#define LOG1PF_NCOEFFS 9
|
||||
extern const struct log1pf_data {
|
||||
float coeffs[LOG1PF_NCOEFFS];
|
||||
} __log1pf_data _Hide;
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG1PF_DATA_H_ */
|
|
@ -1,33 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Returns log₂ₓ exponent part of long double.
|
||||
//
|
||||
// @param 𝑥 is long double passed on stack
|
||||
// @return result in %st0
|
||||
logbl: push %rbp
|
||||
mov %rsp,%rbp
|
||||
.profilable
|
||||
fldt 16(%rbp)
|
||||
fxtract
|
||||
fstp %st
|
||||
pop %rbp
|
||||
ret
|
||||
.endfn logbl,globl
|
|
@ -18,12 +18,14 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/errno.h"
|
||||
#include "libc/math.h"
|
||||
#include "third_party/libcxx/math.h"
|
||||
|
||||
/**
|
||||
* Returns 𝑥^𝑦.
|
||||
* @note should take ~56ns
|
||||
*/
|
||||
long double powl(long double x, long double y) {
|
||||
#ifdef __x86_64__
|
||||
long double t, u;
|
||||
if (!isunordered(x, y)) {
|
||||
if (!isinf(y)) {
|
||||
|
@ -85,4 +87,7 @@ long double powl(long double x, long double y) {
|
|||
} else {
|
||||
return NAN;
|
||||
}
|
||||
#else
|
||||
return pow(x, y);
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -1,26 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2022 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/math.h"
|
||||
|
||||
/**
|
||||
* Returns 𝑥 × 2ʸ.
|
||||
*/
|
||||
double scalbn(double x, int n) {
|
||||
return ldexp(x, n);
|
||||
}
|
|
@ -1,26 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2022 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/math.h"
|
||||
|
||||
/**
|
||||
* Returns 𝑥 × 2ʸ.
|
||||
*/
|
||||
float scalbnf(float x, int n) {
|
||||
return ldexpf(x, n);
|
||||
}
|
|
@ -27,7 +27,8 @@ LIBC_TINYMATH_A_DIRECTDEPS = \
|
|||
LIBC_INTRIN \
|
||||
LIBC_STUBS \
|
||||
LIBC_NEXGEN32E \
|
||||
LIBC_SYSV
|
||||
LIBC_SYSV \
|
||||
THIRD_PARTY_COMPILER_RT
|
||||
|
||||
LIBC_TINYMATH_A_DEPS := \
|
||||
$(call uniq,$(foreach x,$(LIBC_TINYMATH_A_DIRECTDEPS),$($(x))))
|
||||
|
@ -57,4 +58,6 @@ LIBC_TINYMATH_CHECKS = $(LIBC_TINYMATH_HDRS:%=o/$(MODE)/%.ok)
|
|||
$(LIBC_TINYMATH_OBJS): $(BUILD_FILES) libc/tinymath/tinymath.mk
|
||||
|
||||
.PHONY: o/$(MODE)/libc/tinymath
|
||||
o/$(MODE)/libc/tinymath: $(LIBC_TINYMATH_CHECKS)
|
||||
o/$(MODE)/libc/tinymath: \
|
||||
$(LIBC_TINYMATH_CHECKS) \
|
||||
o/$(MODE)/libc/tinymath/tinymath.a
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue