Make quality improvements

- Write some more unit tests
- memcpy() on ARM is now faster
- Address the Musl complex math FIXME comments
- Some libm functions, such as pow(), now support setting errno
- Import the latest and greatest math functions from ARM
- Use more accurate atan2f() and log1pf() implementations
- atoi() and atol() will no longer saturate or clobber errno
This commit is contained in:
Justine Tunney 2024-02-25 14:57:28 -08:00
parent af8f2bd19f
commit 592f6ebc20
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
122 changed files with 6305 additions and 3859 deletions

View file

@ -1,9 +1,9 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Optimized Routines
Copyright (c) 1999-2022, Arm Limited.
Copyright (c) 2018-2024, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@ -25,22 +25,41 @@
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/tinymath/log_data.internal.h"
#include "libc/tinymath/arm.internal.h"
__static_yoink("arm_optimized_routines_notice");
/*
* Data for log.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define N (1 << LOG_TABLE_BITS)
const struct log_data __log_data = {
.ln2hi = 0x1.62e42fefa3800p-1,
.ln2lo = 0x1.ef35793c76730p-45,
.poly1 = {
#if LOG_POLY1_ORDER == 10
// relative error: 0x1.32eccc6p-62
// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
-0x1p-1,
0x1.55555555554e5p-2,
-0x1.0000000000af2p-2,
0x1.9999999bbe436p-3,
-0x1.55555537f9cdep-3,
0x1.24922fc8127cfp-3,
-0x1.0000b7d6bb612p-3,
0x1.c806ee1ddbcafp-4,
-0x1.972335a9c2d6ep-4,
#elif LOG_POLY1_ORDER == 11
// relative error: 0x1.52c8b708p-68
// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
-0x1p-1,
0x1.5555555555555p-2,
-0x1.ffffffffffea9p-3,
0x1.999999999c4d4p-3,
-0x1.55555557f5541p-3,
0x1.249248fbe33e4p-3,
-0x1.ffffc9a3c825bp-4,
0x1.c71e1f204435dp-4,
-0x1.9a7f26377d06ep-4,
0x1.71c30cf8f7364p-4,
#elif LOG_POLY1_ORDER == 12
// relative error: 0x1.c04d76cp-63
// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
-0x1p-1,
@ -54,8 +73,20 @@ const struct log_data __log_data = {
-0x1.999eb43b068ffp-4,
0x1.78182f7afd085p-4,
-0x1.5521375d145cdp-4,
#endif
},
.poly = {
#if N == 64 && LOG_POLY_ORDER == 7
// relative error: 0x1.906eb8ap-58
// abs error: 0x1.d2cad5a8p-67
// in -0x1.fp-8 0x1.fp-8
-0x1.0000000000027p-1,
0x1.555555555556ap-2,
-0x1.fffffff0440bap-3,
0x1.99999991906c3p-3,
-0x1.555c8d7e8201ep-3,
0x1.24978c59151fap-3,
#elif N == 128 && LOG_POLY_ORDER == 6
// relative error: 0x1.926199e8p-56
// abs error: 0x1.882ff33p-65
// in -0x1.fp-9 0x1.fp-9
@ -64,6 +95,17 @@ const struct log_data __log_data = {
-0x1.fffffffeb459p-3,
0x1.999b324f10111p-3,
-0x1.55575e506c89fp-3,
#elif N == 128 && LOG_POLY_ORDER == 7
// relative error: 0x1.649fc4bp-64
// abs error: 0x1.c3b5769p-74
// in -0x1.fp-9 0x1.fp-9
-0x1.0000000000001p-1,
0x1.5555555555556p-2,
-0x1.fffffffea1a8p-3,
0x1.99999998e9139p-3,
-0x1.555776801b968p-3,
0x1.2493c29331a5cp-3,
#endif
},
/* Algorithm:
@ -92,6 +134,72 @@ a single rounding error when there is no fast fma for z*invc - 1, 3) ensures
that logc + poly(z/c - 1) has small error, however near x == 1 when
|log(x)| < 0x1p-4, this is not enough so that is special cased. */
.tab = {
#if N == 64
{0x1.7242886495cd8p+0, -0x1.79e267bdfe000p-2},
{0x1.6e1f769340dc9p+0, -0x1.6e60ee0ecb000p-2},
{0x1.6a13ccc8f195cp+0, -0x1.63002fdbf6000p-2},
{0x1.661ec72e86f3ap+0, -0x1.57bf76c597000p-2},
{0x1.623fa6c447b16p+0, -0x1.4c9e07f0d2000p-2},
{0x1.5e75bbca31702p+0, -0x1.419b42f027000p-2},
{0x1.5ac05655adb10p+0, -0x1.36b67660e6000p-2},
{0x1.571ed3e940191p+0, -0x1.2bef0839e4800p-2},
{0x1.539094ac0fbbfp+0, -0x1.21445727cb000p-2},
{0x1.5015007e7fc42p+0, -0x1.16b5ca3c3d000p-2},
{0x1.4cab877c31cf9p+0, -0x1.0c42d3805f800p-2},
{0x1.49539e76a88d3p+0, -0x1.01eae61b60800p-2},
{0x1.460cbc12211dap+0, -0x1.ef5adb9fb0000p-3},
{0x1.42d6624debe3ap+0, -0x1.db13daab99000p-3},
{0x1.3fb0144f0d462p+0, -0x1.c6ffbe896e000p-3},
{0x1.3c995a1f9a9b4p+0, -0x1.b31d84722d000p-3},
{0x1.3991c23952500p+0, -0x1.9f6c3cf6eb000p-3},
{0x1.3698df35eaa14p+0, -0x1.8beafe7f13000p-3},
{0x1.33ae463091760p+0, -0x1.7898db878d000p-3},
{0x1.30d190aae3d72p+0, -0x1.6574efe4ec000p-3},
{0x1.2e025c9203c89p+0, -0x1.527e620845000p-3},
{0x1.2b404a7244988p+0, -0x1.3fb457d798000p-3},
{0x1.288b01dc19544p+0, -0x1.2d1615a077000p-3},
{0x1.25e2268085f69p+0, -0x1.1aa2b431e5000p-3},
{0x1.23456812abb74p+0, -0x1.08598f1d2b000p-3},
{0x1.20b4703174157p+0, -0x1.ec738fee40000p-4},
{0x1.1e2ef308b4e9bp+0, -0x1.c885768862000p-4},
{0x1.1bb4a36b70a3fp+0, -0x1.a4e75b6a46000p-4},
{0x1.194538e960658p+0, -0x1.8197efba9a000p-4},
{0x1.16e0692a10ac8p+0, -0x1.5e95ad734e000p-4},
{0x1.1485f1ba1568bp+0, -0x1.3bdf67117c000p-4},
{0x1.12358e123ed6fp+0, -0x1.1973b744f0000p-4},
{0x1.0fef01de37c8dp+0, -0x1.eea33446bc000p-5},
{0x1.0db20b82be414p+0, -0x1.aaef4ab304000p-5},
{0x1.0b7e6f67f69b3p+0, -0x1.67c962fd2c000p-5},
{0x1.0953f342fc108p+0, -0x1.252f29acf8000p-5},
{0x1.0732604ec956bp+0, -0x1.c63d19e9c0000p-6},
{0x1.051980117f9b0p+0, -0x1.432ab6a388000p-6},
{0x1.03091aa6810f1p+0, -0x1.8244357f50000p-7},
{0x1.01010152cf066p+0, -0x1.0080a711c0000p-8},
{0x1.fc07ef6b6e30bp-1, 0x1.fe03018e80000p-8},
{0x1.f4465aa1024afp-1, 0x1.7b91986450000p-6},
{0x1.ecc07a8fd3f5ep-1, 0x1.39e88608c8000p-5},
{0x1.e573ad856b537p-1, 0x1.b42dc6e624000p-5},
{0x1.de5d6dc7b8057p-1, 0x1.165372ec20000p-4},
{0x1.d77b6498bddf7p-1, 0x1.51b07a0170000p-4},
{0x1.d0cb580315c0fp-1, 0x1.8c3465c7ea000p-4},
{0x1.ca4b30d1cf449p-1, 0x1.c5e544a290000p-4},
{0x1.c3f8ef4810d8ep-1, 0x1.fec91aa0a6000p-4},
{0x1.bdd2b8b311f44p-1, 0x1.1b72acdc5c000p-3},
{0x1.b7d6c2eeac054p-1, 0x1.371fc65a98000p-3},
{0x1.b20363474c8f5p-1, 0x1.526e61c1aa000p-3},
{0x1.ac570165eeab1p-1, 0x1.6d60ffc240000p-3},
{0x1.a6d019f331df4p-1, 0x1.87fa08a013000p-3},
{0x1.a16d3ebc9e3c3p-1, 0x1.a23bc630c3000p-3},
{0x1.9c2d14567ef45p-1, 0x1.bc286a3512000p-3},
{0x1.970e4efae9169p-1, 0x1.d5c2195697000p-3},
{0x1.920fb3bd0b802p-1, 0x1.ef0ae132d3000p-3},
{0x1.8d3018b58699ap-1, 0x1.040259974e000p-2},
{0x1.886e5ff170ee6p-1, 0x1.1058bd40e2000p-2},
{0x1.83c977ad35d27p-1, 0x1.1c898c1137800p-2},
{0x1.7f405ed16c520p-1, 0x1.2895a3e65b000p-2},
{0x1.7ad220d0335c4p-1, 0x1.347dd8f6bd000p-2},
{0x1.767dce53474fdp-1, 0x1.4043083cb3800p-2},
#elif N == 128
{0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
{0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
{0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
@ -220,9 +328,76 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when
{0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
{0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
#endif
},
#if !__FP_FAST_FMA
#if !HAVE_FAST_FMA
.tab2 = {
# if N == 64
{0x1.61ffff94c4fecp-1, -0x1.9fe4fc998f325p-56},
{0x1.66000020377ddp-1, 0x1.e804c7a9519f2p-55},
{0x1.6a00004c41678p-1, 0x1.902c675d9ecfep-55},
{0x1.6dffff7384f87p-1, -0x1.2fd6b95e55043p-56},
{0x1.720000b37216ep-1, 0x1.802bc8d437043p-55},
{0x1.75ffffbeb3c9dp-1, 0x1.6047ad0a0d4e4p-57},
{0x1.7a0000628daep-1, -0x1.e00434b49313dp-56},
{0x1.7dffffd7abd1ap-1, -0x1.6015f8a083576p-56},
{0x1.81ffffdf40c54p-1, 0x1.7f54bf76a42c9p-57},
{0x1.860000f334e11p-1, 0x1.60054cb5344d7p-56},
{0x1.8a0001238aca7p-1, 0x1.c03c9bd132f55p-57},
{0x1.8dffffb81d212p-1, -0x1.001e519f2764fp-55},
{0x1.92000086adc7cp-1, 0x1.1fe40f88f49c6p-55},
{0x1.960000135d8eap-1, -0x1.f832268dc3095p-55},
{0x1.99ffff9435acp-1, 0x1.7031d8b835edcp-56},
{0x1.9e00003478565p-1, -0x1.0030b221ce3eep-58},
{0x1.a20000b592948p-1, 0x1.8fd2f1dbd4639p-55},
{0x1.a600000ad0bcfp-1, 0x1.901d6a974e6bep-55},
{0x1.a9ffff55953a5p-1, 0x1.a07556192db98p-57},
{0x1.adffff29ce03dp-1, -0x1.fff0717ec71c2p-56},
{0x1.b1ffff34f3ac8p-1, 0x1.8005573de89d1p-57},
{0x1.b60000894c55bp-1, -0x1.ff2fb51b044c7p-57},
{0x1.b9fffef45ec7dp-1, -0x1.9ff7c4e8730fp-56},
{0x1.be0000cda7b2ap-1, 0x1.57d058dbf3c1dp-55},
{0x1.c1ffff2c57917p-1, 0x1.7e66d7e48dbc9p-58},
{0x1.c60000ea5b82ap-1, -0x1.47f5e132ed4bep-55},
{0x1.ca0001121ae98p-1, -0x1.40958c8d5e00ap-58},
{0x1.ce0000f9241cbp-1, -0x1.7da063caa81c8p-59},
{0x1.d1fffe8be95a4p-1, -0x1.82e3a411afcd9p-59},
{0x1.d5ffff035932bp-1, -0x1.00f901b3fe87dp-58},
{0x1.d9fffe8b54ba7p-1, 0x1.ffef55d6e3a4p-55},
{0x1.de0000ad95d19p-1, 0x1.5feb2efd4c7c7p-55},
{0x1.e1fffe925ce47p-1, 0x1.c8085484eaf08p-55},
{0x1.e5fffe3ddf853p-1, -0x1.fd5ed02c5cadp-60},
{0x1.e9fffed0a0e5fp-1, -0x1.a80aaef411586p-55},
{0x1.ee00008f82eep-1, -0x1.b000aeaf97276p-55},
{0x1.f20000a22d2f4p-1, -0x1.8f8906e13eba3p-56},
{0x1.f5fffee35b57dp-1, 0x1.1fdd33b2d3714p-57},
{0x1.fa00014eec3a6p-1, -0x1.3ee0b7a18c1a5p-58},
{0x1.fdffff5daa89fp-1, -0x1.c1e24c8e3b503p-58},
{0x1.0200005b93349p+0, -0x1.50197fe6bedcap-54},
{0x1.05ffff9d597acp+0, 0x1.20160d062d0dcp-55},
{0x1.0a00005687a63p+0, -0x1.27f3f9307696ep-54},
{0x1.0dffff779164ep+0, 0x1.b7eb40bb9c4f4p-54},
{0x1.12000044a0aa8p+0, 0x1.efbc914d512c4p-55},
{0x1.16000069685bcp+0, -0x1.c0bea3eb2d82cp-57},
{0x1.1a000093f0d78p+0, 0x1.1fecbf1e8c52p-54},
{0x1.1dffffb2b1457p+0, -0x1.3fc91365637d6p-55},
{0x1.2200008824a1p+0, -0x1.dff7e9feb578ap-54},
{0x1.25ffffeef953p+0, -0x1.b00a61ec912f7p-55},
{0x1.2a0000a1e7783p+0, 0x1.60048318b0483p-56},
{0x1.2e0000853d4c7p+0, -0x1.77fbedf2c8cf3p-54},
{0x1.320000324c55bp+0, 0x1.f81983997354fp-54},
{0x1.360000594f796p+0, -0x1.cfe4beff900a9p-54},
{0x1.3a0000a4c1c0fp+0, 0x1.07dbb2e268d0ep-54},
{0x1.3e0000751c61bp+0, 0x1.80583ed1c566ep-56},
{0x1.42000069e8a9fp+0, 0x1.f01f1edf82045p-54},
{0x1.460000b5a1e34p+0, -0x1.dfdf0cf45c14ap-55},
{0x1.4a0000187e513p+0, 0x1.401306b83a98dp-55},
{0x1.4dffff3ba420bp+0, 0x1.9fc6539a6454ep-56},
{0x1.51fffffe391c9p+0, -0x1.601ef3353ac83p-54},
{0x1.560000e342455p+0, 0x1.3fb7fac8ac151p-55},
{0x1.59ffffc39676fp+0, 0x1.4fe7dd6659cc2p-55},
{0x1.5dfffff10ef42p+0, -0x1.48154cb592bcbp-54},
# elif N == 128
{0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
{0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
{0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
@ -351,6 +526,7 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when
{0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
{0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
},
#endif
},
#endif /* !HAVE_FAST_FMA */
};