Add more libm unit tests and fixes

See #61
This commit is contained in:
Justine Tunney 2021-03-02 13:57:23 -08:00
parent 32e289b1d8
commit 9367253b4d
15 changed files with 390 additions and 124 deletions

View file

@ -17,28 +17,41 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
.source __FILE__
// Returns smallest integral not less than 𝑥.
//
// @param 𝑥 is double scalar in low half of %xmm0
// @return double scalar in low half of %xmm0
// @see round(),rint(),nearbyint()
// @see vroundsd $_MM_FROUND_TO_POS_INF|_MM_FROUND_NO_EXC,%xmm0,%xmm0,%xmm0
//
// NOTE(review): this listing looks like a rendered diff whose removed
// and added lines are interleaved without +/- markers: one group of
// instructions uses the named constants nan/sig/one, the other uses
// the numeric local labels 2/3/4. The comments below describe each
// group separately; confirm against the real file before assembling.
ceil: .leafprologue
.profilable
// Group A (named constants): %xmm1 = 𝑥 & nan, then sig is compared
// against that value.
movsd nan(%rip),%xmm1
movsd sig(%rip),%xmm2
andpd %xmm0,%xmm1
comisd %xmm1,%xmm2
// Group B (numeric labels): %xmm3 = mask that clears the sign bit,
// %xmm4 = 2^52, %xmm1 = copy of 𝑥, %xmm2 = |𝑥|.
movsd 4f(%rip),%xmm3
movsd 2f(%rip),%xmm4
movapd %xmm0,%xmm2
movapd %xmm0,%xmm1
andpd %xmm3,%xmm2
ucomisd %xmm2,%xmm4
// Taken on below-or-equal or unordered. After group B's compare that
// means 2^52 <= |𝑥| or 𝑥 is NaN, in which case 𝑥 is returned as-is
// (doubles of magnitude 2^52 and above have no fractional bits).
jbe 1f
// %rax = 𝑥 truncated toward zero as a signed 64-bit integer.
cvttsd2siq %xmm0,%rax
// Group A: %xmm1 = (double)%rax; the compare turns %xmm0 into an
// all-ones mask when !(𝑥 <= %xmm1), the mask selects 1.0 or 0.0, and
// that is added to the truncated value.
pxor %xmm1,%xmm1
movsd one(%rip),%xmm2
cvtsi2sdq %rax,%xmm1
cmpnlesd %xmm1,%xmm0
andpd %xmm2,%xmm0
addsd %xmm1,%xmm0
// Group B: same adjustment with %xmm2 as the truncated value and
// %xmm4 = 1.0. The pxor zeroes the destination before cvtsi2sdq
// merges into it. andnpd leaves only the bits of %xmm1 that the mask
// in %xmm3 clears (the sign bit of 𝑥), and the final orpd puts that
// sign bit back on the result.
pxor %xmm2,%xmm2
movsd 3f(%rip),%xmm4
andnpd %xmm1,%xmm3
cvtsi2sdq %rax,%xmm2
cmpnlesd %xmm2,%xmm0
andpd %xmm4,%xmm0
addsd %xmm2,%xmm0
orpd %xmm3,%xmm0
1: .leafepilogue
.endfn ceil,globl
.rodata.cst8
// Constants read by group A. 0x0010000000000000 is the bit pattern
// of the smallest normal double (2^-1022).
nan: .double nan
sig: .quad 0x0010000000000000
one: .double 1
// vroundsd $_MM_FROUND_TO_POS_INF|_MM_FROUND_NO_EXC,%xmm0,%xmm0,%xmm0
// Constants read by group B, written as little-endian 32-bit halves.
// 2: 0x4330000000000000 = 2^52
2: .long 0x00000000
.long 0x43300000
// 3: 0x3ff0000000000000 = 1.0
3: .long 0x00000000
.long 0x3ff00000
.rodata.cst16
// 4: low quadword 0x7fffffffffffffff = every bit except the sign bit
4: .long 0xffffffff
.long 0x7fffffff
.long 0x00000000
.long 0x00000000

View file

@ -17,34 +17,39 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
.source __FILE__
// Returns smallest integral not less than 𝑥.
//
// @param 𝑥 is float scalar in low quarter of %xmm0
// @return float scalar in low quarter of %xmm0
// @see round(),rint(),nearbyint()
// @see vroundss $_MM_FROUND_TO_POS_INF|_MM_FROUND_NO_EXC,%xmm0,%xmm0,%xmm0
//
// NOTE(review): this listing looks like a rendered diff whose removed
// and added lines are interleaved without +/- markers: one group of
// instructions uses .L1/.L2/.L3, the other uses the numeric local
// labels 2/3/4. The comments below describe each group separately;
// confirm against the real file before assembling.
ceilf: .leafprologue
.profilable
// Group A (.L labels): %xmm1 = |𝑥| via the 0x7fffffff mask, then the
// threshold in .L2 (2^23) is compared against it.
movss .L3(%rip),%xmm1
andps %xmm0,%xmm1
movss .L2(%rip),%xmm2
comiss %xmm1,%xmm2
// Group B (numeric labels): %xmm3 = mask that clears the sign bit,
// %xmm4 = 2^23, %xmm1 = copy of 𝑥, %xmm2 = |𝑥|.
movss 4f(%rip),%xmm3
movss 2f(%rip),%xmm4
movaps %xmm0,%xmm2
movaps %xmm0,%xmm1
andps %xmm3,%xmm2
ucomiss %xmm2,%xmm4
// Taken on below-or-equal or unordered, i.e. 2^23 <= |𝑥| or 𝑥 is NaN;
// 𝑥 is then returned as-is (floats of magnitude 2^23 and above have
// no fractional bits).
jbe 1f
// Group A: %eax = 𝑥 truncated toward zero, %xmm1 = (float)%eax; the
// compare turns %xmm0 into an all-ones mask when !(𝑥 <= %xmm1), the
// mask selects 1.0 or 0.0, and that is added to the truncated value.
cvttss2si %xmm0,%eax
pxor %xmm1,%xmm1
movss .L1(%rip),%xmm2
cvtsi2ss %eax,%xmm1
cmpnless %xmm1,%xmm0
andps %xmm2,%xmm0
addss %xmm1,%xmm0
// Group B: same adjustment with %xmm2 as the truncated value and
// %xmm4 = 1.0. andnps leaves only the bits of %xmm1 that the mask in
// %xmm3 clears (the sign bit of 𝑥), and the final orps puts that
// sign bit back on the result.
cvttss2sil %xmm0,%eax
pxor %xmm2,%xmm2
movss 3f(%rip),%xmm4
andnps %xmm1,%xmm3
cvtsi2ssl %eax,%xmm2
cmpnless %xmm2,%xmm0
andps %xmm4,%xmm0
addss %xmm2,%xmm0
orps %xmm3,%xmm0
1: .leafepilogue
.endfn ceilf,globl
.rodata.cst4
// Constants read by group A; 1258291200 == 0x4b000000 == 2^23 as a
// float bit pattern.
.L1: .float 1.0
.L2: .long 1258291200
// Constants read by group B: 2^23 and 1.0 as float bit patterns.
2: .long 0x4b000000
3: .long 0x3f800000
.rodata.cst16
// Group A mask; 2147483647 == 0x7fffffff (every bit except the sign
// bit of the low float).
.L3: .long 2147483647
.long 0
.long 0
.long 0
// TODO(jart):
// vroundss $10,%xmm0,%xmm0,%xmm0
// Group B mask: every bit except the sign bit of the low float.
4: .long 0x7fffffff
.long 0x00000000
.long 0x00000000
.long 0x00000000

View file

@ -17,19 +17,24 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
.source __FILE__
// NOTE(review): this listing looks like a rendered diff whose removed
// and added lines are interleaved without +/- markers: the label
// ceill appears twice, once with a frameless %rsp-relative body and
// once with an %rbp frame. The comments below describe each group
// separately; confirm against the real file before assembling.
//
// Group A (frameless): reserves 24 bytes, loads the argument from
// 32(%rsp), saves the x87 control word at 14(%rsp), and builds a
// modified copy at 12(%rsp) with the rounding-control field (bits
// 10-11) set to 10b, i.e. round toward +infinity.
ceill: .profilable
sub $24,%rsp
fldt 32(%rsp)
fnstcw 14(%rsp)
movzwl 14(%rsp),%eax
andb $-13,%ah
orb $8,%ah
movw %ax,12(%rsp)
fldcw 12(%rsp)
// Returns smallest integral not less than 𝑥.
//
// @param 𝑥 is long double passed on stack
// @return long double in %st
// Group B (%rbp frame): the argument is at 16(%rbp), the saved
// control word at -2(%rbp) and the modified copy at -4(%rbp).
ceill: pushq %rbp
mov %rsp,%rbp
.profilable
sub $16,%rsp
fnstcw -2(%rbp)
fldt 16(%rbp)
movzwl -2(%rbp),%eax
// $-13 is 0xf3: clears bits 2-3 of %ah, which are bits 10-11 (rounding
// control) of the control word; $8 then sets bit 11, giving 10b.
and $-13,%ah
or $8,%ah
mov %ax,-4(%rbp)
fldcw -4(%rbp)
// Rounds %st to an integer using the rounding mode just loaded.
frndint
// Group A: restore the saved control word and release the 24 bytes.
fldcw 14(%rsp)
add $24,%rsp
// Group B: restore the saved control word and tear down the frame.
fldcw -2(%rbp)
leave
ret
.endfn ceill,globl

View file

@ -17,31 +17,42 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
.source __FILE__
// vroundsd $_MM_FROUND_TO_NEG_INF|_MM_FROUND_NO_EXC,%xmm0,%xmm0,%xmm0
// Returns largest integral not greater than 𝑥.
//
// @param 𝑥 is double scalar in low half of %xmm0
// @return double scalar in low half of %xmm0
//
// NOTE(review): this listing looks like a rendered diff whose removed
// and added lines are interleaved without +/- markers. Both groups
// use the numeric local labels 2/3/4, and each of those labels is
// defined twice in the constants below with different values. The
// comments describe each group separately; confirm against the real
// file before assembling.
floor: .leafprologue
.profilable
// Group A: %xmm1 = 𝑥 & (constant at 4), then the constant at 3 is
// compared against that value.
movsd 4f(%rip),%xmm1
movsd 3f(%rip),%xmm2
andpd %xmm0,%xmm1
comisd %xmm1,%xmm2
// Group B: %xmm3 = constant at 4, %xmm4 = constant at 2, %xmm1 = copy
// of 𝑥, %xmm2 = 𝑥 & %xmm3 (|𝑥| when 4 is the sign-clearing mask).
movsd 4f(%rip),%xmm3
movsd 2f(%rip),%xmm4
movapd %xmm0,%xmm2
movapd %xmm0,%xmm1
andpd %xmm3,%xmm2
ucomisd %xmm2,%xmm4
// Taken on below-or-equal or unordered; 𝑥 is then returned as-is.
// With 2 = 2^52 and 4 = abs mask this means 2^52 <= |𝑥| or NaN.
jbe 1f
// %rax = 𝑥 truncated toward zero as a signed 64-bit integer.
cvttsd2siq %xmm0,%rax
// Group A: %xmm1 = (double)%rax; %xmm3 becomes an all-ones mask when
// !(%xmm1 <= 𝑥), the mask selects the constant at 2 or 0.0, and that
// is subtracted from the truncated value.
pxor %xmm1,%xmm1
movsd 2f(%rip),%xmm2
cvtsi2sdq %rax,%xmm1
movapd %xmm1,%xmm3
cmpnlesd %xmm0,%xmm3
movapd %xmm3,%xmm0
andpd %xmm2,%xmm0
subsd %xmm0,%xmm1
movapd %xmm1,%xmm0
// Group B: same adjustment with %xmm2 as the truncated value, %xmm5
// as the compare mask and %xmm4 = constant at 3. andnpd leaves only
// the bits of %xmm1 that the mask in %xmm3 clears (the sign bit of
// 𝑥), and the final orpd puts that sign bit back on the result.
pxor %xmm2,%xmm2
movsd 3f(%rip),%xmm4
andnpd %xmm1,%xmm3
cvtsi2sdq %rax,%xmm2
movapd %xmm2,%xmm5
cmpnlesd %xmm0,%xmm5
movapd %xmm5,%xmm0
andpd %xmm4,%xmm0
subsd %xmm0,%xmm2
movapd %xmm2,%xmm0
orpd %xmm3,%xmm0
1: .leafepilogue
.endfn floor,globl
.rodata.cst8
// First definitions of 2/3/4 (presumably the ones group A reads):
// 1.0, the smallest normal double's bit pattern, and a NaN.
2: .double 1
3: .quad 0x0010000000000000
4: .double nan
// Second definitions of 2/3/4 (presumably the ones group B reads),
// written as little-endian 32-bit halves.
// 2: 0x4330000000000000 = 2^52
2: .long 0x00000000
.long 0x43300000
// 3: 0x3ff0000000000000 = 1.0
3: .long 0x00000000
.long 0x3ff00000
.rodata.cst16
// 4: low quadword 0x7fffffffffffffff = every bit except the sign bit
4: .long 0xffffffff
.long 0x7fffffff
.long 0x00000000
.long 0x00000000

View file

@ -17,31 +17,40 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
.source __FILE__
// Returns largest integral not greater than 𝑥.
//
// @param 𝑥 is float scalar in low quarter of %xmm0
// @return float scalar in low quarter of %xmm0
//
// NOTE(review): this listing looks like a rendered diff whose removed
// and added lines are interleaved without +/- markers: one group of
// instructions uses .LC3/.LC7/.LC8, the other uses the numeric local
// labels 2/3/4. The comments below describe each group separately;
// confirm against the real file before assembling.
floorf: .leafprologue
.profilable
// Group A (.LC labels): %xmm1 = |𝑥| via the 0x7fffffff mask, then the
// threshold in .LC7 (2^23) is compared against it.
movss .LC8(%rip),%xmm1
andps %xmm0,%xmm1
movss .LC7(%rip),%xmm2
comiss %xmm1,%xmm2
// Group B (numeric labels): %xmm3 = mask that clears the sign bit,
// %xmm4 = 2^23, %xmm1 = copy of 𝑥, %xmm2 = |𝑥|.
movss 4f(%rip),%xmm3
movss 2f(%rip),%xmm4
movaps %xmm0,%xmm2
movaps %xmm0,%xmm1
andps %xmm3,%xmm2
ucomiss %xmm2,%xmm4
// Taken on below-or-equal or unordered, i.e. 2^23 <= |𝑥| or 𝑥 is NaN;
// 𝑥 is then returned as-is (floats of magnitude 2^23 and above have
// no fractional bits).
jbe 1f
// Group A: %eax = 𝑥 truncated toward zero, %xmm1 = (float)%eax; %xmm3
// becomes an all-ones mask when !(%xmm1 <= 𝑥), the mask selects 1.0
// or 0.0, and that is subtracted from the truncated value.
cvttss2si %xmm0,%eax
pxor %xmm1,%xmm1
movss .LC3(%rip),%xmm2
cvtsi2ss %eax,%xmm1
movaps %xmm1,%xmm3
cmpnless %xmm0,%xmm3
movaps %xmm3,%xmm0
andps %xmm2,%xmm0
subss %xmm0,%xmm1
movaps %xmm1,%xmm0
// Group B: same adjustment with %xmm2 as the truncated value, %xmm5
// as the compare mask and %xmm4 = 1.0. andnps leaves only the bits
// of %xmm1 that the mask in %xmm3 clears (the sign bit of 𝑥), and
// the final orps puts that sign bit back on the result.
cvttss2sil %xmm0,%eax
pxor %xmm2,%xmm2
movss 3f(%rip),%xmm4
andnps %xmm1,%xmm3
cvtsi2ssl %eax,%xmm2
movaps %xmm2,%xmm5
cmpnless %xmm0,%xmm5
movaps %xmm5,%xmm0
andps %xmm4,%xmm0
subss %xmm0,%xmm2
movaps %xmm2,%xmm0
orps %xmm3,%xmm0
1: .leafepilogue
.endfn floorf,globl
.rodata.cst4
// Constants read by group A: 1.0 and 2^23 (0x4b000000).
.LC3: .float 1.0
.LC7: .long 0x4b000000
// Constants read by group B: 2^23 and 1.0 as float bit patterns.
2: .long 0x4b000000
3: .long 0x3f800000
.rodata.cst16
// Group A mask; 2147483647 == 0x7fffffff (every bit except the sign
// bit of the low float).
.LC8: .long 2147483647,0,0,0
// Group B mask: every bit except the sign bit of the low float.
4: .long 0x7fffffff
.long 0x00000000
.long 0x00000000
.long 0x00000000

View file

@ -17,17 +17,23 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
.source __FILE__
// NOTE(review): this listing looks like a rendered diff whose removed
// and added lines are interleaved without +/- markers: the label
// floorl appears twice, once with a frameless %rsp-relative body and
// once with an %rbp frame. The comments below describe each group
// separately; confirm against the real file before assembling.
//
// Group A (frameless): loads the argument from 8(%rsp), then stores
// the x87 control word over the first two bytes of that same slot.
// The original high byte of the control word is kept in %ah and
// replaced with 7 (binary 0111), which sets the rounding-control
// field (bits 10-11) to 01b, i.e. round toward -infinity.
floorl: .profilable
fldt 8(%rsp)
mov $7,%al
fstcw 8(%rsp)
mov 9(%rsp),%ah
mov %al,9(%rsp)
fldcw 8(%rsp)
// Returns largest integral not greater than 𝑥.
//
// @param 𝑥 is long double passed on stack
// @return long double in %st
// Group B (%rbp frame): the argument is at 16(%rbp), the saved
// control word at -2(%rbp) and the modified copy at -4(%rbp).
floorl: pushq %rbp
mov %rsp,%rbp
sub $16,%rsp
fnstcw -2(%rbp)
fldt 16(%rbp)
movzwl -2(%rbp),%eax
// $-13 is 0xf3: clears bits 2-3 of %ah, which are bits 10-11 (rounding
// control) of the control word; $4 then sets bit 10, giving 01b.
and $-13,%ah
or $4,%ah
mov %ax,-4(%rbp)
fldcw -4(%rbp)
// Rounds %st to an integer using the rounding mode just loaded.
frndint
// Group A: put the saved high byte back and reload the control word.
mov %ah,9(%rsp)
fldcw 8(%rsp)
// Group B: restore the saved control word and tear down the frame.
fldcw -2(%rbp)
leave
ret
.endfn floorl,globl

View file

@ -16,6 +16,7 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/runtime/pc.internal.h"
#include "libc/macros.internal.h"
// Returns 𝑥^𝑦.
@ -24,12 +25,15 @@
// @param 𝑦 is the power, also pushed on stack, in reverse order
// @return result of exponentiation on FPU stack in %st
// @note Sun's fdlibm needs 2kLOC to do this for RISC lool
// @define exp2l(fmodl(y*log2l(x),1))*exp2l(y)
// @define z=y*log2(fabs(x)),copysign(trunc(exp2(fmod(z,1)))*exp2(z),x)
powl: push %rbp
mov %rsp,%rbp
.profilable
fldt 32(%rbp)
fldt 16(%rbp)
fxam
fstsw
fabs
fyl2x
fld1
fld %st(1)
@ -39,7 +43,10 @@ powl: push %rbp
fscale
fxch
fstp %st
pop %rbp
test $FPU_C1>>8,%ah
jz 1f
fchs
1: pop %rbp
ret
.endfn powl,globl
.alias powl,__powl_finite

View file

@ -17,7 +17,6 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
.source __FILE__
// Rounds to integer, toward zero.
//
@ -28,17 +27,26 @@
// @see roundsd $_MM_FROUND_TO_ZERO|_MM_FROUND_NO_EXC,%xmm0,%xmm0
// NOTE(review): this listing looks like a rendered diff whose removed
// and added lines are interleaved without +/- markers. Both groups
// use the numeric local labels 2/3, and each label is defined twice
// in the constants below with different values. The comments
// describe each group separately; confirm against the real file
// before assembling.
trunc: .leafprologue
.profilable
// Group A: %xmm1 = 𝑥 & (constant at 3), then the constant at 2 is
// compared against that value.
movsd 3f(%rip),%xmm1
movsd 2f(%rip),%xmm2
andpd %xmm0,%xmm1
comisd %xmm1,%xmm2
// Group B: %xmm2 = constant at 3, %xmm4 = constant at 2, %xmm1 = copy
// of 𝑥, %xmm3 = 𝑥 & %xmm2 (|𝑥| when 3 is the sign-clearing mask).
movsd 3f(%rip),%xmm2
movsd 2f(%rip),%xmm4
movapd %xmm0,%xmm3
movapd %xmm0,%xmm1
andpd %xmm2,%xmm3
ucomisd %xmm3,%xmm4
// Taken on below-or-equal or unordered; 𝑥 is then returned as-is.
// With 2 = 2^52 and 3 = abs mask this means 2^52 <= |𝑥| or NaN.
jbe 1f
// %rax = 𝑥 truncated toward zero; %xmm0 is zeroed and then receives
// (double)%rax.
cvttsd2siq %xmm0,%rax
pxor %xmm0,%xmm0
// Group B only: keep just the bits of %xmm1 that the mask in %xmm2
// clears (the sign bit of 𝑥) so it can be put back below.
andnpd %xmm1,%xmm2
cvtsi2sdq %rax,%xmm0
// Group B only: restore the sign bit of 𝑥 on the result.
orpd %xmm2,%xmm0
1: .leafepilogue
.endfn trunc,globl
.rodata.cst8
// First definitions of 2/3 (presumably the ones group A reads): the
// smallest normal double's bit pattern and a NaN.
2: .quad 0x0010000000000000
3: .double nan
// Second definition of 2 (presumably the one group B reads), as
// little-endian 32-bit halves: 0x4330000000000000 = 2^52.
2: .long 0x00000000
.long 0x43300000
.rodata.cst16
// Second definition of 3: low quadword 0x7fffffffffffffff = every bit
// except the sign bit.
3: .long 0xffffffff
.long 0x7fffffff
.long 0x00000000
.long 0x00000000

View file

@ -17,18 +17,28 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
.source __FILE__
// Rounds to integer, toward zero.
//
// @param 𝑥 is float scalar in low quarter of %xmm0
// @return float scalar in low quarter of %xmm0
// @note unlike round(), no copysign(.5,𝑥) bias is added before truncating
// @see round(),rint(),nearbyint()
// @see roundss $_MM_FROUND_TO_ZERO|_MM_FROUND_NO_EXC,%xmm0,%xmm0
//
// NOTE(review): this listing looks like a rendered diff whose removed
// and added lines are interleaved without +/- markers; two variants
// of the same steps appear back to back. The comments below describe
// each group separately; confirm against the real file before
// assembling.
truncf: .leafprologue
.profilable
// Group A: %xmm1 = 𝑥 & (constant at 3), then the constant at 2 is
// compared against that value.
movss 3f(%rip),%xmm1
andps %xmm0,%xmm1
movss 2f(%rip),%xmm2
comiss %xmm1,%xmm2
// Group B: %xmm2 = constant at 3, %xmm4 = constant at 2, %xmm1 = copy
// of 𝑥, %xmm3 = 𝑥 & %xmm2.
movss 3f(%rip),%xmm2
movss 2f(%rip),%xmm4
movaps %xmm0,%xmm3
movaps %xmm0,%xmm1
andps %xmm2,%xmm3
ucomiss %xmm3,%xmm4
// Taken on below-or-equal or unordered; 𝑥 is then returned as-is.
// Presumably 2 is 2^23 (0x4b000000) and 3 is the 0x7fffffff abs mask,
// making this 2^23 <= |𝑥| or NaN; verify against the constants that
// follow the function.
jbe 1f
// %eax = 𝑥 truncated toward zero (the two lines are the group A and
// group B spellings of the same conversion).
cvttss2si %xmm0,%eax
cvttss2sil %xmm0,%eax
pxor %xmm0,%xmm0
// Group A: %xmm0 = (float)%eax.
cvtsi2ss %eax,%xmm0
// Group B: keep just the bits of %xmm1 that the mask in %xmm2 clears
// (the sign bit of 𝑥 under the assumption above), convert %eax back
// to float, then put those bits back on the result.
andnps %xmm1,%xmm2
cvtsi2ssl %eax,%xmm0
orps %xmm2,%xmm0
1: .leafepilogue
.endfn truncf,globl
@ -36,6 +46,3 @@ truncf: .leafprologue
2: .long 0x4b000000
.rodata.cst16
3: .long 0x7fffffff,0,0,0
// TODO(jart)
// roundss $_MM_FROUND_TO_ZERO|_MM_FROUND_NO_EXC,%xmm0,%xmm0