Add more libm unit tests and fixes

See #61
2025-10-06 14:41:02 +00:00 · 2021-03-02 13:57:23 -08:00 · 2021-03-02 13:57:23 -08:00 · 9367253b4d
commit 9367253b4d
parent 32e289b1d8
15 changed files with 390 additions and 124 deletions
--- a/libc/tinymath/ceil.S
+++ b/libc/tinymath/ceil.S
@ -17,28 +17,41 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
-.source	__FILE__

+//	Returns smallest integral not less than 𝑥.
+//
+//	@param	𝑥 is double scalar in low half of %xmm0
+//	@return	double scalar in low half of %xmm0
+//	@see	round(),rint(),nearbyint()
+//	@see vroundsd $_MM_FROUND_TO_POS_INF|_MM_FROUND_NO_EXC,%xmm0,%xmm0,%xmm0
 ceil:	.leafprologue
 	.profilable
-	movsd	nan(%rip),%xmm1
-	movsd	sig(%rip),%xmm2
-	andpd	%xmm0,%xmm1
-	comisd	%xmm1,%xmm2
+	movsd	4f(%rip),%xmm3
+	movsd	2f(%rip),%xmm4
+	movapd	%xmm0,%xmm2
+	movapd	%xmm0,%xmm1
+	andpd	%xmm3,%xmm2
+	ucomisd	%xmm2,%xmm4
 	jbe	1f
 	cvttsd2siq %xmm0,%rax
-	pxor	%xmm1,%xmm1
-	movsd	one(%rip),%xmm2
-	cvtsi2sdq %rax,%xmm1
-	cmpnlesd %xmm1,%xmm0
-	andpd	%xmm2,%xmm0
-	addsd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm2
+	movsd	3f(%rip),%xmm4
+	andnpd	%xmm1,%xmm3
+	cvtsi2sdq %rax,%xmm2
+	cmpnlesd %xmm2,%xmm0
+	andpd	%xmm4,%xmm0
+	addsd	%xmm2,%xmm0
+	orpd	%xmm3,%xmm0
 1:	.leafepilogue
 	.endfn	ceil,globl

 	.rodata.cst8
-nan:	.double	nan
-sig:	.quad	0x0010000000000000
-one:	.double	1
-
-//	vroundsd $_MM_FROUND_TO_POS_INF|_MM_FROUND_NO_EXC,%xmm0,%xmm0,%xmm0
+2:	.long	0x00000000
+	.long	0x43300000
+3:	.long	0x00000000
+	.long	0x3ff00000
+	.rodata.cst16
+4:	.long	0xffffffff
+	.long	0x7fffffff
+	.long	0x00000000
+	.long	0x00000000
--- a/libc/tinymath/ceilf.S
+++ b/libc/tinymath/ceilf.S
@ -17,34 +17,39 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
-.source	__FILE__

+//	Returns smallest integral not less than 𝑥.
+//
+//	@param	𝑥 is float scalar in low quarter of %xmm0
+//	@return	float scalar in low quarter of %xmm0
+//	@see	round(),rint(),nearbyint()
+//	@see vroundss $_MM_FROUND_TO_POS_INF|_MM_FROUND_NO_EXC,%xmm0,%xmm0,%xmm0
 ceilf:	.leafprologue
 	.profilable
-	movss	.L3(%rip),%xmm1
-	andps	%xmm0,%xmm1
-	movss	.L2(%rip),%xmm2
-	comiss	%xmm1,%xmm2
+	movss	4f(%rip),%xmm3
+	movss	2f(%rip),%xmm4
+	movaps	%xmm0,%xmm2
+	movaps	%xmm0,%xmm1
+	andps	%xmm3,%xmm2
+	ucomiss	%xmm2,%xmm4
 	jbe	1f
-	cvttss2si %xmm0,%eax
-	pxor	%xmm1,%xmm1
-	movss	.L1(%rip),%xmm2
-	cvtsi2ss %eax,%xmm1
-	cmpnless %xmm1,%xmm0
-	andps	%xmm2,%xmm0
-	addss	%xmm1,%xmm0
+	cvttss2sil %xmm0,%eax
+	pxor	%xmm2,%xmm2
+	movss	3f(%rip),%xmm4
+	andnps	%xmm1,%xmm3
+	cvtsi2ssl %eax,%xmm2
+	cmpnless %xmm2,%xmm0
+	andps	%xmm4,%xmm0
+	addss	%xmm2,%xmm0
+	orps	%xmm3,%xmm0
 1:	.leafepilogue
 	.endfn	ceilf,globl

 	.rodata.cst4
-.L1:	.float	1.0
-.L2:	.long	1258291200
-
+2:	.long	0x4b000000
+3:	.long	0x3f800000
 	.rodata.cst16
-.L3:	.long	2147483647
-	.long	0
-	.long	0
-	.long	0
-
-//	TODO(jart):
-//	vroundss	$10,%xmm0,%xmm0,%xmm0
+4:	.long	0x7fffffff
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
--- a/libc/tinymath/ceill.S
+++ b/libc/tinymath/ceill.S
@ -17,19 +17,24 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
-.source	__FILE__

-ceill:	.profilable
-	sub	$24,%rsp
-	fldt	32(%rsp)
-	fnstcw	14(%rsp)
-	movzwl	14(%rsp),%eax
-	andb	$-13,%ah
-	orb	$8,%ah
-	movw	%ax,12(%rsp)
-	fldcw	12(%rsp)
+//	Returns smallest integral not less than 𝑥.
+//
+//	@param	𝑥 is long double passed on stack
+//	@return	long double in %st
+ceill:	pushq	%rbp
+	mov	%rsp,%rbp
+	.profilable
+	sub	$16,%rsp
+	fnstcw	-2(%rbp)
+	fldt	16(%rbp)
+	movzwl	-2(%rbp),%eax
+	and	$-13,%ah
+	or	$8,%ah
+	mov	%ax,-4(%rbp)
+	fldcw	-4(%rbp)
 	frndint
-	fldcw	14(%rsp)
-	add	$24,%rsp
+	fldcw	-2(%rbp)
+	leave
 	ret
 	.endfn	ceill,globl
--- a/libc/tinymath/floor.S
+++ b/libc/tinymath/floor.S
@ -17,31 +17,42 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
-.source	__FILE__
-
-//	vroundsd $_MM_FROUND_TO_NEG_INF|_MM_FROUND_NO_EXC,%xmm0,%xmm0,%xmm0

+//	Returns largest integral not greater than 𝑥.
+//
+//	@param	𝑥 is double scalar in low half of %xmm0
+//	@return	double scalar in low half of %xmm0
 floor:	.leafprologue
 	.profilable
-	movsd	4f(%rip),%xmm1
-	movsd	3f(%rip),%xmm2
-	andpd	%xmm0,%xmm1
-	comisd	%xmm1,%xmm2
+	movsd	4f(%rip),%xmm3
+	movsd	2f(%rip),%xmm4
+	movapd	%xmm0,%xmm2
+	movapd	%xmm0,%xmm1
+	andpd	%xmm3,%xmm2
+	ucomisd	%xmm2,%xmm4
 	jbe	1f
 	cvttsd2siq %xmm0,%rax
-	pxor	%xmm1,%xmm1
-	movsd	2f(%rip),%xmm2
-	cvtsi2sdq %rax,%xmm1
-	movapd	%xmm1,%xmm3
-	cmpnlesd %xmm0,%xmm3
-	movapd	%xmm3,%xmm0
-	andpd	%xmm2,%xmm0
-	subsd	%xmm0,%xmm1
-	movapd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm2
+	movsd	3f(%rip),%xmm4
+	andnpd	%xmm1,%xmm3
+	cvtsi2sdq %rax,%xmm2
+	movapd	%xmm2,%xmm5
+	cmpnlesd %xmm0,%xmm5
+	movapd	%xmm5,%xmm0
+	andpd	%xmm4,%xmm0
+	subsd	%xmm0,%xmm2
+	movapd	%xmm2,%xmm0
+	orpd	%xmm3,%xmm0
 1:	.leafepilogue
 	.endfn	floor,globl

 	.rodata.cst8
-2:	.double	1
-3:	.quad	0x0010000000000000
-4:	.double	nan
+2:	.long	0x00000000
+	.long	0x43300000
+3:	.long	0x00000000
+	.long	0x3ff00000
+	.rodata.cst16
+4:	.long	0xffffffff
+	.long	0x7fffffff
+	.long	0x00000000
+	.long	0x00000000
--- a/libc/tinymath/floorf.S
+++ b/libc/tinymath/floorf.S
@ -17,31 +17,40 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
-.source	__FILE__

+//	Returns largest integral not greater than 𝑥.
+//
+//	@param	𝑥 is float scalar in low quarter of %xmm0
+//	@return	float scalar in low quarter of %xmm0
 floorf:	.leafprologue
 	.profilable
-	movss	.LC8(%rip),%xmm1
-	andps	%xmm0,%xmm1
-	movss	.LC7(%rip),%xmm2
-	comiss	%xmm1,%xmm2
+	movss	4f(%rip),%xmm3
+	movss	2f(%rip),%xmm4
+	movaps	%xmm0,%xmm2
+	movaps	%xmm0,%xmm1
+	andps	%xmm3,%xmm2
+	ucomiss	%xmm2,%xmm4
 	jbe	1f
-	cvttss2si %xmm0,%eax
-	pxor	%xmm1,%xmm1
-	movss	.LC3(%rip),%xmm2
-	cvtsi2ss %eax,%xmm1
-	movaps	%xmm1,%xmm3
-	cmpnless %xmm0,%xmm3
-	movaps	%xmm3,%xmm0
-	andps	%xmm2,%xmm0
-	subss	%xmm0,%xmm1
-	movaps	%xmm1,%xmm0
+	cvttss2sil %xmm0,%eax
+	pxor	%xmm2,%xmm2
+	movss	3f(%rip),%xmm4
+	andnps	%xmm1,%xmm3
+	cvtsi2ssl %eax,%xmm2
+	movaps	%xmm2,%xmm5
+	cmpnless %xmm0,%xmm5
+	movaps	%xmm5,%xmm0
+	andps	%xmm4,%xmm0
+	subss	%xmm0,%xmm2
+	movaps	%xmm2,%xmm0
+	orps	%xmm3,%xmm0
 1:	.leafepilogue
 	.endfn	floorf,globl

 	.rodata.cst4
-.LC3:	.float	1.0
-.LC7:	.long	0x4b000000
-
+2:	.long	0x4b000000
+3:	.long	0x3f800000
 	.rodata.cst16
-.LC8:	.long	2147483647,0,0,0
+4:	.long	0x7fffffff
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
--- a/libc/tinymath/floorl.S
+++ b/libc/tinymath/floorl.S
@ -17,17 +17,23 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
-.source	__FILE__

-floorl:	.profilable
-	fldt	8(%rsp)
-	mov	$7,%al
-	fstcw	8(%rsp)
-	mov	9(%rsp),%ah
-	mov	%al,9(%rsp)
-	fldcw	8(%rsp)
+//	Returns largest integral not greater than 𝑥.
+//
+//	@param	𝑥 is long double passed on stack
+//	@return	long double in %st
+floorl:	pushq	%rbp
+	mov	%rsp,%rbp
+	sub	$16,%rsp
+	fnstcw	-2(%rbp)
+	fldt	16(%rbp)
+	movzwl	-2(%rbp),%eax
+	and	$-13,%ah
+	or	$4,%ah
+	mov	%ax,-4(%rbp)
+	fldcw	-4(%rbp)
 	frndint
-	mov	%ah,9(%rsp)
-	fldcw	8(%rsp)
+	fldcw	-2(%rbp)
+	leave
 	ret
 	.endfn	floorl,globl
--- a/libc/tinymath/powl.S
+++ b/libc/tinymath/powl.S
@ -16,6 +16,7 @@
 │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/runtime/pc.internal.h"
 #include "libc/macros.internal.h"

 //	Returns 𝑥^𝑦.
@ -24,12 +25,15 @@
 //	@param	𝑦 is the power, also pushed on stack, in reverse order
 //	@return	result of exponentiation on FPU stack in %st
 //	@note	Sun's fdlibm needs 2kLOC to do this for RISC lool
-//	@define	exp2l(fmodl(y*log2l(x),1))*exp2l(y)
+//	@define	z=y*log2(fabs(x)),copysign(exp2(fmod(z,1))*exp2(trunc(z)),x)
 powl:	push	%rbp
 	mov	%rsp,%rbp
 	.profilable
 	fldt	32(%rbp)
 	fldt	16(%rbp)
+	fxam
+	fstsw
+	fabs
 	fyl2x
 	fld1
 	fld	%st(1)
@ -39,7 +43,10 @@ powl:	push	%rbp
 	fscale
 	fxch
 	fstp	%st
-	pop	%rbp
+	test	$FPU_C1>>8,%ah
+	jz	1f
+	fchs
+1:	pop	%rbp
 	ret
 	.endfn	powl,globl
 	.alias	powl,__powl_finite
--- a/libc/tinymath/trunc.S
+++ b/libc/tinymath/trunc.S
@ -17,7 +17,6 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
-.source	__FILE__

 //	Rounds to integer, toward zero.
 //
@ -28,17 +27,26 @@
 //	@see	roundsd $_MM_FROUND_TO_ZERO|_MM_FROUND_NO_EXC,%xmm0,%xmm0
 trunc:	.leafprologue
 	.profilable
-	movsd	3f(%rip),%xmm1
-	movsd	2f(%rip),%xmm2
-	andpd	%xmm0,%xmm1
-	comisd	%xmm1,%xmm2
+	movsd	3f(%rip),%xmm2
+	movsd	2f(%rip),%xmm4
+	movapd	%xmm0,%xmm3
+	movapd	%xmm0,%xmm1
+	andpd	%xmm2,%xmm3
+	ucomisd	%xmm3,%xmm4
 	jbe	1f
 	cvttsd2siq %xmm0,%rax
 	pxor	%xmm0,%xmm0
+	andnpd	%xmm1,%xmm2
 	cvtsi2sdq %rax,%xmm0
+	orpd	%xmm2,%xmm0
 1:	.leafepilogue
 	.endfn	trunc,globl

 	.rodata.cst8
-2:	.quad	0x0010000000000000
-3:	.double	nan
+2:	.long	0x00000000
+	.long	0x43300000
+	.rodata.cst16
+3:	.long	0xffffffff
+	.long	0x7fffffff
+	.long	0x00000000
+	.long	0x00000000
--- a/libc/tinymath/truncf.S
+++ b/libc/tinymath/truncf.S
@ -17,18 +17,28 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
-.source	__FILE__

+//	Rounds to integer, toward zero.
+//
+//	@param	𝑥 is float scalar in low quarter of %xmm0
+//	@return	float scalar in low quarter of %xmm0
+//	@define	copysign(floor(fabs(𝑥)),𝑥)
+//	@see	round(),rint(),nearbyint()
+//	@see	roundss $_MM_FROUND_TO_ZERO|_MM_FROUND_NO_EXC,%xmm0,%xmm0
 truncf:	.leafprologue
 	.profilable
-	movss	3f(%rip),%xmm1
-	andps	%xmm0,%xmm1
-	movss	2f(%rip),%xmm2
-	comiss	%xmm1,%xmm2
+	movss	3f(%rip),%xmm2
+	movss	2f(%rip),%xmm4
+	movaps	%xmm0,%xmm3
+	movaps	%xmm0,%xmm1
+	andps	%xmm2,%xmm3
+	ucomiss	%xmm3,%xmm4
 	jbe	1f
-	cvttss2si %xmm0,%eax
+	cvttss2sil	%xmm0,%eax
 	pxor	%xmm0,%xmm0
-	cvtsi2ss %eax,%xmm0
+	andnps	%xmm1,%xmm2
+	cvtsi2ssl	%eax,%xmm0
+	orps	%xmm2,%xmm0
 1:	.leafepilogue
 	.endfn	truncf,globl

@ -36,6 +46,3 @@ truncf:	.leafprologue
 2:	.long	0x4b000000
 	.rodata.cst16
 3:	.long	0x7fffffff,0,0,0
-
-//	TODO(jart)
-//	roundss $_MM_FROUND_TO_ZERO|_MM_FROUND_NO_EXC,%xmm0,%xmm0