mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-22 21:32:31 +00:00
parent
32e289b1d8
commit
9367253b4d
15 changed files with 390 additions and 124 deletions
|
@ -17,28 +17,41 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
.source __FILE__
|
||||
|
||||
// Returns smallest integral value not less than 𝑥.
|
||||
//
|
||||
// @param 𝑥 is double scalar in low half of %xmm0
|
||||
// @return double scalar in low half of %xmm0
|
||||
// @see round(),rint(),nearbyint()
|
||||
// @see vroundsd $_MM_FROUND_TO_POS_INF|_MM_FROUND_NO_EXC,%xmm0,%xmm0,%xmm0
|
||||
ceil: .leafprologue
|
||||
.profilable
|
||||
movsd nan(%rip),%xmm1
|
||||
movsd sig(%rip),%xmm2
|
||||
andpd %xmm0,%xmm1
|
||||
comisd %xmm1,%xmm2
|
||||
movsd 4f(%rip),%xmm3
|
||||
movsd 2f(%rip),%xmm4
|
||||
movapd %xmm0,%xmm2
|
||||
movapd %xmm0,%xmm1
|
||||
andpd %xmm3,%xmm2
|
||||
ucomisd %xmm2,%xmm4
|
||||
jbe 1f
|
||||
cvttsd2siq %xmm0,%rax
|
||||
pxor %xmm1,%xmm1
|
||||
movsd one(%rip),%xmm2
|
||||
cvtsi2sdq %rax,%xmm1
|
||||
cmpnlesd %xmm1,%xmm0
|
||||
andpd %xmm2,%xmm0
|
||||
addsd %xmm1,%xmm0
|
||||
pxor %xmm2,%xmm2
|
||||
movsd 3f(%rip),%xmm4
|
||||
andnpd %xmm1,%xmm3
|
||||
cvtsi2sdq %rax,%xmm2
|
||||
cmpnlesd %xmm2,%xmm0
|
||||
andpd %xmm4,%xmm0
|
||||
addsd %xmm2,%xmm0
|
||||
orpd %xmm3,%xmm0
|
||||
1: .leafepilogue
|
||||
.endfn ceil,globl
|
||||
|
||||
.rodata.cst8
|
||||
nan: .double nan
|
||||
sig: .quad 0x0010000000000000
|
||||
one: .double 1
|
||||
|
||||
// vroundsd $_MM_FROUND_TO_POS_INF|_MM_FROUND_NO_EXC,%xmm0,%xmm0,%xmm0
|
||||
2: .long 0x00000000
|
||||
.long 0x43300000
|
||||
3: .long 0x00000000
|
||||
.long 0x3ff00000
|
||||
.rodata.cst16
|
||||
4: .long 0xffffffff
|
||||
.long 0x7fffffff
|
||||
.long 0x00000000
|
||||
.long 0x00000000
|
||||
|
|
|
@ -17,34 +17,39 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
.source __FILE__
|
||||
|
||||
// Returns smallest integral value not less than 𝑥.
|
||||
//
|
||||
// @param 𝑥 is float scalar in low quarter of %xmm0
|
||||
// @return float scalar in low quarter of %xmm0
|
||||
// @see round(),rint(),nearbyint()
|
||||
// @see vroundss $_MM_FROUND_TO_POS_INF|_MM_FROUND_NO_EXC,%xmm0,%xmm0,%xmm0
|
||||
ceilf: .leafprologue
|
||||
.profilable
|
||||
movss .L3(%rip),%xmm1
|
||||
andps %xmm0,%xmm1
|
||||
movss .L2(%rip),%xmm2
|
||||
comiss %xmm1,%xmm2
|
||||
movss 4f(%rip),%xmm3
|
||||
movss 2f(%rip),%xmm4
|
||||
movaps %xmm0,%xmm2
|
||||
movaps %xmm0,%xmm1
|
||||
andps %xmm3,%xmm2
|
||||
ucomiss %xmm2,%xmm4
|
||||
jbe 1f
|
||||
cvttss2si %xmm0,%eax
|
||||
pxor %xmm1,%xmm1
|
||||
movss .L1(%rip),%xmm2
|
||||
cvtsi2ss %eax,%xmm1
|
||||
cmpnless %xmm1,%xmm0
|
||||
andps %xmm2,%xmm0
|
||||
addss %xmm1,%xmm0
|
||||
cvttss2sil %xmm0,%eax
|
||||
pxor %xmm2,%xmm2
|
||||
movss 3f(%rip),%xmm4
|
||||
andnps %xmm1,%xmm3
|
||||
cvtsi2ssl %eax,%xmm2
|
||||
cmpnless %xmm2,%xmm0
|
||||
andps %xmm4,%xmm0
|
||||
addss %xmm2,%xmm0
|
||||
orps %xmm3,%xmm0
|
||||
1: .leafepilogue
|
||||
.endfn ceilf,globl
|
||||
|
||||
.rodata.cst4
|
||||
.L1: .float 1.0
|
||||
.L2: .long 1258291200
|
||||
|
||||
2: .long 0x4b000000
|
||||
3: .long 0x3f800000
|
||||
.rodata.cst16
|
||||
.L3: .long 2147483647
|
||||
.long 0
|
||||
.long 0
|
||||
.long 0
|
||||
|
||||
// TODO(jart):
|
||||
// vroundss $10,%xmm0,%xmm0,%xmm0
|
||||
4: .long 0x7fffffff
|
||||
.long 0x00000000
|
||||
.long 0x00000000
|
||||
.long 0x00000000
|
||||
|
|
|
@ -17,19 +17,24 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
.source __FILE__
|
||||
|
||||
ceill: .profilable
|
||||
sub $24,%rsp
|
||||
fldt 32(%rsp)
|
||||
fnstcw 14(%rsp)
|
||||
movzwl 14(%rsp),%eax
|
||||
andb $-13,%ah
|
||||
orb $8,%ah
|
||||
movw %ax,12(%rsp)
|
||||
fldcw 12(%rsp)
|
||||
// Returns smallest integral value not less than 𝑥.
|
||||
//
|
||||
// @param 𝑥 is long double passed on stack
|
||||
// @return long double in %st
|
||||
ceill: pushq %rbp
|
||||
mov %rsp,%rbp
|
||||
.profilable
|
||||
sub $16,%rsp
|
||||
fnstcw -2(%rbp)
|
||||
fldt 16(%rbp)
|
||||
movzwl -2(%rbp),%eax
|
||||
and $-13,%ah
|
||||
or $8,%ah
|
||||
mov %ax,-4(%rbp)
|
||||
fldcw -4(%rbp)
|
||||
frndint
|
||||
fldcw 14(%rsp)
|
||||
add $24,%rsp
|
||||
fldcw -2(%rbp)
|
||||
leave
|
||||
ret
|
||||
.endfn ceill,globl
|
||||
|
|
|
@ -17,31 +17,42 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
.source __FILE__
|
||||
|
||||
// vroundsd $_MM_FROUND_TO_NEG_INF|_MM_FROUND_NO_EXC,%xmm0,%xmm0,%xmm0
|
||||
|
||||
// Returns largest integral value not greater than 𝑥.
|
||||
//
|
||||
// @param 𝑥 is double scalar in low half of %xmm0
|
||||
// @return double scalar in low half of %xmm0
|
||||
floor: .leafprologue
|
||||
.profilable
|
||||
movsd 4f(%rip),%xmm1
|
||||
movsd 3f(%rip),%xmm2
|
||||
andpd %xmm0,%xmm1
|
||||
comisd %xmm1,%xmm2
|
||||
movsd 4f(%rip),%xmm3
|
||||
movsd 2f(%rip),%xmm4
|
||||
movapd %xmm0,%xmm2
|
||||
movapd %xmm0,%xmm1
|
||||
andpd %xmm3,%xmm2
|
||||
ucomisd %xmm2,%xmm4
|
||||
jbe 1f
|
||||
cvttsd2siq %xmm0,%rax
|
||||
pxor %xmm1,%xmm1
|
||||
movsd 2f(%rip),%xmm2
|
||||
cvtsi2sdq %rax,%xmm1
|
||||
movapd %xmm1,%xmm3
|
||||
cmpnlesd %xmm0,%xmm3
|
||||
movapd %xmm3,%xmm0
|
||||
andpd %xmm2,%xmm0
|
||||
subsd %xmm0,%xmm1
|
||||
movapd %xmm1,%xmm0
|
||||
pxor %xmm2,%xmm2
|
||||
movsd 3f(%rip),%xmm4
|
||||
andnpd %xmm1,%xmm3
|
||||
cvtsi2sdq %rax,%xmm2
|
||||
movapd %xmm2,%xmm5
|
||||
cmpnlesd %xmm0,%xmm5
|
||||
movapd %xmm5,%xmm0
|
||||
andpd %xmm4,%xmm0
|
||||
subsd %xmm0,%xmm2
|
||||
movapd %xmm2,%xmm0
|
||||
orpd %xmm3,%xmm0
|
||||
1: .leafepilogue
|
||||
.endfn floor,globl
|
||||
|
||||
.rodata.cst8
|
||||
2: .double 1
|
||||
3: .quad 0x0010000000000000
|
||||
4: .double nan
|
||||
2: .long 0x00000000
|
||||
.long 0x43300000
|
||||
3: .long 0x00000000
|
||||
.long 0x3ff00000
|
||||
.rodata.cst16
|
||||
4: .long 0xffffffff
|
||||
.long 0x7fffffff
|
||||
.long 0x00000000
|
||||
.long 0x00000000
|
||||
|
|
|
@ -17,31 +17,40 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
.source __FILE__
|
||||
|
||||
// Returns largest integral value not greater than 𝑥.
|
||||
//
|
||||
// @param 𝑥 is float scalar in low quarter of %xmm0
|
||||
// @return float scalar in low quarter of %xmm0
|
||||
floorf: .leafprologue
|
||||
.profilable
|
||||
movss .LC8(%rip),%xmm1
|
||||
andps %xmm0,%xmm1
|
||||
movss .LC7(%rip),%xmm2
|
||||
comiss %xmm1,%xmm2
|
||||
movss 4f(%rip),%xmm3
|
||||
movss 2f(%rip),%xmm4
|
||||
movaps %xmm0,%xmm2
|
||||
movaps %xmm0,%xmm1
|
||||
andps %xmm3,%xmm2
|
||||
ucomiss %xmm2,%xmm4
|
||||
jbe 1f
|
||||
cvttss2si %xmm0,%eax
|
||||
pxor %xmm1,%xmm1
|
||||
movss .LC3(%rip),%xmm2
|
||||
cvtsi2ss %eax,%xmm1
|
||||
movaps %xmm1,%xmm3
|
||||
cmpnless %xmm0,%xmm3
|
||||
movaps %xmm3,%xmm0
|
||||
andps %xmm2,%xmm0
|
||||
subss %xmm0,%xmm1
|
||||
movaps %xmm1,%xmm0
|
||||
cvttss2sil %xmm0,%eax
|
||||
pxor %xmm2,%xmm2
|
||||
movss 3f(%rip),%xmm4
|
||||
andnps %xmm1,%xmm3
|
||||
cvtsi2ssl %eax,%xmm2
|
||||
movaps %xmm2,%xmm5
|
||||
cmpnless %xmm0,%xmm5
|
||||
movaps %xmm5,%xmm0
|
||||
andps %xmm4,%xmm0
|
||||
subss %xmm0,%xmm2
|
||||
movaps %xmm2,%xmm0
|
||||
orps %xmm3,%xmm0
|
||||
1: .leafepilogue
|
||||
.endfn floorf,globl
|
||||
|
||||
.rodata.cst4
|
||||
.LC3: .float 1.0
|
||||
.LC7: .long 0x4b000000
|
||||
|
||||
2: .long 0x4b000000
|
||||
3: .long 0x3f800000
|
||||
.rodata.cst16
|
||||
.LC8: .long 2147483647,0,0,0
|
||||
4: .long 0x7fffffff
|
||||
.long 0x00000000
|
||||
.long 0x00000000
|
||||
.long 0x00000000
|
||||
|
|
|
@ -17,17 +17,23 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
.source __FILE__
|
||||
|
||||
floorl: .profilable
|
||||
fldt 8(%rsp)
|
||||
mov $7,%al
|
||||
fstcw 8(%rsp)
|
||||
mov 9(%rsp),%ah
|
||||
mov %al,9(%rsp)
|
||||
fldcw 8(%rsp)
|
||||
// Returns largest integral value not greater than 𝑥.
|
||||
//
|
||||
// @param 𝑥 is long double passed on stack
|
||||
// @return long double in %st
|
||||
floorl: pushq %rbp
|
||||
mov %rsp,%rbp
|
||||
sub $16,%rsp
|
||||
fnstcw -2(%rbp)
|
||||
fldt 16(%rbp)
|
||||
movzwl -2(%rbp),%eax
|
||||
and $-13,%ah
|
||||
or $4,%ah
|
||||
mov %ax,-4(%rbp)
|
||||
fldcw -4(%rbp)
|
||||
frndint
|
||||
mov %ah,9(%rsp)
|
||||
fldcw 8(%rsp)
|
||||
fldcw -2(%rbp)
|
||||
leave
|
||||
ret
|
||||
.endfn floorl,globl
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/runtime/pc.internal.h"
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Returns 𝑥^𝑦.
|
||||
|
@ -24,12 +25,15 @@
|
|||
// @param 𝑦 is the power, also pushed on stack, in reverse order
|
||||
// @return result of exponentiation on FPU stack in %st
|
||||
// @note Sun's fdlibm needs 2kLOC to do this for RISC lool
|
||||
// @define exp2l(fmodl(y*log2l(x),1))*exp2l(y)
|
||||
// @define z=y*log2(fabs(x)),copysign(trunc(exp2(fmod(z,1)))*exp2(z),x)
|
||||
powl: push %rbp
|
||||
mov %rsp,%rbp
|
||||
.profilable
|
||||
fldt 32(%rbp)
|
||||
fldt 16(%rbp)
|
||||
fxam
|
||||
fstsw
|
||||
fabs
|
||||
fyl2x
|
||||
fld1
|
||||
fld %st(1)
|
||||
|
@ -39,7 +43,10 @@ powl: push %rbp
|
|||
fscale
|
||||
fxch
|
||||
fstp %st
|
||||
pop %rbp
|
||||
test $FPU_C1>>8,%ah
|
||||
jz 1f
|
||||
fchs
|
||||
1: pop %rbp
|
||||
ret
|
||||
.endfn powl,globl
|
||||
.alias powl,__powl_finite
|
||||
|
|
|
@ -17,7 +17,6 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
.source __FILE__
|
||||
|
||||
// Rounds to integer, toward zero.
|
||||
//
|
||||
|
@ -28,17 +27,26 @@
|
|||
// @see roundsd $_MM_FROUND_TO_ZERO|_MM_FROUND_NO_EXC,%xmm0,%xmm0
|
||||
trunc: .leafprologue
|
||||
.profilable
|
||||
movsd 3f(%rip),%xmm1
|
||||
movsd 2f(%rip),%xmm2
|
||||
andpd %xmm0,%xmm1
|
||||
comisd %xmm1,%xmm2
|
||||
movsd 3f(%rip),%xmm2
|
||||
movsd 2f(%rip),%xmm4
|
||||
movapd %xmm0,%xmm3
|
||||
movapd %xmm0,%xmm1
|
||||
andpd %xmm2,%xmm3
|
||||
ucomisd %xmm3,%xmm4
|
||||
jbe 1f
|
||||
cvttsd2siq %xmm0,%rax
|
||||
pxor %xmm0,%xmm0
|
||||
andnpd %xmm1,%xmm2
|
||||
cvtsi2sdq %rax,%xmm0
|
||||
orpd %xmm2,%xmm0
|
||||
1: .leafepilogue
|
||||
.endfn trunc,globl
|
||||
|
||||
.rodata.cst8
|
||||
2: .quad 0x0010000000000000
|
||||
3: .double nan
|
||||
2: .long 0x00000000
|
||||
.long 0x43300000
|
||||
.rodata.cst16
|
||||
3: .long 0xffffffff
|
||||
.long 0x7fffffff
|
||||
.long 0x00000000
|
||||
.long 0x00000000
|
||||
|
|
|
@ -17,18 +17,28 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
.source __FILE__
|
||||
|
||||
// Rounds to integer, toward zero.
|
||||
//
|
||||
// @param 𝑥 is float scalar in low quarter of %xmm0
|
||||
// @return float scalar in low quarter of %xmm0
|
||||
// @see roundf() which is defined as truncf(𝑥+copysign(.5,𝑥))
|
||||
// @see round(),rint(),nearbyint()
|
||||
// @see roundss $_MM_FROUND_TO_ZERO|_MM_FROUND_NO_EXC,%xmm0,%xmm0
|
||||
truncf: .leafprologue
|
||||
.profilable
|
||||
movss 3f(%rip),%xmm1
|
||||
andps %xmm0,%xmm1
|
||||
movss 2f(%rip),%xmm2
|
||||
comiss %xmm1,%xmm2
|
||||
movss 3f(%rip),%xmm2
|
||||
movss 2f(%rip),%xmm4
|
||||
movaps %xmm0,%xmm3
|
||||
movaps %xmm0,%xmm1
|
||||
andps %xmm2,%xmm3
|
||||
ucomiss %xmm3,%xmm4
|
||||
jbe 1f
|
||||
cvttss2si %xmm0,%eax
|
||||
cvttss2sil %xmm0,%eax
|
||||
pxor %xmm0,%xmm0
|
||||
cvtsi2ss %eax,%xmm0
|
||||
andnps %xmm1,%xmm2
|
||||
cvtsi2ssl %eax,%xmm0
|
||||
orps %xmm2,%xmm0
|
||||
1: .leafepilogue
|
||||
.endfn truncf,globl
|
||||
|
||||
|
@ -36,6 +46,3 @@ truncf: .leafprologue
|
|||
2: .long 0x4b000000
|
||||
.rodata.cst16
|
||||
3: .long 0x7fffffff,0,0,0
|
||||
|
||||
// TODO(jart)
|
||||
// roundss $_MM_FROUND_TO_ZERO|_MM_FROUND_NO_EXC,%xmm0,%xmm0
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue