cosmopolitan/libc/nexgen32e/div10.greg.S

120 lines
3.7 KiB
ArmAsm
Raw Normal View History

/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify │
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License. │
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of │
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software │
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/macros.h"
/ Performs 128-bit div+mod by 10 without using div or mod.
/
/ If we didn't have this one-off function, our palandprintf()
/ implementation would cause nearly everything to need a soft
/ math library. It also somehow goes faster than 64-bit IDIV.
/
/ @param rdi:rsi is the number (rdi holds the low 64 bits, rsi the high)
/ @param rdx points to where the 32-bit remainder goes (may be NULL)
/ @return rax:rdx is result of division (rax low, rdx high)
/ @see “Division by Invariant Integers using Multiplication”
/ @see llog10(), and div10int64() which is a tiny bit faster
// Register roles, as established by the instructions below:
//   rdi  low 64 bits of the dividend (the only half used when rsi is 0)
//   rsi  high 64 bits of the dividend
//   rdx  pointer receiving a 32-bit remainder; skipped when it is NULL
//   rax  low word of the returned quotient, rdx the high word
// .leafprologue, .profilable, .leafepilogue, .endfn and .source are not
// defined in this file; presumably they are macros from libc/macros.h.
div10: .leafprologue
.profilable
// rbx is scratch inside the loop at 10: and is restored before returning.
push %rbx
// Keep the remainder pointer in r8 because rdx is overwritten by mul and
// reused as scratch further down.
mov %rdx,%r8
// High half zero: take the 64-bit multiply path at 1:.
test %rsi,%rsi
je 1f
// r10d = number of leading zero bits in rsi (bsr yields the index of the
// highest set bit; xor with 63 turns that into 63 - index).
bsr %rsi,%r10
xor $63,%r10d
// r9d = 125 - leading zeros. The dividend has 128 - leading_zeros
// significant bits, so shifting it right by r9d leaves its top three bits
// (a value below 10) as the starting remainder. r9d is also the number of
// loop iterations needed to feed in the remaining bits.
mov $125,%r9d
sub %r10d,%r9d
// Shift of exactly 64: the halves already line up, so the remainder starts
// as 0:rsi and the pending dividend bits as rdi:0.
cmp $64,%r9d
jne 6f
xor %eax,%eax
xor %r11d,%r11d
jmp 9f
// 64-bit path (rsi == 0). Skip the remainder when the pointer is NULL.
1: test %r8,%r8
je 3f
// 0xcccccccccccccccd is ceil(2^67 / 10), so the high word of
// rdi * constant, shifted right by 3, is rdi / 10.
movabs $0xcccccccccccccccd,%rcx
mov %rdi,%rax
mul %rcx
shr $3,%rdx
// remainder = dividend - 10 * quotient, computed on the low 32 bits only:
// edx = 2q, eax = 5 * 2q, ecx = edi - eax.
add %edx,%edx
lea (%rdx,%rdx,4),%eax
mov %edi,%ecx
sub %eax,%ecx
mov %ecx,(%r8)
// The quotient is computed again here because the remainder code above
// overwrote rax and rdx. NOTE(review): the second mul looks avoidable.
3: movabs $0xcccccccccccccccd,%rcx
mov %rdi,%rax
mul %rcx
mov %rdx,%rax
shr $3,%rax
// High word of the quotient is zero; 14: copies rdi into rdx.
xor %edi,%edi
jmp 14f
// Shift count is not 64. cl = -r9d; 64-bit shifts use only the low six
// bits of cl, so this acts as 64 - r9d or 128 - r9d depending on the case.
6: mov %r9d,%ecx
neg %cl
// Fewer than 62 leading zeros (61 was handled above) means r9d > 64.
cmp $62,%r10d
jb 8f
// r9d is 62 or 63: rax:rsi = dividend >> r9d, and the low r9d bits of rdi
// move to the top of rdi with r11 = 0.
mov %rdi,%rdx
shl %cl,%rdx
mov %rsi,%rax
mov %r9d,%ecx
shr %cl,%rax
shrd %cl,%rsi,%rdi
xor %r11d,%r11d
mov %rdi,%rsi
mov %rdx,%rdi
jmp 9f
// r9d > 64: rdi:r11 = dividend << (128 - r9d) and
// rax:rsi = 0:(rsi >> (r9d - 64)).
8: mov %rdi,%r11
shl %cl,%r11
mov %rsi,%rax
shl %cl,%rax
mov %r9d,%ecx
shr %cl,%rdi
or %rax,%rdi
shr %cl,%rsi
xor %eax,%eax
// Loop setup: r10d = -r9d counts up to zero, ecx holds the quotient bit
// produced by the previous iteration (none yet), r9 = 9 = divisor - 1.
9: add $-125,%r10d
xor %ecx,%ecx
mov $9,%r9d
// One step of bitwise long division. rax:rsi:rdi:r11 is shifted left by
// one as a single 256-bit value: rax:rsi is the running remainder, rdi:r11
// supplies dividend bits from its top and collects quotient bits at its
// bottom.
10: shld $1,%rsi,%rax
shld $1,%rdi,%rsi
shld $1,%r11,%rdi
// r11 = (r11 << 1) | previous quotient bit; the shifted value stays in rdx.
mov %r11,%rdx
add %r11,%rdx
mov %rcx,%r11
or %rdx,%r11
// 128-bit 9 - rax:rsi; only the sign matters. mov (not xor) zeroes ebx so
// the carry flag from cmp survives into sbb.
cmp %rsi,%r9
mov $0,%ebx
sbb %rax,%rbx
// rbx = all ones when the remainder is at least 10, otherwise 0.
sar $63,%rbx
// ecx = quotient bit for this step; rbx = 10 or 0, the amount to subtract.
mov %ebx,%ecx
and $1,%ecx
and $10,%ebx
sub %rbx,%rsi
sbb $0,%rax
// Counter started at -r9d, so the loop runs r9d times.
inc %r10d
jne 10b
// Store the low 32 bits of the remainder unless the pointer is NULL.
test %r8,%r8
je 13f
mov %esi,(%r8)
// Fold in the last quotient bit: rax = (r11 << 1) + rcx, and rdi takes
// bit 63 of rdx, which equals bit 63 of r11 since rcx only affects bit 0.
13: lea (%rcx,%r11,2),%rax
shld $1,%rdx,%rdi
// Common exit: the high quotient word goes to rdx.
14: mov %rdi,%rdx
pop %rbx
.leafepilogue
.endfn div10,globl,hidden
.source __FILE__