mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-12 05:59:10 +00:00
Get codebase completely working with LLVM
You can now build Cosmopolitan with Clang: `make -j8 MODE=llvm o/llvm/examples/hello.com`. The assembler and linker code is now friendly to LLVM too, so there is no need to configure Clang to use binutils under the hood. If you love LLVM, you can now use pure LLVM.
This commit is contained in:
parent
0e36cb3ac4
commit
e75ffde09e
4528 changed files with 7776 additions and 11640 deletions
|
@ -18,21 +18,21 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Returns binary logarithm of integer 𝑥.
|
||||
/
|
||||
/ uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
|
||||
/ 0x00000000 wut 32 0 wut 32
|
||||
/ 0x00000001 0 0 1 0 31
|
||||
/ 0x80000001 0 0 1 31 0
|
||||
/ 0x80000000 31 31 32 31 0
|
||||
/ 0x00000010 4 4 5 4 27
|
||||
/ 0x08000010 4 4 5 27 4
|
||||
/ 0x08000000 27 27 28 27 4
|
||||
/ 0xffffffff 0 0 1 31 0
|
||||
/
|
||||
/ @param rsi:rdi is 128-bit unsigned 𝑥 value
|
||||
/ @return eax number in range [0,128) or undef if 𝑥 is 0
|
||||
/ @see also treasure trove of nearly identical functions
|
||||
// Returns binary logarithm of integer 𝑥.
|
||||
//
|
||||
// uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
|
||||
// 0x00000000 wut 32 0 wut 32
|
||||
// 0x00000001 0 0 1 0 31
|
||||
// 0x80000001 0 0 1 31 0
|
||||
// 0x80000000 31 31 32 31 0
|
||||
// 0x00000010 4 4 5 4 27
|
||||
// 0x08000010 4 4 5 27 4
|
||||
// 0x08000000 27 27 28 27 4
|
||||
// 0xffffffff 0 0 1 31 0
|
||||
//
|
||||
// @param rsi:rdi is 128-bit unsigned 𝑥 value
|
||||
// @return eax number in range [0,128) or undef if 𝑥 is 0
|
||||
// @see also treasure trove of nearly identical functions
|
||||
bsrmax: .leafprologue
|
||||
.profilable
|
||||
bsr %rsi,%rax
|
||||
|
|
|
@ -18,15 +18,15 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Sets memory to zero.
|
||||
/
|
||||
/ C code should always favor memset(), since that's the one we've
|
||||
/ prototyped with the best optimizations. This definition is used
|
||||
/ by old code and sometimes code generators, as a thunk.
|
||||
/
|
||||
/ @param rdi is dest
|
||||
/ @param rsi is the number of bytes to set
|
||||
/ @see memset(), explicit_bzero()
|
||||
// Sets memory to zero.
|
||||
//
|
||||
// C code should always favor memset(), since that's the one we've
|
||||
// prototyped with the best optimizations. This definition is used
|
||||
// by old code and sometimes code generators, as a thunk.
|
||||
//
|
||||
// @param rdi is dest
|
||||
// @param rsi is the number of bytes to set
|
||||
// @see memset(), explicit_bzero()
|
||||
bzero: mov %rsi,%rdx
|
||||
xor %esi,%esi
|
||||
jmp MemSet
|
||||
|
|
|
@ -19,19 +19,19 @@
|
|||
#include "ape/relocations.h"
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Escapes byte for string literal.
|
||||
/
|
||||
/ This turns stuff like (char)0xFF into \0377. The returned
|
||||
/ string is word-encoded, e.g. '\\'|'0'<<010|'3'<<020|etc.
|
||||
/
|
||||
/ @param dil contains byte to escape
|
||||
/ @see libc/nexgen32e/cescapec.c
|
||||
// Escapes byte for string literal.
|
||||
//
|
||||
// This turns stuff like (char)0xFF into \0377. The returned
|
||||
// string is word-encoded, e.g. '\\'|'0'<<010|'3'<<020|etc.
|
||||
//
|
||||
// @param dil contains byte to escape
|
||||
// @see libc/nexgen32e/cescapec.c
|
||||
cescapec:
|
||||
movzbl %dil,%edi
|
||||
lea -7(%rdi),%ecx
|
||||
cmp $85,%cl
|
||||
ja 1f
|
||||
mov $'\\,%eax
|
||||
mov $'\\',%eax
|
||||
movzbl %cl,%ecx
|
||||
jmp *cescapectab(,%rcx,8)
|
||||
.Lanchorpoint:
|
||||
|
@ -74,7 +74,7 @@ cescapec:
|
|||
shl $24,%edi
|
||||
or %ecx,%edi
|
||||
lea (%rdi,%rax,4),%eax
|
||||
add $'0<<030|'0<<020|'0<<010|'\\,%eax
|
||||
add $'0'<<030|'0'<<020|'0'<<010|'\\',%eax
|
||||
ret
|
||||
.endfn cescapec,globl
|
||||
|
||||
|
@ -87,13 +87,13 @@ cescapectab.ro:
|
|||
.byte 1,.LVT-.Lanchorpoint
|
||||
.byte 1,.LFF-.Lanchorpoint
|
||||
.byte 1,.LCR-.Lanchorpoint
|
||||
.byte '\"-'\r-1,1b-.Lanchorpoint
|
||||
.byte 20,1b-.Lanchorpoint
|
||||
.byte 1,.LDQ-.Lanchorpoint
|
||||
.byte '\'-'\"-1,1b-.Lanchorpoint
|
||||
.byte '\''-'\"'-1,1b-.Lanchorpoint
|
||||
.byte 1,.LSQ-.Lanchorpoint
|
||||
.byte '?-'\'-1,1b-.Lanchorpoint
|
||||
.byte '?'-'\''-1,1b-.Lanchorpoint
|
||||
.byte 1,.LQM-.Lanchorpoint
|
||||
.byte '\\-'?-1,1b-.Lanchorpoint
|
||||
.byte '\\'-'?'-1,1b-.Lanchorpoint
|
||||
.byte 1,.LBSL-.Lanchorpoint
|
||||
.equ .Lcescapectab.ro.size,.-cescapectab.ro
|
||||
.org 8 - .Lcescapectab.ro.size % 8 + .
|
||||
|
@ -102,7 +102,7 @@ cescapectab.ro:
|
|||
|
||||
.initbss 300,_init_cescapec
|
||||
cescapectab:
|
||||
.rept '\\-7+1
|
||||
.rept '\\'-7+1
|
||||
.quad 0
|
||||
.endr
|
||||
.endobj cescapectab
|
||||
|
|
|
@ -18,11 +18,11 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Compares 8-bit signed integers.
|
||||
/
|
||||
/ @param rdi points to left integer
|
||||
/ @param rsi points to right integer
|
||||
/ @return <0, 0, or >0 based on comparison
|
||||
// Compares 8-bit signed integers.
|
||||
//
|
||||
// @param rdi points to left integer
|
||||
// @param rsi points to right integer
|
||||
// @return <0, 0, or >0 based on comparison
|
||||
.align 16
|
||||
cmpsb: .leafprologue
|
||||
.profilable
|
||||
|
|
|
@ -18,19 +18,19 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Compares 32-bit signed integers.
|
||||
/
|
||||
/ @param rdi points to left integer
|
||||
/ @param rsi points to right integer
|
||||
/ @return <0, 0, or >0 based on comparison
|
||||
// Compares 32-bit signed integers.
|
||||
//
|
||||
// @param rdi points to left integer
|
||||
// @param rsi points to right integer
|
||||
// @return <0, 0, or >0 based on comparison
|
||||
.align 16
|
||||
cmpsl: .leafprologue
|
||||
.profilable
|
||||
xor %eax,%eax
|
||||
cmpsl
|
||||
/ mov (%rdi),%edi
|
||||
/ mov (%rsi),%esi
|
||||
/ cmp %edi,%esi
|
||||
// mov (%rdi),%edi
|
||||
// mov (%rsi),%esi
|
||||
// cmp %edi,%esi
|
||||
setl %al
|
||||
cmovg .Lneg1(%rip),%eax
|
||||
.leafepilogue
|
||||
|
|
|
@ -18,11 +18,11 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Compares 64-bit signed integers.
|
||||
/
|
||||
/ @param rdi points to left integer
|
||||
/ @param rsi points to right integer
|
||||
/ @return <0, 0, or >0 based on comparison
|
||||
// Compares 64-bit signed integers.
|
||||
//
|
||||
// @param rdi points to left integer
|
||||
// @param rsi points to right integer
|
||||
// @return <0, 0, or >0 based on comparison
|
||||
.align 16
|
||||
cmpsq: .leafprologue
|
||||
.profilable
|
||||
|
|
|
@ -18,11 +18,11 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Compares 16-bit signed integers.
|
||||
/
|
||||
/ @param rdi points to left integer
|
||||
/ @param rsi points to right integer
|
||||
/ @return <0, 0, or >0 based on comparison
|
||||
// Compares 16-bit signed integers.
|
||||
//
|
||||
// @param rdi points to left integer
|
||||
// @param rsi points to right integer
|
||||
// @return <0, 0, or >0 based on comparison
|
||||
.align 16
|
||||
cmpsw: .leafprologue
|
||||
.profilable
|
||||
|
|
|
@ -18,11 +18,11 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Compares 8-bit unsigned integers.
|
||||
/
|
||||
/ @param rdi points to left integer
|
||||
/ @param rsi points to right integer
|
||||
/ @return <0, 0, or >0 based on comparison
|
||||
// Compares 8-bit unsigned integers.
|
||||
//
|
||||
// @param rdi points to left integer
|
||||
// @param rsi points to right integer
|
||||
// @return <0, 0, or >0 based on comparison
|
||||
.align 16
|
||||
cmpub: .leafprologue
|
||||
.profilable
|
||||
|
|
|
@ -18,11 +18,11 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Compares 32-bit unsigned integers.
|
||||
/
|
||||
/ @param rdi points to left integer
|
||||
/ @param rsi points to right integer
|
||||
/ @return <0, 0, or >0 based on comparison
|
||||
// Compares 32-bit unsigned integers.
|
||||
//
|
||||
// @param rdi points to left integer
|
||||
// @param rsi points to right integer
|
||||
// @return <0, 0, or >0 based on comparison
|
||||
.align 16
|
||||
cmpul: .leafprologue
|
||||
.profilable
|
||||
|
|
|
@ -18,11 +18,11 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Compares 64-bit unsigned integers.
|
||||
/
|
||||
/ @param rdi points to left integer
|
||||
/ @param rsi points to right integer
|
||||
/ @return <0, 0, or >0 based on comparison
|
||||
// Compares 64-bit unsigned integers.
|
||||
//
|
||||
// @param rdi points to left integer
|
||||
// @param rsi points to right integer
|
||||
// @return <0, 0, or >0 based on comparison
|
||||
.align 16
|
||||
cmpuq: .leafprologue
|
||||
.profilable
|
||||
|
|
|
@ -18,11 +18,11 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Compares 16-bit unsigned integers.
|
||||
/
|
||||
/ @param rdi points to left integer
|
||||
/ @param rsi points to right integer
|
||||
/ @return <0, 0, or >0 based on comparison
|
||||
// Compares 16-bit unsigned integers.
|
||||
//
|
||||
// @param rdi points to left integer
|
||||
// @param rsi points to right integer
|
||||
// @return <0, 0, or >0 based on comparison
|
||||
.align 16
|
||||
cmpuw: .leafprologue
|
||||
.profilable
|
||||
|
|
|
@ -18,18 +18,18 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Computes Phil Katz CRC-32 w/ carryless multiply isa.
|
||||
/
|
||||
/ This is support code that's abstracted by crc32_z().
|
||||
/
|
||||
/ @param edi is initial value
|
||||
/ @param rsi points to buffer
|
||||
/ @param rdx is bytes in buffer that's >=64 and %16==0
|
||||
/ @return eax is crc32
|
||||
/ @note needs Westmere (c.2010) or Bulldozer (c.2011)
|
||||
/ @see “Fast CRC Computation for Generic Polynomials Using
|
||||
/ PCLMULQDQ Instruction” V. Gopal, E. Ozturk, et al.,
|
||||
/ 2009, intel.ly/2ySEwL0
|
||||
// Computes Phil Katz CRC-32 w/ carryless multiply isa.
|
||||
//
|
||||
// This is support code that's abstracted by crc32_z().
|
||||
//
|
||||
// @param edi is initial value
|
||||
// @param rsi points to buffer
|
||||
// @param rdx is bytes in buffer that's >=64 and %16==0
|
||||
// @return eax is crc32
|
||||
// @note needs Westmere (c.2010) or Bulldozer (c.2011)
|
||||
// @see “Fast CRC Computation for Generic Polynomials Using
|
||||
// PCLMULQDQ Instruction” V. Gopal, E. Ozturk, et al.,
|
||||
// 2009, intel.ly/2ySEwL0
|
||||
crc32_pclmul:
|
||||
.leafprologue
|
||||
.profilable
|
||||
|
@ -139,8 +139,8 @@ crc32_pclmul:
|
|||
.endfn crc32_pclmul,globl,hidden
|
||||
.source __FILE__
|
||||
|
||||
/ Definitions of the bit-reflected domain constants k1,k2,k3, etc.
|
||||
/ and the CRC32+Barrett polynomials given at the end of the paper.
|
||||
// Definitions of the bit-reflected domain constants k1,k2,k3, etc.
|
||||
// and the CRC32+Barrett polynomials given at the end of the paper.
|
||||
.rodata.cst16
|
||||
.Lk1k2: .quad 0x0000000154442bd4
|
||||
.quad 0x00000001c6e41596
|
||||
|
|
|
@ -18,22 +18,22 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Generates lookup table for computing CRC-32 byte-by-byte.
|
||||
/
|
||||
/ void crc32init(uint32_t table[256], uint32_t polynomial) {
|
||||
/ uint32_t d, i, r;
|
||||
/ for (d = 0; d < 256; ++d) {
|
||||
/ r = d;
|
||||
/ for (i = 0; i < 8; ++i) {
|
||||
/ r = r >> 1 ^ (r & 1 ? polynomial : 0);
|
||||
/ }
|
||||
/ table[d] = r;
|
||||
/ }
|
||||
/ }
|
||||
/
|
||||
/ @param rdi is pointer to uint32_t[256] array
|
||||
/ @param esi 32-bit binary polynomial config
|
||||
/ @note imposes ~300ns one-time cost
|
||||
// Generates lookup table for computing CRC-32 byte-by-byte.
|
||||
//
|
||||
// void crc32init(uint32_t table[256], uint32_t polynomial) {
|
||||
// uint32_t d, i, r;
|
||||
// for (d = 0; d < 256; ++d) {
|
||||
// r = d;
|
||||
// for (i = 0; i < 8; ++i) {
|
||||
// r = r >> 1 ^ (r & 1 ? polynomial : 0);
|
||||
// }
|
||||
// table[d] = r;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// @param rdi is pointer to uint32_t[256] array
|
||||
// @param esi 32-bit binary polynomial config
|
||||
// @note imposes ~300ns one-time cost
|
||||
crc32init:
|
||||
push %rbp
|
||||
mov %rsp,%rbp
|
||||
|
|
|
@ -18,10 +18,10 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Divides 64-bit signed integer by 1,000,000,000.
|
||||
/
|
||||
/ @param rdi is number to divide
|
||||
/ @return quotient
|
||||
// Divides 64-bit signed integer by 1,000,000,000.
|
||||
//
|
||||
// @param rdi is number to divide
|
||||
// @return quotient
|
||||
div1000000000int64:
|
||||
mov $0x1a,%cl
|
||||
movabs $0x112e0be826d694b3,%rdx
|
||||
|
|
|
@ -18,10 +18,10 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Divides 64-bit signed integer by 1,000,000.
|
||||
/
|
||||
/ @param rdi is number to divide
|
||||
/ @return quotient
|
||||
// Divides 64-bit signed integer by 1,000,000.
|
||||
//
|
||||
// @param rdi is number to divide
|
||||
// @return quotient
|
||||
div1000000int64:
|
||||
mov $0x12,%cl
|
||||
movabs $0x431bde82d7b634db,%rdx
|
||||
|
|
|
@ -18,10 +18,10 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Divides 64-bit signed integer by 10,000.
|
||||
/
|
||||
/ @param rdi is number to divide
|
||||
/ @return truncated quotient
|
||||
// Divides 64-bit signed integer by 10,000.
|
||||
//
|
||||
// @param rdi is number to divide
|
||||
// @return truncated quotient
|
||||
div10000int64:
|
||||
mov $11,%cl
|
||||
movabs $0x346dc5d63886594b,%rdx
|
||||
|
|
|
@ -18,10 +18,10 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Divides 64-bit signed integer by 1,000.
|
||||
/
|
||||
/ @param rdi is number to divide
|
||||
/ @return quotient
|
||||
// Divides 64-bit signed integer by 1,000.
|
||||
//
|
||||
// @param rdi is number to divide
|
||||
// @return quotient
|
||||
div1000int64:
|
||||
mov $0x7,%cl
|
||||
movabs $0x20c49ba5e353f7cf,%rdx
|
||||
|
|
|
@ -18,10 +18,10 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Divides 64-bit signed integer by 100.
|
||||
/
|
||||
/ @param rdi is number to divide
|
||||
/ @return rax has quotient
|
||||
// Divides 64-bit signed integer by 100.
|
||||
//
|
||||
// @param rdi is number to divide
|
||||
// @return rax has quotient
|
||||
div100int64:
|
||||
mov %rdi,%rax
|
||||
movabs $-6640827866535438581,%rdx
|
||||
|
|
|
@ -18,10 +18,10 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Divides 64-bit signed integer by 10.
|
||||
/
|
||||
/ @param rdi is number to divide
|
||||
/ @return quotient
|
||||
// Divides 64-bit signed integer by 10.
|
||||
//
|
||||
// @param rdi is number to divide
|
||||
// @return quotient
|
||||
div10int64:
|
||||
mov $2,%cl
|
||||
movabs $0x6666666666666667,%rdx
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
#include "libc/macros.h"
|
||||
.source __FILE__
|
||||
|
||||
/ D.J. Bernstein's outrageously fast integer sorting algorithm.
|
||||
/
|
||||
/ @param rdi is int32 array
|
||||
/ @param rsi is number of elements in rdi
|
||||
/ @note public domain
|
||||
/ @see en.wikipedia.org/wiki/Sorting_network
|
||||
// D.J. Bernstein's outrageously fast integer sorting algorithm.
|
||||
//
|
||||
// @param rdi is int32 array
|
||||
// @param rsi is number of elements in rdi
|
||||
// @note public domain
|
||||
// @see en.wikipedia.org/wiki/Sorting_network
|
||||
djbsort_avx2:
|
||||
push %rbp
|
||||
mov %rsp,%rbp
|
||||
|
@ -1115,7 +1115,7 @@ int32_sort_2power:
|
|||
vpxor 32(%rdi),%ymm0,%ymm2
|
||||
vpxor (%rdi),%ymm0,%ymm0
|
||||
vmovdqa .LC1(%rip),%ymm4
|
||||
cmp $0,-116(%rbp)
|
||||
cmpl $0,-116(%rbp)
|
||||
vpunpckldq %ymm2,%ymm0,%ymm1
|
||||
vpunpckhdq %ymm2,%ymm0,%ymm0
|
||||
vpunpcklqdq %ymm0,%ymm1,%ymm3
|
||||
|
@ -1179,7 +1179,7 @@ int32_sort_2power:
|
|||
mov $16,%esi
|
||||
mov %r13,%rdi
|
||||
call int32_sort_2power
|
||||
cmp $0,-116(%rbp)
|
||||
cmpl $0,-116(%rbp)
|
||||
vmovdqu (%r12),%ymm4
|
||||
vmovdqu 32(%r12),%ymm1
|
||||
vmovdqu 64(%r12),%ymm2
|
||||
|
@ -1750,7 +1750,7 @@ int32_sort_2power:
|
|||
vpunpckhdq 160(%rax),%ymm7,%ymm0
|
||||
vpunpcklqdq %ymm2,%ymm12,%ymm8
|
||||
vpunpcklqdq %ymm4,%ymm6,%ymm9
|
||||
cmp $0,-116(%rbp)
|
||||
cmpl $0,-116(%rbp)
|
||||
vmovdqu 192(%rax),%ymm7
|
||||
vpunpckhqdq %ymm2,%ymm12,%ymm12
|
||||
vpunpckhqdq %ymm4,%ymm6,%ymm4
|
||||
|
@ -1837,7 +1837,7 @@ int32_sort_2power:
|
|||
vmovdqu %ymm2,-64(%rax)
|
||||
vmovdqu %ymm0,-32(%rax)
|
||||
jmp .L85
|
||||
.L142: cmp $32,-112(%rbp)
|
||||
.L142: cmpq $32,-112(%rbp)
|
||||
jne .L94
|
||||
.L93: mov -112(%rbp),%rcx
|
||||
sar $2,%rcx
|
||||
|
@ -1871,7 +1871,7 @@ int32_sort_2power:
|
|||
cmp %rax,%r15
|
||||
jg .L92
|
||||
sarq $3,-112(%rbp)
|
||||
.L89: cmp $127,-112(%rbp)
|
||||
.L89: cmpq $127,-112(%rbp)
|
||||
jle .L142
|
||||
jmp .L93
|
||||
.L92: cmp -136(%rbp),%rax
|
||||
|
@ -1925,7 +1925,7 @@ int32_sort_2power:
|
|||
add %rdx,-136(%rbp)
|
||||
jmp .L90
|
||||
.L145: sarq $2,-112(%rbp)
|
||||
.L94: cmp $15,-112(%rbp)
|
||||
.L94: cmpq $15,-112(%rbp)
|
||||
jle .L144
|
||||
mov -112(%rbp),%rcx
|
||||
xor %esi,%esi
|
||||
|
@ -1962,7 +1962,7 @@ int32_sort_2power:
|
|||
.L146: add %rdx,%rsi
|
||||
add %rdx,%rcx
|
||||
jmp .L95
|
||||
.L144: cmp $8,-112(%rbp)
|
||||
.L144: cmpq $8,-112(%rbp)
|
||||
je .L111
|
||||
.L102: mov -152(%rbp),%rdx
|
||||
add -128(%rbp),%rdx
|
||||
|
@ -1997,7 +1997,7 @@ int32_sort_2power:
|
|||
vpmaxsd %ymm3,%ymm5,%ymm3
|
||||
vpminsd (%rdi),%ymm7,%ymm1
|
||||
vpminsd %ymm2,%ymm4,%ymm10
|
||||
cmp $0,-116(%rbp)
|
||||
cmpl $0,-116(%rbp)
|
||||
vpmaxsd (%rdi),%ymm7,%ymm0
|
||||
vmovdqu (%rsi),%ymm7
|
||||
vpmaxsd %ymm2,%ymm4,%ymm2
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#include "libc/macros.h"
|
||||
#include "libc/notice.inc"
|
||||
|
||||
/ Environment variable pointer list.
|
||||
// Environment variable pointer list.
|
||||
.bss
|
||||
.align 8
|
||||
environ:
|
||||
|
|
|
@ -19,16 +19,16 @@
|
|||
#include "libc/macros.h"
|
||||
.source __FILE__
|
||||
|
||||
/ Global variable for last error.
|
||||
/
|
||||
/ The system call wrappers update this with WIN32 error codes.
|
||||
/ Unlike traditional libraries, Cosmopolitan error codes are
|
||||
/ defined as variables. By convention, system calls and other
|
||||
/ functions do not update this variable when nothing's broken.
|
||||
/
|
||||
/ @see libc/sysv/consts.sh
|
||||
/ @see libc/sysv/errfuns.h
|
||||
/ @see __errno_location() stable abi
|
||||
// Global variable for last error.
|
||||
//
|
||||
// The system call wrappers update this with WIN32 error codes.
|
||||
// Unlike traditional libraries, Cosmopolitan error codes are
|
||||
// defined as variables. By convention, system calls and other
|
||||
// functions do not update this variable when nothing's broken.
|
||||
//
|
||||
// @see libc/sysv/consts.sh
|
||||
// @see libc/sysv/errfuns.h
|
||||
// @see __errno_location() stable abi
|
||||
.bss
|
||||
.align 4
|
||||
errno: .long 0
|
||||
|
|
|
@ -20,8 +20,8 @@
|
|||
.source __FILE__
|
||||
.keep.text # gdb needs it
|
||||
|
||||
/ Returns address of errno variable.
|
||||
/ @note this isn't a universal definition
|
||||
// Returns address of errno variable.
|
||||
// @note this isn't a universal definition
|
||||
__errno_location:
|
||||
ezlea errno,ax
|
||||
ret
|
||||
|
|
|
@ -18,14 +18,14 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Sets memory to zero w/ accompanying non-optimizing macro.
|
||||
/
|
||||
/ This is intended for security-conscious applications. This
|
||||
/ implementation also hoses every register the abi allows. A
|
||||
/ concomitant prototype (str.h) countermands compiler magic.
|
||||
/
|
||||
/ @param rdi is dest
|
||||
/ @param rsi is the number of bytes to set
|
||||
// Sets memory to zero w/ accompanying non-optimizing macro.
|
||||
//
|
||||
// This is intended for security-conscious applications. This
|
||||
// implementation also hoses every register the abi allows. A
|
||||
// concomitant prototype (str.h) countermands compiler magic.
|
||||
//
|
||||
// @param rdi is dest
|
||||
// @param rsi is the number of bytes to set
|
||||
explicit_bzero:
|
||||
.leafprologue
|
||||
mov %rsi,%rcx
|
||||
|
|
|
@ -21,11 +21,11 @@
|
|||
.source __FILE__
|
||||
.code16 # ∩ .code32 ∩ .code64
|
||||
|
||||
/ Function entry hook stub.
|
||||
/
|
||||
/ @note cc -pg -mfentry adds this to the start of every function
|
||||
/ @see libc/log/shadowargs.ncabi.c
|
||||
/ @mode long,legacy,real
|
||||
// Function entry hook stub.
|
||||
//
|
||||
// @note cc -pg -mfentry adds this to the start of every function
|
||||
// @see libc/log/shadowargs.ncabi.c
|
||||
// @mode long,legacy,real
|
||||
__fentry__:
|
||||
ret
|
||||
.endfn __fentry__,weak
|
||||
|
|
|
@ -18,22 +18,22 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Finds lowest set bit in word.
|
||||
/
|
||||
/ uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
|
||||
/ 0x00000000 wut 32 0 wut 32
|
||||
/ 0x00000001 0 0 1 0 31
|
||||
/ 0x80000001 0 0 1 31 0
|
||||
/ 0x80000000 31 31 32 31 0
|
||||
/ 0x00000010 4 4 5 4 27
|
||||
/ 0x08000010 4 4 5 27 4
|
||||
/ 0x08000000 27 27 28 27 4
|
||||
/ 0xffffffff 0 0 1 31 0
|
||||
/
|
||||
/ @param edi is the input number
|
||||
/ @return number in range [1,32] or 0 if no bits set
|
||||
/ @see also treasure trove of nearly identical functions
|
||||
/ @asyncsignalsafe
|
||||
// Finds lowest set bit in word.
|
||||
//
|
||||
// uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
|
||||
// 0x00000000 wut 32 0 wut 32
|
||||
// 0x00000001 0 0 1 0 31
|
||||
// 0x80000001 0 0 1 31 0
|
||||
// 0x80000000 31 31 32 31 0
|
||||
// 0x00000010 4 4 5 4 27
|
||||
// 0x08000010 4 4 5 27 4
|
||||
// 0x08000000 27 27 28 27 4
|
||||
// 0xffffffff 0 0 1 31 0
|
||||
//
|
||||
// @param edi is the input number
|
||||
// @return number in range [1,32] or 0 if no bits set
|
||||
// @see also treasure trove of nearly identical functions
|
||||
// @asyncsignalsafe
|
||||
ffs: .leafprologue
|
||||
.profilable
|
||||
or $-1,%edx
|
||||
|
|
|
@ -18,22 +18,22 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Finds lowest set bit in word.
|
||||
/
|
||||
/ uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
|
||||
/ 0x00000000 wut 32 0 wut 32
|
||||
/ 0x00000001 0 0 1 0 31
|
||||
/ 0x80000001 0 0 1 31 0
|
||||
/ 0x80000000 31 31 32 31 0
|
||||
/ 0x00000010 4 4 5 4 27
|
||||
/ 0x08000010 4 4 5 27 4
|
||||
/ 0x08000000 27 27 28 27 4
|
||||
/ 0xffffffff 0 0 1 31 0
|
||||
/
|
||||
/ @param rdi is the input number
|
||||
/ @return number in range [1,64] or 0 if no bits set
|
||||
/ @see also treasure trove of nearly identical functions
|
||||
/ @asyncsignalsafe
|
||||
// Finds lowest set bit in word.
|
||||
//
|
||||
// uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
|
||||
// 0x00000000 wut 32 0 wut 32
|
||||
// 0x00000001 0 0 1 0 31
|
||||
// 0x80000001 0 0 1 31 0
|
||||
// 0x80000000 31 31 32 31 0
|
||||
// 0x00000010 4 4 5 4 27
|
||||
// 0x08000010 4 4 5 27 4
|
||||
// 0x08000000 27 27 28 27 4
|
||||
// 0xffffffff 0 0 1 31 0
|
||||
//
|
||||
// @param rdi is the input number
|
||||
// @return number in range [1,64] or 0 if no bits set
|
||||
// @see also treasure trove of nearly identical functions
|
||||
// @asyncsignalsafe
|
||||
ffsl: .leafprologue
|
||||
.profilable
|
||||
or $-1,%edx
|
||||
|
|
|
@ -21,17 +21,17 @@
|
|||
#include "libc/notice.inc"
|
||||
#define INITIAL_CAPACITY 4
|
||||
|
||||
/ Invokes deferred function calls.
|
||||
/
|
||||
/ This offers behavior similar to std::unique_ptr. Functions
|
||||
/ overwrite their return addresses jumping here, and pushing
|
||||
/ exactly one entry on the shadow stack below. Functions may
|
||||
/ repeat that process multiple times, in which case the body
|
||||
/ of this gadget loops and unwinds as a natural consequence.
|
||||
/
|
||||
/ @param rax,rdx,xmm0,xmm1,st0,st1 is return value
|
||||
/ @see test/libc/runtime/gc_test.c
|
||||
/ <LIMBO>
|
||||
// Invokes deferred function calls.
|
||||
//
|
||||
// This offers behavior similar to std::unique_ptr. Functions
|
||||
// overwrite their return addresses jumping here, and pushing
|
||||
// exactly one entry on the shadow stack below. Functions may
|
||||
// repeat that process multiple times, in which case the body
|
||||
// of this gadget loops and unwinds as a natural consequence.
|
||||
//
|
||||
// @param rax,rdx,xmm0,xmm1,st0,st1 is return value
|
||||
// @see test/libc/runtime/gc_test.c
|
||||
// <LIMBO>
|
||||
__gc: decq __garbage(%rip)
|
||||
mov __garbage(%rip),%r8
|
||||
mov __garbage+16(%rip),%r9
|
||||
|
@ -41,7 +41,7 @@ __gc: decq __garbage(%rip)
|
|||
mov 8(%r8),%r9
|
||||
mov 16(%r8),%rdi
|
||||
push 24(%r8)
|
||||
/ </LIMBO>
|
||||
// </LIMBO>
|
||||
push %rbp
|
||||
mov %rsp,%rbp
|
||||
sub $0x20,%rsp
|
||||
|
|
|
@ -4,19 +4,22 @@
|
|||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
struct Garbages {
|
||||
size_t i, n;
|
||||
struct Garbage {
|
||||
struct StackFrame *frame;
|
||||
intptr_t fn;
|
||||
intptr_t arg;
|
||||
intptr_t ret;
|
||||
} * p;
|
||||
struct Garbage {
|
||||
struct StackFrame *frame;
|
||||
intptr_t fn;
|
||||
intptr_t arg;
|
||||
intptr_t ret;
|
||||
};
|
||||
|
||||
hidden extern struct Garbages __garbage;
|
||||
struct Garbages {
|
||||
size_t i, n;
|
||||
struct Garbage *p;
|
||||
struct Garbage initmem[1];
|
||||
};
|
||||
|
||||
int64_t __gc(void) hidden;
|
||||
extern struct Garbages __garbage;
|
||||
|
||||
int64_t __gc(void);
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
|
|
|
@ -18,17 +18,17 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Jumps up stack to previous setjmp() invocation.
|
||||
/
|
||||
/ This is the same as longjmp() but also unwinds the stack to free
|
||||
/ memory, etc. that was registered using gc() or defer(). If GC
|
||||
/ isn't linked, this behaves the same as longjmp().
|
||||
/
|
||||
/ @param rdi points to the jmp_buf which must be the same stack
|
||||
/ @param esi is returned by setjmp() invocation (coerced nonzero)
|
||||
/ @assume system five nexgen32e abi conformant
|
||||
/ @see examples/ctrlc.c
|
||||
/ @noreturn
|
||||
// Jumps up stack to previous setjmp() invocation.
|
||||
//
|
||||
// This is the same as longjmp() but also unwinds the stack to free
|
||||
// memory, etc. that was registered using gc() or defer(). If GC
|
||||
// isn't linked, this behaves the same as longjmp().
|
||||
//
|
||||
// @param rdi points to the jmp_buf which must be the same stack
|
||||
// @param esi is returned by setjmp() invocation (coerced nonzero)
|
||||
// @assume system five nexgen32e abi conformant
|
||||
// @see examples/ctrlc.c
|
||||
// @noreturn
|
||||
gclongjmp:
|
||||
.leafprologue
|
||||
.profilable
|
||||
|
|
|
@ -19,8 +19,8 @@
|
|||
#include "libc/macros.h"
|
||||
#include "libc/notice.inc"
|
||||
|
||||
/ The identity() function.
|
||||
/ @return first argument
|
||||
// The identity() function.
|
||||
// @return first argument
|
||||
identity:
|
||||
mov %rdi,%rax
|
||||
ret
|
||||
|
|
|
@ -20,11 +20,11 @@
|
|||
.text.startup
|
||||
.align 8
|
||||
|
||||
/ Identity maps 256-byte translation table.
|
||||
/
|
||||
/ @param char (*rdi)[256]
|
||||
/ @speed 90mBps
|
||||
/ @mode long
|
||||
// Identity maps 256-byte translation table.
|
||||
//
|
||||
// @param char (*rdi)[256]
|
||||
// @speed 90mBps
|
||||
// @mode long
|
||||
imapxlatab:
|
||||
.leafprologue
|
||||
.profilable
|
||||
|
|
|
@ -18,29 +18,29 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Base 36 Decoder Table.
|
||||
/
|
||||
/ This supports uppercase and lowercase. For encoding, the string
|
||||
/ 0123456789abcdefghijklmnopqrstuvwxyz can be used, which linkers
|
||||
/ are able to deduplicate.
|
||||
// Base 36 Decoder Table.
|
||||
//
|
||||
// This supports uppercase and lowercase. For encoding, the string
|
||||
// 0123456789abcdefghijklmnopqrstuvwxyz can be used, which linkers
|
||||
// are able to deduplicate.
|
||||
.initbss 300,_init_kBase36
|
||||
kBase36:.zero 256
|
||||
.endobj kBase36,globl,hidden
|
||||
.previous
|
||||
|
||||
.init.start 300,_init_kBase36
|
||||
add $'0,%rdi
|
||||
add $'0',%rdi
|
||||
xor %eax,%eax
|
||||
pushpop 10,%rcx
|
||||
0: inc %eax
|
||||
stosb
|
||||
.loop 0b
|
||||
add $'A-1-'9,%rdi
|
||||
pushpop 'Z+1-'A,%rcx
|
||||
add $'A'-1-'9',%rdi
|
||||
pushpop 'Z'+1-'A',%rcx
|
||||
0: inc %eax
|
||||
mov %al,0x20(%rdi)
|
||||
stosb
|
||||
.loop 0b
|
||||
add $255-'Z,%rdi
|
||||
add $255-'Z',%rdi
|
||||
.init.end 300,_init_kBase36
|
||||
.source __FILE__
|
||||
|
|
|
@ -21,43 +21,43 @@
|
|||
.align 16
|
||||
.source __FILE__
|
||||
|
||||
/ ibm cp437 unicode table w/ string literal safety
|
||||
/
|
||||
/ ░▄██▒▄█ ▐██ ░░░ ▀▀████▒▀█▄
|
||||
/ ▐███▓██░ ██▌ ▀████▄■█▄
|
||||
/ ▐█▓███▀█░██▀ ░ ░▀█████▓▄
|
||||
/ ▐█▓██▀▄█▒██▀ ▄▄░ ▄▄▄ ░░░ ░▀████▒▄
|
||||
/ ▐████▀▄█■█▀ ▀▀ ░█████░
|
||||
/ ▐█▓█▀████▀ ░ ▐▓███▒
|
||||
/ █░███▀▀ ░░░ ▄█ ░░░ █████
|
||||
/ ▐█▓█░▀▀ ░░▄█▄▄▄▄▄ ▀▄ ▌▄▄▄░▄▄▄▄▄ ▐████░
|
||||
/ ▐███▌ ▄▀█████████▄ ▌▐▄████████▄ ▐▓███░
|
||||
/ ▐███░░░▀▄█▀▄▄████▄▀░ ▐████████▒ ▀ ░███░
|
||||
/ ░████░ ▓▀ ▄███████▀▌ ▀▄■████▀▀█▀ ██▀█
|
||||
/ ▓███░ ░▄▀▀░░░ ▀ ░░▌ ▄▀▀▄░░▀░▄▀▄ ▐██▀▄
|
||||
/ ░███░ ▄▓▓▄▄░▀▀█▀█ ▌░░ ▀█▀█▀▀ ▐██▀
|
||||
/ █▀▄▐██ ▀░░ ▄▀ ▐ █ ▀ ▄▄▄░ ░▀▄█▄▀█
|
||||
/ ▌▄ █▓ ▒ ░ █▄█▄▀▄▄▄███▄▀▄ ░░ ░ ▀ █▌
|
||||
/ █▌▄░▌ ░░░▄▀█▀███████▄▀▄▀▄▀▀▄▄▄ █▀█░▐
|
||||
/ ██▄ ░░░▄█▄▀██▄█■██████▄█▄█▄■▀█░ ▐░▐
|
||||
/ ▀██░ ░▄██████████████████▄█▄█ ░█ ░ ▄▀
|
||||
/ ▀▓█▄▓░░ ▒█▀█████████████████████▒ ██▀
|
||||
/ ▀███ ▓▒ ██████████████▀▀▀▀█▄▀ ░▄█▒
|
||||
/ ▀███ ▀█▄▀▄█████▀▀ ▓▓▓▄░ ▐ ░▄██
|
||||
/ ▀██ ▄███████▄████████▀░░ ░▄██
|
||||
/ ▄██▀▀▄ █▄▀▄██▒▒███████████▀▀▀▄░ ░███░
|
||||
/ ▄██▀▄▄░░▀▐▄████▄ █████▀▄░░█▀▄▀░░ ▄██░
|
||||
/ █████▄▄▄███▀░█▌██▄▀▀█████▄▄░░░▄▄███▀██▄ ▄▀▀▀▄▄
|
||||
/ ▀██████▀■▄█▄▄ ░▀███████████████▓▓░░▄██▀▄████▄▄▀▄
|
||||
/
|
||||
/ █▀█ █ █▀█ █▀█ █▄▀ ▐▀█▀▌█▀█ █▀█ █▄ █ ▀█▀ █▀█ █▀▀
|
||||
/ █▀▄ █ █ █ █ █ ▀▄ █ █▀▄ █ █ █ ▀█ █ █ ▀▀█
|
||||
/ █▄█ █▄▌█▄█ █▄█ █ █ █ █ █ █▄█ █ █ ▄█▄ █▄█ █▄█
|
||||
/
|
||||
/ THERE WILL BE BLOCKS march 01 2017
|
||||
/
|
||||
/ @see libc/str/str.h
|
||||
/ @see kCp437i[]
|
||||
// ibm cp437 unicode table w/ string literal safety
|
||||
//
|
||||
// ░▄██▒▄█ ▐██ ░░░ ▀▀████▒▀█▄
|
||||
// ▐███▓██░ ██▌ ▀████▄■█▄
|
||||
// ▐█▓███▀█░██▀ ░ ░▀█████▓▄
|
||||
// ▐█▓██▀▄█▒██▀ ▄▄░ ▄▄▄ ░░░ ░▀████▒▄
|
||||
// ▐████▀▄█■█▀ ▀▀ ░█████░
|
||||
// ▐█▓█▀████▀ ░ ▐▓███▒
|
||||
// █░███▀▀ ░░░ ▄█ ░░░ █████
|
||||
// ▐█▓█░▀▀ ░░▄█▄▄▄▄▄ ▀▄ ▌▄▄▄░▄▄▄▄▄ ▐████░
|
||||
// ▐███▌ ▄▀█████████▄ ▌▐▄████████▄ ▐▓███░
|
||||
// ▐███░░░▀▄█▀▄▄████▄▀░ ▐████████▒ ▀ ░███░
|
||||
// ░████░ ▓▀ ▄███████▀▌ ▀▄■████▀▀█▀ ██▀█
|
||||
// ▓███░ ░▄▀▀░░░ ▀ ░░▌ ▄▀▀▄░░▀░▄▀▄ ▐██▀▄
|
||||
// ░███░ ▄▓▓▄▄░▀▀█▀█ ▌░░ ▀█▀█▀▀ ▐██▀
|
||||
// █▀▄▐██ ▀░░ ▄▀ ▐ █ ▀ ▄▄▄░ ░▀▄█▄▀█
|
||||
// ▌▄ █▓ ▒ ░ █▄█▄▀▄▄▄███▄▀▄ ░░ ░ ▀ █▌
|
||||
// █▌▄░▌ ░░░▄▀█▀███████▄▀▄▀▄▀▀▄▄▄ █▀█░▐
|
||||
// ██▄ ░░░▄█▄▀██▄█■██████▄█▄█▄■▀█░ ▐░▐
|
||||
// ▀██░ ░▄██████████████████▄█▄█ ░█ ░ ▄▀
|
||||
// ▀▓█▄▓░░ ▒█▀█████████████████████▒ ██▀
|
||||
// ▀███ ▓▒ ██████████████▀▀▀▀█▄▀ ░▄█▒
|
||||
// ▀███ ▀█▄▀▄█████▀▀ ▓▓▓▄░ ▐ ░▄██
|
||||
// ▀██ ▄███████▄████████▀░░ ░▄██
|
||||
// ▄██▀▀▄ █▄▀▄██▒▒███████████▀▀▀▄░ ░███░
|
||||
// ▄██▀▄▄░░▀▐▄████▄ █████▀▄░░█▀▄▀░░ ▄██░
|
||||
// █████▄▄▄███▀░█▌██▄▀▀█████▄▄░░░▄▄███▀██▄ ▄▀▀▀▄▄
|
||||
// ▀██████▀■▄█▄▄ ░▀███████████████▓▓░░▄██▀▄████▄▄▀▄
|
||||
//
|
||||
// █▀█ █ █▀█ █▀█ █▄▀ ▐▀█▀▌█▀█ █▀█ █▄ █ ▀█▀ █▀█ █▀▀
|
||||
// █▀▄ █ █ █ █ █ ▀▄ █ █▀▄ █ █ █ ▀█ █ █ ▀▀█
|
||||
// █▄█ █▄▌█▄█ █▄█ █ █ █ █ █ █▄█ █ █ ▄█▄ █▄█ █▄█
|
||||
//
|
||||
// THERE WILL BE BLOCKS march 01 2017
|
||||
//
|
||||
// @see libc/str/str.h
|
||||
// @see kCp437i[]
|
||||
kCp437:
|
||||
.short 0x00a0,0x263a,0x263b,0x2665,0x2666,0x2663,0x2660,0x2022 #00: ☺☻♥♦♣♠•
|
||||
.short 0x25d8,0x25cb,0x25d9,0x2642,0x2640,0x266a,0x266b,0x263c #08:◘○◙♂♀♪♫☼
|
||||
|
|
|
@ -22,14 +22,14 @@
|
|||
#include "libc/nexgen32e/kcpuids.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
|
||||
/ Globally precomputed CPUID.
|
||||
/
|
||||
/ This module lets us check CPUID in 0.06ns rather than 51.00ns.
|
||||
/ If every piece of native software linked this module, then the
|
||||
/ world would be a much better place; since all the alternatives
|
||||
/ are quite toilsome.
|
||||
/
|
||||
/ @see www.felixcloutier.com/x86/cpuid
|
||||
// Globally precomputed CPUID.
|
||||
//
|
||||
// This module lets us check CPUID in 0.06ns rather than 51.00ns.
|
||||
// If every piece of native software linked this module, then the
|
||||
// world would be a much better place; since all the alternatives
|
||||
// are quite toilsome.
|
||||
//
|
||||
// @see www.felixcloutier.com/x86/cpuid
|
||||
.initbss 201,_init_kCpuids
|
||||
kCpuids:.long 0,0,0,0 # EAX=0 (Basic Processor Info)
|
||||
.long 0,0,0,0 # EAX=1 (Processor Info)
|
||||
|
|
|
@ -18,10 +18,10 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Castagnoli CRC32 ISCSI Polynomial
|
||||
/ x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1
|
||||
/ 0b00011110110111000110111101000001
|
||||
/ bitreverse32(0x1edc6f41)
|
||||
// Castagnoli CRC32 ISCSI Polynomial
|
||||
// x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1
|
||||
// 0b00011110110111000110111101000001
|
||||
// bitreverse32(0x1edc6f41)
|
||||
#define ISCSI_POLYNOMIAL 0x82f63b78
|
||||
|
||||
.initbss 300,_init_kCrc32cTab
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#include "libc/macros.h"
|
||||
|
||||
.initbss 202,_init_kHalfCache3
|
||||
/ Half size of level 3 cache in bytes.
|
||||
// Half size of level 3 cache in bytes.
|
||||
kHalfCache3:
|
||||
// One 64-bit slot, zero as assembled. NOTE(review): presumably filled
// in by _init_kHalfCache3 during startup; confirm in the init code.
.quad 0
|
||||
.endobj kHalfCache3,globl
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Signal mask constant w/ no signal bits set.
|
||||
// Signal mask constant w/ no signal bits set.
|
||||
.initbss 300,_init_kSigsetEmpty
|
||||
kSigsetEmpty:
|
||||
.rept NSIG / 64
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Signal mask constant w/ every signal bit set.
|
||||
// Signal mask constant w/ every signal bit set.
|
||||
.initbss 300,_init_kSigsetFull
|
||||
kSigsetFull:
|
||||
.rept NSIG / 64
|
||||
|
|
|
@ -18,11 +18,11 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Stores CPU Timestamp Counter at startup.
|
||||
/
|
||||
/ It can be useful as an added source of seeding information.
|
||||
/
|
||||
/ @note rdtsc is a 25 cycle instruction
|
||||
// Stores CPU Timestamp Counter at startup.
|
||||
//
|
||||
// It can be useful as an added source of seeding information.
|
||||
//
|
||||
// @note rdtsc is a 25 cycle instruction
|
||||
.initbss 200,_init_kStartTsc
|
||||
kStartTsc:
|
||||
.quad 0
|
||||
|
|
|
@ -18,12 +18,12 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ ASCII uppercase → lowercase translation tables.
|
||||
/
|
||||
/ char kToLower[256];
|
||||
/ char16_t kToLower16[256];
|
||||
/
|
||||
/ @note kToLower16 saves 128kb; CMOVcc can't even 8-bit
|
||||
// ASCII uppercase → lowercase translation tables.
|
||||
//
|
||||
// char kToLower[256];
|
||||
// char16_t kToLower16[256];
|
||||
//
|
||||
// @note kToLower16 saves 128kb; CMOVcc can't even 8-bit
|
||||
.initbss 300,_init_kToLower
|
||||
kToLower:
|
||||
.rept 256
|
||||
|
@ -43,8 +43,8 @@ kToLower16:
|
|||
xchg %rsi,(%rsp)
|
||||
xor %ecx,%ecx
|
||||
0: inc %ecx
|
||||
addb $0x20,'A-1(%rsi,%rcx)
|
||||
cmp $'Z-'A,%ecx
|
||||
addb $0x20,'A'-1(%rsi,%rcx)
|
||||
cmp $'Z'-'A',%ecx
|
||||
jne 0b
|
||||
xor %eax,%eax
|
||||
mov $256,%ecx
|
||||
|
|
|
@ -18,10 +18,10 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Fast log₁₀ when 𝑥 is an integer.
|
||||
/
|
||||
/ @param rdi is uint64 𝑥
|
||||
/ @domain 0<𝑥<2⁶⁴ ∧ 𝑥∊ℤ
|
||||
// Fast log₁₀ when 𝑥 is an integer.
|
||||
//
|
||||
// @param rdi is uint64 𝑥
|
||||
// @domain 0<𝑥<2⁶⁴ ∧ 𝑥∊ℤ
|
||||
llog10: .leafprologue
|
||||
.profilable
|
||||
bsr %rdi,%rax
|
||||
|
|
|
@ -19,11 +19,12 @@
|
|||
#include "libc/macros.h"
|
||||
.privileged
|
||||
|
||||
/ Loads XMM registers from buffer.
|
||||
/
|
||||
/ @param %rdi points to &(forcealign(16) uint8_t[256])[128]
|
||||
/ @note modern cpus have out-of-order execution engines
|
||||
loadxmm:.leafprologue
|
||||
// Loads XMM registers from buffer.
|
||||
//
|
||||
// @param %rdi points to &(forcealign(16) uint8_t[256])[128]
|
||||
// @note modern cpus have out-of-order execution engines
|
||||
loadxmm:
|
||||
.leafprologue
|
||||
movaps -0x80(%rdi),%xmm0
|
||||
movaps -0x70(%rdi),%xmm1
|
||||
movaps -0x60(%rdi),%xmm2
|
||||
|
|
|
@ -18,16 +18,16 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ @fileoverview Byte-order conversion functions.
|
||||
/
|
||||
/ Endianness is deceptively complicated to the uninitiated. Many
|
||||
/ helpers have been written by our top minds to address perceived
|
||||
/ difficulties. These ones got through standardization processes.
|
||||
/ To protect their legacy, all 19 functions have been implemented
|
||||
/ in just 17 bytes.
|
||||
/
|
||||
/ @see READ32LE(), READ32BE(), etc.
|
||||
/ @asyncsignalsafe
|
||||
// @fileoverview Byte-order conversion functions.
|
||||
//
|
||||
// Endianness is deceptively complicated to the uninitiated. Many
|
||||
// helpers have been written by our top minds to address perceived
|
||||
// difficulties. These ones got through standardization processes.
|
||||
// To protect their legacy, all 19 functions have been implemented
|
||||
// in just 17 bytes.
|
||||
//
|
||||
// @see READ32LE(), READ32BE(), etc.
|
||||
// @asyncsignalsafe
|
||||
|
||||
bswap_64:
|
||||
htobe64:
|
||||
|
|
|
@ -19,14 +19,14 @@
|
|||
#include "libc/macros.h"
|
||||
.privileged
|
||||
|
||||
/ Loads previously saved processor state.
|
||||
/
|
||||
/ @param rdi points to the jmp_buf
|
||||
/ @param esi is returned by setjmp() invocation (coerced nonzero)
|
||||
/ @noreturn
|
||||
/ @assume system five nexgen32e abi conformant
|
||||
/ @note code built w/ microsoft abi compiler can't call this
|
||||
/ @see gclongjmp() unwinds gc() destructors
|
||||
// Loads previously saved processor state.
|
||||
//
|
||||
// @param rdi points to the jmp_buf
|
||||
// @param esi is returned by setjmp() invocation (coerced nonzero)
|
||||
// @noreturn
|
||||
// @assume system five nexgen32e abi conformant
|
||||
// @note code built w/ microsoft abi compiler can't call this
|
||||
// @see gclongjmp() unwinds gc() destructors
|
||||
longjmp:mov %esi,%eax
|
||||
test %eax,%eax
|
||||
jnz 1f
|
||||
|
|
|
@ -18,21 +18,21 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Finds leading bits in 𝑥.
|
||||
/
|
||||
/ uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
|
||||
/ 0x00000000 wut 32 0 wut 32
|
||||
/ 0x00000001 0 0 1 0 31
|
||||
/ 0x80000001 0 0 1 31 0
|
||||
/ 0x80000000 31 31 32 31 0
|
||||
/ 0x00000010 4 4 5 4 27
|
||||
/ 0x08000010 4 4 5 27 4
|
||||
/ 0x08000000 27 27 28 27 4
|
||||
/ 0xffffffff 0 0 1 31 0
|
||||
/
|
||||
/ @param edi is 32-bit unsigned 𝑥 value
|
||||
/ @return eax number in range [0,32) or 32 if 𝑥 is 0
|
||||
/ @see also treasure trove of nearly identical functions
|
||||
// Counts leading zero bits in 𝑥.
|
||||
//
|
||||
// uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
|
||||
// 0x00000000 wut 32 0 wut 32
|
||||
// 0x00000001 0 0 1 0 31
|
||||
// 0x80000001 0 0 1 31 0
|
||||
// 0x80000000 31 31 32 31 0
|
||||
// 0x00000010 4 4 5 4 27
|
||||
// 0x08000010 4 4 5 27 4
|
||||
// 0x08000000 27 27 28 27 4
|
||||
// 0xffffffff 0 0 1 31 0
|
||||
//
|
||||
// @param edi is 32-bit unsigned 𝑥 value
|
||||
// @return eax number in range [0,32) or 32 if 𝑥 is 0
|
||||
// @see also treasure trove of nearly identical functions
|
||||
lzcnt: .leafprologue
|
||||
.profilable
|
||||
mov $31,%eax
|
||||
|
|
|
@ -18,21 +18,21 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Finds leading bits in 𝑥.
|
||||
/
|
||||
/ uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
|
||||
/ 0x00000000 wut 32 0 wut 32
|
||||
/ 0x00000001 0 0 1 0 31
|
||||
/ 0x80000001 0 0 1 31 0
|
||||
/ 0x80000000 31 31 32 31 0
|
||||
/ 0x00000010 4 4 5 4 27
|
||||
/ 0x08000010 4 4 5 27 4
|
||||
/ 0x08000000 27 27 28 27 4
|
||||
/ 0xffffffff 0 0 1 31 0
|
||||
/
|
||||
/ @param rdi is 64-bit unsigned 𝑥 value
|
||||
/ @return rax number in range [0,64) or 64 if 𝑥 is 0
|
||||
/ @see also treasure trove of nearly identical functions
|
||||
// Counts leading zero bits in 𝑥.
|
||||
//
|
||||
// uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
|
||||
// 0x00000000 wut 32 0 wut 32
|
||||
// 0x00000001 0 0 1 0 31
|
||||
// 0x80000001 0 0 1 31 0
|
||||
// 0x80000000 31 31 32 31 0
|
||||
// 0x00000010 4 4 5 4 27
|
||||
// 0x08000010 4 4 5 27 4
|
||||
// 0x08000000 27 27 28 27 4
|
||||
// 0xffffffff 0 0 1 31 0
|
||||
//
|
||||
// @param rdi is 64-bit unsigned 𝑥 value
|
||||
// @return rax number in range [0,64) or 64 if 𝑥 is 0
|
||||
// @see also treasure trove of nearly identical functions
|
||||
lzcntl: .leafprologue
|
||||
.profilable
|
||||
mov $31,%eax
|
||||
|
|
|
@ -19,46 +19,46 @@
|
|||
#include "libc/macros.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
|
||||
/ Broadcast byte literal to vector, e.g.
|
||||
/
|
||||
/ # xmm0=0x12121212121212121212121212121212
|
||||
/ .bcblit $0x12,%al,%eax,%xmm0
|
||||
/
|
||||
/ @param reg and regSI need to be the same register
|
||||
// Broadcast byte literal to vector, e.g.
|
||||
//
|
||||
// # xmm0=0x12121212121212121212121212121212
|
||||
// .bcblit $0x12,%al,%eax,%xmm0
|
||||
//
|
||||
// @param reg and regSI need to be the same register
|
||||
.macro .bcblit lit:req reg:req regSI:req xmm:req
|
||||
// Stage the byte literal in the general-purpose register.
mov \lit,\reg
|
||||
// Move the 32-bit view of that same register into the vector.
movd \regSI,\xmm
|
||||
// Replicate the lowest byte across the whole vector.
pbroadcastb \xmm
|
||||
.endm
|
||||
|
||||
/ Broadcast word literal to vector, e.g.
|
||||
/
|
||||
/ # xmm0=0x01230123012301230123012301230123
|
||||
/ .bcwlit $0x123,%ax,%eax,%xmm0
|
||||
/
|
||||
/ @param reg and regSI need to be the same register
|
||||
// Broadcast word literal to vector, e.g.
|
||||
//
|
||||
// # xmm0=0x01230123012301230123012301230123
|
||||
// .bcwlit $0x123,%ax,%eax,%xmm0
|
||||
//
|
||||
// @param reg and regSI need to be the same register
|
||||
.macro .bcwlit lit:req reg:req regSI:req xmm:req
|
||||
// Stage the 16-bit literal in the general-purpose register.
mov \lit,\reg
|
||||
// Move the 32-bit view of that same register into the vector.
movd \regSI,\xmm
|
||||
// Replicate the lowest word across the whole vector.
pbroadcastw \xmm
|
||||
.endm
|
||||
|
||||
/ Broadcast int16 from register to vector.
|
||||
// Broadcast int16 from register to vector.
|
||||
.macro .bcwreg regSI:req xmm:req
|
||||
// Move the 32-bit source register into the vector.
movd \regSI,\xmm
|
||||
// Replicate the lowest word across the whole vector.
pbroadcastw \xmm
|
||||
.endm
|
||||
|
||||
/ Sets all bytes in XMM register to first byte, e.g.
|
||||
/
|
||||
/ mov $0x11,%eax
|
||||
/ movd %eax,%xmm0
|
||||
/ pbroadcastb %xmm0
|
||||
/
|
||||
/ 11000000000000000000000000000000
|
||||
/ → 11111111111111111111111111111111
|
||||
/
|
||||
/ @param xmm can be %xmm0,%xmm1,etc.
|
||||
// Sets all bytes in XMM register to first byte, e.g.
|
||||
//
|
||||
// mov $0x11,%eax
|
||||
// movd %eax,%xmm0
|
||||
// pbroadcastb %xmm0
|
||||
//
|
||||
// 11000000000000000000000000000000
|
||||
// → 11111111111111111111111111111111
|
||||
//
|
||||
// @param xmm can be %xmm0,%xmm1,etc.
|
||||
.macro pbroadcastb xmm:req
|
||||
#if X86_NEED(AVX2)
|
||||
vpbroadcastb \xmm,\xmm
|
||||
|
@ -69,16 +69,16 @@
|
|||
#endif
|
||||
.endm
|
||||
|
||||
/ Sets all words in XMM register to first word, e.g.
|
||||
/
|
||||
/ mov $0x1234,%eax
|
||||
/ movd %eax,%xmm0
|
||||
/ pbroadcastw %xmm0
|
||||
/
|
||||
/ 12340000000000000000000000000000
|
||||
/ → 12341234123412341234123412341234
|
||||
/
|
||||
/ @param xmm can be %xmm0,%xmm1,etc.
|
||||
// Sets all words in XMM register to first word, e.g.
|
||||
//
|
||||
// mov $0x1234,%eax
|
||||
// movd %eax,%xmm0
|
||||
// pbroadcastw %xmm0
|
||||
//
|
||||
// 12340000000000000000000000000000
|
||||
// → 12341234123412341234123412341234
|
||||
//
|
||||
// @param xmm can be %xmm0,%xmm1,etc.
|
||||
.macro pbroadcastw xmm:req
|
||||
#if X86_NEED(AVX2)
|
||||
vpbroadcastw \xmm,\xmm
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
.source __FILE__
|
||||
.code16 # ∩ .code32 ∩ .code64
|
||||
|
||||
/ Function Profiling Hook.
|
||||
/ cc -pg adds this to the start of global functions.
|
||||
// Function Profiling Hook.
|
||||
// cc -pg adds this to the start of global functions.
|
||||
// Default hook body: returns straight away without doing any work.
mcount: ret
|
||||
// NOTE(review): the weak argument presumably lets a real profiler
// supply its own mcount; confirm against the .endfn macro.
.endfn mcount,weak
|
||||
|
|
|
@ -30,28 +30,28 @@
|
|||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Copies memory.
|
||||
/
|
||||
/ DEST and SRC must not overlap, unless DEST≤SRC.
|
||||
/
|
||||
/ @param rdi is dest
|
||||
/ @param rsi is src
|
||||
/ @param rdx is number of bytes
|
||||
/ @return original rdi copied to rax
|
||||
/ @mode long
|
||||
/ @asyncsignalsafe
|
||||
// Copies memory.
|
||||
//
|
||||
// DEST and SRC must not overlap, unless DEST≤SRC.
|
||||
//
|
||||
// @param rdi is dest
|
||||
// @param rsi is src
|
||||
// @param rdx is number of bytes
|
||||
// @return original rdi copied to rax
|
||||
// @mode long
|
||||
// @asyncsignalsafe
|
||||
memcpy: mov %rdi,%rax
|
||||
/ 𝑠𝑙𝑖𝑑𝑒
|
||||
// 𝑠𝑙𝑖𝑑𝑒
|
||||
.align 16
|
||||
.endfn memcpy,globl
|
||||
|
||||
/ Copies memory w/ minimal impact ABI.
|
||||
/
|
||||
/ @param rdi is dest
|
||||
/ @param rsi is src
|
||||
/ @param rdx is number of bytes
|
||||
/ @clob flags,rcx,xmm3,xmm4
|
||||
/ @mode long
|
||||
// Copies memory w/ minimal impact ABI.
|
||||
//
|
||||
// @param rdi is dest
|
||||
// @param rsi is src
|
||||
// @param rdx is number of bytes
|
||||
// @clob flags,rcx,xmm3,xmm4
|
||||
// @mode long
|
||||
MemCpy: .leafprologue
|
||||
.profilable
|
||||
mov $.Lmemcpytab.ro.size,%ecx
|
||||
|
|
|
@ -18,14 +18,14 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Creates bit mask of which bytes are the same.
|
||||
/
|
||||
/ @param %rdi points to bit mask (write-only)
|
||||
/ @param %rsi points to first buffer (read-only)
|
||||
/ @param %rdx points to second buffer (read-only)
|
||||
/ @param %rcx is byte length of both %rsi and %rdx
|
||||
/ @return %rax is set to %rdi
|
||||
/ @note buffers should be 128-byte aligned
|
||||
// Creates bit mask of which bytes are the same.
|
||||
//
|
||||
// @param %rdi points to bit mask (write-only)
|
||||
// @param %rsi points to first buffer (read-only)
|
||||
// @param %rdx points to second buffer (read-only)
|
||||
// @param %rcx is byte length of both %rsi and %rdx
|
||||
// @return %rax is set to %rdi
|
||||
// @note buffers should be 128-byte aligned
|
||||
memeqmask:
|
||||
.leafprologue
|
||||
xor %eax,%eax
|
||||
|
|
|
@ -20,13 +20,13 @@
|
|||
#include "libc/macros.h"
|
||||
.text.startup
|
||||
|
||||
/ Initializes jump table for memset() and memcpy().
|
||||
/
|
||||
/ @param !ZF if required cpu vector extensions are available
|
||||
/ @param rdi is address of 64-bit jump table
|
||||
/ @param rsi is address of 8-bit jump initializers
|
||||
/ @param rdx is address of indirect branch
|
||||
/ @param ecx is size of jump table
|
||||
// Initializes jump table for memset() and memcpy().
|
||||
//
|
||||
// @param !ZF if required cpu vector extensions are available
|
||||
// @param rdi is address of 64-bit jump table
|
||||
// @param rsi is address of 8-bit jump initializers
|
||||
// @param rdx is address of indirect branch
|
||||
// @param ecx is size of jump table
|
||||
memjmpinit:
|
||||
.leafprologue
|
||||
setnz %r8b
|
||||
|
|
|
@ -18,19 +18,19 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Copies memory.
|
||||
/
|
||||
/ DEST and SRC may overlap.
|
||||
/
|
||||
/ @param rdi is dest
|
||||
/ @param rsi is src
|
||||
/ @param rdx is number of bytes
|
||||
/ @return original rdi copied to rax
|
||||
/ @clob flags,rcx
|
||||
/ @asyncsignalsafe
|
||||
// Copies memory.
|
||||
//
|
||||
// DEST and SRC may overlap.
|
||||
//
|
||||
// @param rdi is dest
|
||||
// @param rsi is src
|
||||
// @param rdx is number of bytes
|
||||
// @return original rdi copied to rax
|
||||
// @clob flags,rcx
|
||||
// @asyncsignalsafe
|
||||
memmove:
|
||||
mov %rdi,%rax
|
||||
/ 𝑠𝑙𝑖𝑑𝑒
|
||||
// 𝑠𝑙𝑖𝑑𝑒
|
||||
.endfn MemMove,globl,hidden
|
||||
|
||||
MemMove:
|
||||
|
|
|
@ -19,14 +19,14 @@
|
|||
#include "libc/macros.h"
|
||||
.source __FILE__
|
||||
|
||||
/ Copies memory.
|
||||
/
|
||||
/ DEST and SRC must not overlap unless DEST ≤ SRC.
|
||||
/
|
||||
/ @param rdi is dest
|
||||
/ @param rsi is src
|
||||
/ @param rdx is number of bytes
|
||||
/ @return original rdi + rdx copied to rax
|
||||
// Copies memory.
|
||||
//
|
||||
// DEST and SRC must not overlap unless DEST ≤ SRC.
|
||||
//
|
||||
// @param rdi is dest
|
||||
// @param rsi is src
|
||||
// @param rdx is number of bytes
|
||||
// @return original rdi + rdx copied to rax
|
||||
mempcpy:
|
||||
lea (%rdi,%rdx),%rax
|
||||
jmp MemCpy
|
||||
|
|
|
@ -20,14 +20,14 @@
|
|||
#include "libc/dce.h"
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Searches for last instance of byte in memory region.
|
||||
/
|
||||
/ @param rdi points to data to search
|
||||
/ @param esi is treated as unsigned char
|
||||
/ @param rdx is byte length of rdi
|
||||
/ @return rax is address of last %sil in %rdi, or NULL
|
||||
/ @note AVX2 requires Haswell (2014+) or Excavator (2015+)
|
||||
/ @asyncsignalsafe
|
||||
// Searches for last instance of byte in memory region.
|
||||
//
|
||||
// @param rdi points to data to search
|
||||
// @param esi is treated as unsigned char
|
||||
// @param rdx is byte length of rdi
|
||||
// @return rax is address of last %sil in %rdi, or NULL
|
||||
// @note AVX2 requires Haswell (2014+) or Excavator (2015+)
|
||||
// @asyncsignalsafe
|
||||
memrchr:.leafprologue
|
||||
.profilable
|
||||
#if !IsTiny()
|
||||
|
|
|
@ -20,13 +20,13 @@
|
|||
#include "libc/dce.h"
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Searches for last instance of uint16_t in memory region.
|
||||
/
|
||||
/ @param rdi points to data to search
|
||||
/ @param esi is treated as uint16_t
|
||||
/ @param rdx is short count in rdi
|
||||
/ @return rax is address of last %si in %rdi, or NULL
|
||||
/ @note AVX2 requires Haswell (2014+) or Excavator (2015+)
|
||||
// Searches for last instance of uint16_t in memory region.
|
||||
//
|
||||
// @param rdi points to data to search
|
||||
// @param esi is treated as uint16_t
|
||||
// @param rdx is short count in rdi
|
||||
// @return rax is address of last %si in %rdi, or NULL
|
||||
// @note AVX2 requires Haswell (2014+) or Excavator (2015+)
|
||||
memrchr16:
|
||||
.leafprologue
|
||||
.profilable
|
||||
|
|
|
@ -20,13 +20,13 @@
|
|||
#include "libc/dce.h"
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Searches for last instance of wchar_t in memory region.
|
||||
/
|
||||
/ @param rdi points to data to search
|
||||
/ @param esi is treated as int32_t (officially wchar_t)
|
||||
/ @param rdx is short count in rdi
|
||||
/ @return rax is address of last %esi in %rdi, or NULL
|
||||
/ @note AVX2 requires Haswell (2014+) or Excavator (2015+)
|
||||
// Searches for last instance of wchar_t in memory region.
|
||||
//
|
||||
// @param rdi points to data to search
|
||||
// @param esi is treated as int32_t (officially wchar_t)
|
||||
// @param rdx is wchar_t count in rdi
|
||||
// @return rax is address of last %esi in %rdi, or NULL
|
||||
// @note AVX2 requires Haswell (2014+) or Excavator (2015+)
|
||||
wmemrchr:
|
||||
.leafprologue
|
||||
.profilable
|
||||
|
|
|
@ -24,26 +24,26 @@
|
|||
#include "libc/nexgen32e/macros.h"
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Sets memory.
|
||||
/
|
||||
/ @param rdi is dest
|
||||
/ @param esi is the byte to set
|
||||
/ @param edx is the number of bytes to set
|
||||
/ @return original rdi copied to rax
|
||||
/ @mode long
|
||||
/ @asyncsignalsafe
|
||||
// Sets memory.
|
||||
//
|
||||
// @param rdi is dest
|
||||
// @param esi is the byte to set
|
||||
// @param edx is the number of bytes to set
|
||||
// @return original rdi copied to rax
|
||||
// @mode long
|
||||
// @asyncsignalsafe
|
||||
memset: mov %rdi,%rax
|
||||
/ 𝑠𝑙𝑖𝑑𝑒
|
||||
// 𝑠𝑙𝑖𝑑𝑒
|
||||
.align 16
|
||||
.endfn memset,globl
|
||||
|
||||
/ Sets memory w/ minimal-impact ABI.
|
||||
/
|
||||
/ @param rdi is dest
|
||||
/ @param esi is the byte to set
|
||||
/ @param edx is the number of bytes to set
|
||||
/ @clob flags,rcx,xmm3
|
||||
/ @mode long
|
||||
// Sets memory w/ minimal-impact ABI.
|
||||
//
|
||||
// @param rdi is dest
|
||||
// @param esi is the byte to set
|
||||
// @param edx is the number of bytes to set
|
||||
// @clob flags,rcx,xmm3
|
||||
// @mode long
|
||||
MemSet: .leafprologue
|
||||
.profilable
|
||||
mov $.Lmemsettab.ro.size,%ecx
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
.source __FILE__
|
||||
.code16 # ∩ .code32 ∩ .code64
|
||||
|
||||
/ Optional function stub.
|
||||
// Optional function stub.
|
||||
missingno:
|
||||
xor %ax,%ax
|
||||
ret
|
||||
|
|
|
@ -19,16 +19,16 @@
|
|||
#include "libc/macros.h"
|
||||
.text.windows
|
||||
|
||||
/ Translates function call from code built w/ MS-style compiler.
|
||||
/
|
||||
/ This wraps WinMain() and callback functions passed to Win32 API.
|
||||
/ Please note an intermediary jump slot is needed to set %rax.
|
||||
/
|
||||
/ @param %rax is function address
|
||||
/ @param %rcx,%rdx,%r8,%r9
|
||||
/ @return %rax,%xmm0
|
||||
/ @note slower than __sysv2nt
|
||||
/ @see NT2SYSV() macro
|
||||
// Translates function call from code built w/ MS-style compiler.
|
||||
//
|
||||
// This wraps WinMain() and callback functions passed to Win32 API.
|
||||
// Please note an intermediary jump slot is needed to set %rax.
|
||||
//
|
||||
// @param %rax is function address
|
||||
// @param %rcx,%rdx,%r8,%r9
|
||||
// @return %rax,%xmm0
|
||||
// @note slower than __sysv2nt
|
||||
// @see NT2SYSV() macro
|
||||
__nt2sysv:
|
||||
push %rbp
|
||||
mov %rsp,%rbp
|
||||
|
|
|
@ -17,38 +17,38 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
|
||||
/ Documentation for Intel(R)'s “Most Complicated Instruction”(TM)
|
||||
/
|
||||
/ mnemonic op1 op2 op3 op4 modif f description, notes
|
||||
/ ═══════════ ════ ════ ════ ═══ ════════ ═════════════════════════════
|
||||
/ PCMPESTRM XMM0 Vdq Wdq ... o..szapc Explicit Length, Return Mask
|
||||
/ PCMPESTRI rCX Vdq Wdq ... o..szapc Explicit Length, Return Index
|
||||
/ PCMPISTRM XMM0 Vdq Wdq Ib o..szapc Implicit Length, Return Mask
|
||||
/ PCMPISTRI rCX Vdq Wdq Ib o..szapc Implicit Length, Return Index
|
||||
/
|
||||
/ CF ← Reset if IntRes2 is equal to zero, set otherwise
|
||||
/ ZF ← Set if any byte/word of xmm2/mem128 is null, reset otherwise
|
||||
/ SF ← Set if any byte/word of xmm1 is null, reset otherwise
|
||||
/ OF ← IntRes2[0]
|
||||
/ AF ← Reset
|
||||
/ PF ← Reset
|
||||
/
|
||||
/ PCMP{E,I}STR{I,M} Control Byte
|
||||
/ @see Intel Manual V.2B §4.1.7
|
||||
/
|
||||
/ ┌─0:index of the LEAST significant, set, bit is used
|
||||
/ │ regardless of corresponding input element validity
|
||||
/ │ intres2 is returned in least significant bits of xmm0
|
||||
/ ├─1:index of the MOST significant, set, bit is used
|
||||
/ │ regardless of corresponding input element validity
|
||||
/ │ each bit of intres2 is expanded to byte/word
|
||||
/ │┌─0:negation of intres1 is for all 16 (8) bits
|
||||
/ │├─1:negation of intres1 is masked by reg/mem validity
|
||||
/ ││┌─intres1 is negated (1’s complement)
|
||||
/ │││┌─mode{equalany,ranges,equaleach,equalordered}
|
||||
/ ││││ ┌─issigned
|
||||
/ ││││ │┌─is16bit
|
||||
/ u│││├┐││
|
||||
// Documentation for Intel(R)'s “Most Complicated Instruction”(TM)
|
||||
//
|
||||
// mnemonic op1 op2 op3 op4 modif f description, notes
|
||||
// ═══════════ ════ ════ ════ ═══ ════════ ═════════════════════════════
|
||||
// PCMPESTRM XMM0 Vdq Wdq ... o..szapc Explicit Length, Return Mask
|
||||
// PCMPESTRI rCX Vdq Wdq ... o..szapc Explicit Length, Return Index
|
||||
// PCMPISTRM XMM0 Vdq Wdq Ib o..szapc Implicit Length, Return Mask
|
||||
// PCMPISTRI rCX Vdq Wdq Ib o..szapc Implicit Length, Return Index
|
||||
//
|
||||
// CF ← Reset if IntRes2 is equal to zero, set otherwise
|
||||
// ZF ← Set if any byte/word of xmm2/mem128 is null, reset otherwise
|
||||
// SF ← Set if any byte/word of xmm1 is null, reset otherwise
|
||||
// OF ← IntRes2[0]
|
||||
// AF ← Reset
|
||||
// PF ← Reset
|
||||
//
|
||||
// PCMP{E,I}STR{I,M} Control Byte
|
||||
// @see Intel Manual V.2B §4.1.7
|
||||
//
|
||||
// ┌─0:index of the LEAST significant, set, bit is used
|
||||
// │ regardless of corresponding input element validity
|
||||
// │ intres2 is returned in least significant bits of xmm0
|
||||
// ├─1:index of the MOST significant, set, bit is used
|
||||
// │ regardless of corresponding input element validity
|
||||
// │ each bit of intres2 is expanded to byte/word
|
||||
// │┌─0:negation of intres1 is for all 16 (8) bits
|
||||
// │├─1:negation of intres1 is masked by reg/mem validity
|
||||
// ││┌─intres1 is negated (1’s complement)
|
||||
// │││┌─mode{equalany,ranges,equaleach,equalordered}
|
||||
// ││││ ┌─issigned
|
||||
// ││││ │┌─is16bit
|
||||
// u│││├┐││
|
||||
.Lequalordered = 0b00001100
|
||||
.Lequalorder16 = 0b00001101
|
||||
.Lequalranges8 = 0b00000100
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Supplies argv[0] the GNU way.
|
||||
// Supplies argv[0] the GNU way.
|
||||
.initbss 300,_init_program_invocation_name
|
||||
program_invocation_name:
|
||||
.quad 0
|
||||
|
|
|
@ -18,10 +18,10 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Returns 𝑥 % 1,000,000,000.
|
||||
/
|
||||
/ @param rdi int64 𝑥
|
||||
/ @return rax has remainder
|
||||
// Returns 𝑥 % 1,000,000,000.
|
||||
//
|
||||
// @param rdi int64 𝑥
|
||||
// @return rax has remainder
|
||||
rem1000000000int64:
|
||||
movabs $0x112e0be826d694b3,%rdx
|
||||
mov %rdi,%rax
|
||||
|
|
|
@ -18,10 +18,10 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Returns 𝑥 % 1,000,000.
|
||||
/
|
||||
/ @param rdi int64 𝑥
|
||||
/ @return rax has remainder
|
||||
// Returns 𝑥 % 1,000,000.
|
||||
//
|
||||
// @param rdi int64 𝑥
|
||||
// @return rax has remainder
|
||||
rem1000000int64:
|
||||
movabs $0x431bde82d7b634db,%rdx
|
||||
mov %rdi,%rax
|
||||
|
|
|
@ -18,10 +18,10 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Returns 𝑥 % 10,000.
|
||||
/
|
||||
/ @param rdi int64 𝑥
|
||||
/ @return rax has remainder
|
||||
// Returns 𝑥 % 10,000.
|
||||
//
|
||||
// @param rdi int64 𝑥
|
||||
// @return rax has remainder
|
||||
rem10000int64:
|
||||
mov %rdi,%rax
|
||||
movabsq $0x346dc5d63886594b,%rdx
|
||||
|
|
|
@ -18,10 +18,10 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Returns 𝑥 % 1,000.
|
||||
/
|
||||
/ @param rdi int64 𝑥
|
||||
/ @return rax has remainder
|
||||
// Returns 𝑥 % 1,000.
|
||||
//
|
||||
// @param rdi int64 𝑥
|
||||
// @return rax has remainder
|
||||
rem1000int64:
|
||||
movabs $0x20c49ba5e353f7cf,%rdx
|
||||
mov %rdi,%rax
|
||||
|
|
|
@ -18,10 +18,10 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Returns 𝑥 % 100.
|
||||
/
|
||||
/ @param rdi int64 𝑥
|
||||
/ @return rax has remainder
|
||||
// Returns 𝑥 % 100.
|
||||
//
|
||||
// @param rdi int64 𝑥
|
||||
// @return rax has remainder
|
||||
rem100int64:
|
||||
mov %rdi,%rax
|
||||
movabsq $-6640827866535438581,%rdx
|
||||
|
|
|
@ -18,10 +18,10 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Returns 𝑥 % 10.
|
||||
/
|
||||
/ @param rdi int64 𝑥
|
||||
/ @return rax has remainder
|
||||
// Returns 𝑥 % 10.
|
||||
//
|
||||
// @param rdi int64 𝑥
|
||||
// @return rax has remainder
|
||||
rem10int64:
|
||||
movabs $0x6666666666666667,%rdx
|
||||
mov %rdi,%rax
|
||||
|
|
|
@ -19,11 +19,11 @@
|
|||
#include "libc/macros.h"
|
||||
.text.startup
|
||||
|
||||
/ Seventeen byte decompressor.
|
||||
/
|
||||
/ @param di points to output buffer
|
||||
/ @param si points to uint8_t {len₁,byte₁}, ..., {0,0}
|
||||
/ @mode long,legacy,real
|
||||
// Seventeen byte decompressor.
|
||||
//
|
||||
// @param di points to output buffer
|
||||
// @param si points to uint8_t {len₁,byte₁}, ..., {0,0}
|
||||
// @mode long,legacy,real
|
||||
rldecode:
|
||||
.leafprologue
|
||||
.profilable
|
||||
|
|
|
@ -19,11 +19,12 @@
|
|||
#include "libc/macros.h"
|
||||
.privileged
|
||||
|
||||
/ Stores XMM registers to buffer.
|
||||
/
|
||||
/ @param %rdi points to &(forcealign(16) uint8_t[256])[128]
|
||||
/ @note modern cpus have out-of-order execution engines
|
||||
savexmm:.leafprologue
|
||||
// Stores XMM registers to buffer.
|
||||
//
|
||||
// @param %rdi points to &(forcealign(16) uint8_t[256])[128]
|
||||
// @note modern cpus have out-of-order execution engines
|
||||
savexmm:
|
||||
.leafprologue
|
||||
movaps %xmm0,-0x80(%rdi)
|
||||
movaps %xmm1,-0x70(%rdi)
|
||||
movaps %xmm2,-0x60(%rdi)
|
||||
|
|
|
@ -18,14 +18,14 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Saves caller CPU state to cacheline.
|
||||
/
|
||||
/ @param rdi points to jmp_buf
|
||||
/ @return rax 0 when set and !0 when longjmp'd
|
||||
/ @returnstwice
|
||||
/ @assume system five nexgen32e abi conformant
|
||||
/ @note code built w/ microsoft abi compiler can't call this
|
||||
/ @see longjmp(), gclongjmp()
|
||||
// Saves caller CPU state to cacheline.
|
||||
//
|
||||
// @param rdi points to jmp_buf
|
||||
// @return rax 0 when set and !0 when longjmp'd
|
||||
// @returnstwice
|
||||
// @assume system five nexgen32e abi conformant
|
||||
// @note code built w/ microsoft abi compiler can't call this
|
||||
// @see longjmp(), gclongjmp()
|
||||
setjmp: lea 8(%rsp),%rax
|
||||
mov %rax,(%rdi)
|
||||
mov %rbx,8(%rdi)
|
||||
|
|
|
@ -19,21 +19,21 @@
|
|||
#include "libc/macros.h"
|
||||
#include "libc/notice.inc"
|
||||
|
||||
/ Applies no-clobber guarantee to System Five function call.
|
||||
/
|
||||
/ - Reentrant
|
||||
/ - Realigns stack
|
||||
/ - Doesn't assume red zone
|
||||
/ - Clobbers nothing (except %rax and flags)
|
||||
/
|
||||
/ This function may be called using an stdcall convention. It's
|
||||
/ useful for files named FOO.hookabi.c and BAR.ncabi.c to make
|
||||
/ calls into other parts of the system, that don't conform to the
|
||||
/ same restricted ABI.
|
||||
/
|
||||
/ @param six args and fn addr pushed on stack in reverse order
|
||||
/ @return %rax has function return value, and stack is cleaned up
|
||||
/ @see libc/shadowargs.hook.c for intended use case
|
||||
// Applies no-clobber guarantee to System Five function call.
|
||||
//
|
||||
// - Reentrant
|
||||
// - Realigns stack
|
||||
// - Doesn't assume red zone
|
||||
// - Clobbers nothing (except %rax and flags)
|
||||
//
|
||||
// This function may be called using an stdcall convention. It's
|
||||
// useful for files named FOO.hookabi.c and BAR.ncabi.c to make
|
||||
// calls into other parts of the system, that don't conform to the
|
||||
// same restricted ABI.
|
||||
//
|
||||
// @param six args and fn addr pushed on stack in reverse order
|
||||
// @return %rax has function return value, and stack is cleaned up
|
||||
// @see libc/shadowargs.hook.c for intended use case
|
||||
slowcall:
|
||||
#param %r9 # 0x40 arg6
|
||||
#param %r8 # 0x38 arg5
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ TODO(jart): pmovzxbw and vpunpcklbw
|
||||
// TODO(jart): pmovzxbw and vpunpcklbw
|
||||
strcpyzbw:
|
||||
.leafprologue
|
||||
.profilable
|
||||
|
|
|
@ -18,13 +18,13 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Returns prefix length, consisting of chars not in reject.
|
||||
/
|
||||
/ @param rdi is string
|
||||
/ @param rsi is reject nul-terminated character set
|
||||
/ @return rax is index of first byte in charset
|
||||
/ @see strspn(), strtok_r()
|
||||
/ @asyncsignalsafe
|
||||
// Returns prefix length, consisting of chars not in reject.
|
||||
//
|
||||
// @param rdi is string
|
||||
// @param rsi is reject nul-terminated character set
|
||||
// @return rax is index of first byte in charset
|
||||
// @see strspn(), strtok_r()
|
||||
// @asyncsignalsafe
|
||||
strcspn:
|
||||
push %rbp
|
||||
mov %rsp,%rbp
|
||||
|
|
|
@ -18,13 +18,13 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Returns length of NUL-terminated string.
|
||||
/
|
||||
/ @param rdi is non-null NUL-terminated string pointer
|
||||
/ @return rax is number of bytes (excluding NUL)
|
||||
/ @clob ax,dx,cx,xmm3,xmm4
|
||||
/ @note h/t agner fog
|
||||
/ @asyncsignalsafe
|
||||
// Returns length of NUL-terminated string.
|
||||
//
|
||||
// @param rdi is non-null NUL-terminated string pointer
|
||||
// @return rax is number of bytes (excluding NUL)
|
||||
// @clob ax,dx,cx,xmm3,xmm4
|
||||
// @note h/t agner fog
|
||||
// @asyncsignalsafe
|
||||
strlen: .leafprologue
|
||||
.profilable
|
||||
mov %rdi,%rax
|
||||
|
|
|
@ -21,14 +21,14 @@
|
|||
#include "libc/macros.h"
|
||||
.source __FILE__
|
||||
|
||||
/ Returns length of NUL-terminated string w/ security blankets.
|
||||
/
|
||||
/ This is like strnlen() except it'll return 0 if (1) RDI is NULL
|
||||
/ or (2) a NUL-terminator wasn't found in the first RSI bytes.
|
||||
/
|
||||
/ @param rdi is a nullable NUL-terminated string pointer
|
||||
/ @param rsi is the maximum number of bytes to consider
|
||||
/ @return rax is the number of bytes, excluding the NUL
|
||||
// Returns length of NUL-terminated string w/ security blankets.
|
||||
//
|
||||
// This is like strnlen() except it'll return 0 if (1) RDI is NULL
|
||||
// or (2) a NUL-terminator wasn't found in the first RSI bytes.
|
||||
//
|
||||
// @param rdi is a nullable NUL-terminated string pointer
|
||||
// @param rsi is the maximum number of bytes to consider
|
||||
// @return rax is the number of bytes, excluding the NUL
|
||||
strnlen_s:
|
||||
.leafprologue
|
||||
.profilable
|
||||
|
@ -39,20 +39,20 @@ strnlen_s:
|
|||
.leafepilogue
|
||||
0: xor %edx,%edx
|
||||
mov %rdi,%r8
|
||||
/ 𝑠𝑙𝑖𝑑𝑒
|
||||
// 𝑠𝑙𝑖𝑑𝑒
|
||||
.endfn strnlen_s,globl
|
||||
|
||||
/ Swiss army knife of string character scanning.
|
||||
/ Used to be fourteen fast functions in one.
|
||||
/
|
||||
/ @param rdi is non-null string memory
|
||||
/ @param rsi is max number of bytes to consider
|
||||
/ @param dl is search character #1
|
||||
/ @param dh is search character #2
|
||||
/ @param r8 is subtracted from result (for length vs. pointer)
|
||||
/ @param r9 masks result if DH is found (for NUL vs. NULL)
|
||||
/ @param r10 masks result on bytes exhausted (for length v. NULL)
|
||||
/ @return rax end pointer after r8/r9/r10 modifications
|
||||
// Swiss army knife of string character scanning.
|
||||
// Used to be fourteen fast functions in one.
|
||||
//
|
||||
// @param rdi is non-null string memory
|
||||
// @param rsi is max number of bytes to consider
|
||||
// @param dl is search character #1
|
||||
// @param dh is search character #2
|
||||
// @param r8 is subtracted from result (for length vs. pointer)
|
||||
// @param r9 masks result if DH is found (for NUL vs. NULL)
|
||||
// @param r10 masks result on bytes exhausted (for length v. NULL)
|
||||
// @return rax end pointer after r8/r9/r10 modifications
|
||||
strsak: lea -1(%rdi),%rax
|
||||
1: add $1,%rax
|
||||
sub $1,%rsi
|
||||
|
|
|
@ -19,12 +19,12 @@
|
|||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Returns pointer to first instance of character.
|
||||
/
|
||||
/ @param rdi is a non-null NUL-terminated char16_t string pointer
|
||||
/ @param esi is the search word
|
||||
/ @return rax points to character, or to NUL word if not found
|
||||
/ @note this won't return NULL if search character is NUL
|
||||
// Returns pointer to first instance of character.
|
||||
//
|
||||
// @param rdi is a non-null NUL-terminated char16_t string pointer
|
||||
// @param esi is the search word
|
||||
// @return rax points to character, or to NUL word if not found
|
||||
// @note this won't return NULL if search character is NUL
|
||||
strchrnul16:
|
||||
.leafprologue
|
||||
.profilable
|
||||
|
@ -32,13 +32,13 @@ strchrnul16:
|
|||
jmp 0f
|
||||
.endfn strchrnul16,globl
|
||||
|
||||
/ Returns pointer to first instance of character.
|
||||
/
|
||||
/ @param rdi is a non-null NUL-terminated char16_t string pointer
|
||||
/ @param esi is the search word
|
||||
/ @return rax points to first result, or NULL if not found
|
||||
/ @note this won't return NULL if search character is NUL
|
||||
/ @asyncsignalsafe
|
||||
// Returns pointer to first instance of character.
|
||||
//
|
||||
// @param rdi is a non-null NUL-terminated char16_t string pointer
|
||||
// @param esi is the search word
|
||||
// @return rax points to first result, or NULL if not found
|
||||
// @note this won't return NULL if search character is NUL
|
||||
// @asyncsignalsafe
|
||||
strchr16:
|
||||
.leafprologue
|
||||
.profilable
|
||||
|
@ -50,23 +50,23 @@ strchr16:
|
|||
jmp strsak16
|
||||
.endfn strchr16,globl
|
||||
|
||||
/ Returns pointer to first instance of character in range.
|
||||
/
|
||||
/ @param rdi is a non-null pointer to memory
|
||||
/ @param esi is the search word
|
||||
/ @return rax points to word if found, or else undefined behavior
|
||||
// Returns pointer to first instance of character in range.
|
||||
//
|
||||
// @param rdi is a non-null pointer to memory
|
||||
// @param esi is the search word
|
||||
// @return rax points to word if found, or else undefined behavior
|
||||
rawmemchr16:
|
||||
or $-1,%rdx
|
||||
/ fallthrough
|
||||
// fallthrough
|
||||
.endfn rawmemchr16,globl
|
||||
|
||||
/ Returns pointer to first instance of character in range.
|
||||
/
|
||||
/ @param rdi is a non-null pointer to memory
|
||||
/ @param esi is the search word
|
||||
/ @param rdx is length of memory in shorts
|
||||
/ @return rax points to word if found or NULL
|
||||
/ @asyncsignalsafe
|
||||
// Returns pointer to first instance of character in range.
|
||||
//
|
||||
// @param rdi is a non-null pointer to memory
|
||||
// @param esi is the search word
|
||||
// @param rdx is length of memory in shorts
|
||||
// @return rax points to word if found or NULL
|
||||
// @asyncsignalsafe
|
||||
memchr16:
|
||||
.leafprologue
|
||||
.profilable
|
||||
|
@ -77,14 +77,14 @@ memchr16:
|
|||
jmp strsak16
|
||||
.endfn memchr16,globl
|
||||
|
||||
/ Returns length of char16_t string w/ security blankets.
|
||||
/
|
||||
/ This is like strnlen() except it'll return 0 if (1) RDI is NULL
|
||||
/ or (2) a NUL-terminator wasn't found in the first RSI shorts.
|
||||
/
|
||||
/ @param rdi is a nullable NUL-terminated char16_t string pointer
|
||||
/ @param rsi is the maximum number of shorts to consider
|
||||
/ @return rax is the number of shorts, excluding the NUL
|
||||
// Returns length of char16_t string w/ security blankets.
|
||||
//
|
||||
// This is like strnlen() except it'll return 0 if (1) RDI is NULL
|
||||
// or (2) a NUL-terminator wasn't found in the first RSI shorts.
|
||||
//
|
||||
// @param rdi is a nullable NUL-terminated char16_t string pointer
|
||||
// @param rsi is the maximum number of shorts to consider
|
||||
// @return rax is the number of shorts, excluding the NUL
|
||||
strnlen16_s:
|
||||
.leafprologue
|
||||
.profilable
|
||||
|
@ -95,22 +95,22 @@ strnlen16_s:
|
|||
.leafepilogue
|
||||
.endfn strnlen16_s,globl
|
||||
|
||||
/ Returns length of NUL-terminated char16_t string.
|
||||
/
|
||||
/ @param rdi is non-null NUL-terminated char16_t string pointer
|
||||
/ @return rax is the number of shorts, excluding the NUL
|
||||
/ @asyncsignalsafe
|
||||
// Returns length of NUL-terminated char16_t string.
|
||||
//
|
||||
// @param rdi is non-null NUL-terminated char16_t string pointer
|
||||
// @return rax is the number of shorts, excluding the NUL
|
||||
// @asyncsignalsafe
|
||||
strlen16:
|
||||
or $-1,%rsi
|
||||
/ fallthrough
|
||||
// fallthrough
|
||||
.endfn strlen16,globl
|
||||
|
||||
/ Returns length of NUL-terminated memory, with limit.
|
||||
/
|
||||
/ @param rdi is non-null memory
|
||||
/ @param rsi is the maximum number of shorts to consider
|
||||
/ @return rax is the number of shorts, excluding the NUL
|
||||
/ @asyncsignalsafe
|
||||
// Returns length of NUL-terminated memory, with limit.
|
||||
//
|
||||
// @param rdi is non-null memory
|
||||
// @param rsi is the maximum number of shorts to consider
|
||||
// @return rax is the number of shorts, excluding the NUL
|
||||
// @asyncsignalsafe
|
||||
strnlen16:
|
||||
.leafprologue
|
||||
.profilable
|
||||
|
@ -118,20 +118,20 @@ strnlen16:
|
|||
0: xor %edx,%edx
|
||||
xor %r11d,%r11d
|
||||
mov %rdi,%r8
|
||||
/ fallthrough
|
||||
// fallthrough
|
||||
.endfn strnlen16,globl
|
||||
|
||||
/ Swiss Army Knife of string char16_t scanning.
|
||||
/ Sixteen fast functions in one.
|
||||
/
|
||||
/ @param rdi is non-null string memory
|
||||
/ @param rsi is max number of shorts to consider
|
||||
/ @param dx is search character #1
|
||||
/ @param r11w is search character #2
|
||||
/ @param r8 is subtracted from result (for length vs. pointer)
|
||||
/ @param r9 masks result if DH is found (for NUL vs. NULL)
|
||||
/ @param r10 masks result on shorts exhausted (for length v. NULL)
|
||||
/ @return rax end pointer after r8/r9/r10 modifications
|
||||
// Swiss Army Knife of string char16_t scanning.
|
||||
// Sixteen fast functions in one.
|
||||
//
|
||||
// @param rdi is non-null string memory
|
||||
// @param rsi is max number of shorts to consider
|
||||
// @param dx is search character #1
|
||||
// @param r11w is search character #2
|
||||
// @param r8 is subtracted from result (for length vs. pointer)
|
||||
// @param r9 masks result if r11w is found (for NUL vs. NULL)
|
||||
// @param r10 masks result on shorts exhausted (for length v. NULL)
|
||||
// @return rax end pointer after r8/r9/r10 modifications
|
||||
strsak16:
|
||||
lea -2(%rdi),%rax
|
||||
1: add $2,%rax
|
||||
|
|
|
@ -19,25 +19,25 @@
|
|||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Returns pointer to first instance of character.
|
||||
/
|
||||
/ @param rdi is a non-null NUL-terminated wchar_t string pointer
|
||||
/ @param esi is the search word
|
||||
/ @return rax points to character, or to NUL word if not found
|
||||
/ @note this won't return NULL if search character is NUL
|
||||
// Returns pointer to first instance of character.
|
||||
//
|
||||
// @param rdi is a non-null NUL-terminated wchar_t string pointer
|
||||
// @param esi is the search word
|
||||
// @return rax points to character, or to NUL word if not found
|
||||
// @note this won't return NULL if search character is NUL
|
||||
wcschrnul:
|
||||
.leafprologue
|
||||
.profilable
|
||||
or $-1,%r9
|
||||
jmp 0f
|
||||
|
||||
/ Returns pointer to first instance of character.
|
||||
/
|
||||
/ @param rdi is a non-null NUL-terminated wchar_t string pointer
|
||||
/ @param esi is the search word
|
||||
/ @return rax points to first result, or NULL if not found
|
||||
/ @note this won't return NULL if search character is NUL
|
||||
/ @asyncsignalsafe
|
||||
// Returns pointer to first instance of character.
|
||||
//
|
||||
// @param rdi is a non-null NUL-terminated wchar_t string pointer
|
||||
// @param esi is the search word
|
||||
// @return rax points to first result, or NULL if not found
|
||||
// @note this won't return NULL if search character is NUL
|
||||
// @asyncsignalsafe
|
||||
wcschr: .leafprologue
|
||||
.profilable
|
||||
xor %r9,%r9
|
||||
|
@ -47,14 +47,14 @@ wcschr: .leafprologue
|
|||
xor %r8,%r8
|
||||
jmp wcssak
|
||||
|
||||
/ Returns length of wchar_t string w/ security blankets.
|
||||
/
|
||||
/ This is like wcsnlen() except it'll return 0 if (1) RDI is NULL
|
||||
/ or (2) a NUL-terminator wasn't found in the first RSI chars.
|
||||
/
|
||||
/ @param rdi is a nullable NUL-terminated wchar_t string pointer
|
||||
/ @param rsi is the maximum number of chars to consider
|
||||
/ @return rax is the number of chars, excluding the NUL
|
||||
// Returns length of wchar_t string w/ security blankets.
|
||||
//
|
||||
// This is like wcsnlen() except it'll return 0 if (1) RDI is NULL
|
||||
// or (2) a NUL-terminator wasn't found in the first RSI chars.
|
||||
//
|
||||
// @param rdi is a nullable NUL-terminated wchar_t string pointer
|
||||
// @param rsi is the maximum number of chars to consider
|
||||
// @return rax is the number of chars, excluding the NUL
|
||||
wcsnlen_s:
|
||||
.leafprologue
|
||||
.profilable
|
||||
|
@ -65,20 +65,20 @@ wcsnlen_s:
|
|||
.leafepilogue
|
||||
.endfn wcsnlen_s,globl
|
||||
|
||||
/ Returns length of NUL-terminated wchar_t string.
|
||||
/
|
||||
/ @param rdi is non-null NUL-terminated wchar_t string pointer
|
||||
/ @return rax is the number of chars, excluding the NUL
|
||||
/ @asyncsignalsafe
|
||||
// Returns length of NUL-terminated wchar_t string.
|
||||
//
|
||||
// @param rdi is non-null NUL-terminated wchar_t string pointer
|
||||
// @return rax is the number of chars, excluding the NUL
|
||||
// @asyncsignalsafe
|
||||
wcslen: or $-1,%rsi
|
||||
/ fallthrough
|
||||
// fallthrough
|
||||
|
||||
/ Returns length of NUL-terminated memory, with limit.
|
||||
/
|
||||
/ @param rdi is non-null memory
|
||||
/ @param rsi is the maximum number of chars to consider
|
||||
/ @return rax is the number of chars, excluding the NUL
|
||||
/ @asyncsignalsafe
|
||||
// Returns length of NUL-terminated memory, with limit.
|
||||
//
|
||||
// @param rdi is non-null memory
|
||||
// @param rsi is the maximum number of chars to consider
|
||||
// @return rax is the number of chars, excluding the NUL
|
||||
// @asyncsignalsafe
|
||||
wcsnlen:.leafprologue
|
||||
.profilable
|
||||
or $-1,%r10
|
||||
|
@ -87,22 +87,22 @@ wcsnlen:.leafprologue
|
|||
mov %rdi,%r8
|
||||
jmp wcssak
|
||||
|
||||
/ Returns pointer to first instance of character in range.
|
||||
/
|
||||
/ @param rdi is a non-null pointer to memory
|
||||
/ @param esi is the search word
|
||||
/ @return rax points to word if found, or else undefined behavior
|
||||
// Returns pointer to first instance of character in range.
|
||||
//
|
||||
// @param rdi is a non-null pointer to memory
|
||||
// @param esi is the search word
|
||||
// @return rax points to word if found, or else undefined behavior
|
||||
rawwmemchr:
|
||||
or $-1,%rdx
|
||||
/ fallthrough
|
||||
// fallthrough
|
||||
|
||||
/ Returns pointer to first instance of character in range.
|
||||
/
|
||||
/ @param rdi is a non-null pointer to memory
|
||||
/ @param esi is the int32_t search word (officially wchar_t)
|
||||
/ @param rdx is length of memory in chars
|
||||
/ @return rax points to word if found or NULL
|
||||
/ @asyncsignalsafe
|
||||
// Returns pointer to first instance of character in range.
|
||||
//
|
||||
// @param rdi is a non-null pointer to memory
|
||||
// @param esi is the int32_t search word (officially wchar_t)
|
||||
// @param rdx is length of memory in chars
|
||||
// @return rax points to word if found or NULL
|
||||
// @asyncsignalsafe
|
||||
wmemchr:.leafprologue
|
||||
.profilable
|
||||
xor %r8,%r8
|
||||
|
@ -111,19 +111,19 @@ wmemchr:.leafprologue
|
|||
mov %esi,%edx
|
||||
mov %esi,%r11d
|
||||
mov %rcx,%rsi
|
||||
/ fallthrough
|
||||
// fallthrough
|
||||
|
||||
/ Swiss army knife of wchar_t string scanning.
|
||||
/ Seven fast functions in one.
|
||||
/
|
||||
/ @param rdi is non-null wchar_t string memory
|
||||
/ @param rsi is max number of chars to consider
|
||||
/ @param edx is search character #1
|
||||
/ @param r11d is search character #2
|
||||
/ @param r8 is subtracted from result (for length vs. pointer)
|
||||
/ @param r9 masks result if r11w is found (for NUL vs. NULL)
|
||||
/ @param r10 masks result on chars exhausted (for length v. NULL)
|
||||
/ @return rax end pointer after r8/r9/r10 modifications
|
||||
// Swiss army knife of wchar_t string scanning.
|
||||
// Seven fast functions in one.
|
||||
//
|
||||
// @param rdi is non-null wchar_t string memory
|
||||
// @param rsi is max number of chars to consider
|
||||
// @param edx is search character #1
|
||||
// @param r11d is search character #2
|
||||
// @param r8 is subtracted from result (for length vs. pointer)
|
||||
// @param r9 masks result if r11d is found (for NUL vs. NULL)
|
||||
// @param r10 masks result on chars exhausted (for length v. NULL)
|
||||
// @return rax end pointer after r8/r9/r10 modifications
|
||||
wcssak: lea -4(%rdi),%rax
|
||||
.align 16
|
||||
1: add $4,%rax
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
#include "libc/nexgen32e/pcmpstr.inc"
|
||||
#include "libc/nexgen32e/strstr.inc"
|
||||
|
||||
/ TODO(jart): Fix me.
|
||||
// TODO(jart): Fix me.
|
||||
strstr_sse42:
|
||||
.leafprologue
|
||||
mov %rdi,%rax
|
||||
|
|
|
@ -18,12 +18,12 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
/* clang-format off */
|
||||
|
||||
/ Searches for substring.
|
||||
/
|
||||
/ @param rdi is NUL-terminated haystack string
|
||||
/ @param rsi is NUL-terminated needle string (16-byte aligned)
|
||||
/ @return rax is pointer to substring or NULL
|
||||
/ @todo 10x faster than naïve but could be 100x faster
|
||||
// Searches for substring.
|
||||
//
|
||||
// @param rdi is NUL-terminated haystack string
|
||||
// @param rsi is NUL-terminated needle string (16-byte aligned)
|
||||
// @return rax is pointer to substring or NULL
|
||||
// @todo 10x faster than naïve but could be 100x faster
|
||||
.macro .strstr mode:req
|
||||
push %rbp
|
||||
mov %rsp,%rbp
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
#include "libc/nexgen32e/pcmpstr.inc"
|
||||
#include "libc/nexgen32e/strstr.inc"
|
||||
|
||||
/ TODO(jart): Fix me.
|
||||
// TODO(jart): Fix me.
|
||||
strstr16$sse42:
|
||||
.strstr .Lequalorder16
|
||||
.endfn strstr16$sse42,globl,hidden
|
||||
|
|
|
@ -18,17 +18,17 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Support code for fast integer division by Si units.
|
||||
/
|
||||
/ Division by magnums is described in Hacker's Delight and is
|
||||
/ usually generated automatically by compilers, but sadly not
|
||||
/ when we optimize for size and idiv goes at least 10x slower
|
||||
/ so we do this which saves space while avoiding build tuning
|
||||
/
|
||||
/ @param rdi is number to divide
|
||||
/ @param cl is magnum #1
|
||||
/ @param rdx is magnum #2
|
||||
/ @return quotient
|
||||
// Support code for fast integer division by Si units.
|
||||
//
|
||||
// Division by magnums is described in Hacker's Delight and is
|
||||
// usually generated automatically by compilers, but sadly not
|
||||
// when we optimize for size and idiv goes at least 10x slower
|
||||
// so we do this which saves space while avoiding build tuning
|
||||
//
|
||||
// @param rdi is number to divide
|
||||
// @param cl is magnum #1
|
||||
// @param rdx is magnum #2
|
||||
// @return quotient
|
||||
tinydivsi:
|
||||
.leafprologue
|
||||
mov %rdi,%rax
|
||||
|
|
|
@ -18,11 +18,11 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ 8-bit strlen that's tiny and near optimal if data's tiny.
|
||||
/
|
||||
/ @param RDI is char *s
|
||||
/ @param EAX is unsigned length
|
||||
/ @see libc/nexgen32e/strsak.S
|
||||
// 8-bit strlen that's tiny and near optimal if data's tiny.
|
||||
//
|
||||
// @param RDI is char *s
|
||||
// @return EAX is unsigned length
|
||||
// @see libc/nexgen32e/strsak.S
|
||||
tinystrlen:
|
||||
.leafprologue
|
||||
.profilable
|
||||
|
|
|
@ -18,11 +18,11 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ 16-bit strlen that's tiny and near optimal if data's tiny.
|
||||
/
|
||||
/ @param RDI is char16_t *s
|
||||
/ @param EAX is unsigned length
|
||||
/ @see libc/nexgen32e/strsak16.S
|
||||
// 16-bit strlen that's tiny and near optimal if data's tiny.
|
||||
//
|
||||
// @param RDI is char16_t *s
|
||||
// @return EAX is unsigned length
|
||||
// @see libc/nexgen32e/strsak16.S
|
||||
tinystrlen16:
|
||||
.leafprologue
|
||||
.profilable
|
||||
|
|
|
@ -18,14 +18,14 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Compares strings w/ limit & no-clobber greg abi.
|
||||
/
|
||||
/ @param %rdi is first string
|
||||
/ @param %rsi is second string
|
||||
/ @param %rdx is max length
|
||||
/ @return <0, 0, or >0 depending on comparison
|
||||
/ @clob flags only
|
||||
/ @asyncsignalsafe
|
||||
// Compares strings w/ limit & no-clobber greg abi.
|
||||
//
|
||||
// @param %rdi is first string
|
||||
// @param %rsi is second string
|
||||
// @param %rdx is max length
|
||||
// @return <0, 0, or >0 depending on comparison
|
||||
// @clob flags only
|
||||
// @asyncsignalsafe
|
||||
tinystrncmp:
|
||||
.leafprologue
|
||||
push %rbx
|
||||
|
|
|
@ -18,12 +18,12 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ 8-bit strnlen that's tiny and near optimal if data's tiny.
|
||||
/
|
||||
/ @param RDI is char *s
|
||||
/ @param RSI is size_t n
|
||||
/ @param EAX is unsigned length
|
||||
/ @see libc/nexgen32e/strsak.S
|
||||
// 8-bit strnlen that's tiny and near optimal if data's tiny.
|
||||
//
|
||||
// @param RDI is char *s
|
||||
// @param RSI is size_t n
|
||||
// @return EAX is unsigned length
|
||||
// @see libc/nexgen32e/strsak.S
|
||||
tinystrnlen:
|
||||
.leafprologue
|
||||
.profilable
|
||||
|
|
|
@ -18,12 +18,12 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ 16-bit strnlen that's tiny and near optimal if data's tiny.
|
||||
/
|
||||
/ @param RDI is char16_t *s
|
||||
/ @param RSI is size_t n
|
||||
/ @param EAX is unsigned length
|
||||
/ @see libc/nexgen32e/strsak16.S
|
||||
// 16-bit strnlen that's tiny and near optimal if data's tiny.
|
||||
//
|
||||
// @param RDI is char16_t *s
|
||||
// @param RSI is size_t n
|
||||
// @return EAX is unsigned length
|
||||
// @see libc/nexgen32e/strsak16.S
|
||||
tinystrnlen16:
|
||||
.leafprologue
|
||||
.profilable
|
||||
|
|
|
@ -18,11 +18,11 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ 32-bit strlen that's tiny and near optimal if data's tiny.
|
||||
/
|
||||
/ @param RDI is wchar_t *s
|
||||
/ @param EAX is unsigned length
|
||||
/ @see libc/nexgen32e/strsak32.S
|
||||
// 32-bit strlen that's tiny and near optimal if data's tiny.
|
||||
//
|
||||
// @param RDI is wchar_t *s
|
||||
// @return EAX is unsigned length
|
||||
// @see libc/nexgen32e/strsak32.S
|
||||
tinywcslen:
|
||||
.leafprologue
|
||||
.profilable
|
||||
|
|
|
@ -18,12 +18,12 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ 32-bit strnlen that's tiny and near optimal if data's tiny.
|
||||
/
|
||||
/ @param RDI is wchar_t *s
|
||||
/ @param RSI is size_t n
|
||||
/ @param EAX is unsigned length
|
||||
/ @see libc/nexgen32e/strsak32.S
|
||||
// 32-bit strnlen that's tiny and near optimal if data's tiny.
|
||||
//
|
||||
// @param RDI is wchar_t *s
|
||||
// @param RSI is size_t n
|
||||
// @return EAX is unsigned length
|
||||
// @see libc/nexgen32e/strsak32.S
|
||||
tinywcsnlen:
|
||||
.leafprologue
|
||||
.profilable
|
||||
|
|
|
@ -22,10 +22,10 @@
|
|||
.source __FILE__
|
||||
.code16 # ∩ .code32 ∩ .code64
|
||||
|
||||
/ Hoses interrupt descriptor table and triple-faults the system.
|
||||
/
|
||||
/ @see youtu.be/GIKfEAF2Yhw?t=67
|
||||
/ @mode long,legacy,real
|
||||
// Hoses interrupt descriptor table and triple-faults the system.
|
||||
//
|
||||
// @see youtu.be/GIKfEAF2Yhw?t=67
|
||||
// @mode long,legacy,real
|
||||
triplf: push %bp
|
||||
mov %sp,%bp
|
||||
sub $8,%sp
|
||||
|
|
|
@ -19,21 +19,21 @@
|
|||
#include "libc/macros.h"
|
||||
.source __FILE__
|
||||
|
||||
/ Finds lowest set bit in 𝑥.
|
||||
/
|
||||
/ uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
|
||||
/ 0x00000000 wut 32 0 wut 32
|
||||
/ 0x00000001 0 0 1 0 31
|
||||
/ 0x80000001 0 0 1 31 0
|
||||
/ 0x80000000 31 31 32 31 0
|
||||
/ 0x00000010 4 4 5 4 27
|
||||
/ 0x08000010 4 4 5 27 4
|
||||
/ 0x08000000 27 27 28 27 4
|
||||
/ 0xffffffff 0 0 1 31 0
|
||||
/
|
||||
/ @param edi is 32-bit unsigned 𝑥 value
|
||||
/ @return eax number in range [0,32) or 32 if 𝑥 is 0
|
||||
/ @see also treasure trove of nearly identical functions
|
||||
// Finds lowest set bit in 𝑥.
|
||||
//
|
||||
// uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
|
||||
// 0x00000000 wut 32 0 wut 32
|
||||
// 0x00000001 0 0 1 0 31
|
||||
// 0x80000001 0 0 1 31 0
|
||||
// 0x80000000 31 31 32 31 0
|
||||
// 0x00000010 4 4 5 4 27
|
||||
// 0x08000010 4 4 5 27 4
|
||||
// 0x08000000 27 27 28 27 4
|
||||
// 0xffffffff 0 0 1 31 0
|
||||
//
|
||||
// @param edi is 32-bit unsigned 𝑥 value
|
||||
// @return eax number in range [0,32) or 32 if 𝑥 is 0
|
||||
// @see also treasure trove of nearly identical functions
|
||||
tzcnt: .leafprologue
|
||||
.profilable
|
||||
mov $32,%esi
|
||||
|
|
|
@ -18,21 +18,21 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Finds lowest set bit in 𝑥.
|
||||
/
|
||||
/ uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
|
||||
/ 0x00000000 wut 32 0 wut 32
|
||||
/ 0x00000001 0 0 1 0 31
|
||||
/ 0x80000001 0 0 1 31 0
|
||||
/ 0x80000000 31 31 32 31 0
|
||||
/ 0x00000010 4 4 5 4 27
|
||||
/ 0x08000010 4 4 5 27 4
|
||||
/ 0x08000000 27 27 28 27 4
|
||||
/ 0xffffffff 0 0 1 31 0
|
||||
/
|
||||
/ @param rdi is 64-bit unsigned 𝑥 value
|
||||
/ @return rax number in range [0,64) or 64 if 𝑥 is 0
|
||||
/ @see also treasure trove of nearly identical functions
|
||||
// Finds lowest set bit in 𝑥.
|
||||
//
|
||||
// uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
|
||||
// 0x00000000 wut 32 0 wut 32
|
||||
// 0x00000001 0 0 1 0 31
|
||||
// 0x80000001 0 0 1 31 0
|
||||
// 0x80000000 31 31 32 31 0
|
||||
// 0x00000010 4 4 5 4 27
|
||||
// 0x08000010 4 4 5 27 4
|
||||
// 0x08000000 27 27 28 27 4
|
||||
// 0xffffffff 0 0 1 31 0
|
||||
//
|
||||
// @param rdi is 64-bit unsigned 𝑥 value
|
||||
// @return rax number in range [0,64) or 64 if 𝑥 is 0
|
||||
// @see also treasure trove of nearly identical functions
|
||||
tzcntl: .leafprologue
|
||||
.profilable
|
||||
mov $64,%esi
|
||||
|
|
|
@ -23,11 +23,11 @@
|
|||
.code16 # ∩ .code32 ∩ .code64
|
||||
.source __FILE__
|
||||
|
||||
/ Clears display page.
|
||||
/
|
||||
/ @param es:di arbitrary address within video page
|
||||
/ @return es:ax starting address
|
||||
/ @mode long,legacy,real
|
||||
// Clears display page.
|
||||
//
|
||||
// @param es:di arbitrary address within video page
|
||||
// @return es:ax starting address
|
||||
// @mode long,legacy,real
|
||||
vcls: xchg %di,%ax
|
||||
bband VIDYA_REWIND,%ah,%al
|
||||
xchg %di,%ax
|
||||
|
|
|
@ -17,12 +17,12 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
|
||||
/ Reconfigures transcendental math coprocessor.
|
||||
/
|
||||
/ @param \conf can be absent to restore default
|
||||
/ @clob x87 status and control words only
|
||||
/ @see Intel Manual V.1 §8.1.5
|
||||
/ @mode long,legacy
|
||||
// Reconfigures transcendental math coprocessor.
|
||||
//
|
||||
// @param \conf can be absent to restore default
|
||||
// @clob x87 status and control words only
|
||||
// @see Intel Manual V.1 §8.1.5
|
||||
// @mode long,legacy
|
||||
.macro x87conf conf=$0x33f
|
||||
push %ax
|
||||
pushw \conf
|
||||
|
|
|
@ -20,8 +20,8 @@
|
|||
#include "libc/notice.inc"
|
||||
#include "ape/config.h"
|
||||
|
||||
/ eXtreme Low Memory.
|
||||
/ @see ape/config.h
|
||||
// eXtreme Low Memory.
|
||||
// @see ape/config.h
|
||||
.section .xlm,"aw",@nobits
|
||||
.align 4096
|
||||
__xlm: .rept XLM_SIZE
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
#include "ape/relocations.h"
|
||||
#include "libc/zip.h"
|
||||
|
||||
/ ZIP Central Directory.
|
||||
// ZIP Central Directory.
|
||||
.section .piro.data.sort.zip.3,"a",@progbits
|
||||
.hidden __zip_start
|
||||
.globl __zip_start
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue