Get codebase completely working with LLVM

You can now build Cosmopolitan with Clang:

    make -j8 MODE=llvm
    o/llvm/examples/hello.com

The assembler and linker code is now friendly to LLVM too, so
it's no longer necessary to configure Clang to use binutils under
the hood. If you love LLVM then you can now use pure LLVM.
This commit is contained in:
Justine Tunney 2021-02-08 09:19:00 -08:00
parent 0e36cb3ac4
commit e75ffde09e
4528 changed files with 7776 additions and 11640 deletions

View file

@ -18,21 +18,21 @@
*/
#include "libc/macros.h"
/ Returns binary logarithm of integer 𝑥.
/
/ uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
/ 0x00000000 wut 32 0 wut 32
/ 0x00000001 0 0 1 0 31
/ 0x80000001 0 0 1 31 0
/ 0x80000000 31 31 32 31 0
/ 0x00000010 4 4 5 4 27
/ 0x08000010 4 4 5 27 4
/ 0x08000000 27 27 28 27 4
/ 0xffffffff 0 0 1 31 0
/
/ @param rsi:rdi is 128-bit unsigned 𝑥 value
/ @return eax number in range [0,128) or undef if 𝑥 is 0
/ @see also treasure trove of nearly identical functions
// Returns binary logarithm of integer 𝑥.
//
// uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
// 0x00000000 wut 32 0 wut 32
// 0x00000001 0 0 1 0 31
// 0x80000001 0 0 1 31 0
// 0x80000000 31 31 32 31 0
// 0x00000010 4 4 5 4 27
// 0x08000010 4 4 5 27 4
// 0x08000000 27 27 28 27 4
// 0xffffffff 0 0 1 31 0
//
// @param rsi:rdi is 128-bit unsigned 𝑥 value
// @return eax number in range [0,128) or undef if 𝑥 is 0
// @see also treasure trove of nearly identical functions
bsrmax: .leafprologue
.profilable
bsr %rsi,%rax

View file

@ -18,15 +18,15 @@
*/
#include "libc/macros.h"
/ Sets memory to zero.
/
/ C code should always favor memset(), since that's the one we've
/ prototyped with the best optimizations. This definition is used
/ by old code and sometimes code generators, as a thunk.
/
/ @param rdi is dest
/ @param rsi is the number of bytes to set
/ @see memset(), explicit_bzero()
// Sets memory to zero.
//
// C code should always favor memset(), since that's the one we've
// prototyped with the best optimizations. This definition is used
// by old code and sometimes code generators, as a thunk.
//
// @param rdi is dest
// @param rsi is the number of bytes to set
// @see memset(), explicit_bzero()
bzero: mov %rsi,%rdx
xor %esi,%esi
jmp MemSet

View file

@ -19,19 +19,19 @@
#include "ape/relocations.h"
#include "libc/macros.h"
/ Escapes byte for string literal.
/
/ This turns stuff like (char)0xFF into \0377. The returned
/ string is word-encoded, e.g. '\\'|'0'<<010|'3'<<020|etc.
/
/ @param dil contains byte to escape
/ @see libc/nexgen32e/cescapec.c
// Escapes byte for string literal.
//
// This turns stuff like (char)0xFF into \0377. The returned
// string is word-encoded, e.g. '\\'|'0'<<010|'3'<<020|etc.
//
// @param dil contains byte to escape
// @see libc/nexgen32e/cescapec.c
cescapec:
movzbl %dil,%edi
lea -7(%rdi),%ecx
cmp $85,%cl
ja 1f
mov $'\\,%eax
mov $'\\',%eax
movzbl %cl,%ecx
jmp *cescapectab(,%rcx,8)
.Lanchorpoint:
@ -74,7 +74,7 @@ cescapec:
shl $24,%edi
or %ecx,%edi
lea (%rdi,%rax,4),%eax
add $'0<<030|'0<<020|'0<<010|'\\,%eax
add $'0'<<030|'0'<<020|'0'<<010|'\\',%eax
ret
.endfn cescapec,globl
@ -87,13 +87,13 @@ cescapectab.ro:
.byte 1,.LVT-.Lanchorpoint
.byte 1,.LFF-.Lanchorpoint
.byte 1,.LCR-.Lanchorpoint
.byte '\"-'\r-1,1b-.Lanchorpoint
.byte 20,1b-.Lanchorpoint
.byte 1,.LDQ-.Lanchorpoint
.byte '\'-'\"-1,1b-.Lanchorpoint
.byte '\''-'\"'-1,1b-.Lanchorpoint
.byte 1,.LSQ-.Lanchorpoint
.byte '?-'\'-1,1b-.Lanchorpoint
.byte '?'-'\''-1,1b-.Lanchorpoint
.byte 1,.LQM-.Lanchorpoint
.byte '\\-'?-1,1b-.Lanchorpoint
.byte '\\'-'?'-1,1b-.Lanchorpoint
.byte 1,.LBSL-.Lanchorpoint
.equ .Lcescapectab.ro.size,.-cescapectab.ro
.org 8 - .Lcescapectab.ro.size % 8 + .
@ -102,7 +102,7 @@ cescapectab.ro:
.initbss 300,_init_cescapec
cescapectab:
.rept '\\-7+1
.rept '\\'-7+1
.quad 0
.endr
.endobj cescapectab

View file

@ -18,11 +18,11 @@
*/
#include "libc/macros.h"
/ Compares 8-bit signed integers.
/
/ @param rdi points to left integer
/ @param rsi points to right integer
/ @return <0, 0, or >0 based on comparison
// Compares 8-bit signed integers.
//
// @param rdi points to left integer
// @param rsi points to right integer
// @return <0, 0, or >0 based on comparison
.align 16
cmpsb: .leafprologue
.profilable

View file

@ -18,19 +18,19 @@
*/
#include "libc/macros.h"
/ Compares 32-bit signed integers.
/
/ @param rdi points to left integer
/ @param rsi points to right integer
/ @return <0, 0, or >0 based on comparison
// Compares 32-bit signed integers.
//
// @param rdi points to left integer
// @param rsi points to right integer
// @return <0, 0, or >0 based on comparison
.align 16
cmpsl: .leafprologue
.profilable
xor %eax,%eax
cmpsl
/ mov (%rdi),%edi
/ mov (%rsi),%esi
/ cmp %edi,%esi
// mov (%rdi),%edi
// mov (%rsi),%esi
// cmp %edi,%esi
setl %al
cmovg .Lneg1(%rip),%eax
.leafepilogue

View file

@ -18,11 +18,11 @@
*/
#include "libc/macros.h"
/ Compares 64-bit signed integers.
/
/ @param rdi points to left integer
/ @param rsi points to right integer
/ @return <0, 0, or >0 based on comparison
// Compares 64-bit signed integers.
//
// @param rdi points to left integer
// @param rsi points to right integer
// @return <0, 0, or >0 based on comparison
.align 16
cmpsq: .leafprologue
.profilable

View file

@ -18,11 +18,11 @@
*/
#include "libc/macros.h"
/ Compares 16-bit signed integers.
/
/ @param rdi points to left integer
/ @param rsi points to right integer
/ @return <0, 0, or >0 based on comparison
// Compares 16-bit signed integers.
//
// @param rdi points to left integer
// @param rsi points to right integer
// @return <0, 0, or >0 based on comparison
.align 16
cmpsw: .leafprologue
.profilable

View file

@ -18,11 +18,11 @@
*/
#include "libc/macros.h"
/ Compares 8-bit unsigned integers.
/
/ @param rdi points to left integer
/ @param rsi points to right integer
/ @return <0, 0, or >0 based on comparison
// Compares 8-bit unsigned integers.
//
// @param rdi points to left integer
// @param rsi points to right integer
// @return <0, 0, or >0 based on comparison
.align 16
cmpub: .leafprologue
.profilable

View file

@ -18,11 +18,11 @@
*/
#include "libc/macros.h"
/ Compares 32-bit unsigned integers.
/
/ @param rdi points to left integer
/ @param rsi points to right integer
/ @return <0, 0, or >0 based on comparison
// Compares 32-bit unsigned integers.
//
// @param rdi points to left integer
// @param rsi points to right integer
// @return <0, 0, or >0 based on comparison
.align 16
cmpul: .leafprologue
.profilable

View file

@ -18,11 +18,11 @@
*/
#include "libc/macros.h"
/ Compares 64-bit unsigned integers.
/
/ @param rdi points to left integer
/ @param rsi points to right integer
/ @return <0, 0, or >0 based on comparison
// Compares 64-bit unsigned integers.
//
// @param rdi points to left integer
// @param rsi points to right integer
// @return <0, 0, or >0 based on comparison
.align 16
cmpuq: .leafprologue
.profilable

View file

@ -18,11 +18,11 @@
*/
#include "libc/macros.h"
/ Compares 16-bit unsigned integers.
/
/ @param rdi points to left integer
/ @param rsi points to right integer
/ @return <0, 0, or >0 based on comparison
// Compares 16-bit unsigned integers.
//
// @param rdi points to left integer
// @param rsi points to right integer
// @return <0, 0, or >0 based on comparison
.align 16
cmpuw: .leafprologue
.profilable

View file

@ -18,18 +18,18 @@
*/
#include "libc/macros.h"
/ Computes Phil Katz CRC-32 w/ carryless multiply isa.
/
/ This is support code that's abstracted by crc32_z().
/
/ @param edi is initial value
/ @param rsi points to buffer
/ @param rdx is bytes in buffer that's >=64 and %16==0
/ @return eax is crc32
/ @note needs Westmere (c.2010) or Bulldozer (c.2011)
/ @see “Fast CRC Computation for Generic Polynomials Using
/ PCLMULQDQ Instruction” V. Gopal, E. Ozturk, et al.,
/ 2009, intel.ly/2ySEwL0
// Computes Phil Katz CRC-32 w/ carryless multiply isa.
//
// This is support code that's abstracted by crc32_z().
//
// @param edi is initial value
// @param rsi points to buffer
// @param rdx is bytes in buffer that's >=64 and %16==0
// @return eax is crc32
// @note needs Westmere (c.2010) or Bulldozer (c.2011)
// @see “Fast CRC Computation for Generic Polynomials Using
// PCLMULQDQ Instruction” V. Gopal, E. Ozturk, et al.,
// 2009, intel.ly/2ySEwL0
crc32_pclmul:
.leafprologue
.profilable
@ -139,8 +139,8 @@ crc32_pclmul:
.endfn crc32_pclmul,globl,hidden
.source __FILE__
/ Definitions of the bit-reflected domain constants k1,k2,k3, etc.
/ and the CRC32+Barrett polynomials given at the end of the paper.
// Definitions of the bit-reflected domain constants k1,k2,k3, etc.
// and the CRC32+Barrett polynomials given at the end of the paper.
.rodata.cst16
.Lk1k2: .quad 0x0000000154442bd4
.quad 0x00000001c6e41596

View file

@ -18,22 +18,22 @@
*/
#include "libc/macros.h"
/ Generates lookup table for computing CRC-32 byte-by-byte.
/
/ void crc32init(uint32_t table[256], uint32_t polynomial) {
/ uint32_t d, i, r;
/ for (d = 0; d < 256; ++d) {
/ r = d;
/ for (i = 0; i < 8; ++i) {
/ r = r >> 1 ^ (r & 1 ? polynomial : 0);
/ }
/ table[d] = r;
/ }
/ }
/
/ @param rdi is pointer to uint32_t[256] array
/ @param esi 32-bit binary polynomial config
/ @note imposes ~300ns one-time cost
// Generates lookup table for computing CRC-32 byte-by-byte.
//
// void crc32init(uint32_t table[256], uint32_t polynomial) {
// uint32_t d, i, r;
// for (d = 0; d < 256; ++d) {
// r = d;
// for (i = 0; i < 8; ++i) {
// r = r >> 1 ^ (r & 1 ? polynomial : 0);
// }
// table[d] = r;
// }
// }
//
// @param rdi is pointer to uint32_t[256] array
// @param esi 32-bit binary polynomial config
// @note imposes ~300ns one-time cost
crc32init:
push %rbp
mov %rsp,%rbp

View file

@ -18,10 +18,10 @@
*/
#include "libc/macros.h"
/ Divides 64-bit signed integer by 1,000,000,000.
/
/ @param rdi is number to divide
/ @return quotient
// Divides 64-bit signed integer by 1,000,000,000.
//
// @param rdi is number to divide
// @return quotient
div1000000000int64:
mov $0x1a,%cl
movabs $0x112e0be826d694b3,%rdx

View file

@ -18,10 +18,10 @@
*/
#include "libc/macros.h"
/ Divides 64-bit signed integer by 1,000,000.
/
/ @param rdi is number to divide
/ @return quotient
// Divides 64-bit signed integer by 1,000,000.
//
// @param rdi is number to divide
// @return quotient
div1000000int64:
mov $0x12,%cl
movabs $0x431bde82d7b634db,%rdx

View file

@ -18,10 +18,10 @@
*/
#include "libc/macros.h"
/ Divides 64-bit signed integer by 10,000.
/
/ @param rdi is number to divide
/ @return truncated quotient
// Divides 64-bit signed integer by 10,000.
//
// @param rdi is number to divide
// @return truncated quotient
div10000int64:
mov $11,%cl
movabs $0x346dc5d63886594b,%rdx

View file

@ -18,10 +18,10 @@
*/
#include "libc/macros.h"
/ Divides 64-bit signed integer by 1,000.
/
/ @param rdi is number to divide
/ @return quotient
// Divides 64-bit signed integer by 1,000.
//
// @param rdi is number to divide
// @return quotient
div1000int64:
mov $0x7,%cl
movabs $0x20c49ba5e353f7cf,%rdx

View file

@ -18,10 +18,10 @@
*/
#include "libc/macros.h"
/ Divides 64-bit signed integer by 100.
/
/ @param rdi is number to divide
/ @return rax has quotient
// Divides 64-bit signed integer by 100.
//
// @param rdi is number to divide
// @return rax has quotient
div100int64:
mov %rdi,%rax
movabs $-6640827866535438581,%rdx

View file

@ -18,10 +18,10 @@
*/
#include "libc/macros.h"
/ Divides 64-bit signed integer by 10.
/
/ @param rdi is number to divide
/ @return quotient
// Divides 64-bit signed integer by 10.
//
// @param rdi is number to divide
// @return quotient
div10int64:
mov $2,%cl
movabs $0x6666666666666667,%rdx

View file

@ -1,12 +1,12 @@
#include "libc/macros.h"
.source __FILE__
/ D.J. Bernstein's outrageously fast integer sorting algorithm.
/
/ @param rdi is int32 array
/ @param rsi is number of elements in rdi
/ @note public domain
/ @see en.wikipedia.org/wiki/Sorting_network
// D.J. Bernstein's outrageously fast integer sorting algorithm.
//
// @param rdi is int32 array
// @param rsi is number of elements in rdi
// @note public domain
// @see en.wikipedia.org/wiki/Sorting_network
djbsort_avx2:
push %rbp
mov %rsp,%rbp
@ -1115,7 +1115,7 @@ int32_sort_2power:
vpxor 32(%rdi),%ymm0,%ymm2
vpxor (%rdi),%ymm0,%ymm0
vmovdqa .LC1(%rip),%ymm4
cmp $0,-116(%rbp)
cmpl $0,-116(%rbp)
vpunpckldq %ymm2,%ymm0,%ymm1
vpunpckhdq %ymm2,%ymm0,%ymm0
vpunpcklqdq %ymm0,%ymm1,%ymm3
@ -1179,7 +1179,7 @@ int32_sort_2power:
mov $16,%esi
mov %r13,%rdi
call int32_sort_2power
cmp $0,-116(%rbp)
cmpl $0,-116(%rbp)
vmovdqu (%r12),%ymm4
vmovdqu 32(%r12),%ymm1
vmovdqu 64(%r12),%ymm2
@ -1750,7 +1750,7 @@ int32_sort_2power:
vpunpckhdq 160(%rax),%ymm7,%ymm0
vpunpcklqdq %ymm2,%ymm12,%ymm8
vpunpcklqdq %ymm4,%ymm6,%ymm9
cmp $0,-116(%rbp)
cmpl $0,-116(%rbp)
vmovdqu 192(%rax),%ymm7
vpunpckhqdq %ymm2,%ymm12,%ymm12
vpunpckhqdq %ymm4,%ymm6,%ymm4
@ -1837,7 +1837,7 @@ int32_sort_2power:
vmovdqu %ymm2,-64(%rax)
vmovdqu %ymm0,-32(%rax)
jmp .L85
.L142: cmp $32,-112(%rbp)
.L142: cmpq $32,-112(%rbp)
jne .L94
.L93: mov -112(%rbp),%rcx
sar $2,%rcx
@ -1871,7 +1871,7 @@ int32_sort_2power:
cmp %rax,%r15
jg .L92
sarq $3,-112(%rbp)
.L89: cmp $127,-112(%rbp)
.L89: cmpq $127,-112(%rbp)
jle .L142
jmp .L93
.L92: cmp -136(%rbp),%rax
@ -1925,7 +1925,7 @@ int32_sort_2power:
add %rdx,-136(%rbp)
jmp .L90
.L145: sarq $2,-112(%rbp)
.L94: cmp $15,-112(%rbp)
.L94: cmpq $15,-112(%rbp)
jle .L144
mov -112(%rbp),%rcx
xor %esi,%esi
@ -1962,7 +1962,7 @@ int32_sort_2power:
.L146: add %rdx,%rsi
add %rdx,%rcx
jmp .L95
.L144: cmp $8,-112(%rbp)
.L144: cmpq $8,-112(%rbp)
je .L111
.L102: mov -152(%rbp),%rdx
add -128(%rbp),%rdx
@ -1997,7 +1997,7 @@ int32_sort_2power:
vpmaxsd %ymm3,%ymm5,%ymm3
vpminsd (%rdi),%ymm7,%ymm1
vpminsd %ymm2,%ymm4,%ymm10
cmp $0,-116(%rbp)
cmpl $0,-116(%rbp)
vpmaxsd (%rdi),%ymm7,%ymm0
vmovdqu (%rsi),%ymm7
vpmaxsd %ymm2,%ymm4,%ymm2

View file

@ -19,7 +19,7 @@
#include "libc/macros.h"
#include "libc/notice.inc"
/ Environment variable pointer list.
// Environment variable pointer list.
.bss
.align 8
environ:

View file

@ -19,16 +19,16 @@
#include "libc/macros.h"
.source __FILE__
/ Global variable for last error.
/
/ The system call wrappers update this with WIN32 error codes.
/ Unlike traditional libraries, Cosmopolitan error codes are
/ defined as variables. By convention, system calls and other
/ functions do not update this variable when nothing's broken.
/
/ @see libc/sysv/consts.sh
/ @see libc/sysv/errfuns.h
/ @see __errno_location() stable abi
// Global variable for last error.
//
// The system call wrappers update this with WIN32 error codes.
// Unlike traditional libraries, Cosmopolitan error codes are
// defined as variables. By convention, system calls and other
// functions do not update this variable when nothing's broken.
//
// @see libc/sysv/consts.sh
// @see libc/sysv/errfuns.h
// @see __errno_location() stable abi
.bss
.align 4
errno: .long 0

View file

@ -20,8 +20,8 @@
.source __FILE__
.keep.text # gdb needs it
/ Returns address of errno variable.
/ @note this isn't a universal definition
// Returns address of errno variable.
// @note this isn't a universal definition
__errno_location:
ezlea errno,ax
ret

View file

@ -18,14 +18,14 @@
*/
#include "libc/macros.h"
/ Sets memory to zero w/ accompanying non-optimizing macro.
/
/ This is intended for security-conscious applications. This
/ implementation also hoses every register the abi allows. A
/ concomitant prototype (str.h) countermands compiler magic.
/
/ @param rdi is dest
/ @param rsi is the number of bytes to set
// Sets memory to zero w/ accompanying non-optimizing macro.
//
// This is intended for security-conscious applications. This
// implementation also hoses every register the abi allows. A
// concomitant prototype (str.h) countermands compiler magic.
//
// @param rdi is dest
// @param rsi is the number of bytes to set
explicit_bzero:
.leafprologue
mov %rsi,%rcx

View file

@ -21,11 +21,11 @@
.source __FILE__
.code16 # .code32 .code64
/ Function entry hook stub.
/
/ @note cc -pg -mfentry adds this to the start of every function
/ @see libc/log/shadowargs.ncabi.c
/ @mode long,legacy,real
// Function entry hook stub.
//
// @note cc -pg -mfentry adds this to the start of every function
// @see libc/log/shadowargs.ncabi.c
// @mode long,legacy,real
__fentry__:
// Stub body: the hook does no work and returns straight to the
// instrumented function that called it.
ret
// NOTE(review): the second .endfn argument presumably gives the symbol
// weak binding so a real hook can replace this stub at link time;
// confirm against the .endfn macro in libc/macros.h.
.endfn __fentry__,weak

View file

@ -18,22 +18,22 @@
*/
#include "libc/macros.h"
/ Finds lowest set bit in word.
/
/ uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
/ 0x00000000 wut 32 0 wut 32
/ 0x00000001 0 0 1 0 31
/ 0x80000001 0 0 1 31 0
/ 0x80000000 31 31 32 31 0
/ 0x00000010 4 4 5 4 27
/ 0x08000010 4 4 5 27 4
/ 0x08000000 27 27 28 27 4
/ 0xffffffff 0 0 1 31 0
/
/ @param edi is the input number
/ @return number in range [1,32] or 0 if no bits set
/ @see also treasure trove of nearly identical functions
/ @asyncsignalsafe
// Finds lowest set bit in word.
//
// uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
// 0x00000000 wut 32 0 wut 32
// 0x00000001 0 0 1 0 31
// 0x80000001 0 0 1 31 0
// 0x80000000 31 31 32 31 0
// 0x00000010 4 4 5 4 27
// 0x08000010 4 4 5 27 4
// 0x08000000 27 27 28 27 4
// 0xffffffff 0 0 1 31 0
//
// @param edi is the input number
// @return number in range [1,32] or 0 if no bits set
// @see also treasure trove of nearly identical functions
// @asyncsignalsafe
ffs: .leafprologue
.profilable
or $-1,%edx

View file

@ -18,22 +18,22 @@
*/
#include "libc/macros.h"
/ Finds lowest set bit in word.
/
/ uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
/ 0x00000000 wut 32 0 wut 32
/ 0x00000001 0 0 1 0 31
/ 0x80000001 0 0 1 31 0
/ 0x80000000 31 31 32 31 0
/ 0x00000010 4 4 5 4 27
/ 0x08000010 4 4 5 27 4
/ 0x08000000 27 27 28 27 4
/ 0xffffffff 0 0 1 31 0
/
/ @param rdi is the input number
/ @return number in range [1,64] or 0 if no bits set
/ @see also treasure trove of nearly identical functions
/ @asyncsignalsafe
// Finds lowest set bit in word.
//
// uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
// 0x00000000 wut 32 0 wut 32
// 0x00000001 0 0 1 0 31
// 0x80000001 0 0 1 31 0
// 0x80000000 31 31 32 31 0
// 0x00000010 4 4 5 4 27
// 0x08000010 4 4 5 27 4
// 0x08000000 27 27 28 27 4
// 0xffffffff 0 0 1 31 0
//
// @param rdi is the input number
// @return number in range [1,64] or 0 if no bits set
// @see also treasure trove of nearly identical functions
// @asyncsignalsafe
ffsl: .leafprologue
.profilable
or $-1,%edx

View file

@ -21,17 +21,17 @@
#include "libc/notice.inc"
#define INITIAL_CAPACITY 4
/ Invokes deferred function calls.
/
/ This offers behavior similar to std::unique_ptr. Functions
/ overwrite their return addresses jumping here, and pushing
/ exactly one entry on the shadow stack below. Functions may
/ repeat that process multiple times, in which case the body
/ of this gadget loops and unwinds as a natural consequence.
/
/ @param rax,rdx,xmm0,xmm1,st0,st1 is return value
/ @see test/libc/runtime/gc_test.c
/ <LIMBO>
// Invokes deferred function calls.
//
// This offers behavior similar to std::unique_ptr. Functions
// overwrite their return addresses jumping here, and pushing
// exactly one entry on the shadow stack below. Functions may
// repeat that process multiple times, in which case the body
// of this gadget loops and unwinds as a natural consequence.
//
// @param rax,rdx,xmm0,xmm1,st0,st1 is return value
// @see test/libc/runtime/gc_test.c
// <LIMBO>
__gc: decq __garbage(%rip)
mov __garbage(%rip),%r8
mov __garbage+16(%rip),%r9
@ -41,7 +41,7 @@ __gc: decq __garbage(%rip)
mov 8(%r8),%r9
mov 16(%r8),%rdi
push 24(%r8)
/ </LIMBO>
// </LIMBO>
push %rbp
mov %rsp,%rbp
sub $0x20,%rsp

View file

@ -4,19 +4,22 @@
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
struct Garbages {
size_t i, n;
struct Garbage {
struct StackFrame *frame;
intptr_t fn;
intptr_t arg;
intptr_t ret;
} * p;
struct Garbage {
struct StackFrame *frame;
intptr_t fn;
intptr_t arg;
intptr_t ret;
};
hidden extern struct Garbages __garbage;
struct Garbages {
size_t i, n;
struct Garbage *p;
struct Garbage initmem[1];
};
int64_t __gc(void) hidden;
extern struct Garbages __garbage;
int64_t __gc(void);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */

View file

@ -18,17 +18,17 @@
*/
#include "libc/macros.h"
/ Jumps up stack to previous setjmp() invocation.
/
/ This is the same as longjmp() but also unwinds the stack to free
/ memory, etc. that was registered using gc() or defer(). If GC
/ isn't linked, this behaves the same as longjmp().
/
/ @param rdi points to the jmp_buf which must be the same stack
/ @param esi is returned by setjmp() invocation (coerced nonzero)
/ @assume system five nexgen32e abi conformant
/ @see examples/ctrlc.c
/ @noreturn
// Jumps up stack to previous setjmp() invocation.
//
// This is the same as longjmp() but also unwinds the stack to free
// memory, etc. that was registered using gc() or defer(). If GC
// isn't linked, this behaves the same as longjmp().
//
// @param rdi points to the jmp_buf which must be the same stack
// @param esi is returned by setjmp() invocation (coerced nonzero)
// @assume system five nexgen32e abi conformant
// @see examples/ctrlc.c
// @noreturn
gclongjmp:
.leafprologue
.profilable

View file

@ -19,8 +19,8 @@
#include "libc/macros.h"
#include "libc/notice.inc"
/ The identity() function.
/ @return first argument
// The identity() function.
// @return first argument
identity:
mov %rdi,%rax
ret

View file

@ -20,11 +20,11 @@
.text.startup
.align 8
/ Identity maps 256-byte translation table.
/
/ @param char (*rdi)[256]
/ @speed 90mBps
/ @mode long
// Identity maps 256-byte translation table.
//
// @param char (*rdi)[256]
// @speed 90mBps
// @mode long
imapxlatab:
.leafprologue
.profilable

View file

@ -18,29 +18,29 @@
*/
#include "libc/macros.h"
/ Base 36 Decoder Table.
/
/ This supports uppercase and lowercase. For encoding, the string
/ 0123456789abcdefghijklmnopqrstuvwxyz can be used, which linkers
/ are able to deduplicate.
// Base 36 Decoder Table.
//
// This supports uppercase and lowercase. For encoding, the string
// 0123456789abcdefghijklmnopqrstuvwxyz can be used, which linkers
// are able to deduplicate.
.initbss 300,_init_kBase36
kBase36:.zero 256
.endobj kBase36,globl,hidden
.previous
.init.start 300,_init_kBase36
add $'0,%rdi
add $'0',%rdi
xor %eax,%eax
pushpop 10,%rcx
0: inc %eax
stosb
.loop 0b
add $'A-1-'9,%rdi
pushpop 'Z+1-'A,%rcx
add $'A'-1-'9',%rdi
pushpop 'Z'+1-'A',%rcx
0: inc %eax
mov %al,0x20(%rdi)
stosb
.loop 0b
add $255-'Z,%rdi
add $255-'Z',%rdi
.init.end 300,_init_kBase36
.source __FILE__

View file

@ -21,43 +21,43 @@
.align 16
.source __FILE__
/ ibm cp437 unicode table w/ string literal safety
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/ THERE WILL BE BLOCKS march 01 2017
/
/ @see libc/str/str.h
/ @see kCp437i[]
// ibm cp437 unicode table w/ string literal safety
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
//
// THERE WILL BE BLOCKS march 01 2017
//
// @see libc/str/str.h
// @see kCp437i[]
kCp437:
.short 0x00a0,0x263a,0x263b,0x2665,0x2666,0x2663,0x2660,0x2022 #00: 
.short 0x25d8,0x25cb,0x25d9,0x2642,0x2640,0x266a,0x266b,0x263c #08:

View file

@ -22,14 +22,14 @@
#include "libc/nexgen32e/kcpuids.h"
#include "libc/nexgen32e/x86feature.h"
/ Globally precomputed CPUID.
/
/ This module lets us check CPUID in 0.06ns rather than 51.00ns.
/ If every piece of native software linked this module, then the
/ world would be a much better place; since all the alternatives
/ are quite toilsome.
/
/ @see www.felixcloutier.com/x86/cpuid
// Globally precomputed CPUID.
//
// This module lets us check CPUID in 0.06ns rather than 51.00ns.
// If every piece of native software linked this module, then the
// world would be a much better place; since all the alternatives
// are quite toilsome.
//
// @see www.felixcloutier.com/x86/cpuid
.initbss 201,_init_kCpuids
kCpuids:.long 0,0,0,0 # EAX=0 (Basic Processor Info)
.long 0,0,0,0 # EAX=1 (Processor Info)

View file

@ -18,10 +18,10 @@
*/
#include "libc/macros.h"
/ Castagnoli CRC32 ISCSI Polynomial
/ x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1
/ 0b00011110110111000110111101000001
/ bitreverse32(0x1edc6f41)
// Castagnoli CRC32 ISCSI Polynomial
// x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1
// 0b00011110110111000110111101000001
// bitreverse32(0x1edc6f41)
#define ISCSI_POLYNOMIAL 0x82f63b78
.initbss 300,_init_kCrc32cTab

View file

@ -19,7 +19,7 @@
#include "libc/macros.h"
.initbss 202,_init_kHalfCache3
/ Half size of level 3 cache in bytes.
// Half size of level 3 cache in bytes.
kHalfCache3:
.quad 0
.endobj kHalfCache3,globl

View file

@ -18,7 +18,7 @@
*/
#include "libc/macros.h"
/ Signal mask constant w/ no signal bits set.
// Signal mask constant w/ no signal bits set.
.initbss 300,_init_kSigsetEmpty
kSigsetEmpty:
.rept NSIG / 64

View file

@ -18,7 +18,7 @@
*/
#include "libc/macros.h"
/ Signal mask constant w/ every signal bit set.
// Signal mask constant w/ every signal bit set.
.initbss 300,_init_kSigsetFull
kSigsetFull:
.rept NSIG / 64

View file

@ -18,11 +18,11 @@
*/
#include "libc/macros.h"
/ Stores CPU Timestamp Counter at startup.
/
/ It can be useful as an added source of seeding information.
/
/ @note rdtsc is a 25 cycle instruction
// Stores CPU Timestamp Counter at startup.
//
// It can be useful as an added source of seeding information.
//
// @note rdtsc is a 25 cycle instruction
.initbss 200,_init_kStartTsc
kStartTsc:
.quad 0

View file

@ -18,12 +18,12 @@
*/
#include "libc/macros.h"
/ ASCII uppercase lowercase translation tables.
/
/ char kToLower[256];
/ char16_t kToLower16[256];
/
/ @note kToLower16 saves 128kb; CMOVcc can't even 8-bit
// ASCII uppercase lowercase translation tables.
//
// char kToLower[256];
// char16_t kToLower16[256];
//
// @note kToLower16 saves 128kb; CMOVcc can't even 8-bit
.initbss 300,_init_kToLower
kToLower:
.rept 256
@ -43,8 +43,8 @@ kToLower16:
xchg %rsi,(%rsp)
xor %ecx,%ecx
0: inc %ecx
addb $0x20,'A-1(%rsi,%rcx)
cmp $'Z-'A,%ecx
addb $0x20,'A'-1(%rsi,%rcx)
cmp $'Z'-'A',%ecx
jne 0b
xor %eax,%eax
mov $256,%ecx

View file

@ -18,10 +18,10 @@
*/
#include "libc/macros.h"
/ Fast log when 𝑥 is an integer.
/
/ @param rdi is uint64 𝑥
/ @domain 0<𝑥<2⁶⁴ ∧ 𝑥∊ℤ
// Fast log when 𝑥 is an integer.
//
// @param rdi is uint64 𝑥
// @domain 0<𝑥<2⁶⁴ ∧ 𝑥∊ℤ
llog10: .leafprologue
.profilable
bsr %rdi,%rax

View file

@ -19,11 +19,12 @@
#include "libc/macros.h"
.privileged
/ Loads XMM registers from buffer.
/
/ @param %rdi points to &(forcealign(16) uint8_t[256])[128]
/ @note modern cpus have out-of-order execution engines
loadxmm:.leafprologue
// Loads XMM registers from buffer.
//
// @param %rdi points to &(forcealign(16) uint8_t[256])[128]
// @note modern cpus have out-of-order execution engines
loadxmm:
.leafprologue
movaps -0x80(%rdi),%xmm0
movaps -0x70(%rdi),%xmm1
movaps -0x60(%rdi),%xmm2

View file

@ -18,16 +18,16 @@
*/
#include "libc/macros.h"
/ @fileoverview Byte-order conversion functions.
/
/ Endianness is deceptively complicated to the uninitiated. Many
/ helpers have been written by our top minds to address perceived
/ difficulties. These ones got through standardization processes.
/ To protect their legacy, all 19 functions have been implemented
/ in just 17 bytes.
/
/ @see READ32LE(), READ32BE(), etc.
/ @asyncsignalsafe
// @fileoverview Byte-order conversion functions.
//
// Endianness is deceptively complicated to the uninitiated. Many
// helpers have been written by our top minds to address perceived
// difficulties. These ones got through standardization processes.
// To protect their legacy, all 19 functions have been implemented
// in just 17 bytes.
//
// @see READ32LE(), READ32BE(), etc.
// @asyncsignalsafe
bswap_64:
htobe64:

View file

@ -19,14 +19,14 @@
#include "libc/macros.h"
.privileged
/ Loads previously saved processor state.
/
/ @param rdi points to the jmp_buf
/ @param esi is returned by setjmp() invocation (coerced nonzero)
/ @noreturn
/ @assume system five nexgen32e abi conformant
/ @note code built w/ microsoft abi compiler can't call this
/ @see gclongjmp() unwinds gc() destructors
// Loads previously saved processor state.
//
// @param rdi points to the jmp_buf
// @param esi is returned by setjmp() invocation (coerced nonzero)
// @noreturn
// @assume system five nexgen32e abi conformant
// @note code built w/ microsoft abi compiler can't call this
// @see gclongjmp() unwinds gc() destructors
longjmp:mov %esi,%eax
test %eax,%eax
jnz 1f

View file

@ -18,21 +18,21 @@
*/
#include "libc/macros.h"
/ Finds leading bits in 𝑥.
/
/ uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
/ 0x00000000 wut 32 0 wut 32
/ 0x00000001 0 0 1 0 31
/ 0x80000001 0 0 1 31 0
/ 0x80000000 31 31 32 31 0
/ 0x00000010 4 4 5 4 27
/ 0x08000010 4 4 5 27 4
/ 0x08000000 27 27 28 27 4
/ 0xffffffff 0 0 1 31 0
/
/ @param edi is 32-bit unsigned 𝑥 value
/ @return eax number in range [0,32) or 32 if 𝑥 is 0
/ @see also treasure trove of nearly identical functions
// Finds leading bits in 𝑥.
//
// uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
// 0x00000000 wut 32 0 wut 32
// 0x00000001 0 0 1 0 31
// 0x80000001 0 0 1 31 0
// 0x80000000 31 31 32 31 0
// 0x00000010 4 4 5 4 27
// 0x08000010 4 4 5 27 4
// 0x08000000 27 27 28 27 4
// 0xffffffff 0 0 1 31 0
//
// @param edi is 32-bit unsigned 𝑥 value
// @return eax number in range [0,32) or 32 if 𝑥 is 0
// @see also treasure trove of nearly identical functions
lzcnt: .leafprologue
.profilable
mov $31,%eax

View file

@ -18,21 +18,21 @@
*/
#include "libc/macros.h"
/ Finds leading bits in 𝑥.
/
/ uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
/ 0x00000000 wut 32 0 wut 32
/ 0x00000001 0 0 1 0 31
/ 0x80000001 0 0 1 31 0
/ 0x80000000 31 31 32 31 0
/ 0x00000010 4 4 5 4 27
/ 0x08000010 4 4 5 27 4
/ 0x08000000 27 27 28 27 4
/ 0xffffffff 0 0 1 31 0
/
/ @param rdi is 64-bit unsigned 𝑥 value
/ @return rax number in range [0,64) or 64 if 𝑥 is 0
/ @see also treasure trove of nearly identical functions
// Finds leading bits in 𝑥.
//
// uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
// 0x00000000 wut 32 0 wut 32
// 0x00000001 0 0 1 0 31
// 0x80000001 0 0 1 31 0
// 0x80000000 31 31 32 31 0
// 0x00000010 4 4 5 4 27
// 0x08000010 4 4 5 27 4
// 0x08000000 27 27 28 27 4
// 0xffffffff 0 0 1 31 0
//
// @param rdi is 64-bit unsigned 𝑥 value
// @return rax number in range [0,64) or 64 if 𝑥 is 0
// @see also treasure trove of nearly identical functions
lzcntl: .leafprologue
.profilable
mov $31,%eax

View file

@ -19,46 +19,46 @@
#include "libc/macros.h"
#include "libc/nexgen32e/x86feature.h"
/ Broadcast byte literal to vector, e.g.
/
/ # xmm0=0x12121212121212121212121212121212
/ .bcblit $0x12,%al,%eax,%xmm0
/
/ @param reg and regSI need to be the same register
// Broadcast byte literal to vector, e.g.
//
// # xmm0=0x12121212121212121212121212121212
// .bcblit $0x12,%al,%eax,%xmm0
//
// @param reg and regSI need to be the same register
.macro .bcblit lit:req reg:req regSI:req xmm:req
// Put the literal in the byte-sized view of the scratch register.
mov \lit,\reg
// Copy the 32-bit view of that same register into the low lane of the
// vector. Bytes above the first are whatever the register already held,
// which is fine because only the first byte is replicated next.
movd \regSI,\xmm
// Replicate the first byte across the vector using the pbroadcastb
// macro defined later in this file.
pbroadcastb \xmm
.endm
/ Broadcast word literal to vector, e.g.
/
/ # xmm0=0x01230123012301230123012301230123
/ .bcwlit $0x123,%ax,%eax,%xmm0
/
/ @param reg and regSI need to be the same register
// Broadcast word literal to vector, e.g.
//
// # xmm0=0x01230123012301230123012301230123
// .bcwlit $0x123,%ax,%eax,%xmm0
//
// @param reg and regSI need to be the same register
.macro .bcwlit lit:req reg:req regSI:req xmm:req
// Put the literal in the word-sized view of the scratch register.
mov \lit,\reg
// Copy the 32-bit view of that same register into the low lane of the
// vector. The upper half is whatever the register already held, which
// is fine because only the first word is replicated next.
movd \regSI,\xmm
// Replicate the first word across the vector using the pbroadcastw
// macro defined later in this file.
pbroadcastw \xmm
.endm
/ Broadcast int16 from register to vector.
// Broadcast int16 from register to vector.
.macro .bcwreg regSI:req xmm:req
// Copy the 32-bit source register into the low lane of the vector.
// Only its low 16 bits end up mattering to the result.
movd \regSI,\xmm
// Replicate the first word across the vector using the pbroadcastw
// macro defined later in this file.
pbroadcastw \xmm
.endm
/ Sets all bytes in XMM register to first byte, e.g.
/
/ mov $0x11,%eax
/ movd %eax,%xmm0
/ pbroadcastb %xmm0
/
/ 11000000000000000000000000000000
/ 11111111111111111111111111111111
/
/ @param xmm can be %xmm0,%xmm1,etc.
// Sets all bytes in XMM register to first byte, e.g.
//
// mov $0x11,%eax
// movd %eax,%xmm0
// pbroadcastb %xmm0
//
// 11000000000000000000000000000000
// → 11111111111111111111111111111111
//
// @param xmm can be %xmm0,%xmm1,etc.
.macro pbroadcastb xmm:req
#if X86_NEED(AVX2)
vpbroadcastb \xmm,\xmm
@ -69,16 +69,16 @@
#endif
.endm
/ Sets all words in XMM register to first word, e.g.
/
/ mov $0x1234,%eax
/ movd %eax,%xmm0
/ pbroadcastw %xmm0
/
/ 12340000000000000000000000000000
/ 12341234123412341234123412341234
/
/ @param xmm can be %xmm0,%xmm1,etc.
// Sets all words in XMM register to first word, e.g.
//
// mov $0x1234,%eax
// movd %eax,%xmm0
// pbroadcastw %xmm0
//
// 12340000000000000000000000000000
// → 12341234123412341234123412341234
//
// @param xmm can be %xmm0,%xmm1,etc.
.macro pbroadcastw xmm:req
#if X86_NEED(AVX2)
vpbroadcastw \xmm,\xmm

View file

@ -21,7 +21,7 @@
.source __FILE__
.code16 # .code32 .code64
/ Function Profiling Hook.
/ cc -pg adds this to the start of global functions.
// Function Profiling Hook.
// cc -pg adds a call to this at the start of global functions.
//
// This definition is a bare `ret`, so the hook does nothing and
// returns straight to the instrumented function.
//
// NOTE(review): `.endfn mcount,weak` presumably gives the symbol weak
// binding so a real profiler can supply its own mcount; confirm
// against the .endfn macro in libc/macros.h.
mcount: ret
.endfn mcount,weak

View file

@ -30,28 +30,28 @@
#include "libc/nexgen32e/x86feature.h"
#include "libc/macros.h"
/ Copies memory.
/
/ DEST and SRC must not overlap, unless DEST≤SRC.
/
/ @param rdi is dest
/ @param rsi is src
/ @param rdx is number of bytes
/ @return original rdi copied to rax
/ @mode long
/ @asyncsignalsafe
// Copies memory.
//
// DEST and SRC must not overlap, unless DEST≤SRC.
//
// @param rdi is dest
// @param rsi is src
// @param rdx is number of bytes
// @return original rdi copied to rax
// @mode long
// @asyncsignalsafe
memcpy: mov %rdi,%rax
/ 𝑠𝑙𝑖𝑑𝑒
// 𝑠𝑙𝑖𝑑𝑒
.align 16
.endfn memcpy,globl
/ Copies memory w/ minimal impact ABI.
/
/ @param rdi is dest
/ @param rsi is src
/ @param rdx is number of bytes
/ @clob flags,rcx,xmm3,xmm4
/ @mode long
// Copies memory w/ minimal impact ABI.
//
// @param rdi is dest
// @param rsi is src
// @param rdx is number of bytes
// @clob flags,rcx,xmm3,xmm4
// @mode long
MemCpy: .leafprologue
.profilable
mov $.Lmemcpytab.ro.size,%ecx

View file

@ -18,14 +18,14 @@
*/
#include "libc/macros.h"
/ Creates bit mask of which bytes are the same.
/
/ @param %rdi points to bit mask (write-only)
/ @param %rsi points to first buffer (read-only)
/ @param %rdx points to second buffer (read-only)
/ @param %rcx is byte length of both %rsi and %rdx
/ @return %rax is set to %rdi
/ @note buffers should be 128-byte aligned
// Creates bit mask of which bytes are the same.
//
// @param %rdi points to bit mask (write-only)
// @param %rsi points to first buffer (read-only)
// @param %rdx points to second buffer (read-only)
// @param %rcx is byte length of both %rsi and %rdx
// @return %rax is set to %rdi
// @note buffers should be 128-byte aligned
memeqmask:
.leafprologue
xor %eax,%eax

View file

@ -20,13 +20,13 @@
#include "libc/macros.h"
.text.startup
/ Initializes jump table for memset() and memcpy().
/
/ @param !ZF if required cpu vector extensions are available
/ @param rdi is address of 64-bit jump table
/ @param rsi is address of 8-bit jump initializers
/ @param rdx is address of indirect branch
/ @param ecx is size of jump table
// Initializes jump table for memset() and memcpy().
//
// @param !ZF if required cpu vector extensions are available
// @param rdi is address of 64-bit jump table
// @param rsi is address of 8-bit jump initializers
// @param rdx is address of indirect branch
// @param ecx is size of jump table
memjmpinit:
.leafprologue
setnz %r8b

View file

@ -18,19 +18,19 @@
*/
#include "libc/macros.h"
/ Copies memory.
/
/ DEST and SRC may overlap.
/
/ @param rdi is dest
/ @param rsi is src
/ @param rdx is number of bytes
/ @return original rdi copied to rax
/ @clob flags,rcx
/ @asyncsignalsafe
// Copies memory.
//
// DEST and SRC may overlap.
//
// @param rdi is dest
// @param rsi is src
// @param rdx is number of bytes
// @return original rdi copied to rax
// @clob flags,rcx
// @asyncsignalsafe
memmove:
mov %rdi,%rax
/ 𝑠𝑙𝑖𝑑𝑒
// 𝑠𝑙𝑖𝑑𝑒
.endfn MemMove,globl,hidden
MemMove:

View file

@ -19,14 +19,14 @@
#include "libc/macros.h"
.source __FILE__
/ Copies memory.
/
/ DEST and SRC must not overlap unless DEST ≤ SRC.
/
/ @param rdi is dest
/ @param rsi is src
/ @param rdx is number of bytes
/ @return original rdi + rdx copied to rax
// Copies memory.
//
// DEST and SRC must not overlap unless DEST ≤ SRC.
//
// @param rdi is dest
// @param rsi is src
// @param rdx is number of bytes
// @return original rdi + rdx copied to rax
mempcpy:
lea (%rdi,%rdx),%rax
jmp MemCpy

View file

@ -20,14 +20,14 @@
#include "libc/dce.h"
#include "libc/macros.h"
/ Searches for last instance of byte in memory region.
/
/ @param rdi points to data to search
/ @param esi is treated as unsigned char
/ @param rdx is byte length of rdi
/ @return rax is address of last %sil in %rdi, or NULL
/ @note AVX2 requires Haswell (2014+) or Excavator (2015+)
/ @asyncsignalsafe
// Searches for last instance of byte in memory region.
//
// @param rdi points to data to search
// @param esi is treated as unsigned char
// @param rdx is byte length of rdi
// @return rax is address of last %sil in %rdi, or NULL
// @note AVX2 requires Haswell (2014+) or Excavator (2015+)
// @asyncsignalsafe
memrchr:.leafprologue
.profilable
#if !IsTiny()

View file

@ -20,13 +20,13 @@
#include "libc/dce.h"
#include "libc/macros.h"
/ Searches for last instance of uint16_t in memory region.
/
/ @param rdi points to data to search
/ @param esi is treated as uint16_t
/ @param rdx is short count in rdi
/ @return rax is address of last %si in %rdi, or NULL
/ @note AVX2 requires Haswell (2014+) or Excavator (2015+)
// Searches for last instance of uint16_t in memory region.
//
// @param rdi points to data to search
// @param esi is treated as uint16_t
// @param rdx is short count in rdi
// @return rax is address of last %si in %rdi, or NULL
// @note AVX2 requires Haswell (2014+) or Excavator (2015+)
memrchr16:
.leafprologue
.profilable

View file

@ -20,13 +20,13 @@
#include "libc/dce.h"
#include "libc/macros.h"
/ Searches for last instance of wchar_t in memory region.
/
/ @param rdi points to data to search
/ @param esi is treated as int32_t (officially wchar_t)
/ @param rdx is short count in rdi
/ @return rax is address of last %esi in %rdi, or NULL
/ @note AVX2 requires Haswell (2014+) or Excavator (2015+)
// Searches for last instance of wchar_t in memory region.
//
// @param rdi points to data to search
// @param esi is treated as int32_t (officially wchar_t)
// @param rdx is short count in rdi
// @return rax is address of last %esi in %rdi, or NULL
// @note AVX2 requires Haswell (2014+) or Excavator (2015+)
wmemrchr:
.leafprologue
.profilable

View file

@ -24,26 +24,26 @@
#include "libc/nexgen32e/macros.h"
#include "libc/macros.h"
/ Sets memory.
/
/ @param rdi is dest
/ @param esi is the byte to set
/ @param edx is the number of bytes to set
/ @return original rdi copied to rax
/ @mode long
/ @asyncsignalsafe
// Sets memory.
//
// @param rdi is dest
// @param esi is the byte to set
// @param edx is the number of bytes to set
// @return original rdi copied to rax
// @mode long
// @asyncsignalsafe
memset: mov %rdi,%rax
/ 𝑠𝑙𝑖𝑑𝑒
// 𝑠𝑙𝑖𝑑𝑒
.align 16
.endfn memset,globl
/ Sets memory w/ minimal-impact ABI.
/
/ @param rdi is dest
/ @param esi is the byte to set
/ @param edx is the number of bytes to set
/ @clob flags,rcx,xmm3
/ @mode long
// Sets memory w/ minimal-impact ABI.
//
// @param rdi is dest
// @param esi is the byte to set
// @param edx is the number of bytes to set
// @clob flags,rcx,xmm3
// @mode long
MemSet: .leafprologue
.profilable
mov $.Lmemsettab.ro.size,%ecx

View file

@ -21,7 +21,7 @@
.source __FILE__
.code16 # .code32 .code64
/ Optional function stub.
// Optional function stub.
missingno:
xor %ax,%ax
ret

View file

@ -19,16 +19,16 @@
#include "libc/macros.h"
.text.windows
/ Translates function call from code built w/ MS-style compiler.
/
/ This wraps WinMain() and callback functions passed to Win32 API.
/ Please note an intermediary jump slot is needed to set %rax.
/
/ @param %rax is function address
/ @param %rcx,%rdx,%r8,%r9
/ @return %rax,%xmm0
/ @note slower than __sysv2nt
/ @see NT2SYSV() macro
// Translates function call from code built w/ MS-style compiler.
//
// This wraps WinMain() and callback functions passed to Win32 API.
// Please note an intermediary jump slot is needed to set %rax.
//
// @param %rax is function address
// @param %rcx,%rdx,%r8,%r9
// @return %rax,%xmm0
// @note slower than __sysv2nt
// @see NT2SYSV() macro
__nt2sysv:
push %rbp
mov %rsp,%rbp

View file

@ -17,38 +17,38 @@
PERFORMANCE OF THIS SOFTWARE.
╚─────────────────────────────────────────────────────────────────────────────*/
/ Documentation for Intel(R)'s “Most Complicated Instruction”(TM)
/
/ mnemonic op1 op2 op3 op4 modif f description, notes
/ ═══════════ ════ ════ ════ ═══ ════════ ═════════════════════════════
/ PCMPESTRM XMM0 Vdq Wdq ... o..szapc Explicit Length, Return Mask
/ PCMPESTRI rCX Vdq Wdq ... o..szapc Explicit Length, Return Index
/ PCMPISTRM XMM0 Vdq Wdq Ib o..szapc Implicit Length, Return Mask
/ PCMPISTRI rCX Vdq Wdq Ib o..szapc Implicit Length, Return Index
/
/ CF Reset if IntRes2 is equal to zero, set otherwise
/ ZF Set if any byte/word of xmm2/mem128 is null, reset otherwise
/ SF Set if any byte/word of xmm1 is null, reset otherwise
/ OF IntRes2[0]
/ AF Reset
/ PF Reset
/
/ PCMP{E,I}STR{I,M} Control Byte
/ @see Intel Manual V.2B §4.1.7
/
/ ┌─0:index of the LEAST significant, set, bit is used
/ regardless of corresponding input element validity
/ intres2 is returned in least significant bits of xmm0
/ ├─1:index of the MOST significant, set, bit is used
/ regardless of corresponding input element validity
/ each bit of intres2 is expanded to byte/word
/ │┌─0:negation of intres1 is for all 16 (8) bits
/ │├─1:negation of intres1 is masked by reg/mem validity
/ ││┌─intres1 is negated (1s complement)
/ │││┌─mode{equalany,ranges,equaleach,equalordered}
/ ││││ ┌─issigned
/ ││││ │┌─is16bit
/ u│││├┐││
// Documentation for Intel(R)'s “Most Complicated Instruction”(TM)
//
// mnemonic op1 op2 op3 op4 modif f description, notes
// ═══════════ ════ ════ ════ ═══ ════════ ═════════════════════════════
// PCMPESTRM XMM0 Vdq Wdq ... o..szapc Explicit Length, Return Mask
// PCMPESTRI rCX Vdq Wdq ... o..szapc Explicit Length, Return Index
// PCMPISTRM XMM0 Vdq Wdq Ib o..szapc Implicit Length, Return Mask
// PCMPISTRI rCX Vdq Wdq Ib o..szapc Implicit Length, Return Index
//
// CF ← Reset if IntRes2 is equal to zero, set otherwise
// ZF ← Set if any byte/word of xmm2/mem128 is null, reset otherwise
// SF ← Set if any byte/word of xmm1 is null, reset otherwise
// OF ← IntRes2[0]
// AF ← Reset
// PF ← Reset
//
// PCMP{E,I}STR{I,M} Control Byte
// @see Intel Manual V.2B §4.1.7
//
// ┌─0:index of the LEAST significant, set, bit is used
// │ regardless of corresponding input element validity
// │ intres2 is returned in least significant bits of xmm0
// ├─1:index of the MOST significant, set, bit is used
// │ regardless of corresponding input element validity
// │ each bit of intres2 is expanded to byte/word
// │┌─0:negation of intres1 is for all 16 (8) bits
// │├─1:negation of intres1 is masked by reg/mem validity
// ││┌─intres1 is negated (1s complement)
// │││┌─mode{equalany,ranges,equaleach,equalordered}
// ││││ ┌─issigned
// ││││ │┌─is16bit
// u│││├┐││
.Lequalordered = 0b00001100
.Lequalorder16 = 0b00001101
.Lequalranges8 = 0b00000100

View file

@ -18,7 +18,7 @@
*/
#include "libc/macros.h"
/ Supplies argv[0] the GNU way.
// Supplies argv[0] the GNU way.
.initbss 300,_init_program_invocation_name
program_invocation_name:
.quad 0

View file

@ -18,10 +18,10 @@
*/
#include "libc/macros.h"
/ Returns 𝑥 % 1,000,000,000.
/
/ @param rdi int64 𝑥
/ @return rax has remainder
// Returns 𝑥 % 1,000,000,000.
//
// @param rdi int64 𝑥
// @return rax has remainder
rem1000000000int64:
movabs $0x112e0be826d694b3,%rdx
mov %rdi,%rax

View file

@ -18,10 +18,10 @@
*/
#include "libc/macros.h"
/ Returns 𝑥 % 1,000,000.
/
/ @param rdi int64 𝑥
/ @return rax has remainder
// Returns 𝑥 % 1,000,000.
//
// @param rdi int64 𝑥
// @return rax has remainder
rem1000000int64:
movabs $0x431bde82d7b634db,%rdx
mov %rdi,%rax

View file

@ -18,10 +18,10 @@
*/
#include "libc/macros.h"
/ Returns 𝑥 % 10,000.
/
/ @param rdi int64 𝑥
/ @return rax has remainder
// Returns 𝑥 % 10,000.
//
// @param rdi int64 𝑥
// @return rax has remainder
rem10000int64:
mov %rdi,%rax
movabsq $0x346dc5d63886594b,%rdx

View file

@ -18,10 +18,10 @@
*/
#include "libc/macros.h"
/ Returns 𝑥 % 1,000.
/
/ @param rdi int64 𝑥
/ @return rax has remainder
// Returns 𝑥 % 1,000.
//
// @param rdi int64 𝑥
// @return rax has remainder
rem1000int64:
movabs $0x20c49ba5e353f7cf,%rdx
mov %rdi,%rax

View file

@ -18,10 +18,10 @@
*/
#include "libc/macros.h"
/ Returns 𝑥 % 100.
/
/ @param rdi int64 𝑥
/ @return rax has remainder
// Returns 𝑥 % 100.
//
// @param rdi int64 𝑥
// @return rax has remainder
rem100int64:
mov %rdi,%rax
movabsq $-6640827866535438581,%rdx

View file

@ -18,10 +18,10 @@
*/
#include "libc/macros.h"
/ Returns 𝑥 % 10.
/
/ @param rdi int64 𝑥
/ @return rax has remainder
// Returns 𝑥 % 10.
//
// @param rdi int64 𝑥
// @return rax has remainder
rem10int64:
movabs $0x6666666666666667,%rdx
mov %rdi,%rax

View file

@ -19,11 +19,11 @@
#include "libc/macros.h"
.text.startup
/ Seventeen byte decompressor.
/
/ @param di points to output buffer
/ @param si points to uint8_t {len₁,byte₁}, ..., {0,0}
/ @mode long,legacy,real
// Seventeen byte decompressor.
//
// @param di points to output buffer
// @param si points to uint8_t {len₁,byte₁}, ..., {0,0}
// @mode long,legacy,real
rldecode:
.leafprologue
.profilable

View file

@ -19,11 +19,12 @@
#include "libc/macros.h"
.privileged
/ Stores XMM registers to buffer.
/
/ @param %rdi points to &(forcealign(16) uint8_t[256])[128]
/ @note modern cpus have out-of-order execution engines
savexmm:.leafprologue
// Stores XMM registers to buffer.
//
// @param %rdi points to &(forcealign(16) uint8_t[256])[128]
// @note modern cpus have out-of-order execution engines
savexmm:
.leafprologue
movaps %xmm0,-0x80(%rdi)
movaps %xmm1,-0x70(%rdi)
movaps %xmm2,-0x60(%rdi)

View file

@ -18,14 +18,14 @@
*/
#include "libc/macros.h"
/ Saves caller CPU state to cacheline.
/
/ @param rdi points to jmp_buf
/ @return rax 0 when set and !0 when longjmp'd
/ @returnstwice
/ @assume system five nexgen32e abi conformant
/ @note code built w/ microsoft abi compiler can't call this
/ @see longjmp(), gclongjmp()
// Saves caller CPU state to cacheline.
//
// @param rdi points to jmp_buf
// @return rax 0 when set and !0 when longjmp'd
// @returnstwice
// @assume system five nexgen32e abi conformant
// @note code built w/ microsoft abi compiler can't call this
// @see longjmp(), gclongjmp()
setjmp: lea 8(%rsp),%rax
mov %rax,(%rdi)
mov %rbx,8(%rdi)

View file

@ -19,21 +19,21 @@
#include "libc/macros.h"
#include "libc/notice.inc"
/ Applies no-clobber guarantee to System Five function call.
/
/ - Reentrant
/ - Realigns stack
/ - Doesn't assume red zone
/ - Clobbers nothing (except %rax and flags)
/
/ This function may be called using an stdcall convention. It's
/ useful for files named FOO.hookabi.c and BAR.ncabi.c to make
/ calls into other parts of the system, that don't conform to the
/ same restricted ABI.
/
/ @param six args and fn addr pushed on stack in reverse order
/ @return %rax has function return value, and stack is cleaned up
/ @see libc/shadowargs.hook.c for intended use case
// Applies no-clobber guarantee to System Five function call.
//
// - Reentrant
// - Realigns stack
// - Doesn't assume red zone
// - Clobbers nothing (except %rax and flags)
//
// This function may be called using an stdcall convention. It's
// useful for files named FOO.hookabi.c and BAR.ncabi.c to make
// calls into other parts of the system, that don't conform to the
// same restricted ABI.
//
// @param six args and fn addr pushed on stack in reverse order
// @return %rax has function return value, and stack is cleaned up
// @see libc/shadowargs.hook.c for intended use case
slowcall:
#param %r9 # 0x40 arg6
#param %r8 # 0x38 arg5

View file

@ -18,7 +18,7 @@
*/
#include "libc/macros.h"
/ TODO(jart): pmovzxbw and vpunpcklbw
// TODO(jart): pmovzxbw and vpunpcklbw
strcpyzbw:
.leafprologue
.profilable

View file

@ -18,13 +18,13 @@
*/
#include "libc/macros.h"
/ Returns prefix length, consisting of chars not in reject.
/
/ @param rdi is string
/ @param rsi is reject nul-terminated character set
/ @return rax is index of first byte in charset
/ @see strspn(), strtok_r()
/ @asyncsignalsafe
// Returns prefix length, consisting of chars not in reject.
//
// @param rdi is string
// @param rsi is reject nul-terminated character set
// @return rax is index of first byte in charset
// @see strspn(), strtok_r()
// @asyncsignalsafe
strcspn:
push %rbp
mov %rsp,%rbp

View file

@ -18,13 +18,13 @@
*/
#include "libc/macros.h"
/ Returns length of NUL-terminated string.
/
/ @param rdi is non-null NUL-terminated string pointer
/ @return rax is number of bytes (excluding NUL)
/ @clob ax,dx,cx,xmm3,xmm4
/ @note h/t agner fog
/ @asyncsignalsafe
// Returns length of NUL-terminated string.
//
// @param rdi is non-null NUL-terminated string pointer
// @return rax is number of bytes (excluding NUL)
// @clob ax,dx,cx,xmm3,xmm4
// @note h/t agner fog
// @asyncsignalsafe
strlen: .leafprologue
.profilable
mov %rdi,%rax

View file

@ -21,14 +21,14 @@
#include "libc/macros.h"
.source __FILE__
/ Returns length of NUL-terminated string w/ security blankets.
/
/ This is like strnlen() except it'll return 0 if (1) RDI is NULL
/ or (2) a NUL-terminator wasn't found in the first RSI bytes.
/
/ @param rdi is a nullable NUL-terminated string pointer
/ @param rsi is the maximum number of bytes to consider
/ @return rax is the number of bytes, excluding the NUL
// Returns length of NUL-terminated string w/ security blankets.
//
// This is like strnlen() except it'll return 0 if (1) RDI is NULL
// or (2) a NUL-terminator wasn't found in the first RSI bytes.
//
// @param rdi is a nullable NUL-terminated string pointer
// @param rsi is the maximum number of bytes to consider
// @return rax is the number of bytes, excluding the NUL
strnlen_s:
.leafprologue
.profilable
@ -39,20 +39,20 @@ strnlen_s:
.leafepilogue
0: xor %edx,%edx
mov %rdi,%r8
/ 𝑠𝑙𝑖𝑑𝑒
// 𝑠𝑙𝑖𝑑𝑒
.endfn strnlen_s,globl
/ Swiss army knife of string character scanning.
/ Used to be fourteen fast functions in one.
/
/ @param rdi is non-null string memory
/ @param rsi is max number of bytes to consider
/ @param dl is search character #1
/ @param dh is search character #2
/ @param r8 is subtracted from result (for length vs. pointer)
/ @param r9 masks result if DH is found (for NUL vs. NULL)
/ @param r10 masks result on bytes exhausted (for length v. NULL)
/ @return rax end pointer after r8/r9/r10 modifications
// Swiss army knife of string character scanning.
// Used to be fourteen fast functions in one.
//
// @param rdi is non-null string memory
// @param rsi is max number of bytes to consider
// @param dl is search character #1
// @param dh is search character #2
// @param r8 is subtracted from result (for length vs. pointer)
// @param r9 masks result if DH is found (for NUL vs. NULL)
// @param r10 masks result on bytes exhausted (for length v. NULL)
// @return rax end pointer after r8/r9/r10 modifications
strsak: lea -1(%rdi),%rax
1: add $1,%rax
sub $1,%rsi

View file

@ -19,12 +19,12 @@
#include "libc/nexgen32e/x86feature.h"
#include "libc/macros.h"
/ Returns pointer to first instance of character.
/
/ @param rdi is a non-null NUL-terminated char16_t string pointer
/ @param esi is the search word
/ @return rax points to character, or to NUL word if not found
/ @note this won't return NULL if search character is NUL
// Returns pointer to first instance of character.
//
// @param rdi is a non-null NUL-terminated char16_t string pointer
// @param esi is the search word
// @return rax points to character, or to NUL word if not found
// @note this won't return NULL if search character is NUL
strchrnul16:
.leafprologue
.profilable
@ -32,13 +32,13 @@ strchrnul16:
jmp 0f
.endfn strchrnul16,globl
/ Returns pointer to first instance of character.
/
/ @param rdi is a non-null NUL-terminated char16_t string pointer
/ @param esi is the search word
/ @return rax points to first result, or NULL if not found
/ @note this won't return NULL if search character is NUL
/ @asyncsignalsafe
// Returns pointer to first instance of character.
//
// @param rdi is a non-null NUL-terminated char16_t string pointer
// @param esi is the search word
// @return rax points to first result, or NULL if not found
// @note this won't return NULL if search character is NUL
// @asyncsignalsafe
strchr16:
.leafprologue
.profilable
@ -50,23 +50,23 @@ strchr16:
jmp strsak16
.endfn strchr16,globl
/ Returns pointer to first instance of character in range.
/
/ @param rdi is a non-null pointer to memory
/ @param esi is the search word
/ @return rax points to word if found, or else undefined behavior
// Returns pointer to first instance of character in range.
//
// @param rdi is a non-null pointer to memory
// @param esi is the search word
// @return rax points to word if found, or else undefined behavior
rawmemchr16:
or $-1,%rdx
/ fallthrough
// fallthrough
.endfn rawmemchr16,globl
/ Returns pointer to first instance of character in range.
/
/ @param rdi is a non-null pointer to memory
/ @param esi is the search word
/ @param rdx is length of memory in shorts
/ @return rax points to word if found or NULL
/ @asyncsignalsafe
// Returns pointer to first instance of character in range.
//
// @param rdi is a non-null pointer to memory
// @param esi is the search word
// @param rdx is length of memory in shorts
// @return rax points to word if found or NULL
// @asyncsignalsafe
memchr16:
.leafprologue
.profilable
@ -77,14 +77,14 @@ memchr16:
jmp strsak16
.endfn memchr16,globl
/ Returns length of char16_t string w/ security blankets.
/
/ This is like strnlen() except it'll return 0 if (1) RDI is NULL
/ or (2) a NUL-terminator wasn't found in the first RSI shorts.
/
/ @param rdi is a nullable NUL-terminated char16_t string pointer
/ @param rsi is the maximum number of shorts to consider
/ @return rax is the number of shorts, excluding the NUL
// Returns length of char16_t string w/ security blankets.
//
// This is like strnlen() except it'll return 0 if (1) RDI is NULL
// or (2) a NUL-terminator wasn't found in the first RSI shorts.
//
// @param rdi is a nullable NUL-terminated char16_t string pointer
// @param rsi is the maximum number of shorts to consider
// @return rax is the number of shorts, excluding the NUL
strnlen16_s:
.leafprologue
.profilable
@ -95,22 +95,22 @@ strnlen16_s:
.leafepilogue
.endfn strnlen16_s,globl
/ Returns length of NUL-terminated char16_t string.
/
/ @param rdi is non-null NUL-terminated char16_t string pointer
/ @return rax is the number of shorts, excluding the NUL
/ @asyncsignalsafe
// Returns length of NUL-terminated char16_t string.
//
// @param rdi is non-null NUL-terminated char16_t string pointer
// @return rax is the number of shorts, excluding the NUL
// @asyncsignalsafe
strlen16:
or $-1,%rsi
/ fallthrough
// fallthrough
.endfn strlen16,globl
/ Returns length of NUL-terminated memory, with limit.
/
/ @param rdi is non-null memory
/ @param rsi is the maximum number of shorts to consider
/ @return rax is the number of shorts, excluding the NUL
/ @asyncsignalsafe
// Returns length of NUL-terminated memory, with limit.
//
// @param rdi is non-null memory
// @param rsi is the maximum number of shorts to consider
// @return rax is the number of shorts, excluding the NUL
// @asyncsignalsafe
strnlen16:
.leafprologue
.profilable
@ -118,20 +118,20 @@ strnlen16:
0: xor %edx,%edx
xor %r11d,%r11d
mov %rdi,%r8
/ fallthrough
// fallthrough
.endfn strnlen16,globl
/ Swiss Army Knife of string char16_t scanning.
/ Sixteen fast functions in one.
/
/ @param rdi is non-null string memory
/ @param rsi is max number of shorts to consider
/ @param dx is search character #1
/ @param r11w is search character #2
/ @param r8 is subtracted from result (for length vs. pointer)
/ @param r9 masks result if DH is found (for NUL vs. NULL)
/ @param r10 masks result on shorts exhausted (for length v. NULL)
/ @return rax end pointer after r8/r9/r10 modifications
// Swiss Army Knife of string char16_t scanning.
// Sixteen fast functions in one.
//
// @param rdi is non-null string memory
// @param rsi is max number of shorts to consider
// @param dx is search character #1
// @param r11w is search character #2
// @param r8 is subtracted from result (for length vs. pointer)
// @param r9 masks result if r11w is found (for NUL vs. NULL)
// @param r10 masks result on shorts exhausted (for length v. NULL)
// @return rax end pointer after r8/r9/r10 modifications
strsak16:
lea -2(%rdi),%rax
1: add $2,%rax

View file

@ -19,25 +19,25 @@
#include "libc/nexgen32e/x86feature.h"
#include "libc/macros.h"
/ Returns pointer to first instance of character.
/
/ @param rdi is a non-null NUL-terminated wchar_t string pointer
/ @param esi is the search word
/ @return rax points to character, or to NUL word if not found
/ @note this won't return NULL if search character is NUL
// Returns pointer to first instance of character.
//
// @param rdi is a non-null NUL-terminated wchar_t string pointer
// @param esi is the search word
// @return rax points to character, or to NUL word if not found
// @note this won't return NULL if search character is NUL
wcschrnul:
.leafprologue
.profilable
or $-1,%r9
jmp 0f
/ Returns pointer to first instance of character.
/
/ @param rdi is a non-null NUL-terminated wchar_t string pointer
/ @param esi is the search word
/ @return rax points to first result, or NULL if not found
/ @note this won't return NULL if search character is NUL
/ @asyncsignalsafe
// Returns pointer to first instance of character.
//
// @param rdi is a non-null NUL-terminated wchar_t string pointer
// @param esi is the search word
// @return rax points to first result, or NULL if not found
// @note this won't return NULL if search character is NUL
// @asyncsignalsafe
wcschr: .leafprologue
.profilable
xor %r9,%r9
@ -47,14 +47,14 @@ wcschr: .leafprologue
xor %r8,%r8
jmp wcssak
/ Returns length of wchar_t string w/ security blankets.
/
/ This is like wcsnlen() except it'll return 0 if (1) RDI is NULL
/ or (2) a NUL-terminator wasn't found in the first RSI chars.
/
/ @param rdi is a nullable NUL-terminated wchar_t string pointer
/ @param rsi is the maximum number of chars to consider
/ @return rax is the number of chars, excluding the NUL
// Returns length of wchar_t string w/ security blankets.
//
// This is like wcsnlen() except it'll return 0 if (1) RDI is NULL
// or (2) a NUL-terminator wasn't found in the first RSI chars.
//
// @param rdi is a nullable NUL-terminated wchar_t string pointer
// @param rsi is the maximum number of chars to consider
// @return rax is the number of chars, excluding the NUL
wcsnlen_s:
.leafprologue
.profilable
@ -65,20 +65,20 @@ wcsnlen_s:
.leafepilogue
.endfn wcsnlen_s,globl
/ Returns length of NUL-terminated wchar_t string.
/
/ @param rdi is non-null NUL-terminated wchar_t string pointer
/ @return rax is the number of chars, excluding the NUL
/ @asyncsignalsafe
// Returns length of NUL-terminated wchar_t string.
//
// @param rdi is non-null NUL-terminated wchar_t string pointer
// @return rax is the number of chars, excluding the NUL
// @asyncsignalsafe
wcslen: or $-1,%rsi
/ fallthrough
// fallthrough
/ Returns length of NUL-terminated memory, with limit.
/
/ @param rdi is non-null memory
/ @param rsi is the maximum number of chars to consider
/ @return rax is the number of chars, excluding the NUL
/ @asyncsignalsafe
// Returns length of NUL-terminated memory, with limit.
//
// @param rdi is non-null memory
// @param rsi is the maximum number of chars to consider
// @return rax is the number of chars, excluding the NUL
// @asyncsignalsafe
wcsnlen:.leafprologue
.profilable
or $-1,%r10
@ -87,22 +87,22 @@ wcsnlen:.leafprologue
mov %rdi,%r8
jmp wcssak
/ Returns pointer to first instance of character in range.
/
/ @param rdi is a non-null pointer to memory
/ @param esi is the search word
/ @return rax points to word if found, or else undefined behavior
// Returns pointer to first instance of character in range.
//
// @param rdi is a non-null pointer to memory
// @param esi is the search word
// @return rax points to word if found, or else undefined behavior
rawwmemchr:
or $-1,%rdx
/ fallthrough
// fallthrough
/ Returns pointer to first instance of character in range.
/
/ @param rdi is a non-null pointer to memory
/ @param esi is the int32_t search word (officially wchar_t)
/ @param rdx is length of memory in chars
/ @return rax points to word if found or NULL
/ @asyncsignalsafe
// Returns pointer to first instance of character in range.
//
// @param rdi is a non-null pointer to memory
// @param esi is the int32_t search word (officially wchar_t)
// @param rdx is length of memory in chars
// @return rax points to word if found or NULL
// @asyncsignalsafe
wmemchr:.leafprologue
.profilable
xor %r8,%r8
@ -111,19 +111,19 @@ wmemchr:.leafprologue
mov %esi,%edx
mov %esi,%r11d
mov %rcx,%rsi
/ fallthrough
// fallthrough
/ Swiss army knife of wchar_t string scanning.
/ Seven fast functions in one.
/
/ @param rdi is non-null wchar_t string memory
/ @param rsi is max number of chars to consider
/ @param edx is search character #1
/ @param r11d is search character #2
/ @param r8 is subtracted from result (for length vs. pointer)
/ @param r9 masks result if r11w is found (for NUL vs. NULL)
/ @param r10 masks result on chars exhausted (for length v. NULL)
/ @return rax end pointer after r8/r9/r10 modifications
// Swiss army knife of wchar_t string scanning.
// Seven fast functions in one.
//
// @param rdi is non-null wchar_t string memory
// @param rsi is max number of chars to consider
// @param edx is search character #1
// @param r11d is search character #2
// @param r8 is subtracted from result (for length vs. pointer)
// @param r9 masks result if r11d is found (for NUL vs. NULL)
// @param r10 masks result on chars exhausted (for length v. NULL)
// @return rax end pointer after r8/r9/r10 modifications
wcssak: lea -4(%rdi),%rax
.align 16
1: add $4,%rax

View file

@ -20,7 +20,7 @@
#include "libc/nexgen32e/pcmpstr.inc"
#include "libc/nexgen32e/strstr.inc"
/ TODO(jart): Fix me.
// TODO(jart): Fix me.
strstr_sse42:
.leafprologue
mov %rdi,%rax

View file

@ -18,12 +18,12 @@
╚─────────────────────────────────────────────────────────────────────────────*/
/* clang-format off */
/ Searches for substring.
/
/ @param rdi is NUL-terminated haystack string
/ @param rsi is NUL-terminated needle string (16-byte aligned)
/ @return rax is pointer to substring or NULL
/ @todo 10x faster than naïve but could be 100x faster
// Searches for substring.
//
// @param rdi is NUL-terminated haystack string
// @param rsi is NUL-terminated needle string (16-byte aligned)
// @return rax is pointer to substring or NULL
// @todo 10x faster than naïve but could be 100x faster
.macro .strstr mode:req
push %rbp
mov %rsp,%rbp

View file

@ -20,7 +20,7 @@
#include "libc/nexgen32e/pcmpstr.inc"
#include "libc/nexgen32e/strstr.inc"
/ TODO(jart): Fix me.
// TODO(jart): Fix me.
strstr16$sse42:
.strstr .Lequalorder16
.endfn strstr16$sse42,globl,hidden

View file

@ -18,17 +18,17 @@
*/
#include "libc/macros.h"
/ Support code for fast integer division by Si units.
/
/ Division by magnums is described in Hacker's Delight and is
/ usually generated automatically by compilers, but sadly not
/ when we optimize for size and idiv goes at least 10x slower
/ so we do this which saves space while avoiding build tuning
/
/ @param rdi is number to divide
/ @param cl is magnum #1
/ @param rdx is magnum #2
/ @return quotient
// Support code for fast integer division by Si units.
//
// Division by magnums is described in Hacker's Delight and is
// usually generated automatically by compilers, but sadly not
// when we optimize for size and idiv goes at least 10x slower
// so we do this which saves space while avoiding build tuning
//
// @param rdi is number to divide
// @param cl is magnum #1
// @param rdx is magnum #2
// @return quotient
tinydivsi:
.leafprologue
mov %rdi,%rax

View file

@ -18,11 +18,11 @@
*/
#include "libc/macros.h"
/ 8-bit strlen that's tiny and near optimal if data's tiny.
/
/ @param RDI is char *s
/ @param EAX is unsigned length
/ @see libc/nexgen32e/strsak.S
// 8-bit strlen that's tiny and near optimal if data's tiny.
//
// @param RDI is char *s
// @return EAX is unsigned length
// @see libc/nexgen32e/strsak.S
tinystrlen:
.leafprologue
.profilable

View file

@ -18,11 +18,11 @@
*/
#include "libc/macros.h"
/ 16-bit strlen that's tiny and near optimal if data's tiny.
/
/ @param RDI is char16_t *s
/ @param EAX is unsigned length
/ @see libc/nexgen32e/strsak16.S
// 16-bit strlen that's tiny and near optimal if data's tiny.
//
// @param RDI is char16_t *s
// @return EAX is unsigned length
// @see libc/nexgen32e/strsak16.S
tinystrlen16:
.leafprologue
.profilable

View file

@ -18,14 +18,14 @@
*/
#include "libc/macros.h"
/ Compares strings w/ limit & no-clobber greg abi.
/
/ @param %rdi is first string
/ @param %rsi is second string
/ @param %rdx is max length
/ @return <0, 0, or >0 depending on comparison
/ @clob flags only
/ @asyncsignalsafe
// Compares strings w/ limit & no-clobber greg abi.
//
// @param %rdi is first string
// @param %rsi is second string
// @param %rdx is max length
// @return <0, 0, or >0 depending on comparison
// @clob flags only
// @asyncsignalsafe
tinystrncmp:
.leafprologue
push %rbx

View file

@ -18,12 +18,12 @@
*/
#include "libc/macros.h"
/ 8-bit strnlen that's tiny and near optimal if data's tiny.
/
/ @param RDI is char *s
/ @param RSI is size_t n
/ @param EAX is unsigned length
/ @see libc/nexgen32e/strsak.S
// 8-bit strnlen that's tiny and near optimal if data's tiny.
//
// @param RDI is char *s
// @param RSI is size_t n
// @return EAX is unsigned length
// @see libc/nexgen32e/strsak.S
tinystrnlen:
.leafprologue
.profilable

View file

@ -18,12 +18,12 @@
*/
#include "libc/macros.h"
/ 16-bit strnlen that's tiny and near optimal if data's tiny.
/
/ @param RDI is char16_t *s
/ @param RSI is size_t n
/ @param EAX is unsigned length
/ @see libc/nexgen32e/strsak16.S
// 16-bit strnlen that's tiny and near optimal if data's tiny.
//
// @param RDI is char16_t *s
// @param RSI is size_t n
// @return EAX is unsigned length
// @see libc/nexgen32e/strsak16.S
tinystrnlen16:
.leafprologue
.profilable

View file

@ -18,11 +18,11 @@
*/
#include "libc/macros.h"
/ 32-bit strlen that's tiny and near optimal if data's tiny.
/
/ @param RDI is wchar_t *s
/ @param EAX is unsigned length
/ @see libc/nexgen32e/strsak32.S
// 32-bit strlen that's tiny and near optimal if data's tiny.
//
// @param RDI is wchar_t *s
// @return EAX is unsigned length
// @see libc/nexgen32e/strsak32.S
tinywcslen:
.leafprologue
.profilable

View file

@ -18,12 +18,12 @@
*/
#include "libc/macros.h"
/ 32-bit strnlen that's tiny and near optimal if data's tiny.
/
/ @param RDI is wchar_t *s
/ @param RSI is size_t n
/ @param EAX is unsigned length
/ @see libc/nexgen32e/strsak32.S
// 32-bit strnlen that's tiny and near optimal if data's tiny.
//
// @param RDI is wchar_t *s
// @param RSI is size_t n
// @return EAX is unsigned length
// @see libc/nexgen32e/strsak32.S
tinywcsnlen:
.leafprologue
.profilable

View file

@ -22,10 +22,10 @@
.source __FILE__
.code16 # .code32 .code64
/ Hoses interrupt descriptor table and triple-faults the system.
/
/ @see youtu.be/GIKfEAF2Yhw?t=67
/ @mode long,legacy,real
// Hoses interrupt descriptor table and triple-faults the system.
//
// @see youtu.be/GIKfEAF2Yhw?t=67
// @mode long,legacy,real
triplf: push %bp
mov %sp,%bp
sub $8,%sp

View file

@ -19,21 +19,21 @@
#include "libc/macros.h"
.source __FILE__
/ Finds lowest set bit in 𝑥.
/
/ uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
/ 0x00000000 wut 32 0 wut 32
/ 0x00000001 0 0 1 0 31
/ 0x80000001 0 0 1 31 0
/ 0x80000000 31 31 32 31 0
/ 0x00000010 4 4 5 4 27
/ 0x08000010 4 4 5 27 4
/ 0x08000000 27 27 28 27 4
/ 0xffffffff 0 0 1 31 0
/
/ @param edi is 32-bit unsigned 𝑥 value
/ @return eax number in range [0,32) or 32 if 𝑥 is 0
/ @see also treasure trove of nearly identical functions
// Finds lowest set bit in 𝑥.
//
// uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
// 0x00000000 wut 32 0 wut 32
// 0x00000001 0 0 1 0 31
// 0x80000001 0 0 1 31 0
// 0x80000000 31 31 32 31 0
// 0x00000010 4 4 5 4 27
// 0x08000010 4 4 5 27 4
// 0x08000000 27 27 28 27 4
// 0xffffffff 0 0 1 31 0
//
// @param edi is 32-bit unsigned 𝑥 value
// @return eax number in range [0,32) or 32 if 𝑥 is 0
// @see also treasure trove of nearly identical functions
tzcnt: .leafprologue
.profilable
mov $32,%esi

View file

@ -18,21 +18,21 @@
*/
#include "libc/macros.h"
/ Finds lowest set bit in 𝑥.
/
/ uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
/ 0x00000000 wut 32 0 wut 32
/ 0x00000001 0 0 1 0 31
/ 0x80000001 0 0 1 31 0
/ 0x80000000 31 31 32 31 0
/ 0x00000010 4 4 5 4 27
/ 0x08000010 4 4 5 27 4
/ 0x08000000 27 27 28 27 4
/ 0xffffffff 0 0 1 31 0
/
/ @param rdi is 64-bit unsigned 𝑥 value
/ @return rax number in range [0,64) or 64 if 𝑥 is 0
/ @see also treasure trove of nearly identical functions
// Finds lowest set bit in 𝑥.
//
// uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
// 0x00000000 wut 32 0 wut 32
// 0x00000001 0 0 1 0 31
// 0x80000001 0 0 1 31 0
// 0x80000000 31 31 32 31 0
// 0x00000010 4 4 5 4 27
// 0x08000010 4 4 5 27 4
// 0x08000000 27 27 28 27 4
// 0xffffffff 0 0 1 31 0
//
// @param rdi is 64-bit unsigned 𝑥 value
// @return rax number in range [0,64) or 64 if 𝑥 is 0
// @see also treasure trove of nearly identical functions
tzcntl: .leafprologue
.profilable
mov $64,%esi

View file

@ -23,11 +23,11 @@
.code16 # .code32 .code64
.source __FILE__
/ Clears display page.
/
/ @param es:di arbitrary address within video page
/ @return es:ax starting address
/ @mode long,legacy,real
// Clears display page.
//
// @param es:di arbitrary address within video page
// @return es:ax starting address
// @mode long,legacy,real
vcls: xchg %di,%ax
bband VIDYA_REWIND,%ah,%al
xchg %di,%ax

View file

@ -17,12 +17,12 @@
PERFORMANCE OF THIS SOFTWARE.
╚─────────────────────────────────────────────────────────────────────────────*/
/ Reconfigures transcendental math coprocessor.
/
/ @param \conf can be absent to restore default
/ @clob x87 status and control words only
/ @see Intel Manual V.1 §8.1.5
/ @mode long,legacy
// Reconfigures transcendental math coprocessor.
//
// @param \conf can be absent to restore default
// @clob x87 status and control words only
// @see Intel Manual V.1 §8.1.5
// @mode long,legacy
.macro x87conf conf=$0x33f
push %ax
pushw \conf

View file

@ -20,8 +20,8 @@
#include "libc/notice.inc"
#include "ape/config.h"
/ eXtreme Low Memory.
/ @see ape/config.h
// eXtreme Low Memory.
// @see ape/config.h
.section .xlm,"aw",@nobits
.align 4096
__xlm: .rept XLM_SIZE

View file

@ -20,7 +20,7 @@
#include "ape/relocations.h"
#include "libc/zip.h"
/ ZIP Central Directory.
// ZIP Central Directory.
.section .piro.data.sort.zip.3,"a",@progbits
.hidden __zip_start
.globl __zip_start