mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-25 20:10:29 +00:00
Get --ftrace working on aarch64
This change implements a new approach to function call logging that's based on the GCC flag -fpatchable-function-entry. Read the commentary in build/config.mk to learn how it works.
This commit is contained in:
parent
5b908bc756
commit
eb40cb371d
934 changed files with 2259 additions and 1268 deletions
|
@ -30,11 +30,12 @@
|
|||
// @see examples/ctrlc.c
|
||||
// @threadsafe
|
||||
// @noreturn
|
||||
.ftrace1
|
||||
_gclongjmp:
|
||||
.ftrace2
|
||||
#ifdef __x86_64__
|
||||
push %rbp
|
||||
mov %rsp,%rbp
|
||||
.profilable
|
||||
mov %fs:0,%r12 # __get_tls()
|
||||
mov 0x18(%r12),%r12 # Tls::garbages
|
||||
test %r12,%r12
|
||||
|
|
|
@ -25,7 +25,9 @@
|
|||
// @noreturn
|
||||
// @see _gclongjmp()
|
||||
// @see siglongjmp()
|
||||
.ftrace1
|
||||
longjmp:
|
||||
.ftrace2
|
||||
#ifdef __x86_64__
|
||||
mov %esi,%eax
|
||||
test %eax,%eax
|
||||
|
|
|
@ -32,10 +32,11 @@
|
|||
// @param rdx is right hand side which must have 4 quadwords
|
||||
// @note words are host endian while array is little endian
|
||||
// @mayalias
|
||||
.ftrace1
|
||||
Mul4x4Adx:
|
||||
.ftrace2
|
||||
push %rbp
|
||||
mov %rsp,%rbp
|
||||
.profilable
|
||||
sub $56,%rsp
|
||||
mov %r15,-8(%rbp)
|
||||
mov %r14,-16(%rbp)
|
||||
|
|
|
@ -32,10 +32,11 @@
|
|||
// @param rdx is right hand side which must have 6 quadwords
|
||||
// @note words are host endian while array is little endian
|
||||
// @mayalias
|
||||
.ftrace1
|
||||
Mul6x6Adx:
|
||||
.ftrace2
|
||||
push %rbp
|
||||
mov %rsp,%rbp
|
||||
.profilable
|
||||
sub $64,%rsp
|
||||
mov %r15,-8(%rbp)
|
||||
mov %r14,-16(%rbp)
|
||||
|
|
|
@ -32,10 +32,11 @@
|
|||
// @param rdx is right hand side which must have 8 quadwords
|
||||
// @note words are host endian while array is little endian
|
||||
// @mayalias
|
||||
.ftrace1
|
||||
Mul8x8Adx:
|
||||
.ftrace2
|
||||
push %rbp
|
||||
mov %rsp,%rbp
|
||||
.profilable
|
||||
sub $104,%rsp
|
||||
mov %r15,-8(%rbp)
|
||||
mov %r14,-16(%rbp)
|
||||
|
|
|
@ -56,7 +56,7 @@ o/$(MODE)/libc/nexgen32e/ktoupper.o \
|
|||
o/$(MODE)/libc/nexgen32e/pid.o \
|
||||
o/$(MODE)/libc/nexgen32e/program_invocation_name2.o \
|
||||
o/$(MODE)/libc/nexgen32e/threaded.o: private \
|
||||
OVERRIDE_CFLAGS += \
|
||||
CFLAGS += \
|
||||
$(NO_MAGIC)
|
||||
|
||||
# these assembly files are safe to build on aarch64
|
||||
|
|
|
@ -29,12 +29,13 @@
|
|||
// @return %rax,%xmm0
|
||||
// @note slower than __sysv2nt
|
||||
// @see NT2SYSV() macro
|
||||
.ftrace1
|
||||
__nt2sysv:
|
||||
.ftrace2
|
||||
push %rbp
|
||||
mov %rsp,%rbp
|
||||
// TODO(jart): We should probably find some way to use our own
|
||||
// stack when Windows delivers signals ;_;
|
||||
.profilable
|
||||
sub $0x100,%rsp
|
||||
push %rbx
|
||||
push %rdi
|
||||
|
|
|
@ -24,9 +24,10 @@
|
|||
// @param di points to output buffer
|
||||
// @param si points to uint8_t {len₁,byte₁}, ..., {0,0}
|
||||
// @mode long,legacy,real
|
||||
.ftrace1
|
||||
rldecode:
|
||||
.ftrace2
|
||||
.leafprologue
|
||||
.profilable
|
||||
xor %ecx,%ecx
|
||||
0: lodsb
|
||||
xchg %al,%cl
|
||||
|
|
|
@ -26,7 +26,8 @@
|
|||
// @assume system five nexgen32e abi conformant
|
||||
// @note code built w/ microsoft abi compiler can't call this
|
||||
// @see longjmp(), _gclongjmp()
|
||||
setjmp:
|
||||
.ftrace1
|
||||
setjmp: .ftrace2
|
||||
#ifdef __x86_64__
|
||||
lea 8(%rsp),%rax
|
||||
mov %rax,(%rdi)
|
||||
|
|
|
@ -613,10 +613,11 @@ BSWAP_SHUFB_CTL:
|
|||
// @param %rsi points to input data
|
||||
// @param %rdx is number of 64-byte blocks to process
|
||||
// @see X86_HAVE(SHA)
|
||||
.ftrace1
|
||||
sha1_transform_avx2:
|
||||
.ftrace2
|
||||
push %rbp
|
||||
mov %rsp,%rbp
|
||||
.profilable
|
||||
push %rbx
|
||||
push %r12
|
||||
push %r13
|
||||
|
|
|
@ -77,10 +77,11 @@ Tim Chen <tim.c.chen@linux.intel.com>\n"
|
|||
// @param %rsi points to input data
|
||||
// @param %rdx is number of 64-byte blocks to process
|
||||
// @see X86_HAVE(SHA)
|
||||
.ftrace1
|
||||
sha1_transform_ni:
|
||||
.ftrace2
|
||||
push %rbp
|
||||
mov %rsp,%rbp
|
||||
.profilable
|
||||
sub $FRAME_SIZE,%rsp
|
||||
shl $6,NUM_BLKS # convert to bytes
|
||||
jz .Ldone_hash
|
||||
|
|
|
@ -530,10 +530,11 @@ STACK_SIZE = _RSP + _RSP_SIZE
|
|||
########################################################################
|
||||
.text
|
||||
.balign 32
|
||||
.ftrace1
|
||||
sha256_transform_rorx:
|
||||
.ftrace2
|
||||
push %rbp
|
||||
mov %rsp,%rbp
|
||||
.profilable
|
||||
pushq %rbx
|
||||
pushq %r12
|
||||
pushq %r13
|
||||
|
|
|
@ -80,9 +80,10 @@ Tim Chen <tim.c.chen@linux.intel.com>\n"
|
|||
// @param %rsi points to input data
|
||||
// @param %rdx is number of blocks to process
|
||||
// @see X86_HAVE(SHA)
|
||||
.ftrace1
|
||||
sha256_transform_ni:
|
||||
.ftrace2
|
||||
.leafprologue
|
||||
.profilable
|
||||
shl $6,NUM_BLKS # convert to bytes
|
||||
jz .Ldone_hash
|
||||
add DATA_PTR,NUM_BLKS # pointer to end of data
|
||||
|
|
|
@ -571,10 +571,11 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
|
|||
# of SHA512 message blocks.
|
||||
# "blocks" is the message length in SHA512 blocks
|
||||
########################################################################
|
||||
.ftrace1
|
||||
sha512_transform_rorx:
|
||||
.ftrace2
|
||||
push %rbp
|
||||
mov %rsp,%rbp
|
||||
.profilable
|
||||
# Allocate Stack Space
|
||||
mov %rsp, %rax
|
||||
sub $frame_size, %rsp
|
||||
|
|
|
@ -23,9 +23,10 @@
|
|||
// @param RDI is wchar_t *s
|
||||
// @return EAX is unsigned length
|
||||
// @see libc/nexgen32e/strsak32.S
|
||||
.ftrace1
|
||||
tinywcslen:
|
||||
.ftrace2
|
||||
.leafprologue
|
||||
.profilable
|
||||
xor %eax,%eax
|
||||
1: cmpl $0,(%rdi,%rax,4)
|
||||
jz 2f
|
||||
|
|
|
@ -24,9 +24,10 @@
|
|||
// @param RSI is size_t n
|
||||
// @return EAX is unsigned length
|
||||
// @see libc/nexgen32e/strsak32.S
|
||||
.ftrace1
|
||||
tinywcsnlen:
|
||||
.ftrace2
|
||||
.leafprologue
|
||||
.profilable
|
||||
xor %eax,%eax
|
||||
1: cmp %esi,%eax
|
||||
jae 2f
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue