Get --ftrace working on aarch64

This change implements a new approach to function call logging that's
based on the GCC flag -fpatchable-function-entry. Read the commentary
in build/config.mk to learn how it works.
This commit is contained in:
Justine Tunney 2023-06-05 23:35:31 -07:00
parent 5b908bc756
commit eb40cb371d
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
934 changed files with 2259 additions and 1268 deletions

View file

@ -30,11 +30,12 @@
// @see examples/ctrlc.c
// @threadsafe
// @noreturn
.ftrace1
_gclongjmp:
.ftrace2
#ifdef __x86_64__
push %rbp
mov %rsp,%rbp
.profilable
mov %fs:0,%r12 # __get_tls()
mov 0x18(%r12),%r12 # Tls::garbages
test %r12,%r12

View file

@ -25,7 +25,9 @@
// @noreturn
// @see _gclongjmp()
// @see siglongjmp()
.ftrace1
longjmp:
.ftrace2
#ifdef __x86_64__
mov %esi,%eax
test %eax,%eax

View file

@ -32,10 +32,11 @@
// @param rdx is right hand side which must have 4 quadwords
// @note words are host endian while array is little endian
// @mayalias
.ftrace1
Mul4x4Adx:
.ftrace2
push %rbp
mov %rsp,%rbp
.profilable
sub $56,%rsp
mov %r15,-8(%rbp)
mov %r14,-16(%rbp)

View file

@ -32,10 +32,11 @@
// @param rdx is right hand side which must have 4 quadwords
// @note words are host endian while array is little endian
// @mayalias
.ftrace1
Mul6x6Adx:
.ftrace2
push %rbp
mov %rsp,%rbp
.profilable
sub $64,%rsp
mov %r15,-8(%rbp)
mov %r14,-16(%rbp)

View file

@ -32,10 +32,11 @@
// @param rdx is right hand side which must have 8 quadwords
// @note words are host endian while array is little endian
// @mayalias
.ftrace1
Mul8x8Adx:
.ftrace2
push %rbp
mov %rsp,%rbp
.profilable
sub $104,%rsp
mov %r15,-8(%rbp)
mov %r14,-16(%rbp)

View file

@ -56,7 +56,7 @@ o/$(MODE)/libc/nexgen32e/ktoupper.o \
o/$(MODE)/libc/nexgen32e/pid.o \
o/$(MODE)/libc/nexgen32e/program_invocation_name2.o \
o/$(MODE)/libc/nexgen32e/threaded.o: private \
OVERRIDE_CFLAGS += \
CFLAGS += \
$(NO_MAGIC)
# these assembly files are safe to build on aarch64

View file

@ -29,12 +29,13 @@
// @return %rax,%xmm0
// @note slower than __sysv2nt
// @see NT2SYSV() macro
.ftrace1
__nt2sysv:
.ftrace2
push %rbp
mov %rsp,%rbp
// TODO(jart): We should probably find some way to use our own
// stack when Windows delivers signals ;_;
.profilable
sub $0x100,%rsp
push %rbx
push %rdi

View file

@ -24,9 +24,10 @@
// @param di points to output buffer
// @param si points to uint8_t {len₁,byte₁}, ..., {0,0}
// @mode long,legacy,real
.ftrace1
rldecode:
.ftrace2
.leafprologue
.profilable
xor %ecx,%ecx
0: lodsb
xchg %al,%cl

View file

@ -26,7 +26,8 @@
// @assume system five nexgen32e abi conformant
// @note code built w/ microsoft abi compiler can't call this
// @see longjmp(), _gclongjmp()
setjmp:
.ftrace1
setjmp: .ftrace2
#ifdef __x86_64__
lea 8(%rsp),%rax
mov %rax,(%rdi)

View file

@ -613,10 +613,11 @@ BSWAP_SHUFB_CTL:
// @param %rsi points to input data
// @param %rdx is number of 64-byte blocks to process
// @see X86_HAVE(SHA)
.ftrace1
sha1_transform_avx2:
.ftrace2
push %rbp
mov %rsp,%rbp
.profilable
push %rbx
push %r12
push %r13

View file

@ -77,10 +77,11 @@ Tim Chen <tim.c.chen@linux.intel.com>\n"
// @param %rsi points to input data
// @param %rdx is number of 64-byte blocks to process
// @see X86_HAVE(SHA)
.ftrace1
sha1_transform_ni:
.ftrace2
push %rbp
mov %rsp,%rbp
.profilable
sub $FRAME_SIZE,%rsp
shl $6,NUM_BLKS # convert to bytes
jz .Ldone_hash

View file

@ -530,10 +530,11 @@ STACK_SIZE = _RSP + _RSP_SIZE
########################################################################
.text
.balign 32
.ftrace1
sha256_transform_rorx:
.ftrace2
push %rbp
mov %rsp,%rbp
.profilable
pushq %rbx
pushq %r12
pushq %r13

View file

@ -80,9 +80,10 @@ Tim Chen <tim.c.chen@linux.intel.com>\n"
// @param %rsi points to input data
// @param %rdx is number of blocks to process
// @see X86_HAVE(SHA)
.ftrace1
sha256_transform_ni:
.ftrace2
.leafprologue
.profilable
shl $6,NUM_BLKS # convert to bytes
jz .Ldone_hash
add DATA_PTR,NUM_BLKS # pointer to end of data

View file

@ -571,10 +571,11 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
# of SHA512 message blocks.
# "blocks" is the message length in SHA512 blocks
########################################################################
.ftrace1
sha512_transform_rorx:
.ftrace2
push %rbp
mov %rsp,%rbp
.profilable
# Allocate Stack Space
mov %rsp, %rax
sub $frame_size, %rsp

View file

@ -23,9 +23,10 @@
// @param RDI is wchar_t *s
// @return EAX is unsigned length
// @see libc/nexgen32e/strsak32.S
.ftrace1
tinywcslen:
.ftrace2
.leafprologue
.profilable
xor %eax,%eax
1: cmpl $0,(%rdi,%rax,4)
jz 2f

View file

@ -24,9 +24,10 @@
// @param RSI is size_t n
// @return EAX is unsigned length
// @see libc/nexgen32e/strsak32.S
.ftrace1
tinywcsnlen:
.ftrace2
.leafprologue
.profilable
xor %eax,%eax
1: cmp %esi,%eax
jae 2f