mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-24 19:40:28 +00:00
Get --ftrace working on aarch64
This change implements a new approach to function call logging that is based on the GCC flag `-fpatchable-function-entry`. Read the commentary in build/config.mk to learn how it works.
This commit is contained in:
parent
5b908bc756
commit
eb40cb371d
934 changed files with 2259 additions and 1268 deletions
|
@ -105,7 +105,7 @@ o/$(MODE)/tool/build/blinkenlights.com: \
|
|||
@$(MAKE_SYMTAB_ZIP)
|
||||
|
||||
o/$(MODE)/tool/build/emulator.o: private \
|
||||
OVERRIDE_COPTS += \
|
||||
COPTS += \
|
||||
-fno-sanitize=pointer-overflow
|
||||
|
||||
o/$(MODE)/tool/build/dso/sandbox.so.zip.o \
|
||||
|
@ -127,7 +127,7 @@ o/$(MODE)/tool/build/dd.zip.o: private \
|
|||
# we need pic because:
|
||||
# so it can be an LD_PRELOAD payload
|
||||
o/$(MODE)/tool/build/dso/sandbox.o: private \
|
||||
OVERRIDE_CFLAGS += \
|
||||
CFLAGS += \
|
||||
-fPIC
|
||||
|
||||
o/$(MODE)/tool/build/dso/sandbox.o: \
|
||||
|
|
|
@ -61,7 +61,7 @@ o/tiny/tool/build/emubin/mdatest.bin.dbg: \
|
|||
@$(ELFLINK) -z max-page-size=0x10 -T tool/build/emucrt/real.lds
|
||||
|
||||
$(TOOL_BUILD_EMUBIN_OBJS): private \
|
||||
OVERRIDE_CFLAGS += \
|
||||
CFLAGS += \
|
||||
$(NO_MAGIC)
|
||||
|
||||
.PHONY: o/$(MODE)/tool/build/emubin
|
||||
|
|
|
@ -54,6 +54,17 @@ Usage: fixupobj.com [-h] ARGS...\n\
|
|||
#define MRS_TPIDR_EL0 0xd53bd040u
|
||||
#define MOV_REG(DST, SRC) (0xaa0003e0u | (SRC) << 16 | (DST))
|
||||
|
||||
// Multi-byte x86 nop encodings, where row `k` holds the `k`-byte form.
// Row 0 is left zero-filled; only the first `k` bytes of row `k` are
// meaningful and the remaining bytes are zero padding.
static const unsigned char kFatNops[8][8] = {
    [1] = {0x90},                                      // nop
    [2] = {0x66, 0x90},                                // xchg %ax,%ax
    [3] = {0x0f, 0x1f, 0x00},                          // nopl (%rax)
    [4] = {0x0f, 0x1f, 0x40, 0x00},                    // nopl 0x00(%rax)
    [5] = {0x0f, 0x1f, 0x44, 0x00, 0x00},              // nopl 0x00(%rax,%rax,1)
    [6] = {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},        // nopw 0x00(%rax,%rax,1)
    [7] = {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},  // nopl 0x00000000(%rax)
};
|
||||
|
||||
void Write(const char *s, ...) {
|
||||
va_list va;
|
||||
va_start(va, s);
|
||||
|
@ -110,6 +121,44 @@ void RewriteTlsCode(Elf64_Ehdr *elf, size_t elfsize) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Improve GCC11 `-fpatchable-function-entry` codegen.
|
||||
*
|
||||
* When using flags like `-fpatchable-function-entry=9,7` GCC v11 will
|
||||
* insert two `nop` instructions, rather than merging them into faster
|
||||
* "fat" nops.
|
||||
*
|
||||
* In order for this to work, the function symbol must be declared as
|
||||
* `STT_FUNC` and `st_size` must have the function's byte length.
|
||||
*/
|
||||
void OptimizePatchableFunctionEntries(Elf64_Ehdr *elf, size_t elfsize) {
|
||||
#ifdef __x86_64__
|
||||
long i, n;
|
||||
int nopcount;
|
||||
Elf64_Sym *syms;
|
||||
Elf64_Shdr *shdr;
|
||||
Elf64_Xword symcount;
|
||||
unsigned char *p, *pe;
|
||||
CHECK_NOTNULL((syms = GetElfSymbolTable(elf, elfsize, &symcount)));
|
||||
for (i = 0; i < symcount; ++i) {
|
||||
if (ELF64_ST_TYPE(syms[i].st_info) == STT_FUNC && syms[i].st_size) {
|
||||
shdr = GetElfSectionHeaderAddress(elf, elfsize, syms[i].st_shndx);
|
||||
p = GetElfSectionAddress(elf, elfsize, shdr);
|
||||
p += syms[i].st_value;
|
||||
pe = p + syms[i].st_size;
|
||||
for (; p + 1 < pe; p += n) {
|
||||
if (p[0] != 0x90) break;
|
||||
if (p[1] != 0x90) break;
|
||||
for (n = 2; p + n < pe && n < ARRAYLEN(kFatNops); ++n) {
|
||||
if (p[n] != 0x90) break;
|
||||
}
|
||||
memcpy(p, kFatNops[n], n);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* __x86_64__ */
|
||||
}
|
||||
|
||||
void OptimizeRelocations(Elf64_Ehdr *elf, size_t elfsize) {
|
||||
char *strs;
|
||||
Elf64_Half i;
|
||||
|
@ -191,6 +240,7 @@ void RewriteObject(const char *path) {
|
|||
}
|
||||
if (elf->e_machine == EM_NEXGEN32E) {
|
||||
OptimizeRelocations(elf, st.st_size);
|
||||
OptimizePatchableFunctionEntries(elf, st.st_size);
|
||||
}
|
||||
if (elf->e_machine == EM_AARCH64) {
|
||||
RewriteTlsCode(elf, st.st_size);
|
||||
|
|
|
@ -16,10 +16,10 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "tool/build/lib/asmdown.h"
|
||||
#include "libc/mem/alg.h"
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "tool/build/lib/asmdown.h"
|
||||
#include "tool/build/lib/javadown.h"
|
||||
|
||||
static bool IsSymbolChar1(char c) {
|
||||
|
@ -76,6 +76,8 @@ struct Asmdown *ParseAsmdown(const char *code, size_t size) {
|
|||
state = SYM;
|
||||
} else if (code[i] == '\n') {
|
||||
++line;
|
||||
} else if (i + 10 < size && !memcmp(code + i, "\t.ftrace1\t", 10)) {
|
||||
++line;
|
||||
} else if (i + 8 < size && !memcmp(code + i, "\t.alias\t", 8)) {
|
||||
p1 = code + i + 8;
|
||||
if ((p2 = strchr(p1, ',')) && (p3 = strchr(p2, '\n'))) {
|
||||
|
|
|
@ -47,7 +47,7 @@ o/$(MODE)/tool/lambda/%.com.dbg: \
|
|||
@$(APELINK)
|
||||
|
||||
o/$(MODE)/tool/lambda/tromp.o: private \
|
||||
OVERRIDE_CFLAGS += \
|
||||
CFLAGS += \
|
||||
-w
|
||||
|
||||
$(TOOL_LAMBDA_OBJS): \
|
||||
|
|
|
@ -131,7 +131,7 @@ o/$(MODE)/tool/net/redbean.com: \
|
|||
@$(TOOL_NET_REDBEAN_STANDARD_ASSETS_ZIP)
|
||||
|
||||
o/$(MODE)/tool/net/lsqlite3.o: private \
|
||||
OVERRIDE_CFLAGS += \
|
||||
CFLAGS += \
|
||||
-DSQLITE_ENABLE_SESSION
|
||||
|
||||
# REDBEAN-DEMO.COM
|
||||
|
|
|
@ -53,14 +53,14 @@ $(TOOL_PLINKO_LIB_A).pkg: \
|
|||
$(foreach x,$(TOOL_PLINKO_LIB_A_DIRECTDEPS),$($(x)_A).pkg)
|
||||
|
||||
ifeq ($(MODE),)
|
||||
$(TOOL_PLINKO_LIB_A_OBJS): private OVERRIDE_CFLAGS += -fno-inline
|
||||
$(TOOL_PLINKO_LIB_A_OBJS): private CFLAGS += -fno-inline
|
||||
endif
|
||||
|
||||
ifeq ($(MODE),dbg)
|
||||
$(TOOL_PLINKO_LIB_A_OBJS): private OVERRIDE_CFLAGS += -fno-inline
|
||||
$(TOOL_PLINKO_LIB_A_OBJS): private CFLAGS += -fno-inline
|
||||
endif
|
||||
|
||||
$(TOOL_PLINKO_LIB_A_OBJS): private OVERRIDE_CFLAGS += -ffast-math -foptimize-sibling-calls -O2
|
||||
$(TOOL_PLINKO_LIB_A_OBJS): private CFLAGS += -ffast-math -foptimize-sibling-calls -O2
|
||||
|
||||
TOOL_PLINKO_LIB_LIBS = $(foreach x,$(TOOL_PLINKO_LIB_ARTIFACTS),$($(x)))
|
||||
TOOL_PLINKO_LIB_SRCS = $(foreach x,$(TOOL_PLINKO_LIB_ARTIFACTS),$($(x)_SRCS))
|
||||
|
|
|
@ -27,9 +27,10 @@
|
|||
// @param %rsi is char[edi/16][16] output and %rsi==%rdx is OK
|
||||
// @param %rdx is char[edi/16][8] input
|
||||
// @return %rax is %rsi
|
||||
.ftrace1
|
||||
doublechrominance:
|
||||
.ftrace2
|
||||
.leafprologue
|
||||
.profilable
|
||||
shr $1,%edi # backwards algorithm
|
||||
jbe 1f # do nothing if !n || n%2
|
||||
mov %edi,%ecx
|
||||
|
|
|
@ -50,7 +50,7 @@ TOOL_VIZ_LIB_A_DEPS := \
|
|||
$(call uniq,$(foreach x,$(TOOL_VIZ_LIB_A_DIRECTDEPS),$($(x))))
|
||||
|
||||
o/$(MODE)/tool/viz/lib/pmaddubsw.o: private \
|
||||
OVERRIDE_CFLAGS += \
|
||||
CFLAGS += \
|
||||
-fvect-cost-model=unlimited
|
||||
|
||||
o/$(MODE)/tool/viz/lib/scale.o \
|
||||
|
@ -69,12 +69,12 @@ o/$(MODE)/tool/viz/lib/getxtermcodes.o \
|
|||
o/$(MODE)/tool/viz/lib/lingamma.o \
|
||||
o/$(MODE)/tool/viz/lib/perlin3.o \
|
||||
o/$(MODE)/tool/viz/lib/resizegraphic.o: private \
|
||||
OVERRIDE_CFLAGS += \
|
||||
CFLAGS += \
|
||||
-DSTACK_FRAME_UNLIMITED \
|
||||
$(MATHEMATICAL)
|
||||
|
||||
o/$(MODE)/tool/viz/lib/printmatrix.o: private \
|
||||
OVERRIDE_CFLAGS += \
|
||||
CFLAGS += \
|
||||
$(IEEE_MATH)
|
||||
|
||||
$(TOOL_VIZ_LIB_A): \
|
||||
|
|
|
@ -98,12 +98,12 @@ o/$(MODE)/tool/viz/printvideo.com: \
|
|||
@$(MAKE_SYMTAB_ZIP)
|
||||
|
||||
o/$(MODE)/tool/viz/derasterize.o: private \
|
||||
OVERRIDE_CFLAGS += \
|
||||
CFLAGS += \
|
||||
-DSTACK_FRAME_UNLIMITED \
|
||||
$(MATHEMATICAL)
|
||||
|
||||
o/$(MODE)/tool/viz/magikarp.o: private \
|
||||
OVERRIDE_CFLAGS += \
|
||||
CFLAGS += \
|
||||
$(MATHEMATICAL)
|
||||
|
||||
$(TOOL_VIZ_OBJS): \
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue