Get --ftrace working on aarch64

This change implements a new approach to function call logging that's
based on the GCC flag -fpatchable-function-entry. Read the commentary
in build/config.mk to learn how it works.
This commit is contained in:
Justine Tunney 2023-06-05 23:35:31 -07:00
parent 5b908bc756
commit eb40cb371d
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
934 changed files with 2259 additions and 1268 deletions

View file

@ -105,7 +105,7 @@ o/$(MODE)/tool/build/blinkenlights.com: \
@$(MAKE_SYMTAB_ZIP)
o/$(MODE)/tool/build/emulator.o: private \
OVERRIDE_COPTS += \
COPTS += \
-fno-sanitize=pointer-overflow
o/$(MODE)/tool/build/dso/sandbox.so.zip.o \
@ -127,7 +127,7 @@ o/$(MODE)/tool/build/dd.zip.o: private \
# we need pic because:
# so it can be an LD_PRELOAD payload
o/$(MODE)/tool/build/dso/sandbox.o: private \
OVERRIDE_CFLAGS += \
CFLAGS += \
-fPIC
o/$(MODE)/tool/build/dso/sandbox.o: \

View file

@ -61,7 +61,7 @@ o/tiny/tool/build/emubin/mdatest.bin.dbg: \
@$(ELFLINK) -z max-page-size=0x10 -T tool/build/emucrt/real.lds
$(TOOL_BUILD_EMUBIN_OBJS): private \
OVERRIDE_CFLAGS += \
CFLAGS += \
$(NO_MAGIC)
.PHONY: o/$(MODE)/tool/build/emubin

View file

@ -54,6 +54,17 @@ Usage: fixupobj.com [-h] ARGS...\n\
#define MRS_TPIDR_EL0 0xd53bd040u
#define MOV_REG(DST, SRC) (0xaa0003e0u | (SRC) << 16 | (DST))
// Single-instruction x86 no-op encodings, indexed by byte length: row N
// holds an N-byte nop in its first N bytes. Row 0 is an empty
// placeholder and every row is zero-padded out to eight bytes.
static const unsigned char kFatNops[8][8] = {
    [0] = {0},
    [1] = {0x90},                                      // nop
    [2] = {0x66, 0x90},                                // xchg %ax,%ax
    [3] = {0x0f, 0x1f, 0x00},                          // nopl (%rax)
    [4] = {0x0f, 0x1f, 0x40, 0x00},                    // nopl 0x00(%rax)
    [5] = {0x0f, 0x1f, 0x44, 0x00, 0x00},              // nopl 0x00(%rax,%rax,1)
    [6] = {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},        // nopw 0x00(%rax,%rax,1)
    [7] = {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},  // nopl 0x00000000(%rax)
};
void Write(const char *s, ...) {
va_list va;
va_start(va, s);
@ -110,6 +121,44 @@ void RewriteTlsCode(Elf64_Ehdr *elf, size_t elfsize) {
}
}
/**
 * Improves GCC11 `-fpatchable-function-entry` codegen.
 *
 * When using flags like `-fpatchable-function-entry=9,7` GCC v11 will
 * insert two `nop` instructions, rather than merging them into faster
 * "fat" nops. This pass visits every sized `STT_FUNC` symbol and, for
 * as long as the function starts with two or more single-byte `nop`
 * (0x90) instructions, overwrites that run in place with the encoding
 * of equal length from `kFatNops`.
 *
 * In order for this to work, the function symbol must be declared as
 * `STT_FUNC` and `st_size` must have the function's byte length.
 *
 * This function does nothing unless it's compiled for x86-64.
 *
 * @param elf is the object file image, whose code is patched in place
 * @param elfsize is the byte length of the `elf` image
 */
void OptimizePatchableFunctionEntries(Elf64_Ehdr *elf, size_t elfsize) {
#ifdef __x86_64__
  long n;
  Elf64_Xword i;
  Elf64_Sym *syms;
  Elf64_Shdr *shdr;
  Elf64_Xword symcount;
  unsigned char *p, *pe;
  CHECK_NOTNULL((syms = GetElfSymbolTable(elf, elfsize, &symcount)));
  for (i = 0; i < symcount; ++i) {
    if (ELF64_ST_TYPE(syms[i].st_info) != STT_FUNC) continue;
    if (!syms[i].st_size) continue;
    // NOTE(review): presumably these helpers return NULL when the
    // section index or range can't be resolved within the image; in
    // that case there are no bytes to patch, so skip the symbol
    // rather than dereference a null pointer
    shdr = GetElfSectionHeaderAddress(elf, elfsize, syms[i].st_shndx);
    if (!shdr) continue;
    p = GetElfSectionAddress(elf, elfsize, shdr);
    if (!p) continue;
    // st_value is used as an offset into the section below, so never
    // patch a symbol whose extent isn't fully inside its section
    if (syms[i].st_value > shdr->sh_size ||
        syms[i].st_size > shdr->sh_size - syms[i].st_value) {
      continue;
    }
    p += syms[i].st_value;
    pe = p + syms[i].st_size;
    for (; p + 1 < pe; p += n) {
      if (p[0] != 0x90) break;
      if (p[1] != 0x90) break;
      // the longest encoding lives in the last row of kFatNops, so n
      // must stop at ARRAYLEN(kFatNops) - 1 or kFatNops[n] would read
      // past the table; longer runs are merged over several passes
      for (n = 2; p + n < pe && n < (long)ARRAYLEN(kFatNops) - 1; ++n) {
        if (p[n] != 0x90) break;
      }
      memcpy(p, kFatNops[n], n);
    }
  }
#endif /* __x86_64__ */
}
void OptimizeRelocations(Elf64_Ehdr *elf, size_t elfsize) {
char *strs;
Elf64_Half i;
@ -191,6 +240,7 @@ void RewriteObject(const char *path) {
}
if (elf->e_machine == EM_NEXGEN32E) {
OptimizeRelocations(elf, st.st_size);
OptimizePatchableFunctionEntries(elf, st.st_size);
}
if (elf->e_machine == EM_AARCH64) {
RewriteTlsCode(elf, st.st_size);

View file

@ -16,10 +16,10 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "tool/build/lib/asmdown.h"
#include "libc/mem/alg.h"
#include "libc/mem/mem.h"
#include "libc/str/str.h"
#include "tool/build/lib/asmdown.h"
#include "tool/build/lib/javadown.h"
static bool IsSymbolChar1(char c) {
@ -76,6 +76,8 @@ struct Asmdown *ParseAsmdown(const char *code, size_t size) {
state = SYM;
} else if (code[i] == '\n') {
++line;
} else if (i + 10 < size && !memcmp(code + i, "\t.ftrace1\t", 10)) {
++line;
} else if (i + 8 < size && !memcmp(code + i, "\t.alias\t", 8)) {
p1 = code + i + 8;
if ((p2 = strchr(p1, ',')) && (p3 = strchr(p2, '\n'))) {

View file

@ -47,7 +47,7 @@ o/$(MODE)/tool/lambda/%.com.dbg: \
@$(APELINK)
o/$(MODE)/tool/lambda/tromp.o: private \
OVERRIDE_CFLAGS += \
CFLAGS += \
-w
$(TOOL_LAMBDA_OBJS): \

View file

@ -131,7 +131,7 @@ o/$(MODE)/tool/net/redbean.com: \
@$(TOOL_NET_REDBEAN_STANDARD_ASSETS_ZIP)
o/$(MODE)/tool/net/lsqlite3.o: private \
OVERRIDE_CFLAGS += \
CFLAGS += \
-DSQLITE_ENABLE_SESSION
# REDBEAN-DEMO.COM

View file

@ -53,14 +53,14 @@ $(TOOL_PLINKO_LIB_A).pkg: \
$(foreach x,$(TOOL_PLINKO_LIB_A_DIRECTDEPS),$($(x)_A).pkg)
ifeq ($(MODE),)
$(TOOL_PLINKO_LIB_A_OBJS): private OVERRIDE_CFLAGS += -fno-inline
$(TOOL_PLINKO_LIB_A_OBJS): private CFLAGS += -fno-inline
endif
ifeq ($(MODE),dbg)
$(TOOL_PLINKO_LIB_A_OBJS): private OVERRIDE_CFLAGS += -fno-inline
$(TOOL_PLINKO_LIB_A_OBJS): private CFLAGS += -fno-inline
endif
$(TOOL_PLINKO_LIB_A_OBJS): private OVERRIDE_CFLAGS += -ffast-math -foptimize-sibling-calls -O2
$(TOOL_PLINKO_LIB_A_OBJS): private CFLAGS += -ffast-math -foptimize-sibling-calls -O2
TOOL_PLINKO_LIB_LIBS = $(foreach x,$(TOOL_PLINKO_LIB_ARTIFACTS),$($(x)))
TOOL_PLINKO_LIB_SRCS = $(foreach x,$(TOOL_PLINKO_LIB_ARTIFACTS),$($(x)_SRCS))

View file

@ -27,9 +27,10 @@
// @param %rsi is char[edi/16][16] output and %rsi==%rdx is OK
// @param %rdx is char[edi/16][8] input
// @return %rax is %rsi
.ftrace1
doublechrominance:
.ftrace2
.leafprologue
.profilable
shr $1,%edi # backwards algorithm
jbe 1f # do nothing if !n || n%2
mov %edi,%ecx

View file

@ -50,7 +50,7 @@ TOOL_VIZ_LIB_A_DEPS := \
$(call uniq,$(foreach x,$(TOOL_VIZ_LIB_A_DIRECTDEPS),$($(x))))
o/$(MODE)/tool/viz/lib/pmaddubsw.o: private \
OVERRIDE_CFLAGS += \
CFLAGS += \
-fvect-cost-model=unlimited
o/$(MODE)/tool/viz/lib/scale.o \
@ -69,12 +69,12 @@ o/$(MODE)/tool/viz/lib/getxtermcodes.o \
o/$(MODE)/tool/viz/lib/lingamma.o \
o/$(MODE)/tool/viz/lib/perlin3.o \
o/$(MODE)/tool/viz/lib/resizegraphic.o: private \
OVERRIDE_CFLAGS += \
CFLAGS += \
-DSTACK_FRAME_UNLIMITED \
$(MATHEMATICAL)
o/$(MODE)/tool/viz/lib/printmatrix.o: private \
OVERRIDE_CFLAGS += \
CFLAGS += \
$(IEEE_MATH)
$(TOOL_VIZ_LIB_A): \

View file

@ -98,12 +98,12 @@ o/$(MODE)/tool/viz/printvideo.com: \
@$(MAKE_SYMTAB_ZIP)
o/$(MODE)/tool/viz/derasterize.o: private \
OVERRIDE_CFLAGS += \
CFLAGS += \
-DSTACK_FRAME_UNLIMITED \
$(MATHEMATICAL)
o/$(MODE)/tool/viz/magikarp.o: private \
OVERRIDE_CFLAGS += \
CFLAGS += \
$(MATHEMATICAL)
$(TOOL_VIZ_OBJS): \