Make improvements

- Introduce portable sched_getcpu() api
- Support GCC's __target_clones__ feature
- Make fma() go faster on x86 in default mode
- Remove some asan checks from core libraries
- WinMain() now ensures $HOME and $USER are defined
This commit is contained in:
Justine Tunney 2024-02-01 03:39:46 -08:00
parent d5225a693b
commit 2ab9e9f7fd
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
192 changed files with 2809 additions and 932 deletions

View file

@ -29,13 +29,16 @@
#include "libc/errno.h"
#include "libc/fmt/itoa.h"
#include "libc/fmt/magnumstrs.internal.h"
#include "libc/intrin/kprintf.h"
#include "libc/limits.h"
#include "libc/log/log.h"
#include "libc/macros.internal.h"
#include "libc/mem/gc.h"
#include "libc/runtime/runtime.h"
#include "libc/serialize.h"
#include "libc/stdalign.internal.h"
#include "libc/stdckdint.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/msync.h"
@ -48,8 +51,10 @@
* @fileoverview GCC Codegen Fixer-Upper.
*/
#define COSMO_TLS_REG 28
#define MRS_TPIDR_EL0 0xd53bd040u
#define COSMO_TLS_REG 28
#define MRS_TPIDR_EL0 0xd53bd040u
#define IFUNC_SECTION ".init.202.ifunc"
#define MOV_REG(DST, SRC) (0xaa0003e0u | (SRC) << 16 | (DST))
static int mode;
@ -365,6 +370,203 @@ static void RelinkZipFiles(void) {
eocd = foot;
}
// when __attribute__((__target_clones__(...))) is used, the compiler
// will generate multiple implementations of a function for different
// microarchitectures as well as a resolver function that tells which
// function is appropriate to call. however the compiler doesn't make
// code for the actual function. it also doesn't record where resolve
// functions are located in the binary so we've reverse eng'd it here
static void GenerateIfuncInit(void) {
char *name, *s;
long code_i = 0;
long relas_i = 0;
static char code[16384];
static Elf64_Rela relas[1024];
Elf64_Shdr *symtab_shdr = GetElfSymbolTable(elf, esize, SHT_SYMTAB, 0);
if (!symtab_shdr) Die("symbol table section header not found");
Elf64_Word symtab_shdr_index =
((char *)symtab_shdr - ((char *)elf + elf->e_shoff)) / elf->e_shentsize;
for (Elf64_Xword i = 0; i < symcount; ++i) {
if (syms[i].st_shndx == SHN_UNDEF) continue;
if (syms[i].st_shndx >= SHN_LORESERVE) continue;
if (ELF64_ST_TYPE(syms[i].st_info) != STT_GNU_IFUNC) continue;
if (!(name = GetElfString(elf, esize, symstrs, syms[i].st_name)))
Die("could not get symbol name of ifunc");
static char resolver_name[65536];
strlcpy(resolver_name, name, sizeof(resolver_name));
if (strlcat(resolver_name, ".resolver", sizeof(resolver_name)) >=
sizeof(resolver_name))
Die("ifunc name too long");
Elf64_Xword function_sym_index = i;
Elf64_Xword resolver_sym_index = -1;
for (Elf64_Xword i = 0; i < symcount; ++i) {
if (syms[i].st_shndx == SHN_UNDEF) continue;
if (syms[i].st_shndx >= SHN_LORESERVE) continue;
if (ELF64_ST_TYPE(syms[i].st_info) != STT_FUNC) continue;
if (!(s = GetElfString(elf, esize, symstrs, syms[i].st_name))) continue;
if (strcmp(s, resolver_name)) continue;
resolver_sym_index = i;
break;
}
if (resolver_sym_index == -1)
// this can happen if a function with __target_clones() also has a
// __weak_reference() defined, in which case GCC shall only create
// one resolver function for the two of them so we can ignore this
// HOWEVER the GOT will still have an entry for each two functions
continue;
// call the resolver (using cosmo's special .init abi)
static const char chunk1[] = {
0x57, // push %rdi
0x56, // push %rsi
0xe8, 0x00, 0x00, 0x00, 0x00, // call f.resolver
};
if (code_i + sizeof(chunk1) > sizeof(code) || relas_i + 1 > ARRAYLEN(relas))
Die("too many ifuncs");
memcpy(code + code_i, chunk1, sizeof(chunk1));
relas[relas_i].r_info = ELF64_R_INFO(resolver_sym_index, R_X86_64_PLT32);
relas[relas_i].r_offset = code_i + 1 + 1 + 1;
relas[relas_i].r_addend = -4;
code_i += sizeof(chunk1);
relas_i += 1;
// move the resolved function address into the GOT slot. it's very
// important that this happen, because the linker by default makes
// self-referencing PLT functions whose execution falls through oh
// no. we need to repeat this process for any aliases this defines
static const char chunk2[] = {
0x48, 0x89, 0x05, 0x00, 0x00, 0x00, 0x00, // mov %rax,f@gotpcrel(%rip)
};
for (Elf64_Xword i = 0; i < symcount; ++i) {
if (i == function_sym_index ||
(ELF64_ST_TYPE(syms[i].st_info) == STT_GNU_IFUNC &&
syms[i].st_shndx == syms[function_sym_index].st_shndx &&
syms[i].st_value == syms[function_sym_index].st_value)) {
if (code_i + sizeof(chunk2) > sizeof(code) ||
relas_i + 1 > ARRAYLEN(relas))
Die("too many ifuncs");
memcpy(code + code_i, chunk2, sizeof(chunk2));
relas[relas_i].r_info = ELF64_R_INFO(i, R_X86_64_GOTPCREL);
relas[relas_i].r_offset = code_i + 3;
relas[relas_i].r_addend = -4;
code_i += sizeof(chunk2);
relas_i += 1;
}
}
static const char chunk3[] = {
0x5e, // pop %rsi
0x5f, // pop %rdi
};
if (code_i + sizeof(chunk3) > sizeof(code)) Die("too many ifuncs");
memcpy(code + code_i, chunk3, sizeof(chunk3));
code_i += sizeof(chunk3);
}
if (!code_i) return;
// prepare to mutate elf
// remap file so it has more space
if (elf->e_shnum + 2 > 65535) Die("too many sections");
size_t reserve_size = esize + 32 * 1024 * 1024;
if (ftruncate(fildes, reserve_size)) SysExit("ifunc ftruncate #1");
elf = mmap((char *)elf, reserve_size, PROT_READ | PROT_WRITE,
MAP_FIXED | MAP_SHARED, fildes, 0);
if (elf == MAP_FAILED) SysExit("ifunc mmap");
// duplicate section name strings table to end of file
Elf64_Shdr *shdrstr_shdr = (Elf64_Shdr *)((char *)elf + elf->e_shoff +
elf->e_shstrndx * elf->e_shentsize);
memcpy((char *)elf + esize, (char *)elf + shdrstr_shdr->sh_offset,
shdrstr_shdr->sh_size);
shdrstr_shdr->sh_offset = esize;
esize += shdrstr_shdr->sh_size;
// append strings for the two sections we're creating
const char *code_section_name = IFUNC_SECTION;
Elf64_Word code_section_name_offset = shdrstr_shdr->sh_size;
memcpy((char *)elf + esize, code_section_name, strlen(code_section_name) + 1);
shdrstr_shdr->sh_size += strlen(code_section_name) + 1;
esize += strlen(code_section_name) + 1;
const char *rela_section_name = ".rela" IFUNC_SECTION;
Elf64_Word rela_section_name_offset = shdrstr_shdr->sh_size;
memcpy((char *)elf + esize, rela_section_name, strlen(rela_section_name) + 1);
shdrstr_shdr->sh_size += strlen(rela_section_name) + 1;
esize += strlen(rela_section_name) + 1;
unassert(esize == shdrstr_shdr->sh_offset + shdrstr_shdr->sh_size);
++esize;
// duplicate section headers to end of file
esize = (esize + alignof(Elf64_Shdr) - 1) & -alignof(Elf64_Shdr);
memcpy((char *)elf + esize, (char *)elf + elf->e_shoff,
elf->e_shnum * elf->e_shentsize);
elf->e_shoff = esize;
esize += elf->e_shnum * elf->e_shentsize;
unassert(esize == elf->e_shoff + elf->e_shnum * elf->e_shentsize);
// append code section header
Elf64_Shdr *code_shdr = (Elf64_Shdr *)((char *)elf + esize);
Elf64_Word code_shdr_index = elf->e_shnum++;
esize += elf->e_shentsize;
code_shdr->sh_name = code_section_name_offset;
code_shdr->sh_type = SHT_PROGBITS;
code_shdr->sh_flags = SHF_ALLOC | SHF_EXECINSTR;
code_shdr->sh_addr = 0;
code_shdr->sh_link = 0;
code_shdr->sh_info = 0;
code_shdr->sh_entsize = 1;
code_shdr->sh_addralign = 1;
code_shdr->sh_size = code_i;
// append code's rela section header
Elf64_Shdr *rela_shdr = (Elf64_Shdr *)((char *)elf + esize);
esize += elf->e_shentsize;
rela_shdr->sh_name = rela_section_name_offset;
rela_shdr->sh_type = SHT_RELA;
rela_shdr->sh_flags = SHF_INFO_LINK;
rela_shdr->sh_addr = 0;
rela_shdr->sh_info = code_shdr_index;
rela_shdr->sh_link = symtab_shdr_index;
rela_shdr->sh_entsize = sizeof(Elf64_Rela);
rela_shdr->sh_addralign = alignof(Elf64_Rela);
rela_shdr->sh_size = relas_i * sizeof(Elf64_Rela);
elf->e_shnum++;
// append relas
esize = (esize + 63) & -64;
rela_shdr->sh_offset = esize;
memcpy((char *)elf + esize, relas, relas_i * sizeof(Elf64_Rela));
esize += relas_i * sizeof(Elf64_Rela);
unassert(esize == rela_shdr->sh_offset + rela_shdr->sh_size);
// append code
esize = (esize + 63) & -64;
code_shdr->sh_offset = esize;
memcpy((char *)elf + esize, code, code_i);
esize += code_i;
unassert(esize == code_shdr->sh_offset + code_shdr->sh_size);
if (ftruncate(fildes, esize)) SysExit("ifunc ftruncate #1");
}
// when __attribute__((__target_clones__(...))) is used, static binaries
// become poisoned with rela IFUNC relocations, which the linker refuses
// to remove. even if we objcopy the ape executable as binary the linker
// preserves its precious ifunc code and puts them before the executable
// header. the good news is that the linker actually does link correctly
// which means we can delete the broken rela sections in the elf binary.
static void PurgeIfuncSections(void) {
Elf64_Shdr *shdrs = (Elf64_Shdr *)((char *)elf + elf->e_shoff);
for (Elf64_Word i = 0; i < elf->e_shnum; ++i) {
char *name;
if (shdrs[i].sh_type == SHT_RELA ||
((name = GetElfSectionName(elf, esize, shdrs + i)) &&
!strcmp(name, ".init.202.ifunc"))) {
shdrs[i].sh_type = SHT_NULL;
shdrs[i].sh_flags &= ~SHF_ALLOC;
}
}
}
static void FixupObject(void) {
if ((fildes = open(epath, mode)) == -1) {
SysExit("open");
@ -373,8 +575,8 @@ static void FixupObject(void) {
SysExit("lseek");
}
if (esize) {
if ((elf = mmap(0, esize, PROT_READ | PROT_WRITE, MAP_SHARED, fildes, 0)) ==
MAP_FAILED) {
if ((elf = mmap((void *)0x003210000000, esize, PROT_READ | PROT_WRITE,
MAP_FIXED | MAP_SHARED, fildes, 0)) == MAP_FAILED) {
SysExit("mmap");
}
if (!IsElf64Binary(elf, esize)) {
@ -393,6 +595,7 @@ static void FixupObject(void) {
if (mode == O_RDWR) {
if (elf->e_machine == EM_NEXGEN32E) {
OptimizePatchableFunctionEntries();
GenerateIfuncInit();
} else if (elf->e_machine == EM_AARCH64) {
RewriteTlsCode();
if (elf->e_type != ET_REL) {
@ -400,6 +603,7 @@ static void FixupObject(void) {
}
}
if (elf->e_type != ET_REL) {
PurgeIfuncSections();
RelinkZipFiles();
}
if (msync(elf, esize, MS_ASYNC | MS_INVALIDATE)) {