mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-06 11:18:30 +00:00
Make improvements
- Introduce portable sched_getcpu() API
- Support GCC's __target_clones__ feature
- Make fma() go faster on x86 in default mode
- Remove some asan checks from core libraries
- WinMain() now ensures $HOME and $USER are defined
This commit is contained in:
parent
d5225a693b
commit
2ab9e9f7fd
192 changed files with 2809 additions and 932 deletions
|
@ -29,13 +29,16 @@
|
|||
#include "libc/errno.h"
|
||||
#include "libc/fmt/itoa.h"
|
||||
#include "libc/fmt/magnumstrs.internal.h"
|
||||
#include "libc/intrin/kprintf.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/log/log.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/mem/gc.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/serialize.h"
|
||||
#include "libc/stdalign.internal.h"
|
||||
#include "libc/stdckdint.h"
|
||||
#include "libc/stdio/stdio.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/sysv/consts/map.h"
|
||||
#include "libc/sysv/consts/msync.h"
|
||||
|
@ -48,8 +51,10 @@
|
|||
* @fileoverview GCC Codegen Fixer-Upper.
|
||||
*/
|
||||
|
||||
#define COSMO_TLS_REG 28
|
||||
#define MRS_TPIDR_EL0 0xd53bd040u
|
||||
#define COSMO_TLS_REG 28
|
||||
#define MRS_TPIDR_EL0 0xd53bd040u
|
||||
#define IFUNC_SECTION ".init.202.ifunc"
|
||||
|
||||
#define MOV_REG(DST, SRC) (0xaa0003e0u | (SRC) << 16 | (DST))
|
||||
|
||||
static int mode;
|
||||
|
@ -365,6 +370,203 @@ static void RelinkZipFiles(void) {
|
|||
eocd = foot;
|
||||
}
|
||||
|
||||
// when __attribute__((__target_clones__(...))) is used, the compiler
|
||||
// will generate multiple implementations of a function for different
|
||||
// microarchitectures as well as a resolver function that tells which
|
||||
// function is appropriate to call. however the compiler doesn't make
|
||||
// code for the actual function. it also doesn't record where resolve
|
||||
// functions are located in the binary so we've reverse eng'd it here
|
||||
static void GenerateIfuncInit(void) {
|
||||
char *name, *s;
|
||||
long code_i = 0;
|
||||
long relas_i = 0;
|
||||
static char code[16384];
|
||||
static Elf64_Rela relas[1024];
|
||||
Elf64_Shdr *symtab_shdr = GetElfSymbolTable(elf, esize, SHT_SYMTAB, 0);
|
||||
if (!symtab_shdr) Die("symbol table section header not found");
|
||||
Elf64_Word symtab_shdr_index =
|
||||
((char *)symtab_shdr - ((char *)elf + elf->e_shoff)) / elf->e_shentsize;
|
||||
for (Elf64_Xword i = 0; i < symcount; ++i) {
|
||||
if (syms[i].st_shndx == SHN_UNDEF) continue;
|
||||
if (syms[i].st_shndx >= SHN_LORESERVE) continue;
|
||||
if (ELF64_ST_TYPE(syms[i].st_info) != STT_GNU_IFUNC) continue;
|
||||
if (!(name = GetElfString(elf, esize, symstrs, syms[i].st_name)))
|
||||
Die("could not get symbol name of ifunc");
|
||||
static char resolver_name[65536];
|
||||
strlcpy(resolver_name, name, sizeof(resolver_name));
|
||||
if (strlcat(resolver_name, ".resolver", sizeof(resolver_name)) >=
|
||||
sizeof(resolver_name))
|
||||
Die("ifunc name too long");
|
||||
Elf64_Xword function_sym_index = i;
|
||||
Elf64_Xword resolver_sym_index = -1;
|
||||
for (Elf64_Xword i = 0; i < symcount; ++i) {
|
||||
if (syms[i].st_shndx == SHN_UNDEF) continue;
|
||||
if (syms[i].st_shndx >= SHN_LORESERVE) continue;
|
||||
if (ELF64_ST_TYPE(syms[i].st_info) != STT_FUNC) continue;
|
||||
if (!(s = GetElfString(elf, esize, symstrs, syms[i].st_name))) continue;
|
||||
if (strcmp(s, resolver_name)) continue;
|
||||
resolver_sym_index = i;
|
||||
break;
|
||||
}
|
||||
if (resolver_sym_index == -1)
|
||||
// this can happen if a function with __target_clones() also has a
|
||||
// __weak_reference() defined, in which case GCC shall only create
|
||||
// one resolver function for the two of them so we can ignore this
|
||||
// HOWEVER the GOT will still have an entry for each two functions
|
||||
continue;
|
||||
|
||||
// call the resolver (using cosmo's special .init abi)
|
||||
static const char chunk1[] = {
|
||||
0x57, // push %rdi
|
||||
0x56, // push %rsi
|
||||
0xe8, 0x00, 0x00, 0x00, 0x00, // call f.resolver
|
||||
};
|
||||
if (code_i + sizeof(chunk1) > sizeof(code) || relas_i + 1 > ARRAYLEN(relas))
|
||||
Die("too many ifuncs");
|
||||
memcpy(code + code_i, chunk1, sizeof(chunk1));
|
||||
relas[relas_i].r_info = ELF64_R_INFO(resolver_sym_index, R_X86_64_PLT32);
|
||||
relas[relas_i].r_offset = code_i + 1 + 1 + 1;
|
||||
relas[relas_i].r_addend = -4;
|
||||
code_i += sizeof(chunk1);
|
||||
relas_i += 1;
|
||||
|
||||
// move the resolved function address into the GOT slot. it's very
|
||||
// important that this happen, because the linker by default makes
|
||||
// self-referencing PLT functions whose execution falls through oh
|
||||
// no. we need to repeat this process for any aliases this defines
|
||||
static const char chunk2[] = {
|
||||
0x48, 0x89, 0x05, 0x00, 0x00, 0x00, 0x00, // mov %rax,f@gotpcrel(%rip)
|
||||
};
|
||||
for (Elf64_Xword i = 0; i < symcount; ++i) {
|
||||
if (i == function_sym_index ||
|
||||
(ELF64_ST_TYPE(syms[i].st_info) == STT_GNU_IFUNC &&
|
||||
syms[i].st_shndx == syms[function_sym_index].st_shndx &&
|
||||
syms[i].st_value == syms[function_sym_index].st_value)) {
|
||||
if (code_i + sizeof(chunk2) > sizeof(code) ||
|
||||
relas_i + 1 > ARRAYLEN(relas))
|
||||
Die("too many ifuncs");
|
||||
memcpy(code + code_i, chunk2, sizeof(chunk2));
|
||||
relas[relas_i].r_info = ELF64_R_INFO(i, R_X86_64_GOTPCREL);
|
||||
relas[relas_i].r_offset = code_i + 3;
|
||||
relas[relas_i].r_addend = -4;
|
||||
code_i += sizeof(chunk2);
|
||||
relas_i += 1;
|
||||
}
|
||||
}
|
||||
|
||||
static const char chunk3[] = {
|
||||
0x5e, // pop %rsi
|
||||
0x5f, // pop %rdi
|
||||
};
|
||||
if (code_i + sizeof(chunk3) > sizeof(code)) Die("too many ifuncs");
|
||||
memcpy(code + code_i, chunk3, sizeof(chunk3));
|
||||
code_i += sizeof(chunk3);
|
||||
}
|
||||
if (!code_i) return;
|
||||
|
||||
// prepare to mutate elf
|
||||
// remap file so it has more space
|
||||
if (elf->e_shnum + 2 > 65535) Die("too many sections");
|
||||
size_t reserve_size = esize + 32 * 1024 * 1024;
|
||||
if (ftruncate(fildes, reserve_size)) SysExit("ifunc ftruncate #1");
|
||||
elf = mmap((char *)elf, reserve_size, PROT_READ | PROT_WRITE,
|
||||
MAP_FIXED | MAP_SHARED, fildes, 0);
|
||||
if (elf == MAP_FAILED) SysExit("ifunc mmap");
|
||||
|
||||
// duplicate section name strings table to end of file
|
||||
Elf64_Shdr *shdrstr_shdr = (Elf64_Shdr *)((char *)elf + elf->e_shoff +
|
||||
elf->e_shstrndx * elf->e_shentsize);
|
||||
memcpy((char *)elf + esize, (char *)elf + shdrstr_shdr->sh_offset,
|
||||
shdrstr_shdr->sh_size);
|
||||
shdrstr_shdr->sh_offset = esize;
|
||||
esize += shdrstr_shdr->sh_size;
|
||||
|
||||
// append strings for the two sections we're creating
|
||||
const char *code_section_name = IFUNC_SECTION;
|
||||
Elf64_Word code_section_name_offset = shdrstr_shdr->sh_size;
|
||||
memcpy((char *)elf + esize, code_section_name, strlen(code_section_name) + 1);
|
||||
shdrstr_shdr->sh_size += strlen(code_section_name) + 1;
|
||||
esize += strlen(code_section_name) + 1;
|
||||
const char *rela_section_name = ".rela" IFUNC_SECTION;
|
||||
Elf64_Word rela_section_name_offset = shdrstr_shdr->sh_size;
|
||||
memcpy((char *)elf + esize, rela_section_name, strlen(rela_section_name) + 1);
|
||||
shdrstr_shdr->sh_size += strlen(rela_section_name) + 1;
|
||||
esize += strlen(rela_section_name) + 1;
|
||||
unassert(esize == shdrstr_shdr->sh_offset + shdrstr_shdr->sh_size);
|
||||
++esize;
|
||||
|
||||
// duplicate section headers to end of file
|
||||
esize = (esize + alignof(Elf64_Shdr) - 1) & -alignof(Elf64_Shdr);
|
||||
memcpy((char *)elf + esize, (char *)elf + elf->e_shoff,
|
||||
elf->e_shnum * elf->e_shentsize);
|
||||
elf->e_shoff = esize;
|
||||
esize += elf->e_shnum * elf->e_shentsize;
|
||||
unassert(esize == elf->e_shoff + elf->e_shnum * elf->e_shentsize);
|
||||
|
||||
// append code section header
|
||||
Elf64_Shdr *code_shdr = (Elf64_Shdr *)((char *)elf + esize);
|
||||
Elf64_Word code_shdr_index = elf->e_shnum++;
|
||||
esize += elf->e_shentsize;
|
||||
code_shdr->sh_name = code_section_name_offset;
|
||||
code_shdr->sh_type = SHT_PROGBITS;
|
||||
code_shdr->sh_flags = SHF_ALLOC | SHF_EXECINSTR;
|
||||
code_shdr->sh_addr = 0;
|
||||
code_shdr->sh_link = 0;
|
||||
code_shdr->sh_info = 0;
|
||||
code_shdr->sh_entsize = 1;
|
||||
code_shdr->sh_addralign = 1;
|
||||
code_shdr->sh_size = code_i;
|
||||
|
||||
// append code's rela section header
|
||||
Elf64_Shdr *rela_shdr = (Elf64_Shdr *)((char *)elf + esize);
|
||||
esize += elf->e_shentsize;
|
||||
rela_shdr->sh_name = rela_section_name_offset;
|
||||
rela_shdr->sh_type = SHT_RELA;
|
||||
rela_shdr->sh_flags = SHF_INFO_LINK;
|
||||
rela_shdr->sh_addr = 0;
|
||||
rela_shdr->sh_info = code_shdr_index;
|
||||
rela_shdr->sh_link = symtab_shdr_index;
|
||||
rela_shdr->sh_entsize = sizeof(Elf64_Rela);
|
||||
rela_shdr->sh_addralign = alignof(Elf64_Rela);
|
||||
rela_shdr->sh_size = relas_i * sizeof(Elf64_Rela);
|
||||
elf->e_shnum++;
|
||||
|
||||
// append relas
|
||||
esize = (esize + 63) & -64;
|
||||
rela_shdr->sh_offset = esize;
|
||||
memcpy((char *)elf + esize, relas, relas_i * sizeof(Elf64_Rela));
|
||||
esize += relas_i * sizeof(Elf64_Rela);
|
||||
unassert(esize == rela_shdr->sh_offset + rela_shdr->sh_size);
|
||||
|
||||
// append code
|
||||
esize = (esize + 63) & -64;
|
||||
code_shdr->sh_offset = esize;
|
||||
memcpy((char *)elf + esize, code, code_i);
|
||||
esize += code_i;
|
||||
unassert(esize == code_shdr->sh_offset + code_shdr->sh_size);
|
||||
|
||||
if (ftruncate(fildes, esize)) SysExit("ifunc ftruncate #1");
|
||||
}
|
||||
|
||||
// when __attribute__((__target_clones__(...))) is used, static binaries
|
||||
// become poisoned with rela IFUNC relocations, which the linker refuses
|
||||
// to remove. even if we objcopy the ape executable as binary the linker
|
||||
// preserves its precious ifunc code and puts them before the executable
|
||||
// header. the good news is that the linker actually does link correctly
|
||||
// which means we can delete the broken rela sections in the elf binary.
|
||||
static void PurgeIfuncSections(void) {
|
||||
Elf64_Shdr *shdrs = (Elf64_Shdr *)((char *)elf + elf->e_shoff);
|
||||
for (Elf64_Word i = 0; i < elf->e_shnum; ++i) {
|
||||
char *name;
|
||||
if (shdrs[i].sh_type == SHT_RELA ||
|
||||
((name = GetElfSectionName(elf, esize, shdrs + i)) &&
|
||||
!strcmp(name, ".init.202.ifunc"))) {
|
||||
shdrs[i].sh_type = SHT_NULL;
|
||||
shdrs[i].sh_flags &= ~SHF_ALLOC;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void FixupObject(void) {
|
||||
if ((fildes = open(epath, mode)) == -1) {
|
||||
SysExit("open");
|
||||
|
@ -373,8 +575,8 @@ static void FixupObject(void) {
|
|||
SysExit("lseek");
|
||||
}
|
||||
if (esize) {
|
||||
if ((elf = mmap(0, esize, PROT_READ | PROT_WRITE, MAP_SHARED, fildes, 0)) ==
|
||||
MAP_FAILED) {
|
||||
if ((elf = mmap((void *)0x003210000000, esize, PROT_READ | PROT_WRITE,
|
||||
MAP_FIXED | MAP_SHARED, fildes, 0)) == MAP_FAILED) {
|
||||
SysExit("mmap");
|
||||
}
|
||||
if (!IsElf64Binary(elf, esize)) {
|
||||
|
@ -393,6 +595,7 @@ static void FixupObject(void) {
|
|||
if (mode == O_RDWR) {
|
||||
if (elf->e_machine == EM_NEXGEN32E) {
|
||||
OptimizePatchableFunctionEntries();
|
||||
GenerateIfuncInit();
|
||||
} else if (elf->e_machine == EM_AARCH64) {
|
||||
RewriteTlsCode();
|
||||
if (elf->e_type != ET_REL) {
|
||||
|
@ -400,6 +603,7 @@ static void FixupObject(void) {
|
|||
}
|
||||
}
|
||||
if (elf->e_type != ET_REL) {
|
||||
PurgeIfuncSections();
|
||||
RelinkZipFiles();
|
||||
}
|
||||
if (msync(elf, esize, MS_ASYNC | MS_INVALIDATE)) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue