mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 03:27:39 +00:00
11d9fb521d
This change implements the compiler runtime for ARM v8.1 LSE atomics and gets rid of the mandatory -mno-outline-atomics flag. It can dramatically speed things up on newer ARM CPUs, as indicated by the changed lines in test/libc/thread/footek_test.c. In llamafile, dispatching on hwcap atomic also shaved microseconds off synchronization barriers.
697 lines
24 KiB
C
697 lines
24 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
|
│ │
|
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
|
│ any purpose with or without fee is hereby granted, provided that the │
|
|
│ above copyright notice and this permission notice appear in all copies. │
|
|
│ │
|
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
#include "libc/assert.h"
|
|
#include "libc/calls/calls.h"
|
|
#include "libc/calls/struct/stat.h"
|
|
#include "libc/dce.h"
|
|
#include "libc/elf/def.h"
|
|
#include "libc/elf/elf.h"
|
|
#include "libc/elf/scalar.h"
|
|
#include "libc/elf/struct/rela.h"
|
|
#include "libc/elf/struct/shdr.h"
|
|
#include "libc/elf/struct/sym.h"
|
|
#include "libc/errno.h"
|
|
#include "libc/fmt/itoa.h"
|
|
#include "libc/fmt/magnumstrs.internal.h"
|
|
#include "libc/limits.h"
|
|
#include "libc/log/log.h"
|
|
#include "libc/macros.h"
|
|
#include "libc/mem/gc.h"
|
|
#include "libc/mem/mem.h"
|
|
#include "libc/runtime/runtime.h"
|
|
#include "libc/serialize.h"
|
|
#include "libc/stdalign.h"
|
|
#include "libc/stdckdint.h"
|
|
#include "libc/stdio/stdio.h"
|
|
#include "libc/str/str.h"
|
|
#include "libc/sysv/consts/map.h"
|
|
#include "libc/sysv/consts/msync.h"
|
|
#include "libc/sysv/consts/o.h"
|
|
#include "libc/sysv/consts/prot.h"
|
|
#include "libc/zip.h"
|
|
#include "third_party/getopt/getopt.internal.h"
|
|
|
|
/**
|
|
* @fileoverview GCC Codegen Fixer-Upper.
|
|
*/
|
|
|
|
// register number that RewriteTlsCodeArm64() substitutes for tpidr_el0
#define COSMO_TLS_REG 28

// instruction word matched (with its low five bits masked off) when
// looking for reads of tpidr_el0
#define MRS_TPIDR_EL0 0xd53bd040u

// name of the section GenerateIfuncInit() appends to object files
#define IFUNC_SECTION ".init.202.ifunc"

// builds the instruction word that replaces a matched MRS: the source
// register number lands in bits 16+ and the destination in the low bits
#define MOV_REG(DST, SRC) (0xaa0003e0u | ((SRC) << 16) | (DST))
|
|
|
|
static int mode;
|
|
static int fildes;
|
|
static char *symstrs;
|
|
static char *secstrs;
|
|
static ssize_t esize;
|
|
static Elf64_Sym *syms;
|
|
static Elf64_Ehdr *elf;
|
|
static const char *epath;
|
|
static Elf64_Xword symcount;
|
|
|
|
#include "libc/mem/tinymalloc.inc"
|
|
|
|
static wontreturn void Die(const char *reason) {
|
|
tinyprint(2, epath, ": ", reason, "\n", NULL);
|
|
exit(1);
|
|
}
|
|
|
|
// Terminates the program after reporting an allocation failure.
static wontreturn void DieOom(void) {
  static const char kMessage[] = "out of memory";
  Die(kMessage);
}
|
|
|
|
// Allocates `n` bytes, or terminates the program via DieOom() if the
// allocator returns NULL. The result is therefore never NULL.
static void *Malloc(size_t n) {
  void *mem = malloc(n);
  if (mem == NULL)
    DieOom();
  return mem;
}
|
|
|
|
// Resizes allocation `p` to `n` bytes, or terminates the program via
// DieOom() if the allocator returns NULL. Returns the (possibly moved)
// allocation.
static void *Realloc(void *p, size_t n) {
  void *moved = realloc(p, n);
  if (moved == NULL)
    DieOom();
  return moved;
}
|
|
|
|
static wontreturn void SysExit(const char *func) {
|
|
const char *errstr;
|
|
if (!(errstr = _strerdoc(errno)))
|
|
errstr = "EUNKNOWN";
|
|
tinyprint(2, epath, ": ", func, " failed with ", errstr, "\n", NULL);
|
|
exit(1);
|
|
}
|
|
|
|
static wontreturn void PrintUsage(int fd, int exitcode) {
|
|
tinyprint(fd, "\n\
|
|
NAME\n\
|
|
\n\
|
|
Cosmopolitan Object Fixer\n\
|
|
\n\
|
|
SYNOPSIS\n\
|
|
\n\
|
|
",
|
|
program_invocation_name, " [FLAGS] OBJECT...\n\
|
|
\n\
|
|
DESCRIPTION\n\
|
|
\n\
|
|
This program applies fixups to ELF object files and executables that\n\
|
|
at build time whenever they're created by the toolchain. It's needed\n\
|
|
so that zip assets work correctly, plus this'll make code go faster.\n\
|
|
This program is also able to spot some coding errors like privileged\n\
|
|
functions calling unprivileged ones.\n\
|
|
\n\
|
|
Multiple binary files may be specified, which are modified in-place.\n\
|
|
\n\
|
|
FLAGS\n\
|
|
\n\
|
|
-h show this help\n\
|
|
-c checks only mode\n\
|
|
\n\
|
|
",
|
|
NULL);
|
|
exit(exitcode);
|
|
}
|
|
|
|
static void GetOpts(int argc, char *argv[]) {
|
|
int opt;
|
|
mode = O_RDWR;
|
|
while ((opt = getopt(argc, argv, "ch")) != -1) {
|
|
switch (opt) {
|
|
case 'c':
|
|
mode = O_RDONLY;
|
|
break;
|
|
case 'h':
|
|
PrintUsage(1, 0);
|
|
default:
|
|
PrintUsage(2, 1);
|
|
}
|
|
}
|
|
if (optind == argc) {
|
|
tinyprint(2,
|
|
"error: no elf object files specified\n"
|
|
"run ",
|
|
program_invocation_name, " -h for usage\n", NULL);
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
// Official Intel Multibyte No-Operation Instructions. See
// Intel's Six Thousand Page Manual, Volume 2, Table 4-12:
// On "Recommended Multi-Byte Sequence of NOP Instruction"
//
// Row `n` holds the encoding of the single instruction that is `n`
// bytes long; row zero is unused.
static const unsigned char kNops[10][10] = {
    [0] = {0},
    [1] = {0x90},                                // nop
    [2] = {0x66, 0x90},                          // xchg %ax,%ax
    [3] = {0x0f, 0x1f, 0x00},                    // nopl (%rax)
    [4] = {0x0f, 0x1f, 0x40, 0x00},              // nopl 0x00(%rax)
    [5] = {0x0f, 0x1f, 0x44, 0x00, 0x00},        // nopl 0x00(%rax,%rax,1)
    [6] = {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},  // nopw 0x00(%rax,%rax,1)
    // nopl 0x00000000(%rax)
    [7] = {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
    // nopl 0x00000000(%rax,%rax,1)
    [8] = {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
    // nopw 0x00000000(%rax,%rax,1)
    [9] = {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
};
|
|
|
|
/**
|
|
* Rewrites leading NOP instructions to have fewer instructions.
|
|
*
|
|
* For example, the following code:
|
|
*
|
|
* nop
|
|
* nop
|
|
* nop
|
|
* nop
|
|
* nop
|
|
* nop
|
|
* nop
|
|
* nop
|
|
* nop
|
|
* nop
|
|
* nop
|
|
* nop
|
|
* ret
|
|
* nop
|
|
* nop
|
|
*
|
|
* Would be morphed into the following:
|
|
*
|
|
* nopw 0x00000000(%rax,%rax,1)
|
|
* xchg %ax,%ax
|
|
* ret
|
|
* nop
|
|
* nop
|
|
*
|
|
* @param p points to memory region that shall be modified
|
|
* @param e points to end of memory region, i.e. `p + #bytes`
|
|
* @return p advanced past last morphed byte
|
|
*/
|
|
static unsigned char *CoalesceNops(unsigned char *p, const unsigned char *e) {
|
|
long n;
|
|
for (; p + 1 < e; p += n) {
|
|
if (p[0] != 0x90)
|
|
break;
|
|
if (p[1] != 0x90)
|
|
break;
|
|
for (n = 2; p + n < e; ++n) {
|
|
if (p[n] != 0x90)
|
|
break;
|
|
if (n == ARRAYLEN(kNops) - 1)
|
|
break;
|
|
}
|
|
memcpy(p, kNops[n], n);
|
|
}
|
|
return p;
|
|
}
|
|
|
|
static void CheckPrivilegedCrossReferences(void) {
|
|
unsigned long x;
|
|
const char *secname;
|
|
const Elf64_Shdr *shdr;
|
|
const Elf64_Rela *rela, *erela;
|
|
shdr = FindElfSectionByName(elf, esize, secstrs, ".rela.privileged");
|
|
if (!shdr || !(rela = GetElfSectionAddress(elf, esize, shdr)))
|
|
return;
|
|
erela = rela + shdr->sh_size / sizeof(*rela);
|
|
for (; rela < erela; ++rela) {
|
|
if (!ELF64_R_TYPE(rela->r_info))
|
|
continue;
|
|
if (!(x = ELF64_R_SYM(rela->r_info)))
|
|
continue;
|
|
if (x >= symcount)
|
|
continue;
|
|
if (syms[x].st_shndx == SHN_ABS)
|
|
continue;
|
|
if (!syms[x].st_shndx)
|
|
continue;
|
|
if ((shdr = GetElfSectionHeaderAddress(elf, esize, syms[x].st_shndx))) {
|
|
if (~shdr->sh_flags & SHF_EXECINSTR)
|
|
continue; // data reference
|
|
if ((secname = GetElfString(elf, esize, secstrs, shdr->sh_name)) &&
|
|
!startswith(secname, ".privileged")) {
|
|
tinyprint(2, epath,
|
|
": code in .privileged section "
|
|
"references symbol '",
|
|
GetElfString(elf, esize, symstrs, syms[x].st_name),
|
|
"' in unprivileged code section '", secname, "'\n", NULL);
|
|
exit(1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Change AMD code to use %gs:0x30 instead of %fs:0
// We assume -mno-tls-direct-seg-refs has been used
//
// Scans the `n` bytes at `p` for the nine byte instructions
//
//     mov %fs:0,%reg     64 48 8b /r 25 00 00 00 00
//     add %fs:0,%reg     64 48 03 /r 25 00 00 00 00
//
// (rex.r may also be set) and patches each one in place so it uses the
// %gs segment with displacement 0x30. Regions shorter than nine bytes
// can't hold a match and are left untouched.
static void ChangeTlsFsToGs(unsigned char *p, size_t n) {
  size_t i = 0;
  while (n >= 9 && i <= n - 9) {
    unsigned char *q = p + i;
    if (q[0] == 0144 &&                      // %fs
        (q[1] & 0373) == 0110 &&             // rex.w (and ignore rex.r)
        (q[2] == 0213 ||                     // mov reg/mem → reg
         q[2] == 0003) &&                    // add reg/mem → reg
        (q[3] & 0307) == 0004 &&             // mod/rm (4,reg,0) means sib
        q[4] == 0045 &&                      // sib (5,4,0) → disp32
        !q[5] && !q[6] && !q[7] && !q[8]) {  // displacement is zero
      q[0] = 0145;  // change %fs to %gs
      q[5] = 0x30;  // change 0 to 0x30
      i += 9;
    } else {
      ++i;
    }
  }
}
|
|
|
|
static void RewriteTlsCodeAmd64(void) {
|
|
int i;
|
|
uint8_t *p;
|
|
Elf64_Shdr *shdr;
|
|
for (i = 0; i < elf->e_shnum; ++i) {
|
|
if (!(shdr = GetElfSectionHeaderAddress(elf, esize, i)))
|
|
Die("elf header overflow #1");
|
|
if (shdr->sh_type == SHT_PROGBITS && //
|
|
(shdr->sh_flags & SHF_ALLOC) && //
|
|
(shdr->sh_flags & SHF_EXECINSTR)) {
|
|
if (!(p = GetElfSectionAddress(elf, esize, shdr)))
|
|
Die("elf header overflow #2");
|
|
ChangeTlsFsToGs(p, shdr->sh_size);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Modify ARM64 code to use x28 for TLS rather than tpidr_el0.
|
|
static void RewriteTlsCodeArm64(void) {
|
|
int i;
|
|
Elf64_Shdr *shdr;
|
|
uint32_t *p, *pe;
|
|
for (i = 0; i < elf->e_shnum; ++i) {
|
|
if (!(shdr = GetElfSectionHeaderAddress(elf, esize, i)))
|
|
Die("elf header overflow #1");
|
|
if (shdr->sh_type == SHT_PROGBITS && //
|
|
(shdr->sh_flags & SHF_ALLOC) && //
|
|
(shdr->sh_flags & SHF_EXECINSTR)) {
|
|
if (!(p = GetElfSectionAddress(elf, esize, shdr)))
|
|
Die("elf header overflow #2");
|
|
for (pe = p + shdr->sh_size / 4; p <= pe; ++p)
|
|
if ((*p & -32) == MRS_TPIDR_EL0)
|
|
*p = MOV_REG(*p & 31, COSMO_TLS_REG);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void UseFreebsdOsAbi(void) {
|
|
elf->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
|
|
}
|
|
|
|
static void WriteApeFlags(void) {
|
|
/* try to be forward-compatible */
|
|
elf->e_flags = (elf->e_flags & ~EF_APE_MODERN_MASK) | EF_APE_MODERN;
|
|
}
|
|
|
|
/**
|
|
* Improve GCC11 `-fpatchable-function-entry` codegen.
|
|
*
|
|
* When using flags like `-fpatchable-function-entry=9,7` GCC v11 will
|
|
* insert two `nop` instructions, rather than merging them into faster
|
|
* "fat" nops.
|
|
*
|
|
* In order for this to work, the function symbol must be declared as
|
|
* `STT_FUNC` and `st_size` must have the function's byte length.
|
|
*/
|
|
static void OptimizePatchableFunctionEntries(void) {
|
|
long i;
|
|
Elf64_Shdr *shdr;
|
|
unsigned char *p;
|
|
Elf64_Addr sym_rva;
|
|
if (elf->e_machine == EM_NEXGEN32E) {
|
|
for (i = 0; i < symcount; ++i) {
|
|
if (!syms[i].st_size)
|
|
continue;
|
|
if (ELF64_ST_TYPE(syms[i].st_info) != STT_FUNC)
|
|
continue;
|
|
if (!(shdr = GetElfSectionHeaderAddress(elf, esize, syms[i].st_shndx)))
|
|
Die("elf header overflow #3");
|
|
if (shdr->sh_type != SHT_PROGBITS)
|
|
continue;
|
|
if (!(p = GetElfSectionAddress(elf, esize, shdr)))
|
|
Die("elf section overflow");
|
|
if (ckd_sub(&sym_rva, syms[i].st_value, shdr->sh_addr))
|
|
Die("elf symbol beneath section");
|
|
if (sym_rva > esize - shdr->sh_offset || //
|
|
(p += sym_rva) >= (unsigned char *)elf + esize || //
|
|
syms[i].st_size >= esize - sym_rva) {
|
|
Die("elf symbol overflow");
|
|
}
|
|
CoalesceNops(p, p + syms[i].st_size);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Converts PKZIP recs from PC-relative to RVA-relative.
|
|
*/
|
|
static void RelinkZipFiles(void) {
|
|
int rela, recs;
|
|
unsigned long cdsize, cdoffset;
|
|
unsigned char foot[kZipCdirHdrMinSize];
|
|
unsigned char *base, *xeof, *stop, *eocd, *cdir, *lfile, *cfile;
|
|
base = (unsigned char *)elf;
|
|
xeof = (unsigned char *)elf + esize;
|
|
eocd = xeof - kZipCdirHdrMinSize;
|
|
stop = base;
|
|
// scan backwards for zip eocd todo record
|
|
// that was created by libc/nexgen32e/zip.S
|
|
for (;;) {
|
|
if (eocd < stop)
|
|
return;
|
|
if (READ32LE(eocd) == kZipCdirHdrMagicTodo && //
|
|
ZIP_CDIR_SIZE(eocd) && //
|
|
!ZIP_CDIR_OFFSET(eocd) && //
|
|
!ZIP_CDIR_RECORDS(eocd) && //
|
|
!ZIP_CDIR_RECORDSONDISK(eocd)) {
|
|
break;
|
|
}
|
|
eocd = memrchr(stop, 'P', eocd - base);
|
|
}
|
|
// apply fixups to zip central directory recs
|
|
recs = 0;
|
|
cdir = (stop = eocd) - (cdsize = ZIP_CDIR_SIZE(eocd));
|
|
for (cfile = cdir; cfile < stop; cfile += ZIP_CFILE_HDRSIZE(cfile)) {
|
|
if (++recs >= 65536)
|
|
Die("too many zip central directory records");
|
|
if (cfile < base || //
|
|
cfile + kZipCfileHdrMinSize > xeof || //
|
|
cfile + ZIP_CFILE_HDRSIZE(cfile) > xeof)
|
|
Die("zip central directory entry overflows image");
|
|
if (READ32LE(cfile) != kZipCfileHdrMagic)
|
|
Die("bad __zip_cdir_size or zip central directory corrupted");
|
|
if ((rela = ZIP_CFILE_OFFSET(cfile)) < 0) {
|
|
lfile = cfile + kZipCfileOffsetOffset + rela;
|
|
} else {
|
|
lfile = base + rela; // earlier fixup failed partway?
|
|
}
|
|
if (lfile < base || //
|
|
lfile + kZipLfileHdrMinSize > xeof || //
|
|
lfile + ZIP_LFILE_SIZE(lfile) > xeof)
|
|
Die("zip local file overflows image");
|
|
if (READ32LE(lfile) != kZipLfileHdrMagic)
|
|
Die("zip central directory offset to local file corrupted");
|
|
if (rela < 0)
|
|
WRITE32LE(cfile + kZipCfileOffsetOffset, lfile - base);
|
|
}
|
|
// append new eocd record to program image
|
|
if (esize > INT_MAX - sizeof(foot) ||
|
|
(cdoffset = esize) > INT_MAX - sizeof(foot))
|
|
Die("the time has come to adopt zip64");
|
|
bzero(foot, sizeof(foot));
|
|
WRITE32LE(foot, kZipCdirHdrMagic);
|
|
WRITE32LE(foot + kZipCdirSizeOffset, cdsize);
|
|
WRITE16LE(foot + kZipCdirRecordsOffset, recs);
|
|
WRITE32LE(foot + kZipCdirOffsetOffset, cdoffset);
|
|
WRITE16LE(foot + kZipCdirRecordsOnDiskOffset, recs);
|
|
if (pwrite(fildes, cdir, cdsize, esize) != cdsize)
|
|
SysExit("cdir pwrite");
|
|
if (pwrite(fildes, foot, sizeof(foot), esize + cdsize) != sizeof(foot))
|
|
SysExit("eocd pwrite");
|
|
eocd = foot;
|
|
}
|
|
|
|
// when __attribute__((__target_clones__(...))) is used, the compiler
|
|
// will generate multiple implementations of a function for different
|
|
// microarchitectures as well as a resolver function that tells which
|
|
// function is appropriate to call. however the compiler doesn't make
|
|
// code for the actual function. it also doesn't record where resolve
|
|
// functions are located in the binary so we've reverse eng'd it here
|
|
static void GenerateIfuncInit(void) {
|
|
char *name, *s;
|
|
long code_i = 0;
|
|
long relas_i = 0;
|
|
static char code[16384];
|
|
static Elf64_Rela relas[1024];
|
|
Elf64_Shdr *symtab_shdr = GetElfSymbolTable(elf, esize, SHT_SYMTAB, 0);
|
|
if (!symtab_shdr)
|
|
Die("symbol table section header not found");
|
|
Elf64_Word symtab_shdr_index =
|
|
((char *)symtab_shdr - ((char *)elf + elf->e_shoff)) / elf->e_shentsize;
|
|
for (Elf64_Xword i = 0; i < symcount; ++i) {
|
|
if (syms[i].st_shndx == SHN_UNDEF)
|
|
continue;
|
|
if (syms[i].st_shndx >= SHN_LORESERVE)
|
|
continue;
|
|
if (ELF64_ST_TYPE(syms[i].st_info) != STT_GNU_IFUNC)
|
|
continue;
|
|
if (!(name = GetElfString(elf, esize, symstrs, syms[i].st_name)))
|
|
Die("could not get symbol name of ifunc");
|
|
static char resolver_name[65536];
|
|
strlcpy(resolver_name, name, sizeof(resolver_name));
|
|
if (strlcat(resolver_name, ".resolver", sizeof(resolver_name)) >=
|
|
sizeof(resolver_name))
|
|
Die("ifunc name too long");
|
|
Elf64_Xword function_sym_index = i;
|
|
Elf64_Xword resolver_sym_index = -1;
|
|
for (Elf64_Xword i = 0; i < symcount; ++i) {
|
|
if (syms[i].st_shndx == SHN_UNDEF)
|
|
continue;
|
|
if (syms[i].st_shndx >= SHN_LORESERVE)
|
|
continue;
|
|
if (ELF64_ST_TYPE(syms[i].st_info) != STT_FUNC)
|
|
continue;
|
|
if (!(s = GetElfString(elf, esize, symstrs, syms[i].st_name)))
|
|
continue;
|
|
if (strcmp(s, resolver_name))
|
|
continue;
|
|
resolver_sym_index = i;
|
|
break;
|
|
}
|
|
if (resolver_sym_index == -1)
|
|
// this can happen if a function with __target_clones() also has a
|
|
// __weak_reference() defined, in which case GCC shall only create
|
|
// one resolver function for the two of them so we can ignore this
|
|
// HOWEVER the GOT will still have an entry for each two functions
|
|
continue;
|
|
|
|
// call the resolver (using cosmo's special .init abi)
|
|
static const char chunk1[] = {
|
|
0x57, // push %rdi
|
|
0x56, // push %rsi
|
|
0xe8, 0x00, 0x00, 0x00, 0x00, // call f.resolver
|
|
};
|
|
if (code_i + sizeof(chunk1) > sizeof(code) || relas_i + 1 > ARRAYLEN(relas))
|
|
Die("too many ifuncs");
|
|
memcpy(code + code_i, chunk1, sizeof(chunk1));
|
|
relas[relas_i].r_info = ELF64_R_INFO(resolver_sym_index, R_X86_64_PLT32);
|
|
relas[relas_i].r_offset = code_i + 1 + 1 + 1;
|
|
relas[relas_i].r_addend = -4;
|
|
code_i += sizeof(chunk1);
|
|
relas_i += 1;
|
|
|
|
// move the resolved function address into the GOT slot. it's very
|
|
// important that this happen, because the linker by default makes
|
|
// self-referencing PLT functions whose execution falls through oh
|
|
// no. we need to repeat this process for any aliases this defines
|
|
static const char chunk2[] = {
|
|
0x48, 0x89, 0x05, 0x00, 0x00, 0x00, 0x00, // mov %rax,f@gotpcrel(%rip)
|
|
};
|
|
for (Elf64_Xword i = 0; i < symcount; ++i) {
|
|
if (i == function_sym_index ||
|
|
(ELF64_ST_TYPE(syms[i].st_info) == STT_GNU_IFUNC &&
|
|
syms[i].st_shndx == syms[function_sym_index].st_shndx &&
|
|
syms[i].st_value == syms[function_sym_index].st_value)) {
|
|
if (code_i + sizeof(chunk2) > sizeof(code) ||
|
|
relas_i + 1 > ARRAYLEN(relas))
|
|
Die("too many ifuncs");
|
|
memcpy(code + code_i, chunk2, sizeof(chunk2));
|
|
relas[relas_i].r_info = ELF64_R_INFO(i, R_X86_64_GOTPCREL);
|
|
relas[relas_i].r_offset = code_i + 3;
|
|
relas[relas_i].r_addend = -4;
|
|
code_i += sizeof(chunk2);
|
|
relas_i += 1;
|
|
}
|
|
}
|
|
|
|
static const char chunk3[] = {
|
|
0x5e, // pop %rsi
|
|
0x5f, // pop %rdi
|
|
};
|
|
if (code_i + sizeof(chunk3) > sizeof(code))
|
|
Die("too many ifuncs");
|
|
memcpy(code + code_i, chunk3, sizeof(chunk3));
|
|
code_i += sizeof(chunk3);
|
|
}
|
|
if (!code_i)
|
|
return;
|
|
|
|
// prepare to mutate elf
|
|
// remap file so it has more space
|
|
if (elf->e_shnum + 2 > 65535)
|
|
Die("too many sections");
|
|
size_t reserve_size = esize + 32 * 1024 * 1024;
|
|
elf = Realloc(elf, reserve_size);
|
|
|
|
// duplicate section name strings table to end of file
|
|
Elf64_Shdr *shdrstr_shdr = (Elf64_Shdr *)((char *)elf + elf->e_shoff +
|
|
elf->e_shstrndx * elf->e_shentsize);
|
|
memcpy((char *)elf + esize, (char *)elf + shdrstr_shdr->sh_offset,
|
|
shdrstr_shdr->sh_size);
|
|
shdrstr_shdr->sh_offset = esize;
|
|
esize += shdrstr_shdr->sh_size;
|
|
|
|
// append strings for the two sections we're creating
|
|
const char *code_section_name = IFUNC_SECTION;
|
|
Elf64_Word code_section_name_offset = shdrstr_shdr->sh_size;
|
|
memcpy((char *)elf + esize, code_section_name, strlen(code_section_name) + 1);
|
|
shdrstr_shdr->sh_size += strlen(code_section_name) + 1;
|
|
esize += strlen(code_section_name) + 1;
|
|
const char *rela_section_name = ".rela" IFUNC_SECTION;
|
|
Elf64_Word rela_section_name_offset = shdrstr_shdr->sh_size;
|
|
memcpy((char *)elf + esize, rela_section_name, strlen(rela_section_name) + 1);
|
|
shdrstr_shdr->sh_size += strlen(rela_section_name) + 1;
|
|
esize += strlen(rela_section_name) + 1;
|
|
unassert(esize == shdrstr_shdr->sh_offset + shdrstr_shdr->sh_size);
|
|
++esize;
|
|
|
|
// duplicate section headers to end of file
|
|
esize = (esize + alignof(Elf64_Shdr) - 1) & -alignof(Elf64_Shdr);
|
|
memcpy((char *)elf + esize, (char *)elf + elf->e_shoff,
|
|
elf->e_shnum * elf->e_shentsize);
|
|
elf->e_shoff = esize;
|
|
esize += elf->e_shnum * elf->e_shentsize;
|
|
unassert(esize == elf->e_shoff + elf->e_shnum * elf->e_shentsize);
|
|
|
|
// append code section header
|
|
Elf64_Shdr *code_shdr = (Elf64_Shdr *)((char *)elf + esize);
|
|
Elf64_Word code_shdr_index = elf->e_shnum++;
|
|
esize += elf->e_shentsize;
|
|
code_shdr->sh_name = code_section_name_offset;
|
|
code_shdr->sh_type = SHT_PROGBITS;
|
|
code_shdr->sh_flags = SHF_ALLOC | SHF_EXECINSTR;
|
|
code_shdr->sh_addr = 0;
|
|
code_shdr->sh_link = 0;
|
|
code_shdr->sh_info = 0;
|
|
code_shdr->sh_entsize = 1;
|
|
code_shdr->sh_addralign = 1;
|
|
code_shdr->sh_size = code_i;
|
|
|
|
// append code's rela section header
|
|
Elf64_Shdr *rela_shdr = (Elf64_Shdr *)((char *)elf + esize);
|
|
esize += elf->e_shentsize;
|
|
rela_shdr->sh_name = rela_section_name_offset;
|
|
rela_shdr->sh_type = SHT_RELA;
|
|
rela_shdr->sh_flags = SHF_INFO_LINK;
|
|
rela_shdr->sh_addr = 0;
|
|
rela_shdr->sh_info = code_shdr_index;
|
|
rela_shdr->sh_link = symtab_shdr_index;
|
|
rela_shdr->sh_entsize = sizeof(Elf64_Rela);
|
|
rela_shdr->sh_addralign = alignof(Elf64_Rela);
|
|
rela_shdr->sh_size = relas_i * sizeof(Elf64_Rela);
|
|
elf->e_shnum++;
|
|
|
|
// append relas
|
|
esize = (esize + 63) & -64;
|
|
rela_shdr->sh_offset = esize;
|
|
memcpy((char *)elf + esize, relas, relas_i * sizeof(Elf64_Rela));
|
|
esize += relas_i * sizeof(Elf64_Rela);
|
|
unassert(esize == rela_shdr->sh_offset + rela_shdr->sh_size);
|
|
|
|
// append code
|
|
esize = (esize + 63) & -64;
|
|
code_shdr->sh_offset = esize;
|
|
memcpy((char *)elf + esize, code, code_i);
|
|
esize += code_i;
|
|
unassert(esize == code_shdr->sh_offset + code_shdr->sh_size);
|
|
}
|
|
|
|
// when __attribute__((__target_clones__(...))) is used, static binaries
|
|
// become poisoned with rela IFUNC relocations, which the linker refuses
|
|
// to remove. even if we objcopy the ape executable as binary the linker
|
|
// preserves its precious ifunc code and puts them before the executable
|
|
// header. the good news is that the linker actually does link correctly
|
|
// which means we can delete the broken rela sections in the elf binary.
|
|
static void PurgeIfuncSections(void) {
|
|
Elf64_Shdr *shdrs = (Elf64_Shdr *)((char *)elf + elf->e_shoff);
|
|
for (Elf64_Word i = 0; i < elf->e_shnum; ++i) {
|
|
char *name;
|
|
if (shdrs[i].sh_type == SHT_RELA ||
|
|
((name = GetElfSectionName(elf, esize, shdrs + i)) &&
|
|
!strcmp(name, ".init.202.ifunc"))) {
|
|
shdrs[i].sh_type = SHT_NULL;
|
|
shdrs[i].sh_flags &= ~SHF_ALLOC;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void FixupObject(void) {
|
|
if ((fildes = open(epath, mode)) == -1)
|
|
SysExit("open");
|
|
if ((esize = lseek(fildes, 0, SEEK_END)) == -1)
|
|
SysExit("lseek");
|
|
if (esize) {
|
|
elf = Malloc(esize);
|
|
if (pread(fildes, elf, esize, 0) != esize)
|
|
SysExit("pread");
|
|
if (!IsElf64Binary(elf, esize))
|
|
Die("not an elf64 binary");
|
|
if (!(syms = GetElfSymbols(elf, esize, SHT_SYMTAB, &symcount)))
|
|
Die("missing elf symbol table");
|
|
if (!(secstrs = GetElfSectionNameStringTable(elf, esize)))
|
|
Die("missing elf section string table");
|
|
if (!(symstrs = GetElfStringTable(elf, esize, ".strtab")))
|
|
Die("missing elf symbol string table");
|
|
CheckPrivilegedCrossReferences();
|
|
if (mode == O_RDWR) {
|
|
if (elf->e_machine == EM_NEXGEN32E) {
|
|
RewriteTlsCodeAmd64();
|
|
OptimizePatchableFunctionEntries();
|
|
GenerateIfuncInit();
|
|
} else if (elf->e_machine == EM_AARCH64) {
|
|
RewriteTlsCodeArm64();
|
|
if (elf->e_type != ET_REL)
|
|
UseFreebsdOsAbi();
|
|
}
|
|
if (elf->e_type != ET_REL) {
|
|
WriteApeFlags();
|
|
PurgeIfuncSections();
|
|
RelinkZipFiles();
|
|
}
|
|
if (pwrite(fildes, elf, esize, 0) != esize)
|
|
SysExit("pwrite");
|
|
}
|
|
}
|
|
if (close(fildes))
|
|
SysExit("close");
|
|
}
|
|
|
|
int main(int argc, char *argv[]) {
|
|
int i;
|
|
if (!IsOptimized())
|
|
ShowCrashReports();
|
|
GetOpts(argc, argv);
|
|
for (i = optind; i < argc; ++i) {
|
|
epath = argv[i];
|
|
FixupObject();
|
|
}
|
|
}
|