mirror of
				https://github.com/jart/cosmopolitan.git
				synced 2025-10-26 19:16:41 +00:00 
			
		
		
		
	For this to work, a loader has to be able to tell the difference between an ‘old’ and a ‘new’ binary. This is achieved via a repurposing of ELF’s e_flags field. We previously tried to use the padding in e_ident for it, but binutils was resetting it to zero in e.g. strip. This introduces one new ELF flag for cosmopolitan binaries. It is called `EF_APE_MODERN`. We choose 0x101ca75, "lol cat 5". It should now be safe to install the ape loader binfmt registration with the `P` flag.
		
			
				
	
	
		
			697 lines
		
	
	
	
		
			24 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			697 lines
		
	
	
	
		
			24 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
 | |
| │ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 | |
| ╞══════════════════════════════════════════════════════════════════════════════╡
 | |
| │ Copyright 2020 Justine Alexandra Roberts Tunney                              │
 | |
| │                                                                              │
 | |
| │ Permission to use, copy, modify, and/or distribute this software for         │
 | |
| │ any purpose with or without fee is hereby granted, provided that the         │
 | |
| │ above copyright notice and this permission notice appear in all copies.      │
 | |
| │                                                                              │
 | |
| │ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
 | |
| │ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
 | |
| │ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
 | |
| │ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
 | |
| │ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
 | |
| │ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
 | |
| │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
 | |
| │ PERFORMANCE OF THIS SOFTWARE.                                                │
 | |
| ╚─────────────────────────────────────────────────────────────────────────────*/
 | |
| #include "libc/assert.h"
 | |
| #include "libc/calls/calls.h"
 | |
| #include "libc/calls/struct/stat.h"
 | |
| #include "libc/dce.h"
 | |
| #include "libc/elf/def.h"
 | |
| #include "libc/elf/elf.h"
 | |
| #include "libc/elf/scalar.h"
 | |
| #include "libc/elf/struct/rela.h"
 | |
| #include "libc/elf/struct/shdr.h"
 | |
| #include "libc/elf/struct/sym.h"
 | |
| #include "libc/errno.h"
 | |
| #include "libc/fmt/itoa.h"
 | |
| #include "libc/fmt/magnumstrs.internal.h"
 | |
| #include "libc/limits.h"
 | |
| #include "libc/log/log.h"
 | |
| #include "libc/macros.internal.h"
 | |
| #include "libc/mem/gc.h"
 | |
| #include "libc/mem/mem.h"
 | |
| #include "libc/runtime/runtime.h"
 | |
| #include "libc/serialize.h"
 | |
| #include "libc/stdalign.internal.h"
 | |
| #include "libc/stdckdint.h"
 | |
| #include "libc/stdio/stdio.h"
 | |
| #include "libc/str/str.h"
 | |
| #include "libc/sysv/consts/map.h"
 | |
| #include "libc/sysv/consts/msync.h"
 | |
| #include "libc/sysv/consts/o.h"
 | |
| #include "libc/sysv/consts/prot.h"
 | |
| #include "libc/zip.internal.h"
 | |
| #include "third_party/getopt/getopt.internal.h"
 | |
| 
 | |
| /**
 | |
|  * @fileoverview GCC Codegen Fixer-Upper.
 | |
|  */
 | |
| 
 | |
| #define COSMO_TLS_REG 28
 | |
| #define MRS_TPIDR_EL0 0xd53bd040u
 | |
| #define IFUNC_SECTION ".init.202.ifunc"
 | |
| 
 | |
| #define MOV_REG(DST, SRC) (0xaa0003e0u | (SRC) << 16 | (DST))
 | |
| 
 | |
| static int mode;
 | |
| static int fildes;
 | |
| static char *symstrs;
 | |
| static char *secstrs;
 | |
| static ssize_t esize;
 | |
| static Elf64_Sym *syms;
 | |
| static Elf64_Ehdr *elf;
 | |
| static const char *epath;
 | |
| static Elf64_Xword symcount;
 | |
| 
 | |
| #include "libc/mem/tinymalloc.inc"
 | |
| 
 | |
| static wontreturn void Die(const char *reason) {
 | |
|   tinyprint(2, epath, ": ", reason, "\n", NULL);
 | |
|   exit(1);
 | |
| }
 | |
| 
 | |
| static wontreturn void DieOom(void) {
 | |
|   Die("out of memory");
 | |
| }
 | |
| 
 | |
| static void *Malloc(size_t n) {
 | |
|   void *p;
 | |
|   if (!(p = malloc(n)))
 | |
|     DieOom();
 | |
|   return p;
 | |
| }
 | |
| 
 | |
| static void *Realloc(void *p, size_t n) {
 | |
|   if (!(p = realloc(p, n)))
 | |
|     DieOom();
 | |
|   return p;
 | |
| }
 | |
| 
 | |
| static wontreturn void SysExit(const char *func) {
 | |
|   const char *errstr;
 | |
|   if (!(errstr = _strerdoc(errno)))
 | |
|     errstr = "EUNKNOWN";
 | |
|   tinyprint(2, epath, ": ", func, " failed with ", errstr, "\n", NULL);
 | |
|   exit(1);
 | |
| }
 | |
| 
 | |
| static wontreturn void PrintUsage(int fd, int exitcode) {
 | |
|   tinyprint(fd, "\n\
 | |
| NAME\n\
 | |
| \n\
 | |
|   Cosmopolitan Object Fixer\n\
 | |
| \n\
 | |
| SYNOPSIS\n\
 | |
| \n\
 | |
|   ",
 | |
|             program_invocation_name, " [FLAGS] OBJECT...\n\
 | |
| \n\
 | |
| DESCRIPTION\n\
 | |
| \n\
 | |
|   This program applies fixups to ELF object files and executables that\n\
 | |
|   at build time whenever they're created by the toolchain. It's needed\n\
 | |
|   so that zip assets work correctly, plus this'll make code go faster.\n\
 | |
|   This program is also able to spot some coding errors like privileged\n\
 | |
|   functions calling unprivileged ones.\n\
 | |
| \n\
 | |
|   Multiple binary files may be specified, which are modified in-place.\n\
 | |
| \n\
 | |
| FLAGS\n\
 | |
| \n\
 | |
|   -h            show this help\n\
 | |
|   -c            checks only mode\n\
 | |
| \n\
 | |
| ",
 | |
|             NULL);
 | |
|   exit(exitcode);
 | |
| }
 | |
| 
 | |
| static void GetOpts(int argc, char *argv[]) {
 | |
|   int opt;
 | |
|   mode = O_RDWR;
 | |
|   while ((opt = getopt(argc, argv, "ch")) != -1) {
 | |
|     switch (opt) {
 | |
|       case 'c':
 | |
|         mode = O_RDONLY;
 | |
|         break;
 | |
|       case 'h':
 | |
|         PrintUsage(1, 0);
 | |
|       default:
 | |
|         PrintUsage(2, 1);
 | |
|     }
 | |
|   }
 | |
|   if (optind == argc) {
 | |
|     tinyprint(2,
 | |
|               "error: no elf object files specified\n"
 | |
|               "run ",
 | |
|               program_invocation_name, " -h for usage\n", NULL);
 | |
|     exit(1);
 | |
|   }
 | |
| }
 | |
| 
 | |
| // Official Intel Multibyte No-Operation Instructions. See
 | |
| // Intel's Six Thousand Page Manual, Volume 2, Table 4-12:
 | |
| // On "Recommended Multi-Byte Sequence of NOP Instruction"
 | |
| static const unsigned char kNops[10][10] = {
 | |
|     {},                                          //
 | |
|     {/***/ /***/ 0x90},                          // nop
 | |
|     {0x66, /***/ 0x90},                          // xchg %ax,%ax
 | |
|     {/***/ 0x0f, 0x1f, 0000},                    // nopl (%rax)
 | |
|     {/***/ 0x0f, 0x1f, 0100, /***/ 0},           // nopl 0x00(%rax)
 | |
|     {/***/ 0x0f, 0x1f, 0104, 0000, 0},           // nopl 0x00(%rax,%rax,1)
 | |
|     {0x66, 0x0f, 0x1f, 0104, 0000, 0},           // nopw 0x00(%rax,%rax,1)
 | |
|     {/***/ 0x0f, 0x1f, 0200, 0000, 0, 0, 0},     // nopl 0x00000000(%rax)
 | |
|     {/***/ 0x0F, 0x1F, 0204, 0000, 0, 0, 0, 0},  // nopl 0x00000000(%rax,%rax,1)
 | |
|     {0x66, 0x0F, 0x1F, 0204, 0000, 0, 0, 0, 0},  // nopw 0x00000000(%rax,%rax,1)
 | |
|     // osz  map  op   modrm  sib   displacement  //
 | |
| };
 | |
| 
 | |
| /**
 | |
|  * Rewrites leading NOP instructions to have fewer instructions.
 | |
|  *
 | |
|  * For example, the following code:
 | |
|  *
 | |
|  *     nop
 | |
|  *     nop
 | |
|  *     nop
 | |
|  *     nop
 | |
|  *     nop
 | |
|  *     nop
 | |
|  *     nop
 | |
|  *     nop
 | |
|  *     nop
 | |
|  *     nop
 | |
|  *     nop
 | |
|  *     nop
 | |
|  *     ret
 | |
|  *     nop
 | |
|  *     nop
 | |
|  *
 | |
|  * Would be morphed into the following:
 | |
|  *
 | |
|  *     nopw 0x00000000(%rax,%rax,1)
 | |
|  *     xchg %ax,%ax
 | |
|  *     ret
 | |
|  *     nop
 | |
|  *     nop
 | |
|  *
 | |
|  * @param p points to memory region that shall be modified
 | |
|  * @param e points to end of memory region, i.e. `p + #bytes`
 | |
|  * @return p advanced past last morphed byte
 | |
|  */
 | |
| static unsigned char *CoalesceNops(unsigned char *p, const unsigned char *e) {
 | |
|   long n;
 | |
|   for (; p + 1 < e; p += n) {
 | |
|     if (p[0] != 0x90)
 | |
|       break;
 | |
|     if (p[1] != 0x90)
 | |
|       break;
 | |
|     for (n = 2; p + n < e; ++n) {
 | |
|       if (p[n] != 0x90)
 | |
|         break;
 | |
|       if (n == ARRAYLEN(kNops) - 1)
 | |
|         break;
 | |
|     }
 | |
|     memcpy(p, kNops[n], n);
 | |
|   }
 | |
|   return p;
 | |
| }
 | |
| 
 | |
| static void CheckPrivilegedCrossReferences(void) {
 | |
|   unsigned long x;
 | |
|   const char *secname;
 | |
|   const Elf64_Shdr *shdr;
 | |
|   const Elf64_Rela *rela, *erela;
 | |
|   shdr = FindElfSectionByName(elf, esize, secstrs, ".rela.privileged");
 | |
|   if (!shdr || !(rela = GetElfSectionAddress(elf, esize, shdr)))
 | |
|     return;
 | |
|   erela = rela + shdr->sh_size / sizeof(*rela);
 | |
|   for (; rela < erela; ++rela) {
 | |
|     if (!ELF64_R_TYPE(rela->r_info))
 | |
|       continue;
 | |
|     if (!(x = ELF64_R_SYM(rela->r_info)))
 | |
|       continue;
 | |
|     if (x >= symcount)
 | |
|       continue;
 | |
|     if (syms[x].st_shndx == SHN_ABS)
 | |
|       continue;
 | |
|     if (!syms[x].st_shndx)
 | |
|       continue;
 | |
|     if ((shdr = GetElfSectionHeaderAddress(elf, esize, syms[x].st_shndx))) {
 | |
|       if (~shdr->sh_flags & SHF_EXECINSTR)
 | |
|         continue;  // data reference
 | |
|       if ((secname = GetElfString(elf, esize, secstrs, shdr->sh_name)) &&
 | |
|           strcmp(".privileged", secname)) {
 | |
|         tinyprint(2, epath,
 | |
|                   ": code in .privileged section "
 | |
|                   "references symbol '",
 | |
|                   GetElfString(elf, esize, symstrs, syms[x].st_name),
 | |
|                   "' in unprivileged code section '", secname, "'\n", NULL);
 | |
|         exit(1);
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| // Change AMD code to use %gs:0x30 instead of %fs:0
 | |
| // We assume -mno-tls-direct-seg-refs has been used
 | |
| static void ChangeTlsFsToGs(unsigned char *p, size_t n) {
 | |
|   unsigned char *e = p + n - 9;
 | |
|   while (p <= e) {
 | |
|     // we're checking for the following expression:
 | |
|     //   0144 == p[0] &&           // %fs
 | |
|     //   0110 == (p[1] & 0373) &&  // rex.w (and ignore rex.r)
 | |
|     //   (0213 == p[2] ||          // mov reg/mem → reg (word-sized)
 | |
|     //   0003 == p[2]) &&          // add reg/mem → reg (word-sized)
 | |
|     //   0004 == (p[3] & 0307) &&  // mod/rm (4,reg,0) means sib → reg
 | |
|     //   0045 == p[4] &&           // sib (5,4,0) → (rbp,rsp,0) → disp32
 | |
|     //   0000 == p[5] &&           // displacement (von Neumann endian)
 | |
|     //   0000 == p[6] &&           // displacement
 | |
|     //   0000 == p[7] &&           // displacement
 | |
|     //   0000 == p[8]              // displacement
 | |
|     uint64_t w = READ64LE(p) & READ64LE("\377\373\377\307\377\377\377\377");
 | |
|     if ((w == READ64LE("\144\110\213\004\045\000\000\000") ||
 | |
|          w == READ64LE("\144\110\003\004\045\000\000\000")) &&
 | |
|         !p[8]) {
 | |
|       p[0] = 0145;  // change %fs to %gs
 | |
|       p[5] = 0x30;  // change 0 to 0x30
 | |
|       p += 9;
 | |
|     } else {
 | |
|       ++p;
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| static void RewriteTlsCodeAmd64(void) {
 | |
|   int i;
 | |
|   uint8_t *p;
 | |
|   Elf64_Shdr *shdr;
 | |
|   for (i = 0; i < elf->e_shnum; ++i) {
 | |
|     if (!(shdr = GetElfSectionHeaderAddress(elf, esize, i)))
 | |
|       Die("elf header overflow #1");
 | |
|     if (shdr->sh_type == SHT_PROGBITS &&  //
 | |
|         (shdr->sh_flags & SHF_ALLOC) &&   //
 | |
|         (shdr->sh_flags & SHF_EXECINSTR)) {
 | |
|       if (!(p = GetElfSectionAddress(elf, esize, shdr)))
 | |
|         Die("elf header overflow #2");
 | |
|       ChangeTlsFsToGs(p, shdr->sh_size);
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| // Modify ARM64 code to use x28 for TLS rather than tpidr_el0.
 | |
| static void RewriteTlsCodeArm64(void) {
 | |
|   int i;
 | |
|   Elf64_Shdr *shdr;
 | |
|   uint32_t *p, *pe;
 | |
|   for (i = 0; i < elf->e_shnum; ++i) {
 | |
|     if (!(shdr = GetElfSectionHeaderAddress(elf, esize, i)))
 | |
|       Die("elf header overflow #1");
 | |
|     if (shdr->sh_type == SHT_PROGBITS &&  //
 | |
|         (shdr->sh_flags & SHF_ALLOC) &&   //
 | |
|         (shdr->sh_flags & SHF_EXECINSTR)) {
 | |
|       if (!(p = GetElfSectionAddress(elf, esize, shdr)))
 | |
|         Die("elf header overflow #2");
 | |
|       for (pe = p + shdr->sh_size / 4; p <= pe; ++p)
 | |
|         if ((*p & -32) == MRS_TPIDR_EL0)
 | |
|           *p = MOV_REG(*p & 31, COSMO_TLS_REG);
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| static void UseFreebsdOsAbi(void) {
 | |
|   elf->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
 | |
| }
 | |
| 
 | |
| static void WriteApeFlags(void) {
 | |
|   /* try to be forward-compatible */
 | |
|   elf->e_flags = (elf->e_flags & ~EF_APE_MODERN_MASK) | EF_APE_MODERN;
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Improve GCC11 `-fpatchable-function-entry` codegen.
 | |
|  *
 | |
|  * When using flags like `-fpatchable-function-entry=9,7` GCC v11 will
 | |
|  * insert two `nop` instructions, rather than merging them into faster
 | |
|  * "fat" nops.
 | |
|  *
 | |
|  * In order for this to work, the function symbol must be declared as
 | |
|  * `STT_FUNC` and `st_size` must have the function's byte length.
 | |
|  */
 | |
| static void OptimizePatchableFunctionEntries(void) {
 | |
|   long i;
 | |
|   Elf64_Shdr *shdr;
 | |
|   unsigned char *p;
 | |
|   Elf64_Addr sym_rva;
 | |
|   if (elf->e_machine == EM_NEXGEN32E) {
 | |
|     for (i = 0; i < symcount; ++i) {
 | |
|       if (!syms[i].st_size)
 | |
|         continue;
 | |
|       if (ELF64_ST_TYPE(syms[i].st_info) != STT_FUNC)
 | |
|         continue;
 | |
|       if (!(shdr = GetElfSectionHeaderAddress(elf, esize, syms[i].st_shndx)))
 | |
|         Die("elf header overflow #3");
 | |
|       if (shdr->sh_type != SHT_PROGBITS)
 | |
|         continue;
 | |
|       if (!(p = GetElfSectionAddress(elf, esize, shdr)))
 | |
|         Die("elf section overflow");
 | |
|       if (ckd_sub(&sym_rva, syms[i].st_value, shdr->sh_addr))
 | |
|         Die("elf symbol beneath section");
 | |
|       if (sym_rva > esize - shdr->sh_offset ||               //
 | |
|           (p += sym_rva) >= (unsigned char *)elf + esize ||  //
 | |
|           syms[i].st_size >= esize - sym_rva) {
 | |
|         Die("elf symbol overflow");
 | |
|       }
 | |
|       CoalesceNops(p, p + syms[i].st_size);
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Converts PKZIP recs from PC-relative to RVA-relative.
 | |
|  */
 | |
| static void RelinkZipFiles(void) {
 | |
|   int rela, recs;
 | |
|   unsigned long cdsize, cdoffset;
 | |
|   unsigned char foot[kZipCdirHdrMinSize];
 | |
|   unsigned char *base, *xeof, *stop, *eocd, *cdir, *lfile, *cfile;
 | |
|   base = (unsigned char *)elf;
 | |
|   xeof = (unsigned char *)elf + esize;
 | |
|   eocd = xeof - kZipCdirHdrMinSize;
 | |
|   stop = base;
 | |
|   // scan backwards for zip eocd todo record
 | |
|   // that was created by libc/nexgen32e/zip.S
 | |
|   for (;;) {
 | |
|     if (eocd < stop)
 | |
|       return;
 | |
|     if (READ32LE(eocd) == kZipCdirHdrMagicTodo &&  //
 | |
|         ZIP_CDIR_SIZE(eocd) &&                     //
 | |
|         !ZIP_CDIR_OFFSET(eocd) &&                  //
 | |
|         !ZIP_CDIR_RECORDS(eocd) &&                 //
 | |
|         !ZIP_CDIR_RECORDSONDISK(eocd)) {
 | |
|       break;
 | |
|     }
 | |
|     eocd = memrchr(stop, 'P', eocd - base);
 | |
|   }
 | |
|   // apply fixups to zip central directory recs
 | |
|   recs = 0;
 | |
|   cdir = (stop = eocd) - (cdsize = ZIP_CDIR_SIZE(eocd));
 | |
|   for (cfile = cdir; cfile < stop; cfile += ZIP_CFILE_HDRSIZE(cfile)) {
 | |
|     if (++recs >= 65536)
 | |
|       Die("too many zip central directory records");
 | |
|     if (cfile < base ||                        //
 | |
|         cfile + kZipCfileHdrMinSize > xeof ||  //
 | |
|         cfile + ZIP_CFILE_HDRSIZE(cfile) > xeof)
 | |
|       Die("zip central directory entry overflows image");
 | |
|     if (READ32LE(cfile) != kZipCfileHdrMagic)
 | |
|       Die("bad __zip_cdir_size or zip central directory corrupted");
 | |
|     if ((rela = ZIP_CFILE_OFFSET(cfile)) < 0) {
 | |
|       lfile = cfile + kZipCfileOffsetOffset + rela;
 | |
|     } else {
 | |
|       lfile = base + rela;  // earlier fixup failed partway?
 | |
|     }
 | |
|     if (lfile < base ||                        //
 | |
|         lfile + kZipLfileHdrMinSize > xeof ||  //
 | |
|         lfile + ZIP_LFILE_SIZE(lfile) > xeof)
 | |
|       Die("zip local file overflows image");
 | |
|     if (READ32LE(lfile) != kZipLfileHdrMagic)
 | |
|       Die("zip central directory offset to local file corrupted");
 | |
|     if (rela < 0)
 | |
|       WRITE32LE(cfile + kZipCfileOffsetOffset, lfile - base);
 | |
|   }
 | |
|   // append new eocd record to program image
 | |
|   if (esize > INT_MAX - sizeof(foot) ||
 | |
|       (cdoffset = esize) > INT_MAX - sizeof(foot))
 | |
|     Die("the time has come to adopt zip64");
 | |
|   bzero(foot, sizeof(foot));
 | |
|   WRITE32LE(foot, kZipCdirHdrMagic);
 | |
|   WRITE32LE(foot + kZipCdirSizeOffset, cdsize);
 | |
|   WRITE16LE(foot + kZipCdirRecordsOffset, recs);
 | |
|   WRITE32LE(foot + kZipCdirOffsetOffset, cdoffset);
 | |
|   WRITE16LE(foot + kZipCdirRecordsOnDiskOffset, recs);
 | |
|   if (pwrite(fildes, cdir, cdsize, esize) != cdsize)
 | |
|     SysExit("cdir pwrite");
 | |
|   if (pwrite(fildes, foot, sizeof(foot), esize + cdsize) != sizeof(foot))
 | |
|     SysExit("eocd pwrite");
 | |
|   eocd = foot;
 | |
| }
 | |
| 
 | |
| // when __attribute__((__target_clones__(...))) is used, the compiler
 | |
| // will generate multiple implementations of a function for different
 | |
| // microarchitectures as well as a resolver function that tells which
 | |
| // function is appropriate to call. however the compiler doesn't make
 | |
| // code for the actual function. it also doesn't record where resolve
 | |
| // functions are located in the binary so we've reverse eng'd it here
 | |
| static void GenerateIfuncInit(void) {
 | |
|   char *name, *s;
 | |
|   long code_i = 0;
 | |
|   long relas_i = 0;
 | |
|   static char code[16384];
 | |
|   static Elf64_Rela relas[1024];
 | |
|   Elf64_Shdr *symtab_shdr = GetElfSymbolTable(elf, esize, SHT_SYMTAB, 0);
 | |
|   if (!symtab_shdr)
 | |
|     Die("symbol table section header not found");
 | |
|   Elf64_Word symtab_shdr_index =
 | |
|       ((char *)symtab_shdr - ((char *)elf + elf->e_shoff)) / elf->e_shentsize;
 | |
|   for (Elf64_Xword i = 0; i < symcount; ++i) {
 | |
|     if (syms[i].st_shndx == SHN_UNDEF)
 | |
|       continue;
 | |
|     if (syms[i].st_shndx >= SHN_LORESERVE)
 | |
|       continue;
 | |
|     if (ELF64_ST_TYPE(syms[i].st_info) != STT_GNU_IFUNC)
 | |
|       continue;
 | |
|     if (!(name = GetElfString(elf, esize, symstrs, syms[i].st_name)))
 | |
|       Die("could not get symbol name of ifunc");
 | |
|     static char resolver_name[65536];
 | |
|     strlcpy(resolver_name, name, sizeof(resolver_name));
 | |
|     if (strlcat(resolver_name, ".resolver", sizeof(resolver_name)) >=
 | |
|         sizeof(resolver_name))
 | |
|       Die("ifunc name too long");
 | |
|     Elf64_Xword function_sym_index = i;
 | |
|     Elf64_Xword resolver_sym_index = -1;
 | |
|     for (Elf64_Xword i = 0; i < symcount; ++i) {
 | |
|       if (syms[i].st_shndx == SHN_UNDEF)
 | |
|         continue;
 | |
|       if (syms[i].st_shndx >= SHN_LORESERVE)
 | |
|         continue;
 | |
|       if (ELF64_ST_TYPE(syms[i].st_info) != STT_FUNC)
 | |
|         continue;
 | |
|       if (!(s = GetElfString(elf, esize, symstrs, syms[i].st_name)))
 | |
|         continue;
 | |
|       if (strcmp(s, resolver_name))
 | |
|         continue;
 | |
|       resolver_sym_index = i;
 | |
|       break;
 | |
|     }
 | |
|     if (resolver_sym_index == -1)
 | |
|       // this can happen if a function with __target_clones() also has a
 | |
|       // __weak_reference() defined, in which case GCC shall only create
 | |
|       // one resolver function for the two of them so we can ignore this
 | |
|       // HOWEVER the GOT will still have an entry for each two functions
 | |
|       continue;
 | |
| 
 | |
|     // call the resolver (using cosmo's special .init abi)
 | |
|     static const char chunk1[] = {
 | |
|         0x57,                          // push %rdi
 | |
|         0x56,                          // push %rsi
 | |
|         0xe8, 0x00, 0x00, 0x00, 0x00,  // call f.resolver
 | |
|     };
 | |
|     if (code_i + sizeof(chunk1) > sizeof(code) || relas_i + 1 > ARRAYLEN(relas))
 | |
|       Die("too many ifuncs");
 | |
|     memcpy(code + code_i, chunk1, sizeof(chunk1));
 | |
|     relas[relas_i].r_info = ELF64_R_INFO(resolver_sym_index, R_X86_64_PLT32);
 | |
|     relas[relas_i].r_offset = code_i + 1 + 1 + 1;
 | |
|     relas[relas_i].r_addend = -4;
 | |
|     code_i += sizeof(chunk1);
 | |
|     relas_i += 1;
 | |
| 
 | |
|     // move the resolved function address into the GOT slot. it's very
 | |
|     // important that this happen, because the linker by default makes
 | |
|     // self-referencing PLT functions whose execution falls through oh
 | |
|     // no. we need to repeat this process for any aliases this defines
 | |
|     static const char chunk2[] = {
 | |
|         0x48, 0x89, 0x05, 0x00, 0x00, 0x00, 0x00,  // mov %rax,f@gotpcrel(%rip)
 | |
|     };
 | |
|     for (Elf64_Xword i = 0; i < symcount; ++i) {
 | |
|       if (i == function_sym_index ||
 | |
|           (ELF64_ST_TYPE(syms[i].st_info) == STT_GNU_IFUNC &&
 | |
|            syms[i].st_shndx == syms[function_sym_index].st_shndx &&
 | |
|            syms[i].st_value == syms[function_sym_index].st_value)) {
 | |
|         if (code_i + sizeof(chunk2) > sizeof(code) ||
 | |
|             relas_i + 1 > ARRAYLEN(relas))
 | |
|           Die("too many ifuncs");
 | |
|         memcpy(code + code_i, chunk2, sizeof(chunk2));
 | |
|         relas[relas_i].r_info = ELF64_R_INFO(i, R_X86_64_GOTPCREL);
 | |
|         relas[relas_i].r_offset = code_i + 3;
 | |
|         relas[relas_i].r_addend = -4;
 | |
|         code_i += sizeof(chunk2);
 | |
|         relas_i += 1;
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     static const char chunk3[] = {
 | |
|         0x5e,  // pop %rsi
 | |
|         0x5f,  // pop %rdi
 | |
|     };
 | |
|     if (code_i + sizeof(chunk3) > sizeof(code))
 | |
|       Die("too many ifuncs");
 | |
|     memcpy(code + code_i, chunk3, sizeof(chunk3));
 | |
|     code_i += sizeof(chunk3);
 | |
|   }
 | |
|   if (!code_i)
 | |
|     return;
 | |
| 
 | |
|   // prepare to mutate elf
 | |
|   // remap file so it has more space
 | |
|   if (elf->e_shnum + 2 > 65535)
 | |
|     Die("too many sections");
 | |
|   size_t reserve_size = esize + 32 * 1024 * 1024;
 | |
|   elf = Realloc(elf, reserve_size);
 | |
| 
 | |
|   // duplicate section name strings table to end of file
 | |
|   Elf64_Shdr *shdrstr_shdr = (Elf64_Shdr *)((char *)elf + elf->e_shoff +
 | |
|                                             elf->e_shstrndx * elf->e_shentsize);
 | |
|   memcpy((char *)elf + esize, (char *)elf + shdrstr_shdr->sh_offset,
 | |
|          shdrstr_shdr->sh_size);
 | |
|   shdrstr_shdr->sh_offset = esize;
 | |
|   esize += shdrstr_shdr->sh_size;
 | |
| 
 | |
|   // append strings for the two sections we're creating
 | |
|   const char *code_section_name = IFUNC_SECTION;
 | |
|   Elf64_Word code_section_name_offset = shdrstr_shdr->sh_size;
 | |
|   memcpy((char *)elf + esize, code_section_name, strlen(code_section_name) + 1);
 | |
|   shdrstr_shdr->sh_size += strlen(code_section_name) + 1;
 | |
|   esize += strlen(code_section_name) + 1;
 | |
|   const char *rela_section_name = ".rela" IFUNC_SECTION;
 | |
|   Elf64_Word rela_section_name_offset = shdrstr_shdr->sh_size;
 | |
|   memcpy((char *)elf + esize, rela_section_name, strlen(rela_section_name) + 1);
 | |
|   shdrstr_shdr->sh_size += strlen(rela_section_name) + 1;
 | |
|   esize += strlen(rela_section_name) + 1;
 | |
|   unassert(esize == shdrstr_shdr->sh_offset + shdrstr_shdr->sh_size);
 | |
|   ++esize;
 | |
| 
 | |
|   // duplicate section headers to end of file
 | |
|   esize = (esize + alignof(Elf64_Shdr) - 1) & -alignof(Elf64_Shdr);
 | |
|   memcpy((char *)elf + esize, (char *)elf + elf->e_shoff,
 | |
|          elf->e_shnum * elf->e_shentsize);
 | |
|   elf->e_shoff = esize;
 | |
|   esize += elf->e_shnum * elf->e_shentsize;
 | |
|   unassert(esize == elf->e_shoff + elf->e_shnum * elf->e_shentsize);
 | |
| 
 | |
|   // append code section header
 | |
|   Elf64_Shdr *code_shdr = (Elf64_Shdr *)((char *)elf + esize);
 | |
|   Elf64_Word code_shdr_index = elf->e_shnum++;
 | |
|   esize += elf->e_shentsize;
 | |
|   code_shdr->sh_name = code_section_name_offset;
 | |
|   code_shdr->sh_type = SHT_PROGBITS;
 | |
|   code_shdr->sh_flags = SHF_ALLOC | SHF_EXECINSTR;
 | |
|   code_shdr->sh_addr = 0;
 | |
|   code_shdr->sh_link = 0;
 | |
|   code_shdr->sh_info = 0;
 | |
|   code_shdr->sh_entsize = 1;
 | |
|   code_shdr->sh_addralign = 1;
 | |
|   code_shdr->sh_size = code_i;
 | |
| 
 | |
|   // append code's rela section header
 | |
|   Elf64_Shdr *rela_shdr = (Elf64_Shdr *)((char *)elf + esize);
 | |
|   esize += elf->e_shentsize;
 | |
|   rela_shdr->sh_name = rela_section_name_offset;
 | |
|   rela_shdr->sh_type = SHT_RELA;
 | |
|   rela_shdr->sh_flags = SHF_INFO_LINK;
 | |
|   rela_shdr->sh_addr = 0;
 | |
|   rela_shdr->sh_info = code_shdr_index;
 | |
|   rela_shdr->sh_link = symtab_shdr_index;
 | |
|   rela_shdr->sh_entsize = sizeof(Elf64_Rela);
 | |
|   rela_shdr->sh_addralign = alignof(Elf64_Rela);
 | |
|   rela_shdr->sh_size = relas_i * sizeof(Elf64_Rela);
 | |
|   elf->e_shnum++;
 | |
| 
 | |
|   // append relas
 | |
|   esize = (esize + 63) & -64;
 | |
|   rela_shdr->sh_offset = esize;
 | |
|   memcpy((char *)elf + esize, relas, relas_i * sizeof(Elf64_Rela));
 | |
|   esize += relas_i * sizeof(Elf64_Rela);
 | |
|   unassert(esize == rela_shdr->sh_offset + rela_shdr->sh_size);
 | |
| 
 | |
|   // append code
 | |
|   esize = (esize + 63) & -64;
 | |
|   code_shdr->sh_offset = esize;
 | |
|   memcpy((char *)elf + esize, code, code_i);
 | |
|   esize += code_i;
 | |
|   unassert(esize == code_shdr->sh_offset + code_shdr->sh_size);
 | |
| }
 | |
| 
 | |
| // when __attribute__((__target_clones__(...))) is used, static binaries
 | |
| // become poisoned with rela IFUNC relocations, which the linker refuses
 | |
| // to remove. even if we objcopy the ape executable as binary the linker
 | |
| // preserves its precious ifunc code and puts them before the executable
 | |
| // header. the good news is that the linker actually does link correctly
 | |
| // which means we can delete the broken rela sections in the elf binary.
 | |
| static void PurgeIfuncSections(void) {
 | |
|   Elf64_Shdr *shdrs = (Elf64_Shdr *)((char *)elf + elf->e_shoff);
 | |
|   for (Elf64_Word i = 0; i < elf->e_shnum; ++i) {
 | |
|     char *name;
 | |
|     if (shdrs[i].sh_type == SHT_RELA ||
 | |
|         ((name = GetElfSectionName(elf, esize, shdrs + i)) &&
 | |
|          !strcmp(name, ".init.202.ifunc"))) {
 | |
|       shdrs[i].sh_type = SHT_NULL;
 | |
|       shdrs[i].sh_flags &= ~SHF_ALLOC;
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| static void FixupObject(void) {
 | |
|   if ((fildes = open(epath, mode)) == -1)
 | |
|     SysExit("open");
 | |
|   if ((esize = lseek(fildes, 0, SEEK_END)) == -1)
 | |
|     SysExit("lseek");
 | |
|   if (esize) {
 | |
|     elf = Malloc(esize);
 | |
|     if (pread(fildes, elf, esize, 0) != esize)
 | |
|       SysExit("pread");
 | |
|     if (!IsElf64Binary(elf, esize))
 | |
|       Die("not an elf64 binary");
 | |
|     if (!(syms = GetElfSymbols(elf, esize, SHT_SYMTAB, &symcount)))
 | |
|       Die("missing elf symbol table");
 | |
|     if (!(secstrs = GetElfSectionNameStringTable(elf, esize)))
 | |
|       Die("missing elf section string table");
 | |
|     if (!(symstrs = GetElfStringTable(elf, esize, ".strtab")))
 | |
|       Die("missing elf symbol string table");
 | |
|     CheckPrivilegedCrossReferences();
 | |
|     if (mode == O_RDWR) {
 | |
|       if (elf->e_machine == EM_NEXGEN32E) {
 | |
|         RewriteTlsCodeAmd64();
 | |
|         OptimizePatchableFunctionEntries();
 | |
|         GenerateIfuncInit();
 | |
|       } else if (elf->e_machine == EM_AARCH64) {
 | |
|         RewriteTlsCodeArm64();
 | |
|         if (elf->e_type != ET_REL)
 | |
|           UseFreebsdOsAbi();
 | |
|       }
 | |
|       if (elf->e_type != ET_REL) {
 | |
|         WriteApeFlags();
 | |
|         PurgeIfuncSections();
 | |
|         RelinkZipFiles();
 | |
|       }
 | |
|       if (pwrite(fildes, elf, esize, 0) != esize)
 | |
|         SysExit("pwrite");
 | |
|     }
 | |
|   }
 | |
|   if (close(fildes))
 | |
|     SysExit("close");
 | |
| }
 | |
| 
 | |
| int main(int argc, char *argv[]) {
 | |
|   int i;
 | |
|   if (!IsOptimized())
 | |
|     ShowCrashReports();
 | |
|   GetOpts(argc, argv);
 | |
|   for (i = optind; i < argc; ++i) {
 | |
|     epath = argv[i];
 | |
|     FixupObject();
 | |
|   }
 | |
| }
 |