2022-08-06 10:51:50 +00:00
|
|
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
|
|
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
|
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
|
|
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
|
|
|
│ │
|
|
|
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
|
|
|
│ any purpose with or without fee is hereby granted, provided that the │
|
|
|
|
│ above copyright notice and this permission notice appear in all copies. │
|
|
|
|
│ │
|
|
|
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
|
|
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
|
|
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
|
|
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
|
|
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
|
|
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
|
|
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
|
|
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
|
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
2023-06-08 11:37:05 +00:00
|
|
|
#include "libc/assert.h"
|
2022-08-06 10:51:50 +00:00
|
|
|
#include "libc/calls/calls.h"
|
|
|
|
#include "libc/calls/struct/stat.h"
|
|
|
|
#include "libc/dce.h"
|
2023-07-02 17:19:16 +00:00
|
|
|
#include "libc/elf/def.h"
|
2022-08-06 10:51:50 +00:00
|
|
|
#include "libc/elf/elf.h"
|
|
|
|
#include "libc/elf/scalar.h"
|
|
|
|
#include "libc/elf/struct/rela.h"
|
|
|
|
#include "libc/elf/struct/shdr.h"
|
|
|
|
#include "libc/elf/struct/sym.h"
|
2022-08-14 00:20:50 +00:00
|
|
|
#include "libc/errno.h"
|
2022-08-06 10:51:50 +00:00
|
|
|
#include "libc/fmt/itoa.h"
|
2023-06-09 08:23:18 +00:00
|
|
|
#include "libc/fmt/magnumstrs.internal.h"
|
2023-06-10 16:15:19 +00:00
|
|
|
#include "libc/limits.h"
|
2022-08-06 10:51:50 +00:00
|
|
|
#include "libc/log/log.h"
|
2023-05-19 02:05:08 +00:00
|
|
|
#include "libc/macros.h"
|
2024-01-08 18:07:35 +00:00
|
|
|
#include "libc/mem/gc.h"
|
2024-03-24 10:14:25 +00:00
|
|
|
#include "libc/mem/mem.h"
|
2022-08-06 10:51:50 +00:00
|
|
|
#include "libc/runtime/runtime.h"
|
2023-12-30 04:11:23 +00:00
|
|
|
#include "libc/serialize.h"
|
2024-02-01 11:39:46 +00:00
|
|
|
#include "libc/stdalign.h"
|
2023-11-11 22:04:26 +00:00
|
|
|
#include "libc/stdckdint.h"
|
2024-02-01 11:39:46 +00:00
|
|
|
#include "libc/stdio/stdio.h"
|
2022-08-06 10:51:50 +00:00
|
|
|
#include "libc/str/str.h"
|
|
|
|
#include "libc/sysv/consts/map.h"
|
|
|
|
#include "libc/sysv/consts/msync.h"
|
|
|
|
#include "libc/sysv/consts/o.h"
|
|
|
|
#include "libc/sysv/consts/prot.h"
|
2023-06-10 16:15:19 +00:00
|
|
|
#include "libc/zip.h"
|
2023-07-03 02:57:43 +00:00
|
|
|
#include "third_party/getopt/getopt.internal.h"
|
2022-08-06 10:51:50 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @fileoverview GCC Codegen Fixer-Upper.
|
|
|
|
*/
|
|
|
|
|
2024-02-01 11:39:46 +00:00
|
|
|
#define COSMO_TLS_REG 28
|
|
|
|
#define MRS_TPIDR_EL0 0xd53bd040u
|
|
|
|
#define IFUNC_SECTION ".init.202.ifunc"
|
|
|
|
|
2023-05-19 02:05:08 +00:00
|
|
|
#define MOV_REG(DST, SRC) (0xaa0003e0u | (SRC) << 16 | (DST))
|
|
|
|
|
2023-06-08 21:29:22 +00:00
|
|
|
static int mode;
|
2023-06-10 16:15:19 +00:00
|
|
|
static int fildes;
|
2023-06-08 21:29:22 +00:00
|
|
|
static char *symstrs;
|
|
|
|
static char *secstrs;
|
|
|
|
static ssize_t esize;
|
|
|
|
static Elf64_Sym *syms;
|
2023-09-02 03:49:13 +00:00
|
|
|
static Elf64_Ehdr *elf;
|
2023-06-08 21:29:22 +00:00
|
|
|
static const char *epath;
|
|
|
|
static Elf64_Xword symcount;
|
2023-06-08 11:37:05 +00:00
|
|
|
|
2024-05-07 07:37:41 +00:00
|
|
|
#include "libc/mem/tinymalloc.inc"
|
|
|
|
|
2023-06-10 16:15:19 +00:00
|
|
|
static wontreturn void Die(const char *reason) {
|
2023-07-03 09:47:05 +00:00
|
|
|
tinyprint(2, epath, ": ", reason, "\n", NULL);
|
2023-06-10 16:15:19 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2024-03-24 10:14:25 +00:00
|
|
|
/**
 * Terminates the program via Die() with an "out of memory" message.
 */
static wontreturn void DieOom(void) {
  Die("out of memory");
}
|
|
|
|
|
|
|
|
/**
 * Allocates memory, or dies trying.
 *
 * @param n is the number of bytes requested from malloc()
 * @return the pointer returned by malloc(); if malloc() returns NULL
 *     then DieOom() is called and this function does not return
 */
static void *Malloc(size_t n) {
  void *mem = malloc(n);
  if (mem == NULL)
    DieOom();
  return mem;
}
|
|
|
|
|
|
|
|
/**
 * Resizes an allocation, or dies trying.
 *
 * @param p is forwarded to realloc() as the existing allocation
 * @param n is the new size in bytes
 * @return the pointer returned by realloc(); if realloc() returns NULL
 *     then DieOom() is called and this function does not return
 */
static void *Realloc(void *p, size_t n) {
  void *grown = realloc(p, n);
  if (grown == NULL)
    DieOom();
  return grown;
}
|
|
|
|
|
2023-06-08 21:29:22 +00:00
|
|
|
static wontreturn void SysExit(const char *func) {
|
2023-06-08 11:37:05 +00:00
|
|
|
const char *errstr;
|
|
|
|
if (!(errstr = _strerdoc(errno)))
|
|
|
|
errstr = "EUNKNOWN";
|
2023-07-03 09:47:05 +00:00
|
|
|
tinyprint(2, epath, ": ", func, " failed with ", errstr, "\n", NULL);
|
2023-06-08 11:37:05 +00:00
|
|
|
exit(1);
|
2022-08-06 10:51:50 +00:00
|
|
|
}
|
|
|
|
|
2023-06-10 16:15:19 +00:00
|
|
|
static wontreturn void PrintUsage(int fd, int exitcode) {
|
2023-07-03 09:47:05 +00:00
|
|
|
tinyprint(fd, "\n\
|
2023-06-10 16:15:19 +00:00
|
|
|
NAME\n\
|
|
|
|
\n\
|
|
|
|
Cosmopolitan Object Fixer\n\
|
|
|
|
\n\
|
|
|
|
SYNOPSIS\n\
|
|
|
|
\n\
|
|
|
|
",
|
2023-07-03 09:47:05 +00:00
|
|
|
program_invocation_name, " [FLAGS] OBJECT...\n\
|
2023-06-10 16:15:19 +00:00
|
|
|
\n\
|
|
|
|
DESCRIPTION\n\
|
|
|
|
\n\
|
|
|
|
This program applies fixups to ELF object files and executables that\n\
|
|
|
|
at build time whenever they're created by the toolchain. It's needed\n\
|
|
|
|
so that zip assets work correctly, plus this'll make code go faster.\n\
|
|
|
|
This program is also able to spot some coding errors like privileged\n\
|
|
|
|
functions calling unprivileged ones.\n\
|
|
|
|
\n\
|
|
|
|
Multiple binary files may be specified, which are modified in-place.\n\
|
|
|
|
\n\
|
|
|
|
FLAGS\n\
|
|
|
|
\n\
|
|
|
|
-h show this help\n\
|
|
|
|
-c checks only mode\n\
|
|
|
|
\n\
|
|
|
|
",
|
2023-07-03 09:47:05 +00:00
|
|
|
NULL);
|
2023-06-10 16:15:19 +00:00
|
|
|
exit(exitcode);
|
|
|
|
}
|
|
|
|
|
2023-06-08 21:29:22 +00:00
|
|
|
static void GetOpts(int argc, char *argv[]) {
|
2022-08-06 10:51:50 +00:00
|
|
|
int opt;
|
2023-06-08 11:37:05 +00:00
|
|
|
mode = O_RDWR;
|
2023-06-10 16:15:19 +00:00
|
|
|
while ((opt = getopt(argc, argv, "ch")) != -1) {
|
2022-08-06 10:51:50 +00:00
|
|
|
switch (opt) {
|
2023-06-08 11:37:05 +00:00
|
|
|
case 'c':
|
|
|
|
mode = O_RDONLY;
|
|
|
|
break;
|
2022-08-06 10:51:50 +00:00
|
|
|
case 'h':
|
2023-06-10 16:15:19 +00:00
|
|
|
PrintUsage(1, 0);
|
2022-08-06 10:51:50 +00:00
|
|
|
default:
|
2023-06-10 16:15:19 +00:00
|
|
|
PrintUsage(2, 1);
|
2022-08-06 10:51:50 +00:00
|
|
|
}
|
|
|
|
}
|
2023-06-10 16:15:19 +00:00
|
|
|
if (optind == argc) {
|
2023-07-03 09:47:05 +00:00
|
|
|
tinyprint(2,
|
|
|
|
"error: no elf object files specified\n"
|
|
|
|
"run ",
|
|
|
|
program_invocation_name, " -h for usage\n", NULL);
|
2023-06-10 16:15:19 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
2022-08-06 10:51:50 +00:00
|
|
|
}
|
|
|
|
|
2023-11-11 22:04:26 +00:00
|
|
|
// Official Intel Multibyte No-Operation Instructions. See
// Intel's Six Thousand Page Manual, Volume 2, Table 4-12:
// On "Recommended Multi-Byte Sequence of NOP Instruction"
//
// Row `i` holds the `i` byte long encoding; row zero is unused.
static const unsigned char kNops[10][10] = {
    {0},                                         //
    {0x90},                                      // nop
    {0x66, 0x90},                                // xchg %ax,%ax
    {0x0f, 0x1f, 0000},                          // nopl (%rax)
    {0x0f, 0x1f, 0100, 0},                       // nopl 0x00(%rax)
    {0x0f, 0x1f, 0104, 0000, 0},                 // nopl 0x00(%rax,%rax,1)
    {0x66, 0x0f, 0x1f, 0104, 0000, 0},           // nopw 0x00(%rax,%rax,1)
    {0x0f, 0x1f, 0200, 0000, 0, 0, 0},           // nopl 0x00000000(%rax)
    {0x0F, 0x1F, 0204, 0000, 0, 0, 0, 0},        // nopl 0x00000000(%rax,%rax,1)
    {0x66, 0x0F, 0x1F, 0204, 0000, 0, 0, 0, 0},  // nopw 0x00000000(%rax,%rax,1)
};

/**
 * Rewrites leading NOP instructions to have fewer instructions.
 *
 * Starting at `p`, every run of two or more consecutive one-byte NOPs
 * (0x90) is overwritten with multi-byte NOPs from `kNops`, taking up to
 * nine bytes at a time. Scanning stops at the first position that does
 * not begin with two 0x90 bytes; later NOPs are left untouched.
 *
 * For example, twelve `nop` bytes followed by `ret; nop; nop` become:
 *
 *     nopw 0x00000000(%rax,%rax,1)    (9 bytes)
 *     nopl (%rax)                     (3 bytes)
 *     ret
 *     nop
 *     nop
 *
 * @param p points to memory region that shall be modified
 * @param e points to end of memory region, i.e. `p + #bytes`
 * @return p advanced past last morphed byte
 */
static unsigned char *CoalesceNops(unsigned char *p, const unsigned char *e) {
  const long longest = (long)(sizeof(kNops) / sizeof(kNops[0])) - 1;
  while (p + 1 < e && p[0] == 0x90 && p[1] == 0x90) {
    long run = 2;
    // extend the run while more nops follow and a longer encoding exists
    while (p + run < e && p[run] == 0x90 && run != longest)
      ++run;
    memcpy(p, kNops[run], run);
    p += run;
  }
  return p;
}
|
|
|
|
|
2023-06-08 21:29:22 +00:00
|
|
|
static void CheckPrivilegedCrossReferences(void) {
|
2023-06-10 16:15:19 +00:00
|
|
|
unsigned long x;
|
2023-06-08 11:37:05 +00:00
|
|
|
const char *secname;
|
2023-09-02 03:49:13 +00:00
|
|
|
const Elf64_Shdr *shdr;
|
|
|
|
const Elf64_Rela *rela, *erela;
|
2023-07-11 12:48:39 +00:00
|
|
|
shdr = FindElfSectionByName(elf, esize, secstrs, ".rela.privileged");
|
|
|
|
if (!shdr || !(rela = GetElfSectionAddress(elf, esize, shdr)))
|
|
|
|
return;
|
2023-06-08 11:37:05 +00:00
|
|
|
erela = rela + shdr->sh_size / sizeof(*rela);
|
|
|
|
for (; rela < erela; ++rela) {
|
|
|
|
if (!ELF64_R_TYPE(rela->r_info))
|
|
|
|
continue;
|
|
|
|
if (!(x = ELF64_R_SYM(rela->r_info)))
|
|
|
|
continue;
|
2023-06-10 16:15:19 +00:00
|
|
|
if (x >= symcount)
|
|
|
|
continue;
|
2023-06-08 11:37:05 +00:00
|
|
|
if (syms[x].st_shndx == SHN_ABS)
|
|
|
|
continue;
|
|
|
|
if (!syms[x].st_shndx)
|
|
|
|
continue;
|
2023-06-10 16:15:19 +00:00
|
|
|
if ((shdr = GetElfSectionHeaderAddress(elf, esize, syms[x].st_shndx))) {
|
|
|
|
if (~shdr->sh_flags & SHF_EXECINSTR)
|
|
|
|
continue; // data reference
|
|
|
|
if ((secname = GetElfString(elf, esize, secstrs, shdr->sh_name)) &&
|
2024-08-16 18:05:37 +00:00
|
|
|
!startswith(secname, ".privileged")) {
|
2023-07-03 09:47:05 +00:00
|
|
|
tinyprint(2, epath,
|
|
|
|
": code in .privileged section "
|
|
|
|
"references symbol '",
|
|
|
|
GetElfString(elf, esize, symstrs, syms[x].st_name),
|
|
|
|
"' in unprivileged code section '", secname, "'\n", NULL);
|
2023-06-10 16:15:19 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
2023-06-08 11:37:05 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Release Cosmopolitan v3.3
This change upgrades to GCC 12.3 and GNU binutils 2.42. The GNU linker
appears to have changed things so that only a single de-duplicated str
table is present in the binary, and it gets placed wherever the linker
wants, regardless of what the linker script says. To cope with that we
need to stop using .ident to embed licenses. As such, this change does
significant work to revamp how third party licenses are defined in the
codebase, using `.section .notice,"aR",@progbits`.
This new GCC 12.3 toolchain has support for GNU indirect functions. It
lets us support __target_clones__ for the first time. This is used for
optimizing the performance of libc string functions such as strlen and
friends so far on x86, by ensuring AVX systems favor a second codepath
that uses VEX encoding. It shaves some latency off certain operations.
It's a useful feature to have for scientific computing for the reasons
explained by the test/libcxx/openmp_test.cc example which compiles for
fifteen different microarchitectures. Thanks to the upgrades, it's now
also possible to use newer instruction sets, such as AVX512FP16, VNNI.
Cosmo now uses the %gs register on x86 by default for TLS. Doing it is
helpful for any program that links `cosmo_dlopen()`. Such programs had
to recompile their binaries at startup to change the TLS instructions.
That's not great, since it means every page in the executable needs to
be faulted. The work of rewriting TLS-related x86 opcodes, is moved to
fixupobj.com instead. This is great news for MacOS x86 users, since we
previously needed to morph the binary every time for that platform but
now that's no longer necessary. The only platforms where we need fixup
of TLS x86 opcodes at runtime are now Windows, OpenBSD, and NetBSD. On
Windows we morph TLS to point deeper into the TIB, based on a TlsAlloc
assignment, and on OpenBSD/NetBSD we morph %gs back into %fs since the
kernels do not allow us to specify a value for the %gs register.
OpenBSD users are now required to use APE Loader to run Cosmo binaries
and assimilation is no longer possible. OpenBSD kernel needs to change
to allow programs to specify a value for the %gs register, or it needs
to stop marking executable pages loaded by the kernel as mimmutable().
This release fixes __constructor__, .ctor, .init_array, and lastly the
.preinit_array so they behave the exact same way as glibc.
We no longer use hex constants to define math.h symbols like M_PI.
2024-02-20 19:12:09 +00:00
|
|
|
// Change AMD code to use %gs:0x30 instead of %fs:0
// We assume -mno-tls-direct-seg-refs has been used
//
// Scans the `n` bytes at `p` for the nine byte encodings of a 64-bit
// `mov` or `add` that loads from `%fs:0` into a register, and patches
// each match in place: the segment prefix becomes %gs and the low byte
// of the displacement becomes 0x30. After a match the scan resumes past
// the instruction; otherwise it advances one byte.
//
// Buffers shorter than nine bytes can't hold the instruction and are
// left untouched. Checking the length up front also avoids computing a
// pointer that lies before the start of the buffer.
static void ChangeTlsFsToGs(unsigned char *p, size_t n) {
  size_t i = 0;
  if (n < 9)
    return;
  while (i <= n - 9) {
    unsigned char *q = p + i;
    if (0144 == q[0] &&           // %fs
        0110 == (q[1] & 0373) &&  // rex.w (and ignore rex.r)
        (0213 == q[2] ||          // mov reg/mem -> reg (word-sized)
         0003 == q[2]) &&         // add reg/mem -> reg (word-sized)
        0004 == (q[3] & 0307) &&  // mod/rm (4,reg,0) means sib -> reg
        0045 == q[4] &&           // sib (5,4,0) -> (rbp,rsp,0) -> disp32
        0000 == q[5] &&           // displacement (von Neumann endian)
        0000 == q[6] &&           // displacement
        0000 == q[7] &&           // displacement
        0000 == q[8]) {           // displacement
      q[0] = 0145;  // change %fs to %gs
      q[5] = 0x30;  // change 0 to 0x30
      i += 9;
    } else {
      ++i;
    }
  }
}
|
|
|
|
|
|
|
|
static void RewriteTlsCodeAmd64(void) {
|
|
|
|
int i;
|
|
|
|
uint8_t *p;
|
|
|
|
Elf64_Shdr *shdr;
|
|
|
|
for (i = 0; i < elf->e_shnum; ++i) {
|
2024-05-05 23:14:43 +00:00
|
|
|
if (!(shdr = GetElfSectionHeaderAddress(elf, esize, i)))
|
Release Cosmopolitan v3.3
This change upgrades to GCC 12.3 and GNU binutils 2.42. The GNU linker
appears to have changed things so that only a single de-duplicated str
table is present in the binary, and it gets placed wherever the linker
wants, regardless of what the linker script says. To cope with that we
need to stop using .ident to embed licenses. As such, this change does
significant work to revamp how third party licenses are defined in the
codebase, using `.section .notice,"aR",@progbits`.
This new GCC 12.3 toolchain has support for GNU indirect functions. It
lets us support __target_clones__ for the first time. This is used for
optimizing the performance of libc string functions such as strlen and
friends so far on x86, by ensuring AVX systems favor a second codepath
that uses VEX encoding. It shaves some latency off certain operations.
It's a useful feature to have for scientific computing for the reasons
explained by the test/libcxx/openmp_test.cc example which compiles for
fifteen different microarchitectures. Thanks to the upgrades, it's now
also possible to use newer instruction sets, such as AVX512FP16, VNNI.
Cosmo now uses the %gs register on x86 by default for TLS. Doing it is
helpful for any program that links `cosmo_dlopen()`. Such programs had
to recompile their binaries at startup to change the TLS instructions.
That's not great, since it means every page in the executable needs to
be faulted. The work of rewriting TLS-related x86 opcodes, is moved to
fixupobj.com instead. This is great news for MacOS x86 users, since we
previously needed to morph the binary every time for that platform but
now that's no longer necessary. The only platforms where we need fixup
of TLS x86 opcodes at runtime are now Windows, OpenBSD, and NetBSD. On
Windows we morph TLS to point deeper into the TIB, based on a TlsAlloc
assignment, and on OpenBSD/NetBSD we morph %gs back into %fs since the
kernels do not allow us to specify a value for the %gs register.
OpenBSD users are now required to use APE Loader to run Cosmo binaries
and assimilation is no longer possible. OpenBSD kernel needs to change
to allow programs to specify a value for the %gs register, or it needs
to stop marking executable pages loaded by the kernel as mimmutable().
This release fixes __constructor__, .ctor, .init_array, and lastly the
.preinit_array so they behave the exact same way as glibc.
We no longer use hex constants to define math.h symbols like M_PI.
2024-02-20 19:12:09 +00:00
|
|
|
Die("elf header overflow #1");
|
|
|
|
if (shdr->sh_type == SHT_PROGBITS && //
|
|
|
|
(shdr->sh_flags & SHF_ALLOC) && //
|
|
|
|
(shdr->sh_flags & SHF_EXECINSTR)) {
|
2024-05-05 23:14:43 +00:00
|
|
|
if (!(p = GetElfSectionAddress(elf, esize, shdr)))
|
Release Cosmopolitan v3.3
This change upgrades to GCC 12.3 and GNU binutils 2.42. The GNU linker
appears to have changed things so that only a single de-duplicated str
table is present in the binary, and it gets placed wherever the linker
wants, regardless of what the linker script says. To cope with that we
need to stop using .ident to embed licenses. As such, this change does
significant work to revamp how third party licenses are defined in the
codebase, using `.section .notice,"aR",@progbits`.
This new GCC 12.3 toolchain has support for GNU indirect functions. It
lets us support __target_clones__ for the first time. This is used for
optimizing the performance of libc string functions such as strlen and
friends so far on x86, by ensuring AVX systems favor a second codepath
that uses VEX encoding. It shaves some latency off certain operations.
It's a useful feature to have for scientific computing for the reasons
explained by the test/libcxx/openmp_test.cc example which compiles for
fifteen different microarchitectures. Thanks to the upgrades, it's now
also possible to use newer instruction sets, such as AVX512FP16, VNNI.
Cosmo now uses the %gs register on x86 by default for TLS. Doing it is
helpful for any program that links `cosmo_dlopen()`. Such programs had
to recompile their binaries at startup to change the TLS instructions.
That's not great, since it means every page in the executable needs to
be faulted. The work of rewriting TLS-related x86 opcodes, is moved to
fixupobj.com instead. This is great news for MacOS x86 users, since we
previously needed to morph the binary every time for that platform but
now that's no longer necessary. The only platforms where we need fixup
of TLS x86 opcodes at runtime are now Windows, OpenBSD, and NetBSD. On
Windows we morph TLS to point deeper into the TIB, based on a TlsAlloc
assignment, and on OpenBSD/NetBSD we morph %gs back into %fs since the
kernels do not allow us to specify a value for the %gs register.
OpenBSD users are now required to use APE Loader to run Cosmo binaries
and assimilation is no longer possible. OpenBSD kernel needs to change
to allow programs to specify a value for the %gs register, or it needs
to stop marking executable pages loaded by the kernel as mimmutable().
This release fixes __constructor__, .ctor, .init_array, and lastly the
.preinit_array so they behave the exact same way as glibc.
We no longer use hex constants to define math.h symbols like M_PI.
2024-02-20 19:12:09 +00:00
|
|
|
Die("elf header overflow #2");
|
|
|
|
ChangeTlsFsToGs(p, shdr->sh_size);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-05-19 02:05:08 +00:00
|
|
|
// Modify ARM64 code to use x28 for TLS rather than tpidr_el0.
|
Release Cosmopolitan v3.3
This change upgrades to GCC 12.3 and GNU binutils 2.42. The GNU linker
appears to have changed things so that only a single de-duplicated str
table is present in the binary, and it gets placed wherever the linker
wants, regardless of what the linker script says. To cope with that we
need to stop using .ident to embed licenses. As such, this change does
significant work to revamp how third party licenses are defined in the
codebase, using `.section .notice,"aR",@progbits`.
This new GCC 12.3 toolchain has support for GNU indirect functions. It
lets us support __target_clones__ for the first time. This is used for
optimizing the performance of libc string functions such as strlen and
friends so far on x86, by ensuring AVX systems favor a second codepath
that uses VEX encoding. It shaves some latency off certain operations.
It's a useful feature to have for scientific computing for the reasons
explained by the test/libcxx/openmp_test.cc example which compiles for
fifteen different microarchitectures. Thanks to the upgrades, it's now
also possible to use newer instruction sets, such as AVX512FP16, VNNI.
Cosmo now uses the %gs register on x86 by default for TLS. Doing it is
helpful for any program that links `cosmo_dlopen()`. Such programs had
to recompile their binaries at startup to change the TLS instructions.
That's not great, since it means every page in the executable needs to
be faulted. The work of rewriting TLS-related x86 opcodes, is moved to
fixupobj.com instead. This is great news for MacOS x86 users, since we
previously needed to morph the binary every time for that platform but
now that's no longer necessary. The only platforms where we need fixup
of TLS x86 opcodes at runtime are now Windows, OpenBSD, and NetBSD. On
Windows we morph TLS to point deeper into the TIB, based on a TlsAlloc
assignment, and on OpenBSD/NetBSD we morph %gs back into %fs since the
kernels do not allow us to specify a value for the %gs register.
OpenBSD users are now required to use APE Loader to run Cosmo binaries
and assimilation is no longer possible. OpenBSD kernel needs to change
to allow programs to specify a value for the %gs register, or it needs
to stop marking executable pages loaded by the kernel as mimmutable().
This release fixes __constructor__, .ctor, .init_array, and lastly the
.preinit_array so they behave the exact same way as glibc.
We no longer use hex constants to define math.h symbols like M_PI.
2024-02-20 19:12:09 +00:00
|
|
|
static void RewriteTlsCodeArm64(void) {
|
2023-09-02 03:49:13 +00:00
|
|
|
int i;
|
2023-05-19 02:05:08 +00:00
|
|
|
Elf64_Shdr *shdr;
|
|
|
|
uint32_t *p, *pe;
|
|
|
|
for (i = 0; i < elf->e_shnum; ++i) {
|
2024-05-05 23:14:43 +00:00
|
|
|
if (!(shdr = GetElfSectionHeaderAddress(elf, esize, i)))
|
2023-07-28 13:17:34 +00:00
|
|
|
Die("elf header overflow #1");
|
2023-06-10 16:15:19 +00:00
|
|
|
if (shdr->sh_type == SHT_PROGBITS && //
|
|
|
|
(shdr->sh_flags & SHF_ALLOC) && //
|
|
|
|
(shdr->sh_flags & SHF_EXECINSTR)) {
|
2024-05-05 23:14:43 +00:00
|
|
|
if (!(p = GetElfSectionAddress(elf, esize, shdr)))
|
2023-07-28 13:17:34 +00:00
|
|
|
Die("elf header overflow #2");
|
2024-05-05 23:14:43 +00:00
|
|
|
for (pe = p + shdr->sh_size / 4; p <= pe; ++p)
|
|
|
|
if ((*p & -32) == MRS_TPIDR_EL0)
|
2023-05-19 02:05:08 +00:00
|
|
|
*p = MOV_REG(*p & 31, COSMO_TLS_REG);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-12-30 04:11:23 +00:00
|
|
|
static void UseFreebsdOsAbi(void) {
|
|
|
|
elf->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
|
|
|
|
}
|
|
|
|
|
2024-05-08 00:42:18 +00:00
|
|
|
static void WriteApeFlags(void) {
|
|
|
|
/* try to be forward-compatible */
|
|
|
|
elf->e_flags = (elf->e_flags & ~EF_APE_MODERN_MASK) | EF_APE_MODERN;
|
|
|
|
}
|
|
|
|
|
2023-06-06 06:35:31 +00:00
|
|
|
/**
|
|
|
|
* Improve GCC11 `-fpatchable-function-entry` codegen.
|
|
|
|
*
|
|
|
|
* When using flags like `-fpatchable-function-entry=9,7` GCC v11 will
|
|
|
|
* insert two `nop` instructions, rather than merging them into faster
|
|
|
|
* "fat" nops.
|
|
|
|
*
|
|
|
|
* In order for this to work, the function symbol must be declared as
|
|
|
|
* `STT_FUNC` and `st_size` must have the function's byte length.
|
|
|
|
*/
|
2023-06-08 21:29:22 +00:00
|
|
|
static void OptimizePatchableFunctionEntries(void) {
|
2023-11-11 22:04:26 +00:00
|
|
|
long i;
|
2023-06-06 06:35:31 +00:00
|
|
|
Elf64_Shdr *shdr;
|
2023-11-11 22:04:26 +00:00
|
|
|
unsigned char *p;
|
|
|
|
Elf64_Addr sym_rva;
|
|
|
|
if (elf->e_machine == EM_NEXGEN32E) {
|
|
|
|
for (i = 0; i < symcount; ++i) {
|
|
|
|
if (!syms[i].st_size)
|
|
|
|
continue;
|
|
|
|
if (ELF64_ST_TYPE(syms[i].st_info) != STT_FUNC)
|
|
|
|
continue;
|
2024-05-05 23:14:43 +00:00
|
|
|
if (!(shdr = GetElfSectionHeaderAddress(elf, esize, syms[i].st_shndx)))
|
2023-11-11 22:04:26 +00:00
|
|
|
Die("elf header overflow #3");
|
|
|
|
if (shdr->sh_type != SHT_PROGBITS)
|
|
|
|
continue;
|
2024-05-05 23:14:43 +00:00
|
|
|
if (!(p = GetElfSectionAddress(elf, esize, shdr)))
|
2023-11-11 22:04:26 +00:00
|
|
|
Die("elf section overflow");
|
2024-05-05 23:14:43 +00:00
|
|
|
if (ckd_sub(&sym_rva, syms[i].st_value, shdr->sh_addr))
|
2023-11-11 22:04:26 +00:00
|
|
|
Die("elf symbol beneath section");
|
|
|
|
if (sym_rva > esize - shdr->sh_offset || //
|
|
|
|
(p += sym_rva) >= (unsigned char *)elf + esize || //
|
|
|
|
syms[i].st_size >= esize - sym_rva) {
|
|
|
|
Die("elf symbol overflow");
|
2023-06-06 06:35:31 +00:00
|
|
|
}
|
2023-11-11 22:04:26 +00:00
|
|
|
CoalesceNops(p, p + syms[i].st_size);
|
2023-06-06 06:35:31 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-06-10 16:15:19 +00:00
|
|
|
/**
|
|
|
|
* Converts PKZIP recs from PC-relative to RVA-relative.
|
|
|
|
*/
|
|
|
|
static void RelinkZipFiles(void) {
|
|
|
|
int rela, recs;
|
|
|
|
unsigned long cdsize, cdoffset;
|
|
|
|
unsigned char foot[kZipCdirHdrMinSize];
|
|
|
|
unsigned char *base, *xeof, *stop, *eocd, *cdir, *lfile, *cfile;
|
|
|
|
base = (unsigned char *)elf;
|
|
|
|
xeof = (unsigned char *)elf + esize;
|
|
|
|
eocd = xeof - kZipCdirHdrMinSize;
|
|
|
|
stop = base;
|
|
|
|
// scan backwards for zip eocd todo record
|
|
|
|
// that was created by libc/nexgen32e/zip.S
|
|
|
|
for (;;) {
|
|
|
|
if (eocd < stop)
|
|
|
|
return;
|
|
|
|
if (READ32LE(eocd) == kZipCdirHdrMagicTodo && //
|
|
|
|
ZIP_CDIR_SIZE(eocd) && //
|
|
|
|
!ZIP_CDIR_OFFSET(eocd) && //
|
|
|
|
!ZIP_CDIR_RECORDS(eocd) && //
|
|
|
|
!ZIP_CDIR_RECORDSONDISK(eocd)) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
eocd = memrchr(stop, 'P', eocd - base);
|
|
|
|
}
|
|
|
|
// apply fixups to zip central directory recs
|
|
|
|
recs = 0;
|
|
|
|
cdir = (stop = eocd) - (cdsize = ZIP_CDIR_SIZE(eocd));
|
|
|
|
for (cfile = cdir; cfile < stop; cfile += ZIP_CFILE_HDRSIZE(cfile)) {
|
2024-05-05 23:14:43 +00:00
|
|
|
if (++recs >= 65536)
|
2023-06-10 16:15:19 +00:00
|
|
|
Die("too many zip central directory records");
|
|
|
|
if (cfile < base || //
|
|
|
|
cfile + kZipCfileHdrMinSize > xeof || //
|
2024-05-05 23:14:43 +00:00
|
|
|
cfile + ZIP_CFILE_HDRSIZE(cfile) > xeof)
|
2023-06-10 16:15:19 +00:00
|
|
|
Die("zip central directory entry overflows image");
|
2024-05-05 23:14:43 +00:00
|
|
|
if (READ32LE(cfile) != kZipCfileHdrMagic)
|
2023-06-10 16:15:19 +00:00
|
|
|
Die("bad __zip_cdir_size or zip central directory corrupted");
|
|
|
|
if ((rela = ZIP_CFILE_OFFSET(cfile)) < 0) {
|
|
|
|
lfile = cfile + kZipCfileOffsetOffset + rela;
|
|
|
|
} else {
|
|
|
|
lfile = base + rela; // earlier fixup failed partway?
|
|
|
|
}
|
|
|
|
if (lfile < base || //
|
|
|
|
lfile + kZipLfileHdrMinSize > xeof || //
|
2024-05-05 23:14:43 +00:00
|
|
|
lfile + ZIP_LFILE_SIZE(lfile) > xeof)
|
2023-06-10 16:15:19 +00:00
|
|
|
Die("zip local file overflows image");
|
2024-05-05 23:14:43 +00:00
|
|
|
if (READ32LE(lfile) != kZipLfileHdrMagic)
|
2023-06-10 16:15:19 +00:00
|
|
|
Die("zip central directory offset to local file corrupted");
|
2024-05-05 23:14:43 +00:00
|
|
|
if (rela < 0)
|
2023-06-10 16:15:19 +00:00
|
|
|
WRITE32LE(cfile + kZipCfileOffsetOffset, lfile - base);
|
|
|
|
}
|
|
|
|
// append new eocd record to program image
|
|
|
|
if (esize > INT_MAX - sizeof(foot) ||
|
2024-05-05 23:14:43 +00:00
|
|
|
(cdoffset = esize) > INT_MAX - sizeof(foot))
|
2023-06-10 16:15:19 +00:00
|
|
|
Die("the time has come to adopt zip64");
|
|
|
|
bzero(foot, sizeof(foot));
|
|
|
|
WRITE32LE(foot, kZipCdirHdrMagic);
|
|
|
|
WRITE32LE(foot + kZipCdirSizeOffset, cdsize);
|
|
|
|
WRITE16LE(foot + kZipCdirRecordsOffset, recs);
|
|
|
|
WRITE32LE(foot + kZipCdirOffsetOffset, cdoffset);
|
|
|
|
WRITE16LE(foot + kZipCdirRecordsOnDiskOffset, recs);
|
2024-05-05 23:14:43 +00:00
|
|
|
if (pwrite(fildes, cdir, cdsize, esize) != cdsize)
|
2023-06-10 16:15:19 +00:00
|
|
|
SysExit("cdir pwrite");
|
2024-05-05 23:14:43 +00:00
|
|
|
if (pwrite(fildes, foot, sizeof(foot), esize + cdsize) != sizeof(foot))
|
2023-06-10 16:15:19 +00:00
|
|
|
SysExit("eocd pwrite");
|
|
|
|
eocd = foot;
|
2022-08-06 10:51:50 +00:00
|
|
|
}
|
|
|
|
|
2024-02-01 11:39:46 +00:00
|
|
|
// when __attribute__((__target_clones__(...))) is used, the compiler
|
|
|
|
// will generate multiple implementations of a function for different
|
|
|
|
// microarchitectures as well as a resolver function that tells which
|
|
|
|
// function is appropriate to call. however the compiler doesn't make
|
|
|
|
// code for the actual function. it also doesn't record where resolve
|
|
|
|
// functions are located in the binary so we've reverse eng'd it here
|
|
|
|
static void GenerateIfuncInit(void) {
|
|
|
|
char *name, *s;
|
|
|
|
long code_i = 0;
|
|
|
|
long relas_i = 0;
|
|
|
|
static char code[16384];
|
|
|
|
static Elf64_Rela relas[1024];
|
|
|
|
Elf64_Shdr *symtab_shdr = GetElfSymbolTable(elf, esize, SHT_SYMTAB, 0);
|
|
|
|
if (!symtab_shdr)
|
|
|
|
Die("symbol table section header not found");
|
|
|
|
Elf64_Word symtab_shdr_index =
|
|
|
|
((char *)symtab_shdr - ((char *)elf + elf->e_shoff)) / elf->e_shentsize;
|
|
|
|
for (Elf64_Xword i = 0; i < symcount; ++i) {
|
|
|
|
if (syms[i].st_shndx == SHN_UNDEF)
|
|
|
|
continue;
|
|
|
|
if (syms[i].st_shndx >= SHN_LORESERVE)
|
|
|
|
continue;
|
|
|
|
if (ELF64_ST_TYPE(syms[i].st_info) != STT_GNU_IFUNC)
|
|
|
|
continue;
|
|
|
|
if (!(name = GetElfString(elf, esize, symstrs, syms[i].st_name)))
|
|
|
|
Die("could not get symbol name of ifunc");
|
|
|
|
static char resolver_name[65536];
|
|
|
|
strlcpy(resolver_name, name, sizeof(resolver_name));
|
|
|
|
if (strlcat(resolver_name, ".resolver", sizeof(resolver_name)) >=
|
|
|
|
sizeof(resolver_name))
|
|
|
|
Die("ifunc name too long");
|
|
|
|
Elf64_Xword function_sym_index = i;
|
|
|
|
Elf64_Xword resolver_sym_index = -1;
|
|
|
|
for (Elf64_Xword i = 0; i < symcount; ++i) {
|
|
|
|
if (syms[i].st_shndx == SHN_UNDEF)
|
|
|
|
continue;
|
|
|
|
if (syms[i].st_shndx >= SHN_LORESERVE)
|
|
|
|
continue;
|
|
|
|
if (ELF64_ST_TYPE(syms[i].st_info) != STT_FUNC)
|
|
|
|
continue;
|
|
|
|
if (!(s = GetElfString(elf, esize, symstrs, syms[i].st_name)))
|
|
|
|
continue;
|
|
|
|
if (strcmp(s, resolver_name))
|
|
|
|
continue;
|
|
|
|
resolver_sym_index = i;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (resolver_sym_index == -1)
|
|
|
|
// this can happen if a function with __target_clones() also has a
|
|
|
|
// __weak_reference() defined, in which case GCC shall only create
|
|
|
|
// one resolver function for the two of them so we can ignore this
|
|
|
|
// HOWEVER the GOT will still have an entry for each two functions
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// call the resolver (using cosmo's special .init abi)
|
|
|
|
static const char chunk1[] = {
|
|
|
|
0x57, // push %rdi
|
|
|
|
0x56, // push %rsi
|
|
|
|
0xe8, 0x00, 0x00, 0x00, 0x00, // call f.resolver
|
|
|
|
};
|
|
|
|
if (code_i + sizeof(chunk1) > sizeof(code) || relas_i + 1 > ARRAYLEN(relas))
|
|
|
|
Die("too many ifuncs");
|
|
|
|
memcpy(code + code_i, chunk1, sizeof(chunk1));
|
|
|
|
relas[relas_i].r_info = ELF64_R_INFO(resolver_sym_index, R_X86_64_PLT32);
|
|
|
|
relas[relas_i].r_offset = code_i + 1 + 1 + 1;
|
|
|
|
relas[relas_i].r_addend = -4;
|
|
|
|
code_i += sizeof(chunk1);
|
|
|
|
relas_i += 1;
|
|
|
|
|
|
|
|
// move the resolved function address into the GOT slot. it's very
|
|
|
|
// important that this happen, because the linker by default makes
|
|
|
|
// self-referencing PLT functions whose execution falls through oh
|
|
|
|
// no. we need to repeat this process for any aliases this defines
|
|
|
|
static const char chunk2[] = {
|
|
|
|
0x48, 0x89, 0x05, 0x00, 0x00, 0x00, 0x00, // mov %rax,f@gotpcrel(%rip)
|
|
|
|
};
|
|
|
|
for (Elf64_Xword i = 0; i < symcount; ++i) {
|
|
|
|
if (i == function_sym_index ||
|
|
|
|
(ELF64_ST_TYPE(syms[i].st_info) == STT_GNU_IFUNC &&
|
|
|
|
syms[i].st_shndx == syms[function_sym_index].st_shndx &&
|
|
|
|
syms[i].st_value == syms[function_sym_index].st_value)) {
|
|
|
|
if (code_i + sizeof(chunk2) > sizeof(code) ||
|
|
|
|
relas_i + 1 > ARRAYLEN(relas))
|
|
|
|
Die("too many ifuncs");
|
|
|
|
memcpy(code + code_i, chunk2, sizeof(chunk2));
|
|
|
|
relas[relas_i].r_info = ELF64_R_INFO(i, R_X86_64_GOTPCREL);
|
|
|
|
relas[relas_i].r_offset = code_i + 3;
|
|
|
|
relas[relas_i].r_addend = -4;
|
|
|
|
code_i += sizeof(chunk2);
|
|
|
|
relas_i += 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char chunk3[] = {
|
|
|
|
0x5e, // pop %rsi
|
|
|
|
0x5f, // pop %rdi
|
|
|
|
};
|
|
|
|
if (code_i + sizeof(chunk3) > sizeof(code))
|
|
|
|
Die("too many ifuncs");
|
|
|
|
memcpy(code + code_i, chunk3, sizeof(chunk3));
|
|
|
|
code_i += sizeof(chunk3);
|
|
|
|
}
|
|
|
|
if (!code_i)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// prepare to mutate elf
|
|
|
|
// remap file so it has more space
|
|
|
|
if (elf->e_shnum + 2 > 65535)
|
|
|
|
Die("too many sections");
|
|
|
|
size_t reserve_size = esize + 32 * 1024 * 1024;
|
2024-03-24 10:14:25 +00:00
|
|
|
elf = Realloc(elf, reserve_size);
|
2024-02-01 11:39:46 +00:00
|
|
|
|
|
|
|
// duplicate section name strings table to end of file
|
|
|
|
Elf64_Shdr *shdrstr_shdr = (Elf64_Shdr *)((char *)elf + elf->e_shoff +
|
|
|
|
elf->e_shstrndx * elf->e_shentsize);
|
|
|
|
memcpy((char *)elf + esize, (char *)elf + shdrstr_shdr->sh_offset,
|
|
|
|
shdrstr_shdr->sh_size);
|
|
|
|
shdrstr_shdr->sh_offset = esize;
|
|
|
|
esize += shdrstr_shdr->sh_size;
|
|
|
|
|
|
|
|
// append strings for the two sections we're creating
|
|
|
|
const char *code_section_name = IFUNC_SECTION;
|
|
|
|
Elf64_Word code_section_name_offset = shdrstr_shdr->sh_size;
|
|
|
|
memcpy((char *)elf + esize, code_section_name, strlen(code_section_name) + 1);
|
|
|
|
shdrstr_shdr->sh_size += strlen(code_section_name) + 1;
|
|
|
|
esize += strlen(code_section_name) + 1;
|
|
|
|
const char *rela_section_name = ".rela" IFUNC_SECTION;
|
|
|
|
Elf64_Word rela_section_name_offset = shdrstr_shdr->sh_size;
|
|
|
|
memcpy((char *)elf + esize, rela_section_name, strlen(rela_section_name) + 1);
|
|
|
|
shdrstr_shdr->sh_size += strlen(rela_section_name) + 1;
|
|
|
|
esize += strlen(rela_section_name) + 1;
|
|
|
|
unassert(esize == shdrstr_shdr->sh_offset + shdrstr_shdr->sh_size);
|
|
|
|
++esize;
|
|
|
|
|
|
|
|
// duplicate section headers to end of file
|
|
|
|
esize = (esize + alignof(Elf64_Shdr) - 1) & -alignof(Elf64_Shdr);
|
|
|
|
memcpy((char *)elf + esize, (char *)elf + elf->e_shoff,
|
|
|
|
elf->e_shnum * elf->e_shentsize);
|
|
|
|
elf->e_shoff = esize;
|
|
|
|
esize += elf->e_shnum * elf->e_shentsize;
|
|
|
|
unassert(esize == elf->e_shoff + elf->e_shnum * elf->e_shentsize);
|
|
|
|
|
|
|
|
// append code section header
|
|
|
|
Elf64_Shdr *code_shdr = (Elf64_Shdr *)((char *)elf + esize);
|
|
|
|
Elf64_Word code_shdr_index = elf->e_shnum++;
|
|
|
|
esize += elf->e_shentsize;
|
|
|
|
code_shdr->sh_name = code_section_name_offset;
|
|
|
|
code_shdr->sh_type = SHT_PROGBITS;
|
|
|
|
code_shdr->sh_flags = SHF_ALLOC | SHF_EXECINSTR;
|
|
|
|
code_shdr->sh_addr = 0;
|
|
|
|
code_shdr->sh_link = 0;
|
|
|
|
code_shdr->sh_info = 0;
|
|
|
|
code_shdr->sh_entsize = 1;
|
|
|
|
code_shdr->sh_addralign = 1;
|
|
|
|
code_shdr->sh_size = code_i;
|
|
|
|
|
|
|
|
// append code's rela section header
|
|
|
|
Elf64_Shdr *rela_shdr = (Elf64_Shdr *)((char *)elf + esize);
|
|
|
|
esize += elf->e_shentsize;
|
|
|
|
rela_shdr->sh_name = rela_section_name_offset;
|
|
|
|
rela_shdr->sh_type = SHT_RELA;
|
|
|
|
rela_shdr->sh_flags = SHF_INFO_LINK;
|
|
|
|
rela_shdr->sh_addr = 0;
|
|
|
|
rela_shdr->sh_info = code_shdr_index;
|
|
|
|
rela_shdr->sh_link = symtab_shdr_index;
|
|
|
|
rela_shdr->sh_entsize = sizeof(Elf64_Rela);
|
|
|
|
rela_shdr->sh_addralign = alignof(Elf64_Rela);
|
|
|
|
rela_shdr->sh_size = relas_i * sizeof(Elf64_Rela);
|
|
|
|
elf->e_shnum++;
|
|
|
|
|
|
|
|
// append relas
|
|
|
|
esize = (esize + 63) & -64;
|
|
|
|
rela_shdr->sh_offset = esize;
|
|
|
|
memcpy((char *)elf + esize, relas, relas_i * sizeof(Elf64_Rela));
|
|
|
|
esize += relas_i * sizeof(Elf64_Rela);
|
|
|
|
unassert(esize == rela_shdr->sh_offset + rela_shdr->sh_size);
|
|
|
|
|
|
|
|
// append code
|
|
|
|
esize = (esize + 63) & -64;
|
|
|
|
code_shdr->sh_offset = esize;
|
|
|
|
memcpy((char *)elf + esize, code, code_i);
|
|
|
|
esize += code_i;
|
|
|
|
unassert(esize == code_shdr->sh_offset + code_shdr->sh_size);
|
|
|
|
}
|
|
|
|
|
|
|
|
// when __attribute__((__target_clones__(...))) is used, static binaries
|
|
|
|
// become poisoned with rela IFUNC relocations, which the linker refuses
|
|
|
|
// to remove. even if we objcopy the ape executable as binary the linker
|
|
|
|
// preserves its precious ifunc code and puts them before the executable
|
|
|
|
// header. the good news is that the linker actually does link correctly
|
|
|
|
// which means we can delete the broken rela sections in the elf binary.
|
|
|
|
static void PurgeIfuncSections(void) {
|
|
|
|
Elf64_Shdr *shdrs = (Elf64_Shdr *)((char *)elf + elf->e_shoff);
|
|
|
|
for (Elf64_Word i = 0; i < elf->e_shnum; ++i) {
|
|
|
|
char *name;
|
|
|
|
if (shdrs[i].sh_type == SHT_RELA ||
|
|
|
|
((name = GetElfSectionName(elf, esize, shdrs + i)) &&
|
|
|
|
!strcmp(name, ".init.202.ifunc"))) {
|
|
|
|
shdrs[i].sh_type = SHT_NULL;
|
|
|
|
shdrs[i].sh_flags &= ~SHF_ALLOC;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-06-08 21:29:22 +00:00
|
|
|
static void FixupObject(void) {
|
2024-05-05 23:14:43 +00:00
|
|
|
if ((fildes = open(epath, mode)) == -1)
|
2023-06-08 11:37:05 +00:00
|
|
|
SysExit("open");
|
2024-05-05 23:14:43 +00:00
|
|
|
if ((esize = lseek(fildes, 0, SEEK_END)) == -1)
|
2023-06-08 11:37:05 +00:00
|
|
|
SysExit("lseek");
|
|
|
|
if (esize) {
|
2024-03-24 10:14:25 +00:00
|
|
|
elf = Malloc(esize);
|
2024-05-05 23:14:43 +00:00
|
|
|
if (pread(fildes, elf, esize, 0) != esize)
|
2024-03-24 10:14:25 +00:00
|
|
|
SysExit("pread");
|
2024-05-05 23:14:43 +00:00
|
|
|
if (!IsElf64Binary(elf, esize))
|
2023-06-10 16:15:19 +00:00
|
|
|
Die("not an elf64 binary");
|
2024-05-05 23:14:43 +00:00
|
|
|
if (!(syms = GetElfSymbols(elf, esize, SHT_SYMTAB, &symcount)))
|
2023-06-10 16:15:19 +00:00
|
|
|
Die("missing elf symbol table");
|
2024-05-05 23:14:43 +00:00
|
|
|
if (!(secstrs = GetElfSectionNameStringTable(elf, esize)))
|
2023-06-10 16:15:19 +00:00
|
|
|
Die("missing elf section string table");
|
2024-05-05 23:14:43 +00:00
|
|
|
if (!(symstrs = GetElfStringTable(elf, esize, ".strtab")))
|
2023-06-10 16:15:19 +00:00
|
|
|
Die("missing elf symbol string table");
|
2023-06-08 11:37:05 +00:00
|
|
|
CheckPrivilegedCrossReferences();
|
|
|
|
if (mode == O_RDWR) {
|
|
|
|
if (elf->e_machine == EM_NEXGEN32E) {
|
Release Cosmopolitan v3.3
This change upgrades to GCC 12.3 and GNU binutils 2.42. The GNU linker
appears to have changed things so that only a single de-duplicated str
table is present in the binary, and it gets placed wherever the linker
wants, regardless of what the linker script says. To cope with that we
need to stop using .ident to embed licenses. As such, this change does
significant work to revamp how third party licenses are defined in the
codebase, using `.section .notice,"aR",@progbits`.
This new GCC 12.3 toolchain has support for GNU indirect functions. It
lets us support __target_clones__ for the first time. This is used for
optimizing the performance of libc string functions such as strlen and
friends so far on x86, by ensuring AVX systems favor a second codepath
that uses VEX encoding. It shaves some latency off certain operations.
It's a useful feature to have for scientific computing for the reasons
explained by the test/libcxx/openmp_test.cc example which compiles for
fifteen different microarchitectures. Thanks to the upgrades, it's now
also possible to use newer instruction sets, such as AVX512FP16, VNNI.
Cosmo now uses the %gs register on x86 by default for TLS. Doing it is
helpful for any program that links `cosmo_dlopen()`. Such programs had
to recompile their binaries at startup to change the TLS instructions.
That's not great, since it means every page in the executable needs to
be faulted. The work of rewriting TLS-related x86 opcodes, is moved to
fixupobj.com instead. This is great news for MacOS x86 users, since we
previously needed to morph the binary every time for that platform but
now that's no longer necessary. The only platforms where we need fixup
of TLS x86 opcodes at runtime are now Windows, OpenBSD, and NetBSD. On
Windows we morph TLS to point deeper into the TIB, based on a TlsAlloc
assignment, and on OpenBSD/NetBSD we morph %gs back into %fs since the
kernels do not allow us to specify a value for the %gs register.
OpenBSD users are now required to use APE Loader to run Cosmo binaries
and assimilation is no longer possible. OpenBSD kernel needs to change
to allow programs to specify a value for the %gs register, or it needs
to stop marking executable pages loaded by the kernel as mimmutable().
This release fixes __constructor__, .ctor, .init_array, and lastly the
.preinit_array so they behave the exact same way as glibc.
We no longer use hex constants to define math.h symbols like M_PI.
2024-02-20 19:12:09 +00:00
|
|
|
RewriteTlsCodeAmd64();
|
2023-06-08 11:37:05 +00:00
|
|
|
OptimizePatchableFunctionEntries();
|
2024-02-01 11:39:46 +00:00
|
|
|
GenerateIfuncInit();
|
2023-06-10 16:15:19 +00:00
|
|
|
} else if (elf->e_machine == EM_AARCH64) {
|
Release Cosmopolitan v3.3
This change upgrades to GCC 12.3 and GNU binutils 2.42. The GNU linker
appears to have changed things so that only a single de-duplicated str
table is present in the binary, and it gets placed wherever the linker
wants, regardless of what the linker script says. To cope with that we
need to stop using .ident to embed licenses. As such, this change does
significant work to revamp how third party licenses are defined in the
codebase, using `.section .notice,"aR",@progbits`.
This new GCC 12.3 toolchain has support for GNU indirect functions. It
lets us support __target_clones__ for the first time. This is used for
optimizing the performance of libc string functions such as strlen and
friends so far on x86, by ensuring AVX systems favor a second codepath
that uses VEX encoding. It shaves some latency off certain operations.
It's a useful feature to have for scientific computing for the reasons
explained by the test/libcxx/openmp_test.cc example which compiles for
fifteen different microarchitectures. Thanks to the upgrades, it's now
also possible to use newer instruction sets, such as AVX512FP16, VNNI.
Cosmo now uses the %gs register on x86 by default for TLS. Doing it is
helpful for any program that links `cosmo_dlopen()`. Such programs had
to recompile their binaries at startup to change the TLS instructions.
That's not great, since it means every page in the executable needs to
be faulted. The work of rewriting TLS-related x86 opcodes, is moved to
fixupobj.com instead. This is great news for MacOS x86 users, since we
previously needed to morph the binary every time for that platform but
now that's no longer necessary. The only platforms where we need fixup
of TLS x86 opcodes at runtime are now Windows, OpenBSD, and NetBSD. On
Windows we morph TLS to point deeper into the TIB, based on a TlsAlloc
assignment, and on OpenBSD/NetBSD we morph %gs back into %fs since the
kernels do not allow us to specify a value for the %gs register.
OpenBSD users are now required to use APE Loader to run Cosmo binaries
and assimilation is no longer possible. OpenBSD kernel needs to change
to allow programs to specify a value for the %gs register, or it needs
to stop marking executable pages loaded by the kernel as mimmutable().
This release fixes __constructor__, .ctor, .init_array, and lastly the
.preinit_array so they behave the exact same way as glibc.
We no longer use hex constants to define math.h symbols like M_PI.
2024-02-20 19:12:09 +00:00
|
|
|
RewriteTlsCodeArm64();
|
2024-05-05 23:14:43 +00:00
|
|
|
if (elf->e_type != ET_REL)
|
2023-12-30 04:11:23 +00:00
|
|
|
UseFreebsdOsAbi();
|
2023-06-08 11:37:05 +00:00
|
|
|
}
|
2023-06-10 16:15:19 +00:00
|
|
|
if (elf->e_type != ET_REL) {
|
2024-05-08 00:42:18 +00:00
|
|
|
WriteApeFlags();
|
2024-02-01 11:39:46 +00:00
|
|
|
PurgeIfuncSections();
|
2023-06-10 16:15:19 +00:00
|
|
|
RelinkZipFiles();
|
|
|
|
}
|
2024-05-05 23:14:43 +00:00
|
|
|
if (pwrite(fildes, elf, esize, 0) != esize)
|
2024-03-24 10:14:25 +00:00
|
|
|
SysExit("pwrite");
|
2023-06-08 11:37:05 +00:00
|
|
|
}
|
2022-08-06 10:51:50 +00:00
|
|
|
}
|
2024-05-05 23:14:43 +00:00
|
|
|
if (close(fildes))
|
2023-06-08 11:37:05 +00:00
|
|
|
SysExit("close");
|
2022-08-06 10:51:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int main(int argc, char *argv[]) {
|
2023-09-02 03:49:13 +00:00
|
|
|
int i;
|
2024-05-05 23:14:43 +00:00
|
|
|
if (!IsOptimized())
|
2023-06-08 11:37:05 +00:00
|
|
|
ShowCrashReports();
|
2022-08-06 10:51:50 +00:00
|
|
|
GetOpts(argc, argv);
|
|
|
|
for (i = optind; i < argc; ++i) {
|
2023-06-08 11:37:05 +00:00
|
|
|
epath = argv[i];
|
|
|
|
FixupObject();
|
2022-08-06 10:51:50 +00:00
|
|
|
}
|
|
|
|
}
|