mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-08-08 10:50:28 +00:00
WIP
This commit is contained in:
parent
6bc04598bf
commit
2b4d6124d9
20 changed files with 63 additions and 6666 deletions
331
third_party/mold/elf/arch-alpha.cc
vendored
331
third_party/mold/elf/arch-alpha.cc
vendored
|
@ -1,331 +0,0 @@
|
||||||
// clang-format off
|
|
||||||
// Alpha is a 64-bit RISC ISA developed by DEC (Digital Equipment
|
|
||||||
// Corporation) in the early '90s. It aimed to be an ISA that would last
|
|
||||||
// 25 years. DEC expected Alpha would become 1000x faster during that time
|
|
||||||
// span. Since the ISA was developed from scratch for future machines,
|
|
||||||
// it's 64-bit from the beginning. There's no 32-bit variant.
|
|
||||||
//
|
|
||||||
// DEC ported its own Unix (Tru64) to Alpha. Microsoft also ported Windows
|
|
||||||
// NT to it. But it wasn't a huge commercial success.
|
|
||||||
//
|
|
||||||
// DEC was acquired by Compaq in 1997. In the late '90s, Intel and
|
|
||||||
// Hewlett-Packard were advertising that their upcoming Itanium processor
|
|
||||||
// would achieve significantly better performance than RISC processors, so
|
|
||||||
// Compaq decided to discontinue the Alpha processor line to switch to
|
|
||||||
// Itanium. Itanium resulted in a miserable failure, but it still succeeded
|
|
||||||
// to wipe out several RISC processors just by promising overly optimistic
|
|
||||||
// perf numbers. Alpha as an ISA would probably have been fine after 25
|
|
||||||
// years since its introduction (which is 1992 + 25 = 2017), but the
|
|
||||||
// company and its market didn't last that long.
|
|
||||||
//
|
|
||||||
// From the linker's point of view, there are a few peculiarities in its
|
|
||||||
// psABI as shown below:
|
|
||||||
//
|
|
||||||
// - Alpha lacks PC-relative memory load/store instructions, so it uses
|
|
||||||
// register-relative load/store instructions in position-independent
|
|
||||||
// code. Specifically, GP (which is an alias for $r29) is always
|
|
||||||
// maintained to refer to .got+0x8000, and global variables' addresses
|
|
||||||
// are loaded in a GP-relative manner.
|
|
||||||
//
|
|
||||||
// - It looks like even function addresses are first loaded to register
|
|
||||||
// in a GP-relative manner before calling it. We can relax it to
|
|
||||||
// convert the instruction sequence with a direct branch instruction,
|
|
||||||
// but by default, object files don't use a direct branch to call a
|
|
||||||
// function. Therefore, by default, we don't need to create a PLT.
|
|
||||||
// Any function call is made by first reading its address from GOT and
|
|
||||||
// jump to the address.
|
|
||||||
|
|
||||||
#include "third_party/mold/elf/mold.h"
|
|
||||||
|
|
||||||
namespace mold::elf {
|
|
||||||
|
|
||||||
using E = ALPHA;
|
|
||||||
|
|
||||||
// A 32-bit immediate can be materialized in a register with a "load high"
|
|
||||||
// and a "load low" instruction sequence. The first instruction sets the
|
|
||||||
// upper 16 bits in a register, and the second one set the lower 16
|
|
||||||
// bits. When doing so, they sign-extend an immediate. Therefore, if the
|
|
||||||
// 15th bit of an immediate happens to be 1, setting a "low half" value
|
|
||||||
// negates the upper 16 bit values that has already been set in a
|
|
||||||
// register. To compensate that, we need to add 0x8000 when setting the
|
|
||||||
// upper 16 bits.
|
|
||||||
static u32 hi(u32 val) {
|
|
||||||
return bits(val + 0x8000, 31, 16);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_plt_header(Context<E> &ctx, u8 *buf) {}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
|
|
||||||
u64 offset, u64 val) {
|
|
||||||
u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_NONE:
|
|
||||||
break;
|
|
||||||
case R_ALPHA_SREL32:
|
|
||||||
*(ul32 *)loc = val - this->shdr.sh_addr - offset;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
ElfRel<E> *dynrel = nullptr;
|
|
||||||
if (ctx.reldyn)
|
|
||||||
dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
|
|
||||||
file.reldyn_offset + this->reldyn_offset);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
u64 S = sym.get_addr(ctx);
|
|
||||||
u64 A = rel.r_addend;
|
|
||||||
u64 P = get_addr() + rel.r_offset;
|
|
||||||
u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>);
|
|
||||||
u64 GOT = ctx.got->shdr.sh_addr;
|
|
||||||
u64 GP = ctx.got->shdr.sh_addr + 0x8000;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_ALPHA_REFQUAD:
|
|
||||||
apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
|
|
||||||
break;
|
|
||||||
case R_ALPHA_GPREL32:
|
|
||||||
*(ul32 *)loc = S + A - GP;
|
|
||||||
break;
|
|
||||||
case R_ALPHA_LITERAL:
|
|
||||||
if (A)
|
|
||||||
*(ul16 *)loc = ctx.extra.got->get_addr(sym, A) - GP;
|
|
||||||
else
|
|
||||||
*(ul16 *)loc = GOT + G - GP;
|
|
||||||
break;
|
|
||||||
case R_ALPHA_BRSGP:
|
|
||||||
*(ul32 *)loc |= bits(S + A - P - 4, 22, 0);
|
|
||||||
break;
|
|
||||||
case R_ALPHA_GPDISP:
|
|
||||||
*(ul16 *)loc = hi(GP - P);
|
|
||||||
*(ul16 *)(loc + A) = GP - P;
|
|
||||||
break;
|
|
||||||
case R_ALPHA_SREL32:
|
|
||||||
*(ul32 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_ALPHA_GPRELHIGH:
|
|
||||||
*(ul16 *)loc = hi(S + A - GP);
|
|
||||||
break;
|
|
||||||
case R_ALPHA_GPRELLOW:
|
|
||||||
*(ul16 *)loc = S + A - GP;
|
|
||||||
break;
|
|
||||||
case R_ALPHA_TLSGD:
|
|
||||||
*(ul16 *)loc = sym.get_tlsgd_addr(ctx) - GP;
|
|
||||||
break;
|
|
||||||
case R_ALPHA_TLSLDM:
|
|
||||||
*(ul16 *)loc = ctx.got->get_tlsld_addr(ctx) - GP;
|
|
||||||
break;
|
|
||||||
case R_ALPHA_DTPRELHI:
|
|
||||||
*(ul16 *)loc = hi(S + A - ctx.dtp_addr);
|
|
||||||
break;
|
|
||||||
case R_ALPHA_DTPRELLO:
|
|
||||||
*(ul16 *)loc = S + A - ctx.dtp_addr;
|
|
||||||
break;
|
|
||||||
case R_ALPHA_GOTTPREL:
|
|
||||||
*(ul16 *)loc = sym.get_gottp_addr(ctx) + A - GP;
|
|
||||||
break;
|
|
||||||
case R_ALPHA_TPRELHI:
|
|
||||||
*(ul16 *)loc = hi(S + A - ctx.tp_addr);
|
|
||||||
break;
|
|
||||||
case R_ALPHA_TPRELLO:
|
|
||||||
*(ul16 *)loc = S + A - ctx.tp_addr;
|
|
||||||
break;
|
|
||||||
case R_ALPHA_LITUSE:
|
|
||||||
case R_ALPHA_HINT:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
unreachable();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
SectionFragment<E> *frag;
|
|
||||||
i64 frag_addend;
|
|
||||||
std::tie(frag, frag_addend) = get_fragment(ctx, rel);
|
|
||||||
|
|
||||||
u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
|
|
||||||
u64 A = frag ? frag_addend : (i64)rel.r_addend;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_ALPHA_REFLONG:
|
|
||||||
if (std::optional<u64> val = get_tombstone(sym, frag))
|
|
||||||
*(ul32 *)loc = *val;
|
|
||||||
else
|
|
||||||
*(ul32 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_ALPHA_REFQUAD:
|
|
||||||
if (std::optional<u64> val = get_tombstone(sym, frag))
|
|
||||||
*(ul64 *)loc = *val;
|
|
||||||
else
|
|
||||||
*(ul64 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: "
|
|
||||||
<< rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::scan_relocations(Context<E> &ctx) {
|
|
||||||
assert(shdr().sh_flags & SHF_ALLOC);
|
|
||||||
|
|
||||||
this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
|
|
||||||
if (sym.is_ifunc())
|
|
||||||
Error(ctx) << sym << ": GNU ifunc symbol is not supported on Alpha";
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_ALPHA_REFQUAD:
|
|
||||||
scan_dyn_absrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_ALPHA_LITERAL:
|
|
||||||
if (rel.r_addend)
|
|
||||||
ctx.extra.got->add_symbol(sym, rel.r_addend);
|
|
||||||
else
|
|
||||||
sym.flags |= NEEDS_GOT;
|
|
||||||
break;
|
|
||||||
case R_ALPHA_SREL32:
|
|
||||||
scan_pcrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_ALPHA_BRSGP:
|
|
||||||
if (sym.is_imported)
|
|
||||||
sym.flags |= NEEDS_PLT;
|
|
||||||
break;
|
|
||||||
case R_ALPHA_TLSGD:
|
|
||||||
sym.flags |= NEEDS_TLSGD;
|
|
||||||
break;
|
|
||||||
case R_ALPHA_TLSLDM:
|
|
||||||
ctx.needs_tlsld = true;
|
|
||||||
break;
|
|
||||||
case R_ALPHA_GOTTPREL:
|
|
||||||
sym.flags |= NEEDS_GOTTP;
|
|
||||||
break;
|
|
||||||
case R_ALPHA_TPRELHI:
|
|
||||||
case R_ALPHA_TPRELLO:
|
|
||||||
check_tlsle(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_ALPHA_GPREL32:
|
|
||||||
case R_ALPHA_LITUSE:
|
|
||||||
case R_ALPHA_GPDISP:
|
|
||||||
case R_ALPHA_HINT:
|
|
||||||
case R_ALPHA_GPRELHIGH:
|
|
||||||
case R_ALPHA_GPRELLOW:
|
|
||||||
case R_ALPHA_DTPRELHI:
|
|
||||||
case R_ALPHA_DTPRELLO:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << *this << ": unknown relocation: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// An R_ALPHA_LITERAL relocation may request the linker to create a GOT
|
|
||||||
// entry for an external symbol with a non-zero addend. This is an unusual
|
|
||||||
// request which is not found in any other targets.
|
|
||||||
//
|
|
||||||
// Referring to an external symbol with a non-zero addend is bad practice
|
|
||||||
// because we need to create as many dynamic relocations as the number of
|
|
||||||
// distinctive addends for the same symbol.
|
|
||||||
//
|
|
||||||
// We don't want to mess up the implementation of the common GOT section
|
|
||||||
// for Alpha. So we create another GOT-like section, .alpha_got. Any GOT
|
|
||||||
// entry for an R_ALPHA_LITERAL reloc with a non-zero addend is created
|
|
||||||
// not in .got but in .alpha_got.
|
|
||||||
//
|
|
||||||
// Since .alpha_got entries are accessed relative to GP, .alpha_got
|
|
||||||
// needs to be close enough to .got. It's actually placed next to .got.
|
|
||||||
// Records that a slot holding `sym + addend` is needed. `addend` must be
// non-zero. The entry list is guarded by `mu` while it is appended to.
void AlphaGotSection::add_symbol(Symbol<E> &sym, i64 addend) {
  assert(addend != 0);

  std::scoped_lock guard(mu);
  entries.push_back(Entry{&sym, addend});
}
|
|
||||||
|
|
||||||
// Orders entries lexicographically by (priority of the symbol's file,
// symbol index, addend).
bool operator<(const AlphaGotSection::Entry &a, const AlphaGotSection::Entry &b) {
  if (a.sym->file->priority != b.sym->file->priority)
    return a.sym->file->priority < b.sym->file->priority;

  if (a.sym->sym_idx != b.sym->sym_idx)
    return a.sym->sym_idx < b.sym->sym_idx;

  return a.addend < b.addend;
}
|
|
||||||
|
|
||||||
// Returns the address of the slot that holds `sym + addend`.
//
// The lookup is a binary search, so `entries` must already be sorted; in
// this file that is done by finalize(). The pair is expected to be present.
// Debug builds assert that the search landed on an equivalent entry rather
// than merely on some entry that sorts after the key.
u64 AlphaGotSection::get_addr(Symbol<E> &sym, i64 addend) {
  Entry key{&sym, addend};
  auto it = std::lower_bound(entries.begin(), entries.end(), key);

  // lower_bound guarantees !(*it < key); together with !(key < *it) this
  // means the entry found is equivalent to the key.
  assert(it != entries.end() && !(key < *it));

  return this->shdr.sh_addr + (it - entries.begin()) * sizeof(Word<E>);
}
|
|
||||||
|
|
||||||
// Counts the entries for which copy_buf writes a dynamic relocation:
// imported symbols, plus non-absolute symbols when producing
// position-independent output.
//
// NOTE(review): the result is a number of entries, not a size in bytes;
// confirm that callers expect this unit.
i64 AlphaGotSection::get_reldyn_size(Context<E> &ctx) const {
  return std::count_if(entries.begin(), entries.end(), [&](const Entry &ent) {
    return ent.sym->is_imported || (ctx.arg.pic && !ent.sym->is_absolute());
  });
}
|
|
||||||
|
|
||||||
void AlphaGotSection::finalize() {
|
|
||||||
sort(entries);
|
|
||||||
remove_duplicates(entries);
|
|
||||||
shdr.sh_size = entries.size() * sizeof(Word<E>);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Writes every slot of this section to the output buffer and emits the
// matching dynamic relocations into this section's region of .rel.dyn.
void AlphaGotSection::copy_buf(Context<E> &ctx) {
  u8 *reldyn_base = ctx.buf + ctx.reldyn->shdr.sh_offset + reldyn_offset;
  ElfRel<E> *dynrel = (ElfRel<E> *)reldyn_base;

  for (i64 idx = 0; idx < entries.size(); idx++) {
    Entry &ent = entries[idx];
    u64 slot_off = sizeof(Word<E>) * idx;
    u64 P = this->shdr.sh_addr + slot_off;
    ul64 *slot = (ul64 *)(ctx.buf + this->shdr.sh_offset + slot_off);

    if (ent.sym->is_imported) {
      // The value is resolved at load time; the addend is stored in the
      // slot only when dynamic relocs are to be applied statically too.
      *slot = ctx.arg.apply_dynamic_relocs ? ent.addend : 0;
      *dynrel++ = ElfRel<E>(P, E::R_ABS, ent.sym->get_dynsym_idx(ctx), ent.addend);
      continue;
    }

    *slot = ent.sym->get_addr(ctx) + ent.addend;

    // Position-independent output needs a relative relocation for every
    // non-absolute address stored here.
    if (ctx.arg.pic && !ent.sym->is_absolute())
      *dynrel++ = ElfRel<E>(P, E::R_RELATIVE, 0, *slot);
  }
}
|
|
||||||
|
|
||||||
} // namespace mold::elf
|
|
737
third_party/mold/elf/arch-arm32.cc
vendored
737
third_party/mold/elf/arch-arm32.cc
vendored
|
@ -1,737 +0,0 @@
|
||||||
// clang-format off
|
|
||||||
// ARM32 is a bit special from the linker's viewpoint because ARM
|
|
||||||
// processors support two different instruction encodings: Thumb and
|
|
||||||
// ARM (in a narrower sense). Thumb instructions are either 16 bits or
|
|
||||||
// 32 bits, while ARM instructions are all 32 bits. Feature-wise,
|
|
||||||
// thumb is a subset of ARM, so not all ARM instructions are
|
|
||||||
// representable in Thumb.
|
|
||||||
//
|
|
||||||
// ARM processors originally supported only ARM instructions. Thumb
|
|
||||||
// instructions were later added to increase code density.
|
|
||||||
//
|
|
||||||
// ARM processors run in either ARM mode or Thumb mode. The mode can
|
|
||||||
// be switched using BX (branch and mode exchange)-family instructions.
|
|
||||||
// We need to use those instructions to, for example, call a function
|
|
||||||
// encoded in Thumb from a function encoded in ARM. Sometimes, the
|
|
||||||
// linker even has to emit an interworking thunk code to switch mode.
|
|
||||||
//
|
|
||||||
// ARM instructions are aligned to 4 byte boundaries. Thumb are to 2
|
|
||||||
// byte boundaries.
|
|
||||||
//
|
|
||||||
// You can distinguish Thumb functions from ARM functions by looking
|
|
||||||
// at the least significant bit (LSB) of its "address". If LSB is 0,
|
|
||||||
// it's ARM; otherwise, Thumb.
|
|
||||||
//
|
|
||||||
// For example, if a symbol `foo` is of type STT_FUNC and has value
|
|
||||||
// 0x2001, `foo` is a function using Thumb instructions whose address
|
|
||||||
// is 0x2000 (not 0x2001, as Thumb instructions are always 2-byte
|
|
||||||
// aligned). Likewise, if a function pointer has value 0x2001, it
|
|
||||||
// refers a Thumb function at 0x2000.
|
|
||||||
//
|
|
||||||
// https://github.com/ARM-software/abi-aa/blob/main/aaelf32/aaelf32.rst
|
|
||||||
|
|
||||||
#include "third_party/mold/elf/mold.h"
|
|
||||||
|
|
||||||
namespace mold::elf {
|
|
||||||
|
|
||||||
using E = ARM32;
|
|
||||||
|
|
||||||
template <>
|
|
||||||
i64 get_addend(u8 *loc, const ElfRel<E> &rel) {
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_ARM_ABS32:
|
|
||||||
case R_ARM_REL32:
|
|
||||||
case R_ARM_TARGET1:
|
|
||||||
case R_ARM_BASE_PREL:
|
|
||||||
case R_ARM_GOTOFF32:
|
|
||||||
case R_ARM_GOT_PREL:
|
|
||||||
case R_ARM_GOT_BREL:
|
|
||||||
case R_ARM_TLS_GD32:
|
|
||||||
case R_ARM_TLS_LDM32:
|
|
||||||
case R_ARM_TLS_LDO32:
|
|
||||||
case R_ARM_TLS_IE32:
|
|
||||||
case R_ARM_TLS_LE32:
|
|
||||||
case R_ARM_TLS_GOTDESC:
|
|
||||||
case R_ARM_TARGET2:
|
|
||||||
return *(il32 *)loc;
|
|
||||||
case R_ARM_THM_JUMP11:
|
|
||||||
return sign_extend(*(ul16 *)loc, 10) << 1;
|
|
||||||
case R_ARM_THM_CALL:
|
|
||||||
case R_ARM_THM_JUMP24:
|
|
||||||
case R_ARM_THM_TLS_CALL: {
|
|
||||||
u32 S = bit(*(ul16 *)loc, 10);
|
|
||||||
u32 J1 = bit(*(ul16 *)(loc + 2), 13);
|
|
||||||
u32 J2 = bit(*(ul16 *)(loc + 2), 11);
|
|
||||||
u32 I1 = !(J1 ^ S);
|
|
||||||
u32 I2 = !(J2 ^ S);
|
|
||||||
u32 imm10 = bits(*(ul16 *)loc, 9, 0);
|
|
||||||
u32 imm11 = bits(*(ul16 *)(loc + 2), 10, 0);
|
|
||||||
u32 val = (S << 24) | (I1 << 23) | (I2 << 22) | (imm10 << 12) | (imm11 << 1);
|
|
||||||
return sign_extend(val, 24);
|
|
||||||
}
|
|
||||||
case R_ARM_CALL:
|
|
||||||
case R_ARM_JUMP24:
|
|
||||||
case R_ARM_PLT32:
|
|
||||||
case R_ARM_TLS_CALL:
|
|
||||||
return sign_extend(*(ul32 *)loc, 23) << 2;
|
|
||||||
case R_ARM_MOVW_PREL_NC:
|
|
||||||
case R_ARM_MOVW_ABS_NC:
|
|
||||||
case R_ARM_MOVT_PREL:
|
|
||||||
case R_ARM_MOVT_ABS: {
|
|
||||||
u32 imm12 = bits(*(ul32 *)loc, 11, 0);
|
|
||||||
u32 imm4 = bits(*(ul32 *)loc, 19, 16);
|
|
||||||
return sign_extend((imm4 << 12) | imm12, 15);
|
|
||||||
}
|
|
||||||
case R_ARM_PREL31:
|
|
||||||
return sign_extend(*(ul32 *)loc, 30);
|
|
||||||
case R_ARM_THM_MOVW_PREL_NC:
|
|
||||||
case R_ARM_THM_MOVW_ABS_NC:
|
|
||||||
case R_ARM_THM_MOVT_PREL:
|
|
||||||
case R_ARM_THM_MOVT_ABS: {
|
|
||||||
u32 imm4 = bits(*(ul16 *)loc, 3, 0);
|
|
||||||
u32 i = bit(*(ul16 *)loc, 10);
|
|
||||||
u32 imm3 = bits(*(ul16 *)(loc + 2), 14, 12);
|
|
||||||
u32 imm8 = bits(*(ul16 *)(loc + 2), 7, 0);
|
|
||||||
u32 val = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
|
|
||||||
return sign_extend(val, 15);
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Stores the low 16 bits of `val` into the imm4:imm12 fields (bits [19:16]
// and [11:0]) of the 32-bit ARM instruction at `loc`, leaving all other
// bits of the instruction untouched.
static void write_mov_imm(u8 *loc, u32 val) {
  ul32 *insn = (ul32 *)loc;
  u32 low12 = bits(val, 11, 0);
  u32 high4 = bits(val, 15, 12);

  *insn = (*insn & 0xfff0f000) | (high4 << 16) | low12;
}
|
|
||||||
|
|
||||||
// Stores the branch offset `val` into the 32-bit Thumb branch instruction
// (two 16-bit halfwords) at `loc`. The offset is split into
// S:I1:I2:imm10:imm11; I1 and I2 are stored as J1 = !I1 ^ S and
// J2 = !I2 ^ S. Bit 0 of `val` is not encoded.
//
// https://developer.arm.com/documentation/ddi0406/cb/Application-Level-Architecture/Instruction-Details/Alphabetical-list-of-instructions/BL--BLX--immediate-
static void write_thm_b_imm(u8 *loc, u32 val) {
  u32 s = bit(val, 24);
  u32 i1 = bit(val, 23);
  u32 i2 = bit(val, 22);
  u32 j1 = (!i1) ^ s;
  u32 j2 = (!i2) ^ s;
  u32 imm10 = bits(val, 21, 12);
  u32 imm11 = bits(val, 11, 1);

  ul16 *half = (ul16 *)loc;

  // First halfword: keep bits [15:11], replace S and imm10.
  half[0] = (half[0] & 0b1111'1000'0000'0000) | (s << 10) | imm10;

  // Second halfword: keep bits 15, 14 and 12, replace J1, J2 and imm11.
  half[1] = (half[1] & 0b1101'0000'0000'0000) | (j1 << 13) | (j2 << 11) | imm11;
}
|
|
||||||
|
|
||||||
// Stores the low 16 bits of `val` into the imm4:i:imm3:imm8 fields of the
// 32-bit Thumb MOVW/MOVT instruction (two 16-bit halfwords) at `loc`,
// leaving the remaining instruction bits untouched.
//
// https://developer.arm.com/documentation/ddi0406/cb/Application-Level-Architecture/Instruction-Details/Alphabetical-list-of-instructions/MOVT
static void write_thm_mov_imm(u8 *loc, u32 val) {
  u32 top4 = bits(val, 15, 12);
  u32 mid1 = bit(val, 11);
  u32 mid3 = bits(val, 10, 8);
  u32 low8 = bits(val, 7, 0);

  ul16 *half = (ul16 *)loc;

  // First halfword carries i (bit 10) and imm4 (bits [3:0]).
  half[0] = (half[0] & 0b1111'1011'1111'0000) | (mid1 << 10) | top4;

  // Second halfword carries imm3 (bits [14:12]) and imm8 (bits [7:0]).
  half[1] = (half[1] & 0b1000'1111'0000'0000) | (mid3 << 12) | low8;
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_addend(u8 *loc, i64 val, const ElfRel<E> &rel) {
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_ARM_NONE:
|
|
||||||
break;
|
|
||||||
case R_ARM_ABS32:
|
|
||||||
case R_ARM_REL32:
|
|
||||||
case R_ARM_TARGET1:
|
|
||||||
case R_ARM_BASE_PREL:
|
|
||||||
case R_ARM_GOTOFF32:
|
|
||||||
case R_ARM_GOT_PREL:
|
|
||||||
case R_ARM_GOT_BREL:
|
|
||||||
case R_ARM_TLS_GD32:
|
|
||||||
case R_ARM_TLS_LDM32:
|
|
||||||
case R_ARM_TLS_LDO32:
|
|
||||||
case R_ARM_TLS_IE32:
|
|
||||||
case R_ARM_TLS_LE32:
|
|
||||||
case R_ARM_TLS_GOTDESC:
|
|
||||||
case R_ARM_TARGET2:
|
|
||||||
*(ul32 *)loc = val;
|
|
||||||
break;
|
|
||||||
case R_ARM_THM_JUMP11:
|
|
||||||
*(ul16 *)loc = (*(ul16 *)loc & 0xf800) | bits(val, 11, 1);
|
|
||||||
break;
|
|
||||||
case R_ARM_THM_CALL:
|
|
||||||
case R_ARM_THM_JUMP24:
|
|
||||||
case R_ARM_THM_TLS_CALL:
|
|
||||||
write_thm_b_imm(loc, val);
|
|
||||||
break;
|
|
||||||
case R_ARM_CALL:
|
|
||||||
case R_ARM_JUMP24:
|
|
||||||
case R_ARM_PLT32:
|
|
||||||
*(ul32 *)loc = (*(ul32 *)loc & 0xff00'0000) | bits(val, 25, 2);
|
|
||||||
break;
|
|
||||||
case R_ARM_MOVW_PREL_NC:
|
|
||||||
case R_ARM_MOVW_ABS_NC:
|
|
||||||
case R_ARM_MOVT_PREL:
|
|
||||||
case R_ARM_MOVT_ABS:
|
|
||||||
write_mov_imm(loc, val);
|
|
||||||
break;
|
|
||||||
case R_ARM_PREL31:
|
|
||||||
*(ul32 *)loc = (*(ul32 *)loc & 0x8000'0000) | (val & 0x7fff'ffff);
|
|
||||||
break;
|
|
||||||
case R_ARM_THM_MOVW_PREL_NC:
|
|
||||||
case R_ARM_THM_MOVW_ABS_NC:
|
|
||||||
case R_ARM_THM_MOVT_PREL:
|
|
||||||
case R_ARM_THM_MOVT_ABS:
|
|
||||||
write_thm_mov_imm(loc, val);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
unreachable();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_plt_header(Context<E> &ctx, u8 *buf) {
|
|
||||||
static const ul32 insn[] = {
|
|
||||||
0xe52d'e004, // push {lr}
|
|
||||||
0xe59f'e004, // ldr lr, 2f
|
|
||||||
0xe08f'e00e, // 1: add lr, pc, lr
|
|
||||||
0xe5be'f008, // ldr pc, [lr, #8]!
|
|
||||||
0x0000'0000, // 2: .word .got.plt - 1b - 8
|
|
||||||
0xe320'f000, // nop
|
|
||||||
0xe320'f000, // nop
|
|
||||||
0xe320'f000, // nop
|
|
||||||
};
|
|
||||||
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ul32 *)(buf + 16) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 16;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Instruction template shared by write_plt_entry and write_pltgot_entry.
// The last word is a placeholder that those functions overwrite with the
// PC-relative offset of the symbol's GOT slot.
static const ul32 plt_entry[] = {
  0xe59f'c004, // 1: ldr ip, 2f
  0xe08c'c00f, //    add ip, ip, pc
  0xe59c'f000, //    ldr pc, [ip]
  0x0000'0000, // 2: .word sym@GOT - 1b
};
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
|
|
||||||
memcpy(buf, plt_entry, sizeof(plt_entry));
|
|
||||||
*(ul32 *)(buf + 12) = sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx) - 12;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
|
|
||||||
memcpy(buf, plt_entry, sizeof(plt_entry));
|
|
||||||
*(ul32 *)(buf + 12) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 12;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ARM does not use .eh_frame for exception handling. Instead, it uses
|
|
||||||
// .ARM.exidx and .ARM.extab. So this function is empty.
|
|
||||||
template <>
|
|
||||||
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
|
|
||||||
u64 offset, u64 val) {}
|
|
||||||
|
|
||||||
// ARM and Thumb branch instructions can jump within ±16 MiB.
|
|
||||||
static bool is_jump_reachable(i64 val) {
|
|
||||||
return sign_extend(val, 24) == val;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
ElfRel<E> *dynrel = nullptr;
|
|
||||||
if (ctx.reldyn)
|
|
||||||
dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
|
|
||||||
file.reldyn_offset + this->reldyn_offset);
|
|
||||||
|
|
||||||
auto get_tls_trampoline_addr = [&, i = 0](u64 addr) mutable {
|
|
||||||
for (; i < output_section->thunks.size(); i++) {
|
|
||||||
i64 disp = output_section->shdr.sh_addr + output_section->thunks[i]->offset -
|
|
||||||
addr;
|
|
||||||
if (is_jump_reachable(disp))
|
|
||||||
return disp;
|
|
||||||
}
|
|
||||||
unreachable();
|
|
||||||
};
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || rel.r_type == R_ARM_V4BX)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
auto check = [&](i64 val, i64 lo, i64 hi) {
|
|
||||||
if (val < lo || hi <= val)
|
|
||||||
Error(ctx) << *this << ": relocation " << rel << " against "
|
|
||||||
<< sym << " out of range: " << val << " is not in ["
|
|
||||||
<< lo << ", " << hi << ")";
|
|
||||||
};
|
|
||||||
|
|
||||||
u64 S = sym.get_addr(ctx);
|
|
||||||
u64 A = get_addend(*this, rel);
|
|
||||||
u64 P = get_addr() + rel.r_offset;
|
|
||||||
u64 T = S & 1;
|
|
||||||
u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>);
|
|
||||||
u64 GOT = ctx.got->shdr.sh_addr;
|
|
||||||
|
|
||||||
auto get_thumb_thunk_addr = [&] { return get_thunk_addr(i); };
|
|
||||||
auto get_arm_thunk_addr = [&] { return get_thunk_addr(i) + 4; };
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_ARM_ABS32:
|
|
||||||
case R_ARM_TARGET1:
|
|
||||||
apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
|
|
||||||
break;
|
|
||||||
case R_ARM_REL32:
|
|
||||||
*(ul32 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_ARM_THM_CALL: {
|
|
||||||
if (sym.is_remaining_undef_weak()) {
|
|
||||||
// On ARM, calling an weak undefined symbol jumps to the
|
|
||||||
// next instruction.
|
|
||||||
*(ul32 *)loc = 0x8000'f3af; // NOP.W
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// THM_CALL relocation refers either BL or BLX instruction.
|
|
||||||
// They are different in only one bit. We need to use BL if
|
|
||||||
// the jump target is Thumb. Otherwise, use BLX.
|
|
||||||
i64 val = S + A - P;
|
|
||||||
if (is_jump_reachable(val)) {
|
|
||||||
if (T) {
|
|
||||||
write_thm_b_imm(loc, val);
|
|
||||||
*(ul16 *)(loc + 2) |= 0x1000; // rewrite to BL
|
|
||||||
} else {
|
|
||||||
write_thm_b_imm(loc, align_to(val, 4));
|
|
||||||
*(ul16 *)(loc + 2) &= ~0x1000; // rewrite to BLX
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
write_thm_b_imm(loc, align_to(get_arm_thunk_addr() + A - P, 4));
|
|
||||||
*(ul16 *)(loc + 2) &= ~0x1000; // rewrite to BLX
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_ARM_BASE_PREL:
|
|
||||||
*(ul32 *)loc = GOT + A - P;
|
|
||||||
break;
|
|
||||||
case R_ARM_GOTOFF32:
|
|
||||||
*(ul32 *)loc = ((S + A) | T) - GOT;
|
|
||||||
break;
|
|
||||||
case R_ARM_GOT_PREL:
|
|
||||||
case R_ARM_TARGET2:
|
|
||||||
*(ul32 *)loc = GOT + G + A - P;
|
|
||||||
break;
|
|
||||||
case R_ARM_GOT_BREL:
|
|
||||||
*(ul32 *)loc = G + A;
|
|
||||||
break;
|
|
||||||
case R_ARM_CALL: {
|
|
||||||
if (sym.is_remaining_undef_weak()) {
|
|
||||||
*(ul32 *)loc = 0xe320'f000; // NOP
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Just like THM_CALL, ARM_CALL relocation refers either BL or
|
|
||||||
// BLX instruction. We may need to rewrite BL → BLX or BLX → BL.
|
|
||||||
bool is_bl = ((*(ul32 *)loc & 0xff00'0000) == 0xeb00'0000);
|
|
||||||
bool is_blx = ((*(ul32 *)loc & 0xfe00'0000) == 0xfa00'0000);
|
|
||||||
if (!is_bl && !is_blx)
|
|
||||||
Fatal(ctx) << *this << ": R_ARM_CALL refers neither BL nor BLX";
|
|
||||||
|
|
||||||
u64 val = S + A - P;
|
|
||||||
if (is_jump_reachable(val)) {
|
|
||||||
if (T) {
|
|
||||||
*(ul32 *)loc = 0xfa00'0000; // BLX
|
|
||||||
*(ul32 *)loc |= (bit(val, 1) << 24) | bits(val, 25, 2);
|
|
||||||
} else {
|
|
||||||
*(ul32 *)loc = 0xeb00'0000; // BL
|
|
||||||
*(ul32 *)loc |= bits(val, 25, 2);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
*(ul32 *)loc = 0xeb00'0000; // BL
|
|
||||||
*(ul32 *)loc |= bits(get_arm_thunk_addr() + A - P, 25, 2);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_ARM_JUMP24: {
|
|
||||||
if (sym.is_remaining_undef_weak()) {
|
|
||||||
*(ul32 *)loc = 0xe320'f000; // NOP
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// These relocs refers a B (unconditional branch) instruction.
|
|
||||||
// Unlike BL or BLX, we can't rewrite B to BX in place when the
|
|
||||||
// processor mode switch is required because BX doesn't takes an
|
|
||||||
// immediate; it takes only a register. So if mode switch is
|
|
||||||
// required, we jump to a linker-synthesized thunk which does the
|
|
||||||
// job with a longer code sequence.
|
|
||||||
u64 val = S + A - P;
|
|
||||||
if (!is_jump_reachable(val) || T)
|
|
||||||
val = get_arm_thunk_addr() + A - P;
|
|
||||||
*(ul32 *)loc = (*(ul32 *)loc & 0xff00'0000) | bits(val, 25, 2);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_ARM_PLT32:
|
|
||||||
if (sym.is_remaining_undef_weak()) {
|
|
||||||
*(ul32 *)loc = 0xe320'f000; // NOP
|
|
||||||
} else {
|
|
||||||
u64 val = (T ? get_arm_thunk_addr() : S) + A - P;
|
|
||||||
*(ul32 *)loc = (*(ul32 *)loc & 0xff00'0000) | bits(val, 25, 2);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case R_ARM_THM_JUMP11:
|
|
||||||
assert(T);
|
|
||||||
check(S + A - P, -(1 << 11), 1 << 11);
|
|
||||||
*(ul16 *)loc &= 0xf800;
|
|
||||||
*(ul16 *)loc |= bits(S + A - P, 11, 1);
|
|
||||||
break;
|
|
||||||
case R_ARM_THM_JUMP19: {
|
|
||||||
i64 val = S + A - P;
|
|
||||||
check(val, -(1 << 19), 1 << 19);
|
|
||||||
|
|
||||||
// sign:J2:J1:imm6:imm11:'0'
|
|
||||||
u32 sign = bit(val, 20);
|
|
||||||
u32 J2 = bit(val, 19);
|
|
||||||
u32 J1 = bit(val, 18);
|
|
||||||
u32 imm6 = bits(val, 17, 12);
|
|
||||||
u32 imm11 = bits(val, 11, 1);
|
|
||||||
|
|
||||||
*(ul16 *)loc &= 0b1111'1011'1100'0000;
|
|
||||||
*(ul16 *)loc |= (sign << 10) | imm6;
|
|
||||||
|
|
||||||
*(ul16 *)(loc + 2) &= 0b1101'0000'0000'0000;
|
|
||||||
*(ul16 *)(loc + 2) |= (J2 << 13) | (J1 << 11) | imm11;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_ARM_THM_JUMP24: {
|
|
||||||
if (sym.is_remaining_undef_weak()) {
|
|
||||||
*(ul32 *)loc = 0x8000'f3af; // NOP
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Just like R_ARM_JUMP24, we need to jump to a thunk if we need to
|
|
||||||
// switch processor mode.
|
|
||||||
u64 val = S + A - P;
|
|
||||||
if (!is_jump_reachable(val) || !T)
|
|
||||||
val = get_thumb_thunk_addr() + A - P;
|
|
||||||
write_thm_b_imm(loc, val);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_ARM_MOVW_PREL_NC:
|
|
||||||
write_mov_imm(loc, ((S + A) | T) - P);
|
|
||||||
break;
|
|
||||||
case R_ARM_MOVW_ABS_NC:
|
|
||||||
write_mov_imm(loc, (S + A) | T);
|
|
||||||
break;
|
|
||||||
case R_ARM_THM_MOVW_PREL_NC:
|
|
||||||
write_thm_mov_imm(loc, ((S + A) | T) - P);
|
|
||||||
break;
|
|
||||||
case R_ARM_PREL31:
|
|
||||||
check(S + A - P, -(1LL << 30), 1LL << 30);
|
|
||||||
*(ul32 *)loc &= 0x8000'0000;
|
|
||||||
*(ul32 *)loc |= (S + A - P) & 0x7fff'ffff;
|
|
||||||
break;
|
|
||||||
case R_ARM_THM_MOVW_ABS_NC:
|
|
||||||
write_thm_mov_imm(loc, (S + A) | T);
|
|
||||||
break;
|
|
||||||
case R_ARM_MOVT_PREL:
|
|
||||||
write_mov_imm(loc, (S + A - P) >> 16);
|
|
||||||
break;
|
|
||||||
case R_ARM_THM_MOVT_PREL:
|
|
||||||
write_thm_mov_imm(loc, (S + A - P) >> 16);
|
|
||||||
break;
|
|
||||||
case R_ARM_MOVT_ABS:
|
|
||||||
write_mov_imm(loc, (S + A) >> 16);
|
|
||||||
break;
|
|
||||||
case R_ARM_THM_MOVT_ABS:
|
|
||||||
write_thm_mov_imm(loc, (S + A) >> 16);
|
|
||||||
break;
|
|
||||||
case R_ARM_TLS_GD32:
|
|
||||||
*(ul32 *)loc = sym.get_tlsgd_addr(ctx) + A - P;
|
|
||||||
break;
|
|
||||||
case R_ARM_TLS_LDM32:
|
|
||||||
*(ul32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - P;
|
|
||||||
break;
|
|
||||||
case R_ARM_TLS_LDO32:
|
|
||||||
*(ul32 *)loc = S + A - ctx.dtp_addr;
|
|
||||||
break;
|
|
||||||
case R_ARM_TLS_IE32:
|
|
||||||
*(ul32 *)loc = sym.get_gottp_addr(ctx) + A - P;
|
|
||||||
break;
|
|
||||||
case R_ARM_TLS_LE32:
|
|
||||||
*(ul32 *)loc = S + A - ctx.tp_addr;
|
|
||||||
break;
|
|
||||||
case R_ARM_TLS_GOTDESC:
|
|
||||||
if (sym.has_tlsdesc(ctx)) {
|
|
||||||
// A is odd if the corresponding TLS_CALL is Thumb.
|
|
||||||
if (A & 1)
|
|
||||||
*(ul32 *)loc = sym.get_tlsdesc_addr(ctx) - P + A - 6;
|
|
||||||
else
|
|
||||||
*(ul32 *)loc = sym.get_tlsdesc_addr(ctx) - P + A - 4;
|
|
||||||
} else {
|
|
||||||
*(ul32 *)loc = S - ctx.tp_addr;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case R_ARM_TLS_CALL:
|
|
||||||
if (sym.has_tlsdesc(ctx)) {
|
|
||||||
// BL <tls_trampoline>
|
|
||||||
*(ul32 *)loc = 0xeb00'0000 | bits(get_tls_trampoline_addr(P + 8), 25, 2);
|
|
||||||
} else {
|
|
||||||
// BL -> NOP
|
|
||||||
*(ul32 *)loc = 0xe320'f000;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case R_ARM_THM_TLS_CALL:
|
|
||||||
if (sym.has_tlsdesc(ctx)) {
|
|
||||||
u64 val = align_to(get_tls_trampoline_addr(P + 4), 4);
|
|
||||||
write_thm_b_imm(loc, val);
|
|
||||||
*(ul16 *)(loc + 2) &= ~0x1000; // rewrite BL with BLX
|
|
||||||
} else {
|
|
||||||
// BL -> NOP.W
|
|
||||||
*(ul32 *)loc = 0x8000'f3af;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Error(ctx) << *this << ": unknown relocation: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
SectionFragment<E> *frag;
|
|
||||||
i64 frag_addend;
|
|
||||||
std::tie(frag, frag_addend) = get_fragment(ctx, rel);
|
|
||||||
|
|
||||||
u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
|
|
||||||
u64 A = frag ? frag_addend : get_addend(*this, rel);
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_ARM_ABS32:
|
|
||||||
if (std::optional<u64> val = get_tombstone(sym, frag))
|
|
||||||
*(ul32 *)loc = *val;
|
|
||||||
else
|
|
||||||
*(ul32 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_ARM_TLS_LDO32:
|
|
||||||
if (std::optional<u64> val = get_tombstone(sym, frag))
|
|
||||||
*(ul32 *)loc = *val;
|
|
||||||
else
|
|
||||||
*(ul32 *)loc = S + A - ctx.dtp_addr;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: "
|
|
||||||
<< rel;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::scan_relocations(Context<E> &ctx) {
|
|
||||||
assert(shdr().sh_flags & SHF_ALLOC);
|
|
||||||
|
|
||||||
this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
// Scan relocations
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
|
|
||||||
if (sym.is_ifunc())
|
|
||||||
sym.flags |= NEEDS_GOT | NEEDS_PLT;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_ARM_ABS32:
|
|
||||||
case R_ARM_MOVT_ABS:
|
|
||||||
case R_ARM_THM_MOVT_ABS:
|
|
||||||
case R_ARM_TARGET1:
|
|
||||||
scan_dyn_absrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_ARM_THM_CALL:
|
|
||||||
case R_ARM_CALL:
|
|
||||||
case R_ARM_JUMP24:
|
|
||||||
case R_ARM_PLT32:
|
|
||||||
case R_ARM_THM_JUMP24:
|
|
||||||
if (sym.is_imported)
|
|
||||||
sym.flags |= NEEDS_PLT;
|
|
||||||
break;
|
|
||||||
case R_ARM_GOT_PREL:
|
|
||||||
case R_ARM_GOT_BREL:
|
|
||||||
case R_ARM_TARGET2:
|
|
||||||
sym.flags |= NEEDS_GOT;
|
|
||||||
break;
|
|
||||||
case R_ARM_MOVT_PREL:
|
|
||||||
case R_ARM_THM_MOVT_PREL:
|
|
||||||
case R_ARM_PREL31:
|
|
||||||
scan_pcrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_ARM_TLS_GD32:
|
|
||||||
sym.flags |= NEEDS_TLSGD;
|
|
||||||
break;
|
|
||||||
case R_ARM_TLS_LDM32:
|
|
||||||
ctx.needs_tlsld = true;
|
|
||||||
break;
|
|
||||||
case R_ARM_TLS_IE32:
|
|
||||||
sym.flags |= NEEDS_GOTTP;
|
|
||||||
break;
|
|
||||||
case R_ARM_TLS_GOTDESC:
|
|
||||||
if (!relax_tlsdesc(ctx, sym))
|
|
||||||
sym.flags |= NEEDS_TLSDESC;
|
|
||||||
break;
|
|
||||||
case R_ARM_TLS_LE32:
|
|
||||||
check_tlsle(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_ARM_REL32:
|
|
||||||
case R_ARM_BASE_PREL:
|
|
||||||
case R_ARM_GOTOFF32:
|
|
||||||
case R_ARM_THM_JUMP11:
|
|
||||||
case R_ARM_THM_JUMP19:
|
|
||||||
case R_ARM_MOVW_PREL_NC:
|
|
||||||
case R_ARM_MOVW_ABS_NC:
|
|
||||||
case R_ARM_THM_MOVW_PREL_NC:
|
|
||||||
case R_ARM_THM_MOVW_ABS_NC:
|
|
||||||
case R_ARM_TLS_LDO32:
|
|
||||||
case R_ARM_TLS_CALL:
|
|
||||||
case R_ARM_THM_TLS_CALL:
|
|
||||||
case R_ARM_V4BX:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Error(ctx) << *this << ": unknown relocation: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void RangeExtensionThunk<E>::copy_buf(Context<E> &ctx) {
|
|
||||||
u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset;
|
|
||||||
|
|
||||||
// TLS trampoline code. ARM32's TLSDESC is designed so that this
|
|
||||||
// common piece of code is factored out from object files to reduce
|
|
||||||
// output size. Since no one provide, the linker has to synthesize it.
|
|
||||||
static ul32 hdr[] = {
|
|
||||||
0xe08e'0000, // add r0, lr, r0
|
|
||||||
0xe590'1004, // ldr r1, [r0, #4]
|
|
||||||
0xe12f'ff11, // bx r1
|
|
||||||
};
|
|
||||||
|
|
||||||
// This is a range extension and mode switch thunk.
|
|
||||||
// It has two entry points: +0 for Thumb and +4 for ARM.
|
|
||||||
const u8 entry[] = {
|
|
||||||
// .thumb
|
|
||||||
0xfc, 0x46, // mov ip, pc
|
|
||||||
0x60, 0x47, // bx ip # jumps to the following `ldr` insn
|
|
||||||
// .arm
|
|
||||||
0x04, 0xc0, 0x9f, 0xe5, // ldr ip, 2f
|
|
||||||
0x0f, 0xc0, 0x8c, 0xe0, // 1: add ip, ip, pc
|
|
||||||
0x1c, 0xff, 0x2f, 0xe1, // bx ip
|
|
||||||
0x00, 0x00, 0x00, 0x00, // 2: .word sym - 1b
|
|
||||||
};
|
|
||||||
|
|
||||||
static_assert(E::thunk_hdr_size == sizeof(hdr));
|
|
||||||
static_assert(E::thunk_size == sizeof(entry));
|
|
||||||
|
|
||||||
memcpy(buf, hdr, sizeof(hdr));
|
|
||||||
|
|
||||||
for (i64 i = 0; i < symbols.size(); i++) {
|
|
||||||
u8 *loc = buf + sizeof(hdr) + i * sizeof(entry);
|
|
||||||
memcpy(loc, entry, sizeof(entry));
|
|
||||||
|
|
||||||
u64 S = symbols[i]->get_addr(ctx);
|
|
||||||
u64 P = output_section.shdr.sh_addr + offset + sizeof(hdr) + i * sizeof(entry);
|
|
||||||
*(ul32 *)(loc + 16) = S - P - 16;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ARM executables use an .ARM.exidx section to look up an exception
|
|
||||||
// handling record for the current instruction pointer. The table needs
|
|
||||||
// to be sorted by their addresses.
|
|
||||||
//
|
|
||||||
// Other targets use .eh_frame_hdr instead for the same purpose.
|
|
||||||
// I don't know why only ARM uses the different mechanism, but it's
|
|
||||||
// likely that it's due to some historical reason.
|
|
||||||
//
|
|
||||||
// This function sorts .ARM.exidx records.
|
|
||||||
// Sorts the records of the output .ARM.exidx section by function address
// and then points the section's sh_link at .text.
//
// Record fields are stored relative to their own position, so the sort
// is done in three passes: rebase every field to the start of the
// section, sort, then rebase back to self-relative form.
void fixup_arm_exidx_section(Context<E> &ctx) {
  Timer t(ctx, "fixup_arm_exidx_section");

  OutputSection<E> *osec = find_section(ctx, SHT_ARM_EXIDX);
  if (!osec)
    return;

  // An .ARM.exidx record consists of a signed 31-bit relative address
  // and a 32-bit value. The relative address is the start address of
  // the function that the record covers. The value is one of:
  //
  //  1. CANTUNWIND, meaning there is no unwinding info for the function,
  //  2. a compact unwinding record encoded into a 32-bit value, or
  //  3. a 31-bit relative address pointing to a larger record in the
  //     .ARM.extab section.
  //
  // CANTUNWIND is the value 1. The most significant bit is set in (2)
  // but not in (3), so the three can be distinguished by value alone.
  const u32 EXIDX_CANTUNWIND = 1;

  struct Entry {
    ul32 addr;
    ul32 val;
  };

  if (osec->shdr.sh_size % sizeof(Entry))
    Fatal(ctx) << "invalid .ARM.exidx section size";

  Entry *records = (Entry *)(ctx.buf + osec->shdr.sh_offset);
  i64 count = osec->shdr.sh_size / sizeof(Entry);

  // True for case (3) above, i.e. when `val` is itself a relative
  // address that has to move together with its record.
  auto is_relative = [](u32 val) {
    if (val == EXIDX_CANTUNWIND)
      return false;
    return !(val & 0x8000'0000);
  };

  // Pass 1: make addresses relative to the beginning of the section so
  // that they are directly comparable.
  tbb::parallel_for((i64)0, count, [&](i64 idx) {
    Entry &rec = records[idx];
    i64 pos = sizeof(Entry) * idx;
    rec.addr = sign_extend(rec.addr, 30) + pos;
    if (is_relative(rec.val))
      rec.val = 0x7fff'ffff & (rec.val + pos);
  });

  // Pass 2: order the records by function address.
  tbb::parallel_sort(records, records + count,
                     [](const Entry &lhs, const Entry &rhs) {
    return lhs.addr < rhs.addr;
  });

  // Pass 3: make addresses relative to the records' new positions.
  tbb::parallel_for((i64)0, count, [&](i64 idx) {
    Entry &rec = records[idx];
    i64 pos = sizeof(Entry) * idx;
    rec.addr = 0x7fff'ffff & (rec.addr - pos);
    if (is_relative(rec.val))
      rec.val = 0x7fff'ffff & (rec.val - pos);
  });

  // .ARM.exidx's sh_link should be set to the .text section index.
  // The runtime doesn't care about it, but binutils' strip command does.
  if (!ctx.shdr)
    return;

  if (Chunk<E> *text = find_section(ctx, ".text")) {
    osec->shdr.sh_link = text->shndx;
    ctx.shdr->copy_buf(ctx);
  }
}
|
|
||||||
|
|
||||||
} // namespace mold::elf
|
|
595
third_party/mold/elf/arch-arm64.cc
vendored
595
third_party/mold/elf/arch-arm64.cc
vendored
|
@ -1,595 +0,0 @@
|
||||||
// clang-format off
|
|
||||||
// This file contains ARM64-specific code. Being new, the ARM64's ELF
|
|
||||||
// psABI doesn't have anything peculiar. ARM64 is a clean RISC
|
|
||||||
// instruction set that supports PC-relative load/store instructions.
|
|
||||||
//
|
|
||||||
// Unlike ARM32, instruction length doesn't vary. All ARM64
|
|
||||||
// instructions are 4 bytes long.
|
|
||||||
//
|
|
||||||
// Branch instructions used for function call can jump within ±128 MiB.
|
|
||||||
// We need to create range extension thunks to support binaries whose
|
|
||||||
// .text is larger than that.
|
|
||||||
//
|
|
||||||
// Unlike most other targets, the TLSDESC access model is used by default
|
|
||||||
// for -fPIC to access thread-local variables instead of the less
|
|
||||||
// efficient GD model. You can still enable GD but it needs the
|
|
||||||
// -mtls-dialect=trad flag. Since GD is used rarely, we don't need to
|
|
||||||
// implement GD → LE relaxation.
|
|
||||||
//
|
|
||||||
// https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst
|
|
||||||
|
|
||||||
#include "third_party/mold/elf/mold.h"
|
|
||||||
|
|
||||||
namespace mold::elf {
|
|
||||||
|
|
||||||
using E = ARM64;
|
|
||||||
|
|
||||||
// Patches the 21-bit immediate of the ADRP instruction at `buf`.
//
// `val` is a byte displacement between two 4 KiB pages; bits [32:12] of
// it are encoded. Bits [13:12] go to immlo (insn bits [30:29]) and bits
// [32:14] go to immhi (insn bits [23:5]).
//
// The immediate fields are cleared before being written, so the result
// does not depend on whatever immediate bits the instruction already
// contained and the function is safe to call more than once on the same
// location. All other bits (opcode and destination register) are kept.
static void write_adrp(u8 *buf, u64 val) {
  u32 insn = *(ul32 *)buf;
  insn &= 0x9f00'001f; // keep everything except immlo/immhi
  *(ul32 *)buf = insn | (bits(val, 13, 12) << 29) | (bits(val, 32, 14) << 5);
}
|
|
||||||
|
|
||||||
// Patches the 21-bit immediate of the ADR instruction at `buf`.
//
// `val` is a byte displacement; bits [20:0] of it are encoded. Bits
// [1:0] go to immlo (insn bits [30:29]) and bits [20:2] go to immhi
// (insn bits [23:5]).
//
// The immediate fields are cleared before being written, so the result
// does not depend on whatever immediate bits the instruction already
// contained. All other bits (opcode and destination register) are kept.
static void write_adr(u8 *buf, u64 val) {
  u32 insn = *(ul32 *)buf;
  insn &= 0x9f00'001f; // keep everything except immlo/immhi
  *(ul32 *)buf = insn | (bits(val, 1, 0) << 29) | (bits(val, 20, 2) << 5);
}
|
|
||||||
|
|
||||||
// Rewrites the move-wide instruction at `buf` so that it loads the
// signed value `val`: MOVZ with the low 16 bits of `val` when it is
// non-negative, MOVN with the low 16 bits of `~val` otherwise.
//
// Only instruction bits [22:21] and [4:0] of the original instruction
// survive (mask 0x0060'001f); everything else is replaced.
static void write_movn_movz(u8 *buf, i64 val) {
  ul32 &insn = *(ul32 *)buf;
  insn &= 0x0060'001f;

  bool negative = val < 0;
  u32 opcode = negative ? 0x9280'0000  // movn
                        : 0xd280'0000; // movz
  auto imm16 = bits(negative ? ~val : val, 15, 0);
  insn |= opcode | (imm16 << 5);
}
|
|
||||||
|
|
||||||
// Rounds `val` down to a 4 KiB boundary by clearing its low 12 bits.
static u64 page(u64 val) {
  return val & ~(u64)0xfff;
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_plt_header(Context<E> &ctx, u8 *buf) {
|
|
||||||
static const ul32 insn[] = {
|
|
||||||
0xa9bf'7bf0, // stp x16, x30, [sp,#-16]!
|
|
||||||
0x9000'0010, // adrp x16, .got.plt[2]
|
|
||||||
0xf940'0211, // ldr x17, [x16, .got.plt[2]]
|
|
||||||
0x9100'0210, // add x16, x16, .got.plt[2]
|
|
||||||
0xd61f'0220, // br x17
|
|
||||||
0xd503'201f, // nop
|
|
||||||
0xd503'201f, // nop
|
|
||||||
0xd503'201f, // nop
|
|
||||||
};
|
|
||||||
|
|
||||||
u64 gotplt = ctx.gotplt->shdr.sh_addr + 16;
|
|
||||||
u64 plt = ctx.plt->shdr.sh_addr;
|
|
||||||
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
write_adrp(buf + 4, page(gotplt) - page(plt + 4));
|
|
||||||
*(ul32 *)(buf + 8) |= bits(gotplt, 11, 3) << 10;
|
|
||||||
*(ul32 *)(buf + 12) |= (gotplt & 0xfff) << 10;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
|
|
||||||
static const ul32 insn[] = {
|
|
||||||
0x9000'0010, // adrp x16, .got.plt[n]
|
|
||||||
0xf940'0211, // ldr x17, [x16, .got.plt[n]]
|
|
||||||
0x9100'0210, // add x16, x16, .got.plt[n]
|
|
||||||
0xd61f'0220, // br x17
|
|
||||||
};
|
|
||||||
|
|
||||||
u64 gotplt = sym.get_gotplt_addr(ctx);
|
|
||||||
u64 plt = sym.get_plt_addr(ctx);
|
|
||||||
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
write_adrp(buf, page(gotplt) - page(plt));
|
|
||||||
*(ul32 *)(buf + 4) |= bits(gotplt, 11, 3) << 10;
|
|
||||||
*(ul32 *)(buf + 8) |= (gotplt & 0xfff) << 10;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
|
|
||||||
static const ul32 insn[] = {
|
|
||||||
0x9000'0010, // adrp x16, GOT[n]
|
|
||||||
0xf940'0211, // ldr x17, [x16, GOT[n]]
|
|
||||||
0xd61f'0220, // br x17
|
|
||||||
0xd503'201f, // nop
|
|
||||||
};
|
|
||||||
|
|
||||||
u64 got = sym.get_got_addr(ctx);
|
|
||||||
u64 plt = sym.get_plt_addr(ctx);
|
|
||||||
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
write_adrp(buf, page(got) - page(plt));
|
|
||||||
*(ul32 *)(buf + 4) |= bits(got, 11, 3) << 10;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
|
|
||||||
u64 offset, u64 val) {
|
|
||||||
u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_NONE:
|
|
||||||
break;
|
|
||||||
case R_AARCH64_ABS64:
|
|
||||||
*(ul64 *)loc = val;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_PREL32:
|
|
||||||
*(ul32 *)loc = val - this->shdr.sh_addr - offset;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_PREL64:
|
|
||||||
*(ul64 *)loc = val - this->shdr.sh_addr - offset;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns true if the 32-bit instruction at `loc` is an ADRP: bit 31 set
// and bits [28:24] equal to 0b10000, with bits [30:29] ignored.
static bool is_adrp(u8 *loc) {
  // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADRP--Form-PC-relative-address-to-4KB-page-
  u32 insn = *(ul32 *)loc;
  return (insn & 0x9f00'0000) == 0x9000'0000;
}
|
|
||||||
|
|
||||||
// Returns true if the top ten bits of the instruction at `loc` match the
// LDR (immediate) pattern tested below, 0b1111'1001'01.
static bool is_ldr(u8 *loc) {
  // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate--
  u32 insn = *(ul32 *)loc;
  return (insn & 0xffc0'0000) == 0xf940'0000;
}
|
|
||||||
|
|
||||||
// Returns true if the top ten bits of the instruction at `loc` match the
// ADD (immediate) pattern tested below, 0b1001'0001'00.
static bool is_add(u8 *loc) {
  // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--immediate---Add--immediate--
  u32 insn = *(ul32 *)loc;
  return (insn & 0xffc0'0000) == 0x9100'0000;
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
ElfRel<E> *dynrel = nullptr;
|
|
||||||
if (ctx.reldyn)
|
|
||||||
dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
|
|
||||||
file.reldyn_offset + this->reldyn_offset);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
auto check = [&](i64 val, i64 lo, i64 hi) {
|
|
||||||
if (val < lo || hi <= val)
|
|
||||||
Error(ctx) << *this << ": relocation " << rel << " against "
|
|
||||||
<< sym << " out of range: " << val << " is not in ["
|
|
||||||
<< lo << ", " << hi << ")";
|
|
||||||
};
|
|
||||||
|
|
||||||
u64 S = sym.get_addr(ctx);
|
|
||||||
u64 A = rel.r_addend;
|
|
||||||
u64 P = get_addr() + rel.r_offset;
|
|
||||||
u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>);
|
|
||||||
u64 GOT = ctx.got->shdr.sh_addr;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_AARCH64_ABS64:
|
|
||||||
apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
|
|
||||||
break;
|
|
||||||
case R_AARCH64_LDST8_ABS_LO12_NC:
|
|
||||||
case R_AARCH64_ADD_ABS_LO12_NC:
|
|
||||||
*(ul32 *)loc |= bits(S + A, 11, 0) << 10;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_LDST16_ABS_LO12_NC:
|
|
||||||
*(ul32 *)loc |= bits(S + A, 11, 1) << 10;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_LDST32_ABS_LO12_NC:
|
|
||||||
*(ul32 *)loc |= bits(S + A, 11, 2) << 10;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_LDST64_ABS_LO12_NC:
|
|
||||||
*(ul32 *)loc |= bits(S + A, 11, 3) << 10;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_LDST128_ABS_LO12_NC:
|
|
||||||
*(ul32 *)loc |= bits(S + A, 11, 4) << 10;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_MOVW_UABS_G0:
|
|
||||||
check(S + A, 0, 1 << 16);
|
|
||||||
*(ul32 *)loc |= bits(S + A, 15, 0) << 5;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_MOVW_UABS_G0_NC:
|
|
||||||
*(ul32 *)loc |= bits(S + A, 15, 0) << 5;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_MOVW_UABS_G1:
|
|
||||||
check(S + A, 0, 1LL << 32);
|
|
||||||
*(ul32 *)loc |= bits(S + A, 31, 16) << 5;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_MOVW_UABS_G1_NC:
|
|
||||||
*(ul32 *)loc |= bits(S + A, 31, 16) << 5;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_MOVW_UABS_G2:
|
|
||||||
check(S + A, 0, 1LL << 48);
|
|
||||||
*(ul32 *)loc |= bits(S + A, 47, 32) << 5;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_MOVW_UABS_G2_NC:
|
|
||||||
*(ul32 *)loc |= bits(S + A, 47, 32) << 5;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_MOVW_UABS_G3:
|
|
||||||
*(ul32 *)loc |= bits(S + A, 63, 48) << 5;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_ADR_GOT_PAGE:
|
|
||||||
if (sym.has_got(ctx)) {
|
|
||||||
i64 val = page(G + GOT + A) - page(P);
|
|
||||||
check(val, -(1LL << 32), 1LL << 32);
|
|
||||||
write_adrp(loc, val);
|
|
||||||
} else {
|
|
||||||
// Relax GOT-loading ADRP+LDR to an immediate ADRP+ADD
|
|
||||||
i64 val = page(S + A) - page(P);
|
|
||||||
check(val, -(1LL << 32), 1LL << 32);
|
|
||||||
write_adrp(loc, val);
|
|
||||||
|
|
||||||
u32 reg = bits(*(ul32 *)loc, 4, 0);
|
|
||||||
*(ul32 *)(loc + 4) = 0x9100'0000 | (reg << 5) | reg; // ADD
|
|
||||||
*(ul32 *)(loc + 4) |= bits(S + A, 11, 0) << 10;
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case R_AARCH64_ADR_PREL_PG_HI21: {
|
|
||||||
// The ARM64 psABI defines that an `ADRP x0, foo` and `ADD x0, x0,
|
|
||||||
// :lo12: foo` instruction pair to materialize a PC-relative address
|
|
||||||
// in a register can be relaxed to `NOP` followed by `ADR x0, foo`
|
|
||||||
// if foo is in PC ± 1 MiB.
|
|
||||||
if (ctx.arg.relax && i + 1 < rels.size() &&
|
|
||||||
sign_extend(S + A - P - 4, 20) == S + A - P - 4) {
|
|
||||||
const ElfRel<E> &rel2 = rels[i + 1];
|
|
||||||
if (rel2.r_type == R_AARCH64_ADD_ABS_LO12_NC &&
|
|
||||||
rel2.r_sym == rel.r_sym &&
|
|
||||||
rel2.r_offset == rel.r_offset + 4 &&
|
|
||||||
rel2.r_addend == rel.r_addend &&
|
|
||||||
is_adrp(loc) &&
|
|
||||||
is_add(loc + 4)) {
|
|
||||||
u32 reg1 = bits(*(ul32 *)loc, 4, 0);
|
|
||||||
u32 reg2 = bits(*(ul32 *)(loc + 4), 4, 0);
|
|
||||||
if (reg1 == reg2) {
|
|
||||||
*(ul32 *)loc = 0xd503'201f; // nop
|
|
||||||
*(ul32 *)(loc + 4) = 0x1000'0000 | reg1; // adr
|
|
||||||
write_adr(loc + 4, S + A - P - 4);
|
|
||||||
i++;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
i64 val = page(S + A) - page(P);
|
|
||||||
check(val, -(1LL << 32), 1LL << 32);
|
|
||||||
write_adrp(loc, val);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_AARCH64_ADR_PREL_LO21:
|
|
||||||
check(S + A - P, -(1LL << 20), 1LL << 20);
|
|
||||||
write_adr(loc, S + A - P);
|
|
||||||
break;
|
|
||||||
case R_AARCH64_CALL26:
|
|
||||||
case R_AARCH64_JUMP26: {
|
|
||||||
if (sym.is_remaining_undef_weak()) {
|
|
||||||
// On ARM, calling an weak undefined symbol jumps to the
|
|
||||||
// next instruction.
|
|
||||||
*(ul32 *)loc = 0xd503'201f; // nop
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
i64 val = S + A - P;
|
|
||||||
if (val < -(1 << 27) || (1 << 27) <= val)
|
|
||||||
val = get_thunk_addr(i) + A - P;
|
|
||||||
*(ul32 *)loc |= bits(val, 27, 2);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_AARCH64_PLT32:
|
|
||||||
check(S + A - P, -(1LL << 31), 1LL << 31);
|
|
||||||
*(ul32 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_CONDBR19:
|
|
||||||
case R_AARCH64_LD_PREL_LO19:
|
|
||||||
check(S + A - P, -(1LL << 20), 1LL << 20);
|
|
||||||
*(ul32 *)loc |= bits(S + A - P, 20, 2) << 5;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_PREL16:
|
|
||||||
check(S + A - P, -(1LL << 15), 1LL << 15);
|
|
||||||
*(ul16 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_PREL32:
|
|
||||||
check(S + A - P, -(1LL << 31), 1LL << 32);
|
|
||||||
*(ul32 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_PREL64:
|
|
||||||
*(ul64 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_LD64_GOT_LO12_NC:
|
|
||||||
*(ul32 *)loc |= bits(G + GOT + A, 11, 3) << 10;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_LD64_GOTPAGE_LO15: {
|
|
||||||
i64 val = G + GOT + A - page(GOT);
|
|
||||||
check(val, 0, 1 << 15);
|
|
||||||
*(ul32 *)loc |= bits(val, 14, 3) << 10;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: {
|
|
||||||
i64 val = page(sym.get_gottp_addr(ctx) + A) - page(P);
|
|
||||||
check(val, -(1LL << 32), 1LL << 32);
|
|
||||||
write_adrp(loc, val);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
|
|
||||||
*(ul32 *)loc |= bits(sym.get_gottp_addr(ctx) + A, 11, 3) << 10;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_TLSLE_MOVW_TPREL_G0: {
|
|
||||||
i64 val = S + A - ctx.tp_addr;
|
|
||||||
check(val, -(1 << 15), 1 << 15);
|
|
||||||
write_movn_movz(loc, val);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
|
|
||||||
*(ul32 *)loc |= bits(S + A - ctx.tp_addr, 15, 0) << 5;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_TLSLE_MOVW_TPREL_G1: {
|
|
||||||
i64 val = S + A - ctx.tp_addr;
|
|
||||||
check(val, -(1LL << 31), 1LL << 31);
|
|
||||||
write_movn_movz(loc, val >> 16);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
|
|
||||||
*(ul32 *)loc |= bits(S + A - ctx.tp_addr, 31, 16) << 5;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_TLSLE_MOVW_TPREL_G2: {
|
|
||||||
i64 val = S + A - ctx.tp_addr;
|
|
||||||
check(val, -(1LL << 47), 1LL << 47);
|
|
||||||
write_movn_movz(loc, val >> 32);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_AARCH64_TLSLE_ADD_TPREL_HI12: {
|
|
||||||
i64 val = S + A - ctx.tp_addr;
|
|
||||||
check(val, 0, 1LL << 24);
|
|
||||||
*(ul32 *)loc |= bits(val, 23, 12) << 10;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_AARCH64_TLSLE_ADD_TPREL_LO12:
|
|
||||||
check(S + A - ctx.tp_addr, 0, 1 << 12);
|
|
||||||
*(ul32 *)loc |= bits(S + A - ctx.tp_addr, 11, 0) << 10;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
|
|
||||||
*(ul32 *)loc |= bits(S + A - ctx.tp_addr, 11, 0) << 10;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_TLSGD_ADR_PAGE21: {
|
|
||||||
i64 val = page(sym.get_tlsgd_addr(ctx) + A) - page(P);
|
|
||||||
check(val, -(1LL << 32), 1LL << 32);
|
|
||||||
write_adrp(loc, val);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_AARCH64_TLSGD_ADD_LO12_NC:
|
|
||||||
*(ul32 *)loc |= bits(sym.get_tlsgd_addr(ctx) + A, 11, 0) << 10;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_TLSDESC_ADR_PAGE21:
|
|
||||||
if (sym.has_tlsdesc(ctx)) {
|
|
||||||
i64 val = page(sym.get_tlsdesc_addr(ctx) + A) - page(P);
|
|
||||||
check(val, -(1LL << 32), 1LL << 32);
|
|
||||||
write_adrp(loc, val);
|
|
||||||
} else {
|
|
||||||
// adrp x0, 0 -> movz x0, #tls_ofset_hi, lsl #16
|
|
||||||
i64 val = (S + A - ctx.tp_addr);
|
|
||||||
check(val, -(1LL << 32), 1LL << 32);
|
|
||||||
*(ul32 *)loc = 0xd2a0'0000 | (bits(val, 32, 16) << 5);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case R_AARCH64_TLSDESC_LD64_LO12:
|
|
||||||
if (sym.has_tlsdesc(ctx)) {
|
|
||||||
*(ul32 *)loc |= bits(sym.get_tlsdesc_addr(ctx) + A, 11, 3) << 10;
|
|
||||||
} else {
|
|
||||||
// ldr x2, [x0] -> movk x0, #tls_ofset_lo
|
|
||||||
u32 offset_lo = (S + A - ctx.tp_addr) & 0xffff;
|
|
||||||
*(ul32 *)loc = 0xf280'0000 | (offset_lo << 5);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case R_AARCH64_TLSDESC_ADD_LO12:
|
|
||||||
if (sym.has_tlsdesc(ctx)) {
|
|
||||||
*(ul32 *)loc |= bits(sym.get_tlsdesc_addr(ctx) + A, 11, 0) << 10;
|
|
||||||
} else {
|
|
||||||
// add x0, x0, #0 -> nop
|
|
||||||
*(ul32 *)loc = 0xd503'201f;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case R_AARCH64_TLSDESC_CALL:
|
|
||||||
if (!sym.has_tlsdesc(ctx)) {
|
|
||||||
// blr x2 -> nop
|
|
||||||
*(ul32 *)loc = 0xd503'201f;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
unreachable();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
auto check = [&](i64 val, i64 lo, i64 hi) {
|
|
||||||
if (val < lo || hi <= val)
|
|
||||||
Error(ctx) << *this << ": relocation " << rel << " against "
|
|
||||||
<< sym << " out of range: " << val << " is not in ["
|
|
||||||
<< lo << ", " << hi << ")";
|
|
||||||
};
|
|
||||||
|
|
||||||
SectionFragment<E> *frag;
|
|
||||||
i64 frag_addend;
|
|
||||||
std::tie(frag, frag_addend) = get_fragment(ctx, rel);
|
|
||||||
|
|
||||||
u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
|
|
||||||
u64 A = frag ? frag_addend : (i64)rel.r_addend;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_AARCH64_ABS64:
|
|
||||||
if (std::optional<u64> val = get_tombstone(sym, frag))
|
|
||||||
*(ul64 *)loc = *val;
|
|
||||||
else
|
|
||||||
*(ul64 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_ABS32: {
|
|
||||||
i64 val = S + A;
|
|
||||||
check(val, 0, 1LL << 32);
|
|
||||||
*(ul32 *)loc = val;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: "
|
|
||||||
<< rel;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::scan_relocations(Context<E> &ctx) {
|
|
||||||
assert(shdr().sh_flags & SHF_ALLOC);
|
|
||||||
|
|
||||||
this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
// Scan relocations
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = (u8 *)(contents.data() + rel.r_offset);
|
|
||||||
|
|
||||||
if (sym.is_ifunc())
|
|
||||||
sym.flags |= NEEDS_GOT | NEEDS_PLT;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_AARCH64_ABS64:
|
|
||||||
scan_dyn_absrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_AARCH64_ADR_GOT_PAGE:
|
|
||||||
// An ADR_GOT_PAGE and GOT_LO12_NC relocation pair is used to load a
|
|
||||||
// symbol's address from GOT. If the GOT value is a link-time
|
|
||||||
// constant, we may be able to rewrite the ADRP+LDR instruction pair
|
|
||||||
// with an ADRP+ADD, eliminating a GOT memory load.
|
|
||||||
if (ctx.arg.relax && sym.is_relative() && !sym.is_imported &&
|
|
||||||
!sym.is_ifunc() && i + 1 < rels.size()) {
|
|
||||||
// ADRP+LDR must be consecutive and use the same register to relax.
|
|
||||||
const ElfRel<E> &rel2 = rels[i + 1];
|
|
||||||
if (rel2.r_type == R_AARCH64_LD64_GOT_LO12_NC &&
|
|
||||||
rel2.r_offset == rel.r_offset + 4 &&
|
|
||||||
rel2.r_sym == rel.r_sym &&
|
|
||||||
rel.r_addend == 0 &&
|
|
||||||
rel2.r_addend == 0 &&
|
|
||||||
is_adrp(loc) &&
|
|
||||||
is_ldr(loc + 4)) {
|
|
||||||
u32 rd = bits(*(ul32 *)loc, 4, 0);
|
|
||||||
u32 rn = bits(*(ul32 *)(loc + 4), 9, 5);
|
|
||||||
u32 rt = bits(*(ul32 *)(loc + 4), 4, 0);
|
|
||||||
if (rd == rn && rn == rt) {
|
|
||||||
i++;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
sym.flags |= NEEDS_GOT;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_LD64_GOT_LO12_NC:
|
|
||||||
case R_AARCH64_LD64_GOTPAGE_LO15:
|
|
||||||
sym.flags |= NEEDS_GOT;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_CALL26:
|
|
||||||
case R_AARCH64_JUMP26:
|
|
||||||
case R_AARCH64_PLT32:
|
|
||||||
if (sym.is_imported)
|
|
||||||
sym.flags |= NEEDS_PLT;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
|
|
||||||
case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
|
|
||||||
sym.flags |= NEEDS_GOTTP;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_ADR_PREL_PG_HI21:
|
|
||||||
scan_pcrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_AARCH64_TLSGD_ADR_PAGE21:
|
|
||||||
sym.flags |= NEEDS_TLSGD;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_TLSDESC_ADR_PAGE21:
|
|
||||||
case R_AARCH64_TLSDESC_LD64_LO12:
|
|
||||||
case R_AARCH64_TLSDESC_ADD_LO12:
|
|
||||||
if (!relax_tlsdesc(ctx, sym))
|
|
||||||
sym.flags |= NEEDS_TLSDESC;
|
|
||||||
break;
|
|
||||||
case R_AARCH64_TLSLE_MOVW_TPREL_G0:
|
|
||||||
case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
|
|
||||||
case R_AARCH64_TLSLE_MOVW_TPREL_G1:
|
|
||||||
case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
|
|
||||||
case R_AARCH64_TLSLE_MOVW_TPREL_G2:
|
|
||||||
case R_AARCH64_TLSLE_ADD_TPREL_HI12:
|
|
||||||
case R_AARCH64_TLSLE_ADD_TPREL_LO12:
|
|
||||||
case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
|
|
||||||
check_tlsle(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_AARCH64_ADD_ABS_LO12_NC:
|
|
||||||
case R_AARCH64_ADR_PREL_LO21:
|
|
||||||
case R_AARCH64_CONDBR19:
|
|
||||||
case R_AARCH64_LD_PREL_LO19:
|
|
||||||
case R_AARCH64_LDST16_ABS_LO12_NC:
|
|
||||||
case R_AARCH64_LDST32_ABS_LO12_NC:
|
|
||||||
case R_AARCH64_LDST64_ABS_LO12_NC:
|
|
||||||
case R_AARCH64_LDST128_ABS_LO12_NC:
|
|
||||||
case R_AARCH64_LDST8_ABS_LO12_NC:
|
|
||||||
case R_AARCH64_MOVW_UABS_G0:
|
|
||||||
case R_AARCH64_MOVW_UABS_G0_NC:
|
|
||||||
case R_AARCH64_MOVW_UABS_G1:
|
|
||||||
case R_AARCH64_MOVW_UABS_G1_NC:
|
|
||||||
case R_AARCH64_MOVW_UABS_G2:
|
|
||||||
case R_AARCH64_MOVW_UABS_G2_NC:
|
|
||||||
case R_AARCH64_MOVW_UABS_G3:
|
|
||||||
case R_AARCH64_PREL16:
|
|
||||||
case R_AARCH64_PREL32:
|
|
||||||
case R_AARCH64_PREL64:
|
|
||||||
case R_AARCH64_TLSGD_ADD_LO12_NC:
|
|
||||||
case R_AARCH64_TLSDESC_CALL:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Error(ctx) << *this << ": unknown relocation: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void RangeExtensionThunk<E>::copy_buf(Context<E> &ctx) {
|
|
||||||
u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset;
|
|
||||||
|
|
||||||
static const ul32 data[] = {
|
|
||||||
0x9000'0010, // adrp x16, 0 # R_AARCH64_ADR_PREL_PG_HI21
|
|
||||||
0x9100'0210, // add x16, x16 # R_AARCH64_ADD_ABS_LO12_NC
|
|
||||||
0xd61f'0200, // br x16
|
|
||||||
};
|
|
||||||
|
|
||||||
static_assert(E::thunk_size == sizeof(data));
|
|
||||||
|
|
||||||
for (i64 i = 0; i < symbols.size(); i++) {
|
|
||||||
u64 S = symbols[i]->get_addr(ctx);
|
|
||||||
u64 P = output_section.shdr.sh_addr + offset + i * E::thunk_size;
|
|
||||||
|
|
||||||
u8 *loc = buf + i * E::thunk_size;
|
|
||||||
memcpy(loc , data, sizeof(data));
|
|
||||||
write_adrp(loc, page(S) - page(P));
|
|
||||||
*(ul32 *)(loc + 4) |= bits(S, 11, 0) << 10;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace mold::elf
|
|
565
third_party/mold/elf/arch-i386.cc
vendored
565
third_party/mold/elf/arch-i386.cc
vendored
|
@ -1,565 +0,0 @@
|
||||||
// clang-format off
|
|
||||||
// i386 is similar to x86-64 but lacks PC-relative memory access
|
|
||||||
// instructions. So it's not straightforward to support position-
|
|
||||||
// independent code (PIC) on that target.
|
|
||||||
//
|
|
||||||
// If an object file is compiled with -fPIC, a function that needs to load
|
|
||||||
// a value from memory first obtains its own address with the following
|
|
||||||
// code
|
|
||||||
//
|
|
||||||
// call __x86.get_pc_thunk.bx
|
|
||||||
//
|
|
||||||
// where __x86.get_pc_thunk.bx is defined as
|
|
||||||
//
|
|
||||||
// __x86.get_pc_thunk.bx:
|
|
||||||
// mov (%esp), %ebx # move the return address to %ebx
|
|
||||||
// ret
|
|
||||||
//
|
|
||||||
// With the function's own address (or, more precisely, the address
|
|
||||||
// immediately after the call instruction), the function can compute an
|
|
||||||
// absolute address of a variable with its address + link-time constant.
|
|
||||||
//
|
|
||||||
// Executing call-mov-ret isn't very cheap, and allocating one register to
|
|
||||||
// store PC isn't cheap either, especially given that i386 has only 8
|
|
||||||
// general-purpose registers. But that's the cost of PIC on i386. You need
|
|
||||||
// to pay it when creating a .so and a position-independent executable.
|
|
||||||
//
|
|
||||||
// When a position-independent function calls another function, it sets
|
|
||||||
// %ebx to the address of .got. Position-independent PLT entries use that
|
|
||||||
// register to load values from .got.plt/.got.
|
|
||||||
//
|
|
||||||
// If we are creating a position-dependent executable (PDE), we can't
|
|
||||||
// assume that %ebx is set to .got. For PDE, we need to create position-
|
|
||||||
// dependent PLT entries which don't use %ebx.
|
|
||||||
//
|
|
||||||
// https://github.com/rui314/psabi/blob/main/i386.pdf
|
|
||||||
|
|
||||||
#include "third_party/mold/elf/mold.h"
|
|
||||||
|
|
||||||
namespace mold::elf {
|
|
||||||
|
|
||||||
using E = I386;
|
|
||||||
|
|
||||||
template <>
|
|
||||||
i64 get_addend(u8 *loc, const ElfRel<E> &rel) {
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_386_8:
|
|
||||||
case R_386_PC8:
|
|
||||||
return *loc;
|
|
||||||
case R_386_16:
|
|
||||||
case R_386_PC16:
|
|
||||||
return *(ul16 *)loc;
|
|
||||||
case R_386_32:
|
|
||||||
case R_386_PC32:
|
|
||||||
case R_386_GOT32:
|
|
||||||
case R_386_GOT32X:
|
|
||||||
case R_386_PLT32:
|
|
||||||
case R_386_GOTOFF:
|
|
||||||
case R_386_GOTPC:
|
|
||||||
case R_386_TLS_LDM:
|
|
||||||
case R_386_TLS_GOTIE:
|
|
||||||
case R_386_TLS_LE:
|
|
||||||
case R_386_TLS_IE:
|
|
||||||
case R_386_TLS_GD:
|
|
||||||
case R_386_TLS_LDO_32:
|
|
||||||
case R_386_SIZE32:
|
|
||||||
case R_386_TLS_GOTDESC:
|
|
||||||
return *(ul32 *)loc;
|
|
||||||
default:
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_addend(u8 *loc, i64 val, const ElfRel<E> &rel) {
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_386_NONE:
|
|
||||||
break;
|
|
||||||
case R_386_8:
|
|
||||||
case R_386_PC8:
|
|
||||||
*loc = val;
|
|
||||||
break;
|
|
||||||
case R_386_16:
|
|
||||||
case R_386_PC16:
|
|
||||||
*(ul16 *)loc = val;
|
|
||||||
break;
|
|
||||||
case R_386_32:
|
|
||||||
case R_386_PC32:
|
|
||||||
case R_386_GOT32:
|
|
||||||
case R_386_GOT32X:
|
|
||||||
case R_386_PLT32:
|
|
||||||
case R_386_GOTOFF:
|
|
||||||
case R_386_GOTPC:
|
|
||||||
case R_386_TLS_LDM:
|
|
||||||
case R_386_TLS_GOTIE:
|
|
||||||
case R_386_TLS_LE:
|
|
||||||
case R_386_TLS_IE:
|
|
||||||
case R_386_TLS_GD:
|
|
||||||
case R_386_TLS_LDO_32:
|
|
||||||
case R_386_SIZE32:
|
|
||||||
case R_386_TLS_GOTDESC:
|
|
||||||
*(ul32 *)loc = val;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
unreachable();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_plt_header(Context<E> &ctx, u8 *buf) {
|
|
||||||
if (ctx.arg.pic) {
|
|
||||||
static const u8 insn[] = {
|
|
||||||
0xf3, 0x0f, 0x1e, 0xfb, // endbr32
|
|
||||||
0x51, // push %ecx
|
|
||||||
0x8d, 0x8b, 0, 0, 0, 0, // lea GOTPLT+4(%ebx), %ecx
|
|
||||||
0xff, 0x31, // push (%ecx)
|
|
||||||
0xff, 0x61, 0x04, // jmp *0x4(%ecx)
|
|
||||||
};
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ul32 *)(buf + 7) = ctx.gotplt->shdr.sh_addr - ctx.got->shdr.sh_addr + 4;
|
|
||||||
} else {
|
|
||||||
static const u8 insn[] = {
|
|
||||||
0xf3, 0x0f, 0x1e, 0xfb, // endbr32
|
|
||||||
0x51, // push %ecx
|
|
||||||
0xb9, 0, 0, 0, 0, // mov GOTPLT+4, %ecx
|
|
||||||
0xff, 0x31, // push (%ecx)
|
|
||||||
0xff, 0x61, 0x04, // jmp *0x4(%ecx)
|
|
||||||
0xcc, // (padding)
|
|
||||||
};
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ul32 *)(buf + 6) = ctx.gotplt->shdr.sh_addr + 4;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
|
|
||||||
if (ctx.arg.pic) {
|
|
||||||
static const u8 insn[] = {
|
|
||||||
0xf3, 0x0f, 0x1e, 0xfb, // endbr32
|
|
||||||
0xb9, 0, 0, 0, 0, // mov $reloc_offset, %ecx
|
|
||||||
0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
|
|
||||||
0xcc, // (padding)
|
|
||||||
};
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ul32 *)(buf + 5) = sym.get_plt_idx(ctx) * sizeof(ElfRel<E>);
|
|
||||||
*(ul32 *)(buf + 11) = sym.get_gotplt_addr(ctx) - ctx.got->shdr.sh_addr;
|
|
||||||
} else {
|
|
||||||
static const u8 insn[] = {
|
|
||||||
0xf3, 0x0f, 0x1e, 0xfb, // endbr32
|
|
||||||
0xb9, 0, 0, 0, 0, // mov $reloc_offset, %ecx
|
|
||||||
0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
|
|
||||||
0xcc, // (padding)
|
|
||||||
};
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ul32 *)(buf + 5) = sym.get_plt_idx(ctx) * sizeof(ElfRel<E>);
|
|
||||||
*(ul32 *)(buf + 11) = sym.get_gotplt_addr(ctx);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
|
|
||||||
if (ctx.arg.pic) {
|
|
||||||
static const u8 insn[] = {
|
|
||||||
0xf3, 0x0f, 0x1e, 0xfb, // endbr32
|
|
||||||
0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
|
|
||||||
0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // (padding)
|
|
||||||
};
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ul32 *)(buf + 6) = sym.get_got_addr(ctx) - ctx.got->shdr.sh_addr;
|
|
||||||
} else {
|
|
||||||
static const u8 insn[] = {
|
|
||||||
0xf3, 0x0f, 0x1e, 0xfb, // endbr32
|
|
||||||
0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
|
|
||||||
0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // (padding)
|
|
||||||
};
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ul32 *)(buf + 6) = sym.get_got_addr(ctx);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
|
|
||||||
u64 offset, u64 val) {
|
|
||||||
u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_NONE:
|
|
||||||
break;
|
|
||||||
case R_386_32:
|
|
||||||
*(ul32 *)loc = val;
|
|
||||||
break;
|
|
||||||
case R_386_PC32:
|
|
||||||
*(ul32 *)loc = val - this->shdr.sh_addr - offset;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Given a pointer to the two bytes (opcode, ModRM) preceding a GOT32X
// relocation field, returns the replacement for those two bytes that
// turns the load into an address computation:
//
//   mov imm(%reg1), %reg2 -> lea imm(%reg1), %reg2
//
// The result holds the new opcode in bits 15..8 and the unchanged second
// byte in bits 7..0. Returns 0 if the instruction is not such a mov.
static u32 relax_got32x(u8 *loc) {
  const bool is_mov_load = (loc[0] == 0x8b);
  return is_mov_load ? (0x8d00 | loc[1]) : 0;
}
|
|
||||||
|
|
||||||
// Relax GD to LE
|
|
||||||
static void relax_gd_to_le(u8 *loc, ElfRel<E> rel, u64 val) {
|
|
||||||
static const u8 insn[] = {
|
|
||||||
0x65, 0xa1, 0, 0, 0, 0, // mov %gs:0, %eax
|
|
||||||
0x81, 0xc0, 0, 0, 0, 0, // add $tp_offset, %eax
|
|
||||||
};
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_386_PLT32:
|
|
||||||
case R_386_PC32:
|
|
||||||
memcpy(loc - 3, insn, sizeof(insn));
|
|
||||||
*(ul32 *)(loc + 5) = val;
|
|
||||||
break;
|
|
||||||
case R_386_GOT32:
|
|
||||||
case R_386_GOT32X:
|
|
||||||
memcpy(loc - 2, insn, sizeof(insn));
|
|
||||||
*(ul32 *)(loc + 6) = val;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
unreachable();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Relax LD to LE
|
|
||||||
static void relax_ld_to_le(u8 *loc, ElfRel<E> rel, u64 val) {
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_386_PLT32:
|
|
||||||
case R_386_PC32: {
|
|
||||||
static const u8 insn[] = {
|
|
||||||
0x65, 0xa1, 0, 0, 0, 0, // mov %gs:0, %eax
|
|
||||||
0x2d, 0, 0, 0, 0, // sub $tls_size, %eax
|
|
||||||
};
|
|
||||||
memcpy(loc - 2, insn, sizeof(insn));
|
|
||||||
*(ul32 *)(loc + 5) = val;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_386_GOT32:
|
|
||||||
case R_386_GOT32X: {
|
|
||||||
static const u8 insn[] = {
|
|
||||||
0x65, 0xa1, 0, 0, 0, 0, // mov %gs:0, %eax
|
|
||||||
0x2d, 0, 0, 0, 0, // sub $tls_size, %eax
|
|
||||||
0x90, // nop
|
|
||||||
};
|
|
||||||
memcpy(loc - 2, insn, sizeof(insn));
|
|
||||||
*(ul32 *)(loc + 5) = val;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
unreachable();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
ElfRel<E> *dynrel = nullptr;
|
|
||||||
if (ctx.reldyn)
|
|
||||||
dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
|
|
||||||
file.reldyn_offset + this->reldyn_offset);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
auto check = [&](i64 val, i64 lo, i64 hi) {
|
|
||||||
if (val < lo || hi <= val)
|
|
||||||
Error(ctx) << *this << ": relocation " << rel << " against "
|
|
||||||
<< sym << " out of range: " << val << " is not in ["
|
|
||||||
<< lo << ", " << hi << ")";
|
|
||||||
};
|
|
||||||
|
|
||||||
u64 S = sym.get_addr(ctx);
|
|
||||||
u64 A = get_addend(*this, rel);
|
|
||||||
u64 P = get_addr() + rel.r_offset;
|
|
||||||
u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>);
|
|
||||||
u64 GOT = ctx.got->shdr.sh_addr;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_386_8:
|
|
||||||
check(S + A, 0, 1 << 8);
|
|
||||||
*loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_386_16:
|
|
||||||
check(S + A, 0, 1 << 16);
|
|
||||||
*(ul16 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_386_32:
|
|
||||||
apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
|
|
||||||
break;
|
|
||||||
case R_386_PC8:
|
|
||||||
check(S + A - P, -(1 << 7), 1 << 7);
|
|
||||||
*loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_386_PC16:
|
|
||||||
check(S + A - P, -(1 << 15), 1 << 15);
|
|
||||||
*(ul16 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_386_PC32:
|
|
||||||
case R_386_PLT32:
|
|
||||||
*(ul32 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_386_GOT32:
|
|
||||||
*(ul32 *)loc = G + A;
|
|
||||||
break;
|
|
||||||
case R_386_GOT32X:
|
|
||||||
if (sym.has_got(ctx)) {
|
|
||||||
*(ul32 *)loc = G + A;
|
|
||||||
} else {
|
|
||||||
u32 insn = relax_got32x(loc - 2);
|
|
||||||
assert(insn);
|
|
||||||
loc[-2] = insn >> 8;
|
|
||||||
loc[-1] = insn;
|
|
||||||
*(ul32 *)loc = S + A - GOT;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case R_386_GOTOFF:
|
|
||||||
*(ul32 *)loc = S + A - GOT;
|
|
||||||
break;
|
|
||||||
case R_386_GOTPC:
|
|
||||||
*(ul32 *)loc = GOT + A - P;
|
|
||||||
break;
|
|
||||||
case R_386_TLS_GOTIE:
|
|
||||||
*(ul32 *)loc = sym.get_gottp_addr(ctx) + A - GOT;
|
|
||||||
break;
|
|
||||||
case R_386_TLS_LE:
|
|
||||||
*(ul32 *)loc = S + A - ctx.tp_addr;
|
|
||||||
break;
|
|
||||||
case R_386_TLS_IE:
|
|
||||||
*(ul32 *)loc = sym.get_gottp_addr(ctx) + A;
|
|
||||||
break;
|
|
||||||
case R_386_TLS_GD:
|
|
||||||
if (sym.has_tlsgd(ctx)) {
|
|
||||||
*(ul32 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT;
|
|
||||||
} else {
|
|
||||||
relax_gd_to_le(loc, rels[i + 1], S - ctx.tp_addr);
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case R_386_TLS_LDM:
|
|
||||||
if (ctx.got->has_tlsld(ctx)) {
|
|
||||||
*(ul32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT;
|
|
||||||
} else {
|
|
||||||
relax_ld_to_le(loc, rels[i + 1], ctx.tp_addr - ctx.tls_begin);
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case R_386_TLS_LDO_32:
|
|
||||||
*(ul32 *)loc = S + A - ctx.dtp_addr;
|
|
||||||
break;
|
|
||||||
case R_386_SIZE32:
|
|
||||||
*(ul32 *)loc = sym.esym().st_size + A;
|
|
||||||
break;
|
|
||||||
case R_386_TLS_GOTDESC:
|
|
||||||
if (sym.has_tlsdesc(ctx)) {
|
|
||||||
*(ul32 *)loc = sym.get_tlsdesc_addr(ctx) + A - GOT;
|
|
||||||
} else {
|
|
||||||
static const u8 insn[] = {
|
|
||||||
0x8d, 0x05, 0, 0, 0, 0, // lea 0, %eax
|
|
||||||
};
|
|
||||||
memcpy(loc - 2, insn, sizeof(insn));
|
|
||||||
*(ul32 *)loc = S + A - ctx.tp_addr;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case R_386_TLS_DESC_CALL:
|
|
||||||
if (!sym.has_tlsdesc(ctx)) {
|
|
||||||
// call *(%eax) -> nop
|
|
||||||
loc[0] = 0x66;
|
|
||||||
loc[1] = 0x90;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
unreachable();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
auto check = [&](i64 val, i64 lo, i64 hi) {
|
|
||||||
if (val < lo || hi <= val)
|
|
||||||
Error(ctx) << *this << ": relocation " << rel << " against "
|
|
||||||
<< sym << " out of range: " << val << " is not in ["
|
|
||||||
<< lo << ", " << hi << ")";
|
|
||||||
};
|
|
||||||
|
|
||||||
SectionFragment<E> *frag;
|
|
||||||
i64 frag_addend;
|
|
||||||
std::tie(frag, frag_addend) = get_fragment(ctx, rel);
|
|
||||||
|
|
||||||
u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
|
|
||||||
u64 A = frag ? frag_addend : get_addend(*this, rel);
|
|
||||||
u64 GOT = ctx.got->shdr.sh_addr;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_386_8:
|
|
||||||
check(S + A, 0, 1 << 8);
|
|
||||||
*loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_386_16:
|
|
||||||
check(S + A, 0, 1 << 16);
|
|
||||||
*(ul16 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_386_32:
|
|
||||||
if (std::optional<u64> val = get_tombstone(sym, frag))
|
|
||||||
*(ul32 *)loc = *val;
|
|
||||||
else
|
|
||||||
*(ul32 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_386_PC8:
|
|
||||||
check(S + A, -(1 << 7), 1 << 7);
|
|
||||||
*loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_386_PC16:
|
|
||||||
check(S + A, -(1 << 15), 1 << 15);
|
|
||||||
*(ul16 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_386_PC32:
|
|
||||||
*(ul32 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_386_GOTPC:
|
|
||||||
*(ul32 *)loc = GOT + A;
|
|
||||||
break;
|
|
||||||
case R_386_GOTOFF:
|
|
||||||
*(ul32 *)loc = S + A - GOT;
|
|
||||||
break;
|
|
||||||
case R_386_TLS_LDO_32:
|
|
||||||
if (std::optional<u64> val = get_tombstone(sym, frag))
|
|
||||||
*(ul32 *)loc = *val;
|
|
||||||
else
|
|
||||||
*(ul32 *)loc = S + A - ctx.dtp_addr;
|
|
||||||
break;
|
|
||||||
case R_386_SIZE32:
|
|
||||||
*(ul32 *)loc = sym.esym().st_size + A;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
unreachable();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::scan_relocations(Context<E> &ctx) {
|
|
||||||
assert(shdr().sh_flags & SHF_ALLOC);
|
|
||||||
|
|
||||||
this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
// Scan relocations
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = (u8 *)(contents.data() + rel.r_offset);
|
|
||||||
|
|
||||||
if (sym.is_ifunc())
|
|
||||||
sym.flags |= NEEDS_GOT | NEEDS_PLT;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_386_8:
|
|
||||||
case R_386_16:
|
|
||||||
scan_absrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_386_32:
|
|
||||||
scan_dyn_absrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_386_PC8:
|
|
||||||
case R_386_PC16:
|
|
||||||
case R_386_PC32:
|
|
||||||
scan_pcrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_386_GOT32:
|
|
||||||
case R_386_GOTPC:
|
|
||||||
sym.flags |= NEEDS_GOT;
|
|
||||||
break;
|
|
||||||
case R_386_GOT32X: {
|
|
||||||
// We always want to relax GOT32X because static PIE doesn't
|
|
||||||
// work without it.
|
|
||||||
bool do_relax = !sym.is_imported && sym.is_relative() &&
|
|
||||||
relax_got32x(loc - 2);
|
|
||||||
if (!do_relax)
|
|
||||||
sym.flags |= NEEDS_GOT;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_386_PLT32:
|
|
||||||
if (sym.is_imported)
|
|
||||||
sym.flags |= NEEDS_PLT;
|
|
||||||
break;
|
|
||||||
case R_386_TLS_GOTIE:
|
|
||||||
case R_386_TLS_IE:
|
|
||||||
sym.flags |= NEEDS_GOTTP;
|
|
||||||
break;
|
|
||||||
case R_386_TLS_GD:
|
|
||||||
if (i + 1 == rels.size())
|
|
||||||
Fatal(ctx) << *this << ": TLS_GD reloc must be followed by PLT or GOT32";
|
|
||||||
|
|
||||||
if (u32 ty = rels[i + 1].r_type;
|
|
||||||
ty != R_386_PLT32 && ty != R_386_PC32 &&
|
|
||||||
ty != R_386_GOT32 && ty != R_386_GOT32X)
|
|
||||||
Fatal(ctx) << *this << ": TLS_GD reloc must be followed by PLT or GOT32";
|
|
||||||
|
|
||||||
// We always relax if -static because libc.a doesn't contain
|
|
||||||
// __tls_get_addr().
|
|
||||||
if (ctx.arg.is_static ||
|
|
||||||
(ctx.arg.relax && !ctx.arg.shared && !sym.is_imported))
|
|
||||||
i++;
|
|
||||||
else
|
|
||||||
sym.flags |= NEEDS_TLSGD;
|
|
||||||
break;
|
|
||||||
case R_386_TLS_LDM:
|
|
||||||
if (i + 1 == rels.size())
|
|
||||||
Fatal(ctx) << *this << ": TLS_LDM reloc must be followed by PLT or GOT32";
|
|
||||||
|
|
||||||
if (u32 ty = rels[i + 1].r_type;
|
|
||||||
ty != R_386_PLT32 && ty != R_386_PC32 &&
|
|
||||||
ty != R_386_GOT32 && ty != R_386_GOT32X)
|
|
||||||
Fatal(ctx) << *this << ": TLS_LDM reloc must be followed by PLT or GOT32";
|
|
||||||
|
|
||||||
// We always relax if -static because libc.a doesn't contain
|
|
||||||
// __tls_get_addr().
|
|
||||||
if (ctx.arg.is_static || (ctx.arg.relax && !ctx.arg.shared))
|
|
||||||
i++;
|
|
||||||
else
|
|
||||||
ctx.needs_tlsld = true;
|
|
||||||
break;
|
|
||||||
case R_386_TLS_GOTDESC:
|
|
||||||
if (!relax_tlsdesc(ctx, sym))
|
|
||||||
sym.flags |= NEEDS_TLSDESC;
|
|
||||||
break;
|
|
||||||
case R_386_TLS_LE:
|
|
||||||
check_tlsle(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_386_GOTOFF:
|
|
||||||
case R_386_TLS_LDO_32:
|
|
||||||
case R_386_SIZE32:
|
|
||||||
case R_386_TLS_DESC_CALL:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Error(ctx) << *this << ": unknown relocation: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace mold::elf
|
|
326
third_party/mold/elf/arch-m68k.cc
vendored
326
third_party/mold/elf/arch-m68k.cc
vendored
|
@ -1,326 +0,0 @@
|
||||||
// clang-format off
|
|
||||||
// This file contains code for the Motorola 68000 series microprocessors,
|
|
||||||
// which is often abbreviated as m68k. Running a Unix-like system on a
|
|
||||||
// m68k-based machine today is probably a retro-computing hobby activity,
|
|
||||||
// but the processor was a popular choice to build Unix computers during
|
|
||||||
// the '80s. Early Sun workstations, for example, used m68k. Macintosh until
|
|
||||||
// 1994 were based on m68k as well until they switched to PowerPC (and
|
|
||||||
// then to x86 and to ARM.)
|
|
||||||
//
|
|
||||||
// From the linker's point of view, it is not hard to support m68k. It's
|
|
||||||
// just a 32-bit big-endian CISC ISA. Compared to contemporary i386,
|
|
||||||
// m68k's psABI is actually simpler because m68k has PC-relative memory
|
|
||||||
// access instructions and therefore can support position-independent
|
|
||||||
// code without too much hassle.
|
|
||||||
//
|
|
||||||
// https://github.com/rui314/psabi/blob/main/m68k.pdf
|
|
||||||
|
|
||||||
#include "third_party/mold/elf/mold.h"
|
|
||||||
|
|
||||||
namespace mold::elf {
|
|
||||||
|
|
||||||
using E = M68K;
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_plt_header(Context<E> &ctx, u8 *buf) {
|
|
||||||
static const u8 insn[] = {
|
|
||||||
0x2f, 0x00, // move.l %d0, -(%sp)
|
|
||||||
0x2f, 0x3b, 0x01, 0x70, 0, 0, 0, 0, // move.l (GOTPLT+4, %pc), -(%sp)
|
|
||||||
0x4e, 0xfb, 0x01, 0x71, 0, 0, 0, 0, // jmp ([GOTPLT+8, %pc])
|
|
||||||
};
|
|
||||||
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ub32 *)(buf + 6) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr;
|
|
||||||
*(ub32 *)(buf + 14) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
|
|
||||||
static const u8 insn[] = {
|
|
||||||
0x20, 0x3c, 0, 0, 0, 0, // move.l PLT_OFFSET, %d0
|
|
||||||
0x4e, 0xfb, 0x01, 0x71, 0, 0, 0, 0, // jmp ([GOTPLT_ENTRY, %pc])
|
|
||||||
};
|
|
||||||
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ub32 *)(buf + 2) = sym.get_plt_idx(ctx) * sizeof(ElfRel<E>);
|
|
||||||
*(ub32 *)(buf + 10) = sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx) - 8;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
|
|
||||||
static const u8 insn[] = {
|
|
||||||
0x4e, 0xfb, 0x01, 0x71, 0, 0, 0, 0, // jmp ([GOT_ENTRY, %pc])
|
|
||||||
};
|
|
||||||
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ub32 *)(buf + 4) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
|
|
||||||
u64 offset, u64 val) {
|
|
||||||
u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_NONE:
|
|
||||||
break;
|
|
||||||
case R_68K_32:
|
|
||||||
*(ub32 *)loc = val;
|
|
||||||
break;
|
|
||||||
case R_68K_PC32:
|
|
||||||
*(ub32 *)loc = val - this->shdr.sh_addr - offset;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
ElfRel<E> *dynrel = nullptr;
|
|
||||||
if (ctx.reldyn)
|
|
||||||
dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
|
|
||||||
file.reldyn_offset + this->reldyn_offset);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
auto check = [&](i64 val, i64 lo, i64 hi) {
|
|
||||||
if (val < lo || hi <= val)
|
|
||||||
Error(ctx) << *this << ": relocation " << rel << " against "
|
|
||||||
<< sym << " out of range: " << val << " is not in ["
|
|
||||||
<< lo << ", " << hi << ")";
|
|
||||||
};
|
|
||||||
|
|
||||||
auto write16 = [&](u64 val) {
|
|
||||||
check(val, 0, 1 << 16);
|
|
||||||
*(ub16 *)loc = val;
|
|
||||||
};
|
|
||||||
|
|
||||||
auto write16s = [&](u64 val) {
|
|
||||||
check(val, -(1 << 15), 1 << 15);
|
|
||||||
*(ub16 *)loc = val;
|
|
||||||
};
|
|
||||||
|
|
||||||
auto write8 = [&](u64 val) {
|
|
||||||
check(val, 0, 1 << 8);
|
|
||||||
*loc = val;
|
|
||||||
};
|
|
||||||
|
|
||||||
auto write8s = [&](u64 val) {
|
|
||||||
check(val, -(1 << 7), 1 << 7);
|
|
||||||
*loc = val;
|
|
||||||
};
|
|
||||||
|
|
||||||
u64 S = sym.get_addr(ctx);
|
|
||||||
u64 A = rel.r_addend;
|
|
||||||
u64 P = get_addr() + rel.r_offset;
|
|
||||||
u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>);
|
|
||||||
u64 GOT = ctx.got->shdr.sh_addr;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_68K_32:
|
|
||||||
apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
|
|
||||||
break;
|
|
||||||
case R_68K_16:
|
|
||||||
write16(S + A);
|
|
||||||
break;
|
|
||||||
case R_68K_8:
|
|
||||||
write8(S + A);
|
|
||||||
break;
|
|
||||||
case R_68K_PC32:
|
|
||||||
case R_68K_PLT32:
|
|
||||||
*(ub32 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_68K_PC16:
|
|
||||||
case R_68K_PLT16:
|
|
||||||
write16s(S + A - P);
|
|
||||||
break;
|
|
||||||
case R_68K_PC8:
|
|
||||||
case R_68K_PLT8:
|
|
||||||
write8s(S + A - P);
|
|
||||||
break;
|
|
||||||
case R_68K_GOTPCREL32:
|
|
||||||
*(ub32 *)loc = GOT + A - P;
|
|
||||||
break;
|
|
||||||
case R_68K_GOTPCREL16:
|
|
||||||
write16s(GOT + A - P);
|
|
||||||
break;
|
|
||||||
case R_68K_GOTPCREL8:
|
|
||||||
write8s(GOT + A - P);
|
|
||||||
break;
|
|
||||||
case R_68K_GOTOFF32:
|
|
||||||
*(ub32 *)loc = G + A;
|
|
||||||
break;
|
|
||||||
case R_68K_GOTOFF16:
|
|
||||||
write16(G + A);
|
|
||||||
break;
|
|
||||||
case R_68K_GOTOFF8:
|
|
||||||
write8(G + A);
|
|
||||||
break;
|
|
||||||
case R_68K_TLS_GD32:
|
|
||||||
*(ub32 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT;
|
|
||||||
break;
|
|
||||||
case R_68K_TLS_GD16:
|
|
||||||
write16(sym.get_tlsgd_addr(ctx) + A - GOT);
|
|
||||||
break;
|
|
||||||
case R_68K_TLS_GD8:
|
|
||||||
write8(sym.get_tlsgd_addr(ctx) + A - GOT);
|
|
||||||
break;
|
|
||||||
case R_68K_TLS_LDM32:
|
|
||||||
*(ub32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT;
|
|
||||||
break;
|
|
||||||
case R_68K_TLS_LDM16:
|
|
||||||
write16(ctx.got->get_tlsld_addr(ctx) + A - GOT);
|
|
||||||
break;
|
|
||||||
case R_68K_TLS_LDM8:
|
|
||||||
write8(ctx.got->get_tlsld_addr(ctx) + A - GOT);
|
|
||||||
break;
|
|
||||||
case R_68K_TLS_LDO32:
|
|
||||||
*(ub32 *)loc = S + A - ctx.dtp_addr;
|
|
||||||
break;
|
|
||||||
case R_68K_TLS_LDO16:
|
|
||||||
write16s(S + A - ctx.dtp_addr);
|
|
||||||
break;
|
|
||||||
case R_68K_TLS_LDO8:
|
|
||||||
write8s(S + A - ctx.dtp_addr);
|
|
||||||
break;
|
|
||||||
case R_68K_TLS_IE32:
|
|
||||||
*(ub32 *)loc = sym.get_gottp_addr(ctx) + A - GOT;
|
|
||||||
break;
|
|
||||||
case R_68K_TLS_IE16:
|
|
||||||
write16(sym.get_gottp_addr(ctx) + A - GOT);
|
|
||||||
break;
|
|
||||||
case R_68K_TLS_IE8:
|
|
||||||
write8(sym.get_gottp_addr(ctx) + A - GOT);
|
|
||||||
break;
|
|
||||||
case R_68K_TLS_LE32:
|
|
||||||
*(ub32 *)loc = S + A - ctx.tp_addr;
|
|
||||||
break;
|
|
||||||
case R_68K_TLS_LE16:
|
|
||||||
write16(S + A - ctx.tp_addr);
|
|
||||||
break;
|
|
||||||
case R_68K_TLS_LE8:
|
|
||||||
write8(S + A - ctx.tp_addr);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
unreachable();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
SectionFragment<E> *frag;
|
|
||||||
i64 frag_addend;
|
|
||||||
std::tie(frag, frag_addend) = get_fragment(ctx, rel);
|
|
||||||
|
|
||||||
u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
|
|
||||||
u64 A = frag ? frag_addend : (i64)rel.r_addend;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_68K_32:
|
|
||||||
if (std::optional<u64> val = get_tombstone(sym, frag))
|
|
||||||
*(ub32 *)loc = *val;
|
|
||||||
else
|
|
||||||
*(ub32 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: "
|
|
||||||
<< rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::scan_relocations(Context<E> &ctx) {
|
|
||||||
assert(shdr().sh_flags & SHF_ALLOC);
|
|
||||||
|
|
||||||
this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
|
|
||||||
if (sym.is_ifunc())
|
|
||||||
Error(ctx) << sym << ": GNU ifunc symbol is not supported on m68k";
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_68K_32:
|
|
||||||
scan_dyn_absrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_68K_16:
|
|
||||||
case R_68K_8:
|
|
||||||
scan_absrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_68K_PC32:
|
|
||||||
case R_68K_PC16:
|
|
||||||
case R_68K_PC8:
|
|
||||||
scan_pcrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_68K_GOTPCREL32:
|
|
||||||
case R_68K_GOTPCREL16:
|
|
||||||
case R_68K_GOTPCREL8:
|
|
||||||
case R_68K_GOTOFF32:
|
|
||||||
case R_68K_GOTOFF16:
|
|
||||||
case R_68K_GOTOFF8:
|
|
||||||
sym.flags |= NEEDS_GOT;
|
|
||||||
break;
|
|
||||||
case R_68K_PLT32:
|
|
||||||
case R_68K_PLT16:
|
|
||||||
case R_68K_PLT8:
|
|
||||||
if (sym.is_imported)
|
|
||||||
sym.flags |= NEEDS_PLT;
|
|
||||||
break;
|
|
||||||
case R_68K_TLS_GD32:
|
|
||||||
case R_68K_TLS_GD16:
|
|
||||||
case R_68K_TLS_GD8:
|
|
||||||
sym.flags |= NEEDS_TLSGD;
|
|
||||||
break;
|
|
||||||
case R_68K_TLS_LDM32:
|
|
||||||
case R_68K_TLS_LDM16:
|
|
||||||
case R_68K_TLS_LDM8:
|
|
||||||
ctx.needs_tlsld = true;
|
|
||||||
break;
|
|
||||||
case R_68K_TLS_IE32:
|
|
||||||
case R_68K_TLS_IE16:
|
|
||||||
case R_68K_TLS_IE8:
|
|
||||||
sym.flags |= NEEDS_GOTTP;
|
|
||||||
break;
|
|
||||||
case R_68K_TLS_LE32:
|
|
||||||
case R_68K_TLS_LE16:
|
|
||||||
case R_68K_TLS_LE8:
|
|
||||||
check_tlsle(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_68K_TLS_LDO32:
|
|
||||||
case R_68K_TLS_LDO16:
|
|
||||||
case R_68K_TLS_LDO8:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Error(ctx) << *this << ": unknown relocation: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace mold::elf
|
|
452
third_party/mold/elf/arch-ppc32.cc
vendored
452
third_party/mold/elf/arch-ppc32.cc
vendored
|
@ -1,452 +0,0 @@
|
||||||
// clang-format off
|
|
||||||
// This file implements the PowerPC 32-bit ISA. For 64-bit PowerPC, see
|
|
||||||
// arch-ppc64v1.cc and arch-ppc64v2.cc.
|
|
||||||
//
|
|
||||||
// PPC32 is a RISC ISA. It has 32 general-purpose registers (GPRs).
|
|
||||||
// r0, r11 and r12 are reserved for static linkers, so we can use these
|
|
||||||
// registers in PLTs and range extension thunks. In addition to that, it
|
|
||||||
// has a few special registers. Notable ones are LR which holds a return
|
|
||||||
// address and CTR which we can use to store a branch target address.
|
|
||||||
//
|
|
||||||
// It feels that the PPC32 psABI is unnecessarily complicated at first
|
|
||||||
// glance, but that mainly stems from the fact that the ISA lacks
|
|
||||||
// PC-relative load/store instructions. Since machine instructions cannot
|
|
||||||
// load data relative to its own address, it is not straightforward to
|
|
||||||
// support position-independent code (PIC) on PPC32.
|
|
||||||
//
|
|
||||||
// A position-independent function typically contains the following code
|
|
||||||
// in the prologue to obtain its own address:
|
|
||||||
//
|
|
||||||
// mflr r0 // save the current return address to %r0
|
|
||||||
// bcl 20, 31, 4 // call the next instruction as if it were a function
|
|
||||||
// mflr r12 // save the return address to %r12
|
|
||||||
// mtlr r0 // restore the original return address
|
|
||||||
//
|
|
||||||
// An object file compiled with -fPIC contains a data section named
|
|
||||||
// `.got2` to store addresses of locally-defined global variables and
|
|
||||||
// constants. A PIC function usually computes its .got2+0x8000 and set it
|
|
||||||
// to %r30. This scheme allows the function to access global objects
|
|
||||||
// defined in the same input file with a single %r30-relative load/store
|
|
||||||
// instruction with a 16-bit offset, given that .got2 is smaller than
|
|
||||||
// 0x10000 (or 65536) bytes.
|
|
||||||
//
|
|
||||||
// Since each object file has its own .got2, %r30 refers to different
|
|
||||||
// places in a merged .got2 for two functions that came from different
|
|
||||||
// input files. Therefore, %r30 makes sense only within a single function.
|
|
||||||
//
|
|
||||||
// Technically, we can reuse a %r30 value in our PLT if we create a PLT
|
|
||||||
// _for each input file_ (that's what GNU ld seems to be doing), but that
|
|
||||||
// doesn't seem to be worth its complexity. Our PLT simply doesn't rely
|
|
||||||
// on a %r30 value.
|
|
||||||
//
|
|
||||||
// https://github.com/rui314/psabi/blob/main/ppc32.pdf
|
|
||||||
|
|
||||||
#include "third_party/mold/elf/mold.h"
|
|
||||||
|
|
||||||
namespace mold::elf {
|
|
||||||
|
|
||||||
using E = PPC32;
|
|
||||||
|
|
||||||
// Helpers that slice a value into the 16-bit pieces used as instruction
// immediates (the @lo / @higha style operators named in the comments of
// the code templates in this file).

// Low 16 bits of `val`.
static u64 lo(u64 val) {
  return val & 0xffff;
}

// Everything above the low 16 bits; the result is not masked.
static u64 hi(u64 val) {
  return val >> 16;
}

// Same as hi(), but 0x8000 is added to `val` before shifting.
static u64 ha(u64 val) {
  return (val + 0x8000) >> 16;
}

// Bits 16..31 of `val`, masked to 16 bits.
static u64 high(u64 val) {
  return (val >> 16) & 0xffff;
}

// Bits 16..31 of `val + 0x8000`, masked to 16 bits.
static u64 higha(u64 val) {
  return ((val + 0x8000) >> 16) & 0xffff;
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_plt_header(Context<E> &ctx, u8 *buf) {
|
|
||||||
static const ub32 insn[] = {
|
|
||||||
// Get the address of this PLT section
|
|
||||||
0x7c08'02a6, // mflr r0
|
|
||||||
0x429f'0005, // bcl 20, 31, 4
|
|
||||||
0x7d88'02a6, // 1: mflr r12
|
|
||||||
0x7c08'03a6, // mtlr r0
|
|
||||||
|
|
||||||
// Compute the runtime address of GOTPLT+12
|
|
||||||
0x3d8c'0000, // addis r12, r12, (GOTPLT - 1b)@higha
|
|
||||||
0x398c'0000, // addi r12, r12, (GOTPLT - 1b)@lo
|
|
||||||
|
|
||||||
// Compute the PLT entry offset
|
|
||||||
0x7d6c'5850, // sub r11, r11, r12
|
|
||||||
0x1d6b'0003, // mulli r11, r11, 3
|
|
||||||
|
|
||||||
// Load GOTPLT[2] and branch to GOTPLT[1]
|
|
||||||
0x800c'fff8, // lwz r0, -8(r12)
|
|
||||||
0x7c09'03a6, // mtctr r0
|
|
||||||
0x818c'fffc, // lwz r12, -4(r12)
|
|
||||||
0x4e80'0420, // bctr
|
|
||||||
0x6000'0000, // nop
|
|
||||||
0x6000'0000, // nop
|
|
||||||
0x6000'0000, // nop
|
|
||||||
0x6000'0000, // nop
|
|
||||||
};
|
|
||||||
|
|
||||||
static_assert(sizeof(insn) == E::plt_hdr_size);
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
|
|
||||||
ub32 *loc = (ub32 *)buf;
|
|
||||||
loc[4] |= higha(ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr + 4);
|
|
||||||
loc[5] |= lo(ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr + 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Instruction template for a PLT-style entry. The immediates of the
// addis/addi pair (words 4 and 5) are zero here; users of this template
// OR the real offset into them after copying it.
static const ub32 plt_entry[] = {
  // Obtain the address of the third instruction of this entry in r12,
  // preserving the original LR via r0
  0x7c08'02a6, // mflr    r0
  0x429f'0005, // bcl     20, 31, 4
  0x7d88'02a6, // mflr    r12
  0x7c08'03a6, // mtlr    r0

  // Load an address from the GOT/GOTPLT entry and jump to that address
  0x3d6c'0000, // addis   r11, r12, OFFSET@higha
  0x396b'0000, // addi    r11, r11, OFFSET@lo
  0x818b'0000, // lwz     r12, 0(r11)
  0x7d89'03a6, // mtctr   r12
  0x4e80'0420, // bctr
};
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
|
|
||||||
static_assert(E::plt_size == sizeof(plt_entry));
|
|
||||||
memcpy(buf, plt_entry, sizeof(plt_entry));
|
|
||||||
|
|
||||||
ub32 *loc = (ub32 *)buf;
|
|
||||||
i64 offset = sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx) - 8;
|
|
||||||
loc[4] |= higha(offset);
|
|
||||||
loc[5] |= lo(offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
|
|
||||||
static_assert(E::pltgot_size == sizeof(plt_entry));
|
|
||||||
memcpy(buf, plt_entry, sizeof(plt_entry));
|
|
||||||
|
|
||||||
ub32 *loc = (ub32 *)buf;
|
|
||||||
i64 offset = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 8;
|
|
||||||
loc[4] |= higha(offset);
|
|
||||||
loc[5] |= lo(offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
|
|
||||||
u64 offset, u64 val) {
|
|
||||||
u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_NONE:
|
|
||||||
break;
|
|
||||||
case R_PPC_ADDR32:
|
|
||||||
*(ub32 *)loc = val;
|
|
||||||
break;
|
|
||||||
case R_PPC_REL32:
|
|
||||||
*(ub32 *)loc = val - this->shdr.sh_addr - offset;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
ElfRel<E> *dynrel = nullptr;
|
|
||||||
if (ctx.reldyn)
|
|
||||||
dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
|
|
||||||
file.reldyn_offset + this->reldyn_offset);
|
|
||||||
|
|
||||||
u64 GOT2 = file.ppc32_got2 ? file.ppc32_got2->get_addr() : 0;
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
u64 S = sym.get_addr(ctx);
|
|
||||||
u64 A = rel.r_addend;
|
|
||||||
u64 P = get_addr() + rel.r_offset;
|
|
||||||
u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>);
|
|
||||||
u64 GOT = ctx.got->shdr.sh_addr;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_PPC_ADDR32:
|
|
||||||
case R_PPC_UADDR32:
|
|
||||||
apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
|
|
||||||
break;
|
|
||||||
case R_PPC_ADDR14:
|
|
||||||
*(ub32 *)loc |= bits(S + A, 15, 2) << 2;
|
|
||||||
break;
|
|
||||||
case R_PPC_ADDR16:
|
|
||||||
case R_PPC_UADDR16:
|
|
||||||
case R_PPC_ADDR16_LO:
|
|
||||||
*(ub16 *)loc = lo(S + A);
|
|
||||||
break;
|
|
||||||
case R_PPC_ADDR16_HI:
|
|
||||||
*(ub16 *)loc = hi(S + A);
|
|
||||||
break;
|
|
||||||
case R_PPC_ADDR16_HA:
|
|
||||||
*(ub16 *)loc = ha(S + A);
|
|
||||||
break;
|
|
||||||
case R_PPC_ADDR24:
|
|
||||||
*(ub32 *)loc |= bits(S + A, 25, 2) << 2;
|
|
||||||
break;
|
|
||||||
case R_PPC_ADDR30:
|
|
||||||
*(ub32 *)loc |= bits(S + A, 31, 2) << 2;
|
|
||||||
break;
|
|
||||||
case R_PPC_PLT16_LO:
|
|
||||||
*(ub16 *)loc = lo(G + GOT - A - GOT2);
|
|
||||||
break;
|
|
||||||
case R_PPC_PLT16_HI:
|
|
||||||
*(ub16 *)loc = hi(G + GOT - A - GOT2);
|
|
||||||
break;
|
|
||||||
case R_PPC_PLT16_HA:
|
|
||||||
*(ub16 *)loc = ha(G + GOT - A - GOT2);
|
|
||||||
break;
|
|
||||||
case R_PPC_PLT32:
|
|
||||||
*(ub32 *)loc = G + GOT - A - GOT2;
|
|
||||||
break;
|
|
||||||
case R_PPC_REL14:
|
|
||||||
*(ub32 *)loc |= bits(S + A - P, 15, 2) << 2;
|
|
||||||
break;
|
|
||||||
case R_PPC_REL16:
|
|
||||||
case R_PPC_REL16_LO:
|
|
||||||
*(ub16 *)loc = lo(S + A - P);
|
|
||||||
break;
|
|
||||||
case R_PPC_REL16_HI:
|
|
||||||
*(ub16 *)loc = hi(S + A - P);
|
|
||||||
break;
|
|
||||||
case R_PPC_REL16_HA:
|
|
||||||
*(ub16 *)loc = ha(S + A - P);
|
|
||||||
break;
|
|
||||||
case R_PPC_REL24:
|
|
||||||
case R_PPC_LOCAL24PC: {
|
|
||||||
i64 val = S + A - P;
|
|
||||||
if (sign_extend(val, 25) != val)
|
|
||||||
val = get_thunk_addr(i) - P;
|
|
||||||
*(ub32 *)loc |= bits(val, 25, 2) << 2;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_PPC_PLTREL24: {
|
|
||||||
i64 val = S - P;
|
|
||||||
if (sym.has_plt(ctx) || sign_extend(val, 25) != val)
|
|
||||||
val = get_thunk_addr(i) - P;
|
|
||||||
*(ub32 *)loc |= bits(val, 25, 2) << 2;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_PPC_REL32:
|
|
||||||
case R_PPC_PLTREL32:
|
|
||||||
*(ub32 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_PPC_GOT16:
|
|
||||||
case R_PPC_GOT16_LO:
|
|
||||||
*(ub16 *)loc = lo(G + A);
|
|
||||||
break;
|
|
||||||
case R_PPC_GOT16_HI:
|
|
||||||
*(ub16 *)loc = hi(G + A);
|
|
||||||
break;
|
|
||||||
case R_PPC_GOT16_HA:
|
|
||||||
*(ub16 *)loc = ha(G + A);
|
|
||||||
break;
|
|
||||||
case R_PPC_TPREL16_LO:
|
|
||||||
*(ub16 *)loc = lo(S + A - ctx.tp_addr);
|
|
||||||
break;
|
|
||||||
case R_PPC_TPREL16_HI:
|
|
||||||
*(ub16 *)loc = hi(S + A - ctx.tp_addr);
|
|
||||||
break;
|
|
||||||
case R_PPC_TPREL16_HA:
|
|
||||||
*(ub16 *)loc = ha(S + A - ctx.tp_addr);
|
|
||||||
break;
|
|
||||||
case R_PPC_DTPREL16_LO:
|
|
||||||
*(ub16 *)loc = lo(S + A - ctx.dtp_addr);
|
|
||||||
break;
|
|
||||||
case R_PPC_DTPREL16_HI:
|
|
||||||
*(ub16 *)loc = hi(S + A - ctx.dtp_addr);
|
|
||||||
break;
|
|
||||||
case R_PPC_DTPREL16_HA:
|
|
||||||
*(ub16 *)loc = ha(S + A - ctx.dtp_addr);
|
|
||||||
break;
|
|
||||||
case R_PPC_GOT_TLSGD16:
|
|
||||||
*(ub16 *)loc = sym.get_tlsgd_addr(ctx) - GOT;
|
|
||||||
break;
|
|
||||||
case R_PPC_GOT_TLSLD16:
|
|
||||||
*(ub16 *)loc = ctx.got->get_tlsld_addr(ctx) - GOT;
|
|
||||||
break;
|
|
||||||
case R_PPC_GOT_TPREL16:
|
|
||||||
*(ub16 *)loc = sym.get_gottp_addr(ctx) - GOT;
|
|
||||||
break;
|
|
||||||
case R_PPC_TLS:
|
|
||||||
case R_PPC_TLSGD:
|
|
||||||
case R_PPC_TLSLD:
|
|
||||||
case R_PPC_PLTSEQ:
|
|
||||||
case R_PPC_PLTCALL:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
unreachable();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
SectionFragment<E> *frag;
|
|
||||||
i64 frag_addend;
|
|
||||||
std::tie(frag, frag_addend) = get_fragment(ctx, rel);
|
|
||||||
|
|
||||||
u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
|
|
||||||
u64 A = frag ? frag_addend : (i64)rel.r_addend;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_PPC_ADDR32:
|
|
||||||
if (std::optional<u64> val = get_tombstone(sym, frag))
|
|
||||||
*(ub32 *)loc = *val;
|
|
||||||
else
|
|
||||||
*(ub32 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: "
|
|
||||||
<< rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::scan_relocations(Context<E> &ctx) {
|
|
||||||
assert(shdr().sh_flags & SHF_ALLOC);
|
|
||||||
|
|
||||||
this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
// Scan relocations
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
|
|
||||||
if (sym.is_ifunc())
|
|
||||||
sym.flags |= NEEDS_GOT | NEEDS_PLT;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_PPC_ADDR32:
|
|
||||||
case R_PPC_UADDR32:
|
|
||||||
scan_dyn_absrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_PPC_ADDR14:
|
|
||||||
case R_PPC_ADDR16:
|
|
||||||
case R_PPC_UADDR16:
|
|
||||||
case R_PPC_ADDR16_LO:
|
|
||||||
case R_PPC_ADDR16_HI:
|
|
||||||
case R_PPC_ADDR16_HA:
|
|
||||||
case R_PPC_ADDR24:
|
|
||||||
case R_PPC_ADDR30:
|
|
||||||
scan_absrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_PPC_REL14:
|
|
||||||
case R_PPC_REL16:
|
|
||||||
case R_PPC_REL16_LO:
|
|
||||||
case R_PPC_REL16_HI:
|
|
||||||
case R_PPC_REL16_HA:
|
|
||||||
case R_PPC_REL32:
|
|
||||||
scan_pcrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_PPC_GOT16:
|
|
||||||
case R_PPC_GOT16_LO:
|
|
||||||
case R_PPC_GOT16_HI:
|
|
||||||
case R_PPC_GOT16_HA:
|
|
||||||
case R_PPC_PLT16_LO:
|
|
||||||
case R_PPC_PLT16_HI:
|
|
||||||
case R_PPC_PLT16_HA:
|
|
||||||
case R_PPC_PLT32:
|
|
||||||
sym.flags |= NEEDS_GOT;
|
|
||||||
break;
|
|
||||||
case R_PPC_REL24:
|
|
||||||
case R_PPC_PLTREL24:
|
|
||||||
case R_PPC_PLTREL32:
|
|
||||||
if (sym.is_imported)
|
|
||||||
sym.flags |= NEEDS_PLT;
|
|
||||||
break;
|
|
||||||
case R_PPC_GOT_TLSGD16:
|
|
||||||
sym.flags |= NEEDS_TLSGD;
|
|
||||||
break;
|
|
||||||
case R_PPC_GOT_TLSLD16:
|
|
||||||
ctx.needs_tlsld = true;
|
|
||||||
break;
|
|
||||||
case R_PPC_GOT_TPREL16:
|
|
||||||
sym.flags |= NEEDS_GOTTP;
|
|
||||||
break;
|
|
||||||
case R_PPC_TPREL16_LO:
|
|
||||||
case R_PPC_TPREL16_HI:
|
|
||||||
case R_PPC_TPREL16_HA:
|
|
||||||
check_tlsle(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_PPC_LOCAL24PC:
|
|
||||||
case R_PPC_TLS:
|
|
||||||
case R_PPC_TLSGD:
|
|
||||||
case R_PPC_TLSLD:
|
|
||||||
case R_PPC_DTPREL16_LO:
|
|
||||||
case R_PPC_DTPREL16_HI:
|
|
||||||
case R_PPC_DTPREL16_HA:
|
|
||||||
case R_PPC_PLTSEQ:
|
|
||||||
case R_PPC_PLTCALL:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Error(ctx) << *this << ": unknown relocation: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void RangeExtensionThunk<E>::copy_buf(Context<E> &ctx) {
|
|
||||||
u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset;
|
|
||||||
|
|
||||||
static const ub32 local_thunk[] = {
|
|
||||||
// Get this thunk's address
|
|
||||||
0x7c08'02a6, // mflr r0
|
|
||||||
0x429f'0005, // bcl 20, 31, 4
|
|
||||||
0x7d88'02a6, // mflr r12
|
|
||||||
0x7c08'03a6, // mtlr r0
|
|
||||||
|
|
||||||
// Materialize the destination's address in %r11 and jump to that address
|
|
||||||
0x3d6c'0000, // addis r11, r12, OFFSET@higha
|
|
||||||
0x396b'0000, // addi r11, r11, OFFSET@lo
|
|
||||||
0x7d69'03a6, // mtctr r11
|
|
||||||
0x4e80'0420, // bctr
|
|
||||||
0x6000'0000, // nop
|
|
||||||
};
|
|
||||||
|
|
||||||
static_assert(E::thunk_size == sizeof(plt_entry));
|
|
||||||
static_assert(E::thunk_size == sizeof(local_thunk));
|
|
||||||
|
|
||||||
for (i64 i = 0; i < symbols.size(); i++) {
|
|
||||||
ub32 *loc = (ub32 *)(buf + i * E::thunk_size);
|
|
||||||
Symbol<E> &sym = *symbols[i];
|
|
||||||
|
|
||||||
if (sym.has_plt(ctx)) {
|
|
||||||
memcpy(loc, plt_entry, sizeof(plt_entry));
|
|
||||||
u64 got = sym.has_got(ctx) ? sym.get_got_addr(ctx) : sym.get_gotplt_addr(ctx);
|
|
||||||
i64 val = got - get_addr(i) - 8;
|
|
||||||
loc[4] |= higha(val);
|
|
||||||
loc[5] |= lo(val);
|
|
||||||
} else {
|
|
||||||
memcpy(loc, local_thunk, sizeof(local_thunk));
|
|
||||||
i64 val = sym.get_addr(ctx) - get_addr(i) - 8;
|
|
||||||
loc[4] |= higha(val);
|
|
||||||
loc[5] |= lo(val);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace mold::elf
|
|
687
third_party/mold/elf/arch-ppc64v1.cc
vendored
687
third_party/mold/elf/arch-ppc64v1.cc
vendored
|
@ -1,687 +0,0 @@
|
||||||
// clang-format off
|
|
||||||
// This file contains code for the 64-bit PowerPC ELFv1 ABI that is
|
|
||||||
// commonly used for big-endian PPC systems. Modern PPC systems that use
|
|
||||||
// the processor in the little-endian mode use the ELFv2 ABI instead. For
|
|
||||||
// ELFv2, see arch-ppc64v2.cc.
|
|
||||||
//
|
|
||||||
// Even though they are similar, ELFv1 isn't only different from ELFv2 in
|
|
||||||
// endianness. The most notable difference is, in ELFv1, a function
|
|
||||||
// pointer doesn't directly refer to the entry point of a function but
|
|
||||||
// instead refers to a data structure so-called "function descriptor".
|
|
||||||
//
|
|
||||||
// The function descriptor is essentially a pair of a function entry point
|
|
||||||
// address and a value that should be set to %r2 before calling that
|
|
||||||
// function. There is also a third member for "the environment pointer for
|
|
||||||
// languages such as Pascal and PL/1" according to the psABI, but it looks
|
|
||||||
// like no one actually uses it. In total, the function descriptor is 24
|
|
||||||
// bytes long. Here is why we need it.
|
|
||||||
//
|
|
||||||
// PPC generally lacks PC-relative data access instructions. Position-
|
|
||||||
// independent code sets GOT + 0x8000 to %r2 and access global variables
|
|
||||||
// relative to %r2.
|
|
||||||
//
|
|
||||||
// Each ELF file has its own GOT. If a function calls another function in
|
|
||||||
// the same ELF file, it doesn't have to reset %r2. However, if it is in
|
|
||||||
// another file (e.g. another .so), it has to set a new value to %r2 so that
|
|
||||||
// the register contains the callee's GOT + 0x8000.
|
|
||||||
//
|
|
||||||
// In this way, you can't call a function just by knowing the function's
|
|
||||||
// entry point address. You also need to know a proper %r2 value for the
|
|
||||||
// function. This is why a function pointer refers to a tuple of an
|
|
||||||
// address and a %r2 value.
|
|
||||||
//
|
|
||||||
// If a function call is made through PLT, PLT takes care of restoring %r2.
|
|
||||||
// Therefore, the caller has to restore %r2 only for function calls
|
|
||||||
// through function pointers.
|
|
||||||
//
|
|
||||||
// .opd (short for "official procedure descriptors") contains function
|
|
||||||
// descriptors.
|
|
||||||
//
|
|
||||||
// You can think of OPD like this: even in other targets, a function can have a
|
|
||||||
// few different addresses for different purposes. It may not only have an
|
|
||||||
// entry point address but may also have PLT and/or GOT addresses.
|
|
||||||
// In PPCV1, it may have an OPD address in addition to these. OPD address
|
|
||||||
// is used for relocations that refer to the address of a function as a
|
|
||||||
// function pointer.
|
|
||||||
//
|
|
||||||
// https://github.com/rui314/psabi/blob/main/ppc64v1.pdf
|
|
||||||
|
|
||||||
#include "third_party/mold/elf/mold.h"
|
|
||||||
|
|
||||||
#include "third_party/libcxx/algorithm"
|
|
||||||
// MISSING #include <tbb/parallel_for_each.h>
|
|
||||||
|
|
||||||
namespace mold::elf {
|
|
||||||
|
|
||||||
using E = PPC64V1;
|
|
||||||
|
|
||||||
// Helpers that slice a value into the 16-bit pieces used as instruction
// immediates.

// Low 16 bits of `val`.
static u64 lo(u64 val) {
  return val & 0xffff;
}

// Everything above the low 16 bits; the result is not masked.
static u64 hi(u64 val) {
  return val >> 16;
}

// Same as hi(), but 0x8000 is added to `val` before shifting.
static u64 ha(u64 val) {
  return (val + 0x8000) >> 16;
}

// Bits 16..31 of `val`, masked to 16 bits.
static u64 high(u64 val) {
  return (val >> 16) & 0xffff;
}

// Bits 16..31 of `val + 0x8000`, masked to 16 bits.
static u64 higha(u64 val) {
  return ((val + 0x8000) >> 16) & 0xffff;
}
|
|
||||||
|
|
||||||
// .plt is used only for lazy symbol resolution on PPC64. All PLT
|
|
||||||
// calls are made via range extension thunks even if they are within
|
|
||||||
// reach. Thunks read addresses from .got.plt and jump there.
|
|
||||||
// Therefore, once PLT symbols are resolved and final addresses are
|
|
||||||
// written to .got.plt, thunks just skip .plt and directly jump to the
|
|
||||||
// resolved addresses.
|
|
||||||
template <>
|
|
||||||
void write_plt_header(Context<E> &ctx, u8 *buf) {
|
|
||||||
static const ub32 insn[] = {
|
|
||||||
0x7d88'02a6, // mflr r12
|
|
||||||
0x429f'0005, // bcl 20, 31, 4 // obtain PC
|
|
||||||
0x7d68'02a6, // mflr r11
|
|
||||||
0xe84b'0024, // ld r2,36(r11)
|
|
||||||
0x7d88'03a6, // mtlr r12
|
|
||||||
0x7d62'5a14, // add r11,r2,r11
|
|
||||||
0xe98b'0000, // ld r12,0(r11)
|
|
||||||
0xe84b'0008, // ld r2,8(r11)
|
|
||||||
0x7d89'03a6, // mtctr r12
|
|
||||||
0xe96b'0010, // ld r11,16(r11)
|
|
||||||
0x4e80'0420, // bctr
|
|
||||||
// .quad .got.plt - .plt - 8
|
|
||||||
0x0000'0000,
|
|
||||||
0x0000'0000,
|
|
||||||
};
|
|
||||||
|
|
||||||
static_assert(sizeof(insn) == E::plt_hdr_size);
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ub64 *)(buf + 44) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 8;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
|
|
||||||
ub32 *loc = (ub32 *)buf;
|
|
||||||
i64 idx = sym.get_plt_idx(ctx);
|
|
||||||
|
|
||||||
// The PPC64 ELFv1 ABI requires PLT entries to be vary in size depending
|
|
||||||
// on their indices. Unlike other targets, .got.plt is filled not by us
|
|
||||||
// but by the loader, so we don't have a control over where the initial
|
|
||||||
// call to the PLT entry jumps to. So we need to strictly follow the PLT
|
|
||||||
// section layout as the loader expect it to be.
|
|
||||||
if (idx < 0x8000) {
|
|
||||||
static const ub32 insn[] = {
|
|
||||||
0x3800'0000, // li r0, PLT_INDEX
|
|
||||||
0x4b00'0000, // b plt0
|
|
||||||
};
|
|
||||||
|
|
||||||
memcpy(loc, insn, sizeof(insn));
|
|
||||||
loc[0] |= idx;
|
|
||||||
loc[1] |= (ctx.plt->shdr.sh_addr - sym.get_plt_addr(ctx) - 4) & 0x00ff'ffff;
|
|
||||||
} else {
|
|
||||||
static const ub32 insn[] = {
|
|
||||||
0x3c00'0000, // lis r0, PLT_INDEX@high
|
|
||||||
0x6000'0000, // ori r0, r0, PLT_INDEX@lo
|
|
||||||
0x4b00'0000, // b plt0
|
|
||||||
};
|
|
||||||
|
|
||||||
memcpy(loc, insn, sizeof(insn));
|
|
||||||
loc[0] |= high(idx);
|
|
||||||
loc[1] |= lo(idx);
|
|
||||||
loc[2] |= (ctx.plt->shdr.sh_addr - sym.get_plt_addr(ctx) - 8) & 0x00ff'ffff;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// .plt.got is not necessary on PPC64 because range extension thunks
|
|
||||||
// directly read GOT entries and jump there.
|
|
||||||
template <>
|
|
||||||
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
|
|
||||||
u64 offset, u64 val) {
|
|
||||||
u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_NONE:
|
|
||||||
break;
|
|
||||||
case R_PPC64_ADDR64:
|
|
||||||
*(ub64 *)loc = val;
|
|
||||||
break;
|
|
||||||
case R_PPC64_REL32:
|
|
||||||
*(ub32 *)loc = val - this->shdr.sh_addr - offset;
|
|
||||||
break;
|
|
||||||
case R_PPC64_REL64:
|
|
||||||
*(ub64 *)loc = val - this->shdr.sh_addr - offset;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
ElfRel<E> *dynrel = nullptr;
|
|
||||||
if (ctx.reldyn)
|
|
||||||
dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
|
|
||||||
file.reldyn_offset + this->reldyn_offset);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
auto check = [&](i64 val, i64 lo, i64 hi) {
|
|
||||||
if (val < lo || hi <= val)
|
|
||||||
Error(ctx) << *this << ": relocation " << rel << " against "
|
|
||||||
<< sym << " out of range: " << val << " is not in ["
|
|
||||||
<< lo << ", " << hi << ")";
|
|
||||||
};
|
|
||||||
|
|
||||||
u64 S = sym.get_addr(ctx);
|
|
||||||
u64 A = rel.r_addend;
|
|
||||||
u64 P = get_addr() + rel.r_offset;
|
|
||||||
u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>);
|
|
||||||
u64 GOT = ctx.got->shdr.sh_addr;
|
|
||||||
u64 TOC = ctx.extra.TOC->value;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_PPC64_ADDR64:
|
|
||||||
apply_toc_rel(ctx, sym, rel, loc, S, A, P, dynrel);
|
|
||||||
break;
|
|
||||||
case R_PPC64_TOC:
|
|
||||||
apply_toc_rel(ctx, *ctx.extra.TOC, rel, loc, TOC, A, P, dynrel);
|
|
||||||
break;
|
|
||||||
case R_PPC64_TOC16_HA:
|
|
||||||
*(ub16 *)loc = ha(S + A - TOC);
|
|
||||||
break;
|
|
||||||
case R_PPC64_TOC16_LO:
|
|
||||||
*(ub16 *)loc = lo(S + A - TOC);
|
|
||||||
break;
|
|
||||||
case R_PPC64_TOC16_DS:
|
|
||||||
check(S + A - TOC, -(1 << 15), 1 << 15);
|
|
||||||
*(ub16 *)loc |= (S + A - TOC) & 0xfffc;
|
|
||||||
break;
|
|
||||||
case R_PPC64_TOC16_LO_DS:
|
|
||||||
*(ub16 *)loc |= (S + A - TOC) & 0xfffc;
|
|
||||||
break;
|
|
||||||
case R_PPC64_REL24: {
|
|
||||||
i64 val = sym.get_addr(ctx, NO_OPD) + A - P;
|
|
||||||
if (sym.has_plt(ctx) || sign_extend(val, 25) != val)
|
|
||||||
val = get_thunk_addr(i) + A - P;
|
|
||||||
|
|
||||||
check(val, -(1 << 25), 1 << 25);
|
|
||||||
*(ub32 *)loc |= bits(val, 25, 2) << 2;
|
|
||||||
|
|
||||||
// If a callee is an external function, PLT saves %r2 to the
|
|
||||||
// caller's r2 save slot. We need to restore it after function
|
|
||||||
// return. To do so, there's usually a NOP as a placeholder
|
|
||||||
// after a BL. 0x6000'0000 is a NOP.
|
|
||||||
if (sym.has_plt(ctx) && *(ub32 *)(loc + 4) == 0x6000'0000)
|
|
||||||
*(ub32 *)(loc + 4) = 0xe841'0028; // ld r2, 40(r1)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_PPC64_REL32:
|
|
||||||
*(ub32 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_PPC64_REL64:
|
|
||||||
*(ub64 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_PPC64_REL16_HA:
|
|
||||||
*(ub16 *)loc = ha(S + A - P);
|
|
||||||
break;
|
|
||||||
case R_PPC64_REL16_LO:
|
|
||||||
*(ub16 *)loc = lo(S + A - P);
|
|
||||||
break;
|
|
||||||
case R_PPC64_PLT16_HA:
|
|
||||||
*(ub16 *)loc = ha(G + GOT - TOC);
|
|
||||||
break;
|
|
||||||
case R_PPC64_PLT16_HI:
|
|
||||||
*(ub16 *)loc = hi(G + GOT - TOC);
|
|
||||||
break;
|
|
||||||
case R_PPC64_PLT16_LO:
|
|
||||||
*(ub16 *)loc = lo(G + GOT - TOC);
|
|
||||||
break;
|
|
||||||
case R_PPC64_PLT16_LO_DS:
|
|
||||||
*(ub16 *)loc |= (G + GOT - TOC) & 0xfffc;
|
|
||||||
break;
|
|
||||||
case R_PPC64_GOT_TPREL16_HA:
|
|
||||||
*(ub16 *)loc = ha(sym.get_gottp_addr(ctx) - TOC);
|
|
||||||
break;
|
|
||||||
case R_PPC64_GOT_TLSGD16_HA:
|
|
||||||
*(ub16 *)loc = ha(sym.get_tlsgd_addr(ctx) - TOC);
|
|
||||||
break;
|
|
||||||
case R_PPC64_GOT_TLSGD16_LO:
|
|
||||||
*(ub16 *)loc = lo(sym.get_tlsgd_addr(ctx) - TOC);
|
|
||||||
break;
|
|
||||||
case R_PPC64_GOT_TLSLD16_HA:
|
|
||||||
*(ub16 *)loc = ha(ctx.got->get_tlsld_addr(ctx) - TOC);
|
|
||||||
break;
|
|
||||||
case R_PPC64_GOT_TLSLD16_LO:
|
|
||||||
*(ub16 *)loc = lo(ctx.got->get_tlsld_addr(ctx) - TOC);
|
|
||||||
break;
|
|
||||||
case R_PPC64_DTPREL16_HA:
|
|
||||||
*(ub16 *)loc = ha(S + A - ctx.dtp_addr);
|
|
||||||
break;
|
|
||||||
case R_PPC64_DTPREL16_LO:
|
|
||||||
*(ub16 *)loc = lo(S + A - ctx.dtp_addr);
|
|
||||||
break;
|
|
||||||
case R_PPC64_TPREL16_HA:
|
|
||||||
*(ub16 *)loc = ha(S + A - ctx.tp_addr);
|
|
||||||
break;
|
|
||||||
case R_PPC64_TPREL16_LO:
|
|
||||||
*(ub16 *)loc = lo(S + A - ctx.tp_addr);
|
|
||||||
break;
|
|
||||||
case R_PPC64_GOT_TPREL16_LO_DS:
|
|
||||||
*(ub16 *)loc |= (sym.get_gottp_addr(ctx) - TOC) & 0xfffc;
|
|
||||||
break;
|
|
||||||
case R_PPC64_PLTSEQ:
|
|
||||||
case R_PPC64_PLTCALL:
|
|
||||||
case R_PPC64_TLS:
|
|
||||||
case R_PPC64_TLSGD:
|
|
||||||
case R_PPC64_TLSLD:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
unreachable();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
auto check = [&](i64 val, i64 lo, i64 hi) {
|
|
||||||
if (val < lo || hi <= val)
|
|
||||||
Error(ctx) << *this << ": relocation " << rel << " against "
|
|
||||||
<< sym << " out of range: " << val << " is not in ["
|
|
||||||
<< lo << ", " << hi << ")";
|
|
||||||
};
|
|
||||||
|
|
||||||
SectionFragment<E> *frag;
|
|
||||||
i64 frag_addend;
|
|
||||||
std::tie(frag, frag_addend) = get_fragment(ctx, rel);
|
|
||||||
|
|
||||||
u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
|
|
||||||
u64 A = frag ? frag_addend : (i64)rel.r_addend;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_PPC64_ADDR64:
|
|
||||||
if (std::optional<u64> val = get_tombstone(sym, frag))
|
|
||||||
*(ub64 *)loc = *val;
|
|
||||||
else
|
|
||||||
*(ub64 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_PPC64_ADDR32: {
|
|
||||||
i64 val = S + A;
|
|
||||||
check(val, 0, 1LL << 32);
|
|
||||||
*(ub32 *)loc = val;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_PPC64_DTPREL64:
|
|
||||||
*(ub64 *)loc = S + A - ctx.dtp_addr;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: "
|
|
||||||
<< rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::scan_relocations(Context<E> &ctx) {
|
|
||||||
assert(shdr().sh_flags & SHF_ALLOC);
|
|
||||||
|
|
||||||
this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
// Scan relocations
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
|
|
||||||
if (sym.is_ifunc())
|
|
||||||
sym.flags |= NEEDS_GOT | NEEDS_PLT | NEEDS_PPC_OPD;
|
|
||||||
|
|
||||||
// Any relocation except R_PPC64_REL24 is considered as an
|
|
||||||
// address-taking relocation.
|
|
||||||
if (rel.r_type != R_PPC64_REL24 && sym.get_type() == STT_FUNC)
|
|
||||||
sym.flags |= NEEDS_PPC_OPD;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_PPC64_ADDR64:
|
|
||||||
case R_PPC64_TOC:
|
|
||||||
scan_toc_rel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_PPC64_GOT_TPREL16_HA:
|
|
||||||
sym.flags |= NEEDS_GOTTP;
|
|
||||||
break;
|
|
||||||
case R_PPC64_REL24:
|
|
||||||
if (sym.is_imported)
|
|
||||||
sym.flags |= NEEDS_PLT;
|
|
||||||
break;
|
|
||||||
case R_PPC64_PLT16_HA:
|
|
||||||
sym.flags |= NEEDS_GOT;
|
|
||||||
break;
|
|
||||||
case R_PPC64_GOT_TLSGD16_HA:
|
|
||||||
sym.flags |= NEEDS_TLSGD;
|
|
||||||
break;
|
|
||||||
case R_PPC64_GOT_TLSLD16_HA:
|
|
||||||
ctx.needs_tlsld = true;
|
|
||||||
break;
|
|
||||||
case R_PPC64_TPREL16_HA:
|
|
||||||
case R_PPC64_TPREL16_LO:
|
|
||||||
check_tlsle(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_PPC64_REL32:
|
|
||||||
case R_PPC64_REL64:
|
|
||||||
case R_PPC64_TOC16_HA:
|
|
||||||
case R_PPC64_TOC16_LO:
|
|
||||||
case R_PPC64_TOC16_LO_DS:
|
|
||||||
case R_PPC64_TOC16_DS:
|
|
||||||
case R_PPC64_REL16_HA:
|
|
||||||
case R_PPC64_REL16_LO:
|
|
||||||
case R_PPC64_PLT16_HI:
|
|
||||||
case R_PPC64_PLT16_LO:
|
|
||||||
case R_PPC64_PLT16_LO_DS:
|
|
||||||
case R_PPC64_PLTSEQ:
|
|
||||||
case R_PPC64_PLTCALL:
|
|
||||||
case R_PPC64_GOT_TPREL16_LO_DS:
|
|
||||||
case R_PPC64_GOT_TLSGD16_LO:
|
|
||||||
case R_PPC64_GOT_TLSLD16_LO:
|
|
||||||
case R_PPC64_TLS:
|
|
||||||
case R_PPC64_TLSGD:
|
|
||||||
case R_PPC64_TLSLD:
|
|
||||||
case R_PPC64_DTPREL16_HA:
|
|
||||||
case R_PPC64_DTPREL16_LO:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Error(ctx) << *this << ": unknown relocation: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void RangeExtensionThunk<E>::copy_buf(Context<E> &ctx) {
|
|
||||||
u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset;
|
|
||||||
|
|
||||||
// If the destination is .plt.got, we save the current r2, read an
|
|
||||||
// address of a function descriptor from .got, restore %r2 and jump
|
|
||||||
// to the function.
|
|
||||||
static const ub32 pltgot_thunk[] = {
|
|
||||||
// Store the caller's %r2
|
|
||||||
0xf841'0028, // std %r2, 40(%r1)
|
|
||||||
|
|
||||||
// Load an address of a function descriptor
|
|
||||||
0x3d82'0000, // addis %r12, %r2, foo@got@toc@ha
|
|
||||||
0xe98c'0000, // ld %r12, foo@got@toc@lo(%r12)
|
|
||||||
|
|
||||||
// Restore the callee's %r2
|
|
||||||
0xe84c'0008, // ld %r2, 8(%r12)
|
|
||||||
|
|
||||||
// Jump to the function
|
|
||||||
0xe98c'0000, // ld %r12, 0(%r12)
|
|
||||||
0x7d89'03a6, // mtctr %r12
|
|
||||||
0x4e80'0420, // bctr
|
|
||||||
};
|
|
||||||
|
|
||||||
// If the destination is .plt, read a function descriptor from .got.plt.
|
|
||||||
static const ub32 plt_thunk[] = {
|
|
||||||
// Store the caller's %r2
|
|
||||||
0xf841'0028, // std %r2, 40(%r1)
|
|
||||||
|
|
||||||
// Materialize an address of a function descriptor
|
|
||||||
0x3d82'0000, // addis %r12, %r2, foo@gotplt@toc@ha
|
|
||||||
0x398c'0000, // addi %r12, %r12, foo@gotplt@toc@lo
|
|
||||||
|
|
||||||
// Restore the callee's %r2
|
|
||||||
0xe84c'0008, // ld %r2, 8(%r12)
|
|
||||||
|
|
||||||
// Jump to the function
|
|
||||||
0xe98c'0000, // ld %r12, 0(%r12)
|
|
||||||
0x7d89'03a6, // mtctr %r12
|
|
||||||
0x4e80'0420, // bctr
|
|
||||||
};
|
|
||||||
|
|
||||||
// If the destination is a non-imported function, we directly jump
|
|
||||||
// to the function entry address.
|
|
||||||
static const ub32 local_thunk[] = {
|
|
||||||
0x3d82'0000, // addis r12, r2, foo@toc@ha
|
|
||||||
0x398c'0000, // addi r12, r12, foo@toc@lo
|
|
||||||
0x7d89'03a6, // mtctr r12
|
|
||||||
0x4e80'0420, // bctr
|
|
||||||
0x6000'0000, // nop
|
|
||||||
0x6000'0000, // nop
|
|
||||||
0x6000'0000, // nop
|
|
||||||
};
|
|
||||||
|
|
||||||
static_assert(E::thunk_size == sizeof(pltgot_thunk));
|
|
||||||
static_assert(E::thunk_size == sizeof(plt_thunk));
|
|
||||||
static_assert(E::thunk_size == sizeof(local_thunk));
|
|
||||||
|
|
||||||
for (i64 i = 0; i < symbols.size(); i++) {
|
|
||||||
Symbol<E> &sym = *symbols[i];
|
|
||||||
ub32 *loc = (ub32 *)(buf + i * E::thunk_size);
|
|
||||||
|
|
||||||
if (sym.has_got(ctx)) {
|
|
||||||
memcpy(loc, pltgot_thunk, sizeof(pltgot_thunk));
|
|
||||||
i64 val = sym.get_got_addr(ctx) - ctx.extra.TOC->value;
|
|
||||||
loc[1] |= higha(val);
|
|
||||||
loc[2] |= lo(val);
|
|
||||||
} else if(sym.has_plt(ctx)) {
|
|
||||||
memcpy(loc, plt_thunk, sizeof(plt_thunk));
|
|
||||||
i64 val = sym.get_gotplt_addr(ctx) - ctx.extra.TOC->value;
|
|
||||||
loc[1] |= higha(val);
|
|
||||||
loc[2] |= lo(val);
|
|
||||||
} else {
|
|
||||||
memcpy(loc, local_thunk, sizeof(local_thunk));
|
|
||||||
i64 val = sym.get_addr(ctx, NO_OPD) - ctx.extra.TOC->value;
|
|
||||||
loc[0] |= higha(val);
|
|
||||||
loc[1] |= lo(val);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns the first input section of `file` named ".opd", or nullptr if
// the file has none. Null slots in file.sections are skipped.
static InputSection<E> *get_opd_section(ObjectFile<E> &file) {
  auto is_opd = [](const std::unique_ptr<InputSection<E>> &sec) {
    return sec && sec->name() == ".opd";
  };

  auto pos = std::find_if(file.sections.begin(), file.sections.end(), is_opd);
  if (pos == file.sections.end())
    return nullptr;
  return pos->get();
}
|
|
||||||
|
|
||||||
// Binary-searches the relocations of `isec` for one whose r_offset equals
// `offset` and returns a pointer to it, or nullptr if there is no such
// relocation. The lower_bound call assumes the relocations are ordered by
// r_offset.
static ElfRel<E> *
get_relocation_at(Context<E> &ctx, InputSection<E> &isec, i64 offset) {
  std::span<ElfRel<E>> rels = isec.get_rels(ctx);

  auto precedes = [](const ElfRel<E> &r, i64 off) { return r.r_offset < off; };
  auto pos = std::lower_bound(rels.begin(), rels.end(), offset, precedes);

  if (pos == rels.end() || pos->r_offset != offset)
    return nullptr;
  return &*pos;
}
|
|
||||||
|
|
||||||
struct OpdSymbol {
|
|
||||||
bool operator<(const OpdSymbol &x) const { return r_offset < x.r_offset; }
|
|
||||||
|
|
||||||
u64 r_offset = 0;
|
|
||||||
Symbol<E> *sym = nullptr;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Looks up, in a sorted list of OpdSymbol records, the symbol recorded at
// exactly `offset`. Returns nullptr when no record has that offset.
static Symbol<E> *
get_opd_sym_at(Context<E> &ctx, std::span<OpdSymbol> syms, u64 offset) {
  OpdSymbol key{offset};
  auto pos = std::lower_bound(syms.begin(), syms.end(), key);

  if (pos != syms.end() && pos->r_offset == offset)
    return pos->sym;
  return nullptr;
}
|
|
||||||
|
|
||||||
// Compiler creates an .opd entry for each function symbol. The intention
|
|
||||||
// is to make it possible to create an output .opd section just by linking
|
|
||||||
// input .opd sections in the same manner as we do to other normal input
|
|
||||||
// sections.
|
|
||||||
//
|
|
||||||
// However, in reality, .opd isn't a normal input section. It needs many
|
|
||||||
// special treatments as follows:
|
|
||||||
//
|
|
||||||
// 1. A function symbol refers to not a .text but an .opd. Its address
|
|
||||||
// works fine for address-taking relocations such as R_PPC64_ADDR64.
|
|
||||||
// However, R_PPC64_REL24 (which is used for branch instruction) needs
|
|
||||||
// a function's real address instead of the function's .opd address.
|
|
||||||
// We need to read .opd contents to find out a function entry point
|
|
||||||
// address to apply R_PPC64_REL24.
|
|
||||||
//
|
|
||||||
// 2. Output .opd entries are needed only for functions whose addresses
|
|
||||||
// are taken. Just copying input .opd sections to an output would
|
|
||||||
// produce lots of dead .opd entries.
|
|
||||||
//
|
|
||||||
// 3. In this design, all function symbols refer to an .opd section, and
|
|
||||||
// that doesn't work well with graph traversal optimizations such as
|
|
||||||
// garbage collection or identical comdat folding. For example, garbage
|
|
||||||
// collector would mark an .opd alive which in turn marks all functions
|
|
||||||
// that are referenced by .opd as alive, effectively keeping all
|
|
||||||
// functions as alive.
|
|
||||||
//
|
|
||||||
// The problem is that the compiler creates a half-baked .opd section, and
|
|
||||||
// the linker has to figure out what all these .opd entries and
|
|
||||||
// relocations are trying to achieve. It's like the compiler would emit a
|
|
||||||
// half-baked .plt section in an object file and the linker has to deal
|
|
||||||
// with that. That's not a good design.
|
|
||||||
//
|
|
||||||
// So, in this function, we undo what the compiler did to .opd. We remove
|
|
||||||
// function symbols from .opd and reattach them to their function entry
|
|
||||||
// points. We also rewrite relocations that directly refer to an input
|
|
||||||
// .opd section so that they refer to function symbols instead. We then
|
|
||||||
// mark input .opd sections as dead.
|
|
||||||
//
|
|
||||||
// After this function, we mark symbols with the NEEDS_PPC_OPD flag if the
|
|
||||||
// symbol needs an .opd entry. We then create an output .opd just like we
|
|
||||||
// do for .plt or .got.
|
|
||||||
// For every object file that has an input .opd section: marks that section
// dead, re-homes the function symbols defined in it onto the section and
// addend named by their .opd relocation, and redirects relocations in the
// other live sections that still point into .opd to those function symbols.
void ppc64v1_rewrite_opd(Context<E> &ctx) {
  tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
    InputSection<E> *opd = get_opd_section(*file);
    if (!opd)
      return;
    opd->is_alive = false;

    // Step 1: detach function symbols from .opd, remembering the .opd
    // offset each one used to have.
    std::vector<OpdSymbol> moved;

    for (Symbol<E> *fn : file->symbols) {
      if (fn->file != file)
        continue;
      if (fn->get_input_section() != opd)
        continue;

      u32 type = fn->get_type();
      if (type != STT_FUNC && type != STT_GNU_IFUNC)
        continue;

      // The relocation located at the symbol's .opd offset names the
      // section and addend the symbol is moved to.
      ElfRel<E> *desc = get_relocation_at(ctx, *opd, fn->value);
      if (!desc)
        Fatal(ctx) << *file << ": cannot find a relocation in .opd for "
                   << *fn << " at offset 0x" << std::hex << (u64)fn->value;

      Symbol<E> *section_sym = file->symbols[desc->r_sym];
      if (section_sym->get_type() != STT_SECTION)
        Fatal(ctx) << *file << ": bad relocation in .opd referring "
                   << *section_sym;

      moved.push_back({fn->value, fn});

      fn->set_input_section(section_sym->get_input_section());
      fn->value = desc->r_addend;
    }

    // get_opd_sym_at() binary-searches, so the records must be sorted.
    sort(moved);

    // Step 2: relocations elsewhere in the file that still refer to .opd
    // are redirected to the function symbol that lived at that offset.
    for (std::unique_ptr<InputSection<E>> &sec : file->sections) {
      if (!sec || !sec->is_alive || sec.get() == opd)
        continue;

      for (ElfRel<E> &rel : sec->get_rels(ctx)) {
        Symbol<E> &referent = *file->symbols[rel.r_sym];
        if (referent.get_input_section() != opd)
          continue;

        Symbol<E> *fn = get_opd_sym_at(ctx, moved, rel.r_addend);
        if (!fn)
          Fatal(ctx) << *sec << ": cannot find a symbol in .opd for " << rel
                     << " at offset 0x" << std::hex << (u64)rel.r_addend;

        rel.r_sym = fn->sym_idx;
        rel.r_addend = 0;
      }
    }
  });
}
|
|
||||||
|
|
||||||
// When a function is exported, the dynamic symbol for the function should
|
|
||||||
// refer to the function's .opd entry. This function marks such symbols
|
|
||||||
// with NEEDS_PPC_OPD.
|
|
||||||
// Flags with NEEDS_PPC_OPD every exported function or ifunc symbol owned by
// an object file, plus the non-imported symbols named by the entry, init
// and fini arguments.
void ppc64v1_scan_symbols(Context<E> &ctx) {
  tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
    for (Symbol<E> *sym : file->symbols) {
      if (sym->file != file || !sym->is_exported)
        continue;

      u32 type = sym->get_type();
      if (type == STT_FUNC || type == STT_GNU_IFUNC)
        sym->flags |= NEEDS_PPC_OPD;
    }
  });

  // Functions referenced by the ELF header need .opd entries as well.
  // An empty name means the option was not given.
  auto require_opd = [&](std::string_view name) {
    if (name.empty())
      return;

    Symbol<E> &sym = *get_symbol(ctx, name);
    if (!sym.is_imported)
      sym.flags |= NEEDS_PPC_OPD;
  };

  require_opd(ctx.arg.entry);
  require_opd(ctx.arg.init);
  require_opd(ctx.arg.fini);
}
|
|
||||||
|
|
||||||
// Appends `sym` to this section: the symbol is told its index (its position
// in `symbols`), and the section grows by one entry.
void PPC64OpdSection::add_symbol(Context<E> &ctx, Symbol<E> *sym) {
  const auto slot = symbols.size();
  sym->set_opd_idx(ctx, slot);
  symbols.push_back(sym);

  this->shdr.sh_size += ENTRY_SIZE;
}
|
|
||||||
|
|
||||||
// Returns two per registered symbol when producing position-independent
// output, and zero otherwise.
i64 PPC64OpdSection::get_reldyn_size(Context<E> &ctx) const {
  if (!ctx.arg.pic)
    return 0;
  return symbols.size() * 2;
}
|
|
||||||
|
|
||||||
// Writes one three-doubleword entry per symbol: the symbol's address
// (resolved with NO_PLT | NO_OPD), the TOC value, and zero. For
// position-independent output it also emits an R_RELATIVE dynamic
// relocation for each of the first two doublewords.
void PPC64OpdSection::copy_buf(Context<E> &ctx) {
  ub64 *entry = (ub64 *)(ctx.buf + this->shdr.sh_offset);

  ElfRel<E> *dyn = nullptr;
  if (ctx.arg.pic)
    dyn = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset + reldyn_offset);

  for (Symbol<E> *sym : symbols) {
    u64 func_addr = sym->get_addr(ctx, NO_PLT | NO_OPD);
    u64 toc = ctx.extra.TOC->value;

    entry[0] = func_addr;
    entry[1] = toc;
    entry[2] = 0;
    entry += 3;

    if (!ctx.arg.pic)
      continue;

    u64 where = sym->get_opd_addr(ctx);
    dyn[0] = ElfRel<E>(where, E::R_RELATIVE, 0, func_addr);
    dyn[1] = ElfRel<E>(where + 8, E::R_RELATIVE, 0, toc);
    dyn += 2;
  }
}
|
|
||||||
|
|
||||||
} // namespace mold::elf
|
|
555
third_party/mold/elf/arch-ppc64v2.cc
vendored
555
third_party/mold/elf/arch-ppc64v2.cc
vendored
|
@ -1,555 +0,0 @@
|
||||||
// clang-format off
|
|
||||||
// This file implements the PowerPC ELFv2 ABI which was standardized in
|
|
||||||
// 2014. Modern little-endian PowerPC systems are based on this ABI.
|
|
||||||
// The ABI is often referred to as "ppc64le". This shouldn't be confused
|
|
||||||
// with "ppc64" which refers to the original, big-endian PowerPC systems.
|
|
||||||
//
|
|
||||||
// PPC64 is a bit tricky to support because PC-relative load/store
|
|
||||||
// instructions hadn't been available until Power10 which debuted in 2021.
|
|
||||||
// Prior to Power10, it wasn't trivial for position-independent code (PIC)
|
|
||||||
// to load a value from, for example, .got, as we can't do that with [PC +
|
|
||||||
// the offset to the .got entry].
|
|
||||||
//
|
|
||||||
// In the following, I'll explain how PIC is supported on pre-Power10
|
|
||||||
// systems first and then explain what has changed with Power10.
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// Position-independent code on Power9 or earlier:
|
|
||||||
//
|
|
||||||
// We can get the program counter on older PPC64 systems with the
|
|
||||||
// following four instructions
|
|
||||||
//
|
|
||||||
// mflr r1 // save the current link register to r1
|
|
||||||
// bl .+4 // branch to the next instruction as if it were a function
|
|
||||||
// mflr r0 // copy the return address to r0
|
|
||||||
// mtlr r1 // restore the original link register value
|
|
||||||
//
|
|
||||||
// , but it's too expensive to do if we do this for each load/store.
|
|
||||||
//
|
|
||||||
// As a workaround, most functions are compiled in such a way that r2 is
|
|
||||||
// assumed to always contain the address of .got + 0x8000. With this, we
|
|
||||||
// can for example load the first entry of .got with a single instruction
|
|
||||||
// `lw r0, -0x8000(r2)`. r2 is called the TOC pointer.
|
|
||||||
//
|
|
||||||
// There's only one .got for each ELF module. Therefore, if a callee is in
|
|
||||||
// the same ELF module, r2 doesn't have to be recomputed. Most function
|
|
||||||
// calls are usually within the same ELF module, so this mechanism is
|
|
||||||
// efficient.
|
|
||||||
//
|
|
||||||
// A function compiled for pre-Power10 usually has two entry points,
|
|
||||||
// global and local. The global entry point usually 8 bytes precedes
|
|
||||||
// the local entry point. In between are the following instructions:
|
|
||||||
//
|
|
||||||
// addis r2, r12, .TOC.@ha
|
|
||||||
// addi r2, r2, .TOC.@lo + 4;
|
|
||||||
//
|
|
||||||
// The global entry point assumes that the address of itself is in r12,
|
|
||||||
// and it computes its own TOC pointer from r12. It's easy to do so for
|
|
||||||
// the callee because the offset between its .got + 0x8000 and the
|
|
||||||
// function is known at link-time. The above code sequence then falls
|
|
||||||
// through to the local entry point that assumes r2 is .got + 0x8000.
|
|
||||||
//
|
|
||||||
// So, if a callee's TOC pointer is different from the current one
|
|
||||||
// (e.g. calling a function in another .so), we first load the callee's
|
|
||||||
// address to r12 (e.g. from .got.plt with a r2-relative load) and branch
|
|
||||||
// to that address. Then the callee computes its own TOC pointer using
|
|
||||||
// r12.
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// Position-independent code on Power10:
|
|
||||||
//
|
|
||||||
// Power10 added 8-bytes-long instructions to the ISA. Some of them are
|
|
||||||
// PC-relative load/store instructions that take 34 bits offsets.
|
|
||||||
// Functions compiled with `-mcpu=power10` use these instructions for PIC.
|
|
||||||
// r2 does not have a special meaning in such functions.
|
|
||||||
//
|
|
||||||
// When a function compiled for Power10 calls a function that uses the TOC
|
|
||||||
// pointer, we need to compute a correct value for TOC and set it to r2
|
|
||||||
// before transferring the control to the callee. Thunks are responsible
|
|
||||||
// for doing it.
|
|
||||||
//
|
|
||||||
// `_NOTOC` relocations such as `R_PPC64_REL24_NOTOC` indicate that the
|
|
||||||
// callee does not use TOC (i.e. compiled with `-mcpu=power10`). If a
|
|
||||||
// function using TOC is referenced via a `_NOTOC` relocation, that call
|
|
||||||
// is made through a range extension thunk.
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// Note on section names: the PPC64 psABI uses a weird naming convention
|
|
||||||
// which calls .got.plt .plt. We ignored that part because it's just
|
|
||||||
// confusing. Since the runtime only cares about segments, we should be
|
|
||||||
// able to name sections whatever we want.
|
|
||||||
//
|
|
||||||
// https://github.com/rui314/psabi/blob/main/ppc64v2.pdf
|
|
||||||
|
|
||||||
#include "third_party/mold/elf/mold.h"
|
|
||||||
|
|
||||||
namespace mold::elf {
|
|
||||||
|
|
||||||
using E = PPC64V2;
|
|
||||||
|
|
||||||
// Helpers that slice a value into the 16-bit pieces used by relocations.
// The "a" ("adjusted") variants add 0x8000 before shifting.

// Low 16 bits of x.
static u64 lo(u64 x) {
  return x & 0xffff;
}

// Everything above the low 16 bits of x (not masked to 16 bits).
static u64 hi(u64 x) {
  return x >> 16;
}

// Like hi(), applied to x + 0x8000.
static u64 ha(u64 x) {
  return hi(x + 0x8000);
}

// Bits 16..31 of x.
static u64 high(u64 x) {
  return lo(hi(x));
}

// Bits 16..31 of x + 0x8000.
static u64 higha(u64 x) {
  return lo(ha(x));
}
|
|
||||||
|
|
||||||
// Combines two bit fields of x into one value: bits(x, 33, 16) in the low
// part and bits(x, 15, 0) shifted left by 32.
// NOTE(review): presumably bits(x, hi, lo) extracts bits hi..lo of x shifted
// down to bit 0 -- confirm against its definition.
static u64 prefix34(u64 x) {
  const auto upper = bits(x, 33, 16);
  const auto lower = bits(x, 15, 0);
  return upper | (lower << 32);
}
|
|
||||||
|
|
||||||
// .plt is used only for lazy symbol resolution on PPC64. All PLT
|
|
||||||
// calls are made via range extension thunks even if they are within
|
|
||||||
// reach. Thunks read addresses from .got.plt and jump there.
|
|
||||||
// Therefore, once PLT symbols are resolved and final addresses are
|
|
||||||
// written to .got.plt, thunks just skip .plt and directly jump to the
|
|
||||||
// resolved addresses.
|
|
||||||
template <>
|
|
||||||
void write_plt_header(Context<E> &ctx, u8 *buf) {
|
|
||||||
static const ul32 insn[] = {
|
|
||||||
// Get PC
|
|
||||||
0x7c08'02a6, // mflr r0
|
|
||||||
0x429f'0005, // bcl 20, 31, 4 // obtain PC
|
|
||||||
0x7d68'02a6, // mflr r11
|
|
||||||
0x7c08'03a6, // mtlr r0
|
|
||||||
|
|
||||||
// Compute the PLT entry index
|
|
||||||
0xe80b'002c, // ld r0, 44(r11)
|
|
||||||
0x7d8b'6050, // subf r12, r11, r12
|
|
||||||
0x7d60'5a14, // add r11, r0, r11
|
|
||||||
0x380c'ffcc, // addi r0, r12, -52
|
|
||||||
0x7800'f082, // rldicl r0, r0, 62, 2
|
|
||||||
|
|
||||||
// Load .got.plt[0] and .got.plt[1] and branch to .got.plt[0]
|
|
||||||
0xe98b'0000, // ld r12, 0(r11)
|
|
||||||
0x7d89'03a6, // mtctr r12
|
|
||||||
0xe96b'0008, // ld r11, 8(r11)
|
|
||||||
0x4e80'0420, // bctr
|
|
||||||
|
|
||||||
// .quad .got.plt - .plt - 8
|
|
||||||
0x0000'0000,
|
|
||||||
0x0000'0000,
|
|
||||||
};
|
|
||||||
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ul64 *)(buf + 52) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 8;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
|
|
||||||
// When the control is transferred to a PLT entry, the PLT entry's
|
|
||||||
// address is already set to %r12 by the caller.
|
|
||||||
i64 offset = ctx.plt->shdr.sh_addr - sym.get_plt_addr(ctx);
|
|
||||||
*(ul32 *)buf = 0x4b00'0000 | (offset & 0x00ff'ffff); // b plt0
|
|
||||||
}
|
|
||||||
|
|
||||||
// .plt.got is not necessary on PPC64 because range extension thunks
|
|
||||||
// directly read GOT entries and jump there.
|
|
||||||
template <>
|
|
||||||
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
|
|
||||||
u64 offset, u64 val) {
|
|
||||||
u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_NONE:
|
|
||||||
break;
|
|
||||||
case R_PPC64_ADDR64:
|
|
||||||
*(ul64 *)loc = val;
|
|
||||||
break;
|
|
||||||
case R_PPC64_REL32:
|
|
||||||
*(ul32 *)loc = val - this->shdr.sh_addr - offset;
|
|
||||||
break;
|
|
||||||
case R_PPC64_REL64:
|
|
||||||
*(ul64 *)loc = val - this->shdr.sh_addr - offset;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Decodes the symbol's ppc_local_entry field into a byte offset:
// 0 and 1 map to 0, 2..6 map to 1 << value, and 7 is rejected as reserved.
static u64 get_local_entry_offset(Context<E> &ctx, Symbol<E> &sym) {
  const i64 code = sym.esym().ppc_local_entry;
  assert(code <= 7);

  if (code == 7)
    Fatal(ctx) << sym << ": local entry offset 7 is reserved";

  return (code == 0 || code == 1) ? 0 : (1 << code);
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
ElfRel<E> *dynrel = nullptr;
|
|
||||||
if (ctx.reldyn)
|
|
||||||
dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
|
|
||||||
file.reldyn_offset + this->reldyn_offset);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
u64 S = sym.get_addr(ctx);
|
|
||||||
u64 A = rel.r_addend;
|
|
||||||
u64 P = get_addr() + rel.r_offset;
|
|
||||||
u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>);
|
|
||||||
u64 GOT = ctx.got->shdr.sh_addr;
|
|
||||||
u64 TOC = ctx.extra.TOC->value;
|
|
||||||
|
|
||||||
auto r2save_thunk_addr = [&] { return get_thunk_addr(i); };
|
|
||||||
auto no_r2save_thunk_addr = [&] { return get_thunk_addr(i) + 4; };
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_PPC64_ADDR64:
|
|
||||||
if (name() == ".toc")
|
|
||||||
apply_toc_rel(ctx, sym, rel, loc, S, A, P, dynrel);
|
|
||||||
else
|
|
||||||
apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
|
|
||||||
break;
|
|
||||||
case R_PPC64_TOC16_HA:
|
|
||||||
*(ul16 *)loc = ha(S + A - TOC);
|
|
||||||
break;
|
|
||||||
case R_PPC64_TOC16_LO:
|
|
||||||
*(ul16 *)loc = lo(S + A - TOC);
|
|
||||||
break;
|
|
||||||
case R_PPC64_TOC16_DS:
|
|
||||||
case R_PPC64_TOC16_LO_DS:
|
|
||||||
*(ul16 *)loc |= (S + A - TOC) & 0xfffc;
|
|
||||||
break;
|
|
||||||
case R_PPC64_REL24:
|
|
||||||
if (sym.has_plt(ctx) || !sym.esym().preserves_r2()) {
|
|
||||||
i64 val = r2save_thunk_addr() + A - P;
|
|
||||||
*(ul32 *)loc |= bits(val, 25, 2) << 2;
|
|
||||||
|
|
||||||
// The thunk saves %r2 to the caller's r2 save slot. We need to
|
|
||||||
// restore it after function return. To do so, there's usually a
|
|
||||||
// NOP as a placeholder after a BL. 0x6000'0000 is a NOP.
|
|
||||||
if (*(ul32 *)(loc + 4) == 0x6000'0000)
|
|
||||||
*(ul32 *)(loc + 4) = 0xe841'0018; // ld r2, 24(r1)
|
|
||||||
} else {
|
|
||||||
i64 val = S + get_local_entry_offset(ctx, sym) + A - P;
|
|
||||||
if (sign_extend(val, 25) != val)
|
|
||||||
val = no_r2save_thunk_addr() + A - P;
|
|
||||||
*(ul32 *)loc |= bits(val, 25, 2) << 2;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case R_PPC64_REL24_NOTOC:
|
|
||||||
if (sym.has_plt(ctx) || sym.esym().uses_toc()) {
|
|
||||||
i64 val = no_r2save_thunk_addr() + A - P;
|
|
||||||
*(ul32 *)loc |= bits(val, 25, 2) << 2;
|
|
||||||
} else {
|
|
||||||
i64 val = S + A - P;
|
|
||||||
if (sign_extend(val, 25) != val)
|
|
||||||
val = no_r2save_thunk_addr() + A - P;
|
|
||||||
*(ul32 *)loc |= bits(val, 25, 2) << 2;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case R_PPC64_REL32:
|
|
||||||
*(ul32 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_PPC64_REL64:
|
|
||||||
*(ul64 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_PPC64_REL16_HA:
|
|
||||||
*(ul16 *)loc = ha(S + A - P);
|
|
||||||
break;
|
|
||||||
case R_PPC64_REL16_LO:
|
|
||||||
*(ul16 *)loc = lo(S + A - P);
|
|
||||||
break;
|
|
||||||
case R_PPC64_PLT16_HA:
|
|
||||||
*(ul16 *)loc = ha(G + GOT - TOC);
|
|
||||||
break;
|
|
||||||
case R_PPC64_PLT16_HI:
|
|
||||||
*(ul16 *)loc = hi(G + GOT - TOC);
|
|
||||||
break;
|
|
||||||
case R_PPC64_PLT16_LO:
|
|
||||||
*(ul16 *)loc = lo(G + GOT - TOC);
|
|
||||||
break;
|
|
||||||
case R_PPC64_PLT16_LO_DS:
|
|
||||||
*(ul16 *)loc |= (G + GOT - TOC) & 0xfffc;
|
|
||||||
break;
|
|
||||||
case R_PPC64_PLT_PCREL34:
|
|
||||||
case R_PPC64_PLT_PCREL34_NOTOC:
|
|
||||||
case R_PPC64_GOT_PCREL34:
|
|
||||||
*(ul64 *)loc |= prefix34(G + GOT - P);
|
|
||||||
break;
|
|
||||||
case R_PPC64_PCREL34:
|
|
||||||
*(ul64 *)loc |= prefix34(S + A - P);
|
|
||||||
break;
|
|
||||||
case R_PPC64_GOT_TPREL16_HA:
|
|
||||||
*(ul16 *)loc = ha(sym.get_gottp_addr(ctx) - TOC);
|
|
||||||
break;
|
|
||||||
case R_PPC64_GOT_TPREL16_LO_DS:
|
|
||||||
*(ul16 *)loc |= (sym.get_gottp_addr(ctx) - TOC) & 0xfffc;
|
|
||||||
break;
|
|
||||||
case R_PPC64_GOT_TPREL_PCREL34:
|
|
||||||
*(ul64 *)loc |= prefix34(sym.get_gottp_addr(ctx) - P);
|
|
||||||
break;
|
|
||||||
case R_PPC64_GOT_TLSGD16_HA:
|
|
||||||
*(ul16 *)loc = ha(sym.get_tlsgd_addr(ctx) - TOC);
|
|
||||||
break;
|
|
||||||
case R_PPC64_GOT_TLSGD16_LO:
|
|
||||||
*(ul16 *)loc = lo(sym.get_tlsgd_addr(ctx) - TOC);
|
|
||||||
break;
|
|
||||||
case R_PPC64_GOT_TLSGD_PCREL34:
|
|
||||||
*(ul64 *)loc |= prefix34(sym.get_tlsgd_addr(ctx) - P);
|
|
||||||
break;
|
|
||||||
case R_PPC64_GOT_TLSLD16_HA:
|
|
||||||
*(ul16 *)loc = ha(ctx.got->get_tlsld_addr(ctx) - TOC);
|
|
||||||
break;
|
|
||||||
case R_PPC64_GOT_TLSLD16_LO:
|
|
||||||
*(ul16 *)loc = lo(ctx.got->get_tlsld_addr(ctx) - TOC);
|
|
||||||
break;
|
|
||||||
case R_PPC64_GOT_TLSLD_PCREL34:
|
|
||||||
*(ul64 *)loc |= prefix34(ctx.got->get_tlsld_addr(ctx) - P);
|
|
||||||
break;
|
|
||||||
case R_PPC64_DTPREL16_HA:
|
|
||||||
*(ul16 *)loc = ha(S + A - ctx.dtp_addr);
|
|
||||||
break;
|
|
||||||
case R_PPC64_DTPREL16_LO:
|
|
||||||
*(ul16 *)loc = lo(S + A - ctx.dtp_addr);
|
|
||||||
break;
|
|
||||||
case R_PPC64_DTPREL34:
|
|
||||||
*(ul64 *)loc |= prefix34(S + A - ctx.dtp_addr);
|
|
||||||
break;
|
|
||||||
case R_PPC64_TPREL16_HA:
|
|
||||||
*(ul16 *)loc = ha(S + A - ctx.tp_addr);
|
|
||||||
break;
|
|
||||||
case R_PPC64_TPREL16_LO:
|
|
||||||
*(ul16 *)loc = lo(S + A - ctx.tp_addr);
|
|
||||||
break;
|
|
||||||
case R_PPC64_PLTSEQ:
|
|
||||||
case R_PPC64_PLTSEQ_NOTOC:
|
|
||||||
case R_PPC64_PLTCALL:
|
|
||||||
case R_PPC64_PLTCALL_NOTOC:
|
|
||||||
case R_PPC64_TLS:
|
|
||||||
case R_PPC64_TLSGD:
|
|
||||||
case R_PPC64_TLSLD:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
unreachable();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
auto check = [&](i64 val, i64 lo, i64 hi) {
|
|
||||||
if (val < lo || hi <= val)
|
|
||||||
Error(ctx) << *this << ": relocation " << rel << " against "
|
|
||||||
<< sym << " out of range: " << val << " is not in ["
|
|
||||||
<< lo << ", " << hi << ")";
|
|
||||||
};
|
|
||||||
|
|
||||||
SectionFragment<E> *frag;
|
|
||||||
i64 frag_addend;
|
|
||||||
std::tie(frag, frag_addend) = get_fragment(ctx, rel);
|
|
||||||
|
|
||||||
u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
|
|
||||||
u64 A = frag ? frag_addend : (i64)rel.r_addend;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_PPC64_ADDR64:
|
|
||||||
if (std::optional<u64> val = get_tombstone(sym, frag))
|
|
||||||
*(ul64 *)loc = *val;
|
|
||||||
else
|
|
||||||
*(ul64 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_PPC64_ADDR32: {
|
|
||||||
i64 val = S + A;
|
|
||||||
check(val, 0, 1LL << 32);
|
|
||||||
*(ul32 *)loc = val;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_PPC64_DTPREL64:
|
|
||||||
*(ul64 *)loc = S + A - ctx.dtp_addr;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: "
|
|
||||||
<< rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::scan_relocations(Context<E> &ctx) {
|
|
||||||
assert(shdr().sh_flags & SHF_ALLOC);
|
|
||||||
|
|
||||||
this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
// Scan relocations
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
|
|
||||||
if (sym.is_ifunc())
|
|
||||||
sym.flags |= NEEDS_GOT | NEEDS_PLT;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_PPC64_ADDR64:
|
|
||||||
if (name() == ".toc")
|
|
||||||
scan_toc_rel(ctx, sym, rel);
|
|
||||||
else
|
|
||||||
scan_dyn_absrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_PPC64_GOT_TPREL16_HA:
|
|
||||||
case R_PPC64_GOT_TPREL_PCREL34:
|
|
||||||
sym.flags |= NEEDS_GOTTP;
|
|
||||||
break;
|
|
||||||
case R_PPC64_REL24:
|
|
||||||
if (sym.is_imported)
|
|
||||||
sym.flags |= NEEDS_PLT;
|
|
||||||
break;
|
|
||||||
case R_PPC64_REL24_NOTOC:
|
|
||||||
if (sym.is_imported)
|
|
||||||
sym.flags |= NEEDS_PLT;
|
|
||||||
ctx.extra.is_power10 = true;
|
|
||||||
break;
|
|
||||||
case R_PPC64_PLT16_HA:
|
|
||||||
case R_PPC64_PLT_PCREL34:
|
|
||||||
case R_PPC64_PLT_PCREL34_NOTOC:
|
|
||||||
case R_PPC64_GOT_PCREL34:
|
|
||||||
sym.flags |= NEEDS_GOT;
|
|
||||||
break;
|
|
||||||
case R_PPC64_GOT_TLSGD16_HA:
|
|
||||||
case R_PPC64_GOT_TLSGD_PCREL34:
|
|
||||||
sym.flags |= NEEDS_TLSGD;
|
|
||||||
break;
|
|
||||||
case R_PPC64_GOT_TLSLD16_HA:
|
|
||||||
case R_PPC64_GOT_TLSLD_PCREL34:
|
|
||||||
ctx.needs_tlsld = true;
|
|
||||||
break;
|
|
||||||
case R_PPC64_TPREL16_HA:
|
|
||||||
case R_PPC64_TPREL16_LO:
|
|
||||||
check_tlsle(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_PPC64_REL32:
|
|
||||||
case R_PPC64_REL64:
|
|
||||||
case R_PPC64_TOC16_HA:
|
|
||||||
case R_PPC64_TOC16_LO:
|
|
||||||
case R_PPC64_TOC16_LO_DS:
|
|
||||||
case R_PPC64_TOC16_DS:
|
|
||||||
case R_PPC64_REL16_HA:
|
|
||||||
case R_PPC64_REL16_LO:
|
|
||||||
case R_PPC64_PLT16_HI:
|
|
||||||
case R_PPC64_PLT16_LO:
|
|
||||||
case R_PPC64_PLT16_LO_DS:
|
|
||||||
case R_PPC64_PCREL34:
|
|
||||||
case R_PPC64_PLTSEQ:
|
|
||||||
case R_PPC64_PLTSEQ_NOTOC:
|
|
||||||
case R_PPC64_PLTCALL:
|
|
||||||
case R_PPC64_PLTCALL_NOTOC:
|
|
||||||
case R_PPC64_GOT_TPREL16_LO_DS:
|
|
||||||
case R_PPC64_GOT_TLSGD16_LO:
|
|
||||||
case R_PPC64_GOT_TLSLD16_LO:
|
|
||||||
case R_PPC64_TLS:
|
|
||||||
case R_PPC64_TLSGD:
|
|
||||||
case R_PPC64_TLSLD:
|
|
||||||
case R_PPC64_DTPREL16_HA:
|
|
||||||
case R_PPC64_DTPREL16_LO:
|
|
||||||
case R_PPC64_DTPREL34:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Error(ctx) << *this << ": unknown relocation: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void RangeExtensionThunk<E>::copy_buf(Context<E> &ctx) {
|
|
||||||
u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset;
|
|
||||||
|
|
||||||
// If the destination is PLT, we read an address from .got.plt or .got
|
|
||||||
// and jump there.
|
|
||||||
static const ul32 plt_thunk[] = {
|
|
||||||
0xf841'0018, // std r2, 24(r1)
|
|
||||||
0x3d82'0000, // addis r12, r2, foo@gotplt@toc@ha
|
|
||||||
0xe98c'0000, // ld r12, foo@gotplt@toc@lo(r12)
|
|
||||||
0x7d89'03a6, // mtctr r12
|
|
||||||
0x4e80'0420, // bctr
|
|
||||||
};
|
|
||||||
|
|
||||||
static const ul32 plt_thunk_power10[] = {
|
|
||||||
0xf841'0018, // std r2, 24(r1)
|
|
||||||
0x0410'0000, // pld r12, foo@gotplt@pcrel
|
|
||||||
0xe580'0000,
|
|
||||||
0x7d89'03a6, // mtctr r12
|
|
||||||
0x4e80'0420, // bctr
|
|
||||||
};
|
|
||||||
|
|
||||||
// If the destination is a non-imported function, we directly jump
|
|
||||||
// to its local entry point.
|
|
||||||
static const ul32 local_thunk[] = {
|
|
||||||
0xf841'0018, // std r2, 24(r1)
|
|
||||||
0x3d82'0000, // addis r12, r2, foo@toc@ha
|
|
||||||
0x398c'0000, // addi r12, r12, foo@toc@lo
|
|
||||||
0x7d89'03a6, // mtctr r12
|
|
||||||
0x4e80'0420, // bctr
|
|
||||||
};
|
|
||||||
|
|
||||||
static const ul32 local_thunk_power10[] = {
|
|
||||||
0xf841'0018, // std r2, 24(r1)
|
|
||||||
0x0610'0000, // pla r12, foo@pcrel
|
|
||||||
0x3980'0000,
|
|
||||||
0x7d89'03a6, // mtctr r12
|
|
||||||
0x4e80'0420, // bctr
|
|
||||||
};
|
|
||||||
|
|
||||||
static_assert(E::thunk_size == sizeof(plt_thunk));
|
|
||||||
static_assert(E::thunk_size == sizeof(plt_thunk_power10));
|
|
||||||
static_assert(E::thunk_size == sizeof(local_thunk));
|
|
||||||
static_assert(E::thunk_size == sizeof(local_thunk_power10));
|
|
||||||
|
|
||||||
for (i64 i = 0; i < symbols.size(); i++) {
|
|
||||||
Symbol<E> &sym = *symbols[i];
|
|
||||||
ul32 *loc = (ul32 *)(buf + i * E::thunk_size);
|
|
||||||
|
|
||||||
if (sym.has_plt(ctx)) {
|
|
||||||
u64 got = sym.has_got(ctx) ? sym.get_got_addr(ctx) : sym.get_gotplt_addr(ctx);
|
|
||||||
|
|
||||||
if (ctx.extra.is_power10) {
|
|
||||||
memcpy(loc, plt_thunk_power10, E::thunk_size);
|
|
||||||
*(ul64 *)(loc + 1) |= prefix34(got - get_addr(i) - 4);
|
|
||||||
} else {
|
|
||||||
i64 val = got - ctx.extra.TOC->value;
|
|
||||||
memcpy(loc, plt_thunk, E::thunk_size);
|
|
||||||
loc[1] |= higha(val);
|
|
||||||
loc[2] |= lo(val);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (ctx.extra.is_power10) {
|
|
||||||
memcpy(loc, local_thunk_power10, E::thunk_size);
|
|
||||||
*(ul64 *)(loc + 1) |= prefix34(sym.get_addr(ctx) - get_addr(i) - 4);
|
|
||||||
} else {
|
|
||||||
i64 val = sym.get_addr(ctx) - ctx.extra.TOC->value;
|
|
||||||
memcpy(loc, local_thunk, E::thunk_size);
|
|
||||||
loc[1] |= higha(val);
|
|
||||||
loc[2] |= lo(val);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace mold::elf
|
|
938
third_party/mold/elf/arch-riscv.cc
vendored
938
third_party/mold/elf/arch-riscv.cc
vendored
|
@ -1,938 +0,0 @@
|
||||||
// clang-format off
|
|
||||||
// RISC-V is a clean RISC ISA. It supports PC-relative load/store for
|
|
||||||
// position-independent code. Its 32-bit and 64-bit ISAs are almost
|
|
||||||
// identical. That is, you can think of RV32 as RV64 without 64-bit
|
|
||||||
// operations. In this file, we support both RV64 and RV32.
|
|
||||||
//
|
|
||||||
// RISC-V is essentially little-endian, but the big-endian version is
|
|
||||||
// available as an extension. GCC supports `-mbig-endian` to generate
|
|
||||||
// big-endian code. Even in big-endian mode, machine instructions are
|
|
||||||
// defined to be encoded in little-endian, though. Only the behavior of
|
|
||||||
// load/store instructions is different between LE RISC-V and BE RISC-V.
|
|
||||||
//
|
|
||||||
// From the linker's point of view, the RISC-V's psABI is unique because
|
|
||||||
// sections in input object files can be shrunk while being copied to the
|
|
||||||
// output file. That is contrary to other psABIs in which sections are an
|
|
||||||
// atomic unit of copying. Let me explain it in more details.
|
|
||||||
//
|
|
||||||
// Since RISC-V instructions are 16-bit or 32-bit long, there's no way to
|
|
||||||
// embed a very large immediate into a branch instruction. In fact, JAL
|
|
||||||
// (jump and link) instruction can jump to only within PC ± 1 MiB because
|
|
||||||
// its immediate is only 21 bits long. If the destination is out of its
|
|
||||||
// reach, we need to use two instructions instead; the first instruction
|
|
||||||
// being AUIPC which sets upper 20 bits to a register and the second being
|
|
||||||
// JALR with a 12-bit immediate and the register. Combined, they specify a
|
|
||||||
// 32 bits displacement.
|
|
||||||
//
|
|
||||||
// Other RISC ISAs have the same limitation, and they solved the problem by
|
|
||||||
// letting the linker create so-called "range extension thunks". It works as
|
|
||||||
// follows: the compiler optimistically emits single jump instructions for
|
|
||||||
// function calls. If the linker finds that a branch target is out of reach,
|
|
||||||
// it emits a small piece of machine code near the branch instruction and
|
|
||||||
// redirect the branch to the linker-synthesized code. The code constructs a
|
|
||||||
// full 32-bit address in a register and jump to the destination. That
|
|
||||||
// linker-synthesized code is called "range extension thunks" or just
|
|
||||||
// "thunks".
|
|
||||||
//
|
|
||||||
// The RISC-V psABI is unique that it works the other way around. That is,
|
|
||||||
// for RISC-V, the compiler always emits two instructions (AUIPC + JALR) for
|
|
||||||
// function calls. If the linker finds the destination is reachable with a
|
|
||||||
// single instruction, it replaces the two instructions with the one and
|
|
||||||
// shrink the section size by one instruction length, instead of filling the
|
|
||||||
// gap with a nop.
|
|
||||||
//
|
|
||||||
// With the presence of this relaxation, sections can no longer be
|
|
||||||
// considered as an atomic unit. If we delete 4 bytes from the middle of a
|
|
||||||
// section, all contents after that point needs to be shifted by 4. Symbol
|
|
||||||
// values and relocation offsets have to be adjusted accordingly if they
|
|
||||||
// refer to past the deleted bytes.
|
|
||||||
//
|
|
||||||
// In mold, we use `r_deltas` to memorize how many bytes have been adjusted
|
|
||||||
// for relocations. For symbols, we directly mutate their `value` member.
|
|
||||||
//
|
|
||||||
// RISC-V object files tend to have way more relocations than those for
|
|
||||||
// other targets. This is because all branches, including ones that jump
|
|
||||||
// within the same section, are explicitly expressed with relocations.
|
|
||||||
// Here is why we need them: all control-flow statements such as `if` or
|
|
||||||
// `for` are implemented using branch instructions. For other targets, the
|
|
||||||
// compiler doesn't emit relocations for such branches because they know
|
|
||||||
// at compile-time exactly how many bytes have to be skipped. That's not
|
|
||||||
// true for RISC-V because the linker may delete bytes between a branch and
|
|
||||||
// its destination. Therefore, all branches including in-section ones have
|
|
||||||
// to be explicitly expressed with relocations.
|
|
||||||
//
|
|
||||||
// Note that this mechanism only shrinks sections and never enlarges them, as
|
|
||||||
// the compiler always emits the longest instruction sequence. This
|
|
||||||
// makes the linker implementation a bit simpler because we don't need
|
|
||||||
// to worry about oscillation.
|
|
||||||
//
|
|
||||||
// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc
|
|
||||||
|
|
||||||
#include "third_party/mold/elf/mold.h"
|
|
||||||
|
|
||||||
// MISSING #include <tbb/parallel_for.h>
|
|
||||||
// MISSING #include <tbb/parallel_for_each.h>
|
|
||||||
|
|
||||||
namespace mold::elf {
|
|
||||||
|
|
||||||
// Stores the low 12 bits of `val` into the immediate field (bits 31:20) of
// the 32-bit instruction at `loc`. Bits 19:0 are preserved.
static void write_itype(u8 *loc, u32 val) {
  ul32 &insn = *(ul32 *)loc;
  insn &= 0x000f'ffff;
  insn |= bits(val, 11, 0) << 20;
}
|
|
||||||
|
|
||||||
// Stores the low 12 bits of `val` into the split immediate of the 32-bit
// instruction at `loc`: val[11:5] goes to bits 31:25 and val[4:0] to
// bits 11:7. All other bits are preserved.
static void write_stype(u8 *loc, u32 val) {
  ul32 &insn = *(ul32 *)loc;
  insn &= 0x01ff'f07f;
  insn |= bits(val, 4, 0) << 7 | bits(val, 11, 5) << 25;
}
|
|
||||||
|
|
||||||
// Scatters bits 12:1 of `val` into the immediate fields of the 32-bit
// instruction at `loc` (val[12] -> bit 31, val[10:5] -> bits 30:25,
// val[4:1] -> bits 11:8, val[11] -> bit 7). Other bits are preserved.
static void write_btype(u8 *loc, u32 val) {
  ul32 &insn = *(ul32 *)loc;
  insn &= 0x01ff'f07f;

  auto imm = bit(val, 12) << 31 | bits(val, 10, 5) << 25 |
             bits(val, 4, 1) << 8 | bit(val, 11) << 7;
  insn |= imm;
}
|
|
||||||
|
|
||||||
// Stores the upper 20 bits of `val` (after rounding, see below) into
// bits 31:12 of the 32-bit instruction at `loc`. Bits 11:0 are preserved.
static void write_utype(u8 *loc, u32 val) {
  ul32 &insn = *(ul32 *)loc;
  insn &= 0x0000'0fff;

  // U-type instructions are used in combination with I-type
  // instructions. U-type insn sets an immediate to the upper 20-bits
  // of a register. I-type insn sign-extends a 12-bits immediate and
  // adds it to a register value to construct a complete value. 0x800
  // is added here to compensate for the sign-extension.
  u32 rounded = val + 0x800;
  insn |= rounded & 0xffff'f000;
}
|
|
||||||
|
|
||||||
// Scatters bits 20:1 of `val` into the immediate of the 32-bit instruction
// at `loc` (val[20] -> bit 31, val[10:1] -> bits 30:21, val[11] -> bit 20,
// val[19:12] -> bits 19:12). Bits 11:0 are preserved.
static void write_jtype(u8 *loc, u32 val) {
  ul32 &insn = *(ul32 *)loc;
  insn &= 0x0000'0fff;

  auto imm = bit(val, 20) << 31 | bits(val, 10, 1) << 21 |
             bit(val, 11) << 20 | bits(val, 19, 12) << 12;
  insn |= imm;
}
|
|
||||||
|
|
||||||
// Scatters bits 8:1 of `val` into the immediate fields (bits 12:10 and 6:2)
// of the 16-bit instruction at `loc`. Bits 15:13, 9:7 and 1:0 are preserved.
static void write_cbtype(u8 *loc, u32 val) {
  ul16 &insn = *(ul16 *)loc;
  insn &= 0xe383;

  auto imm = bit(val, 8) << 12 | bit(val, 4) << 11 | bit(val, 3) << 10 |
             bit(val, 7) << 6 | bit(val, 6) << 5 | bit(val, 2) << 4 |
             bit(val, 1) << 3 | bit(val, 5) << 2;
  insn |= imm;
}
|
|
||||||
|
|
||||||
// Scatters bits 11:1 of `val` into the immediate field (bits 12:2) of the
// 16-bit instruction at `loc`. Bits 15:13 and 1:0 are preserved.
static void write_cjtype(u8 *loc, u32 val) {
  ul16 &insn = *(ul16 *)loc;
  insn &= 0xe003;

  auto imm = bit(val, 11) << 12 | bit(val, 4) << 11 | bit(val, 9) << 10 |
             bit(val, 8) << 9 | bit(val, 10) << 8 | bit(val, 6) << 7 |
             bit(val, 7) << 6 | bit(val, 3) << 5 | bit(val, 2) << 4 |
             bit(val, 1) << 3 | bit(val, 5) << 2;
  insn |= imm;
}
|
|
||||||
|
|
||||||
// Overwrites the ULEB128 value stored at `loc` with `val` while keeping the
// encoded length unchanged. The existing continuation bits (0x80) determine
// how many bytes are rewritten; bits of `val` that do not fit in that many
// 7-bit groups are dropped.
static void overwrite_uleb(u8 *loc, u64 val) {
  // Rewrite every byte that has a continuation bit, low-order group first.
  while (*loc & 0b1000'0000) {
    *loc++ = 0b1000'0000 | (val & 0b0111'1111);
    val >>= 7;
  }

  // The final byte has no continuation bit, so the loop above stops before
  // it. It still carries the most significant group and must be written too;
  // otherwise a one-byte ULEB128 would never be updated at all.
  *loc = val & 0b0111'1111;
}
|
|
||||||
|
|
||||||
// Returns the rd register of an R/I/U/J-type instruction.
|
|
||||||
static u32 get_rd(u32 val) {
|
|
||||||
return bits(val, 11, 7);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Replaces the rs1 field (bits 19:15) of the 32-bit instruction at `loc`
// with register number `rs1`. Every other bit of the instruction is kept.
static void set_rs1(u8 *loc, u32 rs1) {
  assert(rs1 < 32);

  // The mask has to be a full 32 bits wide (7 + 5 + 5 + 3 + 5 + 7 digits).
  // With only 31 digits the literal's bit 31 is zero, and the AND would
  // also clear the top bit of the instruction's immediate.
  *(ul32 *)loc &= 0b1111111'11111'00000'111'11111'1111111;
  *(ul32 *)loc |= rs1 << 15;
}
|
|
||||||
|
|
||||||
template <typename E>
|
|
||||||
void write_plt_header(Context<E> &ctx, u8 *buf) {
|
|
||||||
static const ul32 insn_64[] = {
|
|
||||||
0x0000'0397, // auipc t2, %pcrel_hi(.got.plt)
|
|
||||||
0x41c3'0333, // sub t1, t1, t3 # .plt entry + hdr + 12
|
|
||||||
0x0003'be03, // ld t3, %pcrel_lo(1b)(t2) # _dl_runtime_resolve
|
|
||||||
0xfd43'0313, // addi t1, t1, -44 # .plt entry
|
|
||||||
0x0003'8293, // addi t0, t2, %pcrel_lo(1b) # &.got.plt
|
|
||||||
0x0013'5313, // srli t1, t1, 1 # .plt entry offset
|
|
||||||
0x0082'b283, // ld t0, 8(t0) # link map
|
|
||||||
0x000e'0067, // jr t3
|
|
||||||
};
|
|
||||||
|
|
||||||
static const ul32 insn_32[] = {
|
|
||||||
0x0000'0397, // auipc t2, %pcrel_hi(.got.plt)
|
|
||||||
0x41c3'0333, // sub t1, t1, t3 # .plt entry + hdr + 12
|
|
||||||
0x0003'ae03, // lw t3, %pcrel_lo(1b)(t2) # _dl_runtime_resolve
|
|
||||||
0xfd43'0313, // addi t1, t1, -44 # .plt entry
|
|
||||||
0x0003'8293, // addi t0, t2, %pcrel_lo(1b) # &.got.plt
|
|
||||||
0x0023'5313, // srli t1, t1, 2 # .plt entry offset
|
|
||||||
0x0042'a283, // lw t0, 4(t0) # link map
|
|
||||||
0x000e'0067, // jr t3
|
|
||||||
};
|
|
||||||
|
|
||||||
if constexpr (E::is_64)
|
|
||||||
memcpy(buf, insn_64, sizeof(insn_64));
|
|
||||||
else
|
|
||||||
memcpy(buf, insn_32, sizeof(insn_32));
|
|
||||||
|
|
||||||
u64 gotplt = ctx.gotplt->shdr.sh_addr;
|
|
||||||
u64 plt = ctx.plt->shdr.sh_addr;
|
|
||||||
write_utype(buf, gotplt - plt);
|
|
||||||
write_itype(buf + 8, gotplt - plt);
|
|
||||||
write_itype(buf + 16, gotplt - plt);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Instruction template for one PLT entry, copied by write_plt_entry and
// write_pltgot_entry when E::is_64 is true. After the copy, the first word
// is patched with write_utype and the second with write_itype.
static const ul32 plt_entry_64[] = {
|
|
||||||
0x0000'0e17, // auipc t3, %pcrel_hi(function@.got.plt)
|
|
||||||
0x000e'3e03, // ld t3, %pcrel_lo(1b)(t3)
|
|
||||||
0x000e'0367, // jalr t1, t3
|
|
||||||
0x0000'0013, // nop
|
|
||||||
};
|
|
||||||
|
|
||||||
// Instruction template for one PLT entry, copied by write_plt_entry and
// write_pltgot_entry when E::is_64 is false. It differs from plt_entry_64
// only in the second word (lw instead of ld).
static const ul32 plt_entry_32[] = {
|
|
||||||
0x0000'0e17, // auipc t3, %pcrel_hi(function@.got.plt)
|
|
||||||
0x000e'2e03, // lw t3, %pcrel_lo(1b)(t3)
|
|
||||||
0x000e'0367, // jalr t1, t3
|
|
||||||
0x0000'0013, // nop
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename E>
|
|
||||||
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
|
|
||||||
if constexpr (E::is_64)
|
|
||||||
memcpy(buf, plt_entry_64, sizeof(plt_entry_64));
|
|
||||||
else
|
|
||||||
memcpy(buf, plt_entry_32, sizeof(plt_entry_32));
|
|
||||||
|
|
||||||
u64 gotplt = sym.get_gotplt_addr(ctx);
|
|
||||||
u64 plt = sym.get_plt_addr(ctx);
|
|
||||||
write_utype(buf, gotplt - plt);
|
|
||||||
write_itype(buf + 4, gotplt - plt);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename E>
|
|
||||||
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
|
|
||||||
if constexpr (E::is_64)
|
|
||||||
memcpy(buf, plt_entry_64, sizeof(plt_entry_64));
|
|
||||||
else
|
|
||||||
memcpy(buf, plt_entry_32, sizeof(plt_entry_32));
|
|
||||||
|
|
||||||
u64 got = sym.get_got_addr(ctx);
|
|
||||||
u64 plt = sym.get_plt_addr(ctx);
|
|
||||||
write_utype(buf, got - plt);
|
|
||||||
write_itype(buf + 4, got - plt);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename E>
|
|
||||||
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
|
|
||||||
u64 offset, u64 val) {
|
|
||||||
u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_NONE:
|
|
||||||
break;
|
|
||||||
case R_RISCV_ADD32:
|
|
||||||
*(U32<E> *)loc += val;
|
|
||||||
break;
|
|
||||||
case R_RISCV_SUB8:
|
|
||||||
*loc -= val;
|
|
||||||
break;
|
|
||||||
case R_RISCV_SUB16:
|
|
||||||
*(U16<E> *)loc -= val;
|
|
||||||
break;
|
|
||||||
case R_RISCV_SUB32:
|
|
||||||
*(U32<E> *)loc -= val;
|
|
||||||
break;
|
|
||||||
case R_RISCV_SUB6:
|
|
||||||
*loc = (*loc & 0b1100'0000) | ((*loc - val) & 0b0011'1111);
|
|
||||||
break;
|
|
||||||
case R_RISCV_SET6:
|
|
||||||
*loc = (*loc & 0b1100'0000) | (val & 0b0011'1111);
|
|
||||||
break;
|
|
||||||
case R_RISCV_SET8:
|
|
||||||
*loc = val;
|
|
||||||
break;
|
|
||||||
case R_RISCV_SET16:
|
|
||||||
*(U16<E> *)loc = val;
|
|
||||||
break;
|
|
||||||
case R_RISCV_SET32:
|
|
||||||
*(U32<E> *)loc = val;
|
|
||||||
break;
|
|
||||||
case R_RISCV_32_PCREL:
|
|
||||||
*(U32<E> *)loc = val - this->shdr.sh_addr - offset;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename E>
|
|
||||||
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
ElfRel<E> *dynrel = nullptr;
|
|
||||||
if (ctx.reldyn)
|
|
||||||
dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
|
|
||||||
file.reldyn_offset + this->reldyn_offset);
|
|
||||||
|
|
||||||
auto get_r_delta = [&](i64 idx) {
|
|
||||||
return extra.r_deltas.empty() ? 0 : extra.r_deltas[idx];
|
|
||||||
};
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || rel.r_type == R_RISCV_RELAX)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
i64 r_offset = rel.r_offset - get_r_delta(i);
|
|
||||||
i64 removed_bytes = get_r_delta(i + 1) - get_r_delta(i);
|
|
||||||
u8 *loc = base + r_offset;
|
|
||||||
|
|
||||||
auto check = [&](i64 val, i64 lo, i64 hi) {
|
|
||||||
if (val < lo || hi <= val)
|
|
||||||
Error(ctx) << *this << ": relocation " << rel << " against "
|
|
||||||
<< sym << " out of range: " << val << " is not in ["
|
|
||||||
<< lo << ", " << hi << ")";
|
|
||||||
};
|
|
||||||
|
|
||||||
auto find_paired_reloc = [&] {
|
|
||||||
Symbol<E> &sym = *file.symbols[rels[i].r_sym];
|
|
||||||
assert(sym.get_input_section() == this);
|
|
||||||
|
|
||||||
if (sym.value < r_offset) {
|
|
||||||
for (i64 j = i - 1; j >= 0; j--)
|
|
||||||
if (u32 ty = rels[j].r_type;
|
|
||||||
ty == R_RISCV_GOT_HI20 || ty == R_RISCV_TLS_GOT_HI20 ||
|
|
||||||
ty == R_RISCV_TLS_GD_HI20 || ty == R_RISCV_PCREL_HI20)
|
|
||||||
if (sym.value == rels[j].r_offset - get_r_delta(j))
|
|
||||||
return j;
|
|
||||||
} else {
|
|
||||||
for (i64 j = i + 1; j < rels.size(); j++)
|
|
||||||
if (u32 ty = rels[j].r_type;
|
|
||||||
ty == R_RISCV_GOT_HI20 || ty == R_RISCV_TLS_GOT_HI20 ||
|
|
||||||
ty == R_RISCV_TLS_GD_HI20 || ty == R_RISCV_PCREL_HI20)
|
|
||||||
if (sym.value == rels[j].r_offset - get_r_delta(j))
|
|
||||||
return j;
|
|
||||||
}
|
|
||||||
|
|
||||||
Fatal(ctx) << *this << ": paired relocation is missing: " << i;
|
|
||||||
};
|
|
||||||
|
|
||||||
u64 S = sym.get_addr(ctx);
|
|
||||||
u64 A = rel.r_addend;
|
|
||||||
u64 P = get_addr() + r_offset;
|
|
||||||
u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>);
|
|
||||||
u64 GOT = ctx.got->shdr.sh_addr;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_RISCV_32:
|
|
||||||
if constexpr (E::is_64)
|
|
||||||
*(U32<E> *)loc = S + A;
|
|
||||||
else
|
|
||||||
apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
|
|
||||||
break;
|
|
||||||
case R_RISCV_64:
|
|
||||||
assert(E::is_64);
|
|
||||||
apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
|
|
||||||
break;
|
|
||||||
case R_RISCV_BRANCH:
|
|
||||||
check(S + A - P, -(1 << 12), 1 << 12);
|
|
||||||
write_btype(loc, S + A - P);
|
|
||||||
break;
|
|
||||||
case R_RISCV_JAL:
|
|
||||||
check(S + A - P, -(1 << 20), 1 << 20);
|
|
||||||
write_jtype(loc, S + A - P);
|
|
||||||
break;
|
|
||||||
case R_RISCV_CALL:
|
|
||||||
case R_RISCV_CALL_PLT: {
|
|
||||||
u32 rd = get_rd(*(ul32 *)(contents.data() + rel.r_offset + 4));
|
|
||||||
|
|
||||||
if (removed_bytes == 4) {
|
|
||||||
// auipc + jalr -> jal
|
|
||||||
*(ul32 *)loc = (rd << 7) | 0b1101111;
|
|
||||||
write_jtype(loc, S + A - P);
|
|
||||||
} else if (removed_bytes == 6 && rd == 0) {
|
|
||||||
// auipc + jalr -> c.j
|
|
||||||
*(ul16 *)loc = 0b101'00000000000'01;
|
|
||||||
write_cjtype(loc, S + A - P);
|
|
||||||
} else if (removed_bytes == 6 && rd == 1) {
|
|
||||||
// auipc + jalr -> c.jal
|
|
||||||
assert(!E::is_64);
|
|
||||||
*(ul16 *)loc = 0b001'00000000000'01;
|
|
||||||
write_cjtype(loc, S + A - P);
|
|
||||||
} else {
|
|
||||||
assert(removed_bytes == 0);
|
|
||||||
// Calling an undefined weak symbol does not make sense.
|
|
||||||
// We make such call into an infinite loop. This should
|
|
||||||
// help debugging of a faulty program.
|
|
||||||
u64 val = sym.esym().is_undef_weak() ? 0 : S + A - P;
|
|
||||||
check(val, -(1LL << 31), 1LL << 31);
|
|
||||||
write_utype(loc, val);
|
|
||||||
write_itype(loc + 4, val);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_RISCV_GOT_HI20:
|
|
||||||
write_utype(loc, G + GOT + A - P);
|
|
||||||
break;
|
|
||||||
case R_RISCV_TLS_GOT_HI20:
|
|
||||||
write_utype(loc, sym.get_gottp_addr(ctx) + A - P);
|
|
||||||
break;
|
|
||||||
case R_RISCV_TLS_GD_HI20:
|
|
||||||
write_utype(loc, sym.get_tlsgd_addr(ctx) + A - P);
|
|
||||||
break;
|
|
||||||
case R_RISCV_PCREL_HI20:
|
|
||||||
write_utype(loc, S + A - P);
|
|
||||||
break;
|
|
||||||
case R_RISCV_PCREL_LO12_I:
|
|
||||||
case R_RISCV_PCREL_LO12_S: {
|
|
||||||
i64 idx2 = find_paired_reloc();
|
|
||||||
const ElfRel<E> &rel2 = rels[idx2];
|
|
||||||
Symbol<E> &sym2 = *file.symbols[rel2.r_sym];
|
|
||||||
|
|
||||||
u64 S = sym2.get_addr(ctx);
|
|
||||||
u64 A = rel2.r_addend;
|
|
||||||
u64 P = get_addr() + rel2.r_offset - get_r_delta(idx2);
|
|
||||||
u64 G = sym2.get_got_idx(ctx) * sizeof(Word<E>);
|
|
||||||
u64 val;
|
|
||||||
|
|
||||||
switch (rel2.r_type) {
|
|
||||||
case R_RISCV_GOT_HI20:
|
|
||||||
val = G + GOT + A - P;
|
|
||||||
break;
|
|
||||||
case R_RISCV_TLS_GOT_HI20:
|
|
||||||
val = sym2.get_gottp_addr(ctx) + A - P;
|
|
||||||
break;
|
|
||||||
case R_RISCV_TLS_GD_HI20:
|
|
||||||
val = sym2.get_tlsgd_addr(ctx) + A - P;
|
|
||||||
break;
|
|
||||||
case R_RISCV_PCREL_HI20:
|
|
||||||
val = S + A - P;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
unreachable();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rel.r_type == R_RISCV_PCREL_LO12_I)
|
|
||||||
write_itype(loc, val);
|
|
||||||
else
|
|
||||||
write_stype(loc, val);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_RISCV_HI20:
|
|
||||||
assert(removed_bytes == 0 || removed_bytes == 4);
|
|
||||||
if (removed_bytes == 0) {
|
|
||||||
check(S + A, -(1LL << 31), 1LL << 31);
|
|
||||||
write_utype(loc, S + A);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case R_RISCV_LO12_I:
|
|
||||||
case R_RISCV_LO12_S:
|
|
||||||
if (rel.r_type == R_RISCV_LO12_I)
|
|
||||||
write_itype(loc, S + A);
|
|
||||||
else
|
|
||||||
write_stype(loc, S + A);
|
|
||||||
|
|
||||||
// Rewrite `lw t1, 0(t0)` with `lw t1, 0(x0)` if the address is
|
|
||||||
// accessible relative to the zero register. If the upper 20 bits
|
|
||||||
// are all zero, the corresponding LUI might have been removed.
|
|
||||||
if (bits(S + A, 31, 12) == 0)
|
|
||||||
set_rs1(loc, 0);
|
|
||||||
break;
|
|
||||||
case R_RISCV_TPREL_HI20:
|
|
||||||
assert(removed_bytes == 0 || removed_bytes == 4);
|
|
||||||
if (removed_bytes == 0)
|
|
||||||
write_utype(loc, S + A - ctx.tp_addr);
|
|
||||||
break;
|
|
||||||
case R_RISCV_TPREL_ADD:
|
|
||||||
// This relocation just annotates an ADD instruction that can be
|
|
||||||
// removed when a TPREL is relaxed. No value is needed to be
|
|
||||||
// written.
|
|
||||||
assert(removed_bytes == 0 || removed_bytes == 4);
|
|
||||||
break;
|
|
||||||
case R_RISCV_TPREL_LO12_I:
|
|
||||||
case R_RISCV_TPREL_LO12_S: {
|
|
||||||
i64 val = S + A - ctx.tp_addr;
|
|
||||||
if (rel.r_type == R_RISCV_TPREL_LO12_I)
|
|
||||||
write_itype(loc, val);
|
|
||||||
else
|
|
||||||
write_stype(loc, val);
|
|
||||||
|
|
||||||
// Rewrite `lw t1, 0(t0)` with `lw t1, 0(tp)` if the address is
|
|
||||||
// directly accessible using tp. tp is x4.
|
|
||||||
if (sign_extend(val, 11) == val)
|
|
||||||
set_rs1(loc, 4);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_RISCV_ADD8:
|
|
||||||
loc += S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_ADD16:
|
|
||||||
*(U16<E> *)loc += S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_ADD32:
|
|
||||||
*(U32<E> *)loc += S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_ADD64:
|
|
||||||
*(U64<E> *)loc += S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_SUB8:
|
|
||||||
loc -= S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_SUB16:
|
|
||||||
*(U16<E> *)loc -= S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_SUB32:
|
|
||||||
*(U32<E> *)loc -= S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_SUB64:
|
|
||||||
*(U64<E> *)loc -= S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_ALIGN: {
|
|
||||||
// A R_RISCV_ALIGN is followed by a NOP sequence. We need to remove
|
|
||||||
// zero or more bytes so that the instruction after R_RISCV_ALIGN is
|
|
||||||
// aligned to a given alignment boundary.
|
|
||||||
//
|
|
||||||
// We need to guarantee that the NOP sequence is valid after byte
|
|
||||||
// removal (e.g. we can't remove the first 2 bytes of a 4-byte NOP).
|
|
||||||
// For the sake of simplicity, we always rewrite the entire NOP sequence.
|
|
||||||
i64 padding_bytes = rel.r_addend - removed_bytes;
|
|
||||||
assert((padding_bytes & 1) == 0);
|
|
||||||
|
|
||||||
i64 i = 0;
|
|
||||||
for (; i <= padding_bytes - 4; i += 4)
|
|
||||||
*(ul32 *)(loc + i) = 0x0000'0013; // nop
|
|
||||||
if (i < padding_bytes)
|
|
||||||
*(ul16 *)(loc + i) = 0x0001; // c.nop
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_RISCV_RVC_BRANCH:
|
|
||||||
check(S + A - P, -(1 << 8), 1 << 8);
|
|
||||||
write_cbtype(loc, S + A - P);
|
|
||||||
break;
|
|
||||||
case R_RISCV_RVC_JUMP:
|
|
||||||
check(S + A - P, -(1 << 11), 1 << 11);
|
|
||||||
write_cjtype(loc, S + A - P);
|
|
||||||
break;
|
|
||||||
case R_RISCV_SUB6:
|
|
||||||
*loc = (*loc & 0b1100'0000) | ((*loc - (S + A)) & 0b0011'1111);
|
|
||||||
break;
|
|
||||||
case R_RISCV_SET6:
|
|
||||||
*loc = (*loc & 0b1100'0000) | ((S + A) & 0b0011'1111);
|
|
||||||
break;
|
|
||||||
case R_RISCV_SET8:
|
|
||||||
*loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_SET16:
|
|
||||||
*(U16<E> *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_SET32:
|
|
||||||
*(U32<E> *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_PLT32:
|
|
||||||
case R_RISCV_32_PCREL:
|
|
||||||
*(U32<E> *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
unreachable();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename E>
|
|
||||||
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
SectionFragment<E> *frag;
|
|
||||||
i64 frag_addend;
|
|
||||||
std::tie(frag, frag_addend) = get_fragment(ctx, rel);
|
|
||||||
|
|
||||||
u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
|
|
||||||
u64 A = frag ? frag_addend : (i64)rel.r_addend;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_RISCV_32:
|
|
||||||
*(U32<E> *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_64:
|
|
||||||
if (std::optional<u64> val = get_tombstone(sym, frag))
|
|
||||||
*(U64<E> *)loc = *val;
|
|
||||||
else
|
|
||||||
*(U64<E> *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_ADD8:
|
|
||||||
*loc += S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_ADD16:
|
|
||||||
*(U16<E> *)loc += S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_ADD32:
|
|
||||||
*(U32<E> *)loc += S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_ADD64:
|
|
||||||
*(U64<E> *)loc += S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_SUB8:
|
|
||||||
*loc -= S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_SUB16:
|
|
||||||
*(U16<E> *)loc -= S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_SUB32:
|
|
||||||
*(U32<E> *)loc -= S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_SUB64:
|
|
||||||
*(U64<E> *)loc -= S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_SUB6:
|
|
||||||
*loc = (*loc & 0b1100'0000) | ((*loc - (S + A)) & 0b0011'1111);
|
|
||||||
break;
|
|
||||||
case R_RISCV_SET6:
|
|
||||||
*loc = (*loc & 0b1100'0000) | ((S + A) & 0b0011'1111);
|
|
||||||
break;
|
|
||||||
case R_RISCV_SET8:
|
|
||||||
*loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_SET16:
|
|
||||||
*(U16<E> *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_SET32:
|
|
||||||
*(U32<E> *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_RISCV_SET_ULEB128:
|
|
||||||
overwrite_uleb(loc, S + A);
|
|
||||||
break;
|
|
||||||
case R_RISCV_SUB_ULEB128: {
|
|
||||||
u8 *p = loc;
|
|
||||||
u64 val = read_uleb(p);
|
|
||||||
overwrite_uleb(loc, val - S - A);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: "
|
|
||||||
<< rel;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename E>
|
|
||||||
void InputSection<E>::copy_contents_riscv(Context<E> &ctx, u8 *buf) {
|
|
||||||
// If a section is not relaxed, we can copy it as a one big chunk.
|
|
||||||
if (extra.r_deltas.empty()) {
|
|
||||||
uncompress_to(ctx, buf);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// A relaxed section is copied piece-wise.
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
i64 pos = 0;
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
i64 delta = extra.r_deltas[i + 1] - extra.r_deltas[i];
|
|
||||||
if (delta == 0)
|
|
||||||
continue;
|
|
||||||
assert(delta > 0);
|
|
||||||
|
|
||||||
const ElfRel<E> &r = rels[i];
|
|
||||||
memcpy(buf, contents.data() + pos, r.r_offset - pos);
|
|
||||||
buf += r.r_offset - pos;
|
|
||||||
pos = r.r_offset + delta;
|
|
||||||
}
|
|
||||||
|
|
||||||
memcpy(buf, contents.data() + pos, contents.size() - pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename E>
|
|
||||||
void InputSection<E>::scan_relocations(Context<E> &ctx) {
|
|
||||||
assert(shdr().sh_flags & SHF_ALLOC);
|
|
||||||
|
|
||||||
this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
// Scan relocations
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
|
|
||||||
if (sym.is_ifunc())
|
|
||||||
sym.flags |= NEEDS_GOT | NEEDS_PLT;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_RISCV_32:
|
|
||||||
if constexpr (E::is_64)
|
|
||||||
scan_absrel(ctx, sym, rel);
|
|
||||||
else
|
|
||||||
scan_dyn_absrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_RISCV_HI20:
|
|
||||||
scan_absrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_RISCV_64:
|
|
||||||
if constexpr (!E::is_64)
|
|
||||||
Fatal(ctx) << *this << ": R_RISCV_64 cannot be used on RV32";
|
|
||||||
scan_dyn_absrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_RISCV_CALL:
|
|
||||||
case R_RISCV_CALL_PLT:
|
|
||||||
case R_RISCV_PLT32:
|
|
||||||
if (sym.is_imported)
|
|
||||||
sym.flags |= NEEDS_PLT;
|
|
||||||
break;
|
|
||||||
case R_RISCV_GOT_HI20:
|
|
||||||
sym.flags |= NEEDS_GOT;
|
|
||||||
break;
|
|
||||||
case R_RISCV_TLS_GOT_HI20:
|
|
||||||
sym.flags |= NEEDS_GOTTP;
|
|
||||||
break;
|
|
||||||
case R_RISCV_TLS_GD_HI20:
|
|
||||||
sym.flags |= NEEDS_TLSGD;
|
|
||||||
break;
|
|
||||||
case R_RISCV_32_PCREL:
|
|
||||||
scan_pcrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_RISCV_TPREL_HI20:
|
|
||||||
case R_RISCV_TPREL_LO12_I:
|
|
||||||
case R_RISCV_TPREL_LO12_S:
|
|
||||||
case R_RISCV_TPREL_ADD:
|
|
||||||
check_tlsle(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_RISCV_BRANCH:
|
|
||||||
case R_RISCV_JAL:
|
|
||||||
case R_RISCV_PCREL_HI20:
|
|
||||||
case R_RISCV_PCREL_LO12_I:
|
|
||||||
case R_RISCV_PCREL_LO12_S:
|
|
||||||
case R_RISCV_LO12_I:
|
|
||||||
case R_RISCV_LO12_S:
|
|
||||||
case R_RISCV_ADD8:
|
|
||||||
case R_RISCV_ADD16:
|
|
||||||
case R_RISCV_ADD32:
|
|
||||||
case R_RISCV_ADD64:
|
|
||||||
case R_RISCV_SUB8:
|
|
||||||
case R_RISCV_SUB16:
|
|
||||||
case R_RISCV_SUB32:
|
|
||||||
case R_RISCV_SUB64:
|
|
||||||
case R_RISCV_ALIGN:
|
|
||||||
case R_RISCV_RVC_BRANCH:
|
|
||||||
case R_RISCV_RVC_JUMP:
|
|
||||||
case R_RISCV_RELAX:
|
|
||||||
case R_RISCV_SUB6:
|
|
||||||
case R_RISCV_SET6:
|
|
||||||
case R_RISCV_SET8:
|
|
||||||
case R_RISCV_SET16:
|
|
||||||
case R_RISCV_SET32:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Error(ctx) << *this << ": unknown relocation: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename E>
|
|
||||||
static bool is_resizable(Context<E> &ctx, InputSection<E> *isec) {
|
|
||||||
return isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC) &&
|
|
||||||
(isec->shdr().sh_flags & SHF_EXECINSTR);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns the distance between a relocated place and a symbol.
|
|
||||||
template <typename E>
|
|
||||||
static i64 compute_distance(Context<E> &ctx, Symbol<E> &sym,
|
|
||||||
InputSection<E> &isec, const ElfRel<E> &rel) {
|
|
||||||
// We handle absolute symbols as if they were infinitely far away
|
|
||||||
// because `shrink_section` may increase a distance between a branch
|
|
||||||
// instruction and an absolute symbol. Branching to an absolute
|
|
||||||
// location is extremely rare in real code, though.
|
|
||||||
if (sym.is_absolute())
|
|
||||||
return INT32_MAX;
|
|
||||||
|
|
||||||
// Likewise, relocations against weak undefined symbols won't be relaxed.
|
|
||||||
if (sym.esym().is_undef_weak())
|
|
||||||
return INT32_MAX;
|
|
||||||
|
|
||||||
// Compute a distance between the relocated place and the symbol.
|
|
||||||
i64 S = sym.get_addr(ctx);
|
|
||||||
i64 A = rel.r_addend;
|
|
||||||
i64 P = isec.get_addr() + rel.r_offset;
|
|
||||||
return S + A - P;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Scan relocations to shrink sections.
|
|
||||||
template <typename E>
|
|
||||||
static void shrink_section(Context<E> &ctx, InputSection<E> &isec, bool use_rvc) {
|
|
||||||
std::span<const ElfRel<E>> rels = isec.get_rels(ctx);
|
|
||||||
isec.extra.r_deltas.resize(rels.size() + 1);
|
|
||||||
|
|
||||||
i64 delta = 0;
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &r = rels[i];
|
|
||||||
Symbol<E> &sym = *isec.file.symbols[r.r_sym];
|
|
||||||
isec.extra.r_deltas[i] = delta;
|
|
||||||
|
|
||||||
// Handling R_RISCV_ALIGN is mandatory.
|
|
||||||
//
|
|
||||||
// R_RISCV_ALIGN refers to NOP instructions. We need to eliminate some
|
|
||||||
// or all of the instructions so that the instruction that immediately
|
|
||||||
// follows the NOPs is aligned to a specified alignment boundary.
|
|
||||||
if (r.r_type == R_RISCV_ALIGN) {
|
|
||||||
// The total bytes of NOPs is stored to r_addend, so the next
|
|
||||||
// instruction is r_addend away.
|
|
||||||
u64 loc = isec.get_addr() + r.r_offset - delta;
|
|
||||||
u64 next_loc = loc + r.r_addend;
|
|
||||||
u64 alignment = bit_ceil(r.r_addend + 1);
|
|
||||||
assert(alignment <= (1 << isec.p2align));
|
|
||||||
delta += next_loc - align_to(loc, alignment);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handling other relocations is optional.
|
|
||||||
if (!ctx.arg.relax || i == rels.size() - 1 ||
|
|
||||||
rels[i + 1].r_type != R_RISCV_RELAX)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// Linker-synthesized symbols haven't been assigned their final
|
|
||||||
// values when we are shrinking sections because actual values can
|
|
||||||
// be computed only after we fix the file layout. Therefore, we
|
|
||||||
// assume that relocations against such symbols are always
|
|
||||||
// non-relaxable.
|
|
||||||
if (sym.file == ctx.internal_obj)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
switch (r.r_type) {
|
|
||||||
case R_RISCV_CALL:
|
|
||||||
case R_RISCV_CALL_PLT: {
|
|
||||||
// These relocations refer to an AUIPC + JALR instruction pair to
|
|
||||||
// allow to jump to anywhere in PC ± 2 GiB. If the jump target is
|
|
||||||
// close enough to PC, we can use C.J, C.JAL or JAL instead.
|
|
||||||
i64 dist = compute_distance(ctx, sym, isec, r);
|
|
||||||
if (dist & 1)
|
|
||||||
break;
|
|
||||||
|
|
||||||
i64 rd = get_rd(*(ul32 *)(isec.contents.data() + r.r_offset + 4));
|
|
||||||
|
|
||||||
if (rd == 0 && sign_extend(dist, 11) == dist && use_rvc) {
|
|
||||||
// If rd is x0 and the jump target is within ±2 KiB, we can use
|
|
||||||
// C.J, saving 6 bytes.
|
|
||||||
delta += 6;
|
|
||||||
} else if (rd == 1 && sign_extend(dist, 11) == dist && use_rvc && !E::is_64) {
|
|
||||||
// If rd is x1 and the jump target is within ±2 KiB, we can use
|
|
||||||
// C.JAL. This is RV32 only because C.JAL is RV32-only instruction.
|
|
||||||
delta += 6;
|
|
||||||
} else if (sign_extend(dist, 20) == dist) {
|
|
||||||
// If the jump target is within ±1 MiB, we can use JAL.
|
|
||||||
delta += 4;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_RISCV_HI20:
|
|
||||||
// If the upper 20 bits are all zero, we can remove LUI.
|
|
||||||
// The corresponding instructions referred to by LO12_I/LO12_S
|
|
||||||
// relocations will use the zero register instead.
|
|
||||||
if (bits(sym.get_addr(ctx), 31, 12) == 0)
|
|
||||||
delta += 4;
|
|
||||||
break;
|
|
||||||
case R_RISCV_TPREL_HI20:
|
|
||||||
case R_RISCV_TPREL_ADD:
|
|
||||||
// These relocations are used to add a high 20-bit value to the
|
|
||||||
// thread pointer. The following two instructions materializes
|
|
||||||
// TP + HI20(foo) in %r5, for example.
|
|
||||||
//
|
|
||||||
// lui a5,%tprel_hi(foo) # R_RISCV_TPREL_HI20 (symbol)
|
|
||||||
// add a5,a5,tp,%tprel_add(foo) # R_RISCV_TPREL_ADD (symbol)
|
|
||||||
//
|
|
||||||
// Then thread-local variable `foo` is accessed with a low 12-bit
|
|
||||||
// offset like this:
|
|
||||||
//
|
|
||||||
// sw t0,%tprel_lo(foo)(a5) # R_RISCV_TPREL_LO12_S (symbol)
|
|
||||||
//
|
|
||||||
// However, if the variable is at TP ±2 KiB, TP + HI20(foo) is the
|
|
||||||
// same as TP, so we can instead access the thread-local variable
|
|
||||||
// directly using TP like this:
|
|
||||||
//
|
|
||||||
// sw t0,%tprel_lo(foo)(tp)
|
|
||||||
//
|
|
||||||
// Here, we remove `lui` and `add` if the offset is within ±2 KiB.
|
|
||||||
if (i64 val = sym.get_addr(ctx) + r.r_addend - ctx.tp_addr;
|
|
||||||
sign_extend(val, 11) == val)
|
|
||||||
delta += 4;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
isec.extra.r_deltas[rels.size()] = delta;
|
|
||||||
isec.sh_size -= delta;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Shrink sections by interpreting relocations.
|
|
||||||
//
|
|
||||||
// This operation seems to be optional, because by default longest
|
|
||||||
// instructions are being used. However, calling this function is actually
|
|
||||||
// mandatory because of R_RISCV_ALIGN. R_RISCV_ALIGN is a directive to the
|
|
||||||
// linker to align the location referred to by the relocation to a
|
|
||||||
// specified byte boundary. We at least have to interpret them to satisfy
|
|
||||||
// the alignment constraints.
|
|
||||||
template <typename E>
|
|
||||||
i64 riscv_resize_sections(Context<E> &ctx) {
|
|
||||||
Timer t(ctx, "riscv_resize_sections");
|
|
||||||
|
|
||||||
// True if we can use the 2-byte instructions. This is usually true on
|
|
||||||
// Unix because RV64GC is generally considered the baseline hardware.
|
|
||||||
bool use_rvc = get_eflags(ctx) & EF_RISCV_RVC;
|
|
||||||
|
|
||||||
// Find all the relocations that can be relaxed.
|
|
||||||
// This step should only shrink sections.
|
|
||||||
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
|
|
||||||
for (std::unique_ptr<InputSection<E>> &isec : file->sections)
|
|
||||||
if (is_resizable(ctx, isec.get()))
|
|
||||||
shrink_section(ctx, *isec, use_rvc);
|
|
||||||
});
|
|
||||||
|
|
||||||
// Fix symbol values.
|
|
||||||
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
|
|
||||||
for (Symbol<E> *sym : file->symbols) {
|
|
||||||
if (sym->file != file)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
InputSection<E> *isec = sym->get_input_section();
|
|
||||||
if (!isec || isec->extra.r_deltas.empty())
|
|
||||||
continue;
|
|
||||||
|
|
||||||
std::span<const ElfRel<E>> rels = isec->get_rels(ctx);
|
|
||||||
auto it = std::lower_bound(rels.begin(), rels.end(), sym->value,
|
|
||||||
[&](const ElfRel<E> &r, u64 val) {
|
|
||||||
return r.r_offset < val;
|
|
||||||
});
|
|
||||||
|
|
||||||
sym->value -= isec->extra.r_deltas[it - rels.begin()];
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// Re-compute section offset again to finalize them.
|
|
||||||
compute_section_sizes(ctx);
|
|
||||||
return set_osec_offsets(ctx);
|
|
||||||
}
|
|
||||||
|
|
||||||
#define INSTANTIATE(E) \
|
|
||||||
template void write_plt_header(Context<E> &, u8 *); \
|
|
||||||
template void write_plt_entry(Context<E> &, u8 *, Symbol<E> &); \
|
|
||||||
template void write_pltgot_entry(Context<E> &, u8 *, Symbol<E> &); \
|
|
||||||
template void \
|
|
||||||
EhFrameSection<E>::apply_reloc(Context<E> &, const ElfRel<E> &, u64, u64); \
|
|
||||||
template void InputSection<E>::apply_reloc_alloc(Context<E> &, u8 *); \
|
|
||||||
template void InputSection<E>::apply_reloc_nonalloc(Context<E> &, u8 *); \
|
|
||||||
template void InputSection<E>::copy_contents_riscv(Context<E> &, u8 *); \
|
|
||||||
template void InputSection<E>::scan_relocations(Context<E> &); \
|
|
||||||
template i64 riscv_resize_sections(Context<E> &);
|
|
||||||
|
|
||||||
INSTANTIATE(RV64LE);
|
|
||||||
INSTANTIATE(RV64BE);
|
|
||||||
INSTANTIATE(RV32LE);
|
|
||||||
INSTANTIATE(RV32BE);
|
|
||||||
|
|
||||||
} // namespace mold::elf
|
|
491
third_party/mold/elf/arch-s390x.cc
vendored
491
third_party/mold/elf/arch-s390x.cc
vendored
|
@ -1,491 +0,0 @@
|
||||||
// clang-format off
|
|
||||||
// This file contains code for the IBM z/Architecture 64-bit ISA, which is
|
|
||||||
// commonly referred to as "s390x" on Linux.
|
|
||||||
//
|
|
||||||
// z/Architecture is a 64-bit CISC ISA developed by IBM around 2000 for
|
|
||||||
// IBM's "big iron" mainframe computers. The computers are direct
|
|
||||||
// descendents of IBM System/360 all the way back in 1966. I've never
|
|
||||||
// actually seen a mainframe, and you probably haven't either, but it looks
|
|
||||||
// like the mainframe market is still large enough to sustain its ecosystem.
|
|
||||||
// Ubuntu for example provides the official support for s390x as of 2022.
|
|
||||||
// Since they are being actively maintained, we need to support them.
|
|
||||||
//
|
|
||||||
// As an instruction set, s390x isn't particularly odd. It has 16 general-
|
|
||||||
// purpose registers. Instructions are 2, 4 or 6 bytes long and always
|
|
||||||
// aligned to 2-byte boundaries. Despite its unfamiliarity, I found that it
|
|
||||||
// just feels like an x86-64 in a parallel universe.
|
|
||||||
//
|
|
||||||
// Here is the register usage in this ABI:
|
|
||||||
//
|
|
||||||
// r0-r1: reserved as scratch registers so we can use them in our PLT
|
|
||||||
// r2: parameter passing and return values
|
|
||||||
// r3-r6: parameter passing
|
|
||||||
// r12: address of GOT if position-independent code
|
|
||||||
// r14: return address
|
|
||||||
// r15: stack pointer
|
|
||||||
// a1: upper 32 bits of TP (thread pointer)
|
|
||||||
// a2: lower 32 bits of TP (thread pointer)
|
|
||||||
//
|
|
||||||
// Thread-local storage (TLS) is supported on s390x in the same way as it
|
|
||||||
// is on other targets with one exception. On other targets, __tls_get_addr
|
|
||||||
// is used to get an address of a thread-local variable. On s390x,
|
|
||||||
// __tls_get_offset is used instead. The difference is __tls_get_offset
|
|
||||||
// returns an address of a thread-local variable as an offset from TP. So
|
|
||||||
// we need to add TP to a return value before use. I don't know why it is
|
|
||||||
// different, but that is the way it is.
|
|
||||||
//
|
|
||||||
// https://github.com/rui314/psabi/blob/main/s390x.pdf
|
|
||||||
|
|
||||||
#include "third_party/mold/elf/mold.h"
|
|
||||||
|
|
||||||
namespace mold::elf {
|
|
||||||
|
|
||||||
using E = S390X;
|
|
||||||
|
|
||||||
// ORs a 20-bit value into the big-endian 32-bit word at `loc`: the low
// 12 bits of `val` go to bits 27..16 of the word and bits 19..12 of
// `val` go to bits 15..8.
static void write_mid20(u8 *loc, u64 val) {
  u64 low12 = bits(val, 11, 0);
  u64 high8 = bits(val, 19, 12);
  *(ub32 *)loc |= (low12 << 16) | (high8 << 8);
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_plt_header(Context<E> &ctx, u8 *buf) {
|
|
||||||
static u8 insn[] = {
|
|
||||||
0xe3, 0x00, 0xf0, 0x38, 0x00, 0x24, // stg %r0, 56(%r15)
|
|
||||||
0xc0, 0x10, 0, 0, 0, 0, // larl %r1, GOTPLT_OFFSET
|
|
||||||
0xd2, 0x07, 0xf0, 0x30, 0x10, 0x08, // mvc 48(8, %r15), 8(%r1)
|
|
||||||
0xe3, 0x10, 0x10, 0x10, 0x00, 0x04, // lg %r1, 16(%r1)
|
|
||||||
0x07, 0xf1, // br %r1
|
|
||||||
0x07, 0x00, 0x07, 0x00, 0x07, 0x00, // nopr; nopr; nopr
|
|
||||||
};
|
|
||||||
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ub32 *)(buf + 8) = (ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 6) >> 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
|
|
||||||
static u8 insn[] = {
|
|
||||||
0xc0, 0x10, 0, 0, 0, 0, // larl %r1, GOTPLT_ENTRY_OFFSET
|
|
||||||
0xe3, 0x10, 0x10, 0x00, 0x00, 0x04, // lg %r1, (%r1)
|
|
||||||
0xc0, 0x01, 0, 0, 0, 0, // lgfi %r0, PLT_INDEX
|
|
||||||
0x07, 0xf1, // br %r1
|
|
||||||
0x07, 0x00, 0x07, 0x00, 0x07, 0x00, // nopr; nopr; nopr
|
|
||||||
0x07, 0x00, 0x07, 0x00, 0x07, 0x00, // nopr; nopr; nopr
|
|
||||||
};
|
|
||||||
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ub32 *)(buf + 2) = (sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx)) >> 1;
|
|
||||||
*(ub32 *)(buf + 14) = sym.get_plt_idx(ctx) * sizeof(ElfRel<E>);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
|
|
||||||
static u8 insn[] = {
|
|
||||||
0xc0, 0x10, 0, 0, 0, 0, // larl %r1, GOT_ENTRY_OFFSET
|
|
||||||
0xe3, 0x10, 0x10, 0x00, 0x00, 0x04, // lg %r1, (%r1)
|
|
||||||
0x07, 0xf1, // br %r1
|
|
||||||
0x07, 0x00, // nopr
|
|
||||||
};
|
|
||||||
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ub32 *)(buf + 2) = (sym.get_got_addr(ctx) - sym.get_plt_addr(ctx)) >> 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
|
|
||||||
u64 offset, u64 val) {
|
|
||||||
u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_NONE:
|
|
||||||
break;
|
|
||||||
case R_390_PC32:
|
|
||||||
*(ub32 *)loc = val - this->shdr.sh_addr - offset;
|
|
||||||
break;
|
|
||||||
case R_390_64:
|
|
||||||
*(ub64 *)loc = val;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
ElfRel<E> *dynrel = nullptr;
|
|
||||||
if (ctx.reldyn)
|
|
||||||
dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
|
|
||||||
file.reldyn_offset + this->reldyn_offset);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
auto check = [&](i64 val, i64 lo, i64 hi) {
|
|
||||||
if (val < lo || hi <= val)
|
|
||||||
Error(ctx) << *this << ": relocation " << rel << " against "
|
|
||||||
<< sym << " out of range: " << val << " is not in ["
|
|
||||||
<< lo << ", " << hi << ")";
|
|
||||||
};
|
|
||||||
|
|
||||||
auto check_dbl = [&](i64 val, i64 lo, i64 hi) {
|
|
||||||
check(val, lo, hi);
|
|
||||||
|
|
||||||
// R_390_*DBL relocs should never refer a symbol at an odd address
|
|
||||||
if (val & 1)
|
|
||||||
Error(ctx) << *this << ": misaligned symbol " << sym
|
|
||||||
<< " for relocation " << rel;
|
|
||||||
};
|
|
||||||
|
|
||||||
u64 S = sym.get_addr(ctx);
|
|
||||||
u64 A = rel.r_addend;
|
|
||||||
u64 P = get_addr() + rel.r_offset;
|
|
||||||
u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>);
|
|
||||||
u64 GOT = ctx.got->shdr.sh_addr;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_390_64:
|
|
||||||
apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
|
|
||||||
break;
|
|
||||||
case R_390_8:
|
|
||||||
check(S + A, 0, 1 << 8);
|
|
||||||
*loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_390_12:
|
|
||||||
check(S + A, 0, 1 << 12);
|
|
||||||
*(ul16 *)loc |= bits(S + A, 11, 0);
|
|
||||||
break;
|
|
||||||
case R_390_16:
|
|
||||||
check(S + A, 0, 1 << 16);
|
|
||||||
*(ub16 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_390_20:
|
|
||||||
check(S + A, 0, 1 << 20);
|
|
||||||
write_mid20(loc, S + A);
|
|
||||||
break;
|
|
||||||
case R_390_32:
|
|
||||||
case R_390_PLT32:
|
|
||||||
check(S + A, 0, 1LL << 32);
|
|
||||||
*(ub32 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_390_PLT64:
|
|
||||||
*(ub64 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_390_PC12DBL:
|
|
||||||
case R_390_PLT12DBL:
|
|
||||||
check_dbl(S + A - P, -(1 << 12), 1 << 12);
|
|
||||||
*(ul16 *)loc |= bits(S + A - P, 12, 1);
|
|
||||||
break;
|
|
||||||
case R_390_PC16:
|
|
||||||
check(S + A - P, -(1 << 15), 1 << 15);
|
|
||||||
*(ub16 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_390_PC32:
|
|
||||||
check(S + A - P, -(1LL << 31), 1LL << 31);
|
|
||||||
*(ub32 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_390_PC64:
|
|
||||||
*(ub64 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_390_PC16DBL:
|
|
||||||
case R_390_PLT16DBL:
|
|
||||||
check_dbl(S + A - P, -(1 << 16), 1 << 16);
|
|
||||||
*(ub16 *)loc = (S + A - P) >> 1;
|
|
||||||
break;
|
|
||||||
case R_390_PC24DBL:
|
|
||||||
case R_390_PLT24DBL:
|
|
||||||
check_dbl(S + A - P, -(1 << 24), 1 << 24);
|
|
||||||
*(ub32 *)loc |= bits(S + A - P, 24, 1);
|
|
||||||
break;
|
|
||||||
case R_390_PC32DBL:
|
|
||||||
case R_390_PLT32DBL:
|
|
||||||
check_dbl(S + A - P, -(1LL << 32), 1LL << 32);
|
|
||||||
*(ub32 *)loc = (S + A - P) >> 1;
|
|
||||||
break;
|
|
||||||
case R_390_GOT12:
|
|
||||||
case R_390_GOTPLT12:
|
|
||||||
check(G + A, 0, 1 << 12);
|
|
||||||
*(ul16 *)loc |= bits(G + A, 11, 0);
|
|
||||||
break;
|
|
||||||
case R_390_GOT16:
|
|
||||||
case R_390_GOTPLT16:
|
|
||||||
check(G + A, 0, 1 << 16);
|
|
||||||
*(ub16 *)loc = G + A;
|
|
||||||
break;
|
|
||||||
case R_390_GOT20:
|
|
||||||
case R_390_GOTPLT20:
|
|
||||||
check(G + A, 0, 1 << 20);
|
|
||||||
write_mid20(loc, G + A);
|
|
||||||
break;
|
|
||||||
case R_390_GOT32:
|
|
||||||
case R_390_GOTPLT32:
|
|
||||||
check(G + A, 0, 1LL << 32);
|
|
||||||
*(ub32 *)loc = G + A;
|
|
||||||
break;
|
|
||||||
case R_390_GOT64:
|
|
||||||
case R_390_GOTPLT64:
|
|
||||||
*(ub64 *)loc = G + A;
|
|
||||||
break;
|
|
||||||
case R_390_GOTOFF16:
|
|
||||||
case R_390_PLTOFF16:
|
|
||||||
check(S + A - GOT, -(1 << 15), 1 << 15);
|
|
||||||
*(ub16 *)loc = S + A - GOT;
|
|
||||||
break;
|
|
||||||
case R_390_GOTOFF32:
|
|
||||||
case R_390_PLTOFF32:
|
|
||||||
check(S + A - GOT, -(1LL << 31), 1LL << 31);
|
|
||||||
*(ub32 *)loc = S + A - GOT;
|
|
||||||
break;
|
|
||||||
case R_390_GOTOFF64:
|
|
||||||
case R_390_PLTOFF64:
|
|
||||||
*(ub64 *)loc = S + A - GOT;
|
|
||||||
break;
|
|
||||||
case R_390_GOTPC:
|
|
||||||
*(ub64 *)loc = GOT + A - P;
|
|
||||||
break;
|
|
||||||
case R_390_GOTPCDBL:
|
|
||||||
check_dbl(GOT + A - P, -(1LL << 32), 1LL << 32);
|
|
||||||
*(ub32 *)loc = (GOT + A - P) >> 1;
|
|
||||||
break;
|
|
||||||
case R_390_GOTENT:
|
|
||||||
check(GOT + G + A - P, -(1LL << 32), 1LL << 32);
|
|
||||||
*(ub32 *)loc = (GOT + G + A - P) >> 1;
|
|
||||||
break;
|
|
||||||
case R_390_TLS_LE32:
|
|
||||||
*(ub32 *)loc = S + A - ctx.tp_addr;
|
|
||||||
break;
|
|
||||||
case R_390_TLS_LE64:
|
|
||||||
*(ub64 *)loc = S + A - ctx.tp_addr;
|
|
||||||
break;
|
|
||||||
case R_390_TLS_GOTIE20:
|
|
||||||
write_mid20(loc, sym.get_gottp_addr(ctx) + A - GOT);
|
|
||||||
break;
|
|
||||||
case R_390_TLS_IEENT:
|
|
||||||
*(ub32 *)loc = (sym.get_gottp_addr(ctx) + A - P) >> 1;
|
|
||||||
break;
|
|
||||||
case R_390_TLS_GD32:
|
|
||||||
if (sym.has_tlsgd(ctx))
|
|
||||||
*(ub32 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT;
|
|
||||||
else if (sym.has_gottp(ctx))
|
|
||||||
*(ub32 *)loc = sym.get_gottp_addr(ctx) + A - GOT;
|
|
||||||
else
|
|
||||||
*(ub32 *)loc = S + A - ctx.tp_addr;
|
|
||||||
break;
|
|
||||||
case R_390_TLS_GD64:
|
|
||||||
if (sym.has_tlsgd(ctx))
|
|
||||||
*(ub64 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT;
|
|
||||||
else if (sym.has_gottp(ctx))
|
|
||||||
*(ub64 *)loc = sym.get_gottp_addr(ctx) + A - GOT;
|
|
||||||
else
|
|
||||||
*(ub64 *)loc = S + A - ctx.tp_addr;
|
|
||||||
break;
|
|
||||||
case R_390_TLS_GDCALL:
|
|
||||||
if (sym.has_tlsgd(ctx)) {
|
|
||||||
// do nothing
|
|
||||||
} else if (sym.has_gottp(ctx)) {
|
|
||||||
// lg %r2, 0(%r2, %r12)
|
|
||||||
static u8 insn[] = { 0xe3, 0x22, 0xc0, 0x00, 0x00, 0x04 };
|
|
||||||
memcpy(loc, insn, sizeof(insn));
|
|
||||||
} else {
|
|
||||||
// nop
|
|
||||||
static u8 insn[] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 };
|
|
||||||
memcpy(loc, insn, sizeof(insn));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case R_390_TLS_LDM32:
|
|
||||||
if (ctx.got->has_tlsld(ctx))
|
|
||||||
*(ub32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT;
|
|
||||||
break;
|
|
||||||
case R_390_TLS_LDM64:
|
|
||||||
if (ctx.got->has_tlsld(ctx))
|
|
||||||
*(ub64 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT;
|
|
||||||
break;
|
|
||||||
case R_390_TLS_LDO32:
|
|
||||||
if (ctx.got->has_tlsld(ctx))
|
|
||||||
*(ub32 *)loc = S + A - ctx.dtp_addr;
|
|
||||||
else
|
|
||||||
*(ub32 *)loc = S + A - ctx.tp_addr;
|
|
||||||
break;
|
|
||||||
case R_390_TLS_LDO64:
|
|
||||||
if (ctx.got->has_tlsld(ctx))
|
|
||||||
*(ub64 *)loc = S + A - ctx.dtp_addr;
|
|
||||||
else
|
|
||||||
*(ub64 *)loc = S + A - ctx.tp_addr;
|
|
||||||
break;
|
|
||||||
case R_390_TLS_LDCALL:
|
|
||||||
if (!ctx.got->has_tlsld(ctx)) {
|
|
||||||
// nop
|
|
||||||
static u8 insn[] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 };
|
|
||||||
memcpy(loc, insn, sizeof(insn));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
unreachable();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
auto check = [&](i64 val, i64 lo, i64 hi) {
|
|
||||||
if (val < lo || hi <= val)
|
|
||||||
Error(ctx) << *this << ": relocation " << rel << " against "
|
|
||||||
<< sym << " out of range: " << val << " is not in ["
|
|
||||||
<< lo << ", " << hi << ")";
|
|
||||||
};
|
|
||||||
|
|
||||||
SectionFragment<E> *frag;
|
|
||||||
i64 frag_addend;
|
|
||||||
std::tie(frag, frag_addend) = get_fragment(ctx, rel);
|
|
||||||
|
|
||||||
u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
|
|
||||||
u64 A = frag ? frag_addend : (i64)rel.r_addend;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_390_32: {
|
|
||||||
i64 val = S + A;
|
|
||||||
check(val, 0, 1LL << 32);
|
|
||||||
*(ub32 *)loc = val;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_390_64:
|
|
||||||
if (std::optional<u64> val = get_tombstone(sym, frag))
|
|
||||||
*(ub64 *)loc = *val;
|
|
||||||
else
|
|
||||||
*(ub64 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_390_TLS_LDO64:
|
|
||||||
if (std::optional<u64> val = get_tombstone(sym, frag))
|
|
||||||
*(ub64 *)loc = *val;
|
|
||||||
else
|
|
||||||
*(ub64 *)loc = S + A - ctx.dtp_addr;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << *this << ": apply_reloc_nonalloc: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::scan_relocations(Context<E> &ctx) {
|
|
||||||
assert(shdr().sh_flags & SHF_ALLOC);
|
|
||||||
|
|
||||||
this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
// Scan relocations
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
|
|
||||||
if (sym.is_ifunc())
|
|
||||||
sym.flags |= NEEDS_GOT | NEEDS_PLT;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_390_64:
|
|
||||||
scan_dyn_absrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_390_8:
|
|
||||||
case R_390_12:
|
|
||||||
case R_390_16:
|
|
||||||
case R_390_20:
|
|
||||||
case R_390_32:
|
|
||||||
scan_absrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_390_PC16:
|
|
||||||
case R_390_PC16DBL:
|
|
||||||
case R_390_PC32:
|
|
||||||
case R_390_PC32DBL:
|
|
||||||
case R_390_PC64:
|
|
||||||
scan_pcrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_390_GOT12:
|
|
||||||
case R_390_GOT16:
|
|
||||||
case R_390_GOT20:
|
|
||||||
case R_390_GOT32:
|
|
||||||
case R_390_GOT64:
|
|
||||||
case R_390_GOTOFF16:
|
|
||||||
case R_390_GOTOFF32:
|
|
||||||
case R_390_GOTOFF64:
|
|
||||||
case R_390_GOTPLT12:
|
|
||||||
case R_390_GOTPLT16:
|
|
||||||
case R_390_GOTPLT20:
|
|
||||||
case R_390_GOTPLT32:
|
|
||||||
case R_390_GOTPLT64:
|
|
||||||
case R_390_GOTPC:
|
|
||||||
case R_390_GOTPCDBL:
|
|
||||||
case R_390_GOTENT:
|
|
||||||
sym.flags |= NEEDS_GOT;
|
|
||||||
break;
|
|
||||||
case R_390_PLT12DBL:
|
|
||||||
case R_390_PLT16DBL:
|
|
||||||
case R_390_PLT24DBL:
|
|
||||||
case R_390_PLT32:
|
|
||||||
case R_390_PLT32DBL:
|
|
||||||
case R_390_PLT64:
|
|
||||||
case R_390_PLTOFF16:
|
|
||||||
case R_390_PLTOFF32:
|
|
||||||
case R_390_PLTOFF64:
|
|
||||||
if (sym.is_imported)
|
|
||||||
sym.flags |= NEEDS_PLT;
|
|
||||||
break;
|
|
||||||
case R_390_TLS_GOTIE20:
|
|
||||||
case R_390_TLS_IEENT:
|
|
||||||
sym.flags |= NEEDS_GOTTP;
|
|
||||||
break;
|
|
||||||
case R_390_TLS_GD32:
|
|
||||||
case R_390_TLS_GD64:
|
|
||||||
// We always want to relax calls to __tls_get_offset() in statically-
|
|
||||||
// linked executables because __tls_get_offset() in libc.a just calls
|
|
||||||
// abort().
|
|
||||||
if (ctx.arg.is_static ||
|
|
||||||
(ctx.arg.relax && !sym.is_imported && !ctx.arg.shared)) {
|
|
||||||
// do nothing
|
|
||||||
} else if (ctx.arg.relax && !sym.is_imported && ctx.arg.shared &&
|
|
||||||
!ctx.arg.z_dlopen) {
|
|
||||||
sym.flags |= NEEDS_GOTTP;
|
|
||||||
} else {
|
|
||||||
sym.flags |= NEEDS_TLSGD;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case R_390_TLS_LDM32:
|
|
||||||
case R_390_TLS_LDM64: {
|
|
||||||
bool do_relax = ctx.arg.is_static || (ctx.arg.relax && !ctx.arg.shared);
|
|
||||||
if (!do_relax)
|
|
||||||
ctx.needs_tlsld = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_390_TLS_LE32:
|
|
||||||
case R_390_TLS_LE64:
|
|
||||||
check_tlsle(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_390_TLS_LDO32:
|
|
||||||
case R_390_TLS_LDO64:
|
|
||||||
case R_390_TLS_GDCALL:
|
|
||||||
case R_390_TLS_LDCALL:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << *this << ": scan_relocations: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace mold::elf
|
|
355
third_party/mold/elf/arch-sh4.cc
vendored
355
third_party/mold/elf/arch-sh4.cc
vendored
|
@ -1,355 +0,0 @@
|
||||||
// clang-format off
|
|
||||||
// SH-4 (SuperH 4) is a 32-bit RISC ISA developed by Hitachi in the early
|
|
||||||
// '90s. Some relatively powerful systems were developed with SH-4.
|
|
||||||
// A notable example is Sega's Dreamcast game console which debuted in 1998.
|
|
||||||
// Hitachi later spun off its semiconductor division as an independent
|
|
||||||
// company, Renesas, and Renesas is still selling SH-4 processors for the
|
|
||||||
// embedded market. It has never been as popular as ARM is, and its
|
|
||||||
// popularity continues to decline though.
|
|
||||||
//
|
|
||||||
// SH-4's most distinctive feature compared to other RISC ISAs is that its
|
|
||||||
// instructions are 16 bits in length instead of more common 32 bits for
|
|
||||||
// better code density. This difference affects various aspects of its
|
|
||||||
// instruction set as shown below:
|
|
||||||
//
|
|
||||||
// - SH-4 has 16 general-purpose registers (GPRs) instead of the most
|
|
||||||
// common 32 GPR configuration to save one bit to specify a register.
|
|
||||||
//
|
|
||||||
// - Binary instructions such as ADD normally take three registers in
|
|
||||||
// RISC ISAs (e.g. x ← y ⊕ z where x, y and z are registers), but
|
|
||||||
// SH-4's instructions take only two registers. The result of an
|
|
||||||
// operation is written to one of the source registers (e.g. x ← x ⊕ y).
|
|
||||||
//
|
|
||||||
// - Usual RISC ISAs have "load high" and "load low" instructions to set
|
|
||||||
// an immediate to most significant and least significant bits in a
|
|
||||||
// register to construct a full 32-bit value in a register. This
|
|
||||||
// technique is hard to use in SH-4, as 16 bit instructions are too
|
|
||||||
// small to contain large immediates. On SH-4, large immediates are
|
|
||||||
// loaded from memory using `mov.l` PC-relative load instruction.
|
|
||||||
//
|
|
||||||
// - Many RISC ISAs are, despite their name, actually fairly complex.
|
|
||||||
// They tend to have hundreds if not thousands of different instructions.
|
|
||||||
// SH-4 doesn't really have that many instructions because its 16-bit
|
|
||||||
// machine code simply can't encode many different opcodes. As a
|
|
||||||
// result, the number of relocations the linker has to support is also
|
|
||||||
// small.
|
|
||||||
//
|
|
||||||
// Beside these, SH-4 has a delay branch slot just like contemporary MIPS
|
|
||||||
// and SPARC. That is, one instruction after a branch instruction will
|
|
||||||
// always be executed even if the branch is taken. Delay branch slot allows
|
|
||||||
// a pipelined CPU to start and finish executing an instruction after a
|
|
||||||
// branch regardless of the branch's condition, simplifying the processor's
|
|
||||||
// implementation. It's considered a bad premature optimization nowadays,
|
|
||||||
// though. Modern RISC processors don't have it.
|
|
||||||
//
|
|
||||||
// Here are notes about the SH-4 psABI:
|
|
||||||
//
|
|
||||||
// - If a source file is compiled with -fPIC, each function starts
|
|
||||||
// with a piece of code to store the address of .got to %r12.
|
|
||||||
// We can use the register in our PLT for position-independent output.
|
|
||||||
//
|
|
||||||
// - Even though it uses the RELA-type relocations, relocation addends
|
|
||||||
// are stored not to the r_addend field but to the relocated section
|
|
||||||
// contents for some reason. Therefore, it's effectively REL.
|
|
||||||
//
|
|
||||||
// - It looks like the ecosystem has bit-rotted. Some tests, especially
|
|
||||||
// one using C++ exceptions, don't pass even with GNU ld.
|
|
||||||
//
|
|
||||||
// - GCC/SH4 tends to write dynamically-relocated data into .text, so the
|
|
||||||
// output from the linker contains lots of text relocations. That's not
|
|
||||||
// a problem with embedded programming, I guess.
|
|
||||||
|
|
||||||
#include "third_party/mold/elf/mold.h"
|
|
||||||
|
|
||||||
namespace mold::elf {
|
|
||||||
|
|
||||||
using E = SH4;
|
|
||||||
|
|
||||||
// Even though SH-4 uses RELA-type relocations, addends are stored to
|
|
||||||
// relocated places for some reason.
|
|
||||||
template <>
|
|
||||||
i64 get_addend(u8 *loc, const ElfRel<E> &rel) {
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_SH_DIR32:
|
|
||||||
case R_SH_REL32:
|
|
||||||
case R_SH_TLS_GD_32:
|
|
||||||
case R_SH_TLS_LD_32:
|
|
||||||
case R_SH_TLS_LDO_32:
|
|
||||||
case R_SH_TLS_IE_32:
|
|
||||||
case R_SH_TLS_LE_32:
|
|
||||||
case R_SH_TLS_DTPMOD32:
|
|
||||||
case R_SH_TLS_DTPOFF32:
|
|
||||||
case R_SH_TLS_TPOFF32:
|
|
||||||
case R_SH_GOT32:
|
|
||||||
case R_SH_PLT32:
|
|
||||||
case R_SH_GOTOFF:
|
|
||||||
case R_SH_GOTPC:
|
|
||||||
case R_SH_GOTPLT32:
|
|
||||||
return *(ul32 *)loc;
|
|
||||||
default:
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_plt_header(Context<E> &ctx, u8 *buf) {
|
|
||||||
if (ctx.arg.pic) {
|
|
||||||
static const u8 insn[] = {
|
|
||||||
0x02, 0xd2, // mov.l 1f, r2
|
|
||||||
0xcc, 0x32, // add r12, r2
|
|
||||||
0x22, 0x50, // mov.l @(8, r2), r0
|
|
||||||
0x21, 0x52, // mov.l @(4, r2), r2
|
|
||||||
0x2b, 0x40, // jmp @r0
|
|
||||||
0x00, 0xe0, // mov #0, r0
|
|
||||||
0, 0, 0, 0, // 1: .long GOTPLT
|
|
||||||
};
|
|
||||||
|
|
||||||
static_assert(sizeof(insn) == E::plt_hdr_size);
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ul32 *)(buf + 12) = ctx.gotplt->shdr.sh_addr - ctx.got->shdr.sh_addr;
|
|
||||||
} else {
|
|
||||||
static const u8 insn[] = {
|
|
||||||
0x02, 0xd2, // mov.l 1f, r2
|
|
||||||
0x22, 0x50, // mov.l @(8, r2), r0
|
|
||||||
0x21, 0x52, // mov.l @(4, r2), r2
|
|
||||||
0x2b, 0x40, // jmp @r0
|
|
||||||
0x00, 0xe0, // mov #0, r0
|
|
||||||
0x09, 0x00, // nop
|
|
||||||
0, 0, 0, 0, // 1: .long GOTPLT
|
|
||||||
};
|
|
||||||
|
|
||||||
static_assert(sizeof(insn) == E::plt_hdr_size);
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ul32 *)(buf + 12) = ctx.gotplt->shdr.sh_addr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
|
|
||||||
if (ctx.arg.pic) {
|
|
||||||
static const u8 insn[] = {
|
|
||||||
0x01, 0xd0, // mov.l 1f, r0
|
|
||||||
0xce, 0x00, // mov.l @(r0, r12), r0
|
|
||||||
0x2b, 0x40, // jmp @r0
|
|
||||||
0x01, 0xd1, // mov.l 2f, r1
|
|
||||||
0, 0, 0, 0, // 1: .long GOTPLT_ENTRY
|
|
||||||
0, 0, 0, 0, // 2: .long INDEX_IN_RELPLT
|
|
||||||
};
|
|
||||||
|
|
||||||
static_assert(sizeof(insn) == E::plt_size);
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ul32 *)(buf + 8) = sym.get_gotplt_addr(ctx) - ctx.got->shdr.sh_addr;
|
|
||||||
*(ul32 *)(buf + 12) = sym.get_plt_idx(ctx) * sizeof(ElfRel<E>);
|
|
||||||
} else {
|
|
||||||
static const u8 insn[] = {
|
|
||||||
0x01, 0xd0, // mov.l 1f, r0
|
|
||||||
0x02, 0x60, // mov.l @r0, r0
|
|
||||||
0x2b, 0x40, // jmp @r0
|
|
||||||
0x01, 0xd1, // mov.l 2f, r1
|
|
||||||
0, 0, 0, 0, // 1: .long GOTPLT_ENTRY
|
|
||||||
0, 0, 0, 0, // 2: .long INDEX_IN_RELPLT
|
|
||||||
};
|
|
||||||
|
|
||||||
static_assert(sizeof(insn) == E::plt_size);
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ul32 *)(buf + 8) = sym.get_gotplt_addr(ctx);
|
|
||||||
*(ul32 *)(buf + 12) = sym.get_plt_idx(ctx) * sizeof(ElfRel<E>);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
|
|
||||||
if (ctx.arg.pic) {
|
|
||||||
static const u8 insn[] = {
|
|
||||||
0x01, 0xd0, // mov.l 1f, r0
|
|
||||||
0xce, 0x00, // mov.l @(r0, r12), r0
|
|
||||||
0x2b, 0x40, // jmp @r0
|
|
||||||
0x09, 0x00, // nop
|
|
||||||
0, 0, 0, 0, // 1: .long GOT_ENTRY
|
|
||||||
};
|
|
||||||
|
|
||||||
static_assert(sizeof(insn) == E::pltgot_size);
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ul32 *)(buf + 8) = sym.get_got_addr(ctx) - ctx.got->shdr.sh_addr;
|
|
||||||
} else {
|
|
||||||
static const u8 insn[] = {
|
|
||||||
0x01, 0xd0, // mov.l 1f, r0
|
|
||||||
0x02, 0x60, // mov.l @r0, r0
|
|
||||||
0x2b, 0x40, // jmp @r0
|
|
||||||
0x09, 0x00, // nop
|
|
||||||
0, 0, 0, 0, // 1: .long GOT_ENTRY
|
|
||||||
};
|
|
||||||
|
|
||||||
static_assert(sizeof(insn) == E::pltgot_size);
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ul32 *)(buf + 8) = sym.get_got_addr(ctx);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
|
|
||||||
u64 offset, u64 val) {
|
|
||||||
u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_NONE:
|
|
||||||
break;
|
|
||||||
case R_SH_DIR32:
|
|
||||||
*(ul32 *)loc = val;
|
|
||||||
break;
|
|
||||||
case R_SH_REL32:
|
|
||||||
*(ul32 *)loc = val - this->shdr.sh_addr - offset;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
ElfRel<E> *dynrel = nullptr;
|
|
||||||
if (ctx.reldyn)
|
|
||||||
dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
|
|
||||||
file.reldyn_offset + this->reldyn_offset);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
u64 S = sym.get_addr(ctx);
|
|
||||||
u64 A = get_addend(loc, rel);
|
|
||||||
u64 P = get_addr() + rel.r_offset;
|
|
||||||
u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>);
|
|
||||||
u64 GOT = ctx.got->shdr.sh_addr;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_SH_DIR32:
|
|
||||||
apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
|
|
||||||
break;
|
|
||||||
case R_SH_REL32:
|
|
||||||
case R_SH_PLT32:
|
|
||||||
*(ul32 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_SH_GOT32:
|
|
||||||
*(ul32 *)loc = G;
|
|
||||||
break;
|
|
||||||
case R_SH_GOTPC:
|
|
||||||
*(ul32 *)loc = GOT + A - P;
|
|
||||||
break;
|
|
||||||
case R_SH_GOTOFF:
|
|
||||||
*(ul32 *)loc = S + A - GOT;
|
|
||||||
break;
|
|
||||||
case R_SH_TLS_GD_32:
|
|
||||||
*(ul32 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT;
|
|
||||||
break;
|
|
||||||
case R_SH_TLS_LD_32:
|
|
||||||
*(ul32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT;
|
|
||||||
break;
|
|
||||||
case R_SH_TLS_LDO_32:
|
|
||||||
*(ul32 *)loc = S + A - ctx.dtp_addr;
|
|
||||||
break;
|
|
||||||
case R_SH_TLS_IE_32:
|
|
||||||
*(ul32 *)loc = sym.get_gottp_addr(ctx) + A - GOT;
|
|
||||||
break;
|
|
||||||
case R_SH_TLS_LE_32:
|
|
||||||
*(ul32 *)loc = S + A - ctx.tp_addr;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
unreachable();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
SectionFragment<E> *frag;
|
|
||||||
i64 frag_addend;
|
|
||||||
std::tie(frag, frag_addend) = get_fragment(ctx, rel);
|
|
||||||
|
|
||||||
u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
|
|
||||||
u64 A = frag ? frag_addend : get_addend(loc, rel);
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_SH_DIR32:
|
|
||||||
if (std::optional<u64> val = get_tombstone(sym, frag))
|
|
||||||
*(ul32 *)loc = *val;
|
|
||||||
else
|
|
||||||
*(ul32 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: "
|
|
||||||
<< rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::scan_relocations(Context<E> &ctx) {
|
|
||||||
assert(shdr().sh_flags & SHF_ALLOC);
|
|
||||||
|
|
||||||
this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
|
|
||||||
if (sym.is_ifunc())
|
|
||||||
Error(ctx) << sym << ": GNU ifunc symbol is not supported on sh4";
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_SH_DIR32:
|
|
||||||
scan_dyn_absrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_SH_REL32:
|
|
||||||
scan_pcrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_SH_GOT32:
|
|
||||||
sym.flags |= NEEDS_GOT;
|
|
||||||
break;
|
|
||||||
case R_SH_PLT32:
|
|
||||||
if (sym.is_imported)
|
|
||||||
sym.flags |= NEEDS_PLT;
|
|
||||||
break;
|
|
||||||
case R_SH_TLS_GD_32:
|
|
||||||
sym.flags |= NEEDS_TLSGD;
|
|
||||||
break;
|
|
||||||
case R_SH_TLS_LD_32:
|
|
||||||
ctx.needs_tlsld = true;
|
|
||||||
break;
|
|
||||||
case R_SH_TLS_IE_32:
|
|
||||||
sym.flags |= NEEDS_GOTTP;
|
|
||||||
break;
|
|
||||||
case R_SH_TLS_LE_32:
|
|
||||||
check_tlsle(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_SH_GOTPC:
|
|
||||||
case R_SH_GOTOFF:
|
|
||||||
case R_SH_TLS_LDO_32:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << *this << ": unknown relocation: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace mold::elf
|
|
622
third_party/mold/elf/arch-sparc64.cc
vendored
622
third_party/mold/elf/arch-sparc64.cc
vendored
|
@ -1,622 +0,0 @@
|
||||||
// clang-format off
|
|
||||||
// SPARC is a RISC ISA developed by Sun Microsystems.
|
|
||||||
//
|
|
||||||
// The byte order of the processor is big-endian. Anything larger than a
|
|
||||||
// byte is stored in the "reverse" order compared to little-endian
|
|
||||||
// processors such as x86-64.
|
|
||||||
//
|
|
||||||
// All instructions are 4 bytes long and aligned to 4 bytes boundaries.
|
|
||||||
//
|
|
||||||
// A notable feature of SPARC is that, unlike other RISC ISAs, it doesn't
|
|
||||||
// need range extension thunks. It is because the SPARC's CALL instruction
|
|
||||||
// contains a whopping 30 bits immediate. The processor scales it by 4 to
|
|
||||||
// extend it to 32 bits (this is doable because all instructions are
|
|
||||||
// aligned to 4 bytes boundaries, so the least significant two bits are
|
|
||||||
// always zero). That means CALL's reach is PC ± 2 GiB, eliminating the
|
|
||||||
// need of range extension thunks. It comes with the cost that the CALL
|
|
||||||
// instruction alone takes 1/4th of the instruction encoding space,
|
|
||||||
// though.
|
|
||||||
//
|
|
||||||
// SPARC has 32 general purpose registers. CALL instruction saves a return
|
|
||||||
// address to %o7, which is an alias for %r15. Thread pointer is stored to
|
|
||||||
// %g7 which is %r7.
|
|
||||||
//
|
|
||||||
// SPARC does not have PC-relative load/store instructions. To access data
|
|
||||||
// in the position-independent manner, we usually first set the address of
|
|
||||||
// .got to, for example, %l7, with the following piece of code
|
|
||||||
//
|
|
||||||
// sethi %hi(. - _GLOBAL_OFFSET_TABLE_), %l7
|
|
||||||
// add %l7, %lo(. - _GLOBAL_OFFSET_TABLE_), %l7
|
|
||||||
// call __sparc_get_pc_thunk.l7
|
|
||||||
// nop
|
|
||||||
//
|
|
||||||
// where __sparc_get_pc_thunk.l7 is defined as
|
|
||||||
//
|
|
||||||
// retl
|
|
||||||
// add %o7, %l7, %l7
|
|
||||||
//
|
|
||||||
// SETHI and the following ADD materialize a 32-bit offset to .got.
|
|
||||||
// CALL instruction sets a return address to %o7, and the subsequent ADD
|
|
||||||
// adds it to the GOT offset to materialize the absolute address of .got.
|
|
||||||
//
|
|
||||||
// Note that we have a NOP after CALL and an ADD after RETL because of
|
|
||||||
// SPARC's branch delay slots. That is, the SPARC processor always
|
|
||||||
// executes one instruction after a branch even if the branch is taken.
|
|
||||||
// This may seem like an odd behavior, and indeed it is considered as such
|
|
||||||
// (that's a premature optimization for the early pipelined SPARC
|
|
||||||
// processors), but that's been a part of the ISA's spec so that's what it
|
|
||||||
// is.
|
|
||||||
//
|
|
||||||
// Note also that the .got address obtained this way is not shared between
|
|
||||||
// functions, so functions can use an arbitrary register to hold the .got
|
|
||||||
// address. That also means each function needs to execute the above piece
|
|
||||||
// of code to become position-independent.
|
|
||||||
//
|
|
||||||
// This scheme is very similar to i386. That may not be a coincidence
|
|
||||||
// because the i386 ELF psABI is created by Sun Microsystems too.
|
|
||||||
//
|
|
||||||
// https://github.com/rui314/psabi/blob/main/sparc.pdf
|
|
||||||
|
|
||||||
#include "third_party/mold/elf/mold.h"
|
|
||||||
|
|
||||||
namespace mold::elf {
|
|
||||||
|
|
||||||
using E = SPARC64;
|
|
||||||
|
|
||||||
// SPARC's PLT section is writable despite containing executable code.
|
|
||||||
// We don't need to write the PLT header entry because the dynamic loader
|
|
||||||
// will do that for us.
|
|
||||||
//
|
|
||||||
// We also don't need a .got.plt section to store the result of lazy PLT
|
|
||||||
// symbol resolution because the dynamic symbol resolver directly mutates
|
|
||||||
// instructions in PLT so that they jump to the right places next time.
|
|
||||||
// That's why each PLT entry contains lots of NOPs; they are a placeholder
|
|
||||||
// for the runtime to add more instructions.
|
|
||||||
//
|
|
||||||
// Self-modifying code is nowadays considered really bad from the security
|
|
||||||
// point of view, though.
|
|
||||||
template <>
|
|
||||||
void write_plt_header(Context<E> &ctx, u8 *buf) {
|
|
||||||
memset(buf, 0, E::plt_hdr_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
|
|
||||||
static ub32 insn[] = {
|
|
||||||
0x0300'0000, // sethi (. - .PLT0), %g1
|
|
||||||
0x3068'0000, // ba,a %xcc, .PLT1
|
|
||||||
0x0100'0000, // nop
|
|
||||||
0x0100'0000, // nop
|
|
||||||
0x0100'0000, // nop
|
|
||||||
0x0100'0000, // nop
|
|
||||||
0x0100'0000, // nop
|
|
||||||
0x0100'0000, // nop
|
|
||||||
};
|
|
||||||
|
|
||||||
u64 plt0 = ctx.plt->shdr.sh_addr;
|
|
||||||
u64 plt1 = ctx.plt->shdr.sh_addr + E::plt_size;
|
|
||||||
u64 entry = sym.get_plt_addr(ctx);
|
|
||||||
|
|
||||||
memcpy(buf, insn, sizeof(insn));
|
|
||||||
*(ub32 *)buf |= bits(entry - plt0, 21, 0);
|
|
||||||
*(ub32 *)(buf + 4) |= bits(plt1 - entry - 4, 20, 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
|
|
||||||
static ub32 entry[] = {
|
|
||||||
0x8a10'000f, // mov %o7, %g5
|
|
||||||
0x4000'0002, // call . + 8
|
|
||||||
0xc25b'e014, // ldx [ %o7 + 20 ], %g1
|
|
||||||
0xc25b'c001, // ldx [ %o7 + %g1 ], %g1
|
|
||||||
0x81c0'4000, // jmp %g1
|
|
||||||
0x9e10'0005, // mov %g5, %o7
|
|
||||||
0x0000'0000, // .quad $plt_entry - $got_entry
|
|
||||||
0x0000'0000,
|
|
||||||
};
|
|
||||||
|
|
||||||
memcpy(buf, entry, sizeof(entry));
|
|
||||||
*(ub64 *)(buf + 24) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
|
|
||||||
u64 offset, u64 val) {
|
|
||||||
u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_NONE:
|
|
||||||
break;
|
|
||||||
case R_SPARC_64:
|
|
||||||
case R_SPARC_UA64:
|
|
||||||
*(ub64 *)loc = val;
|
|
||||||
break;
|
|
||||||
case R_SPARC_DISP32:
|
|
||||||
*(ub32 *)loc = val - this->shdr.sh_addr - offset;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
ElfRel<E> *dynrel = nullptr;
|
|
||||||
if (ctx.reldyn)
|
|
||||||
dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
|
|
||||||
file.reldyn_offset + this->reldyn_offset);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
auto check = [&](i64 val, i64 lo, i64 hi) {
|
|
||||||
if (val < lo || hi <= val)
|
|
||||||
Error(ctx) << *this << ": relocation " << rel << " against "
|
|
||||||
<< sym << " out of range: " << val << " is not in ["
|
|
||||||
<< lo << ", " << hi << ")";
|
|
||||||
};
|
|
||||||
|
|
||||||
u64 S = sym.get_addr(ctx);
|
|
||||||
u64 A = rel.r_addend;
|
|
||||||
u64 P = (get_addr() + rel.r_offset);
|
|
||||||
u64 G = (sym.get_got_idx(ctx) * sizeof(Word<E>));
|
|
||||||
u64 GOT = ctx.got->shdr.sh_addr;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_SPARC_64:
|
|
||||||
apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
|
|
||||||
break;
|
|
||||||
case R_SPARC_5:
|
|
||||||
check(S + A, 0, 1 << 5);
|
|
||||||
*(ub32 *)loc |= bits(S + A, 4, 0);
|
|
||||||
break;
|
|
||||||
case R_SPARC_6:
|
|
||||||
check(S + A, 0, 1 << 6);
|
|
||||||
*(ub32 *)loc |= bits(S + A, 5, 0);
|
|
||||||
break;
|
|
||||||
case R_SPARC_7:
|
|
||||||
check(S + A, 0, 1 << 7);
|
|
||||||
*(ub32 *)loc |= bits(S + A, 6, 0);
|
|
||||||
break;
|
|
||||||
case R_SPARC_8:
|
|
||||||
check(S + A, 0, 1 << 8);
|
|
||||||
*(u8 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_SPARC_10:
|
|
||||||
check(S + A, 0, 1 << 10);
|
|
||||||
*(ub32 *)loc |= bits(S + A, 9, 0);
|
|
||||||
break;
|
|
||||||
case R_SPARC_LO10:
|
|
||||||
case R_SPARC_LOPLT10:
|
|
||||||
*(ub32 *)loc |= bits(S + A, 9, 0);
|
|
||||||
break;
|
|
||||||
case R_SPARC_11:
|
|
||||||
check(S + A, 0, 1 << 11);
|
|
||||||
*(ub32 *)loc |= bits(S + A, 10, 0);
|
|
||||||
break;
|
|
||||||
case R_SPARC_13:
|
|
||||||
check(S + A, 0, 1 << 13);
|
|
||||||
*(ub32 *)loc |= bits(S + A, 12, 0);
|
|
||||||
break;
|
|
||||||
case R_SPARC_16:
|
|
||||||
case R_SPARC_UA16:
|
|
||||||
check(S + A, 0, 1 << 16);
|
|
||||||
*(ub16 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_SPARC_22:
|
|
||||||
check(S + A, 0, 1 << 22);
|
|
||||||
*(ub32 *)loc |= bits(S + A, 21, 0);
|
|
||||||
break;
|
|
||||||
case R_SPARC_32:
|
|
||||||
case R_SPARC_UA32:
|
|
||||||
case R_SPARC_PLT32:
|
|
||||||
check(S + A, 0, 1LL << 32);
|
|
||||||
*(ub32 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_SPARC_PLT64:
|
|
||||||
case R_SPARC_UA64:
|
|
||||||
case R_SPARC_REGISTER:
|
|
||||||
*(ub64 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_SPARC_DISP8:
|
|
||||||
check(S + A - P, -(1 << 7), 1 << 7);
|
|
||||||
*(u8 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_SPARC_DISP16:
|
|
||||||
check(S + A - P, -(1 << 15), 1 << 15);
|
|
||||||
*(ub16 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_SPARC_DISP32:
|
|
||||||
case R_SPARC_PCPLT32:
|
|
||||||
check(S + A - P, -(1LL << 31), 1LL << 31);
|
|
||||||
*(ub32 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_SPARC_DISP64:
|
|
||||||
*(ub64 *)loc = S + A - P;
|
|
||||||
break;
|
|
||||||
case R_SPARC_WDISP16: {
|
|
||||||
i64 val = S + A - P;
|
|
||||||
check(val, -(1 << 16), 1 << 16);
|
|
||||||
*(ub16 *)loc |= (bit(val, 16) << 21) | bits(val, 15, 2);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_SPARC_WDISP19:
|
|
||||||
check(S + A - P, -(1 << 20), 1 << 20);
|
|
||||||
*(ub32 *)loc |= bits(S + A - P, 20, 2);
|
|
||||||
break;
|
|
||||||
case R_SPARC_WDISP22:
|
|
||||||
check(S + A - P, -(1 << 23), 1 << 23);
|
|
||||||
*(ub32 *)loc |= bits(S + A - P, 23, 2);
|
|
||||||
break;
|
|
||||||
case R_SPARC_WDISP30:
|
|
||||||
case R_SPARC_WPLT30:
|
|
||||||
check(S + A - P, -(1LL << 31), 1LL << 31);
|
|
||||||
*(ub32 *)loc |= bits(S + A - P, 31, 2);
|
|
||||||
break;
|
|
||||||
case R_SPARC_HI22:
|
|
||||||
case R_SPARC_HIPLT22:
|
|
||||||
case R_SPARC_LM22:
|
|
||||||
*(ub32 *)loc |= bits(S + A, 31, 10);
|
|
||||||
break;
|
|
||||||
case R_SPARC_GOT10:
|
|
||||||
*(ub32 *)loc |= bits(G, 9, 0);
|
|
||||||
break;
|
|
||||||
case R_SPARC_GOT13:
|
|
||||||
check(G, 0, 1 << 12);
|
|
||||||
*(ub32 *)loc |= bits(G, 12, 0);
|
|
||||||
break;
|
|
||||||
case R_SPARC_GOT22:
|
|
||||||
*(ub32 *)loc |= bits(G, 31, 10);
|
|
||||||
break;
|
|
||||||
case R_SPARC_GOTDATA_HIX22: {
|
|
||||||
i64 val = S + A - GOT;
|
|
||||||
*(ub32 *)loc |= bits(val < 0 ? ~val : val, 31, 10);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_SPARC_GOTDATA_LOX10: {
|
|
||||||
i64 val = S + A - GOT;
|
|
||||||
*(ub32 *)loc |= bits(val, 9, 0) | (val < 0 ? 0b1'1100'0000'0000 : 0);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_SPARC_GOTDATA_OP_HIX22:
|
|
||||||
// We always have to relax a GOT load to a load immediate if a
|
|
||||||
// symbol is local, because R_SPARC_GOTDATA_OP cannot represent
|
|
||||||
// an addend for a local symbol.
|
|
||||||
if (sym.is_imported || sym.is_ifunc()) {
|
|
||||||
*(ub32 *)loc |= bits(G, 31, 10);
|
|
||||||
} else if (sym.is_absolute()) {
|
|
||||||
i64 val = S + A;
|
|
||||||
*(ub32 *)loc |= bits(val < 0 ? ~val : val, 31, 10);
|
|
||||||
} else {
|
|
||||||
i64 val = S + A - GOT;
|
|
||||||
*(ub32 *)loc |= bits(val < 0 ? ~val : val, 31, 10);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case R_SPARC_GOTDATA_OP_LOX10: {
|
|
||||||
if (sym.is_imported || sym.is_ifunc()) {
|
|
||||||
*(ub32 *)loc |= bits(G, 9, 0);
|
|
||||||
} else if (sym.is_absolute()) {
|
|
||||||
i64 val = S + A;
|
|
||||||
*(ub32 *)loc |= bits(val, 9, 0) | (val < 0 ? 0b1'1100'0000'0000 : 0);
|
|
||||||
} else {
|
|
||||||
i64 val = S + A - GOT;
|
|
||||||
*(ub32 *)loc |= bits(val, 9, 0) | (val < 0 ? 0b1'1100'0000'0000 : 0);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_SPARC_GOTDATA_OP:
|
|
||||||
if (sym.is_imported || sym.is_ifunc())
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (sym.is_absolute()) {
|
|
||||||
// ldx [ %g2 + %g1 ], %g1 → nop
|
|
||||||
*(ub32 *)loc = 0x0100'0000;
|
|
||||||
} else {
|
|
||||||
// ldx [ %g2 + %g1 ], %g1 → add %g2, %g1, %g1
|
|
||||||
*(ub32 *)loc &= 0b00'11111'000000'11111'1'11111111'11111;
|
|
||||||
*(ub32 *)loc |= 0b10'00000'000000'00000'0'00000000'00000;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case R_SPARC_PC10:
|
|
||||||
case R_SPARC_PCPLT10:
|
|
||||||
*(ub32 *)loc |= bits(S + A - P, 9, 0);
|
|
||||||
break;
|
|
||||||
case R_SPARC_PC22:
|
|
||||||
case R_SPARC_PCPLT22:
|
|
||||||
case R_SPARC_PC_LM22:
|
|
||||||
*(ub32 *)loc |= bits(S + A - P, 31, 10);
|
|
||||||
break;
|
|
||||||
case R_SPARC_OLO10:
|
|
||||||
*(ub32 *)loc |= bits(bits(S + A, 9, 0) + rel.r_type_data, 12, 0);
|
|
||||||
break;
|
|
||||||
case R_SPARC_HH22:
|
|
||||||
*(ub32 *)loc |= bits(S + A, 63, 42);
|
|
||||||
break;
|
|
||||||
case R_SPARC_HM10:
|
|
||||||
*(ub32 *)loc |= bits(S + A, 41, 32);
|
|
||||||
break;
|
|
||||||
case R_SPARC_PC_HH22:
|
|
||||||
*(ub32 *)loc |= bits(S + A - P, 63, 42);
|
|
||||||
break;
|
|
||||||
case R_SPARC_PC_HM10:
|
|
||||||
*(ub32 *)loc |= bits(S + A - P, 41, 32);
|
|
||||||
break;
|
|
||||||
case R_SPARC_HIX22:
|
|
||||||
*(ub32 *)loc |= bits(~(S + A), 31, 10);
|
|
||||||
break;
|
|
||||||
case R_SPARC_LOX10:
|
|
||||||
*(ub32 *)loc |= bits(S + A, 9, 0) | 0b1'1100'0000'0000;
|
|
||||||
break;
|
|
||||||
case R_SPARC_H44:
|
|
||||||
*(ub32 *)loc |= bits(S + A, 43, 22);
|
|
||||||
break;
|
|
||||||
case R_SPARC_M44:
|
|
||||||
*(ub32 *)loc |= bits(S + A, 21, 12);
|
|
||||||
break;
|
|
||||||
case R_SPARC_L44:
|
|
||||||
*(ub32 *)loc |= bits(S + A, 11, 0);
|
|
||||||
break;
|
|
||||||
case R_SPARC_TLS_GD_HI22:
|
|
||||||
*(ub32 *)loc |= bits(sym.get_tlsgd_addr(ctx) + A - GOT, 31, 10);
|
|
||||||
break;
|
|
||||||
case R_SPARC_TLS_GD_LO10:
|
|
||||||
*(ub32 *)loc |= bits(sym.get_tlsgd_addr(ctx) + A - GOT, 9, 0);
|
|
||||||
break;
|
|
||||||
case R_SPARC_TLS_GD_CALL:
|
|
||||||
case R_SPARC_TLS_LDM_CALL: {
|
|
||||||
u64 addr;
|
|
||||||
if (ctx.arg.is_static)
|
|
||||||
addr = ctx.extra.tls_get_addr_sec->shdr.sh_addr;
|
|
||||||
else
|
|
||||||
addr = ctx.extra.tls_get_addr_sym->get_addr(ctx);
|
|
||||||
|
|
||||||
*(ub32 *)loc |= bits(addr + A - P, 31, 2);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_SPARC_TLS_LDM_HI22:
|
|
||||||
*(ub32 *)loc |= bits(ctx.got->get_tlsld_addr(ctx) + A - GOT, 31, 10);
|
|
||||||
break;
|
|
||||||
case R_SPARC_TLS_LDM_LO10:
|
|
||||||
*(ub32 *)loc |= bits(ctx.got->get_tlsld_addr(ctx) + A - GOT, 9, 0);
|
|
||||||
break;
|
|
||||||
case R_SPARC_TLS_LDO_HIX22:
|
|
||||||
*(ub32 *)loc |= bits(S + A - ctx.dtp_addr, 31, 10);
|
|
||||||
break;
|
|
||||||
case R_SPARC_TLS_LDO_LOX10:
|
|
||||||
*(ub32 *)loc |= bits(S + A - ctx.dtp_addr, 9, 0);
|
|
||||||
break;
|
|
||||||
case R_SPARC_TLS_IE_HI22:
|
|
||||||
*(ub32 *)loc |= bits(sym.get_gottp_addr(ctx) + A - GOT, 31, 10);
|
|
||||||
break;
|
|
||||||
case R_SPARC_TLS_IE_LO10:
|
|
||||||
*(ub32 *)loc |= bits(sym.get_gottp_addr(ctx) + A - GOT, 9, 0);
|
|
||||||
break;
|
|
||||||
case R_SPARC_TLS_LE_HIX22:
|
|
||||||
*(ub32 *)loc |= bits(~(S + A - ctx.tp_addr), 31, 10);
|
|
||||||
break;
|
|
||||||
case R_SPARC_TLS_LE_LOX10:
|
|
||||||
*(ub32 *)loc |= bits(S + A - ctx.tp_addr, 9, 0) | 0b1'1100'0000'0000;
|
|
||||||
break;
|
|
||||||
case R_SPARC_SIZE32:
|
|
||||||
*(ub32 *)loc = sym.esym().st_size + A;
|
|
||||||
break;
|
|
||||||
case R_SPARC_TLS_GD_ADD:
|
|
||||||
case R_SPARC_TLS_LDM_ADD:
|
|
||||||
case R_SPARC_TLS_LDO_ADD:
|
|
||||||
case R_SPARC_TLS_IE_LD:
|
|
||||||
case R_SPARC_TLS_IE_LDX:
|
|
||||||
case R_SPARC_TLS_IE_ADD:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
unreachable();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
u8 *loc = base + rel.r_offset;
|
|
||||||
|
|
||||||
auto check = [&](i64 val, i64 lo, i64 hi) {
|
|
||||||
if (val < lo || hi <= val)
|
|
||||||
Error(ctx) << *this << ": relocation " << rel << " against "
|
|
||||||
<< sym << " out of range: " << val << " is not in ["
|
|
||||||
<< lo << ", " << hi << ")";
|
|
||||||
};
|
|
||||||
|
|
||||||
SectionFragment<E> *frag;
|
|
||||||
i64 frag_addend;
|
|
||||||
std::tie(frag, frag_addend) = get_fragment(ctx, rel);
|
|
||||||
|
|
||||||
u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
|
|
||||||
u64 A = frag ? frag_addend : (i64)rel.r_addend;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_SPARC_64:
|
|
||||||
case R_SPARC_UA64:
|
|
||||||
if (std::optional<u64> val = get_tombstone(sym, frag))
|
|
||||||
*(ub64 *)loc = *val;
|
|
||||||
else
|
|
||||||
*(ub64 *)loc = S + A;
|
|
||||||
break;
|
|
||||||
case R_SPARC_32:
|
|
||||||
case R_SPARC_UA32: {
|
|
||||||
i64 val = S + A;
|
|
||||||
check(val, 0, 1LL << 32);
|
|
||||||
*(ub32 *)loc = val;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case R_SPARC_TLS_DTPOFF32:
|
|
||||||
*(ub32 *)loc = S + A - ctx.dtp_addr;
|
|
||||||
break;
|
|
||||||
case R_SPARC_TLS_DTPOFF64:
|
|
||||||
*(ub64 *)loc = S + A - ctx.dtp_addr;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << *this << ": apply_reloc_nonalloc: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void InputSection<E>::scan_relocations(Context<E> &ctx) {
|
|
||||||
assert(shdr().sh_flags & SHF_ALLOC);
|
|
||||||
|
|
||||||
this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
|
|
||||||
std::span<const ElfRel<E>> rels = get_rels(ctx);
|
|
||||||
|
|
||||||
// Scan relocations
|
|
||||||
for (i64 i = 0; i < rels.size(); i++) {
|
|
||||||
const ElfRel<E> &rel = rels[i];
|
|
||||||
if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
Symbol<E> &sym = *file.symbols[rel.r_sym];
|
|
||||||
|
|
||||||
if (sym.is_ifunc())
|
|
||||||
sym.flags |= NEEDS_GOT | NEEDS_PLT;
|
|
||||||
|
|
||||||
switch (rel.r_type) {
|
|
||||||
case R_SPARC_64:
|
|
||||||
scan_dyn_absrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_SPARC_8:
|
|
||||||
case R_SPARC_5:
|
|
||||||
case R_SPARC_6:
|
|
||||||
case R_SPARC_7:
|
|
||||||
case R_SPARC_10:
|
|
||||||
case R_SPARC_11:
|
|
||||||
case R_SPARC_13:
|
|
||||||
case R_SPARC_16:
|
|
||||||
case R_SPARC_22:
|
|
||||||
case R_SPARC_32:
|
|
||||||
case R_SPARC_REGISTER:
|
|
||||||
case R_SPARC_UA16:
|
|
||||||
case R_SPARC_UA32:
|
|
||||||
case R_SPARC_UA64:
|
|
||||||
case R_SPARC_PC_HM10:
|
|
||||||
case R_SPARC_OLO10:
|
|
||||||
case R_SPARC_LOX10:
|
|
||||||
case R_SPARC_HM10:
|
|
||||||
case R_SPARC_M44:
|
|
||||||
case R_SPARC_HIX22:
|
|
||||||
case R_SPARC_LO10:
|
|
||||||
case R_SPARC_L44:
|
|
||||||
case R_SPARC_LM22:
|
|
||||||
case R_SPARC_HI22:
|
|
||||||
case R_SPARC_H44:
|
|
||||||
case R_SPARC_HH22:
|
|
||||||
scan_absrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_SPARC_PLT32:
|
|
||||||
case R_SPARC_WPLT30:
|
|
||||||
case R_SPARC_WDISP30:
|
|
||||||
case R_SPARC_HIPLT22:
|
|
||||||
case R_SPARC_LOPLT10:
|
|
||||||
case R_SPARC_PCPLT32:
|
|
||||||
case R_SPARC_PCPLT22:
|
|
||||||
case R_SPARC_PCPLT10:
|
|
||||||
case R_SPARC_PLT64:
|
|
||||||
if (sym.is_imported)
|
|
||||||
sym.flags |= NEEDS_PLT;
|
|
||||||
break;
|
|
||||||
case R_SPARC_GOT13:
|
|
||||||
case R_SPARC_GOT10:
|
|
||||||
case R_SPARC_GOT22:
|
|
||||||
case R_SPARC_GOTDATA_HIX22:
|
|
||||||
sym.flags |= NEEDS_GOT;
|
|
||||||
break;
|
|
||||||
case R_SPARC_GOTDATA_OP_HIX22:
|
|
||||||
if (sym.is_imported)
|
|
||||||
sym.flags |= NEEDS_GOT;
|
|
||||||
break;
|
|
||||||
case R_SPARC_DISP16:
|
|
||||||
case R_SPARC_DISP32:
|
|
||||||
case R_SPARC_DISP64:
|
|
||||||
case R_SPARC_DISP8:
|
|
||||||
case R_SPARC_PC10:
|
|
||||||
case R_SPARC_PC22:
|
|
||||||
case R_SPARC_PC_LM22:
|
|
||||||
case R_SPARC_WDISP16:
|
|
||||||
case R_SPARC_WDISP19:
|
|
||||||
case R_SPARC_WDISP22:
|
|
||||||
case R_SPARC_PC_HH22:
|
|
||||||
scan_pcrel(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_SPARC_TLS_GD_HI22:
|
|
||||||
sym.flags |= NEEDS_TLSGD;
|
|
||||||
break;
|
|
||||||
case R_SPARC_TLS_LDM_HI22:
|
|
||||||
ctx.needs_tlsld = true;
|
|
||||||
break;
|
|
||||||
case R_SPARC_TLS_IE_HI22:
|
|
||||||
sym.flags |= NEEDS_GOTTP;
|
|
||||||
break;
|
|
||||||
case R_SPARC_TLS_GD_CALL:
|
|
||||||
case R_SPARC_TLS_LDM_CALL:
|
|
||||||
if (!ctx.arg.is_static && ctx.extra.tls_get_addr_sym->is_imported)
|
|
||||||
ctx.extra.tls_get_addr_sym->flags |= NEEDS_PLT;
|
|
||||||
break;
|
|
||||||
case R_SPARC_TLS_LE_HIX22:
|
|
||||||
case R_SPARC_TLS_LE_LOX10:
|
|
||||||
check_tlsle(ctx, sym, rel);
|
|
||||||
break;
|
|
||||||
case R_SPARC_GOTDATA_OP_LOX10:
|
|
||||||
case R_SPARC_GOTDATA_OP:
|
|
||||||
case R_SPARC_GOTDATA_LOX10:
|
|
||||||
case R_SPARC_TLS_GD_LO10:
|
|
||||||
case R_SPARC_TLS_GD_ADD:
|
|
||||||
case R_SPARC_TLS_LDM_LO10:
|
|
||||||
case R_SPARC_TLS_LDM_ADD:
|
|
||||||
case R_SPARC_TLS_LDO_HIX22:
|
|
||||||
case R_SPARC_TLS_LDO_LOX10:
|
|
||||||
case R_SPARC_TLS_LDO_ADD:
|
|
||||||
case R_SPARC_TLS_IE_ADD:
|
|
||||||
case R_SPARC_TLS_IE_LD:
|
|
||||||
case R_SPARC_TLS_IE_LDX:
|
|
||||||
case R_SPARC_TLS_IE_LO10:
|
|
||||||
case R_SPARC_SIZE32:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Fatal(ctx) << *this << ": scan_relocations: " << rel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// __tls_get_addr is not defined by libc.a, so we can't use that function
|
|
||||||
// in statically-linked executables. This section provides a replacement.
|
|
||||||
void SparcTlsGetAddrSection::copy_buf(Context<E> &ctx) {
  // Emits a six-instruction __tls_get_addr replacement into this section's
  // slot of the output buffer. The first two instructions build a constant
  // from the distance between ctx.tls_begin and ctx.tp_addr.
  static const ub32 insn[] = {
    0x0300'0000, // sethi  %hi(TP_SIZE), %g1
    0x8210'6000, // or     %g1, %lo(TP_SIZE), %g1
    0x8221'c001, // sub    %g7, %g1, %g1
    0xd05a'2008, // ldx    [ %o0 + 8 ], %o0
    0x81c3'e008, // retl
    0x9000'4008, // add    %g1, %o0, %o0
  };

  assert(this->shdr.sh_size == sizeof(insn));

  ub32 *out = (ub32 *)(ctx.buf + this->shdr.sh_offset);
  memcpy(out, insn, sizeof(insn));

  // Split the constant into SETHI's upper 22 bits and OR's lower 10 bits.
  auto tp_size = ctx.tp_addr - ctx.tls_begin;
  out[0] |= bits(tp_size, 31, 10);
  out[1] |= bits(tp_size, 9, 0);
}
|
|
||||||
|
|
||||||
} // namespace mold::elf
|
|
3
third_party/mold/elf/cmdline.cc
vendored
3
third_party/mold/elf/cmdline.cc
vendored
|
@ -1,6 +1,6 @@
|
||||||
// clang-format off
|
// clang-format off
|
||||||
#include "third_party/mold/elf/mold.h"
|
#include "third_party/mold/elf/mold.h"
|
||||||
// MISSING #include "../common/cmdline.h"
|
#include "third_party/mold/cmdline.h"
|
||||||
|
|
||||||
#include "third_party/libcxx/regex"
|
#include "third_party/libcxx/regex"
|
||||||
#include "third_party/libcxx/sstream"
|
#include "third_party/libcxx/sstream"
|
||||||
|
@ -36,7 +36,6 @@
|
||||||
#include "libc/sysv/consts/o.h"
|
#include "libc/sysv/consts/o.h"
|
||||||
#include "libc/sysv/consts/ok.h"
|
#include "libc/sysv/consts/ok.h"
|
||||||
#include "libc/time/time.h"
|
#include "libc/time/time.h"
|
||||||
#include "third_party/getopt/getopt.internal.h"
|
|
||||||
#include "third_party/musl/crypt.h"
|
#include "third_party/musl/crypt.h"
|
||||||
#include "third_party/musl/lockf.h"
|
#include "third_party/musl/lockf.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
2
third_party/mold/elf/elf.h
vendored
2
third_party/mold/elf/elf.h
vendored
|
@ -1,7 +1,7 @@
|
||||||
// clang-format off
|
// clang-format off
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
// MISSING #include "../common/integers.h"
|
#include "third_party/mold/integers.h"
|
||||||
|
|
||||||
#include "third_party/libcxx/ostream"
|
#include "third_party/libcxx/ostream"
|
||||||
#include "third_party/libcxx/string"
|
#include "third_party/libcxx/string"
|
||||||
|
|
7
third_party/mold/elf/main.cc
vendored
7
third_party/mold/elf/main.cc
vendored
|
@ -1,9 +1,8 @@
|
||||||
// clang-format off
|
// clang-format off
|
||||||
#include "third_party/mold/elf/mold.h"
|
#include "third_party/mold/elf/mold.h"
|
||||||
// MISSING #include "../common/archive-file.h"
|
#include "third_party/mold/archive-file.h"
|
||||||
// MISSING #include "../common/cmdline.h"
|
#include "third_party/mold/cmdline.h"
|
||||||
// MISSING #include "../common/output-file.h"
|
#include "third_party/mold/output-file.h"
|
||||||
|
|
||||||
#include "third_party/libcxx/cstring"
|
#include "third_party/libcxx/cstring"
|
||||||
#include "third_party/libcxx/functional"
|
#include "third_party/libcxx/functional"
|
||||||
#include "third_party/libcxx/iomanip"
|
#include "third_party/libcxx/iomanip"
|
||||||
|
|
8
third_party/mold/elf/mold.h
vendored
8
third_party/mold/elf/mold.h
vendored
|
@ -2,7 +2,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "third_party/mold/elf/elf.h"
|
#include "third_party/mold/elf/elf.h"
|
||||||
// MISSING #include "../common/common.h"
|
#include "third_party/mold/common.h"
|
||||||
|
|
||||||
#include "third_party/libcxx/atomic"
|
#include "third_party/libcxx/atomic"
|
||||||
#include "third_party/libcxx/bitset"
|
#include "third_party/libcxx/bitset"
|
||||||
|
@ -15,16 +15,19 @@
|
||||||
#include "third_party/libcxx/memory"
|
#include "third_party/libcxx/memory"
|
||||||
#include "third_party/libcxx/mutex"
|
#include "third_party/libcxx/mutex"
|
||||||
#include "third_party/libcxx/optional"
|
#include "third_party/libcxx/optional"
|
||||||
// MISSING #include <span>
|
#include "third_party/libcxx/span"
|
||||||
#include "third_party/libcxx/sstream"
|
#include "third_party/libcxx/sstream"
|
||||||
#include "third_party/libcxx/string"
|
#include "third_party/libcxx/string"
|
||||||
#include "third_party/libcxx/string_view"
|
#include "third_party/libcxx/string_view"
|
||||||
|
|
||||||
|
#include "third_party/mold/fake_tbb.h"
|
||||||
// MISSING #include <tbb/concurrent_hash_map.h>
|
// MISSING #include <tbb/concurrent_hash_map.h>
|
||||||
// MISSING #include <tbb/concurrent_unordered_map.h>
|
// MISSING #include <tbb/concurrent_unordered_map.h>
|
||||||
// MISSING #include <tbb/concurrent_vector.h>
|
// MISSING #include <tbb/concurrent_vector.h>
|
||||||
// MISSING #include <tbb/enumerable_thread_specific.h>
|
// MISSING #include <tbb/enumerable_thread_specific.h>
|
||||||
// MISSING #include <tbb/spin_mutex.h>
|
// MISSING #include <tbb/spin_mutex.h>
|
||||||
// MISSING #include <tbb/task_group.h>
|
// MISSING #include <tbb/task_group.h>
|
||||||
|
|
||||||
#include "third_party/libcxx/type_traits"
|
#include "third_party/libcxx/type_traits"
|
||||||
#include "third_party/libcxx/unordered_map"
|
#include "third_party/libcxx/unordered_map"
|
||||||
#include "third_party/libcxx/unordered_set"
|
#include "third_party/libcxx/unordered_set"
|
||||||
|
@ -42,7 +45,6 @@
|
||||||
#include "libc/sysv/consts/o.h"
|
#include "libc/sysv/consts/o.h"
|
||||||
#include "libc/sysv/consts/ok.h"
|
#include "libc/sysv/consts/ok.h"
|
||||||
#include "libc/time/time.h"
|
#include "libc/time/time.h"
|
||||||
#include "third_party/getopt/getopt.internal.h"
|
|
||||||
#include "third_party/musl/crypt.h"
|
#include "third_party/musl/crypt.h"
|
||||||
#include "third_party/musl/lockf.h"
|
#include "third_party/musl/lockf.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
40
third_party/mold/fake_tbb.h
vendored
40
third_party/mold/fake_tbb.h
vendored
|
@ -6,6 +6,16 @@ namespace tbb {
|
||||||
template <typename T>
|
template <typename T>
|
||||||
using concurrent_vector = std::vector<T>;
|
using concurrent_vector = std::vector<T>;
|
||||||
|
|
||||||
|
// Stand-in for tbb::concurrent_hash_map: a plain alias for
// std::unordered_map with all five template arguments forwarded unchanged.
// The alias adds no locking of its own.
// NOTE(review): std::unordered_map has no accessor-style API; confirm that
// callers only rely on the std::unordered_map interface.
template <
|
||||||
|
class Key,
|
||||||
|
class T,
|
||||||
|
class Hash = std::hash<Key>,
|
||||||
|
class KeyEqual = std::equal_to<Key>,
|
||||||
|
class Allocator = std::allocator< std::pair<const Key, T> > >
|
||||||
|
using concurrent_hash_map = std::unordered_map<Key, T, Hash, KeyEqual, Allocator>;
|
||||||
|
|
||||||
|
// Stand-in for tbb::spin_mutex: simply an alias for std::mutex.
using spin_mutex = std::mutex;
|
||||||
|
|
||||||
// Serial stand-in for tbb::parallel_for_each.
//
// Applies `f` to every element of the range [first, last), in order, on the
// calling thread. The previous empty body never invoked `f`, so any work
// handed to this function was silently dropped.
//
// first, last: iterators delimiting the elements to visit.
// f: callable invoked as f(*it) for each iterator in the range.
template<typename InputIterator, typename Function>
void parallel_for_each(InputIterator first, InputIterator last, const Function& f) {
  for (; first != last; ++first)
    f(*first);
}
|
||||||
|
@ -22,5 +32,35 @@ namespace tbb {
|
||||||
void parallel_for(Index first, Index last, const Function& f) {
|
void parallel_for(Index first, Index last, const Function& f) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Completion states reported by task_group.
enum task_group_status {
  not_complete,
  complete,
  canceled
};

// Single-threaded stand-in for tbb::task_group.
//
// There is no scheduler behind this class: every callable handed to run() or
// run_and_wait() is executed immediately on the calling thread. The previous
// versions of those members discarded the callable without invoking it, so
// work submitted to the group never happened.
class task_group {
public:
  task_group() {}
  ~task_group() {}

  // Runs `f` right away. It has therefore already finished by the time
  // wait() is called.
  template<typename Func>
  void run(Func&& f) {
    f();
  }

  // Runs `f` right away and reports the group as complete.
  template<typename Func>
  task_group_status run_and_wait(const Func& f) {
    f();
    return task_group_status::complete;
  }

  // Nothing is ever pending because run() executes eagerly, so the group is
  // always complete.
  task_group_status wait() {
    return task_group_status::complete;
  }

  // No-op: tasks have already run by the time cancel() could be called.
  void cancel() {}
};
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
5
third_party/mold/git-hash.cc
vendored
Normal file
5
third_party/mold/git-hash.cc
vendored
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
#include "third_party/libcxx/string"
|
||||||
|
|
||||||
|
// Defines mold::mold_git_hash, a std::string holding a 40-character
// hexadecimal revision id.
// NOTE(review): presumably the upstream mold commit this vendored copy was
// taken from; confirm before relying on it.
namespace mold {
|
||||||
|
std::string mold_git_hash = "d4d93d7fb72dd19c44aafa4dd5397e35787d33ad";
|
||||||
|
}
|
3
third_party/mold/hyperloglog.cc
vendored
3
third_party/mold/hyperloglog.cc
vendored
|
@ -5,9 +5,10 @@
|
||||||
// For more info, read
|
// For more info, read
|
||||||
// https://engineering.fb.com/2018/12/13/data-infrastructure/hyperloglog
|
// https://engineering.fb.com/2018/12/13/data-infrastructure/hyperloglog
|
||||||
|
|
||||||
|
// TODO(fzakaria): changed from libcxx because pow symbol wasn't present.
|
||||||
#include "third_party/mold/common.h"
|
#include "third_party/mold/common.h"
|
||||||
|
|
||||||
#include "third_party/libcxx/cmath"
|
#include "libc/math.h"
|
||||||
|
|
||||||
namespace mold {
|
namespace mold {
|
||||||
|
|
||||||
|
|
7
third_party/mold/mold.mk
vendored
7
third_party/mold/mold.mk
vendored
|
@ -6,7 +6,7 @@ PKGS += THIRD_PARTY_MOLD
|
||||||
THIRD_PARTY_MOLD_ARTIFACTS += THIRD_PARTY_MOLD_A
|
THIRD_PARTY_MOLD_ARTIFACTS += THIRD_PARTY_MOLD_A
|
||||||
THIRD_PARTY_MOLD = $(THIRD_PARTY_MOLD_A_DEPS) $(THIRD_PARTY_MOLD_A)
|
THIRD_PARTY_MOLD = $(THIRD_PARTY_MOLD_A_DEPS) $(THIRD_PARTY_MOLD_A)
|
||||||
THIRD_PARTY_MOLD_A = o/$(MODE)/third_party/mold/mold.a
|
THIRD_PARTY_MOLD_A = o/$(MODE)/third_party/mold/mold.a
|
||||||
THIRD_PARTY_MOLD_FILES := $(wildcard third_party/mold/*)
|
THIRD_PARTY_MOLD_FILES := $(wildcard third_party/mold/*) $(wildcard third_party/mold/elf/*)
|
||||||
THIRD_PARTY_MOLD_HDRS = $(filter %.h,$(THIRD_PARTY_MOLD_FILES))
|
THIRD_PARTY_MOLD_HDRS = $(filter %.h,$(THIRD_PARTY_MOLD_FILES))
|
||||||
THIRD_PARTY_MOLD_SRCS = $(filter %.cc,$(THIRD_PARTY_MOLD_FILES))
|
THIRD_PARTY_MOLD_SRCS = $(filter %.cc,$(THIRD_PARTY_MOLD_FILES))
|
||||||
THIRD_PARTY_MOLD_OBJS = $(THIRD_PARTY_MOLD_SRCS:%.cc=o/$(MODE)/%.o)
|
THIRD_PARTY_MOLD_OBJS = $(THIRD_PARTY_MOLD_SRCS:%.cc=o/$(MODE)/%.o)
|
||||||
|
@ -16,6 +16,9 @@ THIRD_PARTY_MOLD_A_DIRECTDEPS = \
|
||||||
LIBC_STR \
|
LIBC_STR \
|
||||||
LIBC_INTRIN \
|
LIBC_INTRIN \
|
||||||
LIBC_STDIO \
|
LIBC_STDIO \
|
||||||
|
LIBC_CALLS \
|
||||||
|
LIBC_TINYMATH \
|
||||||
|
LIBC_SYSV \
|
||||||
LIBC_RUNTIME \
|
LIBC_RUNTIME \
|
||||||
THIRD_PARTY_ZSTD \
|
THIRD_PARTY_ZSTD \
|
||||||
THIRD_PARTY_XXHASH \
|
THIRD_PARTY_XXHASH \
|
||||||
|
@ -35,6 +38,8 @@ $(THIRD_PARTY_MOLD_OBJS): private \
|
||||||
-fno-asynchronous-unwind-tables \
|
-fno-asynchronous-unwind-tables \
|
||||||
-Wno-sign-compare \
|
-Wno-sign-compare \
|
||||||
-Wno-unused-function \
|
-Wno-unused-function \
|
||||||
|
-DMOLD_X86_64=1 \
|
||||||
|
-DMOLD_TARGET=X86_64
|
||||||
|
|
||||||
THIRD_PARTY_MOLD_CHECKS = \
|
THIRD_PARTY_MOLD_CHECKS = \
|
||||||
$(THIRD_PARTY_MOLD_A).pkg \
|
$(THIRD_PARTY_MOLD_A).pkg \
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue