This commit is contained in:
Farid Zakaria 2023-06-26 16:32:01 +00:00
parent 95fbdb4f76
commit 29b8816f1f
391 changed files with 41166 additions and 6 deletions

View file

@ -781,7 +781,7 @@ bool __cxx_atomic_compare_exchange_strong(
}
template <typename _Tp>
_LIBCPP_INLINE_VISIBILITY
_LIBCPP_INLINE_VISIBILITY inline
bool __cxx_atomic_compare_exchange_strong(
__cxx_atomic_base_impl<_Tp>* __a, _Tp* __expected, _Tp __value, memory_order __success,
memory_order __failure) {
@ -835,7 +835,7 @@ _Tp __cxx_atomic_fetch_add(volatile __cxx_atomic_base_impl<_Tp>* __a,
}
template <typename _Tp, typename _Td>
_LIBCPP_INLINE_VISIBILITY
_LIBCPP_INLINE_VISIBILITY inline
_Tp __cxx_atomic_fetch_add(__cxx_atomic_base_impl<_Tp>* __a, _Td __delta,
memory_order __order) {
return __atomic_fetch_add(&__a->__a_value, __delta * __skip_amt<_Tp>::value,
@ -851,7 +851,7 @@ _Tp __cxx_atomic_fetch_sub(volatile __cxx_atomic_base_impl<_Tp>* __a,
}
template <typename _Tp, typename _Td>
_LIBCPP_INLINE_VISIBILITY
_LIBCPP_INLINE_VISIBILITY inline
_Tp __cxx_atomic_fetch_sub(__cxx_atomic_base_impl<_Tp>* __a, _Td __delta,
memory_order __order) {
return __atomic_fetch_sub(&__a->__a_value, __delta * __skip_amt<_Tp>::value,
@ -867,7 +867,7 @@ _Tp __cxx_atomic_fetch_and(volatile __cxx_atomic_base_impl<_Tp>* __a,
}
template <typename _Tp>
_LIBCPP_INLINE_VISIBILITY
_LIBCPP_INLINE_VISIBILITY inline
_Tp __cxx_atomic_fetch_and(__cxx_atomic_base_impl<_Tp>* __a,
_Tp __pattern, memory_order __order) {
return __atomic_fetch_and(&__a->__a_value, __pattern,
@ -875,7 +875,7 @@ _Tp __cxx_atomic_fetch_and(__cxx_atomic_base_impl<_Tp>* __a,
}
template <typename _Tp>
_LIBCPP_INLINE_VISIBILITY
_LIBCPP_INLINE_VISIBILITY inline
_Tp __cxx_atomic_fetch_or(volatile __cxx_atomic_base_impl<_Tp>* __a,
_Tp __pattern, memory_order __order) {
return __atomic_fetch_or(&__a->__a_value, __pattern,
@ -883,7 +883,7 @@ _Tp __cxx_atomic_fetch_or(volatile __cxx_atomic_base_impl<_Tp>* __a,
}
template <typename _Tp>
_LIBCPP_INLINE_VISIBILITY
_LIBCPP_INLINE_VISIBILITY inline
_Tp __cxx_atomic_fetch_or(__cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern,
memory_order __order) {
return __atomic_fetch_or(&__a->__a_value, __pattern,

View file

@ -110,6 +110,7 @@ THIRD_PARTY_LIBCXX_A_HDRS = \
third_party/libcxx/refstring.hh \
third_party/libcxx/regex \
third_party/libcxx/scoped_allocator \
third_party/libcxx/set \
third_party/libcxx/span \
third_party/libcxx/sstream \

View file

@ -1,5 +1,8 @@
// -*- C++ -*-
// clang-format off
//===------------------------------ span ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@ -130,10 +133,17 @@ template<class Container>
*/
#include "third_party/libcxx/__config"
#include "third_party/libcxx/iterator" // for iterators
#include "third_party/libcxx/array" // for array
#include "third_party/libcxx/type_traits" // for remove_cv, etc
#include "third_party/libcxx/cstddef" // for byte
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
@ -588,4 +598,8 @@ template<class _Container>
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_SPAN

18
third_party/mold/README.cosmo vendored Normal file
View file

@ -0,0 +1,18 @@
DESCRIPTION
Mold: A Modern Linker 🦠
mold is a faster drop-in replacement for existing Unix linkers.
It is several times quicker than the LLVM lld linker, the second-fastest open-source linker,
which I initially developed a few years ago. mold aims to enhance developer productivity by minimizing build time,
particularly in rapid debug-edit-rebuild cycles.
SOURCE
https://github.com/rui314/mold
commit d4d93d7fb72dd19c44aafa4dd5397e35787d33ad
Author: Rui Ueyama <ruiu@bluewhale.systems>
Date: Mon Jun 19 12:35:20 2023 +0900
Format

178
third_party/mold/archive-file.h vendored Normal file
View file

@ -0,0 +1,178 @@
// clang-format off
// This file contains functions to read an archive file (.a file).
// An archive file is just a bundle of object files. It's similar to
// tar or zip, but the contents are not compressed.
//
// An archive file is either "regular" or "thin". A regular archive
// contains object files directly, while a thin archive contains only
// pathnames. In the latter case, actual file contents have to be read
// from given pathnames. A regular archive is sometimes called "fat"
// archive as opposed to "thin".
//
// If an archive file is given to the linker, the linker pulls out
// object files that are needed to resolve undefined symbols. So,
// bunding object files as an archive and giving that archive to the
// linker has a different meaning than directly giving the same set of
// object files to the linker. The former links only needed object
// files, while the latter links all the given object files.
//
// Therefore, if you link libc.a for example, not all the libc
// functions are linked to your binary. Instead, only object files
// that provides functions and variables used in your program get
// linked. To make this efficient, static library functions are
// usually separated to each object file in an archive file. You can
// see the contents of libc.a by running `ar t
// /usr/lib/x86_64-linux-gnu/libc.a`.
#pragma once
#include "third_party/mold/common.h"
#include "third_party/mold/filetype.h"
namespace mold {
// On-disk header preceding each member of a Unix ar(1) archive.
// All fields are fixed-width ASCII; numeric fields hold decimal text.
struct ArHdr {
  char ar_name[16];
  char ar_date[12];
  char ar_uid[6];
  char ar_gid[6];
  char ar_mode[8];
  char ar_size[10];
  char ar_fmag[2];

  // Returns true if the name field begins with the given prefix.
  bool starts_with(std::string_view s) const {
    return std::string_view(ar_name, s.size()) == s;
  }

  // "// " marks the SysV long-filename string table member.
  bool is_strtab() const {
    return starts_with("// ");
  }

  // "/ " and "/SYM64/ " mark the archive symbol table members.
  bool is_symtab() const {
    return starts_with("/ ") || starts_with("/SYM64/ ");
  }

  // Decodes this member's filename. `strtab` is the SysV long-name
  // string table; `ptr` points just past the header and is advanced
  // past an inline BSD-style name if one is present.
  std::string read_name(std::string_view strtab, u8 *&ptr) const {
    // BSD-style long filename: "#1/<len>", with the name stored inline
    // right after the header.
    if (starts_with("#1/")) {
      int namelen = atoi(ar_name + 3);
      std::string name{(char *)ptr, (size_t)namelen};
      ptr += namelen;

      // Trim NUL padding. NOTE(review): a name whose first byte is NUL
      // makes find() return 0 (falsy) and skips the trim — confirm such
      // names cannot occur in practice.
      if (size_t pos = name.find('\0'))
        name = name.substr(0, pos);
      return name;
    }

    // SysV-style long filename: "/<offset>" into the string table,
    // where each entry is terminated by "/\n".
    if (starts_with("/")) {
      const char *start = strtab.data() + atoi(ar_name + 1);
      return {start, (const char *)strstr(start, "/\n")};
    }

    // Short filename stored directly in ar_name, '/'-terminated.
    if (const char *end = (char *)memchr(ar_name, '/', sizeof(ar_name)))
      return {ar_name, end};
    return {ar_name, sizeof(ar_name)};
  }
};
// Reads a thin archive, in which each member is a pathname rather than
// file contents. Every referenced file is opened and returned.
template <typename Context, typename MappedFile>
std::vector<MappedFile *>
read_thin_archive_members(Context &ctx, MappedFile *mf) {
  u8 *begin = mf->data;
  // Skip the 8-byte archive magic.
  u8 *data = begin + 8;
  std::vector<MappedFile *> vec;
  std::string_view strtab;

  while (data < begin + mf->size) {
    // Each header is aligned to a 2 byte boundary.
    if ((begin - data) % 2)
      data++;

    ArHdr &hdr = *(ArHdr *)data;
    u8 *body = data + sizeof(hdr);
    u64 size = atol(hdr.ar_size);

    // Read a string table.
    if (hdr.is_strtab()) {
      strtab = {(char *)body, (size_t)size};
      data = body + size;
      continue;
    }

    // Skip a symbol table.
    if (hdr.is_symtab()) {
      data = body + size;
      continue;
    }

    // Thin archives must store every member name as a long filename.
    if (!hdr.starts_with("#1/") && !hdr.starts_with("/"))
      Fatal(ctx) << mf->name << ": filename is not stored as a long filename";

    std::string name = hdr.read_name(strtab, body);

    // Skip if symbol table. NOTE(review): this `continue` does not
    // advance `data`, so a "__.SYMDEF" member reaching this point would
    // loop forever — verify against upstream mold.
    if (name == "__.SYMDEF" || name == "__.SYMDEF SORTED")
      continue;

    // Resolve a relative member pathname against the archive's directory.
    std::string path = name.starts_with('/') ?
      name : (filepath(mf->name).parent_path() / name).string();
    vec.push_back(MappedFile::must_open(ctx, path));
    vec.back()->thin_parent = mf;

    // read_name() advanced `body` past an inline BSD-style name, so the
    // next header starts at `body`.
    data = body;
  }
  return vec;
}
// Reads a regular ("fat") archive, in which member contents are stored
// inline. Members are returned as slices of `mf` (no extra I/O).
template <typename Context, typename MappedFile>
std::vector<MappedFile *> read_fat_archive_members(Context &ctx, MappedFile *mf) {
  u8 *begin = mf->data;
  // Skip the 8-byte archive magic.
  u8 *data = begin + 8;
  std::vector<MappedFile *> vec;
  std::string_view strtab;

  // Stop once fewer than two bytes (the alignment unit) remain.
  while (begin + mf->size - data >= 2) {
    // Each header is aligned to a 2 byte boundary.
    if ((begin - data) % 2)
      data++;

    ArHdr &hdr = *(ArHdr *)data;
    u8 *body = data + sizeof(hdr);
    u64 size = atol(hdr.ar_size);
    // Advance the cursor past this member's contents.
    data = body + size;

    // Read if string table
    if (hdr.is_strtab()) {
      strtab = {(char *)body, (size_t)size};
      continue;
    }

    // Skip if symbol table
    if (hdr.is_symtab())
      continue;

    // Read the name field
    std::string name = hdr.read_name(strtab, body);

    // Skip if symbol table
    if (name == "__.SYMDEF" || name == "__.SYMDEF SORTED")
      continue;

    // The member's contents span [body, data); record them as a slice.
    vec.push_back(mf->slice(ctx, name, body - begin, data - body));
  }
  return vec;
}
template <typename Context, typename MappedFile>
std::vector<MappedFile *> read_archive_members(Context &ctx, MappedFile *mf) {
switch (get_file_type(ctx, mf)) {
case FileType::AR:
return read_fat_archive_members(ctx, mf);
case FileType::THIN_AR:
return read_thin_archive_members(ctx, mf);
default:
unreachable();
}
}
} // namespace mold

91
third_party/mold/cmdline.h vendored Normal file
View file

@ -0,0 +1,91 @@
// clang-format off
#pragma once
#include "third_party/mold/common.h"
namespace mold {
// Reads a linker "response file" and splits its contents into
// shell-like arguments. Single quotes, double quotes, and backslash
// escapes are honored. The returned views are owned by ctx's string
// pool (via save_string), not by the mapped file.
template <typename Context>
std::vector<std::string_view>
read_response_file(Context &ctx, std::string_view path) {
  std::vector<std::string_view> vec;
  MappedFile<Context> *mf = MappedFile<Context>::must_open(ctx, std::string(path));
  u8 *data = mf->data;

  // Consumes a quoted token starting at `i` (just past the opening
  // quote); returns the index just past the closing quote.
  auto read_quoted = [&](i64 i, char quote) {
    std::string buf;
    while (i < mf->size && data[i] != quote) {
      if (data[i] == '\\') {
        // Backslash escapes the next byte, including the quote char.
        buf.append(1, data[i + 1]);
        i += 2;
      } else {
        buf.append(1, data[i++]);
      }
    }
    // A missing closing quote is a hard error.
    if (i >= mf->size)
      Fatal(ctx) << path << ": premature end of input";
    vec.push_back(save_string(ctx, buf));
    return i + 1;
  };

  // Consumes an unquoted token starting at `i`; whitespace ends it.
  auto read_unquoted = [&](i64 i) {
    std::string buf;
    while (i < mf->size) {
      if (data[i] == '\\' && i + 1 < mf->size) {
        buf.append(1, data[i + 1]);
        i += 2;
        continue;
      }
      if (!isspace(data[i])) {
        buf.append(1, data[i++]);
        continue;
      }
      break;
    }
    vec.push_back(save_string(ctx, buf));
    return i;
  };

  // Tokenize the whole file.
  for (i64 i = 0; i < mf->size;) {
    if (isspace(data[i]))
      i++;
    else if (data[i] == '\'')
      i = read_quoted(i + 1, '\'');
    else if (data[i] == '\"')
      i = read_quoted(i + 1, '\"');
    else
      i = read_unquoted(i);
  }
  return vec;
}
// Replace "@path/to/some/text/file" with its file contents.
template <typename Context>
std::vector<std::string_view> expand_response_files(Context &ctx, char **argv) {
std::vector<std::string_view> vec;
for (i64 i = 0; argv[i]; i++) {
if (argv[i][0] == '@')
append(vec, read_response_file(ctx, argv[i] + 1));
else
vec.push_back(argv[i]);
}
return vec;
}
// Returns `str` with leading and trailing spaces and tabs removed.
// The result is a view into the original buffer; no copy is made.
static inline std::string_view string_trim(std::string_view str) {
  size_t first = str.find_first_not_of(" \t");
  if (first == str.npos)
    return "";
  size_t last = str.find_last_not_of(" \t");
  return str.substr(first, last - first + 1);
}
} // namespace mold

1001
third_party/mold/common.h vendored Normal file

File diff suppressed because it is too large Load diff

186
third_party/mold/compress.cc vendored Normal file
View file

@ -0,0 +1,186 @@
// clang-format off
// This file implements a multi-threaded zlib and zstd compression
// routine.
//
// zlib-compressed data can be merged just by concatenation as long as
// each piece of data is flushed with Z_SYNC_FLUSH. In this file, we
// split input data into multiple shards, compress them individually
// and concatenate them. We then append a header, a trailer and a
// checksum so that the concatenated data is valid zlib-format data.
//
// zstd-compressed data can be merged in the same way.
//
// Using threads to compress data has a downside. Since the dictionary
// is reset on boundaries of shards, compression ratio is sacrificed
// a little bit. However, if a shard size is large enough, that loss
// is negligible in practice.
#include "third_party/mold/common.h"
// MISSING #include <tbb/parallel_for_each.h>
// MISSING #include <zlib.h>
// MISSING #include <zstd.h>
// Asserts that a zlib call returned Z_OK (no-op in release builds).
#define CHECK(fn) \
do { \
[[maybe_unused]] int r = (fn); \
assert(r == Z_OK); \
} while (0)
namespace mold {
// Unit of parallel compression.
static constexpr i64 SHARD_SIZE = 1024 * 1024;

// Cuts `input` into SHARD_SIZE-byte views; the last shard carries any
// remainder (omitted only when `input` splits evenly).
static std::vector<std::string_view> split(std::string_view input) {
  std::vector<std::string_view> shards;
  for (; input.size() >= SHARD_SIZE; input = input.substr(SHARD_SIZE))
    shards.push_back(input.substr(0, SHARD_SIZE));
  if (!input.empty())
    shards.push_back(input);
  return shards;
}
// Compresses one shard with raw deflate (no per-shard zlib header) and
// ends it with Z_SYNC_FLUSH so shards can be concatenated byte-wise.
static std::vector<u8> zlib_compress(std::string_view input) {
  // Initialize zlib stream. Since debug info is generally compressed
  // pretty well with lower compression levels, we chose compression
  // level 1. windowBits is -15, i.e. raw deflate without header.
  z_stream strm;
  strm.zalloc = Z_NULL;
  strm.zfree = Z_NULL;
  strm.opaque = Z_NULL;
  CHECK(deflateInit2(&strm, 1, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY));

  // Set an input buffer
  strm.avail_in = input.size();
  strm.next_in = (u8 *)input.data();

  // Set an output buffer. deflateBound() returns an upper bound
  // on the compression size. +16 for Z_SYNC_FLUSH.
  std::vector<u8> buf(deflateBound(&strm, strm.avail_in) + 16);

  // Compress data. It writes all compressed bytes except the last
  // partial byte, so up to 7 bits can be held to be written to the
  // buffer.
  strm.avail_out = buf.size();
  strm.next_out = buf.data();
  CHECK(deflate(&strm, Z_BLOCK));

  // This is a workaround for libbacktrace before 2022-04-06.
  //
  // Zlib is a bit stream, and what Z_SYNC_FLUSH does is to write a
  // three bit value indicating the start of an uncompressed data
  // block followed by four byte data 00 00 ff ff which indicates that
  // the length of the block is zero. libbacktrace uses its own zlib
  // inflate routine, and it had a bug that if that particular three
  // bit value happens to end at a byte boundary, it accidentally
  // skipped the next byte.
  //
  // In order to avoid triggering that bug, we should avoid calling
  // deflate() with Z_SYNC_FLUSH if the current bit position is 5.
  // If it's 5, we insert an empty block consisting of 10 bits so
  // that the bit position is 7 in the next byte.
  //
  // https://github.com/ianlancetaylor/libbacktrace/pull/87
  int nbits;
  deflatePending(&strm, Z_NULL, &nbits);
  if (nbits == 5)
    CHECK(deflatePrime(&strm, 10, 2));
  CHECK(deflate(&strm, Z_SYNC_FLUSH));

  // Trim the buffer to the bytes actually produced.
  assert(strm.avail_out > 0);
  buf.resize(buf.size() - strm.avail_out);
  buf.shrink_to_fit();
  deflateEnd(&strm);
  return buf;
}
// Splits [buf, buf+size) into shards and compresses them in parallel,
// computing the combined adler32 checksum and the final output size.
ZlibCompressor::ZlibCompressor(u8 *buf, i64 size) {
  std::string_view input{(char *)buf, (size_t)size};
  std::vector<std::string_view> inputs = split(input);
  std::vector<u64> adlers(inputs.size());
  shards.resize(inputs.size());

  // Compress each shard
  tbb::parallel_for((i64)0, (i64)inputs.size(), [&](i64 i) {
    adlers[i] = adler32(1, (u8 *)inputs[i].data(), inputs[i].size());
    shards[i] = zlib_compress(inputs[i]);
  });

  // Combine checksums. NOTE(review): assumes at least one shard;
  // adlers[0] is out of bounds when size == 0 — confirm callers never
  // pass empty input.
  checksum = adlers[0];
  for (i64 i = 1; i < inputs.size(); i++)
    checksum = adler32_combine(checksum, adlers[i], inputs[i].size());

  // Compute the total size
  compressed_size = 8; // the header and the trailer
  for (std::vector<u8> &shard : shards)
    compressed_size += shard.size();
}
// Writes the complete zlib stream (header + shards + trailer) to `buf`,
// which must have room for `compressed_size` bytes.
void ZlibCompressor::write_to(u8 *buf) {
  // Write a zlib-format header
  buf[0] = 0x78;
  buf[1] = 0x9c;

  // Copy compressed data. Each shard's start offset is the running sum
  // of the preceding shard sizes.
  std::vector<i64> offsets(shards.size());
  offsets[0] = 2; // +2 for header
  for (i64 i = 1; i < shards.size(); i++)
    offsets[i] = offsets[i - 1] + shards[i - 1].size();

  tbb::parallel_for((i64)0, (i64)shards.size(), [&](i64 i) {
    memcpy(&buf[offsets[i]], shards[i].data(), shards[i].size());
  });

  // Write a trailer (two bytes terminating the deflate stream; see the
  // file header comment on Z_SYNC_FLUSH concatenation).
  u8 *end = buf + compressed_size;
  end[-6] = 3;
  end[-5] = 0;

  // Write a checksum (the combined adler32, big-endian per zlib format).
  *(ub32 *)(end - 4) = checksum;
}
// Compresses one shard with zstd and returns the compressed bytes.
static std::vector<u8> zstd_compress(std::string_view input) {
  // ZSTD_COMPRESSBOUND gives the worst-case output size.
  std::vector<u8> buf(ZSTD_COMPRESSBOUND(input.size()));
  constexpr int level = 3; // compression level; must be between 1 to 22
  size_t sz = ZSTD_compress(buf.data(), buf.size(), input.data(), input.size(),
                            level);
  assert(!ZSTD_isError(sz));
  // Trim to the bytes actually produced.
  buf.resize(sz);
  buf.shrink_to_fit();
  return buf;
}
// Splits [buf, buf+size) into shards and zstd-compresses them in
// parallel; zstd frames can be concatenated directly.
ZstdCompressor::ZstdCompressor(u8 *buf, i64 size) {
  std::string_view input{(char *)buf, (size_t)size};
  std::vector<std::string_view> inputs = split(input);
  shards.resize(inputs.size());

  // Compress each shard
  tbb::parallel_for((i64)0, (i64)inputs.size(), [&](i64 i) {
    shards[i] = zstd_compress(inputs[i]);
  });

  // Total output is just the concatenation of the shards.
  compressed_size = 0;
  for (std::vector<u8> &shard : shards)
    compressed_size += shard.size();
}
// Writes the concatenated zstd frames to `buf`, which must have room
// for `compressed_size` bytes.
void ZstdCompressor::write_to(u8 *buf) {
  // Copy compressed data. offsets[0] is zero thanks to the vector's
  // value-initialization; the rest are running sums of shard sizes.
  std::vector<i64> offsets(shards.size());
  for (i64 i = 1; i < shards.size(); i++)
    offsets[i] = offsets[i - 1] + shards[i - 1].size();
  tbb::parallel_for((i64)0, (i64)shards.size(), [&](i64 i) {
    memcpy(&buf[offsets[i]], shards[i].data(), shards[i].size());
  });
}
} // namespace mold

4
third_party/mold/config.h.in vendored Normal file
View file

@ -0,0 +1,4 @@
// clang-format off
#define MOLD_VERSION "@mold_VERSION@"
#define MOLD_LIBDIR "@CMAKE_INSTALL_FULL_LIBDIR@"
#cmakedefine01 MOLD_IS_SOLD

52
third_party/mold/demangle.cc vendored Normal file
View file

@ -0,0 +1,52 @@
// clang-format off
#include "third_party/mold/common.h"
#include "third_party/libcxx/cstdlib"
#ifndef _WIN32
// MISSING #include <cxxabi.h>
#endif
// MISSING #include "../third-party/rust-demangle/rust-demangle.h"
namespace mold {
// Demangles `name`, trying Rust first and then C++; returns `name`
// unchanged if neither applies. The returned view may point into a
// thread-local buffer that is freed by the NEXT call on the same
// thread, so consume the result before demangling again.
std::string_view demangle(std::string_view name) {
  static thread_local char *p;

  // Free the buffer returned by the previous call on this thread.
  if (p)
    free(p);

  // Try to demangle as a Rust symbol. Since legacy-style Rust symbols
  // are also valid as a C++ mangled name, we need to call this before
  // cpp_demangle.
  p = rust_demangle(std::string(name).c_str(), 0);
  if (p)
    return p;

  // Try to demangle as a C++ symbol.
  if (std::optional<std::string_view> s = cpp_demangle(name))
    return *s;
  return name;
}
// Demangles an Itanium-ABI C++ symbol ("_Z..."); returns nullopt if
// `name` is not mangled or demangling fails. The thread-local buffer
// is reused (and may be reallocated) across calls on the same thread.
std::optional<std::string_view> cpp_demangle(std::string_view name) {
  static thread_local char *buf;
  static thread_local size_t buflen;

  // TODO(cwasser): Actually demangle Symbols on Windows using e.g.
  // `UnDecorateSymbolName` from Dbghelp, maybe even Itanium symbols?
  #ifndef _WIN32
  if (name.starts_with("_Z")) {
    int status;
    // __cxa_demangle may realloc `buf`; it returns the new buffer.
    char *p = abi::__cxa_demangle(std::string(name).c_str(), buf, &buflen, &status);
    if (status == 0) {
      buf = p;
      return p;
    }
  }
  #endif

  return {};
}
} // namespace mold

331
third_party/mold/elf/arch-alpha.cc vendored Normal file
View file

@ -0,0 +1,331 @@
// clang-format off
// Alpha is a 64-bit RISC ISA developed by DEC (Digital Equipment
// Corporation) in the early '90s. It aimed to be an ISA that would last
// 25 years. DEC expected Alpha would become 1000x faster during that time
// span. Since the ISA was developed from scratch for future machines,
// it's 64-bit from the beginning. There's no 32-bit variant.
//
// DEC ported its own Unix (Tru64) to Alpha. Microsoft also ported Windows
// NT to it. But it wasn't a huge commercial success.
//
// DEC was acquired by Compaq in 1997. In the late '90s, Intel and
// Hewlett-Packard were advertising that their upcoming Itanium processor
// would achieve significantly better performance than RISC processors, so
// Compaq decided to discontinue the Alpha processor line to switch to
// Itanium. Itanium resulted in a miserable failure, but it still suceeded
// to wipe out several RISC processors just by promising overly optimistic
// perf numbers. Alpha as an ISA would probably have been fine after 25
// years since its introduction (which is 1992 + 25 = 2017), but the
// company and its market didn't last that long.
//
// From the linker's point of view, there are a few peculiarities in its
// psABI as shown below:
//
// - Alpha lacks PC-relative memory load/store instructions, so it uses
// register-relative load/store instructions in position-independent
// code. Specifically, GP (which is an alias for $r29) is always
// maintained to refer to .got+0x8000, and global variables' addresses
// are loaded in a GP-relative manner.
//
// - It looks like even function addresses are first loaded to register
// in a GP-relative manner before calling it. We can relax it to
// convert the instruction sequence with a direct branch instruction,
// but by default, object files don't use a direct branch to call a
// function. Therefore, by default, we don't need to create a PLT.
// Any function call is made by first reading its address from GOT and
// jump to the address.
#include "third_party/mold/elf/mold.h"
namespace mold::elf {
using E = ALPHA;
// A 32-bit immediate can be materialized in a register with a "load high"
// and a "load low" instruction sequence. The first instruction sets the
// upper 16 bits in a register, and the second one set the lower 16
// bits. When doing so, they sign-extend an immediate. Therefore, if the
// 15th bit of an immediate happens to be 1, setting a "low half" value
// negates the upper 16 bit values that has already been set in a
// register. To compensate that, we need to add 0x8000 when setting the
// upper 16 bits.
// Returns the upper 16 bits of `val`, pre-biased by 0x8000 so that a
// subsequent sign-extending "load low" of the lower 16 bits
// reconstructs the full value (see the comment above).
static u32 hi(u32 val) {
  return (val + 0x8000) >> 16;
}
// Alpha reaches functions through GOT entries by default (see the file
// header comment), so no PLT header or entries are emitted.
template <>
void write_plt_header(Context<E> &ctx, u8 *buf) {}

template <>
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {}

template <>
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {}
// Applies a relocation inside .eh_frame. On Alpha only R_NONE and the
// 32-bit self-relative R_ALPHA_SREL32 are expected there.
template <>
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
                                    u64 offset, u64 val) {
  u8 *loc = ctx.buf + this->shdr.sh_offset + offset;

  switch (rel.r_type) {
  case R_NONE:
    break;
  case R_ALPHA_SREL32:
    // PC-relative: target minus the address of the relocated word.
    *(ul32 *)loc = val - this->shdr.sh_addr - offset;
    break;
  default:
    Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
  }
}
// Applies relocations to an allocated (SHF_ALLOC) input section that
// has been copied into the output buffer at `base`.
template <>
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);

  // Cursor into this file's slot of .rela.dyn for emitted dynamic relocs.
  ElfRel<E> *dynrel = nullptr;
  if (ctx.reldyn)
    dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
                           file.reldyn_offset + this->reldyn_offset);

  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE)
      continue;

    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;

    // Standard psABI relocation inputs.
    u64 S = sym.get_addr(ctx);                      // symbol value
    u64 A = rel.r_addend;                           // addend
    u64 P = get_addr() + rel.r_offset;              // place being relocated
    u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>); // GOT slot offset
    u64 GOT = ctx.got->shdr.sh_addr;
    u64 GP = ctx.got->shdr.sh_addr + 0x8000;        // Alpha global pointer

    switch (rel.r_type) {
    case R_ALPHA_REFQUAD:
      // 64-bit absolute; may need a dynamic relocation in PIC output.
      apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
      break;
    case R_ALPHA_GPREL32:
      *(ul32 *)loc = S + A - GP;
      break;
    case R_ALPHA_LITERAL:
      // GP-relative displacement of the symbol's GOT slot. A non-zero
      // addend uses the special .alpha_got section (see the comment at
      // the bottom of this file).
      if (A)
        *(ul16 *)loc = ctx.extra.got->get_addr(sym, A) - GP;
      else
        *(ul16 *)loc = GOT + G - GP;
      break;
    case R_ALPHA_BRSGP:
      // 21-bit branch displacement, in instruction words.
      *(ul32 *)loc |= bits(S + A - P - 4, 22, 0);
      break;
    case R_ALPHA_GPDISP:
      // Materialize GP from PC: "load high" here, "load low" A bytes away.
      *(ul16 *)loc = hi(GP - P);
      *(ul16 *)(loc + A) = GP - P;
      break;
    case R_ALPHA_SREL32:
      *(ul32 *)loc = S + A - P;
      break;
    case R_ALPHA_GPRELHIGH:
      *(ul16 *)loc = hi(S + A - GP);
      break;
    case R_ALPHA_GPRELLOW:
      *(ul16 *)loc = S + A - GP;
      break;
    case R_ALPHA_TLSGD:
      *(ul16 *)loc = sym.get_tlsgd_addr(ctx) - GP;
      break;
    case R_ALPHA_TLSLDM:
      *(ul16 *)loc = ctx.got->get_tlsld_addr(ctx) - GP;
      break;
    case R_ALPHA_DTPRELHI:
      *(ul16 *)loc = hi(S + A - ctx.dtp_addr);
      break;
    case R_ALPHA_DTPRELLO:
      *(ul16 *)loc = S + A - ctx.dtp_addr;
      break;
    case R_ALPHA_GOTTPREL:
      *(ul16 *)loc = sym.get_gottp_addr(ctx) + A - GP;
      break;
    case R_ALPHA_TPRELHI:
      *(ul16 *)loc = hi(S + A - ctx.tp_addr);
      break;
    case R_ALPHA_TPRELLO:
      *(ul16 *)loc = S + A - ctx.tp_addr;
      break;
    case R_ALPHA_LITUSE:
    case R_ALPHA_HINT:
      // Informational; nothing to patch.
      break;
    default:
      unreachable();
    }
  }
}
// Applies relocations to a non-allocated section (e.g. debug info).
// Dead mergeable-section fragments are replaced by tombstone values.
template <>
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);

  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;

    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;

    // If the target is a section fragment, use its address and the
    // fragment-relative addend instead of the symbol's.
    SectionFragment<E> *frag;
    i64 frag_addend;
    std::tie(frag, frag_addend) = get_fragment(ctx, rel);

    u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
    u64 A = frag ? frag_addend : (i64)rel.r_addend;

    switch (rel.r_type) {
    case R_ALPHA_REFLONG:
      if (std::optional<u64> val = get_tombstone(sym, frag))
        *(ul32 *)loc = *val;
      else
        *(ul32 *)loc = S + A;
      break;
    case R_ALPHA_REFQUAD:
      if (std::optional<u64> val = get_tombstone(sym, frag))
        *(ul64 *)loc = *val;
      else
        *(ul64 *)loc = S + A;
      break;
    default:
      Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: "
                 << rel;
    }
  }
}
// First pass over relocations: records what each symbol will need
// (GOT/PLT/TLS entries, dynamic relocations) before layout is decided.
template <>
void InputSection<E>::scan_relocations(Context<E> &ctx) {
  assert(shdr().sh_flags & SHF_ALLOC);

  // Reserve this section's slice of the file's .rela.dyn output.
  this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
  std::span<const ElfRel<E>> rels = get_rels(ctx);

  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;

    Symbol<E> &sym = *file.symbols[rel.r_sym];

    if (sym.is_ifunc())
      Error(ctx) << sym << ": GNU ifunc symbol is not supported on Alpha";

    switch (rel.r_type) {
    case R_ALPHA_REFQUAD:
      scan_dyn_absrel(ctx, sym, rel);
      break;
    case R_ALPHA_LITERAL:
      // Non-zero addends go to the special .alpha_got (see the comment
      // at the bottom of this file); plain ones use the regular GOT.
      if (rel.r_addend)
        ctx.extra.got->add_symbol(sym, rel.r_addend);
      else
        sym.flags |= NEEDS_GOT;
      break;
    case R_ALPHA_SREL32:
      scan_pcrel(ctx, sym, rel);
      break;
    case R_ALPHA_BRSGP:
      if (sym.is_imported)
        sym.flags |= NEEDS_PLT;
      break;
    case R_ALPHA_TLSGD:
      sym.flags |= NEEDS_TLSGD;
      break;
    case R_ALPHA_TLSLDM:
      ctx.needs_tlsld = true;
      break;
    case R_ALPHA_GOTTPREL:
      sym.flags |= NEEDS_GOTTP;
      break;
    case R_ALPHA_TPRELHI:
    case R_ALPHA_TPRELLO:
      // TLS local-exec is only valid in a main executable.
      check_tlsle(ctx, sym, rel);
      break;
    case R_ALPHA_GPREL32:
    case R_ALPHA_LITUSE:
    case R_ALPHA_GPDISP:
    case R_ALPHA_HINT:
    case R_ALPHA_GPRELHIGH:
    case R_ALPHA_GPRELLOW:
    case R_ALPHA_DTPRELHI:
    case R_ALPHA_DTPRELLO:
      // Resolved entirely at apply time; nothing to record.
      break;
    default:
      Fatal(ctx) << *this << ": unknown relocation: " << rel;
    }
  }
}
// An R_ALPHA_LITERAL relocation may request the linker to create a GOT
// entry for an external symbol with a non-zero addend. This is an unusual
// request which is not found in any other targets.
//
// Referring an external symbol with a non-zero addend is a bad practice
// because we need to create as many dynamic relocations as the number of
// distinctive addends for the same symbol.
//
// We don't want to mess up the implementation of the common GOT section
// for Alpha. So we create another GOT-like section, .alpha_got. Any GOT
// entry for an R_ALPHA_LITERAL reloc with a non-zero addend is created
// not in .got but in .alpha_got.
//
// Since .alpha_got entries are accessed relative to GP, .alpha_got
// needs to be close enough to .got. It's actually placed next to .got.
// Registers a GOT entry request for (sym, addend). Thread-safe; called
// concurrently from scan_relocations. Zero addends use the regular GOT.
void AlphaGotSection::add_symbol(Symbol<E> &sym, i64 addend) {
  assert(addend);
  std::scoped_lock lock(mu);
  entries.push_back({&sym, addend});
}
// Orders entries deterministically by (file priority, symbol index,
// addend) so finalize() can sort and deduplicate, and get_addr() can
// binary-search.
bool operator<(const AlphaGotSection::Entry &a, const AlphaGotSection::Entry &b) {
  return std::tuple(a.sym->file->priority, a.sym->sym_idx, a.addend) <
         std::tuple(b.sym->file->priority, b.sym->sym_idx, b.addend);
};
// Returns the virtual address of the entry for (sym, addend).
// NOTE(review): must be called after finalize() (entries sorted), and
// the pair must have been registered via add_symbol — lower_bound alone
// does not verify an exact match.
u64 AlphaGotSection::get_addr(Symbol<E> &sym, i64 addend) {
  auto it = std::lower_bound(entries.begin(), entries.end(), Entry{&sym, addend});
  assert(it != entries.end());
  return this->shdr.sh_addr + (it - entries.begin()) * sizeof(Word<E>);
}
// Counts how many entries will need a dynamic relocation: imported
// symbols always do, and in PIC output so does every non-absolute one.
i64 AlphaGotSection::get_reldyn_size(Context<E> &ctx) const {
  i64 count = 0;
  for (const Entry &e : entries) {
    bool needs_dynrel =
      e.sym->is_imported || (ctx.arg.pic && !e.sym->is_absolute());
    if (needs_dynrel)
      count++;
  }
  return count;
}
// Sorts and deduplicates the collected entries and fixes the section
// size. Must run before get_addr() or copy_buf().
void AlphaGotSection::finalize() {
  sort(entries);
  remove_duplicates(entries);
  shdr.sh_size = entries.size() * sizeof(Word<E>);
}
// Writes the .alpha_got contents and the accompanying dynamic
// relocations into the output buffer.
void AlphaGotSection::copy_buf(Context<E> &ctx) {
  // Cursor into this section's slice of .rela.dyn.
  ElfRel<E> *dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
                                    reldyn_offset);

  for (i64 i = 0; i < entries.size(); i++) {
    Entry &e = entries[i];
    u64 P = this->shdr.sh_addr + sizeof(Word<E>) * i;
    ul64 *buf = (ul64 *)(ctx.buf + this->shdr.sh_offset + sizeof(Word<E>) * i);

    if (e.sym->is_imported) {
      // The dynamic linker fills in the symbol's address; we pre-store
      // the addend only if dynamic relocs are applied at link time.
      *buf = ctx.arg.apply_dynamic_relocs ? e.addend : 0;
      *dynrel++ = ElfRel<E>(P, E::R_ABS, e.sym->get_dynsym_idx(ctx), e.addend);
    } else {
      *buf = e.sym->get_addr(ctx) + e.addend;
      // Local symbols in PIC output still need a base-relative fixup.
      if (ctx.arg.pic && !e.sym->is_absolute())
        *dynrel++ = ElfRel<E>(P, E::R_RELATIVE, 0, *buf);
    }
  }
}
} // namespace mold::elf

737
third_party/mold/elf/arch-arm32.cc vendored Normal file
View file

@ -0,0 +1,737 @@
// clang-format off
// ARM32 is a bit special from the linker's viewpoint because ARM
// processors support two different instruction encodings: Thumb and
// ARM (in a narrower sense). Thumb instructions are either 16 bits or
// 32 bits, while ARM instructions are all 32 bits. Feature-wise,
// thumb is a subset of ARM, so not all ARM instructions are
// representable in Thumb.
//
// ARM processors originally supported only ARM instructions. Thumb
// instructions were later added to increase code density.
//
// ARM processors runs in either ARM mode or Thumb mode. The mode can
// be switched using BX (branch and mode exchange)-family instructions.
// We need to use that instructions to, for example, call a function
// encoded in Thumb from a function encoded in ARM. Sometimes, the
// linker even has to emit an interworking thunk code to switch mode.
//
// ARM instructions are aligned to 4 byte boundaries. Thumb are to 2
// byte boundaries.
//
// You can distinguish Thumb functions from ARM functions by looking
// at the least significant bit (LSB) of its "address". If LSB is 0,
// it's ARM; otherwise, Thumb.
//
// For example, if a symbol `foo` is of type STT_FUNC and has value
// 0x2001, `foo` is a function using Thumb instructions whose address
// is 0x2000 (not 0x2001, as Thumb instructions are always 2-byte
// aligned). Likewise, if a function pointer has value 0x2001, it
// refers a Thumb function at 0x2000.
//
// https://github.com/ARM-software/abi-aa/blob/main/aaelf32/aaelf32.rst
#include "third_party/mold/elf/mold.h"
namespace mold::elf {
using E = ARM32;
// Extracts the implicit addend encoded in the instruction/data bytes at
// `loc` for the given ARM32 relocation type (REL-format relocations
// store the addend in place). Returns 0 for types with no addend.
template <>
i64 get_addend(u8 *loc, const ElfRel<E> &rel) {
  switch (rel.r_type) {
  // Plain 32-bit data addends.
  case R_ARM_ABS32:
  case R_ARM_REL32:
  case R_ARM_TARGET1:
  case R_ARM_BASE_PREL:
  case R_ARM_GOTOFF32:
  case R_ARM_GOT_PREL:
  case R_ARM_GOT_BREL:
  case R_ARM_TLS_GD32:
  case R_ARM_TLS_LDM32:
  case R_ARM_TLS_LDO32:
  case R_ARM_TLS_IE32:
  case R_ARM_TLS_LE32:
  case R_ARM_TLS_GOTDESC:
  case R_ARM_TARGET2:
    return *(il32 *)loc;
  case R_ARM_THM_JUMP11:
    // 11-bit Thumb branch offset, stored in halfword units.
    return sign_extend(*(ul16 *)loc, 10) << 1;
  case R_ARM_THM_CALL:
  case R_ARM_THM_JUMP24:
  case R_ARM_THM_TLS_CALL: {
    // 32-bit Thumb BL/B.W: reassemble S:I1:I2:imm10:imm11 from the two
    // halfwords (I1/I2 are J1/J2 XOR-folded with the sign bit).
    u32 S = bit(*(ul16 *)loc, 10);
    u32 J1 = bit(*(ul16 *)(loc + 2), 13);
    u32 J2 = bit(*(ul16 *)(loc + 2), 11);
    u32 I1 = !(J1 ^ S);
    u32 I2 = !(J2 ^ S);
    u32 imm10 = bits(*(ul16 *)loc, 9, 0);
    u32 imm11 = bits(*(ul16 *)(loc + 2), 10, 0);
    u32 val = (S << 24) | (I1 << 23) | (I2 << 22) | (imm10 << 12) | (imm11 << 1);
    return sign_extend(val, 24);
  }
  case R_ARM_CALL:
  case R_ARM_JUMP24:
  case R_ARM_PLT32:
  case R_ARM_TLS_CALL:
    // 24-bit ARM branch offset, stored in word units.
    return sign_extend(*(ul32 *)loc, 23) << 2;
  case R_ARM_MOVW_PREL_NC:
  case R_ARM_MOVW_ABS_NC:
  case R_ARM_MOVT_PREL:
  case R_ARM_MOVT_ABS: {
    // ARM MOVW/MOVT: 16-bit immediate split as imm4:imm12.
    u32 imm12 = bits(*(ul32 *)loc, 11, 0);
    u32 imm4 = bits(*(ul32 *)loc, 19, 16);
    return sign_extend((imm4 << 12) | imm12, 15);
  }
  case R_ARM_PREL31:
    return sign_extend(*(ul32 *)loc, 30);
  case R_ARM_THM_MOVW_PREL_NC:
  case R_ARM_THM_MOVW_ABS_NC:
  case R_ARM_THM_MOVT_PREL:
  case R_ARM_THM_MOVT_ABS: {
    // Thumb MOVW/MOVT: 16-bit immediate split as imm4:i:imm3:imm8.
    u32 imm4 = bits(*(ul16 *)loc, 3, 0);
    u32 i = bit(*(ul16 *)loc, 10);
    u32 imm3 = bits(*(ul16 *)(loc + 2), 14, 12);
    u32 imm8 = bits(*(ul16 *)(loc + 2), 7, 0);
    u32 val = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
    return sign_extend(val, 15);
  }
  default:
    return 0;
  }
}
// Patches the 16-bit immediate of an ARM MOVW/MOVT instruction, which
// is split into imm4 (bits 19-16) and imm12 (bits 11-0) fields.
static void write_mov_imm(u8 *loc, u32 val) {
  u32 imm12 = bits(val, 11, 0);
  u32 imm4 = bits(val, 15, 12);
  *(ul32 *)loc = (*(ul32 *)loc & 0xfff0f000) | (imm4 << 16) | imm12;
}
// Patches the 25-bit branch displacement of a 32-bit Thumb BL/B.W
// instruction, splitting `val` into the S:J1:J2:imm10:imm11 fields
// across the two halfwords at `loc`.
static void write_thm_b_imm(u8 *loc, u32 val) {
  // https://developer.arm.com/documentation/ddi0406/cb/Application-Level-Architecture/Instruction-Details/Alphabetical-list-of-instructions/BL--BLX--immediate-
  u32 sign = bit(val, 24);
  u32 I1 = bit(val, 23);
  u32 I2 = bit(val, 22);
  u32 J1 = !I1 ^ sign;
  u32 J2 = !I2 ^ sign;
  u32 imm10 = bits(val, 21, 12);
  u32 imm11 = bits(val, 11, 1);

  ul16 *buf = (ul16 *)loc;
  buf[0] = (buf[0] & 0b1111'1000'0000'0000) | (sign << 10) | imm10;
  buf[1] = (buf[1] & 0b1101'0000'0000'0000) | (J1 << 13) | (J2 << 11) | imm11;
}
// Patches the 16-bit immediate of a Thumb MOVW/MOVT instruction, which
// is split into imm4:i:imm3:imm8 fields across the two halfwords.
static void write_thm_mov_imm(u8 *loc, u32 val) {
  // https://developer.arm.com/documentation/ddi0406/cb/Application-Level-Architecture/Instruction-Details/Alphabetical-list-of-instructions/MOVT
  u32 imm4 = bits(val, 15, 12);
  u32 i = bit(val, 11);
  u32 imm3 = bits(val, 10, 8);
  u32 imm8 = bits(val, 7, 0);
  ul16 *buf = (ul16 *)loc;
  buf[0] = (buf[0] & 0b1111'1011'1111'0000) | (i << 10) | imm4;
  buf[1] = (buf[1] & 0b1000'1111'0000'0000) | (imm3 << 12) | imm8;
}
// Writes the addend `val` into the word or instruction immediate
// fields at `loc`, using the encoding required by the relocation type.
// This is the counterpart of get_addend above: for every type handled
// here, get_addend reads back the same value this function writes.
template <>
void write_addend(u8 *loc, i64 val, const ElfRel<E> &rel) {
  switch (rel.r_type) {
  case R_ARM_NONE:
    break;
  case R_ARM_ABS32:
  case R_ARM_REL32:
  case R_ARM_TARGET1:
  case R_ARM_BASE_PREL:
  case R_ARM_GOTOFF32:
  case R_ARM_GOT_PREL:
  case R_ARM_GOT_BREL:
  case R_ARM_TLS_GD32:
  case R_ARM_TLS_LDM32:
  case R_ARM_TLS_LDO32:
  case R_ARM_TLS_IE32:
  case R_ARM_TLS_LE32:
  case R_ARM_TLS_GOTDESC:
  case R_ARM_TARGET2:
    // Plain 32-bit data relocations.
    *(ul32 *)loc = val;
    break;
  case R_ARM_THM_JUMP11:
    // 11-bit halfword-aligned Thumb branch displacement.
    *(ul16 *)loc = (*(ul16 *)loc & 0xf800) | bits(val, 11, 1);
    break;
  case R_ARM_THM_CALL:
  case R_ARM_THM_JUMP24:
  case R_ARM_THM_TLS_CALL:
    write_thm_b_imm(loc, val);
    break;
  case R_ARM_CALL:
  case R_ARM_JUMP24:
  case R_ARM_PLT32:
    // 24-bit word-aligned ARM branch displacement.
    *(ul32 *)loc = (*(ul32 *)loc & 0xff00'0000) | bits(val, 25, 2);
    break;
  case R_ARM_MOVW_PREL_NC:
  case R_ARM_MOVW_ABS_NC:
  case R_ARM_MOVT_PREL:
  case R_ARM_MOVT_ABS:
    write_mov_imm(loc, val);
    break;
  case R_ARM_PREL31:
    // Keep the top bit; only the low 31 bits hold the offset.
    *(ul32 *)loc = (*(ul32 *)loc & 0x8000'0000) | (val & 0x7fff'ffff);
    break;
  case R_ARM_THM_MOVW_PREL_NC:
  case R_ARM_THM_MOVW_ABS_NC:
  case R_ARM_THM_MOVT_PREL:
  case R_ARM_THM_MOVT_ABS:
    write_thm_mov_imm(loc, val);
    break;
  default:
    unreachable();
  }
}
// Writes the PLT header code. It saves lr, materializes the absolute
// address of .got.plt from the PC-relative .word at label 2 (patched
// below) and jumps to the resolver address stored in .got.plt.
template <>
void write_plt_header(Context<E> &ctx, u8 *buf) {
  static const ul32 insn[] = {
    0xe52d'e004, // push {lr}
    0xe59f'e004, // ldr lr, 2f
    0xe08f'e00e, // 1: add lr, pc, lr
    0xe5be'f008, // ldr pc, [lr, #8]!
    0x0000'0000, // 2: .word .got.plt - 1b - 8
    0xe320'f000, // nop
    0xe320'f000, // nop
    0xe320'f000, // nop
  };
  memcpy(buf, insn, sizeof(insn));
  // Patch the .word at offset 16 with ".got.plt - 1b - 8"
  // (label 1 is at offset 8, hence the total bias of 16).
  *(ul32 *)(buf + 16) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 16;
}
// Instruction template for a single PLT entry. The trailing .word is
// patched by write_plt_entry/write_pltgot_entry below with the
// PC-relative displacement to the symbol's .got.plt/GOT slot.
static const ul32 plt_entry[] = {
  0xe59f'c004, // 1: ldr ip, 2f
  0xe08c'c00f, // add ip, ip, pc
  0xe59c'f000, // ldr pc, [ip]
  0x0000'0000, // 2: .word sym@GOT - 1b
};
// Writes the PLT entry for `sym` by stamping out plt_entry and patching
// its trailing .word. The -12 bias makes the displacement relative to
// the PC seen by the `add` instruction (ARM reads PC as the instruction
// address + 8; the add is at entry offset 4).
template <>
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  memcpy(buf, plt_entry, sizeof(plt_entry));
  *(ul32 *)(buf + 12) = sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx) - 12;
}
// Same as write_plt_entry, but the entry jumps through the symbol's
// regular GOT slot instead of a .got.plt slot.
template <>
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  memcpy(buf, plt_entry, sizeof(plt_entry));
  *(ul32 *)(buf + 12) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 12;
}
// ARM does not use .eh_frame for exception handling. Instead, it uses
// .ARM.exidx and .ARM.extab. So this function is empty.
// (.ARM.exidx post-processing is done in fixup_arm_exidx_section below.)
template <>
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
u64 offset, u64 val) {}
// ARM and Thumb branch instructions can jump within ±16 MiB, i.e. the
// displacement must survive a round-trip through a 25-bit sign extension.
static bool is_jump_reachable(i64 val) {
  return val == sign_extend(val, 24);
}
// Applies this section's relocations to the output buffer (`base`
// points at this section's bytes in the output). Variable naming
// follows the "ELF for the ARM Architecture" convention:
// S = symbol address, A = addend, P = place (address being relocated),
// T = 1 if the target is Thumb code, G = GOT slot offset,
// GOT = GOT base address.
template <>
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  ElfRel<E> *dynrel = nullptr;
  if (ctx.reldyn)
    dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
file.reldyn_offset + this->reldyn_offset);
  // Returns the displacement from `addr` to a reachable TLS trampoline.
  // The scan position `i` persists across calls (the lambda is mutable),
  // so each call resumes where the previous one stopped.
  auto get_tls_trampoline_addr = [&, i = 0](u64 addr) mutable {
    for (; i < output_section->thunks.size(); i++) {
      i64 disp = output_section->shdr.sh_addr + output_section->thunks[i]->offset -
addr;
      if (is_jump_reachable(disp))
        return disp;
    }
    unreachable();
  };
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || rel.r_type == R_ARM_V4BX)
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;
    // Reports an error if `val` does not fit in [lo, hi).
    auto check = [&](i64 val, i64 lo, i64 hi) {
      if (val < lo || hi <= val)
        Error(ctx) << *this << ": relocation " << rel << " against "
<< sym << " out of range: " << val << " is not in ["
<< lo << ", " << hi << ")";
    };
    u64 S = sym.get_addr(ctx);
    u64 A = get_addend(*this, rel);
    u64 P = get_addr() + rel.r_offset;
    u64 T = S & 1; // bit 0 of a Thumb function's address is set
    u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>);
    u64 GOT = ctx.got->shdr.sh_addr;
    // A range-extension thunk entry has a Thumb entry point at +0 and
    // an ARM entry point at +4 (see RangeExtensionThunk::copy_buf).
    auto get_thumb_thunk_addr = [&] { return get_thunk_addr(i); };
    auto get_arm_thunk_addr = [&] { return get_thunk_addr(i) + 4; };
    switch (rel.r_type) {
    case R_ARM_ABS32:
    case R_ARM_TARGET1:
      apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
      break;
    case R_ARM_REL32:
      *(ul32 *)loc = S + A - P;
      break;
    case R_ARM_THM_CALL: {
      if (sym.is_remaining_undef_weak()) {
        // On ARM, calling a weak undefined symbol jumps to the
        // next instruction.
        *(ul32 *)loc = 0x8000'f3af; // NOP.W
        break;
      }
      // THM_CALL relocation refers either BL or BLX instruction.
      // They are different in only one bit. We need to use BL if
      // the jump target is Thumb. Otherwise, use BLX.
      i64 val = S + A - P;
      if (is_jump_reachable(val)) {
        if (T) {
          write_thm_b_imm(loc, val);
          *(ul16 *)(loc + 2) |= 0x1000; // rewrite to BL
        } else {
          write_thm_b_imm(loc, align_to(val, 4));
          *(ul16 *)(loc + 2) &= ~0x1000; // rewrite to BLX
        }
      } else {
        // Out of range: branch to the ARM entry point of the thunk.
        write_thm_b_imm(loc, align_to(get_arm_thunk_addr() + A - P, 4));
        *(ul16 *)(loc + 2) &= ~0x1000; // rewrite to BLX
      }
      break;
    }
    case R_ARM_BASE_PREL:
      *(ul32 *)loc = GOT + A - P;
      break;
    case R_ARM_GOTOFF32:
      *(ul32 *)loc = ((S + A) | T) - GOT;
      break;
    case R_ARM_GOT_PREL:
    case R_ARM_TARGET2:
      *(ul32 *)loc = GOT + G + A - P;
      break;
    case R_ARM_GOT_BREL:
      *(ul32 *)loc = G + A;
      break;
    case R_ARM_CALL: {
      if (sym.is_remaining_undef_weak()) {
        *(ul32 *)loc = 0xe320'f000; // NOP
        break;
      }
      // Just like THM_CALL, ARM_CALL relocation refers either BL or
      // BLX instruction. We may need to rewrite BL → BLX or BLX → BL.
      bool is_bl = ((*(ul32 *)loc & 0xff00'0000) == 0xeb00'0000);
      bool is_blx = ((*(ul32 *)loc & 0xfe00'0000) == 0xfa00'0000);
      if (!is_bl && !is_blx)
        Fatal(ctx) << *this << ": R_ARM_CALL refers neither BL nor BLX";
      u64 val = S + A - P;
      if (is_jump_reachable(val)) {
        if (T) {
          *(ul32 *)loc = 0xfa00'0000; // BLX
          *(ul32 *)loc |= (bit(val, 1) << 24) | bits(val, 25, 2);
        } else {
          *(ul32 *)loc = 0xeb00'0000; // BL
          *(ul32 *)loc |= bits(val, 25, 2);
        }
      } else {
        // Out of range: branch to the ARM entry point of the thunk.
        *(ul32 *)loc = 0xeb00'0000; // BL
        *(ul32 *)loc |= bits(get_arm_thunk_addr() + A - P, 25, 2);
      }
      break;
    }
    case R_ARM_JUMP24: {
      if (sym.is_remaining_undef_weak()) {
        *(ul32 *)loc = 0xe320'f000; // NOP
        break;
      }
      // These relocs refer to a B (unconditional branch) instruction.
      // Unlike BL or BLX, we can't rewrite B to BX in place when the
      // processor mode switch is required because BX doesn't take an
      // immediate; it takes only a register. So if mode switch is
      // required, we jump to a linker-synthesized thunk which does the
      // job with a longer code sequence.
      u64 val = S + A - P;
      if (!is_jump_reachable(val) || T)
        val = get_arm_thunk_addr() + A - P;
      *(ul32 *)loc = (*(ul32 *)loc & 0xff00'0000) | bits(val, 25, 2);
      break;
    }
    case R_ARM_PLT32:
      if (sym.is_remaining_undef_weak()) {
        *(ul32 *)loc = 0xe320'f000; // NOP
      } else {
        // If the target is Thumb, go through the thunk's ARM entry point.
        u64 val = (T ? get_arm_thunk_addr() : S) + A - P;
        *(ul32 *)loc = (*(ul32 *)loc & 0xff00'0000) | bits(val, 25, 2);
      }
      break;
    case R_ARM_THM_JUMP11:
      assert(T);
      check(S + A - P, -(1 << 11), 1 << 11);
      *(ul16 *)loc &= 0xf800;
      *(ul16 *)loc |= bits(S + A - P, 11, 1);
      break;
    case R_ARM_THM_JUMP19: {
      i64 val = S + A - P;
      check(val, -(1 << 19), 1 << 19);
      // sign:J2:J1:imm6:imm11:'0'
      u32 sign = bit(val, 20);
      u32 J2 = bit(val, 19);
      u32 J1 = bit(val, 18);
      u32 imm6 = bits(val, 17, 12);
      u32 imm11 = bits(val, 11, 1);
      *(ul16 *)loc &= 0b1111'1011'1100'0000;
      *(ul16 *)loc |= (sign << 10) | imm6;
      *(ul16 *)(loc + 2) &= 0b1101'0000'0000'0000;
      *(ul16 *)(loc + 2) |= (J2 << 13) | (J1 << 11) | imm11;
      break;
    }
    case R_ARM_THM_JUMP24: {
      if (sym.is_remaining_undef_weak()) {
        *(ul32 *)loc = 0x8000'f3af; // NOP
        break;
      }
      // Just like R_ARM_JUMP24, we need to jump to a thunk if we need to
      // switch processor mode.
      u64 val = S + A - P;
      if (!is_jump_reachable(val) || !T)
        val = get_thumb_thunk_addr() + A - P;
      write_thm_b_imm(loc, val);
      break;
    }
    case R_ARM_MOVW_PREL_NC:
      write_mov_imm(loc, ((S + A) | T) - P);
      break;
    case R_ARM_MOVW_ABS_NC:
      write_mov_imm(loc, (S + A) | T);
      break;
    case R_ARM_THM_MOVW_PREL_NC:
      write_thm_mov_imm(loc, ((S + A) | T) - P);
      break;
    case R_ARM_PREL31:
      check(S + A - P, -(1LL << 30), 1LL << 30);
      *(ul32 *)loc &= 0x8000'0000;
      *(ul32 *)loc |= (S + A - P) & 0x7fff'ffff;
      break;
    case R_ARM_THM_MOVW_ABS_NC:
      write_thm_mov_imm(loc, (S + A) | T);
      break;
    case R_ARM_MOVT_PREL:
      write_mov_imm(loc, (S + A - P) >> 16);
      break;
    case R_ARM_THM_MOVT_PREL:
      write_thm_mov_imm(loc, (S + A - P) >> 16);
      break;
    case R_ARM_MOVT_ABS:
      write_mov_imm(loc, (S + A) >> 16);
      break;
    case R_ARM_THM_MOVT_ABS:
      write_thm_mov_imm(loc, (S + A) >> 16);
      break;
    case R_ARM_TLS_GD32:
      *(ul32 *)loc = sym.get_tlsgd_addr(ctx) + A - P;
      break;
    case R_ARM_TLS_LDM32:
      *(ul32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - P;
      break;
    case R_ARM_TLS_LDO32:
      *(ul32 *)loc = S + A - ctx.dtp_addr;
      break;
    case R_ARM_TLS_IE32:
      *(ul32 *)loc = sym.get_gottp_addr(ctx) + A - P;
      break;
    case R_ARM_TLS_LE32:
      *(ul32 *)loc = S + A - ctx.tp_addr;
      break;
    case R_ARM_TLS_GOTDESC:
      if (sym.has_tlsdesc(ctx)) {
        // A is odd if the corresponding TLS_CALL is Thumb.
        if (A & 1)
          *(ul32 *)loc = sym.get_tlsdesc_addr(ctx) - P + A - 6;
        else
          *(ul32 *)loc = sym.get_tlsdesc_addr(ctx) - P + A - 4;
      } else {
        // Relaxed to the static model: store the TP offset directly.
        *(ul32 *)loc = S - ctx.tp_addr;
      }
      break;
    case R_ARM_TLS_CALL:
      if (sym.has_tlsdesc(ctx)) {
        // BL <tls_trampoline>
        *(ul32 *)loc = 0xeb00'0000 | bits(get_tls_trampoline_addr(P + 8), 25, 2);
      } else {
        // BL -> NOP
        *(ul32 *)loc = 0xe320'f000;
      }
      break;
    case R_ARM_THM_TLS_CALL:
      if (sym.has_tlsdesc(ctx)) {
        u64 val = align_to(get_tls_trampoline_addr(P + 4), 4);
        write_thm_b_imm(loc, val);
        *(ul16 *)(loc + 2) &= ~0x1000; // rewrite BL with BLX
      } else {
        // BL -> NOP.W
        *(ul32 *)loc = 0x8000'f3af;
      }
      break;
    default:
      Error(ctx) << *this << ": unknown relocation: " << rel;
    }
  }
}
// Applies relocations to a non-SHF_ALLOC section (e.g. debug info).
// Only plain data relocation types are meaningful here. References to
// dead section fragments are replaced with tombstone values.
template <>
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;
    // If the relocation refers a mergeable section fragment, use the
    // fragment's address and addend instead of the symbol's.
    SectionFragment<E> *frag;
    i64 frag_addend;
    std::tie(frag, frag_addend) = get_fragment(ctx, rel);
    u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
    u64 A = frag ? frag_addend : get_addend(*this, rel);
    switch (rel.r_type) {
    case R_ARM_ABS32:
      if (std::optional<u64> val = get_tombstone(sym, frag))
        *(ul32 *)loc = *val;
      else
        *(ul32 *)loc = S + A;
      break;
    case R_ARM_TLS_LDO32:
      if (std::optional<u64> val = get_tombstone(sym, frag))
        *(ul32 *)loc = *val;
      else
        *(ul32 *)loc = S + A - ctx.dtp_addr;
      break;
    default:
      Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: "
<< rel;
      break;
    }
  }
}
// Scans this section's relocations to record, before layout is
// finalized, which auxiliary entries each referenced symbol needs
// (GOT/PLT slots, TLS GD/LD/IE/DESC entries, dynamic relocations).
template <>
void InputSection<E>::scan_relocations(Context<E> &ctx) {
  assert(shdr().sh_flags & SHF_ALLOC);
  this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  // Scan relocations
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    // An ifunc symbol is always materialized through GOT and PLT.
    if (sym.is_ifunc())
      sym.flags |= NEEDS_GOT | NEEDS_PLT;
    switch (rel.r_type) {
    case R_ARM_ABS32:
    case R_ARM_MOVT_ABS:
    case R_ARM_THM_MOVT_ABS:
    case R_ARM_TARGET1:
      scan_dyn_absrel(ctx, sym, rel);
      break;
    case R_ARM_THM_CALL:
    case R_ARM_CALL:
    case R_ARM_JUMP24:
    case R_ARM_PLT32:
    case R_ARM_THM_JUMP24:
      if (sym.is_imported)
        sym.flags |= NEEDS_PLT;
      break;
    case R_ARM_GOT_PREL:
    case R_ARM_GOT_BREL:
    case R_ARM_TARGET2:
      sym.flags |= NEEDS_GOT;
      break;
    case R_ARM_MOVT_PREL:
    case R_ARM_THM_MOVT_PREL:
    case R_ARM_PREL31:
      scan_pcrel(ctx, sym, rel);
      break;
    case R_ARM_TLS_GD32:
      sym.flags |= NEEDS_TLSGD;
      break;
    case R_ARM_TLS_LDM32:
      ctx.needs_tlsld = true;
      break;
    case R_ARM_TLS_IE32:
      sym.flags |= NEEDS_GOTTP;
      break;
    case R_ARM_TLS_GOTDESC:
      if (!relax_tlsdesc(ctx, sym))
        sym.flags |= NEEDS_TLSDESC;
      break;
    case R_ARM_TLS_LE32:
      check_tlsle(ctx, sym, rel);
      break;
    // The following relocation types need no extra data structures;
    // they are fully resolved in apply_reloc_alloc.
    case R_ARM_REL32:
    case R_ARM_BASE_PREL:
    case R_ARM_GOTOFF32:
    case R_ARM_THM_JUMP11:
    case R_ARM_THM_JUMP19:
    case R_ARM_MOVW_PREL_NC:
    case R_ARM_MOVW_ABS_NC:
    case R_ARM_THM_MOVW_PREL_NC:
    case R_ARM_THM_MOVW_ABS_NC:
    case R_ARM_TLS_LDO32:
    case R_ARM_TLS_CALL:
    case R_ARM_THM_TLS_CALL:
    case R_ARM_V4BX:
      break;
    default:
      Error(ctx) << *this << ": unknown relocation: " << rel;
    }
  }
}
// Writes the thunk contents: a shared TLS trampoline header followed by
// one range-extension/mode-switch entry per symbol.
template <>
void RangeExtensionThunk<E>::copy_buf(Context<E> &ctx) {
  u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset;
  // TLS trampoline code. ARM32's TLSDESC is designed so that this
  // common piece of code is factored out from object files to reduce
  // output size. Since no one provides it, the linker has to synthesize it.
  static ul32 hdr[] = {
    0xe08e'0000, // add r0, lr, r0
    0xe590'1004, // ldr r1, [r0, #4]
    0xe12f'ff11, // bx r1
  };
  // This is a range extension and mode switch thunk.
  // It has two entry points: +0 for Thumb and +4 for ARM.
  const u8 entry[] = {
    // .thumb
    0xfc, 0x46, // mov ip, pc
    0x60, 0x47, // bx ip # jumps to the following `ldr` insn
    // .arm
    0x04, 0xc0, 0x9f, 0xe5, // ldr ip, 2f
    0x0f, 0xc0, 0x8c, 0xe0, // 1: add ip, ip, pc
    0x1c, 0xff, 0x2f, 0xe1, // bx ip
    0x00, 0x00, 0x00, 0x00, // 2: .word sym - 1b
  };
  static_assert(E::thunk_hdr_size == sizeof(hdr));
  static_assert(E::thunk_size == sizeof(entry));
  memcpy(buf, hdr, sizeof(hdr));
  for (i64 i = 0; i < symbols.size(); i++) {
    u8 *loc = buf + sizeof(hdr) + i * sizeof(entry);
    memcpy(loc, entry, sizeof(entry));
    u64 S = symbols[i]->get_addr(ctx);
    u64 P = output_section.shdr.sh_addr + offset + sizeof(hdr) + i * sizeof(entry);
    // Patch the .word at 2: the `add` at 1: (entry offset 8) reads PC
    // as its own address + 8, so the stored value is S - (P + 16).
    *(ul32 *)(loc + 16) = S - P - 16;
  }
}
// ARM executables use an .ARM.exidx section to look up an exception
// handling record for the current instruction pointer. The table needs
// to be sorted by address.
//
// Other targets use .eh_frame_hdr instead for the same purpose.
// I don't know why only ARM uses a different mechanism, but it's
// likely that it's due to some historical reason.
//
// This function sorts .ARM.exidx records.
void fixup_arm_exidx_section(Context<E> &ctx) {
  Timer t(ctx, "fixup_arm_exidx_section");
  OutputSection<E> *osec = find_section(ctx, SHT_ARM_EXIDX);
  if (!osec)
    return;
  // .ARM.exidx records consist of a signed 31-bit relative address
  // and a 32-bit value. The relative address indicates the start
  // address of a function that the record covers. The value is one of
  // the following:
  //
  // 1. CANTUNWIND indicating that there's no unwinding info for the function,
  // 2. a compact unwinding record encoded into a 32-bit value, or
  // 3. a 31-bit relative address which points to a larger record in
  // the .ARM.extab section.
  //
  // CANTUNWIND is value 1. The most significant bit is set in (2) but
  // not in (3). So we can distinguish them just by looking at a value.
  const u32 EXIDX_CANTUNWIND = 1;
  struct Entry {
    ul32 addr;
    ul32 val;
  };
  if (osec->shdr.sh_size % sizeof(Entry))
    Fatal(ctx) << "invalid .ARM.exidx section size";
  Entry *ent = (Entry *)(ctx.buf + osec->shdr.sh_offset);
  i64 num_entries = osec->shdr.sh_size / sizeof(Entry);
  // Entry's addresses are relative to themselves. In order to sort
  // records by addresses, we first translate them so that the addresses
  // are relative to the beginning of the section.
  auto is_relative = [](u32 val) {
    return val != EXIDX_CANTUNWIND && !(val & 0x8000'0000);
  };
  tbb::parallel_for((i64)0, num_entries, [&](i64 i) {
    i64 offset = sizeof(Entry) * i;
    ent[i].addr = sign_extend(ent[i].addr, 30) + offset;
    if (is_relative(ent[i].val))
      ent[i].val = 0x7fff'ffff & (ent[i].val + offset);
  });
  tbb::parallel_sort(ent, ent + num_entries, [](const Entry &a, const Entry &b) {
    return a.addr < b.addr;
  });
  // Make addresses relative to themselves.
  tbb::parallel_for((i64)0, num_entries, [&](i64 i) {
    i64 offset = sizeof(Entry) * i;
    ent[i].addr = 0x7fff'ffff & (ent[i].addr - offset);
    if (is_relative(ent[i].val))
      ent[i].val = 0x7fff'ffff & (ent[i].val - offset);
  });
  // .ARM.exidx's sh_link should be set to the .text section index.
  // Runtime doesn't care about it, but the binutils's strip command does.
  if (ctx.shdr) {
    if (Chunk<E> *text = find_section(ctx, ".text")) {
      osec->shdr.sh_link = text->shndx;
      ctx.shdr->copy_buf(ctx);
    }
  }
}
} // namespace mold::elf

595
third_party/mold/elf/arch-arm64.cc vendored Normal file
View file

@ -0,0 +1,595 @@
// clang-format off
// This file contains ARM64-specific code. Being new, the ARM64's ELF
// psABI doesn't have anything peculiar. ARM64 is a clean RISC
// instruction set that supports PC-relative load/store instructions.
//
// Unlike ARM32, instruction length doesn't vary. All ARM64
// instructions are 4 bytes long.
//
// Branch instructions used for function call can jump within ±128 MiB.
// We need to create range extension thunks to support binaries whose
// .text is larger than that.
//
// Unlike most other targets, the TLSDESC access model is used by default
// for -fPIC to access thread-local variables instead of the less
// efficient GD model. You can still enable GD but it needs the
// -mtls-dialect=trad flag. Since GD is used rarely, we don't need to
// implement GD → LE relaxation.
//
// https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst
#include "third_party/mold/elf/mold.h"
namespace mold::elf {
using E = ARM64;
// Sets the immlo (bits 30:29) and immhi (bits 23:5) fields of an ADRP
// instruction to the page displacement `val`.
static void write_adrp(u8 *buf, u64 val) {
  u32 immlo = bits(val, 13, 12);
  u32 immhi = bits(val, 32, 14);
  *(ul32 *)buf |= (immlo << 29) | (immhi << 5);
}
// Sets the immlo/immhi fields of an ADR instruction to the byte
// displacement `val`.
static void write_adr(u8 *buf, u64 val) {
  u32 immlo = bits(val, 1, 0);
  u32 immhi = bits(val, 20, 2);
  *(ul32 *)buf |= (immlo << 29) | (immhi << 5);
}
// Rewrites the MOV-family instruction at `buf` to MOVZ (for a
// non-negative value) or MOVN (for a negative one, which stores the
// bitwise complement), keeping only the shift and register fields.
static void write_movn_movz(u8 *buf, i64 val) {
  *(ul32 *)buf &= 0b0000'0000'0110'0000'0000'0000'0001'1111;
  bool negative = val < 0;
  u32 opcode = negative ? 0x9280'0000 : 0xd280'0000; // movn : movz
  i64 imm = negative ? ~val : val;
  *(ul32 *)buf |= opcode | (bits(imm, 15, 0) << 5);
}
// Rounds `val` down to the start of its 4 KiB page.
static u64 page(u64 val) {
  return val & ~(u64)0xfff;
}
// Writes the PLT header. It saves x16/x30, materializes the address of
// .got.plt[2] (the adrp/ldr/add immediates are patched below), loads
// the resolver address from that slot and branches to it.
template <>
void write_plt_header(Context<E> &ctx, u8 *buf) {
  static const ul32 insn[] = {
    0xa9bf'7bf0, // stp x16, x30, [sp,#-16]!
    0x9000'0010, // adrp x16, .got.plt[2]
    0xf940'0211, // ldr x17, [x16, .got.plt[2]]
    0x9100'0210, // add x16, x16, .got.plt[2]
    0xd61f'0220, // br x17
    0xd503'201f, // nop
    0xd503'201f, // nop
    0xd503'201f, // nop
  };
  u64 gotplt = ctx.gotplt->shdr.sh_addr + 16; // address of .got.plt[2]
  u64 plt = ctx.plt->shdr.sh_addr;
  memcpy(buf, insn, sizeof(insn));
  write_adrp(buf + 4, page(gotplt) - page(plt + 4));
  *(ul32 *)(buf + 8) |= bits(gotplt, 11, 3) << 10; // ldr's scaled imm12
  *(ul32 *)(buf + 12) |= (gotplt & 0xfff) << 10; // add's imm12
}
// Writes the PLT entry for `sym`: adrp/add compute the address of the
// symbol's .got.plt slot in x16, ldr loads its content into x17, and
// we branch there.
template <>
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  static const ul32 insn[] = {
    0x9000'0010, // adrp x16, .got.plt[n]
    0xf940'0211, // ldr x17, [x16, .got.plt[n]]
    0x9100'0210, // add x16, x16, .got.plt[n]
    0xd61f'0220, // br x17
  };
  u64 gotplt = sym.get_gotplt_addr(ctx);
  u64 plt = sym.get_plt_addr(ctx);
  memcpy(buf, insn, sizeof(insn));
  write_adrp(buf, page(gotplt) - page(plt));
  *(ul32 *)(buf + 4) |= bits(gotplt, 11, 3) << 10; // ldr's scaled imm12
  *(ul32 *)(buf + 8) |= (gotplt & 0xfff) << 10; // add's imm12
}
// Writes a PLT entry that jumps through the symbol's regular GOT slot
// (instead of a .got.plt slot): adrp/ldr load the target address into
// x17 and we branch to it.
template <>
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  static const ul32 insn[] = {
    0x9000'0010, // adrp x16, GOT[n]
    0xf940'0211, // ldr x17, [x16, GOT[n]]
    0xd61f'0220, // br x17
    0xd503'201f, // nop
  };
  u64 got = sym.get_got_addr(ctx);
  u64 plt = sym.get_plt_addr(ctx);
  memcpy(buf, insn, sizeof(insn));
  write_adrp(buf, page(got) - page(plt));
  *(ul32 *)(buf + 4) |= bits(got, 11, 3) << 10; // ldr's scaled imm12
}
// Applies a relocation within the .eh_frame section. `offset` is
// relative to the section start and `val` is the already-resolved
// target value; only the data relocation types that appear in
// .eh_frame are supported.
template <>
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
u64 offset, u64 val) {
  u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
  switch (rel.r_type) {
  case R_NONE:
    break;
  case R_AARCH64_ABS64:
    *(ul64 *)loc = val;
    break;
  case R_AARCH64_PREL32:
    *(ul32 *)loc = val - this->shdr.sh_addr - offset;
    break;
  case R_AARCH64_PREL64:
    *(ul64 *)loc = val - this->shdr.sh_addr - offset;
    break;
  default:
    Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
  }
}
// Returns true if the instruction at `loc` is an ADRP. The mask
// ignores the immlo bits (30:29), which vary with the immediate.
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADRP--Form-PC-relative-address-to-4KB-page-
static bool is_adrp(u8 *loc) {
  return (bits(*(ul32 *)loc, 31, 24) & 0b1001'1111) == 0b1001'0000;
}
// Returns true if the instruction at `loc` is an LDR (immediate).
// The mask ignores the instruction bits that belong to the immediate.
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate--
static bool is_ldr(u8 *loc) {
  return (bits(*(ul32 *)loc, 31, 20) & 0b1111'1111'1100) == 0b1111'1001'0100;
}
// Returns true if the instruction at `loc` is an ADD (immediate).
// The mask ignores the instruction bits that belong to the immediate.
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--immediate---Add--immediate--
static bool is_add(u8 *loc) {
  return (bits(*(ul32 *)loc, 31, 20) & 0b1111'1111'1100) == 0b1001'0001'0000;
}
// Applies this section's relocations to the output buffer (`base`
// points at this section's bytes in the output). Naming follows the
// AArch64 ELF convention: S = symbol address, A = addend, P = place
// (address being relocated), G = GOT slot offset, GOT = GOT base.
template <>
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  ElfRel<E> *dynrel = nullptr;
  if (ctx.reldyn)
    dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
file.reldyn_offset + this->reldyn_offset);
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE)
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;
    // Reports an error if `val` does not fit in [lo, hi).
    auto check = [&](i64 val, i64 lo, i64 hi) {
      if (val < lo || hi <= val)
        Error(ctx) << *this << ": relocation " << rel << " against "
<< sym << " out of range: " << val << " is not in ["
<< lo << ", " << hi << ")";
    };
    u64 S = sym.get_addr(ctx);
    u64 A = rel.r_addend;
    u64 P = get_addr() + rel.r_offset;
    u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>);
    u64 GOT = ctx.got->shdr.sh_addr;
    switch (rel.r_type) {
    case R_AARCH64_ABS64:
      apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
      break;
    // LDSTn_ABS_LO12_NC: low 12 bits of the address, scaled by the
    // access size n/8, written to the load/store immediate field.
    case R_AARCH64_LDST8_ABS_LO12_NC:
    case R_AARCH64_ADD_ABS_LO12_NC:
      *(ul32 *)loc |= bits(S + A, 11, 0) << 10;
      break;
    case R_AARCH64_LDST16_ABS_LO12_NC:
      *(ul32 *)loc |= bits(S + A, 11, 1) << 10;
      break;
    case R_AARCH64_LDST32_ABS_LO12_NC:
      *(ul32 *)loc |= bits(S + A, 11, 2) << 10;
      break;
    case R_AARCH64_LDST64_ABS_LO12_NC:
      *(ul32 *)loc |= bits(S + A, 11, 3) << 10;
      break;
    case R_AARCH64_LDST128_ABS_LO12_NC:
      *(ul32 *)loc |= bits(S + A, 11, 4) << 10;
      break;
    // MOVW_UABS_Gn: 16-bit chunk n of the absolute address; the _NC
    // variants skip the overflow check.
    case R_AARCH64_MOVW_UABS_G0:
      check(S + A, 0, 1 << 16);
      *(ul32 *)loc |= bits(S + A, 15, 0) << 5;
      break;
    case R_AARCH64_MOVW_UABS_G0_NC:
      *(ul32 *)loc |= bits(S + A, 15, 0) << 5;
      break;
    case R_AARCH64_MOVW_UABS_G1:
      check(S + A, 0, 1LL << 32);
      *(ul32 *)loc |= bits(S + A, 31, 16) << 5;
      break;
    case R_AARCH64_MOVW_UABS_G1_NC:
      *(ul32 *)loc |= bits(S + A, 31, 16) << 5;
      break;
    case R_AARCH64_MOVW_UABS_G2:
      check(S + A, 0, 1LL << 48);
      *(ul32 *)loc |= bits(S + A, 47, 32) << 5;
      break;
    case R_AARCH64_MOVW_UABS_G2_NC:
      *(ul32 *)loc |= bits(S + A, 47, 32) << 5;
      break;
    case R_AARCH64_MOVW_UABS_G3:
      *(ul32 *)loc |= bits(S + A, 63, 48) << 5;
      break;
    case R_AARCH64_ADR_GOT_PAGE:
      if (sym.has_got(ctx)) {
        i64 val = page(G + GOT + A) - page(P);
        check(val, -(1LL << 32), 1LL << 32);
        write_adrp(loc, val);
      } else {
        // Relax GOT-loading ADRP+LDR to an immediate ADRP+ADD
        // (the matching LDR reloc is consumed here, hence i++).
        i64 val = page(S + A) - page(P);
        check(val, -(1LL << 32), 1LL << 32);
        write_adrp(loc, val);
        u32 reg = bits(*(ul32 *)loc, 4, 0);
        *(ul32 *)(loc + 4) = 0x9100'0000 | (reg << 5) | reg; // ADD
        *(ul32 *)(loc + 4) |= bits(S + A, 11, 0) << 10;
        i++;
      }
      break;
    case R_AARCH64_ADR_PREL_PG_HI21: {
      // The ARM64 psABI defines that an `ADRP x0, foo` and `ADD x0, x0,
      // :lo12: foo` instruction pair to materialize a PC-relative address
      // in a register can be relaxed to `NOP` followed by `ADR x0, foo`
      // if foo is in PC ± 1 MiB.
      if (ctx.arg.relax && i + 1 < rels.size() &&
          sign_extend(S + A - P - 4, 20) == S + A - P - 4) {
        const ElfRel<E> &rel2 = rels[i + 1];
        if (rel2.r_type == R_AARCH64_ADD_ABS_LO12_NC &&
            rel2.r_sym == rel.r_sym &&
            rel2.r_offset == rel.r_offset + 4 &&
            rel2.r_addend == rel.r_addend &&
            is_adrp(loc) &&
            is_add(loc + 4)) {
          u32 reg1 = bits(*(ul32 *)loc, 4, 0);
          u32 reg2 = bits(*(ul32 *)(loc + 4), 4, 0);
          if (reg1 == reg2) {
            *(ul32 *)loc = 0xd503'201f; // nop
            *(ul32 *)(loc + 4) = 0x1000'0000 | reg1; // adr
            write_adr(loc + 4, S + A - P - 4);
            i++;
            break;
          }
        }
      }
      i64 val = page(S + A) - page(P);
      check(val, -(1LL << 32), 1LL << 32);
      write_adrp(loc, val);
      break;
    }
    case R_AARCH64_ADR_PREL_LO21:
      check(S + A - P, -(1LL << 20), 1LL << 20);
      write_adr(loc, S + A - P);
      break;
    case R_AARCH64_CALL26:
    case R_AARCH64_JUMP26: {
      if (sym.is_remaining_undef_weak()) {
        // On ARM, calling a weak undefined symbol jumps to the
        // next instruction.
        *(ul32 *)loc = 0xd503'201f; // nop
        break;
      }
      // Branches reach ±128 MiB; otherwise go through a range
      // extension thunk.
      i64 val = S + A - P;
      if (val < -(1 << 27) || (1 << 27) <= val)
        val = get_thunk_addr(i) + A - P;
      *(ul32 *)loc |= bits(val, 27, 2);
      break;
    }
    case R_AARCH64_PLT32:
      check(S + A - P, -(1LL << 31), 1LL << 31);
      *(ul32 *)loc = S + A - P;
      break;
    case R_AARCH64_CONDBR19:
    case R_AARCH64_LD_PREL_LO19:
      check(S + A - P, -(1LL << 20), 1LL << 20);
      *(ul32 *)loc |= bits(S + A - P, 20, 2) << 5;
      break;
    case R_AARCH64_PREL16:
      check(S + A - P, -(1LL << 15), 1LL << 15);
      *(ul16 *)loc = S + A - P;
      break;
    case R_AARCH64_PREL32:
      check(S + A - P, -(1LL << 31), 1LL << 32);
      *(ul32 *)loc = S + A - P;
      break;
    case R_AARCH64_PREL64:
      *(ul64 *)loc = S + A - P;
      break;
    case R_AARCH64_LD64_GOT_LO12_NC:
      *(ul32 *)loc |= bits(G + GOT + A, 11, 3) << 10;
      break;
    case R_AARCH64_LD64_GOTPAGE_LO15: {
      i64 val = G + GOT + A - page(GOT);
      check(val, 0, 1 << 15);
      *(ul32 *)loc |= bits(val, 14, 3) << 10;
      break;
    }
    case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: {
      i64 val = page(sym.get_gottp_addr(ctx) + A) - page(P);
      check(val, -(1LL << 32), 1LL << 32);
      write_adrp(loc, val);
      break;
    }
    case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
      *(ul32 *)loc |= bits(sym.get_gottp_addr(ctx) + A, 11, 3) << 10;
      break;
    case R_AARCH64_TLSLE_MOVW_TPREL_G0: {
      i64 val = S + A - ctx.tp_addr;
      check(val, -(1 << 15), 1 << 15);
      write_movn_movz(loc, val);
      break;
    }
    case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
      *(ul32 *)loc |= bits(S + A - ctx.tp_addr, 15, 0) << 5;
      break;
    case R_AARCH64_TLSLE_MOVW_TPREL_G1: {
      i64 val = S + A - ctx.tp_addr;
      check(val, -(1LL << 31), 1LL << 31);
      write_movn_movz(loc, val >> 16);
      break;
    }
    case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
      *(ul32 *)loc |= bits(S + A - ctx.tp_addr, 31, 16) << 5;
      break;
    case R_AARCH64_TLSLE_MOVW_TPREL_G2: {
      i64 val = S + A - ctx.tp_addr;
      check(val, -(1LL << 47), 1LL << 47);
      write_movn_movz(loc, val >> 32);
      break;
    }
    case R_AARCH64_TLSLE_ADD_TPREL_HI12: {
      i64 val = S + A - ctx.tp_addr;
      check(val, 0, 1LL << 24);
      *(ul32 *)loc |= bits(val, 23, 12) << 10;
      break;
    }
    case R_AARCH64_TLSLE_ADD_TPREL_LO12:
      check(S + A - ctx.tp_addr, 0, 1 << 12);
      *(ul32 *)loc |= bits(S + A - ctx.tp_addr, 11, 0) << 10;
      break;
    case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
      *(ul32 *)loc |= bits(S + A - ctx.tp_addr, 11, 0) << 10;
      break;
    case R_AARCH64_TLSGD_ADR_PAGE21: {
      i64 val = page(sym.get_tlsgd_addr(ctx) + A) - page(P);
      check(val, -(1LL << 32), 1LL << 32);
      write_adrp(loc, val);
      break;
    }
    case R_AARCH64_TLSGD_ADD_LO12_NC:
      *(ul32 *)loc |= bits(sym.get_tlsgd_addr(ctx) + A, 11, 0) << 10;
      break;
    // TLSDESC relocations: if the symbol has no TLSDESC entry, the
    // access has been relaxed to the LE model, so rewrite the TLSDESC
    // instruction sequence to compute the TP offset directly.
    case R_AARCH64_TLSDESC_ADR_PAGE21:
      if (sym.has_tlsdesc(ctx)) {
        i64 val = page(sym.get_tlsdesc_addr(ctx) + A) - page(P);
        check(val, -(1LL << 32), 1LL << 32);
        write_adrp(loc, val);
      } else {
        // adrp x0, 0 -> movz x0, #tls_offset_hi, lsl #16
        i64 val = (S + A - ctx.tp_addr);
        check(val, -(1LL << 32), 1LL << 32);
        *(ul32 *)loc = 0xd2a0'0000 | (bits(val, 32, 16) << 5);
      }
      break;
    case R_AARCH64_TLSDESC_LD64_LO12:
      if (sym.has_tlsdesc(ctx)) {
        *(ul32 *)loc |= bits(sym.get_tlsdesc_addr(ctx) + A, 11, 3) << 10;
      } else {
        // ldr x2, [x0] -> movk x0, #tls_offset_lo
        u32 offset_lo = (S + A - ctx.tp_addr) & 0xffff;
        *(ul32 *)loc = 0xf280'0000 | (offset_lo << 5);
      }
      break;
    case R_AARCH64_TLSDESC_ADD_LO12:
      if (sym.has_tlsdesc(ctx)) {
        *(ul32 *)loc |= bits(sym.get_tlsdesc_addr(ctx) + A, 11, 0) << 10;
      } else {
        // add x0, x0, #0 -> nop
        *(ul32 *)loc = 0xd503'201f;
      }
      break;
    case R_AARCH64_TLSDESC_CALL:
      if (!sym.has_tlsdesc(ctx)) {
        // blr x2 -> nop
        *(ul32 *)loc = 0xd503'201f;
      }
      break;
    default:
      unreachable();
    }
  }
}
// Applies relocations to a non-SHF_ALLOC section (e.g. debug info).
// Only plain data relocation types are meaningful here. References to
// dead section fragments are replaced with tombstone values.
template <>
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;
    // Reports an error if `val` does not fit in [lo, hi).
    auto check = [&](i64 val, i64 lo, i64 hi) {
      if (val < lo || hi <= val)
        Error(ctx) << *this << ": relocation " << rel << " against "
<< sym << " out of range: " << val << " is not in ["
<< lo << ", " << hi << ")";
    };
    // If the relocation refers a mergeable section fragment, use the
    // fragment's address and addend instead of the symbol's.
    SectionFragment<E> *frag;
    i64 frag_addend;
    std::tie(frag, frag_addend) = get_fragment(ctx, rel);
    u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
    u64 A = frag ? frag_addend : (i64)rel.r_addend;
    switch (rel.r_type) {
    case R_AARCH64_ABS64:
      if (std::optional<u64> val = get_tombstone(sym, frag))
        *(ul64 *)loc = *val;
      else
        *(ul64 *)loc = S + A;
      break;
    case R_AARCH64_ABS32: {
      i64 val = S + A;
      check(val, 0, 1LL << 32);
      *(ul32 *)loc = val;
      break;
    }
    default:
      Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: "
<< rel;
      break;
    }
  }
}
// Scans this section's relocations to record, before layout is
// finalized, which auxiliary entries each referenced symbol needs
// (GOT/PLT slots, TLS GD/IE/DESC entries, dynamic relocations), and to
// detect instruction pairs eligible for relaxation.
template <>
void InputSection<E>::scan_relocations(Context<E> &ctx) {
  assert(shdr().sh_flags & SHF_ALLOC);
  this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  // Scan relocations
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = (u8 *)(contents.data() + rel.r_offset);
    // An ifunc symbol is always materialized through GOT and PLT.
    if (sym.is_ifunc())
      sym.flags |= NEEDS_GOT | NEEDS_PLT;
    switch (rel.r_type) {
    case R_AARCH64_ABS64:
      scan_dyn_absrel(ctx, sym, rel);
      break;
    case R_AARCH64_ADR_GOT_PAGE:
      // An ADR_GOT_PAGE and GOT_LO12_NC relocation pair is used to load a
      // symbol's address from GOT. If the GOT value is a link-time
      // constant, we may be able to rewrite the ADRP+LDR instruction pair
      // with an ADRP+ADD, eliminating a GOT memory load.
      if (ctx.arg.relax && sym.is_relative() && !sym.is_imported &&
          !sym.is_ifunc() && i + 1 < rels.size()) {
        // ADRP+LDR must be consecutive and use the same register to relax.
        const ElfRel<E> &rel2 = rels[i + 1];
        if (rel2.r_type == R_AARCH64_LD64_GOT_LO12_NC &&
            rel2.r_offset == rel.r_offset + 4 &&
            rel2.r_sym == rel.r_sym &&
            rel.r_addend == 0 &&
            rel2.r_addend == 0 &&
            is_adrp(loc) &&
            is_ldr(loc + 4)) {
          u32 rd = bits(*(ul32 *)loc, 4, 0);
          u32 rn = bits(*(ul32 *)(loc + 4), 9, 5);
          u32 rt = bits(*(ul32 *)(loc + 4), 4, 0);
          if (rd == rn && rn == rt) {
            // Relaxable: no GOT entry needed; skip the paired reloc.
            i++;
            break;
          }
        }
      }
      sym.flags |= NEEDS_GOT;
      break;
    case R_AARCH64_LD64_GOT_LO12_NC:
    case R_AARCH64_LD64_GOTPAGE_LO15:
      sym.flags |= NEEDS_GOT;
      break;
    case R_AARCH64_CALL26:
    case R_AARCH64_JUMP26:
    case R_AARCH64_PLT32:
      if (sym.is_imported)
        sym.flags |= NEEDS_PLT;
      break;
    case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
    case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
      sym.flags |= NEEDS_GOTTP;
      break;
    case R_AARCH64_ADR_PREL_PG_HI21:
      scan_pcrel(ctx, sym, rel);
      break;
    case R_AARCH64_TLSGD_ADR_PAGE21:
      sym.flags |= NEEDS_TLSGD;
      break;
    case R_AARCH64_TLSDESC_ADR_PAGE21:
    case R_AARCH64_TLSDESC_LD64_LO12:
    case R_AARCH64_TLSDESC_ADD_LO12:
      if (!relax_tlsdesc(ctx, sym))
        sym.flags |= NEEDS_TLSDESC;
      break;
    case R_AARCH64_TLSLE_MOVW_TPREL_G0:
    case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
    case R_AARCH64_TLSLE_MOVW_TPREL_G1:
    case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
    case R_AARCH64_TLSLE_MOVW_TPREL_G2:
    case R_AARCH64_TLSLE_ADD_TPREL_HI12:
    case R_AARCH64_TLSLE_ADD_TPREL_LO12:
    case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
      check_tlsle(ctx, sym, rel);
      break;
    // The following relocation types need no extra data structures;
    // they are fully resolved in apply_reloc_alloc.
    case R_AARCH64_ADD_ABS_LO12_NC:
    case R_AARCH64_ADR_PREL_LO21:
    case R_AARCH64_CONDBR19:
    case R_AARCH64_LD_PREL_LO19:
    case R_AARCH64_LDST16_ABS_LO12_NC:
    case R_AARCH64_LDST32_ABS_LO12_NC:
    case R_AARCH64_LDST64_ABS_LO12_NC:
    case R_AARCH64_LDST128_ABS_LO12_NC:
    case R_AARCH64_LDST8_ABS_LO12_NC:
    case R_AARCH64_MOVW_UABS_G0:
    case R_AARCH64_MOVW_UABS_G0_NC:
    case R_AARCH64_MOVW_UABS_G1:
    case R_AARCH64_MOVW_UABS_G1_NC:
    case R_AARCH64_MOVW_UABS_G2:
    case R_AARCH64_MOVW_UABS_G2_NC:
    case R_AARCH64_MOVW_UABS_G3:
    case R_AARCH64_PREL16:
    case R_AARCH64_PREL32:
    case R_AARCH64_PREL64:
    case R_AARCH64_TLSGD_ADD_LO12_NC:
    case R_AARCH64_TLSDESC_CALL:
      break;
    default:
      Error(ctx) << *this << ": unknown relocation: " << rel;
    }
  }
}
// Writes one range-extension thunk entry per symbol. Each entry
// materializes the destination address in x16 with the adrp/add pair
// and branches to it, extending the reach of B/BL beyond ±128 MiB.
template <>
void RangeExtensionThunk<E>::copy_buf(Context<E> &ctx) {
  static const ul32 data[] = {
    0x9000'0010, // adrp x16, 0   # R_AARCH64_ADR_PREL_PG_HI21
    0x9100'0210, // add x16, x16  # R_AARCH64_ADD_ABS_LO12_NC
    0xd61f'0200, // br x16
  };
  static_assert(E::thunk_size == sizeof(data));
  u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset;
  for (i64 idx = 0; idx < symbols.size(); idx++) {
    u8 *loc = buf + idx * E::thunk_size;
    memcpy(loc, data, sizeof(data));
    u64 S = symbols[idx]->get_addr(ctx);
    u64 P = output_section.shdr.sh_addr + offset + idx * E::thunk_size;
    write_adrp(loc, page(S) - page(P));
    *(ul32 *)(loc + 4) |= bits(S, 11, 0) << 10;
  }
}
} // namespace mold::elf

565
third_party/mold/elf/arch-i386.cc vendored Normal file
View file

@ -0,0 +1,565 @@
// clang-format off
// i386 is similar to x86-64 but lacks PC-relative memory access
// instructions. So it's not straightforward to support position-
// independent code (PIC) on that target.
//
// If an object file is compiled with -fPIC, a function that needs to load
// a value from memory first obtains its own address with the following
// code
//
// call __x86.get_pc_thunk.bx
//
// where __x86.get_pc_thunk.bx is defined as
//
// __x86.get_pc_thunk.bx:
// mov (%esp), %ebx # move the return address to %ebx
// ret
//
// . With the function's own address (or, more precisely, the address
// immediately after the call instruction), the function can compute an
// absolute address of a variable with its address + link-time constant.
//
// Executing call-mov-ret isn't very cheap, and allocating one register to
// store PC isn't cheap too, especially given that i386 has only 8
// general-purpose registers. But that's the cost of PIC on i386. You need
// to pay it when creating a .so and a position-independent executable.
//
// When a position-independent function calls another function, it sets
// %ebx to the address of .got. Position-independent PLT entries use that
// register to load values from .got.plt/.got.
//
// If we are creating a position-dependent executable (PDE), we can't
// assume that %ebx is set to .got. For PDE, we need to create position-
// dependent PLT entries which don't use %ebx.
//
// https://github.com/rui314/psabi/blob/main/i386.pdf
#include "third_party/mold/elf/mold.h"
namespace mold::elf {
using E = I386;
// Read the addend embedded at the relocated location. i386 uses REL-style
// relocations (no r_addend field), so the addend is stored inline in the
// section contents. The width we read depends on the relocation type.
template <>
i64 get_addend(u8 *loc, const ElfRel<E> &rel) {
  switch (rel.r_type) {
  // 8-bit relocations
  case R_386_8:
  case R_386_PC8:
    return *loc;
  // 16-bit little-endian relocations
  case R_386_16:
  case R_386_PC16:
    return *(ul16 *)loc;
  // 32-bit little-endian relocations
  case R_386_32:
  case R_386_PC32:
  case R_386_GOT32:
  case R_386_GOT32X:
  case R_386_PLT32:
  case R_386_GOTOFF:
  case R_386_GOTPC:
  case R_386_TLS_LDM:
  case R_386_TLS_GOTIE:
  case R_386_TLS_LE:
  case R_386_TLS_IE:
  case R_386_TLS_GD:
  case R_386_TLS_LDO_32:
  case R_386_SIZE32:
  case R_386_TLS_GOTDESC:
    return *(ul32 *)loc;
  default:
    // Relocation types with no in-place addend (e.g. R_386_NONE,
    // R_386_TLS_DESC_CALL, which only marks an instruction).
    return 0;
  }
}
// Write an addend back to the relocated location. This is the inverse of
// get_addend() above; the two switch statements must stay in sync so that
// a value read by one can be round-tripped by the other.
template <>
void write_addend(u8 *loc, i64 val, const ElfRel<E> &rel) {
  switch (rel.r_type) {
  case R_386_NONE:
    break;
  // 8-bit relocations
  case R_386_8:
  case R_386_PC8:
    *loc = val;
    break;
  // 16-bit little-endian relocations
  case R_386_16:
  case R_386_PC16:
    *(ul16 *)loc = val;
    break;
  // 32-bit little-endian relocations
  case R_386_32:
  case R_386_PC32:
  case R_386_GOT32:
  case R_386_GOT32X:
  case R_386_PLT32:
  case R_386_GOTOFF:
  case R_386_GOTPC:
  case R_386_TLS_LDM:
  case R_386_TLS_GOTIE:
  case R_386_TLS_LE:
  case R_386_TLS_IE:
  case R_386_TLS_GD:
  case R_386_TLS_LDO_32:
  case R_386_SIZE32:
  case R_386_TLS_GOTDESC:
    *(ul32 *)loc = val;
    break;
  default:
    unreachable();
  }
}
// Write the PLT header, which all PLT entries tail-call into for lazy
// symbol resolution. The PIC variant addresses GOTPLT relative to %ebx
// (callers of PIC PLT entries set %ebx to the GOT base); the non-PIC
// variant uses an absolute address.
template <>
void write_plt_header(Context<E> &ctx, u8 *buf) {
  if (ctx.arg.pic) {
    static const u8 insn[] = {
      0xf3, 0x0f, 0x1e, 0xfb, // endbr32
      0x51,                   // push %ecx
      0x8d, 0x8b, 0, 0, 0, 0, // lea GOTPLT+4(%ebx), %ecx
      0xff, 0x31,             // push (%ecx)
      0xff, 0x61, 0x04,       // jmp *0x4(%ecx)
    };
    memcpy(buf, insn, sizeof(insn));
    // Patch the lea's disp32 (byte offset 7) with GOTPLT+4 relative to GOT.
    *(ul32 *)(buf + 7) = ctx.gotplt->shdr.sh_addr - ctx.got->shdr.sh_addr + 4;
  } else {
    static const u8 insn[] = {
      0xf3, 0x0f, 0x1e, 0xfb, // endbr32
      0x51,                   // push %ecx
      0xb9, 0, 0, 0, 0,       // mov GOTPLT+4, %ecx
      0xff, 0x31,             // push (%ecx)
      0xff, 0x61, 0x04,       // jmp *0x4(%ecx)
      0xcc,                   // (padding)
    };
    memcpy(buf, insn, sizeof(insn));
    // Patch the mov's imm32 (byte offset 6) with the absolute GOTPLT+4.
    *(ul32 *)(buf + 6) = ctx.gotplt->shdr.sh_addr + 4;
  }
}
// Write one PLT entry. It loads the entry's relocation offset into %ecx
// (consumed by the lazy resolver reached via the GOTPLT slot) and jumps
// through the symbol's GOTPLT slot, %ebx-relative in the PIC case.
template <>
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  if (ctx.arg.pic) {
    static const u8 insn[] = {
      0xf3, 0x0f, 0x1e, 0xfb, // endbr32
      0xb9, 0, 0, 0, 0,       // mov $reloc_offset, %ecx
      0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
      0xcc,                   // (padding)
    };
    memcpy(buf, insn, sizeof(insn));
    *(ul32 *)(buf + 5) = sym.get_plt_idx(ctx) * sizeof(ElfRel<E>); // offset into .rel.plt
    *(ul32 *)(buf + 11) = sym.get_gotplt_addr(ctx) - ctx.got->shdr.sh_addr;
  } else {
    static const u8 insn[] = {
      0xf3, 0x0f, 0x1e, 0xfb, // endbr32
      0xb9, 0, 0, 0, 0,       // mov $reloc_offset, %ecx
      0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
      0xcc,                   // (padding)
    };
    memcpy(buf, insn, sizeof(insn));
    *(ul32 *)(buf + 5) = sym.get_plt_idx(ctx) * sizeof(ElfRel<E>); // offset into .rel.plt
    *(ul32 *)(buf + 11) = sym.get_gotplt_addr(ctx);
  }
}
// Write a PLT entry for a symbol that already has a regular GOT slot
// (a "PLT-GOT" entry). No lazy resolution is needed, so it is just an
// indirect jump through the GOT, %ebx-relative in the PIC case.
template <>
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  if (ctx.arg.pic) {
    static const u8 insn[] = {
      0xf3, 0x0f, 0x1e, 0xfb,             // endbr32
      0xff, 0xa3, 0, 0, 0, 0,             // jmp *foo@GOT(%ebx)
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // (padding)
    };
    memcpy(buf, insn, sizeof(insn));
    *(ul32 *)(buf + 6) = sym.get_got_addr(ctx) - ctx.got->shdr.sh_addr;
  } else {
    static const u8 insn[] = {
      0xf3, 0x0f, 0x1e, 0xfb,             // endbr32
      0xff, 0x25, 0, 0, 0, 0,             // jmp *foo@GOT
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // (padding)
    };
    memcpy(buf, insn, sizeof(insn));
    *(ul32 *)(buf + 6) = sym.get_got_addr(ctx);
  }
}
// Apply a relocation against .eh_frame. Only absolute and PC-relative
// 32-bit relocations are expected there; anything else is a fatal error.
template <>
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
                                    u64 offset, u64 val) {
  u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
  switch (rel.r_type) {
  case R_NONE:
    break;
  case R_386_32:
    *(ul32 *)loc = val;
    break;
  case R_386_PC32:
    // PC-relative: subtract the runtime address of the relocated place.
    *(ul32 *)loc = val - this->shdr.sh_addr - offset;
    break;
  default:
    Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
  }
}
// GOT32X relaxation helper: a `mov mem, reg` (opcode 0x8b) that loads a
// GOT slot can be rewritten into a `lea` (opcode 0x8d) that computes the
// symbol address directly, reusing the original ModR/M byte. Returns the
// two replacement bytes packed as (opcode << 8) | modrm, or 0 if the
// instruction is not a relaxable mov.
static u32 relax_got32x(u8 *loc) {
  // mov imm(%reg1), %reg2 -> lea imm(%reg1), %reg2
  const u8 opcode = loc[0];
  const u8 modrm = loc[1];
  if (opcode != 0x8b)
    return 0;
  return 0x8d00 | modrm;
}
// Relax TLS General Dynamic to Local Exec: replace the GD code sequence
// (address computation + __tls_get_addr call) with a direct computation
// of the thread-pointer-relative address. `loc` points at the operand of
// the __tls_get_addr call relocation; `rel` is that following relocation,
// whose type determines where the GD sequence begins relative to `loc`
// (per the psABI-defined code sequences), and `val` is the TP offset.
static void relax_gd_to_le(u8 *loc, ElfRel<E> rel, u64 val) {
  static const u8 insn[] = {
    0x65, 0xa1, 0, 0, 0, 0, // mov %gs:0, %eax
    0x81, 0xc0, 0, 0, 0, 0, // add $tp_offset, %eax
  };
  switch (rel.r_type) {
  case R_386_PLT32:
  case R_386_PC32:
    // Direct call form: the sequence starts 3 bytes before the operand.
    memcpy(loc - 3, insn, sizeof(insn));
    *(ul32 *)(loc + 5) = val;
    break;
  case R_386_GOT32:
  case R_386_GOT32X:
    // Indirect (GOT) call form: the sequence starts 2 bytes earlier.
    memcpy(loc - 2, insn, sizeof(insn));
    *(ul32 *)(loc + 6) = val;
    break;
  default:
    unreachable();
  }
}
// Relax TLS Local Dynamic to Local Exec: replace the LD code sequence
// (module-base computation via __tls_get_addr) with a direct computation
// from the thread pointer. `val` is the distance from the TLS block start
// to the thread pointer (subtracted so that subsequent DTPOFF-relative
// accesses still land in the right place).
static void relax_ld_to_le(u8 *loc, ElfRel<E> rel, u64 val) {
  switch (rel.r_type) {
  case R_386_PLT32:
  case R_386_PC32: {
    // Direct call form of the __tls_get_addr call.
    static const u8 insn[] = {
      0x65, 0xa1, 0, 0, 0, 0, // mov %gs:0, %eax
      0x2d, 0, 0, 0, 0,       // sub $tls_size, %eax
    };
    memcpy(loc - 2, insn, sizeof(insn));
    *(ul32 *)(loc + 5) = val;
    break;
  }
  case R_386_GOT32:
  case R_386_GOT32X: {
    // Indirect (GOT) call form; one byte longer, so pad with a nop.
    static const u8 insn[] = {
      0x65, 0xa1, 0, 0, 0, 0, // mov %gs:0, %eax
      0x2d, 0, 0, 0, 0,       // sub $tls_size, %eax
      0x90,                   // nop
    };
    memcpy(loc - 2, insn, sizeof(insn));
    *(ul32 *)(loc + 5) = val;
    break;
  }
  default:
    unreachable();
  }
}
// Apply relocations to a SHF_ALLOC section being copied into the output.
// Decisions made earlier by scan_relocations() (GOT/PLT flags, relaxation
// choices) are assumed here; the two functions must agree on which relocs
// get relaxed.
template <>
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  // Cursor into this section's slice of .rel.dyn for dynamic relocations
  // emitted by apply_dyn_absrel().
  ElfRel<E> *dynrel = nullptr;
  if (ctx.reldyn)
    dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
                           file.reldyn_offset + this->reldyn_offset);
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE)
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;
    // Report values that don't fit in the relocated field.
    auto check = [&](i64 val, i64 lo, i64 hi) {
      if (val < lo || hi <= val)
        Error(ctx) << *this << ": relocation " << rel << " against "
                   << sym << " out of range: " << val << " is not in ["
                   << lo << ", " << hi << ")";
    };
    // Standard psABI notation:
    //   S = symbol address, A = addend (inline; i386 uses REL),
    //   P = address of the relocated place, G = GOT slot offset,
    //   GOT = GOT base address.
    u64 S = sym.get_addr(ctx);
    u64 A = get_addend(*this, rel);
    u64 P = get_addr() + rel.r_offset;
    u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>);
    u64 GOT = ctx.got->shdr.sh_addr;
    switch (rel.r_type) {
    case R_386_8:
      check(S + A, 0, 1 << 8);
      *loc = S + A;
      break;
    case R_386_16:
      check(S + A, 0, 1 << 16);
      *(ul16 *)loc = S + A;
      break;
    case R_386_32:
      // May need a dynamic relocation if the value isn't link-time known.
      apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
      break;
    case R_386_PC8:
      check(S + A - P, -(1 << 7), 1 << 7);
      *loc = S + A - P;
      break;
    case R_386_PC16:
      check(S + A - P, -(1 << 15), 1 << 15);
      *(ul16 *)loc = S + A - P;
      break;
    case R_386_PC32:
    case R_386_PLT32:
      *(ul32 *)loc = S + A - P;
      break;
    case R_386_GOT32:
      *(ul32 *)loc = G + A;
      break;
    case R_386_GOT32X:
      if (sym.has_got(ctx)) {
        *(ul32 *)loc = G + A;
      } else {
        // scan_relocations() decided to relax this mov into a lea;
        // rewrite the two opcode bytes and store a GOT-relative value.
        u32 insn = relax_got32x(loc - 2);
        assert(insn);
        loc[-2] = insn >> 8;
        loc[-1] = insn;
        *(ul32 *)loc = S + A - GOT;
      }
      break;
    case R_386_GOTOFF:
      *(ul32 *)loc = S + A - GOT;
      break;
    case R_386_GOTPC:
      *(ul32 *)loc = GOT + A - P;
      break;
    case R_386_TLS_GOTIE:
      *(ul32 *)loc = sym.get_gottp_addr(ctx) + A - GOT;
      break;
    case R_386_TLS_LE:
      *(ul32 *)loc = S + A - ctx.tp_addr;
      break;
    case R_386_TLS_IE:
      *(ul32 *)loc = sym.get_gottp_addr(ctx) + A;
      break;
    case R_386_TLS_GD:
      if (sym.has_tlsgd(ctx)) {
        *(ul32 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT;
      } else {
        // Relaxed GD->LE; the following reloc (the __tls_get_addr call)
        // is consumed as part of the rewritten sequence.
        relax_gd_to_le(loc, rels[i + 1], S - ctx.tp_addr);
        i++;
      }
      break;
    case R_386_TLS_LDM:
      if (ctx.got->has_tlsld(ctx)) {
        *(ul32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT;
      } else {
        // Relaxed LD->LE; likewise skips the paired call reloc.
        relax_ld_to_le(loc, rels[i + 1], ctx.tp_addr - ctx.tls_begin);
        i++;
      }
      break;
    case R_386_TLS_LDO_32:
      *(ul32 *)loc = S + A - ctx.dtp_addr;
      break;
    case R_386_SIZE32:
      *(ul32 *)loc = sym.esym().st_size + A;
      break;
    case R_386_TLS_GOTDESC:
      if (sym.has_tlsdesc(ctx)) {
        *(ul32 *)loc = sym.get_tlsdesc_addr(ctx) + A - GOT;
      } else {
        // TLSDESC relaxed away: materialize the TP offset with a lea.
        static const u8 insn[] = {
          0x8d, 0x05, 0, 0, 0, 0, // lea 0, %eax
        };
        memcpy(loc - 2, insn, sizeof(insn));
        *(ul32 *)loc = S + A - ctx.tp_addr;
      }
      break;
    case R_386_TLS_DESC_CALL:
      if (!sym.has_tlsdesc(ctx)) {
        // The descriptor call is no longer needed:
        // call *(%eax) -> nop (2-byte 66 90)
        loc[0] = 0x66;
        loc[1] = 0x90;
      }
      break;
    default:
      unreachable();
    }
  }
}
// Apply relocations to a non-SHF_ALLOC section (e.g. debug info). These
// bytes are never mapped at runtime, so only link-time-computable values
// are allowed; no dynamic relocations or relaxations happen here.
template <>
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;
    // Report values that don't fit in the relocated field.
    auto check = [&](i64 val, i64 lo, i64 hi) {
      if (val < lo || hi <= val)
        Error(ctx) << *this << ": relocation " << rel << " against "
                   << sym << " out of range: " << val << " is not in ["
                   << lo << ", " << hi << ")";
    };
    // A reference into a mergeable section resolves to a fragment; use
    // the fragment's address/addend instead of the symbol's in that case.
    SectionFragment<E> *frag;
    i64 frag_addend;
    std::tie(frag, frag_addend) = get_fragment(ctx, rel);
    u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
    u64 A = frag ? frag_addend : get_addend(*this, rel);
    u64 GOT = ctx.got->shdr.sh_addr;
    switch (rel.r_type) {
    case R_386_8:
      check(S + A, 0, 1 << 8);
      *loc = S + A;
      break;
    case R_386_16:
      check(S + A, 0, 1 << 16);
      *(ul16 *)loc = S + A;
      break;
    case R_386_32:
      // get_tombstone() yields a replacement value for references to
      // sections discarded by e.g. garbage collection.
      if (std::optional<u64> val = get_tombstone(sym, frag))
        *(ul32 *)loc = *val;
      else
        *(ul32 *)loc = S + A;
      break;
    case R_386_PC8:
      check(S + A, -(1 << 7), 1 << 7);
      *loc = S + A;
      break;
    case R_386_PC16:
      check(S + A, -(1 << 15), 1 << 15);
      *(ul16 *)loc = S + A;
      break;
    case R_386_PC32:
      *(ul32 *)loc = S + A;
      break;
    case R_386_GOTPC:
      *(ul32 *)loc = GOT + A;
      break;
    case R_386_GOTOFF:
      *(ul32 *)loc = S + A - GOT;
      break;
    case R_386_TLS_LDO_32:
      if (std::optional<u64> val = get_tombstone(sym, frag))
        *(ul32 *)loc = *val;
      else
        *(ul32 *)loc = S + A - ctx.dtp_addr;
      break;
    case R_386_SIZE32:
      *(ul32 *)loc = sym.esym().st_size + A;
      break;
    default:
      unreachable();
    }
  }
}
// Scan relocations to decide what each referenced symbol needs (GOT slot,
// PLT entry, TLS structures, ...) and which TLS sequences can be relaxed.
// The decisions recorded here must match what apply_reloc_alloc() does.
template <>
void InputSection<E>::scan_relocations(Context<E> &ctx) {
  assert(shdr().sh_flags & SHF_ALLOC);
  this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  // Scan relocations
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = (u8 *)(contents.data() + rel.r_offset);
    // An ifunc is resolved through its PLT/GOT regardless of reloc type.
    if (sym.is_ifunc())
      sym.flags |= NEEDS_GOT | NEEDS_PLT;
    switch (rel.r_type) {
    case R_386_8:
    case R_386_16:
      scan_absrel(ctx, sym, rel);
      break;
    case R_386_32:
      scan_dyn_absrel(ctx, sym, rel);
      break;
    case R_386_PC8:
    case R_386_PC16:
    case R_386_PC32:
      scan_pcrel(ctx, sym, rel);
      break;
    case R_386_GOT32:
    case R_386_GOTPC:
      sym.flags |= NEEDS_GOT;
      break;
    case R_386_GOT32X: {
      // We always want to relax GOT32X because static PIE doesn't
      // work without it.
      bool do_relax = !sym.is_imported && sym.is_relative() &&
                      relax_got32x(loc - 2);
      if (!do_relax)
        sym.flags |= NEEDS_GOT;
      break;
    }
    case R_386_PLT32:
      if (sym.is_imported)
        sym.flags |= NEEDS_PLT;
      break;
    case R_386_TLS_GOTIE:
    case R_386_TLS_IE:
      sym.flags |= NEEDS_GOTTP;
      break;
    case R_386_TLS_GD:
      // The GD sequence must be paired with a reloc for the
      // __tls_get_addr call so GD->LE relaxation can rewrite both.
      if (i + 1 == rels.size())
        Fatal(ctx) << *this << ": TLS_GD reloc must be followed by PLT or GOT32";
      if (u32 ty = rels[i + 1].r_type;
          ty != R_386_PLT32 && ty != R_386_PC32 &&
          ty != R_386_GOT32 && ty != R_386_GOT32X)
        Fatal(ctx) << *this << ": TLS_GD reloc must be followed by PLT or GOT32";
      // We always relax if -static because libc.a doesn't contain
      // __tls_get_addr().
      if (ctx.arg.is_static ||
          (ctx.arg.relax && !ctx.arg.shared && !sym.is_imported))
        i++;
      else
        sym.flags |= NEEDS_TLSGD;
      break;
    case R_386_TLS_LDM:
      // Same pairing requirement as TLS_GD, for LD->LE relaxation.
      if (i + 1 == rels.size())
        Fatal(ctx) << *this << ": TLS_LDM reloc must be followed by PLT or GOT32";
      if (u32 ty = rels[i + 1].r_type;
          ty != R_386_PLT32 && ty != R_386_PC32 &&
          ty != R_386_GOT32 && ty != R_386_GOT32X)
        Fatal(ctx) << *this << ": TLS_LDM reloc must be followed by PLT or GOT32";
      // We always relax if -static because libc.a doesn't contain
      // __tls_get_addr().
      if (ctx.arg.is_static || (ctx.arg.relax && !ctx.arg.shared))
        i++;
      else
        ctx.needs_tlsld = true;
      break;
    case R_386_TLS_GOTDESC:
      if (!relax_tlsdesc(ctx, sym))
        sym.flags |= NEEDS_TLSDESC;
      break;
    case R_386_TLS_LE:
      check_tlsle(ctx, sym, rel);
      break;
    // Relocation types that need no preparation at scan time.
    case R_386_GOTOFF:
    case R_386_TLS_LDO_32:
    case R_386_SIZE32:
    case R_386_TLS_DESC_CALL:
      break;
    default:
      Error(ctx) << *this << ": unknown relocation: " << rel;
    }
  }
}
} // namespace mold::elf

326
third_party/mold/elf/arch-m68k.cc vendored Normal file
View file

@ -0,0 +1,326 @@
// clang-format off
// This file contains code for the Motorola 68000 series microprocessors,
// which is often abbreviated as m68k. Running a Unix-like system on a
// m68k-based machine today is probably a retro-computing hobby activity,
// but the processor was a popular choice to build Unix computers during
// '80s. Early Sun workstations for example used m68k. Macintosh until
// 1994 were based on m68k as well until they switched to PowerPC (and
// then to x86 and to ARM.)
//
// From the linker's point of view, it is not hard to support m68k. It's
// just a 32-bit big-endian CISC ISA. Compared to comtemporary i386,
// m68k's psABI is actually simpler because m68k has PC-relative memory
// access instructions and therefore can support position-independent
// code without too much hassle.
//
// https://github.com/rui314/psabi/blob/main/m68k.pdf
#include "third_party/mold/elf/mold.h"
namespace mold::elf {
using E = M68K;
// Write the PLT header. m68k has PC-relative addressing, so the GOTPLT is
// addressed relative to the PLT itself; the header pushes the resolver
// argument and jumps through GOTPLT[2]/GOTPLT[1] for lazy binding.
template <>
void write_plt_header(Context<E> &ctx, u8 *buf) {
  static const u8 insn[] = {
    0x2f, 0x00,                         // move.l %d0, -(%sp)
    0x2f, 0x3b, 0x01, 0x70, 0, 0, 0, 0, // move.l (GOTPLT+4, %pc), -(%sp)
    0x4e, 0xfb, 0x01, 0x71, 0, 0, 0, 0, // jmp ([GOTPLT+8, %pc])
  };
  memcpy(buf, insn, sizeof(insn));
  // Patch the two PC-relative displacements (big-endian 32-bit fields at
  // byte offsets 6 and 14).
  *(ub32 *)(buf + 6) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr;
  *(ub32 *)(buf + 14) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 4;
}
// Write one PLT entry: load the entry's .rela.plt offset into %d0 (used
// by the lazy resolver) and jump through the symbol's GOTPLT slot,
// addressed PC-relatively.
template <>
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  static const u8 insn[] = {
    0x20, 0x3c, 0, 0, 0, 0,             // move.l PLT_OFFSET, %d0
    0x4e, 0xfb, 0x01, 0x71, 0, 0, 0, 0, // jmp ([GOTPLT_ENTRY, %pc])
  };
  memcpy(buf, insn, sizeof(insn));
  *(ub32 *)(buf + 2) = sym.get_plt_idx(ctx) * sizeof(ElfRel<E>);
  // PC-relative displacement; the extension word's base is 8 bytes in.
  *(ub32 *)(buf + 10) = sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx) - 8;
}
// Write a PLT entry for a symbol that already has a GOT slot: a single
// PC-relative indirect jump through that slot (no lazy resolution).
template <>
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  static const u8 insn[] = {
    0x4e, 0xfb, 0x01, 0x71, 0, 0, 0, 0, // jmp ([GOT_ENTRY, %pc])
  };
  memcpy(buf, insn, sizeof(insn));
  // PC-relative displacement measured from 2 bytes into the instruction.
  *(ub32 *)(buf + 4) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 2;
}
// Apply a relocation against .eh_frame. Only 32-bit absolute and
// PC-relative relocations are expected; values are big-endian on m68k.
template <>
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
                                    u64 offset, u64 val) {
  u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
  switch (rel.r_type) {
  case R_NONE:
    break;
  case R_68K_32:
    *(ub32 *)loc = val;
    break;
  case R_68K_PC32:
    // PC-relative: subtract the runtime address of the relocated place.
    *(ub32 *)loc = val - this->shdr.sh_addr - offset;
    break;
  default:
    Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
  }
}
// Apply relocations to a SHF_ALLOC section. m68k offers each relocation
// in 32/16/8-bit flavors; the 16/8-bit writers below range-check first.
// All multi-byte values are written big-endian (ub16/ub32).
template <>
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  // Cursor into this section's slice of .rela.dyn for dynamic relocations
  // emitted by apply_dyn_absrel().
  ElfRel<E> *dynrel = nullptr;
  if (ctx.reldyn)
    dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
                           file.reldyn_offset + this->reldyn_offset);
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE)
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;
    // Report values that don't fit in the relocated field.
    auto check = [&](i64 val, i64 lo, i64 hi) {
      if (val < lo || hi <= val)
        Error(ctx) << *this << ": relocation " << rel << " against "
                   << sym << " out of range: " << val << " is not in ["
                   << lo << ", " << hi << ")";
    };
    // Checked writers: unsigned 16-bit, signed 16-bit, unsigned 8-bit,
    // signed 8-bit respectively.
    auto write16 = [&](u64 val) {
      check(val, 0, 1 << 16);
      *(ub16 *)loc = val;
    };
    auto write16s = [&](u64 val) {
      check(val, -(1 << 15), 1 << 15);
      *(ub16 *)loc = val;
    };
    auto write8 = [&](u64 val) {
      check(val, 0, 1 << 8);
      *loc = val;
    };
    auto write8s = [&](u64 val) {
      check(val, -(1 << 7), 1 << 7);
      *loc = val;
    };
    // Standard psABI notation:
    //   S = symbol address, A = addend (RELA), P = address of the
    //   relocated place, G = GOT slot offset, GOT = GOT base address.
    u64 S = sym.get_addr(ctx);
    u64 A = rel.r_addend;
    u64 P = get_addr() + rel.r_offset;
    u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>);
    u64 GOT = ctx.got->shdr.sh_addr;
    switch (rel.r_type) {
    case R_68K_32:
      // May need a dynamic relocation if the value isn't link-time known.
      apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
      break;
    case R_68K_16:
      write16(S + A);
      break;
    case R_68K_8:
      write8(S + A);
      break;
    case R_68K_PC32:
    case R_68K_PLT32:
      *(ub32 *)loc = S + A - P;
      break;
    case R_68K_PC16:
    case R_68K_PLT16:
      write16s(S + A - P);
      break;
    case R_68K_PC8:
    case R_68K_PLT8:
      write8s(S + A - P);
      break;
    case R_68K_GOTPCREL32:
      *(ub32 *)loc = GOT + A - P;
      break;
    case R_68K_GOTPCREL16:
      write16s(GOT + A - P);
      break;
    case R_68K_GOTPCREL8:
      write8s(GOT + A - P);
      break;
    case R_68K_GOTOFF32:
      *(ub32 *)loc = G + A;
      break;
    case R_68K_GOTOFF16:
      write16(G + A);
      break;
    case R_68K_GOTOFF8:
      write8(G + A);
      break;
    case R_68K_TLS_GD32:
      *(ub32 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT;
      break;
    case R_68K_TLS_GD16:
      write16(sym.get_tlsgd_addr(ctx) + A - GOT);
      break;
    case R_68K_TLS_GD8:
      write8(sym.get_tlsgd_addr(ctx) + A - GOT);
      break;
    case R_68K_TLS_LDM32:
      *(ub32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT;
      break;
    case R_68K_TLS_LDM16:
      write16(ctx.got->get_tlsld_addr(ctx) + A - GOT);
      break;
    case R_68K_TLS_LDM8:
      write8(ctx.got->get_tlsld_addr(ctx) + A - GOT);
      break;
    case R_68K_TLS_LDO32:
      *(ub32 *)loc = S + A - ctx.dtp_addr;
      break;
    case R_68K_TLS_LDO16:
      write16s(S + A - ctx.dtp_addr);
      break;
    case R_68K_TLS_LDO8:
      write8s(S + A - ctx.dtp_addr);
      break;
    case R_68K_TLS_IE32:
      *(ub32 *)loc = sym.get_gottp_addr(ctx) + A - GOT;
      break;
    case R_68K_TLS_IE16:
      write16(sym.get_gottp_addr(ctx) + A - GOT);
      break;
    case R_68K_TLS_IE8:
      write8(sym.get_gottp_addr(ctx) + A - GOT);
      break;
    case R_68K_TLS_LE32:
      *(ub32 *)loc = S + A - ctx.tp_addr;
      break;
    case R_68K_TLS_LE16:
      write16(S + A - ctx.tp_addr);
      break;
    case R_68K_TLS_LE8:
      write8(S + A - ctx.tp_addr);
      break;
    default:
      unreachable();
    }
  }
}
// Apply relocations to a non-SHF_ALLOC section (e.g. debug info). Only
// plain 32-bit absolute references are supported here.
template <>
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;
    // References into mergeable sections resolve to section fragments.
    SectionFragment<E> *frag;
    i64 frag_addend;
    std::tie(frag, frag_addend) = get_fragment(ctx, rel);
    u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
    u64 A = frag ? frag_addend : (i64)rel.r_addend;
    switch (rel.r_type) {
    case R_68K_32:
      // get_tombstone() yields a replacement value for references to
      // discarded sections.
      if (std::optional<u64> val = get_tombstone(sym, frag))
        *(ub32 *)loc = *val;
      else
        *(ub32 *)loc = S + A;
      break;
    default:
      Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: "
                 << rel;
    }
  }
}
// Scan relocations to decide what each referenced symbol needs (GOT slot,
// PLT entry, TLS structures, ...). No TLS relaxations are performed for
// m68k; each TLS model maps directly to its flag.
template <>
void InputSection<E>::scan_relocations(Context<E> &ctx) {
  assert(shdr().sh_flags & SHF_ALLOC);
  this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    if (sym.is_ifunc())
      Error(ctx) << sym << ": GNU ifunc symbol is not supported on m68k";
    switch (rel.r_type) {
    case R_68K_32:
      scan_dyn_absrel(ctx, sym, rel);
      break;
    case R_68K_16:
    case R_68K_8:
      scan_absrel(ctx, sym, rel);
      break;
    case R_68K_PC32:
    case R_68K_PC16:
    case R_68K_PC8:
      scan_pcrel(ctx, sym, rel);
      break;
    // GOTOFF also needs a GOT because values are relative to its base.
    case R_68K_GOTPCREL32:
    case R_68K_GOTPCREL16:
    case R_68K_GOTPCREL8:
    case R_68K_GOTOFF32:
    case R_68K_GOTOFF16:
    case R_68K_GOTOFF8:
      sym.flags |= NEEDS_GOT;
      break;
    case R_68K_PLT32:
    case R_68K_PLT16:
    case R_68K_PLT8:
      if (sym.is_imported)
        sym.flags |= NEEDS_PLT;
      break;
    case R_68K_TLS_GD32:
    case R_68K_TLS_GD16:
    case R_68K_TLS_GD8:
      sym.flags |= NEEDS_TLSGD;
      break;
    case R_68K_TLS_LDM32:
    case R_68K_TLS_LDM16:
    case R_68K_TLS_LDM8:
      ctx.needs_tlsld = true;
      break;
    case R_68K_TLS_IE32:
    case R_68K_TLS_IE16:
    case R_68K_TLS_IE8:
      sym.flags |= NEEDS_GOTTP;
      break;
    case R_68K_TLS_LE32:
    case R_68K_TLS_LE16:
    case R_68K_TLS_LE8:
      check_tlsle(ctx, sym, rel);
      break;
    // DTPOFF-relative values need no preparation at scan time.
    case R_68K_TLS_LDO32:
    case R_68K_TLS_LDO16:
    case R_68K_TLS_LDO8:
      break;
    default:
      Error(ctx) << *this << ": unknown relocation: " << rel;
    }
  }
}
} // namespace mold::elf

452
third_party/mold/elf/arch-ppc32.cc vendored Normal file
View file

@ -0,0 +1,452 @@
// clang-format off
// This file implements the PowerPC 32-bit ISA. For 64-bit PowerPC, see
// arch-ppc64v1.cpp and arch-ppc64v2.cpp.
//
// PPC32 is a RISC ISA. It has 32 general-purpose registers (GPRs).
// r0, r11 and r12 are reserved for static linkers, so we can use these
// registers in PLTs and range extension thunks. In addition to that, it
// has a few special registers. Notable ones are LR which holds a return
// address and CTR which we can use to store a branch target address.
//
// It feels that the PPC32 psABI is unnecessarily complicated at first
// glance, but that is mainly stemmed from the fact that the ISA lacks
// PC-relative load/store instructions. Since machine instructions cannot
// load data relative to its own address, it is not straightforward to
// support position-independent code (PIC) on PPC32.
//
// A position-independent function typically contains the following code
// in the prologue to obtain its own address:
//
// mflr r0 // save the current return address to %r0
// bcl 20, 31, 4 // call the next instruction as if it were a function
// mtlr r12 // save the return address to %r12
// mtlr r0 // restore the original return address
//
// An object file compiled with -fPIC contains a data section named
// `.got2` to store addresses of locally-defined global variables and
// constants. A PIC function usually computes its .got2+0x8000 and set it
// to %r30. This scheme allows the function to access global objects
// defined in the same input file with a single %r30-relative load/store
// instruction with a 16-bit offset, given that .got2 is smaller than
// 0x10000 (or 65536) bytes.
//
// Since each object file has its own .got2, %r30 refers to different
// places in a merged .got2 for two functions that came from different
// input files. Therefore, %r30 makes sense only within a single function.
//
// Technically, we can reuse a %r30 value in our PLT if we create a PLT
// _for each input file_ (that's what GNU ld seems to be doing), but that
// doesn't seems to be worth its complexity. Our PLT simply doesn't rely
// on a %r30 value.
//
// https://github.com/rui314/psabi/blob/main/ppc32.pdf
#include "third_party/mold/elf/mold.h"
namespace mold::elf {
using E = PPC32;
// 16-bit slices of a value, as encoded by PPC32 @l/@h/@ha-style
// relocations. ha() adds 0x8000 before shifting so that the high half
// compensates for the sign extension of the low half: (ha(x) << 16) +
// sign_extend(lo(x)) reconstructs x. high/higha are the variants masked
// to 16 bits.
static u64 lo(u64 x) { return x & 0xffff; }
static u64 hi(u64 x) { return x >> 16; }
static u64 ha(u64 x) { return (x + 0x8000) >> 16; }
static u64 high(u64 x) { return (x >> 16) & 0xffff; }
static u64 higha(u64 x) { return ((x + 0x8000) >> 16) & 0xffff; }
// Write the PLT header for lazy binding. PLT entries arrive here with the
// address of their GOTPLT slot in %r11; the header obtains its own
// address with the mflr/bcl/mflr idiom (PPC32 has no PC-relative loads),
// converts %r11 into a relocation offset, and jumps to the resolver whose
// address is stored in the GOTPLT.
template <>
void write_plt_header(Context<E> &ctx, u8 *buf) {
  static const ub32 insn[] = {
    // Get the address of this PLT section
    0x7c08'02a6, // mflr r0
    0x429f'0005, // bcl 20, 31, 4
    0x7d88'02a6, // 1: mflr r12
    0x7c08'03a6, // mtlr r0
    // Compute the runtime address of GOTPLT+12
    0x3d8c'0000, // addis r12, r12, (GOTPLT - 1b)@higha
    0x398c'0000, // addi r12, r12, (GOTPLT - 1b)@lo
    // Compute the PLT entry offset
    0x7d6c'5850, // sub r11, r11, r12
    0x1d6b'0003, // mulli r11, r11, 3
    // Load GOTPLT[2] and branch to GOTPLT[1]
    0x800c'fff8, // lwz r0, -8(r12)
    0x7c09'03a6, // mtctr r0
    0x818c'fffc, // lwz r12, -4(r12)
    0x4e80'0420, // bctr
    0x6000'0000, // nop
    0x6000'0000, // nop
    0x6000'0000, // nop
    0x6000'0000, // nop
  };
  static_assert(sizeof(insn) == E::plt_hdr_size);
  memcpy(buf, insn, sizeof(insn));
  // Patch the addis/addi immediates with the GOTPLT displacement from
  // label 1 (the bcl landing pad at PLT+8, hence the +4 adjustment).
  ub32 *loc = (ub32 *)buf;
  loc[4] |= higha(ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr + 4);
  loc[5] |= lo(ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr + 4);
}
// Instruction template shared by write_plt_entry, write_pltgot_entry and
// the PLT branch of RangeExtensionThunk::copy_buf: obtain the entry's own
// address, load a word from a nearby GOT/GOTPLT slot (offset patched into
// words 4 and 5), and branch to it. Does not rely on %r30 being set
// (see the file header comment).
static const ub32 plt_entry[] = {
  // Get the address of this PLT entry
  0x7c08'02a6, // mflr r0
  0x429f'0005, // bcl 20, 31, 4
  0x7d88'02a6, // mflr r12
  0x7c08'03a6, // mtlr r0
  // Load an address from the GOT/GOTPLT entry and jump to that address
  0x3d6c'0000, // addis r11, r12, OFFSET@higha
  0x396b'0000, // addi r11, r11, OFFSET@lo
  0x818b'0000, // lwz r12, 0(r11)
  0x7d89'03a6, // mtctr r12
  0x4e80'0420, // bctr
};
// Write one PLT entry: the shared template with the displacement to the
// symbol's GOTPLT slot patched in (measured from the bcl landing pad at
// entry+8).
template <>
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  static_assert(E::plt_size == sizeof(plt_entry));
  memcpy(buf, plt_entry, sizeof(plt_entry));
  ub32 *loc = (ub32 *)buf;
  i64 offset = sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx) - 8;
  loc[4] |= higha(offset);
  loc[5] |= lo(offset);
}
// Write a PLT entry for a symbol that already has a regular GOT slot:
// same template, but the displacement targets the GOT entry instead.
template <>
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  static_assert(E::pltgot_size == sizeof(plt_entry));
  memcpy(buf, plt_entry, sizeof(plt_entry));
  ub32 *loc = (ub32 *)buf;
  i64 offset = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 8;
  loc[4] |= higha(offset);
  loc[5] |= lo(offset);
}
// Apply a relocation against .eh_frame. Only 32-bit absolute and
// PC-relative relocations are expected; values are big-endian on PPC32.
template <>
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
                                    u64 offset, u64 val) {
  u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
  switch (rel.r_type) {
  case R_NONE:
    break;
  case R_PPC_ADDR32:
    *(ub32 *)loc = val;
    break;
  case R_PPC_REL32:
    // PC-relative: subtract the runtime address of the relocated place.
    *(ub32 *)loc = val - this->shdr.sh_addr - offset;
    break;
  default:
    Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
  }
}
// Apply relocations to a SHF_ALLOC section. Branch-field relocations
// (ADDR14/ADDR24/REL14/REL24/...) are OR-ed into the instruction's
// displacement bits; 16-bit relocations select a half of the value with
// the lo/hi/ha helpers. Out-of-range branches are redirected to range
// extension thunks.
template <>
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  // Cursor into this section's slice of .rela.dyn for dynamic relocations
  // emitted by apply_dyn_absrel().
  ElfRel<E> *dynrel = nullptr;
  if (ctx.reldyn)
    dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
                           file.reldyn_offset + this->reldyn_offset);
  // Base of this input file's .got2 contribution; PLT16/PLT32 values are
  // encoded relative to it. Zero if the file has no .got2.
  u64 GOT2 = file.ppc32_got2 ? file.ppc32_got2->get_addr() : 0;
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE)
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;
    // Standard psABI notation:
    //   S = symbol address, A = addend (RELA), P = address of the
    //   relocated place, G = GOT slot offset, GOT = GOT base address.
    u64 S = sym.get_addr(ctx);
    u64 A = rel.r_addend;
    u64 P = get_addr() + rel.r_offset;
    u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>);
    u64 GOT = ctx.got->shdr.sh_addr;
    switch (rel.r_type) {
    case R_PPC_ADDR32:
    case R_PPC_UADDR32:
      // May need a dynamic relocation if the value isn't link-time known.
      apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
      break;
    case R_PPC_ADDR14:
      *(ub32 *)loc |= bits(S + A, 15, 2) << 2;
      break;
    case R_PPC_ADDR16:
    case R_PPC_UADDR16:
    case R_PPC_ADDR16_LO:
      *(ub16 *)loc = lo(S + A);
      break;
    case R_PPC_ADDR16_HI:
      *(ub16 *)loc = hi(S + A);
      break;
    case R_PPC_ADDR16_HA:
      *(ub16 *)loc = ha(S + A);
      break;
    case R_PPC_ADDR24:
      *(ub32 *)loc |= bits(S + A, 25, 2) << 2;
      break;
    case R_PPC_ADDR30:
      *(ub32 *)loc |= bits(S + A, 31, 2) << 2;
      break;
    // PLT16/PLT32: GOT-entry address relative to the .got2 base recorded
    // in the addend.
    case R_PPC_PLT16_LO:
      *(ub16 *)loc = lo(G + GOT - A - GOT2);
      break;
    case R_PPC_PLT16_HI:
      *(ub16 *)loc = hi(G + GOT - A - GOT2);
      break;
    case R_PPC_PLT16_HA:
      *(ub16 *)loc = ha(G + GOT - A - GOT2);
      break;
    case R_PPC_PLT32:
      *(ub32 *)loc = G + GOT - A - GOT2;
      break;
    case R_PPC_REL14:
      *(ub32 *)loc |= bits(S + A - P, 15, 2) << 2;
      break;
    case R_PPC_REL16:
    case R_PPC_REL16_LO:
      *(ub16 *)loc = lo(S + A - P);
      break;
    case R_PPC_REL16_HI:
      *(ub16 *)loc = hi(S + A - P);
      break;
    case R_PPC_REL16_HA:
      *(ub16 *)loc = ha(S + A - P);
      break;
    case R_PPC_REL24:
    case R_PPC_LOCAL24PC: {
      // Branch displacement is a signed 26-bit value; if the destination
      // is out of reach, branch to a range extension thunk instead.
      i64 val = S + A - P;
      if (sign_extend(val, 25) != val)
        val = get_thunk_addr(i) - P;
      *(ub32 *)loc |= bits(val, 25, 2) << 2;
      break;
    }
    case R_PPC_PLTREL24: {
      // Calls through the PLT always go via the thunk (our PLT code
      // doesn't rely on %r30; see the file header comment).
      i64 val = S - P;
      if (sym.has_plt(ctx) || sign_extend(val, 25) != val)
        val = get_thunk_addr(i) - P;
      *(ub32 *)loc |= bits(val, 25, 2) << 2;
      break;
    }
    case R_PPC_REL32:
    case R_PPC_PLTREL32:
      *(ub32 *)loc = S + A - P;
      break;
    case R_PPC_GOT16:
    case R_PPC_GOT16_LO:
      *(ub16 *)loc = lo(G + A);
      break;
    case R_PPC_GOT16_HI:
      *(ub16 *)loc = hi(G + A);
      break;
    case R_PPC_GOT16_HA:
      *(ub16 *)loc = ha(G + A);
      break;
    case R_PPC_TPREL16_LO:
      *(ub16 *)loc = lo(S + A - ctx.tp_addr);
      break;
    case R_PPC_TPREL16_HI:
      *(ub16 *)loc = hi(S + A - ctx.tp_addr);
      break;
    case R_PPC_TPREL16_HA:
      *(ub16 *)loc = ha(S + A - ctx.tp_addr);
      break;
    case R_PPC_DTPREL16_LO:
      *(ub16 *)loc = lo(S + A - ctx.dtp_addr);
      break;
    case R_PPC_DTPREL16_HI:
      *(ub16 *)loc = hi(S + A - ctx.dtp_addr);
      break;
    case R_PPC_DTPREL16_HA:
      *(ub16 *)loc = ha(S + A - ctx.dtp_addr);
      break;
    case R_PPC_GOT_TLSGD16:
      *(ub16 *)loc = sym.get_tlsgd_addr(ctx) - GOT;
      break;
    case R_PPC_GOT_TLSLD16:
      *(ub16 *)loc = ctx.got->get_tlsld_addr(ctx) - GOT;
      break;
    case R_PPC_GOT_TPREL16:
      *(ub16 *)loc = sym.get_gottp_addr(ctx) - GOT;
      break;
    // Marker relocations; nothing to patch.
    case R_PPC_TLS:
    case R_PPC_TLSGD:
    case R_PPC_TLSLD:
    case R_PPC_PLTSEQ:
    case R_PPC_PLTCALL:
      break;
    default:
      unreachable();
    }
  }
}
// Apply relocations to a non-SHF_ALLOC section (e.g. debug info). Only
// plain 32-bit absolute references are supported here.
template <>
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;
    // References into mergeable sections resolve to section fragments.
    SectionFragment<E> *frag;
    i64 frag_addend;
    std::tie(frag, frag_addend) = get_fragment(ctx, rel);
    u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
    u64 A = frag ? frag_addend : (i64)rel.r_addend;
    switch (rel.r_type) {
    case R_PPC_ADDR32:
      // get_tombstone() yields a replacement value for references to
      // discarded sections.
      if (std::optional<u64> val = get_tombstone(sym, frag))
        *(ub32 *)loc = *val;
      else
        *(ub32 *)loc = S + A;
      break;
    default:
      Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: "
                 << rel;
    }
  }
}
// Scan relocations to decide what each referenced symbol needs (GOT slot,
// PLT entry, TLS structures, ...). The decisions recorded here must match
// what apply_reloc_alloc() later assumes.
template <>
void InputSection<E>::scan_relocations(Context<E> &ctx) {
  assert(shdr().sh_flags & SHF_ALLOC);
  this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  // Scan relocations
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    // An ifunc is resolved through its PLT/GOT regardless of reloc type.
    if (sym.is_ifunc())
      sym.flags |= NEEDS_GOT | NEEDS_PLT;
    switch (rel.r_type) {
    case R_PPC_ADDR32:
    case R_PPC_UADDR32:
      scan_dyn_absrel(ctx, sym, rel);
      break;
    case R_PPC_ADDR14:
    case R_PPC_ADDR16:
    case R_PPC_UADDR16:
    case R_PPC_ADDR16_LO:
    case R_PPC_ADDR16_HI:
    case R_PPC_ADDR16_HA:
    case R_PPC_ADDR24:
    case R_PPC_ADDR30:
      scan_absrel(ctx, sym, rel);
      break;
    case R_PPC_REL14:
    case R_PPC_REL16:
    case R_PPC_REL16_LO:
    case R_PPC_REL16_HI:
    case R_PPC_REL16_HA:
    case R_PPC_REL32:
      scan_pcrel(ctx, sym, rel);
      break;
    // PLT16/PLT32 read a GOT entry, so they need a GOT slot too.
    case R_PPC_GOT16:
    case R_PPC_GOT16_LO:
    case R_PPC_GOT16_HI:
    case R_PPC_GOT16_HA:
    case R_PPC_PLT16_LO:
    case R_PPC_PLT16_HI:
    case R_PPC_PLT16_HA:
    case R_PPC_PLT32:
      sym.flags |= NEEDS_GOT;
      break;
    case R_PPC_REL24:
    case R_PPC_PLTREL24:
    case R_PPC_PLTREL32:
      if (sym.is_imported)
        sym.flags |= NEEDS_PLT;
      break;
    case R_PPC_GOT_TLSGD16:
      sym.flags |= NEEDS_TLSGD;
      break;
    case R_PPC_GOT_TLSLD16:
      ctx.needs_tlsld = true;
      break;
    case R_PPC_GOT_TPREL16:
      sym.flags |= NEEDS_GOTTP;
      break;
    case R_PPC_TPREL16_LO:
    case R_PPC_TPREL16_HI:
    case R_PPC_TPREL16_HA:
      check_tlsle(ctx, sym, rel);
      break;
    // Local branches, marker relocs and DTPOFF values need no
    // preparation at scan time.
    case R_PPC_LOCAL24PC:
    case R_PPC_TLS:
    case R_PPC_TLSGD:
    case R_PPC_TLSLD:
    case R_PPC_DTPREL16_LO:
    case R_PPC_DTPREL16_HI:
    case R_PPC_DTPREL16_HA:
    case R_PPC_PLTSEQ:
    case R_PPC_PLTCALL:
      break;
    default:
      Error(ctx) << *this << ": unknown relocation: " << rel;
    }
  }
}
// Write out this thunk's machine code. Every entry is E::thunk_size
// bytes. Symbols with a PLT get `plt_entry` (defined earlier in this
// file); everything else gets `local_thunk`, which obtains its own
// address with mflr/bcl/mflr and adds a PC-relative offset to reach the
// destination.
template <>
void RangeExtensionThunk<E>::copy_buf(Context<E> &ctx) {
  u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset;
  static const ub32 local_thunk[] = {
    // Get this thunk's address
    0x7c08'02a6, // mflr r0
    0x429f'0005, // bcl 20, 31, 4
    0x7d88'02a6, // mflr r12
    0x7c08'03a6, // mtlr r0
    // Materialize the destination's address in %r11 and jump to that address
    0x3d6c'0000, // addis r11, r12, OFFSET@higha
    0x396b'0000, // addi r11, r11, OFFSET@lo
    0x7d69'03a6, // mtctr r11
    0x4e80'0420, // bctr
    0x6000'0000, // nop
  };
  static_assert(E::thunk_size == sizeof(plt_entry));
  static_assert(E::thunk_size == sizeof(local_thunk));
  for (i64 i = 0; i < symbols.size(); i++) {
    ub32 *loc = (ub32 *)(buf + i * E::thunk_size);
    Symbol<E> &sym = *symbols[i];
    if (sym.has_plt(ctx)) {
      memcpy(loc, plt_entry, sizeof(plt_entry));
      u64 got = sym.has_got(ctx) ? sym.get_got_addr(ctx) : sym.get_gotplt_addr(ctx);
      // The offset is relative to the PC obtained by the bcl, which is
      // thunk start + 8. NOTE(review): assumes plt_entry's addis/addi
      // pair sits at words 4 and 5 like local_thunk's -- confirm against
      // its definition earlier in this file.
      i64 val = got - get_addr(i) - 8;
      loc[4] |= higha(val);
      loc[5] |= lo(val);
    } else {
      memcpy(loc, local_thunk, sizeof(local_thunk));
      // Words 4/5 are the addis/addi pair; PC from the bcl is thunk + 8.
      i64 val = sym.get_addr(ctx) - get_addr(i) - 8;
      loc[4] |= higha(val);
      loc[5] |= lo(val);
    }
  }
}
} // namespace mold::elf

687
third_party/mold/elf/arch-ppc64v1.cc vendored Normal file
View file

@ -0,0 +1,687 @@
// clang-format off
// This file contains code for the 64-bit PowerPC ELFv1 ABI that is
// commonly used for big-endian PPC systems. Modern PPC systems that use
// the processor in the little-endian mode use the ELFv2 ABI instead. For
// ELFv2, see arch-ppc64v2.cc.
//
// Even though they are similar, ELFv1 isn't only different from ELFv2 in
// endianness. The most notable difference is, in ELFv1, a function
// pointer doesn't directly refer to the entry point of a function but
// instead refers to a data structure so-called "function descriptor".
//
// The function descriptor is essentially a pair of a function entry point
// address and a value that should be set to %r2 before calling that
// function. There is also a third member for "the environment pointer for
// languages such as Pascal and PL/1" according to the psABI, but it looks
// like no one actually uses it. In total, the function descriptor is 24
// bytes long. Here is why we need it.
//
// PPC generally lacks PC-relative data access instructions. Position-
// independent code sets GOT + 0x8000 to %r2 and access global variables
// relative to %r2.
//
// Each ELF file has its own GOT. If a function calls another function in
// the same ELF file, it doesn't have to reset %r2. However, if it is in
// other file (e.g. other .so), it has to set a new value to %r2 so that
// the register contains the callee's GOT + 0x8000.
//
// In this way, you can't call a function just by knowing the function's
// entry point address. You also need to know a proper %r2 value for the
// function. This is why a function pointer refers to a tuple of an
// address and a %r2 value.
//
// If a function call is made through PLT, PLT takes care of restoring %r2.
// Therefore, the caller has to restore %r2 only for function calls
// through function pointers.
//
// .opd (short for "official procedure descriptors") contains function
// descriptors.
//
// You can think of OPD like this: even in other targets, a function can have a
// few different addresses for different purposes. It may not only have an
// entry point address but may also have PLT and/or GOT addresses.
// In PPCV1, it may have an OPD address in addition to these. OPD address
// is used for relocations that refer to the address of a function as a
// function pointer.
//
// https://github.com/rui314/psabi/blob/main/ppc64v1.pdf
#include "third_party/mold/elf/mold.h"
#include "third_party/libcxx/algorithm"
// MISSING #include <tbb/parallel_for_each.h>
namespace mold::elf {
using E = PPC64V1;
// Halfword extraction helpers for PPC @lo/@hi/@ha-style relocation math.
static u64 lo(u64 x) { return x & 0xffff; }
static u64 hi(u64 x) { return x >> 16; }
// ha() pre-biases by 0x8000 so that adding a sign-extended 16-bit low
// half afterwards reconstructs the original: (ha(x) << 16) + (i16)lo(x) == x.
static u64 ha(u64 x) { return (x + 0x8000) >> 16; }
// high/higha are the same but truncated to a 16-bit field.
static u64 high(u64 x) { return (x >> 16) & 0xffff; }
static u64 higha(u64 x) { return ((x + 0x8000) >> 16) & 0xffff; }
// .plt is used only for lazy symbol resolution on PPC64. All PLT
// calls are made via range extension thunks even if they are within
// reach. Thunks read addresses from .got.plt and jump there.
// Therefore, once PLT symbols are resolved and final addresses are
// written to .got.plt, thunks just skip .plt and directly jump to the
// resolved addresses.
// Write the lazy-binding header of .plt (ELFv1). The code obtains its
// own address via mflr/bcl/mflr, adds the displacement stored in the
// trailing .quad to reach .got.plt, then loads the resolver's function
// descriptor words from there and branches to it.
template <>
void write_plt_header(Context<E> &ctx, u8 *buf) {
  static const ub32 insn[] = {
    0x7d88'02a6, // mflr r12
    0x429f'0005, // bcl 20, 31, 4 // obtain PC
    0x7d68'02a6, // mflr r11
    0xe84b'0024, // ld r2,36(r11)
    0x7d88'03a6, // mtlr r12
    0x7d62'5a14, // add r11,r2,r11
    0xe98b'0000, // ld r12,0(r11)
    0xe84b'0008, // ld r2,8(r11)
    0x7d89'03a6, // mtctr r12
    0xe96b'0010, // ld r11,16(r11)
    0x4e80'0420, // bctr
    // .quad .got.plt - .plt - 8
    0x0000'0000,
    0x0000'0000,
  };
  static_assert(sizeof(insn) == E::plt_hdr_size);
  memcpy(buf, insn, sizeof(insn));
  // 44 = offset of the trailing .quad (11 instructions * 4 bytes).
  *(ub64 *)(buf + 44) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 8;
}
// Write one lazy PLT entry: materialize the entry's own PLT index in %r0
// and branch back to the header (plt0).
template <>
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  ub32 *loc = (ub32 *)buf;
  i64 idx = sym.get_plt_idx(ctx);
  // The PPC64 ELFv1 ABI requires PLT entries to vary in size depending
  // on their indices. Unlike other targets, .got.plt is filled not by us
  // but by the loader, so we don't have control over where the initial
  // call to the PLT entry jumps to. So we need to strictly follow the PLT
  // section layout as the loader expects it to be.
  if (idx < 0x8000) {
    // A small index fits in a single `li` immediate: 2-word entry.
    static const ub32 insn[] = {
      0x3800'0000, // li r0, PLT_INDEX
      0x4b00'0000, // b plt0
    };
    memcpy(loc, insn, sizeof(insn));
    loc[0] |= idx;
    // Backward branch displacement; -4 is the offset of the `b` itself.
    loc[1] |= (ctx.plt->shdr.sh_addr - sym.get_plt_addr(ctx) - 4) & 0x00ff'ffff;
  } else {
    // A large index needs lis/ori to build it: 3-word entry.
    static const ub32 insn[] = {
      0x3c00'0000, // lis r0, PLT_INDEX@high
      0x6000'0000, // ori r0, r0, PLT_INDEX@lo
      0x4b00'0000, // b plt0
    };
    memcpy(loc, insn, sizeof(insn));
    loc[0] |= high(idx);
    loc[1] |= lo(idx);
    loc[2] |= (ctx.plt->shdr.sh_addr - sym.get_plt_addr(ctx) - 8) & 0x00ff'ffff;
  }
}
// .plt.got is not necessary on PPC64 because range extension thunks
// directly read GOT entries and jump there.
template <>
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {} // intentionally a no-op on PPC64 ELFv1
// Apply one relocation inside the output .eh_frame. Only the few types
// that appear in unwind information are supported.
template <>
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
                                    u64 offset, u64 val) {
  u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
  switch (rel.r_type) {
  case R_NONE:
    break;
  case R_PPC64_ADDR64:
    *(ub64 *)loc = val;
    break;
  case R_PPC64_REL32:
    // PC-relative to this spot in the output .eh_frame.
    *(ub32 *)loc = val - this->shdr.sh_addr - offset;
    break;
  case R_PPC64_REL64:
    *(ub64 *)loc = val - this->shdr.sh_addr - offset;
    break;
  default:
    Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
  }
}
// Apply relocations to an allocated PPC64 ELFv1 section: compute each
// relocation's final value from the symbol and addend and patch the
// output buffer in place.
template <>
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  // Cursor into this section's slice of the dynamic relocation table,
  // handed to apply_toc_rel for any dynamic relocations it emits.
  ElfRel<E> *dynrel = nullptr;
  if (ctx.reldyn)
    dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
                           file.reldyn_offset + this->reldyn_offset);
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE)
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;
    // Report a value that doesn't fit the relocation's bitfield.
    auto check = [&](i64 val, i64 lo, i64 hi) {
      if (val < lo || hi <= val)
        Error(ctx) << *this << ": relocation " << rel << " against "
                   << sym << " out of range: " << val << " is not in ["
                   << lo << ", " << hi << ")";
    };
    // Standard psABI relocation formula inputs: S = symbol value,
    // A = addend, P = place being relocated, G = GOT slot offset,
    // TOC = the TOC base pointer value.
    u64 S = sym.get_addr(ctx);
    u64 A = rel.r_addend;
    u64 P = get_addr() + rel.r_offset;
    u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>);
    u64 GOT = ctx.got->shdr.sh_addr;
    u64 TOC = ctx.extra.TOC->value;
    switch (rel.r_type) {
    case R_PPC64_ADDR64:
      apply_toc_rel(ctx, sym, rel, loc, S, A, P, dynrel);
      break;
    case R_PPC64_TOC:
      apply_toc_rel(ctx, *ctx.extra.TOC, rel, loc, TOC, A, P, dynrel);
      break;
    case R_PPC64_TOC16_HA:
      *(ub16 *)loc = ha(S + A - TOC);
      break;
    case R_PPC64_TOC16_LO:
      *(ub16 *)loc = lo(S + A - TOC);
      break;
    case R_PPC64_TOC16_DS:
      // DS-form: the two low bits belong to the instruction, hence 0xfffc.
      check(S + A - TOC, -(1 << 15), 1 << 15);
      *(ub16 *)loc |= (S + A - TOC) & 0xfffc;
      break;
    case R_PPC64_TOC16_LO_DS:
      *(ub16 *)loc |= (S + A - TOC) & 0xfffc;
      break;
    case R_PPC64_REL24: {
      // Branch to the function's entry point (NO_OPD), routing through a
      // range extension thunk if the target has a PLT or is out of the
      // +-32MB branch range.
      i64 val = sym.get_addr(ctx, NO_OPD) + A - P;
      if (sym.has_plt(ctx) || sign_extend(val, 25) != val)
        val = get_thunk_addr(i) + A - P;
      check(val, -(1 << 25), 1 << 25);
      *(ub32 *)loc |= bits(val, 25, 2) << 2;
      // If a callee is an external function, PLT saves %r2 to the
      // caller's r2 save slot. We need to restore it after function
      // return. To do so, there's usually a NOP as a placeholder
      // after a BL. 0x6000'0000 is a NOP.
      if (sym.has_plt(ctx) && *(ub32 *)(loc + 4) == 0x6000'0000)
        *(ub32 *)(loc + 4) = 0xe841'0028; // ld r2, 40(r1)
      break;
    }
    case R_PPC64_REL32:
      *(ub32 *)loc = S + A - P;
      break;
    case R_PPC64_REL64:
      *(ub64 *)loc = S + A - P;
      break;
    case R_PPC64_REL16_HA:
      *(ub16 *)loc = ha(S + A - P);
      break;
    case R_PPC64_REL16_LO:
      *(ub16 *)loc = lo(S + A - P);
      break;
    case R_PPC64_PLT16_HA:
      // PLT16 relocations address the symbol's GOT slot relative to TOC.
      *(ub16 *)loc = ha(G + GOT - TOC);
      break;
    case R_PPC64_PLT16_HI:
      *(ub16 *)loc = hi(G + GOT - TOC);
      break;
    case R_PPC64_PLT16_LO:
      *(ub16 *)loc = lo(G + GOT - TOC);
      break;
    case R_PPC64_PLT16_LO_DS:
      *(ub16 *)loc |= (G + GOT - TOC) & 0xfffc;
      break;
    case R_PPC64_GOT_TPREL16_HA:
      *(ub16 *)loc = ha(sym.get_gottp_addr(ctx) - TOC);
      break;
    case R_PPC64_GOT_TLSGD16_HA:
      *(ub16 *)loc = ha(sym.get_tlsgd_addr(ctx) - TOC);
      break;
    case R_PPC64_GOT_TLSGD16_LO:
      *(ub16 *)loc = lo(sym.get_tlsgd_addr(ctx) - TOC);
      break;
    case R_PPC64_GOT_TLSLD16_HA:
      *(ub16 *)loc = ha(ctx.got->get_tlsld_addr(ctx) - TOC);
      break;
    case R_PPC64_GOT_TLSLD16_LO:
      *(ub16 *)loc = lo(ctx.got->get_tlsld_addr(ctx) - TOC);
      break;
    case R_PPC64_DTPREL16_HA:
      *(ub16 *)loc = ha(S + A - ctx.dtp_addr);
      break;
    case R_PPC64_DTPREL16_LO:
      *(ub16 *)loc = lo(S + A - ctx.dtp_addr);
      break;
    case R_PPC64_TPREL16_HA:
      *(ub16 *)loc = ha(S + A - ctx.tp_addr);
      break;
    case R_PPC64_TPREL16_LO:
      *(ub16 *)loc = lo(S + A - ctx.tp_addr);
      break;
    case R_PPC64_GOT_TPREL16_LO_DS:
      *(ub16 *)loc |= (sym.get_gottp_addr(ctx) - TOC) & 0xfffc;
      break;
    case R_PPC64_PLTSEQ:
    case R_PPC64_PLTCALL:
    case R_PPC64_TLS:
    case R_PPC64_TLSGD:
    case R_PPC64_TLSLD:
      // Marker relocations; nothing to patch.
      break;
    default:
      // scan_relocations already rejected anything else.
      unreachable();
    }
  }
}
// Apply relocations to a non-SHF_ALLOC section (e.g. debug info). Only
// link-time-constant values are valid here; anything else is fatal.
template <>
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;
    // Report a value that doesn't fit the relocation's bitfield.
    auto check = [&](i64 val, i64 lo, i64 hi) {
      if (val < lo || hi <= val)
        Error(ctx) << *this << ": relocation " << rel << " against "
                   << sym << " out of range: " << val << " is not in ["
                   << lo << ", " << hi << ")";
    };
    // If the target was merged into a section fragment, relocate against
    // the fragment instead of the original symbol.
    SectionFragment<E> *frag;
    i64 frag_addend;
    std::tie(frag, frag_addend) = get_fragment(ctx, rel);
    u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
    u64 A = frag ? frag_addend : (i64)rel.r_addend;
    switch (rel.r_type) {
    case R_PPC64_ADDR64:
      // A tombstone value replaces references to discarded data.
      if (std::optional<u64> val = get_tombstone(sym, frag))
        *(ub64 *)loc = *val;
      else
        *(ub64 *)loc = S + A;
      break;
    case R_PPC64_ADDR32: {
      i64 val = S + A;
      check(val, 0, 1LL << 32);
      *(ub32 *)loc = val;
      break;
    }
    case R_PPC64_DTPREL64:
      *(ub64 *)loc = S + A - ctx.dtp_addr;
      break;
    default:
      Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: "
                 << rel;
    }
  }
}
// Scan pass for allocated PPC64 ELFv1 sections: record, per symbol, which
// linker-synthesized resources (GOT/PLT/TLS entries, .opd descriptors)
// the relocations in this section will require.
template <>
void InputSection<E>::scan_relocations(Context<E> &ctx) {
  assert(shdr().sh_flags & SHF_ALLOC);
  // Reserve this section's slice of the dynamic relocation table.
  this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  // Scan relocations
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    // On ELFv1, an ifunc also needs a function descriptor in .opd.
    if (sym.is_ifunc())
      sym.flags |= NEEDS_GOT | NEEDS_PLT | NEEDS_PPC_OPD;
    // Any relocation except R_PPC64_REL24 is considered an
    // address-taking relocation, so the function needs an .opd entry
    // (function pointers on ELFv1 point at descriptors, not code).
    if (rel.r_type != R_PPC64_REL24 && sym.get_type() == STT_FUNC)
      sym.flags |= NEEDS_PPC_OPD;
    switch (rel.r_type) {
    case R_PPC64_ADDR64:
    case R_PPC64_TOC:
      scan_toc_rel(ctx, sym, rel);
      break;
    case R_PPC64_GOT_TPREL16_HA:
      sym.flags |= NEEDS_GOTTP;
      break;
    case R_PPC64_REL24:
      if (sym.is_imported)
        sym.flags |= NEEDS_PLT;
      break;
    case R_PPC64_PLT16_HA:
      // Resolved through the symbol's GOT slot (see apply_reloc_alloc).
      sym.flags |= NEEDS_GOT;
      break;
    case R_PPC64_GOT_TLSGD16_HA:
      sym.flags |= NEEDS_TLSGD;
      break;
    case R_PPC64_GOT_TLSLD16_HA:
      // Local-dynamic TLS uses one module-wide GOT entry.
      ctx.needs_tlsld = true;
      break;
    case R_PPC64_TPREL16_HA:
    case R_PPC64_TPREL16_LO:
      // TLS local-exec model; check_tlsle verifies it is permissible here.
      check_tlsle(ctx, sym, rel);
      break;
    case R_PPC64_REL32:
    case R_PPC64_REL64:
    case R_PPC64_TOC16_HA:
    case R_PPC64_TOC16_LO:
    case R_PPC64_TOC16_LO_DS:
    case R_PPC64_TOC16_DS:
    case R_PPC64_REL16_HA:
    case R_PPC64_REL16_LO:
    case R_PPC64_PLT16_HI:
    case R_PPC64_PLT16_LO:
    case R_PPC64_PLT16_LO_DS:
    case R_PPC64_PLTSEQ:
    case R_PPC64_PLTCALL:
    case R_PPC64_GOT_TPREL16_LO_DS:
    case R_PPC64_GOT_TLSGD16_LO:
    case R_PPC64_GOT_TLSLD16_LO:
    case R_PPC64_TLS:
    case R_PPC64_TLSGD:
    case R_PPC64_TLSLD:
    case R_PPC64_DTPREL16_HA:
    case R_PPC64_DTPREL16_LO:
      // Nothing to prepare; resolved in place (or already covered by the
      // corresponding _HA case above).
      break;
    default:
      Error(ctx) << *this << ": unknown relocation: " << rel;
    }
  }
}
// Write out this thunk's machine code. Each entry is E::thunk_size bytes
// and picks one of three templates depending on how the destination must
// be reached.
template <>
void RangeExtensionThunk<E>::copy_buf(Context<E> &ctx) {
  u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset;
  // If the destination is .plt.got, we save the current r2, read an
  // address of a function descriptor from .got, restore %r2 and jump
  // to the function.
  static const ub32 pltgot_thunk[] = {
    // Store the caller's %r2
    0xf841'0028, // std %r2, 40(%r1)
    // Load an address of a function descriptor
    0x3d82'0000, // addis %r12, %r2, foo@got@toc@ha
    0xe98c'0000, // ld %r12, foo@got@toc@lo(%r12)
    // Restore the callee's %r2
    0xe84c'0008, // ld %r2, 8(%r12)
    // Jump to the function
    0xe98c'0000, // ld %r12, 0(%r12)
    0x7d89'03a6, // mtctr %r12
    0x4e80'0420, // bctr
  };
  // If the destination is .plt, read a function descriptor from .got.plt.
  static const ub32 plt_thunk[] = {
    // Store the caller's %r2
    0xf841'0028, // std %r2, 40(%r1)
    // Materialize an address of a function descriptor
    0x3d82'0000, // addis %r12, %r2, foo@gotplt@toc@ha
    0x398c'0000, // addi %r12, %r12, foo@gotplt@toc@lo
    // Restore the callee's %r2
    0xe84c'0008, // ld %r2, 8(%r12)
    // Jump to the function
    0xe98c'0000, // ld %r12, 0(%r12)
    0x7d89'03a6, // mtctr %r12
    0x4e80'0420, // bctr
  };
  // If the destination is a non-imported function, we directly jump
  // to the function entry address.
  static const ub32 local_thunk[] = {
    0x3d82'0000, // addis r12, r2, foo@toc@ha
    0x398c'0000, // addi r12, r12, foo@toc@lo
    0x7d89'03a6, // mtctr r12
    0x4e80'0420, // bctr
    0x6000'0000, // nop
    0x6000'0000, // nop
    0x6000'0000, // nop
  };
  static_assert(E::thunk_size == sizeof(pltgot_thunk));
  static_assert(E::thunk_size == sizeof(plt_thunk));
  static_assert(E::thunk_size == sizeof(local_thunk));
  for (i64 i = 0; i < symbols.size(); i++) {
    Symbol<E> &sym = *symbols[i];
    ub32 *loc = (ub32 *)(buf + i * E::thunk_size);
    // Patch the addis/addi (or addis/ld) TOC-relative offset pair at the
    // template's word indices.
    if (sym.has_got(ctx)) {
      memcpy(loc, pltgot_thunk, sizeof(pltgot_thunk));
      i64 val = sym.get_got_addr(ctx) - ctx.extra.TOC->value;
      loc[1] |= higha(val);
      loc[2] |= lo(val);
    } else if(sym.has_plt(ctx)) {
      memcpy(loc, plt_thunk, sizeof(plt_thunk));
      i64 val = sym.get_gotplt_addr(ctx) - ctx.extra.TOC->value;
      loc[1] |= higha(val);
      loc[2] |= lo(val);
    } else {
      memcpy(loc, local_thunk, sizeof(local_thunk));
      // NO_OPD: branch to the real entry point, not the descriptor.
      i64 val = sym.get_addr(ctx, NO_OPD) - ctx.extra.TOC->value;
      loc[0] |= higha(val);
      loc[1] |= lo(val);
    }
  }
}
// Return `file`'s input ".opd" section, or null if the file has none.
static InputSection<E> *get_opd_section(ObjectFile<E> &file) {
  for (std::unique_ptr<InputSection<E>> &sec : file.sections) {
    InputSection<E> *isec = sec.get();
    if (isec && isec->name() == ".opd")
      return isec;
  }
  return nullptr;
}
// Binary-search `isec`'s relocations (sorted by r_offset) for one that
// applies exactly at `offset`. Returns null if there is no such reloc.
static ElfRel<E> *
get_relocation_at(Context<E> &ctx, InputSection<E> &isec, i64 offset) {
  std::span<ElfRel<E>> rels = isec.get_rels(ctx);
  auto pos = std::lower_bound(rels.begin(), rels.end(), offset,
                              [](const ElfRel<E> &r, i64 off) {
    return r.r_offset < off;
  });
  if (pos == rels.end() || pos->r_offset != offset)
    return nullptr;
  return &*pos;
}
// Maps an offset within an input .opd section to the function symbol
// whose descriptor lives at that offset. Ordered by offset so that a
// sorted vector of these supports binary search (see get_opd_sym_at).
struct OpdSymbol {
  bool operator<(const OpdSymbol &x) const { return r_offset < x.r_offset; }
  u64 r_offset = 0;
  Symbol<E> *sym = nullptr;
};
// Look up the function symbol whose .opd descriptor starts exactly at
// `offset`. `syms` must be sorted by r_offset; returns null on no match.
static Symbol<E> *
get_opd_sym_at(Context<E> &ctx, std::span<OpdSymbol> syms, u64 offset) {
  auto pos = std::lower_bound(syms.begin(), syms.end(), OpdSymbol{offset});
  if (pos == syms.end() || pos->r_offset != offset)
    return nullptr;
  return pos->sym;
}
// Compiler creates an .opd entry for each function symbol. The intention
// is to make it possible to create an output .opd section just by linking
// input .opd sections in the same manner as we do to other normal input
// sections.
//
// However, in reality, .opd isn't a normal input section. It needs many
// special treatments as follows:
//
// 1. A function symbol refers to not a .text but an .opd. Its address
// works fine for address-taking relocations such as R_PPC64_ADDR64.
// However, R_PPC64_REL24 (which is used for branch instruction) needs
// a function's real address instead of the function's .opd address.
// We need to read .opd contents to find out a function entry point
// address to apply R_PPC64_REL24.
//
// 2. Output .opd entries are needed only for functions whose addresses
// are taken. Just copying input .opd sections to an output would
// produce lots of dead .opd entries.
//
// 3. In this design, all function symbols refer to an .opd section, and
// that doesn't work well with graph traversal optimizations such as
// garbage collection or identical comdat folding. For example, garbage
// collector would mark an .opd section alive, which in turn marks all
// functions that are referenced by .opd as alive, effectively keeping all
// functions as alive.
//
// The problem is that the compiler creates a half-baked .opd section, and
// the linker has to figure out what all these .opd entries and
// relocations are trying to achieve. It's like the compiler would emit a
// half-baked .plt section in an object file and the linker has to deal
// with that. That's not a good design.
//
// So, in this function, we undo what the compiler did to .opd. We remove
// function symbols from .opd and reattach them to their function entry
// points. We also rewrite relocations that directly refer to an input
// .opd section so that they refer to function symbols instead. We then
// mark input .opd sections as dead.
//
// After this function, we mark symbols with the NEEDS_PPC_OPD flag if the
// symbol needs an .opd entry. We then create an output .opd just like we
// do for .plt or .got.
// Undo the compiler's .opd setup (see the comment above): detach each
// function symbol from .opd and reattach it to the section its
// descriptor points at, rewrite relocations that pointed into .opd so
// they target those symbols, and mark the input .opd sections dead.
// An output .opd is synthesized later for symbols with NEEDS_PPC_OPD.
void ppc64v1_rewrite_opd(Context<E> &ctx) {
  tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
    InputSection<E> *opd = get_opd_section(*file);
    if (!opd)
      return;
    opd->is_alive = false;
    // Move function symbols from .opd to the sections their descriptors
    // reference, remembering the old descriptor offsets in opd_syms.
    std::vector<OpdSymbol> opd_syms;
    for (Symbol<E> *sym : file->symbols) {
      if (sym->file != file || sym->get_input_section() != opd)
        continue;
      if (u32 ty = sym->get_type(); ty != STT_FUNC && ty != STT_GNU_IFUNC)
        continue;
      // The descriptor's first word carries a relocation telling us
      // which section/offset the function entry point lives at.
      ElfRel<E> *rel = get_relocation_at(ctx, *opd, sym->value);
      if (!rel)
        Fatal(ctx) << *file << ": cannot find a relocation in .opd for "
                   << *sym << " at offset 0x" << std::hex << (u64)sym->value;
      Symbol<E> *sym2 = file->symbols[rel->r_sym];
      if (sym2->get_type() != STT_SECTION)
        Fatal(ctx) << *file << ": bad relocation in .opd referring " << *sym2;
      opd_syms.push_back({sym->value, sym});
      sym->set_input_section(sym2->get_input_section());
      sym->value = rel->r_addend;
    }
    // Sort symbols so that get_opd_sym_at() can do binary search.
    sort(opd_syms);
    // Rewrite relocations that refer into .opd so that they refer to the
    // corresponding function symbols instead.
    for (std::unique_ptr<InputSection<E>> &isec : file->sections) {
      if (!isec || !isec->is_alive || isec.get() == opd)
        continue;
      for (ElfRel<E> &r : isec->get_rels(ctx)) {
        Symbol<E> &sym = *file->symbols[r.r_sym];
        if (sym.get_input_section() != opd)
          continue;
        Symbol<E> *real_sym = get_opd_sym_at(ctx, opd_syms, r.r_addend);
        if (!real_sym)
          Fatal(ctx) << *isec << ": cannot find a symbol in .opd for " << r
                     << " at offset 0x" << std::hex << (u64)r.r_addend;
        r.r_sym = real_sym->sym_idx;
        r.r_addend = 0;
      }
    }
  });
}
// When a function is exported, the dynamic symbol for the function should
// refer to the function's .opd entry. This function marks such symbols
// with NEEDS_PPC_OPD.
void ppc64v1_scan_symbols(Context<E> &ctx) {
  // Every exported function defined in this link gets an .opd entry,
  // since its dynamic symbol must point at a descriptor.
  tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
    for (Symbol<E> *sym : file->symbols)
      if (sym->file == file && sym->is_exported)
        if (u32 ty = sym->get_type(); ty == STT_FUNC || ty == STT_GNU_IFUNC)
          sym->flags |= NEEDS_PPC_OPD;
  });
  // Functions referenced by the ELF header also have to have .opd entries.
  auto mark = [&](std::string_view name) {
    if (!name.empty())
      if (Symbol<E> &sym = *get_symbol(ctx, name); !sym.is_imported)
        sym.flags |= NEEDS_PPC_OPD;
  };
  mark(ctx.arg.entry);
  mark(ctx.arg.init);
  mark(ctx.arg.fini);
}
// Register `sym` as needing an output .opd descriptor and grow the
// section by one entry. The descriptor index is the symbol's position
// in `symbols`, so the index must be recorded before the push_back.
void PPC64OpdSection::add_symbol(Context<E> &ctx, Symbol<E> *sym) {
  sym->set_opd_idx(ctx, symbols.size());
  symbols.push_back(sym);
  this->shdr.sh_size += ENTRY_SIZE;
}
// In a PIC link, each .opd descriptor needs two R_RELATIVE dynamic
// relocations (one for the entry address, one for the TOC value);
// a non-PIC link needs none.
i64 PPC64OpdSection::get_reldyn_size(Context<E> &ctx) const {
  return ctx.arg.pic ? symbols.size() * 2 : 0;
}
// Fill in the synthesized output .opd: one 24-byte descriptor per
// registered symbol, laid out as {function entry address, TOC value,
// environment pointer (always 0)}. For PIC output, also emit two
// R_RELATIVE dynamic relocations per entry so the loader can rebase
// the first two words.
void PPC64OpdSection::copy_buf(Context<E> &ctx) {
  ub64 *buf = (ub64 *)(ctx.buf + this->shdr.sh_offset);
  ElfRel<E> *rel = nullptr;
  if (ctx.arg.pic)
    rel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset + reldyn_offset);
  for (Symbol<E> *sym : symbols) {
    // NO_PLT | NO_OPD: the descriptor must record the real entry point,
    // not a PLT stub or an .opd address.
    u64 addr = sym->get_addr(ctx, NO_PLT | NO_OPD);
    *buf++ = addr;
    *buf++ = ctx.extra.TOC->value;
    *buf++ = 0;
    if (ctx.arg.pic) {
      u64 loc = sym->get_opd_addr(ctx);
      *rel++ = ElfRel<E>(loc, E::R_RELATIVE, 0, addr);
      *rel++ = ElfRel<E>(loc + 8, E::R_RELATIVE, 0, ctx.extra.TOC->value);
    }
  }
}
} // namespace mold::elf

555
third_party/mold/elf/arch-ppc64v2.cc vendored Normal file
View file

@ -0,0 +1,555 @@
// clang-format off
// This file implements the PowerPC ELFv2 ABI which was standardized in
// 2014. Modern little-endian PowerPC systems are based on this ABI.
// The ABI is often referred to as "ppc64le". This shouldn't be confused
// with "ppc64" which refers to the original, big-endian PowerPC systems.
//
// PPC64 is a bit tricky to support because PC-relative load/store
// instructions hadn't been available until Power10 which debuted in 2021.
// Prior to Power10, it wasn't trivial for position-independent code (PIC)
// to load a value from, for example, .got, as we can't do that with [PC +
// the offset to the .got entry].
//
// In the following, I'll explain how PIC is supported on pre-Power10
// systems first and then explain what has changed with Power10.
//
//
// Position-independent code on Power9 or earlier:
//
// We can get the program counter on older PPC64 systems with the
// following four instructions
//
// mflr r1 // save the current link register to r1
// bl .+4 // branch to the next instruction as if it were a function
// mflr r0 // copy the return address to r0
// mtlr r1 // restore the original link register value
//
// , but it's too expensive to do if we do this for each load/store.
//
// As a workaround, most functions are compiled in such a way that r2 is
// assumed to always contain the address of .got + 0x8000. With this, we
// can for example load the first entry of .got with a single instruction
// `lw r0, -0x8000(r2)`. r2 is called the TOC pointer.
//
// There's only one .got for each ELF module. Therefore, if a callee is in
// the same ELF module, r2 doesn't have to be recomputed. Most function
// calls are usually within the same ELF module, so this mechanism is
// efficient.
//
// A function compiled for pre-Power10 usually has two entry points,
// global and local. The global entry point usually precedes the local
// entry point by 8 bytes. In between are the following instructions:
//
// addis r2, r12, .TOC.@ha
// addi r2, r2, .TOC.@lo + 4;
//
// The global entry point assumes that the address of itself is in r12,
// and it computes its own TOC pointer from r12. It's easy to do so for
// the callee because the offset between its .got + 0x8000 and the
// function is known at link-time. The above code sequence then falls
// through to the local entry point that assumes r2 is .got + 0x8000.
//
// So, if a callee's TOC pointer is different from the current one
// (e.g. calling a function in another .so), we first load the callee's
// address to r12 (e.g. from .got.plt with a r2-relative load) and branch
// to that address. Then the callee computes its own TOC pointer using
// r12.
//
//
// Position-independent code on Power10:
//
// Power10 added 8-bytes-long instructions to the ISA. Some of them are
// PC-relative load/store instructions that take 34 bits offsets.
// Functions compiled with `-mcpu=power10` use these instructions for PIC.
// r2 does not have a special meaning in such functions.
//
// When a function compiled for Power10 calls a function that uses the TOC
// pointer, we need to compute a correct value for TOC and set it to r2
// before transferring the control to the callee. Thunks are responsible
// for doing it.
//
// `_NOTOC` relocations such as `R_PPC64_REL24_NOTOC` indicate that the
// callee does not use TOC (i.e. compiled with `-mcpu=power10`). If a
// function using TOC is referenced via a `_NOTOC` relocation, that call
// is made through a range extension thunk.
//
//
// Note on section names: the PPC64 psABI uses a weird naming convention
// which calls .got.plt .plt. We ignored that part because it's just
// confusing. Since the runtime only cares about segments, we should be
// able to name sections whatever we want.
//
// https://github.com/rui314/psabi/blob/main/ppc64v2.pdf
#include "third_party/mold/elf/mold.h"
namespace mold::elf {
using E = PPC64V2;
// Halfword extraction helpers for PPC @lo/@hi/@ha-style relocation math.
static u64 lo(u64 x) { return x & 0xffff; }
static u64 hi(u64 x) { return x >> 16; }
// ha()/higha() pre-bias by 0x8000 so that adding a sign-extended 16-bit
// low half afterwards reconstructs the original value.
static u64 ha(u64 x) { return (x + 0x8000) >> 16; }
static u64 high(u64 x) { return (x >> 16) & 0xffff; }
static u64 higha(u64 x) { return ((x + 0x8000) >> 16) & 0xffff; }
// Split a 34-bit displacement into the immediate fields of an 8-byte
// prefixed instruction: bits 33..16 go into the prefix word (the low
// half of the ul64), bits 15..0 into the suffix word.
static u64 prefix34(u64 x) {
  return bits(x, 33, 16) | (bits(x, 15, 0) << 32);
}
// .plt is used only for lazy symbol resolution on PPC64. All PLT
// calls are made via range extension thunks even if they are within
// reach. Thunks read addresses from .got.plt and jump there.
// Therefore, once PLT symbols are resolved and final addresses are
// written to .got.plt, thunks just skip .plt and directly jump to the
// resolved addresses.
// Write the lazy-binding header of .plt (ELFv2). The code obtains its
// own address PC-relatively, computes the index of the PLT entry that
// branched here from the entry's address in %r12, then jumps through
// the first .got.plt words. The trailing 8-byte datum holds the
// .plt-to-.got.plt displacement read by "ld r0, 44(r11)".
template <>
void write_plt_header(Context<E> &ctx, u8 *buf) {
  static const ul32 insn[] = {
    // Get PC
    0x7c08'02a6, // mflr r0
    0x429f'0005, // bcl 20, 31, 4 // obtain PC
    0x7d68'02a6, // mflr r11
    0x7c08'03a6, // mtlr r0
    // Compute the PLT entry index
    0xe80b'002c, // ld r0, 44(r11)
    0x7d8b'6050, // subf r12, r11, r12
    0x7d60'5a14, // add r11, r0, r11
    0x380c'ffcc, // addi r0, r12, -52
    0x7800'f082, // rldicl r0, r0, 62, 2
    // Load .got.plt[0] and .got.plt[1] and branch to .got.plt[0]
    0xe98b'0000, // ld r12, 0(r11)
    0x7d89'03a6, // mtctr r12
    0xe96b'0008, // ld r11, 8(r11)
    0x4e80'0420, // bctr
    // .quad .got.plt - .plt - 8
    0x0000'0000,
    0x0000'0000,
  };
  // Sanity check, matching the ELFv1 counterpart: the instruction
  // sequence must be exactly as large as the header size the rest of
  // the linker lays out.
  static_assert(sizeof(insn) == E::plt_hdr_size);
  memcpy(buf, insn, sizeof(insn));
  // 52 = offset of the trailing .quad (13 instructions * 4 bytes).
  *(ul64 *)(buf + 52) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 8;
}
// Write one PLT entry. When the control is transferred to a PLT entry,
// the entry's address is already in %r12 (set by the caller), so a
// single relative branch back to the resolver header (plt0) suffices.
template <>
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  i64 disp = ctx.plt->shdr.sh_addr - sym.get_plt_addr(ctx);
  ul32 insn = 0x4b00'0000 | (disp & 0x00ff'ffff); // b plt0
  *(ul32 *)buf = insn;
}
// .plt.got is not necessary on PPC64 because range extension thunks
// directly read GOT entries and jump there.
template <>
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {} // intentionally a no-op on PPC64 ELFv2
// Apply one relocation inside the output .eh_frame. Only the few types
// that appear in unwind information are supported.
template <>
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
                                    u64 offset, u64 val) {
  u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
  switch (rel.r_type) {
  case R_NONE:
    break;
  case R_PPC64_ADDR64:
    *(ul64 *)loc = val;
    break;
  case R_PPC64_REL32:
    // PC-relative to this spot in the output .eh_frame.
    *(ul32 *)loc = val - this->shdr.sh_addr - offset;
    break;
  case R_PPC64_REL64:
    *(ul64 *)loc = val - this->shdr.sh_addr - offset;
    break;
  default:
    Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
  }
}
// Decode the symbol's ppc_local_entry field (from st_other) into a byte
// offset from the global entry point to the local entry point:
// 0 and 1 mean no separate local entry, 2..6 encode 1<<val bytes, and
// 7 is reserved.
static u64 get_local_entry_offset(Context<E> &ctx, Symbol<E> &sym) {
  i64 val = sym.esym().ppc_local_entry;
  assert(val <= 7);
  if (val == 7)
    Fatal(ctx) << sym << ": local entry offset 7 is reserved";
  if (val == 0 || val == 1)
    return 0;
  return 1 << val;
}
// Apply relocations to an allocated PPC64 ELFv2 section: compute each
// relocation's final value from the symbol and addend and patch the
// output buffer in place.
template <>
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  // Cursor into this section's slice of the dynamic relocation table,
  // handed to the apply_*_rel helpers for any dynamic relocs they emit.
  ElfRel<E> *dynrel = nullptr;
  if (ctx.reldyn)
    dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
                           file.reldyn_offset + this->reldyn_offset);
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE)
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;
    // Standard psABI relocation formula inputs: S = symbol value,
    // A = addend, P = place being relocated, G = GOT slot offset,
    // TOC = the TOC base pointer value.
    u64 S = sym.get_addr(ctx);
    u64 A = rel.r_addend;
    u64 P = get_addr() + rel.r_offset;
    u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>);
    u64 GOT = ctx.got->shdr.sh_addr;
    u64 TOC = ctx.extra.TOC->value;
    // NOTE(review): the +4 presumably skips the thunk's leading %r2-save
    // instruction -- confirm against the thunk layout in this file.
    auto r2save_thunk_addr = [&] { return get_thunk_addr(i); };
    auto no_r2save_thunk_addr = [&] { return get_thunk_addr(i) + 4; };
    switch (rel.r_type) {
    case R_PPC64_ADDR64:
      // .toc entries get TOC-style handling; other 64-bit absolute
      // relocations may turn into dynamic relocations.
      if (name() == ".toc")
        apply_toc_rel(ctx, sym, rel, loc, S, A, P, dynrel);
      else
        apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
      break;
    case R_PPC64_TOC16_HA:
      *(ul16 *)loc = ha(S + A - TOC);
      break;
    case R_PPC64_TOC16_LO:
      *(ul16 *)loc = lo(S + A - TOC);
      break;
    case R_PPC64_TOC16_DS:
    case R_PPC64_TOC16_LO_DS:
      // DS-form: the two low bits belong to the instruction, hence 0xfffc.
      *(ul16 *)loc |= (S + A - TOC) & 0xfffc;
      break;
    case R_PPC64_REL24:
      if (sym.has_plt(ctx) || !sym.esym().preserves_r2()) {
        i64 val = r2save_thunk_addr() + A - P;
        *(ul32 *)loc |= bits(val, 25, 2) << 2;
        // The thunk saves %r2 to the caller's r2 save slot. We need to
        // restore it after function return. To do so, there's usually a
        // NOP as a placeholder after a BL. 0x6000'0000 is a NOP.
        if (*(ul32 *)(loc + 4) == 0x6000'0000)
          *(ul32 *)(loc + 4) = 0xe841'0018; // ld r2, 24(r1)
      } else {
        // Direct branch to the local entry point, falling back to a
        // thunk if the target is out of the +-32MB branch range.
        i64 val = S + get_local_entry_offset(ctx, sym) + A - P;
        if (sign_extend(val, 25) != val)
          val = no_r2save_thunk_addr() + A - P;
        *(ul32 *)loc |= bits(val, 25, 2) << 2;
      }
      break;
    case R_PPC64_REL24_NOTOC:
      // The caller doesn't use TOC; TOC-using callees are reached via a
      // thunk that sets up %r2 for them.
      if (sym.has_plt(ctx) || sym.esym().uses_toc()) {
        i64 val = no_r2save_thunk_addr() + A - P;
        *(ul32 *)loc |= bits(val, 25, 2) << 2;
      } else {
        i64 val = S + A - P;
        if (sign_extend(val, 25) != val)
          val = no_r2save_thunk_addr() + A - P;
        *(ul32 *)loc |= bits(val, 25, 2) << 2;
      }
      break;
    case R_PPC64_REL32:
      *(ul32 *)loc = S + A - P;
      break;
    case R_PPC64_REL64:
      *(ul64 *)loc = S + A - P;
      break;
    case R_PPC64_REL16_HA:
      *(ul16 *)loc = ha(S + A - P);
      break;
    case R_PPC64_REL16_LO:
      *(ul16 *)loc = lo(S + A - P);
      break;
    case R_PPC64_PLT16_HA:
      // PLT16 relocations address the symbol's GOT slot relative to TOC.
      *(ul16 *)loc = ha(G + GOT - TOC);
      break;
    case R_PPC64_PLT16_HI:
      *(ul16 *)loc = hi(G + GOT - TOC);
      break;
    case R_PPC64_PLT16_LO:
      *(ul16 *)loc = lo(G + GOT - TOC);
      break;
    case R_PPC64_PLT16_LO_DS:
      *(ul16 *)loc |= (G + GOT - TOC) & 0xfffc;
      break;
    case R_PPC64_PLT_PCREL34:
    case R_PPC64_PLT_PCREL34_NOTOC:
    case R_PPC64_GOT_PCREL34:
      // Power10 8-byte prefixed instruction with a 34-bit displacement.
      *(ul64 *)loc |= prefix34(G + GOT - P);
      break;
    case R_PPC64_PCREL34:
      *(ul64 *)loc |= prefix34(S + A - P);
      break;
    case R_PPC64_GOT_TPREL16_HA:
      *(ul16 *)loc = ha(sym.get_gottp_addr(ctx) - TOC);
      break;
    case R_PPC64_GOT_TPREL16_LO_DS:
      *(ul16 *)loc |= (sym.get_gottp_addr(ctx) - TOC) & 0xfffc;
      break;
    case R_PPC64_GOT_TPREL_PCREL34:
      *(ul64 *)loc |= prefix34(sym.get_gottp_addr(ctx) - P);
      break;
    case R_PPC64_GOT_TLSGD16_HA:
      *(ul16 *)loc = ha(sym.get_tlsgd_addr(ctx) - TOC);
      break;
    case R_PPC64_GOT_TLSGD16_LO:
      *(ul16 *)loc = lo(sym.get_tlsgd_addr(ctx) - TOC);
      break;
    case R_PPC64_GOT_TLSGD_PCREL34:
      *(ul64 *)loc |= prefix34(sym.get_tlsgd_addr(ctx) - P);
      break;
    case R_PPC64_GOT_TLSLD16_HA:
      *(ul16 *)loc = ha(ctx.got->get_tlsld_addr(ctx) - TOC);
      break;
    case R_PPC64_GOT_TLSLD16_LO:
      *(ul16 *)loc = lo(ctx.got->get_tlsld_addr(ctx) - TOC);
      break;
    case R_PPC64_GOT_TLSLD_PCREL34:
      *(ul64 *)loc |= prefix34(ctx.got->get_tlsld_addr(ctx) - P);
      break;
    case R_PPC64_DTPREL16_HA:
      *(ul16 *)loc = ha(S + A - ctx.dtp_addr);
      break;
    case R_PPC64_DTPREL16_LO:
      *(ul16 *)loc = lo(S + A - ctx.dtp_addr);
      break;
    case R_PPC64_DTPREL34:
      *(ul64 *)loc |= prefix34(S + A - ctx.dtp_addr);
      break;
    case R_PPC64_TPREL16_HA:
      *(ul16 *)loc = ha(S + A - ctx.tp_addr);
      break;
    case R_PPC64_TPREL16_LO:
      *(ul16 *)loc = lo(S + A - ctx.tp_addr);
      break;
    case R_PPC64_PLTSEQ:
    case R_PPC64_PLTSEQ_NOTOC:
    case R_PPC64_PLTCALL:
    case R_PPC64_PLTCALL_NOTOC:
    case R_PPC64_TLS:
    case R_PPC64_TLSGD:
    case R_PPC64_TLSLD:
      // Marker relocations; nothing to patch.
      break;
    default:
      // scan_relocations already rejected anything else.
      unreachable();
    }
  }
}
// Apply relocations to a non-SHF_ALLOC section (e.g. debug sections).
// Such sections are not mapped at runtime, so only link-time-constant
// relocation types are meaningful here.
template <>
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);

  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;

    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;

    // Report an error if a computed value is not in [lo, hi).
    auto check = [&](i64 val, i64 lo, i64 hi) {
      if (val < lo || hi <= val)
        Error(ctx) << *this << ": relocation " << rel << " against "
                   << sym << " out of range: " << val << " is not in ["
                   << lo << ", " << hi << ")";
    };

    // If the relocation refers into a mergeable-string section, the
    // fragment (and its addend) supersedes the symbol value.
    SectionFragment<E> *frag;
    i64 frag_addend;
    std::tie(frag, frag_addend) = get_fragment(ctx, rel);

    // S = symbol/fragment address, A = addend, as in the psABI formulas.
    u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
    u64 A = frag ? frag_addend : (i64)rel.r_addend;

    switch (rel.r_type) {
    case R_PPC64_ADDR64:
      // A reference to a dead section fragment may be replaced with a
      // tombstone value so stale debug info doesn't alias a reused address.
      if (std::optional<u64> val = get_tombstone(sym, frag))
        *(ul64 *)loc = *val;
      else
        *(ul64 *)loc = S + A;
      break;
    case R_PPC64_ADDR32: {
      i64 val = S + A;
      check(val, 0, 1LL << 32);
      *(ul32 *)loc = val;
      break;
    }
    case R_PPC64_DTPREL64:
      *(ul64 *)loc = S + A - ctx.dtp_addr;
      break;
    default:
      Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: "
                 << rel;
    }
  }
}
// Scan relocations to decide which linker-synthesized resources
// (GOT/PLT/TLS entries, dynamic relocations) each referenced symbol
// needs. The values themselves are written later by apply_reloc_alloc.
template <>
void InputSection<E>::scan_relocations(Context<E> &ctx) {
  assert(shdr().sh_flags & SHF_ALLOC);

  this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
  std::span<const ElfRel<E>> rels = get_rels(ctx);

  // Scan relocations
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;

    Symbol<E> &sym = *file.symbols[rel.r_sym];

    // An ifunc is resolved at load time, so any reference to it
    // needs both a GOT and a PLT entry.
    if (sym.is_ifunc())
      sym.flags |= NEEDS_GOT | NEEDS_PLT;

    switch (rel.r_type) {
    case R_PPC64_ADDR64:
      // Absolute words in .toc get special treatment because the TOC
      // is the anchor for most PPC64 addressing.
      if (name() == ".toc")
        scan_toc_rel(ctx, sym, rel);
      else
        scan_dyn_absrel(ctx, sym, rel);
      break;
    case R_PPC64_GOT_TPREL16_HA:
    case R_PPC64_GOT_TPREL_PCREL34:
      sym.flags |= NEEDS_GOTTP;
      break;
    case R_PPC64_REL24:
      if (sym.is_imported)
        sym.flags |= NEEDS_PLT;
      break;
    case R_PPC64_REL24_NOTOC:
      if (sym.is_imported)
        sym.flags |= NEEDS_PLT;
      // A NOTOC call means the object uses PC-relative instructions;
      // remember it so thunks use the Power10 sequences (see
      // RangeExtensionThunk::copy_buf).
      ctx.extra.is_power10 = true;
      break;
    case R_PPC64_PLT16_HA:
    case R_PPC64_PLT_PCREL34:
    case R_PPC64_PLT_PCREL34_NOTOC:
    case R_PPC64_GOT_PCREL34:
      sym.flags |= NEEDS_GOT;
      break;
    case R_PPC64_GOT_TLSGD16_HA:
    case R_PPC64_GOT_TLSGD_PCREL34:
      sym.flags |= NEEDS_TLSGD;
      break;
    case R_PPC64_GOT_TLSLD16_HA:
    case R_PPC64_GOT_TLSLD_PCREL34:
      ctx.needs_tlsld = true;
      break;
    case R_PPC64_TPREL16_HA:
    case R_PPC64_TPREL16_LO:
      // Local-exec TLS is only valid in a main executable.
      check_tlsle(ctx, sym, rel);
      break;
    // The following relocation types need no extra resources;
    // they are fully handled in apply_reloc_alloc.
    case R_PPC64_REL32:
    case R_PPC64_REL64:
    case R_PPC64_TOC16_HA:
    case R_PPC64_TOC16_LO:
    case R_PPC64_TOC16_LO_DS:
    case R_PPC64_TOC16_DS:
    case R_PPC64_REL16_HA:
    case R_PPC64_REL16_LO:
    case R_PPC64_PLT16_HI:
    case R_PPC64_PLT16_LO:
    case R_PPC64_PLT16_LO_DS:
    case R_PPC64_PCREL34:
    case R_PPC64_PLTSEQ:
    case R_PPC64_PLTSEQ_NOTOC:
    case R_PPC64_PLTCALL:
    case R_PPC64_PLTCALL_NOTOC:
    case R_PPC64_GOT_TPREL16_LO_DS:
    case R_PPC64_GOT_TLSGD16_LO:
    case R_PPC64_GOT_TLSLD16_LO:
    case R_PPC64_TLS:
    case R_PPC64_TLSGD:
    case R_PPC64_TLSLD:
    case R_PPC64_DTPREL16_HA:
    case R_PPC64_DTPREL16_LO:
    case R_PPC64_DTPREL34:
      break;
    default:
      Error(ctx) << *this << ": unknown relocation: " << rel;
    }
  }
}
// Write range extension thunks. Each thunk materializes the
// destination address in r12 and branches through the count register;
// r2 (the TOC pointer) is first saved to the caller's stack frame so
// it can be restored after the call.
template <>
void RangeExtensionThunk<E>::copy_buf(Context<E> &ctx) {
  u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset;

  // If the destination is PLT, we read an address from .got.plt or .got
  // and jump there.
  static const ul32 plt_thunk[] = {
    0xf841'0018, // std r2, 24(r1)
    0x3d82'0000, // addis r12, r2, foo@gotplt@toc@ha
    0xe98c'0000, // ld r12, foo@gotplt@toc@lo(r12)
    0x7d89'03a6, // mtctr r12
    0x4e80'0420, // bctr
  };

  // Power10 variant: pld loads the GOT entry PC-relatively, so no
  // TOC-relative addressing is needed.
  static const ul32 plt_thunk_power10[] = {
    0xf841'0018, // std r2, 24(r1)
    0x0410'0000, // pld r12, foo@gotplt@pcrel
    0xe580'0000,
    0x7d89'03a6, // mtctr r12
    0x4e80'0420, // bctr
  };

  // If the destination is a non-imported function, we directly jump
  // to its local entry point.
  static const ul32 local_thunk[] = {
    0xf841'0018, // std r2, 24(r1)
    0x3d82'0000, // addis r12, r2, foo@toc@ha
    0x398c'0000, // addi r12, r12, foo@toc@lo
    0x7d89'03a6, // mtctr r12
    0x4e80'0420, // bctr
  };

  static const ul32 local_thunk_power10[] = {
    0xf841'0018, // std r2, 24(r1)
    0x0610'0000, // pla r12, foo@pcrel
    0x3980'0000,
    0x7d89'03a6, // mtctr r12
    0x4e80'0420, // bctr
  };

  // All templates must have the fixed per-entry size.
  static_assert(E::thunk_size == sizeof(plt_thunk));
  static_assert(E::thunk_size == sizeof(plt_thunk_power10));
  static_assert(E::thunk_size == sizeof(local_thunk));
  static_assert(E::thunk_size == sizeof(local_thunk_power10));

  for (i64 i = 0; i < symbols.size(); i++) {
    Symbol<E> &sym = *symbols[i];
    ul32 *loc = (ul32 *)(buf + i * E::thunk_size);

    if (sym.has_plt(ctx)) {
      // Prefer the canonical GOT slot if one exists; otherwise the
      // lazy-binding .got.plt slot.
      u64 got = sym.has_got(ctx) ? sym.get_got_addr(ctx) : sym.get_gotplt_addr(ctx);

      if (ctx.extra.is_power10) {
        memcpy(loc, plt_thunk_power10, E::thunk_size);
        // The prefixed pld is the second instruction of the thunk,
        // so its PC is get_addr(i) + 4.
        *(ul64 *)(loc + 1) |= prefix34(got - get_addr(i) - 4);
      } else {
        i64 val = got - ctx.extra.TOC->value;
        memcpy(loc, plt_thunk, E::thunk_size);
        loc[1] |= higha(val);
        loc[2] |= lo(val);
      }
    } else {
      if (ctx.extra.is_power10) {
        memcpy(loc, local_thunk_power10, E::thunk_size);
        *(ul64 *)(loc + 1) |= prefix34(sym.get_addr(ctx) - get_addr(i) - 4);
      } else {
        i64 val = sym.get_addr(ctx) - ctx.extra.TOC->value;
        memcpy(loc, local_thunk, E::thunk_size);
        loc[1] |= higha(val);
        loc[2] |= lo(val);
      }
    }
  }
}
} // namespace mold::elf

938
third_party/mold/elf/arch-riscv.cc vendored Normal file
View file

@ -0,0 +1,938 @@
// clang-format off
// RISC-V is a clean RISC ISA. It supports PC-relative load/store for
// position-independent code. Its 32-bit and 64-bit ISAs are almost
// identical. That is, you can think RV32 as a RV64 without 64-bit
// operations. In this file, we support both RV64 and RV32.
//
// RISC-V is essentially little-endian, but the big-endian version is
// available as an extension. GCC supports `-mbig-endian` to generate
// big-endian code. Even in big-endian mode, machine instructions are
// defined to be encoded in little-endian, though. Only the behavior of
// load/store instructions are different between LE RISC-V and BE RISC-V.
//
// From the linker's point of view, the RISC-V's psABI is unique because
// sections in input object files can be shrunk while being copied to the
// output file. That is contrary to other psABIs in which sections are an
// atomic unit of copying. Let me explain it in more details.
//
// Since RISC-V instructions are 16-bit or 32-bit long, there's no way to
// embed a very large immediate into a branch instruction. In fact, JAL
// (jump and link) instruction can jump to only within PC ± 1 MiB because
// its immediate is only 21 bits long. If the destination is out of its
// reach, we need to use two instructions instead; the first instruction
// being AUIPC which sets upper 20 bits to a register and the second being
// JALR with a 12-bit immediate and the register. Combined, they specify a
// 32 bits displacement.
//
// Other RISC ISAs have the same limitation, and they solved the problem by
// letting the linker create so-called "range extension thunks". It works as
// follows: the compiler optimistically emits single jump instructions for
// function calls. If the linker finds that a branch target is out of reach,
// it emits a small piece of machine code near the branch instruction and
// redirect the branch to the linker-synthesized code. The code constructs a
// full 32-bit address in a register and jump to the destination. That
// linker-synthesized code is called "range extension thunks" or just
// "thunks".
//
// The RISC-V psABI is unique that it works the other way around. That is,
// for RISC-V, the compiler always emits two instructions (AUIPC + JAL) for
// function calls. If the linker finds the destination is reachable with a
// single instruction, it replaces the two instructions with the one and
// shrink the section size by one instruction length, instead of filling the
// gap with a nop.
//
// With the presence of this relaxation, sections can no longer be
// considered as an atomic unit. If we delete 4 bytes from the middle of a
// section, all contents after that point needs to be shifted by 4. Symbol
// values and relocation offsets have to be adjusted accordingly if they
// refer to past the deleted bytes.
//
// In mold, we use `r_deltas` to memorize how many bytes have been adjusted
// for relocations. For symbols, we directly mutate their `value` member.
//
// RISC-V object files tend to have way more relocations than those for
// other targets. This is because all branches, including ones that jump
// within the same section, are explicitly expressed with relocations.
// Here is why we need them: all control-flow statements such as `if` or
// `for` are implemented using branch instructions. For other targets, the
// compiler doesn't emit relocations for such branches because they know
// at compile-time exactly how many bytes have to be skipped. That's not
// true for RISC-V because the linker may delete bytes between a branch and
// its destination. Therefore, all branches including in-section ones have
// to be explicitly expressed with relocations.
//
// Note that this mechanism only shrink sections and never enlarge, as
// the compiler always emits the longest instruction sequence. This
// makes the linker implementation a bit simpler because we don't need
// to worry about oscillation.
//
// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc
#include "third_party/mold/elf/mold.h"
// MISSING #include <tbb/parallel_for.h>
// MISSING #include <tbb/parallel_for_each.h>
namespace mold::elf {
// Patch the 12-bit immediate (bits 31:20) of an I-type instruction.
static void write_itype(u8 *loc, u32 val) {
  ul32 &insn = *(ul32 *)loc;
  insn &= 0b000000'00000'11111'111'11111'1111111;
  insn |= bits(val, 11, 0) << 20;
}
// Patch the split 12-bit immediate of an S-type (store) instruction:
// imm[11:5] goes to bits 31:25 and imm[4:0] to bits 11:7.
static void write_stype(u8 *loc, u32 val) {
  ul32 &insn = *(ul32 *)loc;
  insn &= 0b000000'11111'11111'111'00000'1111111;
  insn |= bits(val, 11, 5) << 25 | bits(val, 4, 0) << 7;
}
// Patch the scrambled 13-bit branch offset of a B-type instruction.
// Bit 0 of the offset is implicitly zero and is not encoded.
static void write_btype(u8 *loc, u32 val) {
  ul32 &insn = *(ul32 *)loc;
  insn &= 0b000000'11111'11111'111'00000'1111111;
  insn |= bit(val, 12) << 31 | bits(val, 10, 5) << 25 |
          bits(val, 4, 1) << 8 | bit(val, 11) << 7;
}
// Patch the upper-20-bit immediate of a U-type instruction.
//
// U-type instructions are used in combination with I-type
// instructions. The U-type insn sets an immediate to the upper 20
// bits of a register; the I-type insn sign-extends a 12-bit immediate
// and adds it to the register to form the complete value. 0x800 is
// added here to compensate for that sign extension.
static void write_utype(u8 *loc, u32 val) {
  ul32 &insn = *(ul32 *)loc;
  insn &= 0b000000'00000'00000'000'11111'1111111;
  insn |= (val + 0x800) & 0xffff'f000;
}
// Patch the scrambled 21-bit jump offset of a J-type instruction.
// Bit 0 of the offset is implicitly zero and is not encoded.
static void write_jtype(u8 *loc, u32 val) {
  ul32 &insn = *(ul32 *)loc;
  insn &= 0b000000'00000'00000'000'11111'1111111;
  insn |= bit(val, 20) << 31 | bits(val, 10, 1) << 21 |
          bit(val, 11) << 20 | bits(val, 19, 12) << 12;
}
// Patch the 9-bit branch offset of a compressed CB-type instruction.
static void write_cbtype(u8 *loc, u32 val) {
  ul16 &insn = *(ul16 *)loc;
  insn &= 0b111'000'111'00000'11;
  insn |= bit(val, 8) << 12 | bit(val, 4) << 11 | bit(val, 3) << 10 |
          bit(val, 7) << 6 | bit(val, 6) << 5 | bit(val, 2) << 4 |
          bit(val, 1) << 3 | bit(val, 5) << 2;
}
// Patch the 12-bit jump offset of a compressed CJ-type instruction.
static void write_cjtype(u8 *loc, u32 val) {
  ul16 &insn = *(ul16 *)loc;
  insn &= 0b111'00000000000'11;
  insn |= bit(val, 11) << 12 | bit(val, 4) << 11 | bit(val, 9) << 10 |
          bit(val, 8) << 9 | bit(val, 10) << 8 | bit(val, 6) << 7 |
          bit(val, 7) << 6 | bit(val, 3) << 5 | bit(val, 2) << 4 |
          bit(val, 1) << 3 | bit(val, 5) << 2;
}
// Overwrite an existing ULEB128-encoded number in place with `val`,
// keeping the encoded length unchanged: each continuation byte (high
// bit set) is rewritten with 7 bits of the value, and the terminating
// byte receives the final 7 bits with its high bit kept clear.
//
// Without the final store, the last 7-bit group of `val` would be
// silently dropped and the terminator byte left stale.
static void overwrite_uleb(u8 *loc, u64 val) {
  while (*loc & 0b1000'0000) {
    *loc++ = 0b1000'0000 | (val & 0b0111'1111);
    val >>= 7;
  }
  *loc = val & 0b0111'1111;
}
// Returns the rd (destination register) field, bits 11:7, of an
// R/I/U/J-type instruction word.
static u32 get_rd(u32 val) {
  u32 rd = bits(val, 11, 7);
  return rd;
}
// Overwrite the rs1 (source register 1) field, bits 19:15, of an
// instruction with the given register number.
static void set_rs1(u8 *loc, u32 rs1) {
  assert(rs1 < 32);
  ul32 &insn = *(ul32 *)loc;
  insn &= 0b111111'11111'00000'111'11111'1111111;
  insn |= rs1 << 15;
}
// Write the PLT header. On first call through a lazy PLT entry,
// this stub computes the entry's index, loads the link map and the
// resolver address from .got.plt, and jumps to the resolver.
template <typename E>
void write_plt_header(Context<E> &ctx, u8 *buf) {
  static const ul32 insn_64[] = {
    0x0000'0397, // auipc t2, %pcrel_hi(.got.plt)
    0x41c3'0333, // sub t1, t1, t3 # .plt entry + hdr + 12
    0x0003'be03, // ld t3, %pcrel_lo(1b)(t2) # _dl_runtime_resolve
    0xfd43'0313, // addi t1, t1, -44 # .plt entry
    0x0003'8293, // addi t0, t2, %pcrel_lo(1b) # &.got.plt
    0x0013'5313, // srli t1, t1, 1 # .plt entry offset
    0x0082'b283, // ld t0, 8(t0) # link map
    0x000e'0067, // jr t3
  };

  static const ul32 insn_32[] = {
    0x0000'0397, // auipc t2, %pcrel_hi(.got.plt)
    0x41c3'0333, // sub t1, t1, t3 # .plt entry + hdr + 12
    0x0003'ae03, // lw t3, %pcrel_lo(1b)(t2) # _dl_runtime_resolve
    0xfd43'0313, // addi t1, t1, -44 # .plt entry
    0x0003'8293, // addi t0, t2, %pcrel_lo(1b) # &.got.plt
    0x0023'5313, // srli t1, t1, 2 # .plt entry offset
    0x0042'a283, // lw t0, 4(t0) # link map
    0x000e'0067, // jr t3
  };

  if constexpr (E::is_64)
    memcpy(buf, insn_64, sizeof(insn_64));
  else
    memcpy(buf, insn_32, sizeof(insn_32));

  // Patch the PC-relative references to .got.plt into the AUIPC at
  // offset 0 and the two dependent I-type instructions at offsets 8
  // and 16.
  u64 gotplt = ctx.gotplt->shdr.sh_addr;
  u64 plt = ctx.plt->shdr.sh_addr;
  write_utype(buf, gotplt - plt);
  write_itype(buf + 8, gotplt - plt);
  write_itype(buf + 16, gotplt - plt);
}
// Instruction templates for a single PLT entry. The AUIPC/load pair's
// immediates are zero here; write_plt_entry / write_pltgot_entry patch
// in the PC-relative offset to the symbol's GOT(PLT) slot.
static const ul32 plt_entry_64[] = {
  0x0000'0e17, // auipc t3, %pcrel_hi(function@.got.plt)
  0x000e'3e03, // ld t3, %pcrel_lo(1b)(t3)
  0x000e'0367, // jalr t1, t3
  0x0000'0013, // nop
};

static const ul32 plt_entry_32[] = {
  0x0000'0e17, // auipc t3, %pcrel_hi(function@.got.plt)
  0x000e'2e03, // lw t3, %pcrel_lo(1b)(t3)
  0x000e'0367, // jalr t1, t3
  0x0000'0013, // nop
};
// Write a lazy PLT entry for `sym`: an AUIPC/load pair that fetches an
// address from the symbol's .got.plt slot and jumps to it.
template <typename E>
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  if constexpr (E::is_64)
    memcpy(buf, plt_entry_64, sizeof(plt_entry_64));
  else
    memcpy(buf, plt_entry_32, sizeof(plt_entry_32));

  // Patch in the PC-relative displacement from this PLT entry to its
  // .got.plt slot.
  u64 disp = sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx);
  write_utype(buf, disp);
  write_itype(buf + 4, disp);
}
// Write a PLT entry that jumps through the symbol's regular GOT slot
// instead of .got.plt (used when the symbol already has a GOT entry).
template <typename E>
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  if constexpr (E::is_64)
    memcpy(buf, plt_entry_64, sizeof(plt_entry_64));
  else
    memcpy(buf, plt_entry_32, sizeof(plt_entry_32));

  // Patch in the PC-relative displacement from this entry to the GOT slot.
  u64 disp = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx);
  write_utype(buf, disp);
  write_itype(buf + 4, disp);
}
// Apply a relocation inside .eh_frame. Only the relocation types that
// the compiler emits for CFI data are supported.
template <typename E>
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
                                    u64 offset, u64 val) {
  u8 *loc = ctx.buf + this->shdr.sh_offset + offset;

  switch (rel.r_type) {
  case R_NONE:
    break;
  case R_RISCV_ADD32:
    *(U32<E> *)loc += val;
    break;
  case R_RISCV_SUB8:
    *loc -= val;
    break;
  case R_RISCV_SUB16:
    *(U16<E> *)loc -= val;
    break;
  case R_RISCV_SUB32:
    *(U32<E> *)loc -= val;
    break;
  case R_RISCV_SUB6:
    // Only the low 6 bits participate; the top 2 bits are preserved.
    *loc = (*loc & 0b1100'0000) | ((*loc - val) & 0b0011'1111);
    break;
  case R_RISCV_SET6:
    *loc = (*loc & 0b1100'0000) | (val & 0b0011'1111);
    break;
  case R_RISCV_SET8:
    *loc = val;
    break;
  case R_RISCV_SET16:
    *(U16<E> *)loc = val;
    break;
  case R_RISCV_SET32:
    *(U32<E> *)loc = val;
    break;
  case R_RISCV_32_PCREL:
    // PC-relative: subtract the runtime address of the relocated place.
    *(U32<E> *)loc = val - this->shdr.sh_addr - offset;
    break;
  default:
    Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
  }
}
// Apply relocations to an SHF_ALLOC section, taking into account the
// bytes deleted by relaxation (`extra.r_deltas`). `base` points to the
// section's location in the output buffer.
//
// Fix: R_RISCV_ADD8 and R_RISCV_SUB8 previously did `loc += S + A` /
// `loc -= S + A`, which adjusted the local pointer instead of the byte
// it points to. They now modify `*loc`, matching the other ADD*/SUB*
// cases and the nonalloc code path.
template <typename E>
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);

  ElfRel<E> *dynrel = nullptr;
  if (ctx.reldyn)
    dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
                           file.reldyn_offset + this->reldyn_offset);

  // Number of bytes removed before relocation `idx` by relaxation.
  auto get_r_delta = [&](i64 idx) {
    return extra.r_deltas.empty() ? 0 : extra.r_deltas[idx];
  };

  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || rel.r_type == R_RISCV_RELAX)
      continue;

    Symbol<E> &sym = *file.symbols[rel.r_sym];
    i64 r_offset = rel.r_offset - get_r_delta(i);
    i64 removed_bytes = get_r_delta(i + 1) - get_r_delta(i);
    u8 *loc = base + r_offset;

    // Report an error if a computed value is not in [lo, hi).
    auto check = [&](i64 val, i64 lo, i64 hi) {
      if (val < lo || hi <= val)
        Error(ctx) << *this << ": relocation " << rel << " against "
                   << sym << " out of range: " << val << " is not in ["
                   << lo << ", " << hi << ")";
    };

    // A PCREL_LO12 relocation's symbol points at the *_HI20 relocation
    // it pairs with. Search nearby relocations for it.
    auto find_paired_reloc = [&] {
      Symbol<E> &sym = *file.symbols[rels[i].r_sym];
      assert(sym.get_input_section() == this);

      if (sym.value < r_offset) {
        for (i64 j = i - 1; j >= 0; j--)
          if (u32 ty = rels[j].r_type;
              ty == R_RISCV_GOT_HI20 || ty == R_RISCV_TLS_GOT_HI20 ||
              ty == R_RISCV_TLS_GD_HI20 || ty == R_RISCV_PCREL_HI20)
            if (sym.value == rels[j].r_offset - get_r_delta(j))
              return j;
      } else {
        for (i64 j = i + 1; j < rels.size(); j++)
          if (u32 ty = rels[j].r_type;
              ty == R_RISCV_GOT_HI20 || ty == R_RISCV_TLS_GOT_HI20 ||
              ty == R_RISCV_TLS_GD_HI20 || ty == R_RISCV_PCREL_HI20)
            if (sym.value == rels[j].r_offset - get_r_delta(j))
              return j;
      }

      Fatal(ctx) << *this << ": paired relocation is missing: " << i;
    };

    // Standard psABI names: S = symbol value, A = addend, P = place,
    // G = GOT slot offset, GOT = GOT base address.
    u64 S = sym.get_addr(ctx);
    u64 A = rel.r_addend;
    u64 P = get_addr() + r_offset;
    u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>);
    u64 GOT = ctx.got->shdr.sh_addr;

    switch (rel.r_type) {
    case R_RISCV_32:
      if constexpr (E::is_64)
        *(U32<E> *)loc = S + A;
      else
        apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
      break;
    case R_RISCV_64:
      assert(E::is_64);
      apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
      break;
    case R_RISCV_BRANCH:
      check(S + A - P, -(1 << 12), 1 << 12);
      write_btype(loc, S + A - P);
      break;
    case R_RISCV_JAL:
      check(S + A - P, -(1 << 20), 1 << 20);
      write_jtype(loc, S + A - P);
      break;
    case R_RISCV_CALL:
    case R_RISCV_CALL_PLT: {
      u32 rd = get_rd(*(ul32 *)(contents.data() + rel.r_offset + 4));

      if (removed_bytes == 4) {
        // auipc + jalr -> jal
        *(ul32 *)loc = (rd << 7) | 0b1101111;
        write_jtype(loc, S + A - P);
      } else if (removed_bytes == 6 && rd == 0) {
        // auipc + jalr -> c.j
        *(ul16 *)loc = 0b101'00000000000'01;
        write_cjtype(loc, S + A - P);
      } else if (removed_bytes == 6 && rd == 1) {
        // auipc + jalr -> c.jal
        assert(!E::is_64);
        *(ul16 *)loc = 0b001'00000000000'01;
        write_cjtype(loc, S + A - P);
      } else {
        assert(removed_bytes == 0);

        // Calling an undefined weak symbol does not make sense.
        // We make such call into an infinite loop. This should
        // help debugging of a faulty program.
        u64 val = sym.esym().is_undef_weak() ? 0 : S + A - P;
        check(val, -(1LL << 31), 1LL << 31);
        write_utype(loc, val);
        write_itype(loc + 4, val);
      }
      break;
    }
    case R_RISCV_GOT_HI20:
      write_utype(loc, G + GOT + A - P);
      break;
    case R_RISCV_TLS_GOT_HI20:
      write_utype(loc, sym.get_gottp_addr(ctx) + A - P);
      break;
    case R_RISCV_TLS_GD_HI20:
      write_utype(loc, sym.get_tlsgd_addr(ctx) + A - P);
      break;
    case R_RISCV_PCREL_HI20:
      write_utype(loc, S + A - P);
      break;
    case R_RISCV_PCREL_LO12_I:
    case R_RISCV_PCREL_LO12_S: {
      // The low 12 bits come from the value computed at the paired
      // *_HI20 relocation's place, not at this relocation's place.
      i64 idx2 = find_paired_reloc();
      const ElfRel<E> &rel2 = rels[idx2];
      Symbol<E> &sym2 = *file.symbols[rel2.r_sym];

      u64 S = sym2.get_addr(ctx);
      u64 A = rel2.r_addend;
      u64 P = get_addr() + rel2.r_offset - get_r_delta(idx2);
      u64 G = sym2.get_got_idx(ctx) * sizeof(Word<E>);
      u64 val;

      switch (rel2.r_type) {
      case R_RISCV_GOT_HI20:
        val = G + GOT + A - P;
        break;
      case R_RISCV_TLS_GOT_HI20:
        val = sym2.get_gottp_addr(ctx) + A - P;
        break;
      case R_RISCV_TLS_GD_HI20:
        val = sym2.get_tlsgd_addr(ctx) + A - P;
        break;
      case R_RISCV_PCREL_HI20:
        val = S + A - P;
        break;
      default:
        unreachable();
      }

      if (rel.r_type == R_RISCV_PCREL_LO12_I)
        write_itype(loc, val);
      else
        write_stype(loc, val);
      break;
    }
    case R_RISCV_HI20:
      // If relaxation removed the LUI, there is nothing to write.
      assert(removed_bytes == 0 || removed_bytes == 4);
      if (removed_bytes == 0) {
        check(S + A, -(1LL << 31), 1LL << 31);
        write_utype(loc, S + A);
      }
      break;
    case R_RISCV_LO12_I:
    case R_RISCV_LO12_S:
      if (rel.r_type == R_RISCV_LO12_I)
        write_itype(loc, S + A);
      else
        write_stype(loc, S + A);

      // Rewrite `lw t1, 0(t0)` with `lw t1, 0(x0)` if the address is
      // accessible relative to the zero register. If the upper 20 bits
      // are all zero, the corresponding LUI might have been removed.
      if (bits(S + A, 31, 12) == 0)
        set_rs1(loc, 0);
      break;
    case R_RISCV_TPREL_HI20:
      assert(removed_bytes == 0 || removed_bytes == 4);
      if (removed_bytes == 0)
        write_utype(loc, S + A - ctx.tp_addr);
      break;
    case R_RISCV_TPREL_ADD:
      // This relocation just annotates an ADD instruction that can be
      // removed when a TPREL is relaxed. No value is needed to be
      // written.
      assert(removed_bytes == 0 || removed_bytes == 4);
      break;
    case R_RISCV_TPREL_LO12_I:
    case R_RISCV_TPREL_LO12_S: {
      i64 val = S + A - ctx.tp_addr;
      if (rel.r_type == R_RISCV_TPREL_LO12_I)
        write_itype(loc, val);
      else
        write_stype(loc, val);

      // Rewrite `lw t1, 0(t0)` with `lw t1, 0(tp)` if the address is
      // directly accessible using tp. tp is x4.
      if (sign_extend(val, 11) == val)
        set_rs1(loc, 4);
      break;
    }
    case R_RISCV_ADD8:
      *loc += S + A;
      break;
    case R_RISCV_ADD16:
      *(U16<E> *)loc += S + A;
      break;
    case R_RISCV_ADD32:
      *(U32<E> *)loc += S + A;
      break;
    case R_RISCV_ADD64:
      *(U64<E> *)loc += S + A;
      break;
    case R_RISCV_SUB8:
      *loc -= S + A;
      break;
    case R_RISCV_SUB16:
      *(U16<E> *)loc -= S + A;
      break;
    case R_RISCV_SUB32:
      *(U32<E> *)loc -= S + A;
      break;
    case R_RISCV_SUB64:
      *(U64<E> *)loc -= S + A;
      break;
    case R_RISCV_ALIGN: {
      // A R_RISCV_ALIGN is followed by a NOP sequence. We need to remove
      // zero or more bytes so that the instruction after R_RISCV_ALIGN is
      // aligned to a given alignment boundary.
      //
      // We need to guarantee that the NOP sequence is valid after byte
      // removal (e.g. we can't remove the first 2 bytes of a 4-byte NOP).
      // For the sake of simplicity, we always rewrite the entire NOP sequence.
      i64 padding_bytes = rel.r_addend - removed_bytes;
      assert((padding_bytes & 1) == 0);

      i64 i = 0;
      for (; i <= padding_bytes - 4; i += 4)
        *(ul32 *)(loc + i) = 0x0000'0013; // nop
      if (i < padding_bytes)
        *(ul16 *)(loc + i) = 0x0001; // c.nop
      break;
    }
    case R_RISCV_RVC_BRANCH:
      check(S + A - P, -(1 << 8), 1 << 8);
      write_cbtype(loc, S + A - P);
      break;
    case R_RISCV_RVC_JUMP:
      check(S + A - P, -(1 << 11), 1 << 11);
      write_cjtype(loc, S + A - P);
      break;
    case R_RISCV_SUB6:
      *loc = (*loc & 0b1100'0000) | ((*loc - (S + A)) & 0b0011'1111);
      break;
    case R_RISCV_SET6:
      *loc = (*loc & 0b1100'0000) | ((S + A) & 0b0011'1111);
      break;
    case R_RISCV_SET8:
      *loc = S + A;
      break;
    case R_RISCV_SET16:
      *(U16<E> *)loc = S + A;
      break;
    case R_RISCV_SET32:
      *(U32<E> *)loc = S + A;
      break;
    case R_RISCV_PLT32:
    case R_RISCV_32_PCREL:
      *(U32<E> *)loc = S + A - P;
      break;
    default:
      unreachable();
    }
  }
}
// Apply relocations to a non-SHF_ALLOC section (e.g. debug sections).
// Only link-time-constant relocation types are valid here.
template <typename E>
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);

  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;

    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;

    // If the relocation refers into a mergeable-string section, the
    // fragment (and its addend) supersedes the symbol value.
    SectionFragment<E> *frag;
    i64 frag_addend;
    std::tie(frag, frag_addend) = get_fragment(ctx, rel);

    // S = symbol/fragment address, A = addend, as in the psABI formulas.
    u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
    u64 A = frag ? frag_addend : (i64)rel.r_addend;

    switch (rel.r_type) {
    case R_RISCV_32:
      *(U32<E> *)loc = S + A;
      break;
    case R_RISCV_64:
      // A reference to a dead section fragment may be replaced with a
      // tombstone value so stale debug info doesn't alias a reused address.
      if (std::optional<u64> val = get_tombstone(sym, frag))
        *(U64<E> *)loc = *val;
      else
        *(U64<E> *)loc = S + A;
      break;
    case R_RISCV_ADD8:
      *loc += S + A;
      break;
    case R_RISCV_ADD16:
      *(U16<E> *)loc += S + A;
      break;
    case R_RISCV_ADD32:
      *(U32<E> *)loc += S + A;
      break;
    case R_RISCV_ADD64:
      *(U64<E> *)loc += S + A;
      break;
    case R_RISCV_SUB8:
      *loc -= S + A;
      break;
    case R_RISCV_SUB16:
      *(U16<E> *)loc -= S + A;
      break;
    case R_RISCV_SUB32:
      *(U32<E> *)loc -= S + A;
      break;
    case R_RISCV_SUB64:
      *(U64<E> *)loc -= S + A;
      break;
    case R_RISCV_SUB6:
      // Only the low 6 bits participate; the top 2 bits are preserved.
      *loc = (*loc & 0b1100'0000) | ((*loc - (S + A)) & 0b0011'1111);
      break;
    case R_RISCV_SET6:
      *loc = (*loc & 0b1100'0000) | ((S + A) & 0b0011'1111);
      break;
    case R_RISCV_SET8:
      *loc = S + A;
      break;
    case R_RISCV_SET16:
      *(U16<E> *)loc = S + A;
      break;
    case R_RISCV_SET32:
      *(U32<E> *)loc = S + A;
      break;
    case R_RISCV_SET_ULEB128:
      overwrite_uleb(loc, S + A);
      break;
    case R_RISCV_SUB_ULEB128: {
      // Read the current ULEB128 value, then rewrite it in place with
      // the subtraction applied.
      u8 *p = loc;
      u64 val = read_uleb(p);
      overwrite_uleb(loc, val - S - A);
      break;
    }
    default:
      Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: "
                 << rel;
      break;
    }
  }
}
// Copy this section's contents to the output buffer, skipping the
// bytes that relaxation deleted (as recorded in `extra.r_deltas`).
template <typename E>
void InputSection<E>::copy_contents_riscv(Context<E> &ctx, u8 *buf) {
  // If a section is not relaxed, we can copy it as a one big chunk.
  if (extra.r_deltas.empty()) {
    uncompress_to(ctx, buf);
    return;
  }

  // A relaxed section is copied piece-wise.
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  i64 pos = 0;

  for (i64 i = 0; i < rels.size(); i++) {
    // Bytes removed at relocation i.
    i64 delta = extra.r_deltas[i + 1] - extra.r_deltas[i];
    if (delta == 0)
      continue;
    assert(delta > 0);

    // Copy everything up to the relocated place, then skip the
    // `delta` deleted bytes in the input.
    const ElfRel<E> &r = rels[i];
    memcpy(buf, contents.data() + pos, r.r_offset - pos);
    buf += r.r_offset - pos;
    pos = r.r_offset + delta;
  }

  // Copy the tail after the last deletion.
  memcpy(buf, contents.data() + pos, contents.size() - pos);
}
// Scan relocations to decide which linker-synthesized resources
// (GOT/PLT/TLS entries, dynamic relocations) each referenced symbol
// needs. The values themselves are written later by apply_reloc_alloc.
template <typename E>
void InputSection<E>::scan_relocations(Context<E> &ctx) {
  assert(shdr().sh_flags & SHF_ALLOC);

  this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
  std::span<const ElfRel<E>> rels = get_rels(ctx);

  // Scan relocations
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;

    Symbol<E> &sym = *file.symbols[rel.r_sym];

    // An ifunc is resolved at load time, so any reference to it
    // needs both a GOT and a PLT entry.
    if (sym.is_ifunc())
      sym.flags |= NEEDS_GOT | NEEDS_PLT;

    switch (rel.r_type) {
    case R_RISCV_32:
      // On RV64 a 32-bit absolute word can't hold a dynamic relocation
      // result, so it must be a link-time constant.
      if constexpr (E::is_64)
        scan_absrel(ctx, sym, rel);
      else
        scan_dyn_absrel(ctx, sym, rel);
      break;
    case R_RISCV_HI20:
      scan_absrel(ctx, sym, rel);
      break;
    case R_RISCV_64:
      if constexpr (!E::is_64)
        Fatal(ctx) << *this << ": R_RISCV_64 cannot be used on RV32";
      scan_dyn_absrel(ctx, sym, rel);
      break;
    case R_RISCV_CALL:
    case R_RISCV_CALL_PLT:
    case R_RISCV_PLT32:
      if (sym.is_imported)
        sym.flags |= NEEDS_PLT;
      break;
    case R_RISCV_GOT_HI20:
      sym.flags |= NEEDS_GOT;
      break;
    case R_RISCV_TLS_GOT_HI20:
      sym.flags |= NEEDS_GOTTP;
      break;
    case R_RISCV_TLS_GD_HI20:
      sym.flags |= NEEDS_TLSGD;
      break;
    case R_RISCV_32_PCREL:
      scan_pcrel(ctx, sym, rel);
      break;
    case R_RISCV_TPREL_HI20:
    case R_RISCV_TPREL_LO12_I:
    case R_RISCV_TPREL_LO12_S:
    case R_RISCV_TPREL_ADD:
      // Local-exec TLS is only valid in a main executable.
      check_tlsle(ctx, sym, rel);
      break;
    // The following relocation types need no extra resources;
    // they are fully handled in apply_reloc_alloc.
    case R_RISCV_BRANCH:
    case R_RISCV_JAL:
    case R_RISCV_PCREL_HI20:
    case R_RISCV_PCREL_LO12_I:
    case R_RISCV_PCREL_LO12_S:
    case R_RISCV_LO12_I:
    case R_RISCV_LO12_S:
    case R_RISCV_ADD8:
    case R_RISCV_ADD16:
    case R_RISCV_ADD32:
    case R_RISCV_ADD64:
    case R_RISCV_SUB8:
    case R_RISCV_SUB16:
    case R_RISCV_SUB32:
    case R_RISCV_SUB64:
    case R_RISCV_ALIGN:
    case R_RISCV_RVC_BRANCH:
    case R_RISCV_RVC_JUMP:
    case R_RISCV_RELAX:
    case R_RISCV_SUB6:
    case R_RISCV_SET6:
    case R_RISCV_SET8:
    case R_RISCV_SET16:
    case R_RISCV_SET32:
      break;
    default:
      Error(ctx) << *this << ": unknown relocation: " << rel;
    }
  }
}
// Returns true if `isec` is a live, loadable, executable section —
// i.e. one whose contents relaxation is allowed to shrink.
template <typename E>
static bool is_resizable(Context<E> &ctx, InputSection<E> *isec) {
  if (!isec || !isec->is_alive)
    return false;
  u64 flags = isec->shdr().sh_flags;
  return (flags & SHF_ALLOC) && (flags & SHF_EXECINSTR);
}
// Returns the distance between a relocated place and a symbol.
template <typename E>
static i64 compute_distance(Context<E> &ctx, Symbol<E> &sym,
                            InputSection<E> &isec, const ElfRel<E> &rel) {
  // Absolute symbols are treated as infinitely far away because
  // `shrink_section` may widen the gap between a branch instruction
  // and an absolute location. Branching to an absolute location is
  // extremely rare in real code, though.
  if (sym.is_absolute())
    return INT32_MAX;

  // Likewise, relocations against weak undefined symbols won't be relaxed.
  if (sym.esym().is_undef_weak())
    return INT32_MAX;

  // Distance = S + A - P.
  i64 place = isec.get_addr() + rel.r_offset;
  return sym.get_addr(ctx) + rel.r_addend - place;
}
// Scan relocations to shrink sections. Fills `isec.extra.r_deltas`
// with the cumulative number of bytes deleted before each relocation
// and reduces `isec.sh_size` accordingly; the bytes themselves are
// dropped later by copy_contents_riscv.
template <typename E>
static void shrink_section(Context<E> &ctx, InputSection<E> &isec, bool use_rvc) {
  std::span<const ElfRel<E>> rels = isec.get_rels(ctx);
  isec.extra.r_deltas.resize(rels.size() + 1);

  // Running total of deleted bytes.
  i64 delta = 0;

  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &r = rels[i];
    Symbol<E> &sym = *isec.file.symbols[r.r_sym];
    isec.extra.r_deltas[i] = delta;

    // Handling R_RISCV_ALIGN is mandatory.
    //
    // R_RISCV_ALIGN refers to NOP instructions. We need to eliminate some
    // or all of the instructions so that the instruction that immediately
    // follows the NOPs is aligned to a specified alignment boundary.
    if (r.r_type == R_RISCV_ALIGN) {
      // The total bytes of NOPs is stored to r_addend, so the next
      // instruction is r_addend away.
      u64 loc = isec.get_addr() + r.r_offset - delta;
      u64 next_loc = loc + r.r_addend;
      u64 alignment = bit_ceil(r.r_addend + 1);
      assert(alignment <= (1 << isec.p2align));
      delta += next_loc - align_to(loc, alignment);
      continue;
    }

    // Handling other relocations is optional.
    if (!ctx.arg.relax || i == rels.size() - 1 ||
        rels[i + 1].r_type != R_RISCV_RELAX)
      continue;

    // Linker-synthesized symbols haven't been assigned their final
    // values when we are shrinking sections because actual values can
    // be computed only after we fix the file layout. Therefore, we
    // assume that relocations against such symbols are always
    // non-relaxable.
    if (sym.file == ctx.internal_obj)
      continue;

    switch (r.r_type) {
    case R_RISCV_CALL:
    case R_RISCV_CALL_PLT: {
      // These relocations refer to an AUIPC + JALR instruction pair to
      // allow to jump to anywhere in PC ± 2 GiB. If the jump target is
      // close enough to PC, we can use C.J, C.JAL or JAL instead.
      i64 dist = compute_distance(ctx, sym, isec, r);
      if (dist & 1)
        break;

      i64 rd = get_rd(*(ul32 *)(isec.contents.data() + r.r_offset + 4));

      if (rd == 0 && sign_extend(dist, 11) == dist && use_rvc) {
        // If rd is x0 and the jump target is within ±2 KiB, we can use
        // C.J, saving 6 bytes.
        delta += 6;
      } else if (rd == 1 && sign_extend(dist, 11) == dist && use_rvc && !E::is_64) {
        // If rd is x1 and the jump target is within ±2 KiB, we can use
        // C.JAL. This is RV32 only because C.JAL is RV32-only instruction.
        delta += 6;
      } else if (sign_extend(dist, 20) == dist) {
        // If the jump target is within ±1 MiB, we can use JAL.
        delta += 4;
      }
      break;
    }
    case R_RISCV_HI20:
      // If the upper 20 bits are all zero, we can remove LUI.
      // The corresponding instructions referred to by LO12_I/LO12_S
      // relocations will use the zero register instead.
      if (bits(sym.get_addr(ctx), 31, 12) == 0)
        delta += 4;
      break;
    case R_RISCV_TPREL_HI20:
    case R_RISCV_TPREL_ADD:
      // These relocations are used to add a high 20-bit value to the
      // thread pointer. The following two instructions materializes
      // TP + HI20(foo) in %r5, for example.
      //
      //  lui  a5,%tprel_hi(foo)         # R_RISCV_TPREL_HI20 (symbol)
      //  add  a5,a5,tp,%tprel_add(foo)  # R_RISCV_TPREL_ADD (symbol)
      //
      // Then thread-local variable `foo` is accessed with a low 12-bit
      // offset like this:
      //
      //  sw   t0,%tprel_lo(foo)(a5)     # R_RISCV_TPREL_LO12_S (symbol)
      //
      // However, if the variable is at TP ±2 KiB, TP + HI20(foo) is the
      // same as TP, so we can instead access the thread-local variable
      // directly using TP like this:
      //
      //  sw   t0,%tprel_lo(foo)(tp)
      //
      // Here, we remove `lui` and `add` if the offset is within ±2 KiB.
      if (i64 val = sym.get_addr(ctx) + r.r_addend - ctx.tp_addr;
          sign_extend(val, 11) == val)
        delta += 4;
      break;
    }
  }

  // Record the final total and shrink the section size.
  isec.extra.r_deltas[rels.size()] = delta;
  isec.sh_size -= delta;
}
// Shrink sections by interpreting relocations.
//
// This operation seems to be optional, because by default longest
// instructions are being used. However, calling this function is actually
// mandatory because of R_RISCV_ALIGN. R_RISCV_ALIGN is a directive to the
// linker to align the location referred to by the relocation to a
// specified byte boundary. We at least have to interpret them to satisfy
// the alignment constraints.
//
// Returns the new end offset of the output sections (the value of
// set_osec_offsets after resizing).
template <typename E>
i64 riscv_resize_sections(Context<E> &ctx) {
  Timer t(ctx, "riscv_resize_sections");

  // True if we can use the 2-byte instructions. This is usually true on
  // Unix because RV64GC is generally considered the baseline hardware.
  bool use_rvc = get_eflags(ctx) & EF_RISCV_RVC;

  // Find all the relocations that can be relaxed.
  // This step should only shrink sections.
  tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
    for (std::unique_ptr<InputSection<E>> &isec : file->sections)
      if (is_resizable(ctx, isec.get()))
        shrink_section(ctx, *isec, use_rvc);
  });

  // Fix symbol values: a symbol inside a shrunk section must move back
  // by the number of bytes deleted before it.
  tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
    for (Symbol<E> *sym : file->symbols) {
      if (sym->file != file)
        continue;

      InputSection<E> *isec = sym->get_input_section();
      if (!isec || isec->extra.r_deltas.empty())
        continue;

      // Find the first relocation at or after the symbol's offset;
      // the delta recorded there is the shift to apply.
      std::span<const ElfRel<E>> rels = isec->get_rels(ctx);
      auto it = std::lower_bound(rels.begin(), rels.end(), sym->value,
                                 [&](const ElfRel<E> &r, u64 val) {
        return r.r_offset < val;
      });
      sym->value -= isec->extra.r_deltas[it - rels.begin()];
    }
  });

  // Re-compute section offset again to finalize them.
  compute_section_sizes(ctx);
  return set_osec_offsets(ctx);
}
// Explicit template instantiations: emit the RISC-V-specific functions
// for all four supported RISC-V targets (RV32/RV64, little/big-endian)
// in this translation unit.
#define INSTANTIATE(E)                                                       \
  template void write_plt_header(Context<E> &, u8 *);                        \
  template void write_plt_entry(Context<E> &, u8 *, Symbol<E> &);            \
  template void write_pltgot_entry(Context<E> &, u8 *, Symbol<E> &);         \
  template void                                                              \
  EhFrameSection<E>::apply_reloc(Context<E> &, const ElfRel<E> &, u64, u64); \
  template void InputSection<E>::apply_reloc_alloc(Context<E> &, u8 *);      \
  template void InputSection<E>::apply_reloc_nonalloc(Context<E> &, u8 *);   \
  template void InputSection<E>::copy_contents_riscv(Context<E> &, u8 *);    \
  template void InputSection<E>::scan_relocations(Context<E> &);             \
  template i64 riscv_resize_sections(Context<E> &);
INSTANTIATE(RV64LE);
INSTANTIATE(RV64BE);
INSTANTIATE(RV32LE);
INSTANTIATE(RV32BE);
} // namespace mold::elf

491
third_party/mold/elf/arch-s390x.cc vendored Normal file
View file

@ -0,0 +1,491 @@
// clang-format off
// This file contains code for the IBM z/Architecture 64-bit ISA, which is
// commonly referred to as "s390x" on Linux.
//
// z/Architecture is a 64-bit CISC ISA developed by IBM around 2000 for
// IBM's "big iron" mainframe computers. The computers are direct
// descendants of IBM System/360 all the way back in 1966. I've never
// actually seen a mainframe, and you probably haven't either, but it looks
// like the mainframe market is still large enough to sustain its ecosystem.
// Ubuntu for example provides the official support for s390x as of 2022.
// Since they are being actively maintained, we need to support them.
//
// As an instruction set, s390x isn't particularly odd. It has 16 general-
// purpose registers. Instructions are 2, 4 or 6 bytes long and always
// aligned to 2 bytes boundaries. Despite unfamiliarity, I found that it
// just feels like an x86-64 in a parallel universe.
//
// Here is the register usage in this ABI:
//
// r0-r1: reserved as scratch registers so we can use them in our PLT
// r2: parameter passing and return values
// r3-r6: parameter passing
// r12: address of GOT if position-independent code
// r14: return address
// r15: stack pointer
// a1: upper 32 bits of TP (thread pointer)
// a2: lower 32 bits of TP (thread pointer)
//
// Thread-local storage (TLS) is supported on s390x in the same way as it
// is on other targets with one exception. On other targets, __tls_get_addr
// is used to get an address of a thread-local variable. On s390x,
// __tls_get_offset is used instead. The difference is __tls_get_offset
// returns an address of a thread-local variable as an offset from TP. So
// we need to add TP to a return value before use. I don't know why it is
// different, but that is the way it is.
//
// https://github.com/rui314/psabi/blob/main/s390x.pdf
#include "third_party/mold/elf/mold.h"
namespace mold::elf {
using E = S390X;
// OR a 20-bit value into a 4-byte big-endian instruction word: the low
// 12 bits of `val` go to the field at bit shift 16 and the high 8 bits
// to the field at bit shift 8.
static void write_mid20(u8 *loc, u64 val) {
  u64 lo12 = bits(val, 11, 0);
  u64 hi8 = bits(val, 19, 12);
  *(ub32 *)loc |= (lo12 << 16) | (hi8 << 8);
}
// Write the s390x PLT header. It spills %r0, copies the second .got.plt
// slot (by convention the link map) to the stack, and branches to the
// address stored in the third .got.plt slot (the lazy resolver).
template <>
void write_plt_header(Context<E> &ctx, u8 *buf) {
  static u8 insn[] = {
    0xe3, 0x00, 0xf0, 0x38, 0x00, 0x24, // stg %r0, 56(%r15)
    0xc0, 0x10, 0, 0, 0, 0,             // larl %r1, GOTPLT_OFFSET
    0xd2, 0x07, 0xf0, 0x30, 0x10, 0x08, // mvc 48(8, %r15), 8(%r1)
    0xe3, 0x10, 0x10, 0x10, 0x00, 0x04, // lg %r1, 16(%r1)
    0x07, 0xf1,                         // br %r1
    0x07, 0x00, 0x07, 0x00, 0x07, 0x00, // nopr; nopr; nopr
  };
  memcpy(buf, insn, sizeof(insn));
  // LARL takes a halfword-scaled PC-relative immediate (hence >> 1),
  // relative to the LARL instruction itself at offset 6 (hence - 6).
  *(ub32 *)(buf + 8) = (ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 6) >> 1;
}
// Write a lazy PLT entry: load the symbol's .got.plt slot and branch to
// it, with %r0 carrying the symbol's byte offset in .rela.plt so the
// resolver can identify which symbol to resolve.
template <>
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  static u8 insn[] = {
    0xc0, 0x10, 0, 0, 0, 0,             // larl %r1, GOTPLT_ENTRY_OFFSET
    0xe3, 0x10, 0x10, 0x00, 0x00, 0x04, // lg %r1, (%r1)
    0xc0, 0x01, 0, 0, 0, 0,             // lgfi %r0, PLT_INDEX
    0x07, 0xf1,                         // br %r1
    0x07, 0x00, 0x07, 0x00, 0x07, 0x00, // nopr; nopr; nopr
    0x07, 0x00, 0x07, 0x00, 0x07, 0x00, // nopr; nopr; nopr
  };
  memcpy(buf, insn, sizeof(insn));
  // Halfword-scaled PC-relative offset from this entry to its .got.plt slot.
  *(ub32 *)(buf + 2) = (sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx)) >> 1;
  // Byte offset of the symbol's dynamic relocation within .rela.plt.
  *(ub32 *)(buf + 14) = sym.get_plt_idx(ctx) * sizeof(ElfRel<E>);
}
// Write a non-lazy PLT entry for a symbol that already has a regular GOT
// entry: load the GOT slot and jump to the address stored there.
template <>
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  static u8 insn[] = {
    0xc0, 0x10, 0, 0, 0, 0,             // larl %r1, GOT_ENTRY_OFFSET
    0xe3, 0x10, 0x10, 0x00, 0x00, 0x04, // lg %r1, (%r1)
    0x07, 0xf1,                         // br %r1
    0x07, 0x00,                         // nopr
  };
  memcpy(buf, insn, sizeof(insn));
  // Halfword-scaled PC-relative offset from this entry to the GOT slot.
  *(ub32 *)(buf + 2) = (sym.get_got_addr(ctx) - sym.get_plt_addr(ctx)) >> 1;
}
// Apply a relocation to .eh_frame contents. Only the relocation types
// that compilers actually emit for .eh_frame are handled; anything else
// is a fatal error.
template <>
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
                                    u64 offset, u64 val) {
  u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
  switch (rel.r_type) {
  case R_NONE:
    break;
  case R_390_PC32:
    // PC-relative: subtract the runtime address of the relocated field.
    *(ub32 *)loc = val - this->shdr.sh_addr - offset;
    break;
  case R_390_64:
    *(ub64 *)loc = val;
    break;
  default:
    Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
  }
}
// Apply relocations to the contents of an allocated (SHF_ALLOC) section.
// The one-letter variables follow the usual psABI notation:
// S = symbol value, A = addend, P = place (address of the relocated
// field), G = the symbol's offset within .got, GOT = address of .got.
template <>
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  // Dynamic relocations for this section, if any, are appended here.
  ElfRel<E> *dynrel = nullptr;
  if (ctx.reldyn)
    dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
                           file.reldyn_offset + this->reldyn_offset);
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE)
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;
    // Report an error if a computed value doesn't fit in [lo, hi).
    auto check = [&](i64 val, i64 lo, i64 hi) {
      if (val < lo || hi <= val)
        Error(ctx) << *this << ": relocation " << rel << " against "
                   << sym << " out of range: " << val << " is not in ["
                   << lo << ", " << hi << ")";
    };
    // Same as `check`, but for the *DBL relocations whose value is
    // halfword-scaled and therefore must be even.
    auto check_dbl = [&](i64 val, i64 lo, i64 hi) {
      check(val, lo, hi);
      // R_390_*DBL relocs should never refer to a symbol at an odd address
      if (val & 1)
        Error(ctx) << *this << ": misaligned symbol " << sym
                   << " for relocation " << rel;
    };
    u64 S = sym.get_addr(ctx);
    u64 A = rel.r_addend;
    u64 P = get_addr() + rel.r_offset;
    u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>);
    u64 GOT = ctx.got->shdr.sh_addr;
    switch (rel.r_type) {
    case R_390_64:
      // May be converted to a dynamic relocation for PIC output.
      apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
      break;
    case R_390_8:
      check(S + A, 0, 1 << 8);
      *loc = S + A;
      break;
    case R_390_12:
      check(S + A, 0, 1 << 12);
      *(ul16 *)loc |= bits(S + A, 11, 0);
      break;
    case R_390_16:
      check(S + A, 0, 1 << 16);
      *(ub16 *)loc = S + A;
      break;
    case R_390_20:
      check(S + A, 0, 1 << 20);
      write_mid20(loc, S + A);
      break;
    case R_390_32:
    case R_390_PLT32:
      check(S + A, 0, 1LL << 32);
      *(ub32 *)loc = S + A;
      break;
    case R_390_PLT64:
      *(ub64 *)loc = S + A;
      break;
    case R_390_PC12DBL:
    case R_390_PLT12DBL:
      // *DBL relocations store a halfword-scaled PC-relative value.
      check_dbl(S + A - P, -(1 << 12), 1 << 12);
      *(ul16 *)loc |= bits(S + A - P, 12, 1);
      break;
    case R_390_PC16:
      check(S + A - P, -(1 << 15), 1 << 15);
      *(ub16 *)loc = S + A - P;
      break;
    case R_390_PC32:
      check(S + A - P, -(1LL << 31), 1LL << 31);
      *(ub32 *)loc = S + A - P;
      break;
    case R_390_PC64:
      *(ub64 *)loc = S + A - P;
      break;
    case R_390_PC16DBL:
    case R_390_PLT16DBL:
      check_dbl(S + A - P, -(1 << 16), 1 << 16);
      *(ub16 *)loc = (S + A - P) >> 1;
      break;
    case R_390_PC24DBL:
    case R_390_PLT24DBL:
      check_dbl(S + A - P, -(1 << 24), 1 << 24);
      *(ub32 *)loc |= bits(S + A - P, 24, 1);
      break;
    case R_390_PC32DBL:
    case R_390_PLT32DBL:
      check_dbl(S + A - P, -(1LL << 32), 1LL << 32);
      *(ub32 *)loc = (S + A - P) >> 1;
      break;
    case R_390_GOT12:
    case R_390_GOTPLT12:
      check(G + A, 0, 1 << 12);
      *(ul16 *)loc |= bits(G + A, 11, 0);
      break;
    case R_390_GOT16:
    case R_390_GOTPLT16:
      check(G + A, 0, 1 << 16);
      *(ub16 *)loc = G + A;
      break;
    case R_390_GOT20:
    case R_390_GOTPLT20:
      check(G + A, 0, 1 << 20);
      write_mid20(loc, G + A);
      break;
    case R_390_GOT32:
    case R_390_GOTPLT32:
      check(G + A, 0, 1LL << 32);
      *(ub32 *)loc = G + A;
      break;
    case R_390_GOT64:
    case R_390_GOTPLT64:
      *(ub64 *)loc = G + A;
      break;
    case R_390_GOTOFF16:
    case R_390_PLTOFF16:
      check(S + A - GOT, -(1 << 15), 1 << 15);
      *(ub16 *)loc = S + A - GOT;
      break;
    case R_390_GOTOFF32:
    case R_390_PLTOFF32:
      check(S + A - GOT, -(1LL << 31), 1LL << 31);
      *(ub32 *)loc = S + A - GOT;
      break;
    case R_390_GOTOFF64:
    case R_390_PLTOFF64:
      *(ub64 *)loc = S + A - GOT;
      break;
    case R_390_GOTPC:
      *(ub64 *)loc = GOT + A - P;
      break;
    case R_390_GOTPCDBL:
      check_dbl(GOT + A - P, -(1LL << 32), 1LL << 32);
      *(ub32 *)loc = (GOT + A - P) >> 1;
      break;
    case R_390_GOTENT:
      check(GOT + G + A - P, -(1LL << 32), 1LL << 32);
      *(ub32 *)loc = (GOT + G + A - P) >> 1;
      break;
    case R_390_TLS_LE32:
      // Local-exec: the value is a fixed offset from the thread pointer.
      *(ub32 *)loc = S + A - ctx.tp_addr;
      break;
    case R_390_TLS_LE64:
      *(ub64 *)loc = S + A - ctx.tp_addr;
      break;
    case R_390_TLS_GOTIE20:
      write_mid20(loc, sym.get_gottp_addr(ctx) + A - GOT);
      break;
    case R_390_TLS_IEENT:
      *(ub32 *)loc = (sym.get_gottp_addr(ctx) + A - P) >> 1;
      break;
    case R_390_TLS_GD32:
      // General-dynamic may have been relaxed to initial-exec or
      // local-exec in scan_relocations; pick the matching value.
      if (sym.has_tlsgd(ctx))
        *(ub32 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT;
      else if (sym.has_gottp(ctx))
        *(ub32 *)loc = sym.get_gottp_addr(ctx) + A - GOT;
      else
        *(ub32 *)loc = S + A - ctx.tp_addr;
      break;
    case R_390_TLS_GD64:
      if (sym.has_tlsgd(ctx))
        *(ub64 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT;
      else if (sym.has_gottp(ctx))
        *(ub64 *)loc = sym.get_gottp_addr(ctx) + A - GOT;
      else
        *(ub64 *)loc = S + A - ctx.tp_addr;
      break;
    case R_390_TLS_GDCALL:
      // Rewrite the __tls_get_offset call site to match the relaxation
      // chosen for the corresponding TLS_GD relocation.
      if (sym.has_tlsgd(ctx)) {
        // do nothing
      } else if (sym.has_gottp(ctx)) {
        // lg %r2, 0(%r2, %r12)
        static u8 insn[] = { 0xe3, 0x22, 0xc0, 0x00, 0x00, 0x04 };
        memcpy(loc, insn, sizeof(insn));
      } else {
        // nop
        static u8 insn[] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 };
        memcpy(loc, insn, sizeof(insn));
      }
      break;
    case R_390_TLS_LDM32:
      if (ctx.got->has_tlsld(ctx))
        *(ub32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT;
      break;
    case R_390_TLS_LDM64:
      if (ctx.got->has_tlsld(ctx))
        *(ub64 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT;
      break;
    case R_390_TLS_LDO32:
      if (ctx.got->has_tlsld(ctx))
        *(ub32 *)loc = S + A - ctx.dtp_addr;
      else
        *(ub32 *)loc = S + A - ctx.tp_addr;
      break;
    case R_390_TLS_LDO64:
      if (ctx.got->has_tlsld(ctx))
        *(ub64 *)loc = S + A - ctx.dtp_addr;
      else
        *(ub64 *)loc = S + A - ctx.tp_addr;
      break;
    case R_390_TLS_LDCALL:
      // If local-dynamic was relaxed away, the call is not needed.
      if (!ctx.got->has_tlsld(ctx)) {
        // nop
        static u8 insn[] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 };
        memcpy(loc, insn, sizeof(insn));
      }
      break;
    default:
      unreachable();
    }
  }
}
// Apply relocations to a non-allocated section (e.g. debug sections).
// Only the simple absolute relocation types are expected here; values
// may be replaced by a tombstone if the referenced section was dropped.
template <>
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;
    // Report an error if a computed value doesn't fit in [lo, hi).
    auto check = [&](i64 val, i64 lo, i64 hi) {
      if (val < lo || hi <= val)
        Error(ctx) << *this << ": relocation " << rel << " against "
                   << sym << " out of range: " << val << " is not in ["
                   << lo << ", " << hi << ")";
    };
    // If the relocation refers to a mergeable section fragment, use the
    // fragment's address and addend instead of the symbol's.
    SectionFragment<E> *frag;
    i64 frag_addend;
    std::tie(frag, frag_addend) = get_fragment(ctx, rel);
    u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
    u64 A = frag ? frag_addend : (i64)rel.r_addend;
    switch (rel.r_type) {
    case R_390_32: {
      i64 val = S + A;
      check(val, 0, 1LL << 32);
      *(ub32 *)loc = val;
      break;
    }
    case R_390_64:
      if (std::optional<u64> val = get_tombstone(sym, frag))
        *(ub64 *)loc = *val;
      else
        *(ub64 *)loc = S + A;
      break;
    case R_390_TLS_LDO64:
      if (std::optional<u64> val = get_tombstone(sym, frag))
        *(ub64 *)loc = *val;
      else
        *(ub64 *)loc = S + A - ctx.dtp_addr;
      break;
    default:
      Fatal(ctx) << *this << ": apply_reloc_nonalloc: " << rel;
    }
  }
}
// Scan this section's relocations and record, via symbol flags and
// context state, which auxiliary data structures (GOT, PLT, TLS slots,
// dynamic relocations) must be created before relocations are applied.
template <>
void InputSection<E>::scan_relocations(Context<E> &ctx) {
  assert(shdr().sh_flags & SHF_ALLOC);
  this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  // Scan relocations
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    // An ifunc symbol needs both a GOT entry and a PLT entry.
    if (sym.is_ifunc())
      sym.flags |= NEEDS_GOT | NEEDS_PLT;
    switch (rel.r_type) {
    case R_390_64:
      scan_dyn_absrel(ctx, sym, rel);
      break;
    case R_390_8:
    case R_390_12:
    case R_390_16:
    case R_390_20:
    case R_390_32:
      scan_absrel(ctx, sym, rel);
      break;
    case R_390_PC16:
    case R_390_PC16DBL:
    case R_390_PC32:
    case R_390_PC32DBL:
    case R_390_PC64:
      scan_pcrel(ctx, sym, rel);
      break;
    case R_390_GOT12:
    case R_390_GOT16:
    case R_390_GOT20:
    case R_390_GOT32:
    case R_390_GOT64:
    case R_390_GOTOFF16:
    case R_390_GOTOFF32:
    case R_390_GOTOFF64:
    case R_390_GOTPLT12:
    case R_390_GOTPLT16:
    case R_390_GOTPLT20:
    case R_390_GOTPLT32:
    case R_390_GOTPLT64:
    case R_390_GOTPC:
    case R_390_GOTPCDBL:
    case R_390_GOTENT:
      sym.flags |= NEEDS_GOT;
      break;
    case R_390_PLT12DBL:
    case R_390_PLT16DBL:
    case R_390_PLT24DBL:
    case R_390_PLT32:
    case R_390_PLT32DBL:
    case R_390_PLT64:
    case R_390_PLTOFF16:
    case R_390_PLTOFF32:
    case R_390_PLTOFF64:
      if (sym.is_imported)
        sym.flags |= NEEDS_PLT;
      break;
    case R_390_TLS_GOTIE20:
    case R_390_TLS_IEENT:
      sym.flags |= NEEDS_GOTTP;
      break;
    case R_390_TLS_GD32:
    case R_390_TLS_GD64:
      // We always want to relax calls to __tls_get_offset() in statically-
      // linked executables because __tls_get_offset() in libc.a just calls
      // abort().
      if (ctx.arg.is_static ||
          (ctx.arg.relax && !sym.is_imported && !ctx.arg.shared)) {
        // do nothing
      } else if (ctx.arg.relax && !sym.is_imported && ctx.arg.shared &&
                 !ctx.arg.z_dlopen) {
        // Relax general-dynamic to initial-exec.
        sym.flags |= NEEDS_GOTTP;
      } else {
        sym.flags |= NEEDS_TLSGD;
      }
      break;
    case R_390_TLS_LDM32:
    case R_390_TLS_LDM64: {
      bool do_relax = ctx.arg.is_static || (ctx.arg.relax && !ctx.arg.shared);
      if (!do_relax)
        ctx.needs_tlsld = true;
      break;
    }
    case R_390_TLS_LE32:
    case R_390_TLS_LE64:
      check_tlsle(ctx, sym, rel);
      break;
    case R_390_TLS_LDO32:
    case R_390_TLS_LDO64:
    case R_390_TLS_GDCALL:
    case R_390_TLS_LDCALL:
      // Handled at apply time; nothing to record here.
      break;
    default:
      Fatal(ctx) << *this << ": scan_relocations: " << rel;
    }
  }
}
} // namespace mold::elf

355
third_party/mold/elf/arch-sh4.cc vendored Normal file
View file

@ -0,0 +1,355 @@
// clang-format off
// SH-4 (SuperH 4) is a 32-bit RISC ISA developed by Hitachi in the early
// '90s. Some relatively powerful systems were developed with SH-4.
// A notable example is Sega's Dreamcast game console which debuted in 1998.
// Hitachi later spun off its semiconductor division as an independent
// company, Renesas, and Renesas is still selling SH-4 processors for the
// embedded market. It has never been as popular as ARM is, and its
// popularity continues to decline though.
//
// SH-4's most distinctive feature compared to other RISC ISAs is that its
// instructions are 16 bits in length instead of more common 32 bits for
// better code density. This difference affects various aspects of its
// instruction set as shown below:
//
// - SH-4 has 16 general-purpose registers (GPRs) instead of the most
//    common 32 GPR configuration to save one bit to specify a register.
//
//  - Binary instructions such as ADD normally take three registers in
// RISC ISAs (e.g. x ← y ⊕ z where x, y and z are registers), but
// SH-4's instructions take only two registers. The result of an
// operation is written to one of the source registers (e.g. x ← x ⊕ y).
//
// - Usual RISC ISAs have "load high" and "load low" instructions to set
// an immediate to most significant and least significant bits in a
// register to construct a full 32-bit value in a register. This
// technique is hard to use in SH-4, as 16 bit instructions are too
// small to contain large immediates. On SH-4, large immediates are
// loaded from memory using `mov.l` PC-relative load instruction.
//
// - Many RISC ISAs are, despite their name, actually fairly complex.
// They tend to have hundreds if not thousands of different instructions.
// SH-4 doesn't really have that many instructions because its 16-bit
// machine code simply can't encode many different opcodes. As a
// result, the number of relocations the linker has to support is also
// small.
//
// Beside these, SH-4 has a delay branch slot just like contemporary MIPS
// and SPARC. That is, one instruction after a branch instruction will
// always be executed even if the branch is taken. Delay branch slot allows
// a pipelined CPU to start and finish executing an instruction after a
// branch regardless of the branch's condition, simplifying the processor's
// implementation. It's considered a bad premature optimization nowadays,
// though. Modern RISC processors don't have it.
//
// Here are notes about the SH-4 psABI:
//
// - If a source file is compiled with -fPIC, each function starts
// with a piece of code to store the address of .got to %r12.
// We can use the register in our PLT for position-independent output.
//
// - Even though it uses the RELA-type relocations, relocation addends
// are stored not to the r_addend field but to the relocated section
// contents for some reason. Therefore, it's effectively REL.
//
// - It looks like the ecosystem has bit-rotted. Some tests, especially
// one using C++ exceptions, don't pass even with GNU ld.
//
// - GCC/SH4 tends to write dynamically-relocated data into .text, so the
// output from the linker contains lots of text relocations. That's not
// a problem with embedded programming, I guess.
#include "third_party/mold/elf/mold.h"
namespace mold::elf {
using E = SH4;
// Even though SH-4 uses RELA-type relocations, addends are stored to
// relocated places for some reason.
// Read the addend for a relocation. On SH-4 the addend lives in the
// relocated place itself (despite the RELA relocation format), so we
// read a 32-bit little-endian value for the types that embed one.
template <>
i64 get_addend(u8 *loc, const ElfRel<E> &rel) {
  switch (rel.r_type) {
  case R_SH_DIR32:
  case R_SH_REL32:
  case R_SH_TLS_GD_32:
  case R_SH_TLS_LD_32:
  case R_SH_TLS_LDO_32:
  case R_SH_TLS_IE_32:
  case R_SH_TLS_LE_32:
  case R_SH_TLS_DTPMOD32:
  case R_SH_TLS_DTPOFF32:
  case R_SH_TLS_TPOFF32:
  case R_SH_GOT32:
  case R_SH_PLT32:
  case R_SH_GOTOFF:
  case R_SH_GOTPC:
  case R_SH_GOTPLT32:
    return *(ul32 *)loc;
  default:
    // Other relocation types carry no embedded addend.
    return 0;
  }
}
// Write the SH-4 PLT header. The PIC variant computes the .got.plt
// address relative to %r12 (which holds the GOT address); the non-PIC
// variant embeds the absolute .got.plt address.
template <>
void write_plt_header(Context<E> &ctx, u8 *buf) {
  if (ctx.arg.pic) {
    static const u8 insn[] = {
      0x02, 0xd2, // mov.l 1f, r2
      0xcc, 0x32, // add r12, r2
      0x22, 0x50, // mov.l @(8, r2), r0
      0x21, 0x52, // mov.l @(4, r2), r2
      0x2b, 0x40, // jmp @r0
      0x00, 0xe0, // mov #0, r0
      0, 0, 0, 0, // 1: .long GOTPLT
    };
    static_assert(sizeof(insn) == E::plt_hdr_size);
    memcpy(buf, insn, sizeof(insn));
    // GOT-relative offset of .got.plt; added to %r12 at runtime.
    *(ul32 *)(buf + 12) = ctx.gotplt->shdr.sh_addr - ctx.got->shdr.sh_addr;
  } else {
    static const u8 insn[] = {
      0x02, 0xd2, // mov.l 1f, r2
      0x22, 0x50, // mov.l @(8, r2), r0
      0x21, 0x52, // mov.l @(4, r2), r2
      0x2b, 0x40, // jmp @r0
      0x00, 0xe0, // mov #0, r0
      0x09, 0x00, // nop
      0, 0, 0, 0, // 1: .long GOTPLT
    };
    static_assert(sizeof(insn) == E::plt_hdr_size);
    memcpy(buf, insn, sizeof(insn));
    // Absolute address of .got.plt.
    *(ul32 *)(buf + 12) = ctx.gotplt->shdr.sh_addr;
  }
}
// Write a lazy PLT entry. The entry loads the symbol's .got.plt slot
// (GOT-relative via %r12 for PIC, absolute otherwise), jumps to it, and
// puts the symbol's .rela.plt byte offset in %r1 for the resolver.
template <>
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  if (ctx.arg.pic) {
    static const u8 insn[] = {
      0x01, 0xd0, // mov.l 1f, r0
      0xce, 0x00, // mov.l @(r0, r12), r0
      0x2b, 0x40, // jmp @r0
      0x01, 0xd1, // mov.l 2f, r1
      0, 0, 0, 0, // 1: .long GOTPLT_ENTRY
      0, 0, 0, 0, // 2: .long INDEX_IN_RELPLT
    };
    static_assert(sizeof(insn) == E::plt_size);
    memcpy(buf, insn, sizeof(insn));
    *(ul32 *)(buf + 8) = sym.get_gotplt_addr(ctx) - ctx.got->shdr.sh_addr;
    *(ul32 *)(buf + 12) = sym.get_plt_idx(ctx) * sizeof(ElfRel<E>);
  } else {
    static const u8 insn[] = {
      0x01, 0xd0, // mov.l 1f, r0
      0x02, 0x60, // mov.l @r0, r0
      0x2b, 0x40, // jmp @r0
      0x01, 0xd1, // mov.l 2f, r1
      0, 0, 0, 0, // 1: .long GOTPLT_ENTRY
      0, 0, 0, 0, // 2: .long INDEX_IN_RELPLT
    };
    static_assert(sizeof(insn) == E::plt_size);
    memcpy(buf, insn, sizeof(insn));
    *(ul32 *)(buf + 8) = sym.get_gotplt_addr(ctx);
    *(ul32 *)(buf + 12) = sym.get_plt_idx(ctx) * sizeof(ElfRel<E>);
  }
}
// Write a non-lazy PLT entry: load the symbol's GOT slot (GOT-relative
// via %r12 for PIC, absolute otherwise) and jump to the address in it.
template <>
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  if (ctx.arg.pic) {
    static const u8 insn[] = {
      0x01, 0xd0, // mov.l 1f, r0
      0xce, 0x00, // mov.l @(r0, r12), r0
      0x2b, 0x40, // jmp @r0
      0x09, 0x00, // nop
      0, 0, 0, 0, // 1: .long GOT_ENTRY
    };
    static_assert(sizeof(insn) == E::pltgot_size);
    memcpy(buf, insn, sizeof(insn));
    *(ul32 *)(buf + 8) = sym.get_got_addr(ctx) - ctx.got->shdr.sh_addr;
  } else {
    static const u8 insn[] = {
      0x01, 0xd0, // mov.l 1f, r0
      0x02, 0x60, // mov.l @r0, r0
      0x2b, 0x40, // jmp @r0
      0x09, 0x00, // nop
      0, 0, 0, 0, // 1: .long GOT_ENTRY
    };
    static_assert(sizeof(insn) == E::pltgot_size);
    memcpy(buf, insn, sizeof(insn));
    *(ul32 *)(buf + 8) = sym.get_got_addr(ctx);
  }
}
// Apply a relocation to .eh_frame contents. Only the relocation types
// that compilers actually emit for .eh_frame are handled.
template <>
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
                                    u64 offset, u64 val) {
  u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
  switch (rel.r_type) {
  case R_NONE:
    break;
  case R_SH_DIR32:
    *(ul32 *)loc = val;
    break;
  case R_SH_REL32:
    // PC-relative: subtract the runtime address of the relocated field.
    *(ul32 *)loc = val - this->shdr.sh_addr - offset;
    break;
  default:
    Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
  }
}
// Apply relocations to the contents of an allocated (SHF_ALLOC) section.
// psABI notation: S = symbol value, A = addend (read from the relocated
// place on SH-4), P = place, G = GOT slot offset, GOT = address of .got.
template <>
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  // Dynamic relocations for this section, if any, are appended here.
  ElfRel<E> *dynrel = nullptr;
  if (ctx.reldyn)
    dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
                           file.reldyn_offset + this->reldyn_offset);
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE)
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;
    u64 S = sym.get_addr(ctx);
    u64 A = get_addend(loc, rel);
    u64 P = get_addr() + rel.r_offset;
    u64 G = sym.get_got_idx(ctx) * sizeof(Word<E>);
    u64 GOT = ctx.got->shdr.sh_addr;
    switch (rel.r_type) {
    case R_SH_DIR32:
      // May be converted to a dynamic relocation for PIC output.
      apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
      break;
    case R_SH_REL32:
    case R_SH_PLT32:
      *(ul32 *)loc = S + A - P;
      break;
    case R_SH_GOT32:
      *(ul32 *)loc = G;
      break;
    case R_SH_GOTPC:
      *(ul32 *)loc = GOT + A - P;
      break;
    case R_SH_GOTOFF:
      *(ul32 *)loc = S + A - GOT;
      break;
    case R_SH_TLS_GD_32:
      *(ul32 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT;
      break;
    case R_SH_TLS_LD_32:
      *(ul32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT;
      break;
    case R_SH_TLS_LDO_32:
      *(ul32 *)loc = S + A - ctx.dtp_addr;
      break;
    case R_SH_TLS_IE_32:
      *(ul32 *)loc = sym.get_gottp_addr(ctx) + A - GOT;
      break;
    case R_SH_TLS_LE_32:
      *(ul32 *)loc = S + A - ctx.tp_addr;
      break;
    default:
      unreachable();
    }
  }
}
// Apply relocations to a non-allocated section (e.g. debug sections).
// Only absolute 32-bit relocations are expected; the value may be
// replaced by a tombstone if the referenced section was dropped.
template <>
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;
    // If the relocation refers to a mergeable section fragment, use the
    // fragment's address and addend instead of the symbol's.
    SectionFragment<E> *frag;
    i64 frag_addend;
    std::tie(frag, frag_addend) = get_fragment(ctx, rel);
    u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
    u64 A = frag ? frag_addend : get_addend(loc, rel);
    switch (rel.r_type) {
    case R_SH_DIR32:
      if (std::optional<u64> val = get_tombstone(sym, frag))
        *(ul32 *)loc = *val;
      else
        *(ul32 *)loc = S + A;
      break;
    default:
      Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: "
                 << rel;
    }
  }
}
// Scan this section's relocations and record which auxiliary data
// structures (GOT, PLT, TLS slots, dynamic relocations) each referenced
// symbol needs.
template <>
void InputSection<E>::scan_relocations(Context<E> &ctx) {
  assert(shdr().sh_flags & SHF_ALLOC);
  this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
  std::span<const ElfRel<E>> rels = get_rels(ctx);
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;
    Symbol<E> &sym = *file.symbols[rel.r_sym];
    if (sym.is_ifunc())
      Error(ctx) << sym << ": GNU ifunc symbol is not supported on sh4";
    switch (rel.r_type) {
    case R_SH_DIR32:
      scan_dyn_absrel(ctx, sym, rel);
      break;
    case R_SH_REL32:
      scan_pcrel(ctx, sym, rel);
      break;
    case R_SH_GOT32:
      sym.flags |= NEEDS_GOT;
      break;
    case R_SH_PLT32:
      if (sym.is_imported)
        sym.flags |= NEEDS_PLT;
      break;
    case R_SH_TLS_GD_32:
      sym.flags |= NEEDS_TLSGD;
      break;
    case R_SH_TLS_LD_32:
      ctx.needs_tlsld = true;
      break;
    case R_SH_TLS_IE_32:
      sym.flags |= NEEDS_GOTTP;
      break;
    case R_SH_TLS_LE_32:
      check_tlsle(ctx, sym, rel);
      break;
    case R_SH_GOTPC:
    case R_SH_GOTOFF:
    case R_SH_TLS_LDO_32:
      // Resolved purely at apply time; nothing to record here.
      break;
    default:
      Fatal(ctx) << *this << ": unknown relocation: " << rel;
    }
  }
}
} // namespace mold::elf

622
third_party/mold/elf/arch-sparc64.cc vendored Normal file
View file

@ -0,0 +1,622 @@
// clang-format off
// SPARC is a RISC ISA developed by Sun Microsystems.
//
// The byte order of the processor is big-endian. Anything larger than a
// byte is stored in the "reverse" order compared to little-endian
// processors such as x86-64.
//
// All instructions are 4 bytes long and aligned to 4 bytes boundaries.
//
// A notable feature of SPARC is that, unlike other RISC ISAs, it doesn't
// need range extension thunks. It is because the SPARC's CALL instruction
// contains a whopping 30 bits immediate. The processor scales it by 4 to
// extend it to 32 bits (this is doable because all instructions are
// aligned to 4 bytes boundaries, so the least significant two bits are
// always zero). That means CALL's reach is PC ± 2 GiB, eliminating the
// need of range extension thunks. It comes with the cost that the CALL
// instruction alone takes 1/4th of the instruction encoding space,
// though.
//
// SPARC has 32 general purpose registers. CALL instruction saves a return
// address to %o7, which is an alias for %r15. Thread pointer is stored to
// %g7 which is %r7.
//
// SPARC does not have PC-relative load/store instructions. To access data
// in the position-independent manner, we usually first set the address of
// .got to, for example, %l7, with the following piece of code
//
// sethi %hi(. - _GLOBAL_OFFSET_TABLE_), %l7
// add %l7, %lo(. - _GLOBAL_OFFSET_TABLE_), %l7
// call __sparc_get_pc_thunk.l7
// nop
//
// where __sparc_get_pc_thunk.l7 is defined as
//
// retl
// add %o7, %l7, %l7
//
// . SETHI and the following ADD materialize a 32 bits offset to .got.
// CALL instruction sets a return address to %o7, and the subsequent ADD
// adds it to the GOT offset to materialize the absolute address of .got.
//
// Note that we have a NOP after CALL and an ADD after RETL because of
// SPARC's delay branch slots. That is, the SPARC processor always
// executes one instruction after a branch even if the branch is taken.
// This may seem like an odd behavior, and indeed it is considered as such
// (that's a premature optimization for the early pipelined SPARC
// processors), but that's been a part of the ISA's spec so that's what it
// is.
//
// Note also that the .got address obtained this way is not shared between
// functions, so functions can use an arbitrary register to hold the .got
// address. That also means each function needs to execute the above piece
// of code to become position-independent.
//
// This scheme is very similar to i386. That may not be a coincidence
// because the i386 ELF psABI is created by Sun Microsystems too.
//
// https://github.com/rui314/psabi/blob/main/sparc.pdf
#include "third_party/mold/elf/mold.h"
namespace mold::elf {
using E = SPARC64;
// SPARC's PLT section is writable despite containing executable code.
// We don't need to write the PLT header entry because the dynamic loader
// will do that for us.
//
// We also don't need a .got.plt section to store the result of lazy PLT
// symbol resolution because the dynamic symbol resolver directly mutates
// instructions in PLT so that they jump to the right places next time.
// That's why each PLT entry contains lots of NOPs; they are a placeholder
// for the runtime to add more instructions.
//
// Self-modifying code is nowadays considered really bad from the security
// point of view, though.
// The SPARC dynamic loader writes the actual PLT header code at load
// time, so the linker only has to reserve zero-filled space for it.
template <>
void write_plt_header(Context<E> &ctx, u8 *buf) {
  for (i64 i = 0; i < E::plt_hdr_size; i++)
    buf[i] = 0;
}
// Write a lazy PLT entry. We emit only a SETHI carrying the entry's
// offset from the start of .plt and a branch back to the second PLT
// header entry; the trailing NOPs are placeholder space that the
// runtime overwrites when it resolves the symbol (SPARC's PLT is
// self-modifying; see the comment above).
template <>
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  static ub32 insn[] = {
    0x0300'0000, // sethi   (. - .PLT0), %g1
    0x3068'0000, // ba,a    %xcc, .PLT1
    0x0100'0000, // nop
    0x0100'0000, // nop
    0x0100'0000, // nop
    0x0100'0000, // nop
    0x0100'0000, // nop
    0x0100'0000, // nop
  };
  u64 plt0 = ctx.plt->shdr.sh_addr;
  u64 plt1 = ctx.plt->shdr.sh_addr + E::plt_size;
  u64 entry = sym.get_plt_addr(ctx);
  memcpy(buf, insn, sizeof(insn));
  // SETHI immediate: this entry's byte offset from .PLT0.
  *(ub32 *)buf |= bits(entry - plt0, 21, 0);
  // BA,A displacement (word-scaled) from the instruction to .PLT1.
  *(ub32 *)(buf + 4) |= bits(plt1 - entry - 4, 20, 2);
}
// Write a non-lazy PLT entry. The CALL materializes its own address in
// %o7; adding the 8-byte offset embedded at the end of the entry yields
// the symbol's GOT slot, whose contents we then jump to. %o7 is saved
// in %g5 and restored in the branch delay slot.
template <>
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  static ub32 entry[] = {
    0x8a10'000f, // mov  %o7, %g5
    0x4000'0002, // call . + 8
    0xc25b'e014, // ldx  [ %o7 + 20 ], %g1
    0xc25b'c001, // ldx  [ %o7 + %g1 ], %g1
    0x81c0'4000, // jmp  %g1
    0x9e10'0005, // mov  %g5, %o7
    0x0000'0000, // .quad $plt_entry - $got_entry
    0x0000'0000,
  };
  memcpy(buf, entry, sizeof(entry));
  // Offset from the CALL instruction (at entry + 4) to the GOT slot.
  *(ub64 *)(buf + 24) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 4;
}
// Apply a relocation to .eh_frame contents. Only the relocation types
// that compilers actually emit for .eh_frame are handled.
template <>
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
                                    u64 offset, u64 val) {
  u8 *loc = ctx.buf + this->shdr.sh_offset + offset;
  switch (rel.r_type) {
  case R_NONE:
    break;
  case R_SPARC_64:
  case R_SPARC_UA64:
    *(ub64 *)loc = val;
    break;
  case R_SPARC_DISP32:
    // PC-relative: subtract the runtime address of the relocated field.
    *(ub32 *)loc = val - this->shdr.sh_addr - offset;
    break;
  default:
    Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
  }
}
template <>
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
std::span<const ElfRel<E>> rels = get_rels(ctx);
ElfRel<E> *dynrel = nullptr;
if (ctx.reldyn)
dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
file.reldyn_offset + this->reldyn_offset);
for (i64 i = 0; i < rels.size(); i++) {
const ElfRel<E> &rel = rels[i];
if (rel.r_type == R_NONE)
continue;
Symbol<E> &sym = *file.symbols[rel.r_sym];
u8 *loc = base + rel.r_offset;
auto check = [&](i64 val, i64 lo, i64 hi) {
if (val < lo || hi <= val)
Error(ctx) << *this << ": relocation " << rel << " against "
<< sym << " out of range: " << val << " is not in ["
<< lo << ", " << hi << ")";
};
u64 S = sym.get_addr(ctx);
u64 A = rel.r_addend;
u64 P = (get_addr() + rel.r_offset);
u64 G = (sym.get_got_idx(ctx) * sizeof(Word<E>));
u64 GOT = ctx.got->shdr.sh_addr;
switch (rel.r_type) {
case R_SPARC_64:
apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
break;
case R_SPARC_5:
check(S + A, 0, 1 << 5);
*(ub32 *)loc |= bits(S + A, 4, 0);
break;
case R_SPARC_6:
check(S + A, 0, 1 << 6);
*(ub32 *)loc |= bits(S + A, 5, 0);
break;
case R_SPARC_7:
check(S + A, 0, 1 << 7);
*(ub32 *)loc |= bits(S + A, 6, 0);
break;
case R_SPARC_8:
check(S + A, 0, 1 << 8);
*(u8 *)loc = S + A;
break;
case R_SPARC_10:
check(S + A, 0, 1 << 10);
*(ub32 *)loc |= bits(S + A, 9, 0);
break;
case R_SPARC_LO10:
case R_SPARC_LOPLT10:
*(ub32 *)loc |= bits(S + A, 9, 0);
break;
case R_SPARC_11:
check(S + A, 0, 1 << 11);
*(ub32 *)loc |= bits(S + A, 10, 0);
break;
case R_SPARC_13:
check(S + A, 0, 1 << 13);
*(ub32 *)loc |= bits(S + A, 12, 0);
break;
case R_SPARC_16:
case R_SPARC_UA16:
check(S + A, 0, 1 << 16);
*(ub16 *)loc = S + A;
break;
case R_SPARC_22:
check(S + A, 0, 1 << 22);
*(ub32 *)loc |= bits(S + A, 21, 0);
break;
case R_SPARC_32:
case R_SPARC_UA32:
case R_SPARC_PLT32:
check(S + A, 0, 1LL << 32);
*(ub32 *)loc = S + A;
break;
case R_SPARC_PLT64:
case R_SPARC_UA64:
case R_SPARC_REGISTER:
*(ub64 *)loc = S + A;
break;
case R_SPARC_DISP8:
check(S + A - P, -(1 << 7), 1 << 7);
*(u8 *)loc = S + A - P;
break;
case R_SPARC_DISP16:
check(S + A - P, -(1 << 15), 1 << 15);
*(ub16 *)loc = S + A - P;
break;
case R_SPARC_DISP32:
case R_SPARC_PCPLT32:
check(S + A - P, -(1LL << 31), 1LL << 31);
*(ub32 *)loc = S + A - P;
break;
case R_SPARC_DISP64:
*(ub64 *)loc = S + A - P;
break;
case R_SPARC_WDISP16: {
i64 val = S + A - P;
check(val, -(1 << 16), 1 << 16);
*(ub16 *)loc |= (bit(val, 16) << 21) | bits(val, 15, 2);
break;
}
case R_SPARC_WDISP19:
check(S + A - P, -(1 << 20), 1 << 20);
*(ub32 *)loc |= bits(S + A - P, 20, 2);
break;
case R_SPARC_WDISP22:
check(S + A - P, -(1 << 23), 1 << 23);
*(ub32 *)loc |= bits(S + A - P, 23, 2);
break;
case R_SPARC_WDISP30:
case R_SPARC_WPLT30:
check(S + A - P, -(1LL << 31), 1LL << 31);
*(ub32 *)loc |= bits(S + A - P, 31, 2);
break;
case R_SPARC_HI22:
case R_SPARC_HIPLT22:
case R_SPARC_LM22:
*(ub32 *)loc |= bits(S + A, 31, 10);
break;
case R_SPARC_GOT10:
*(ub32 *)loc |= bits(G, 9, 0);
break;
case R_SPARC_GOT13:
check(G, 0, 1 << 12);
*(ub32 *)loc |= bits(G, 12, 0);
break;
case R_SPARC_GOT22:
*(ub32 *)loc |= bits(G, 31, 10);
break;
case R_SPARC_GOTDATA_HIX22: {
i64 val = S + A - GOT;
*(ub32 *)loc |= bits(val < 0 ? ~val : val, 31, 10);
break;
}
case R_SPARC_GOTDATA_LOX10: {
i64 val = S + A - GOT;
*(ub32 *)loc |= bits(val, 9, 0) | (val < 0 ? 0b1'1100'0000'0000 : 0);
break;
}
case R_SPARC_GOTDATA_OP_HIX22:
// We always have to relax a GOT load to a load immediate if a
// symbol is local, because R_SPARC_GOTDATA_OP cannot represent
// an addend for a local symbol.
if (sym.is_imported || sym.is_ifunc()) {
*(ub32 *)loc |= bits(G, 31, 10);
} else if (sym.is_absolute()) {
i64 val = S + A;
*(ub32 *)loc |= bits(val < 0 ? ~val : val, 31, 10);
} else {
i64 val = S + A - GOT;
*(ub32 *)loc |= bits(val < 0 ? ~val : val, 31, 10);
}
break;
case R_SPARC_GOTDATA_OP_LOX10: {
if (sym.is_imported || sym.is_ifunc()) {
*(ub32 *)loc |= bits(G, 9, 0);
} else if (sym.is_absolute()) {
i64 val = S + A;
*(ub32 *)loc |= bits(val, 9, 0) | (val < 0 ? 0b1'1100'0000'0000 : 0);
} else {
i64 val = S + A - GOT;
*(ub32 *)loc |= bits(val, 9, 0) | (val < 0 ? 0b1'1100'0000'0000 : 0);
}
break;
}
case R_SPARC_GOTDATA_OP:
if (sym.is_imported || sym.is_ifunc())
break;
if (sym.is_absolute()) {
// ldx [ %g2 + %g1 ], %g1 → nop
*(ub32 *)loc = 0x0100'0000;
} else {
// ldx [ %g2 + %g1 ], %g1 → add %g2, %g1, %g1
*(ub32 *)loc &= 0b00'11111'000000'11111'1'11111111'11111;
*(ub32 *)loc |= 0b10'00000'000000'00000'0'00000000'00000;
}
break;
case R_SPARC_PC10:
case R_SPARC_PCPLT10:
*(ub32 *)loc |= bits(S + A - P, 9, 0);
break;
case R_SPARC_PC22:
case R_SPARC_PCPLT22:
case R_SPARC_PC_LM22:
*(ub32 *)loc |= bits(S + A - P, 31, 10);
break;
case R_SPARC_OLO10:
*(ub32 *)loc |= bits(bits(S + A, 9, 0) + rel.r_type_data, 12, 0);
break;
case R_SPARC_HH22:
*(ub32 *)loc |= bits(S + A, 63, 42);
break;
case R_SPARC_HM10:
*(ub32 *)loc |= bits(S + A, 41, 32);
break;
case R_SPARC_PC_HH22:
*(ub32 *)loc |= bits(S + A - P, 63, 42);
break;
case R_SPARC_PC_HM10:
*(ub32 *)loc |= bits(S + A - P, 41, 32);
break;
case R_SPARC_HIX22:
*(ub32 *)loc |= bits(~(S + A), 31, 10);
break;
case R_SPARC_LOX10:
*(ub32 *)loc |= bits(S + A, 9, 0) | 0b1'1100'0000'0000;
break;
case R_SPARC_H44:
*(ub32 *)loc |= bits(S + A, 43, 22);
break;
case R_SPARC_M44:
*(ub32 *)loc |= bits(S + A, 21, 12);
break;
case R_SPARC_L44:
*(ub32 *)loc |= bits(S + A, 11, 0);
break;
case R_SPARC_TLS_GD_HI22:
*(ub32 *)loc |= bits(sym.get_tlsgd_addr(ctx) + A - GOT, 31, 10);
break;
case R_SPARC_TLS_GD_LO10:
*(ub32 *)loc |= bits(sym.get_tlsgd_addr(ctx) + A - GOT, 9, 0);
break;
case R_SPARC_TLS_GD_CALL:
case R_SPARC_TLS_LDM_CALL: {
u64 addr;
if (ctx.arg.is_static)
addr = ctx.extra.tls_get_addr_sec->shdr.sh_addr;
else
addr = ctx.extra.tls_get_addr_sym->get_addr(ctx);
*(ub32 *)loc |= bits(addr + A - P, 31, 2);
break;
}
case R_SPARC_TLS_LDM_HI22:
*(ub32 *)loc |= bits(ctx.got->get_tlsld_addr(ctx) + A - GOT, 31, 10);
break;
case R_SPARC_TLS_LDM_LO10:
*(ub32 *)loc |= bits(ctx.got->get_tlsld_addr(ctx) + A - GOT, 9, 0);
break;
case R_SPARC_TLS_LDO_HIX22:
*(ub32 *)loc |= bits(S + A - ctx.dtp_addr, 31, 10);
break;
case R_SPARC_TLS_LDO_LOX10:
*(ub32 *)loc |= bits(S + A - ctx.dtp_addr, 9, 0);
break;
case R_SPARC_TLS_IE_HI22:
*(ub32 *)loc |= bits(sym.get_gottp_addr(ctx) + A - GOT, 31, 10);
break;
case R_SPARC_TLS_IE_LO10:
*(ub32 *)loc |= bits(sym.get_gottp_addr(ctx) + A - GOT, 9, 0);
break;
case R_SPARC_TLS_LE_HIX22:
*(ub32 *)loc |= bits(~(S + A - ctx.tp_addr), 31, 10);
break;
case R_SPARC_TLS_LE_LOX10:
*(ub32 *)loc |= bits(S + A - ctx.tp_addr, 9, 0) | 0b1'1100'0000'0000;
break;
case R_SPARC_SIZE32:
*(ub32 *)loc = sym.esym().st_size + A;
break;
case R_SPARC_TLS_GD_ADD:
case R_SPARC_TLS_LDM_ADD:
case R_SPARC_TLS_LDO_ADD:
case R_SPARC_TLS_IE_LD:
case R_SPARC_TLS_IE_LDX:
case R_SPARC_TLS_IE_ADD:
break;
default:
unreachable();
}
}
}
// Apply relocations to a non-SHF_ALLOC section (typically debug info).
// Such sections are not mapped to memory at runtime, so no GOT, PLT or
// dynamic-relocation machinery is involved; we just patch values in place.
template <>
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);

  for (const ElfRel<E> &rel : rels) {
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;

    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;

    // Report a relocated value that does not fit in [lo, hi).
    auto check = [&](i64 val, i64 lo, i64 hi) {
      if (val < lo || hi <= val)
        Error(ctx) << *this << ": relocation " << rel << " against "
                   << sym << " out of range: " << val << " is not in ["
                   << lo << ", " << hi << ")";
    };

    // If the relocation refers to a mergeable section fragment, resolve
    // against the fragment's address and addend instead of the symbol's.
    auto [frag, frag_addend] = get_fragment(ctx, rel);
    u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
    u64 A = frag ? frag_addend : (i64)rel.r_addend;

    switch (rel.r_type) {
    case R_SPARC_64:
    case R_SPARC_UA64:
      // A dead (garbage-collected) fragment may yield a tombstone value.
      if (std::optional<u64> val = get_tombstone(sym, frag))
        *(ub64 *)loc = *val;
      else
        *(ub64 *)loc = S + A;
      break;
    case R_SPARC_32:
    case R_SPARC_UA32: {
      i64 val = S + A;
      check(val, 0, 1LL << 32);
      *(ub32 *)loc = val;
      break;
    }
    case R_SPARC_TLS_DTPOFF32:
      *(ub32 *)loc = S + A - ctx.dtp_addr;
      break;
    case R_SPARC_TLS_DTPOFF64:
      *(ub64 *)loc = S + A - ctx.dtp_addr;
      break;
    default:
      Fatal(ctx) << *this << ": apply_reloc_nonalloc: " << rel;
    }
  }
}
// Scan this section's relocations and record, on each referenced symbol,
// which linker-synthesized structures (GOT slot, PLT entry, TLS GOT
// entries, dynamic relocations) it will need. Runs before file layout
// is fixed so that those structures can be sized.
template <>
void InputSection<E>::scan_relocations(Context<E> &ctx) {
  assert(shdr().sh_flags & SHF_ALLOC);

  // Reserve space in .rela.dyn for dynamic relocations this section
  // has requested so far.
  this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
  std::span<const ElfRel<E>> rels = get_rels(ctx);

  // Scan relocations
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;

    Symbol<E> &sym = *file.symbols[rel.r_sym];

    // An ifunc is resolved at runtime, so any reference to it needs
    // both a GOT slot and a PLT entry regardless of relocation type.
    if (sym.is_ifunc())
      sym.flags |= NEEDS_GOT | NEEDS_PLT;

    switch (rel.r_type) {
    case R_SPARC_64:
      // Word-size absolute value: may become a dynamic relocation.
      scan_dyn_absrel(ctx, sym, rel);
      break;
    // Sub-word absolute values: cannot be expressed as dynamic
    // relocations, so they constrain what symbols are allowed here.
    case R_SPARC_8:
    case R_SPARC_5:
    case R_SPARC_6:
    case R_SPARC_7:
    case R_SPARC_10:
    case R_SPARC_11:
    case R_SPARC_13:
    case R_SPARC_16:
    case R_SPARC_22:
    case R_SPARC_32:
    case R_SPARC_REGISTER:
    case R_SPARC_UA16:
    case R_SPARC_UA32:
    case R_SPARC_UA64:
    case R_SPARC_PC_HM10:
    case R_SPARC_OLO10:
    case R_SPARC_LOX10:
    case R_SPARC_HM10:
    case R_SPARC_M44:
    case R_SPARC_HIX22:
    case R_SPARC_LO10:
    case R_SPARC_L44:
    case R_SPARC_LM22:
    case R_SPARC_HI22:
    case R_SPARC_H44:
    case R_SPARC_HH22:
      scan_absrel(ctx, sym, rel);
      break;
    // PLT-bound references: only imported symbols actually need a PLT
    // entry; locally-resolved calls go straight to the definition.
    case R_SPARC_PLT32:
    case R_SPARC_WPLT30:
    case R_SPARC_WDISP30:
    case R_SPARC_HIPLT22:
    case R_SPARC_LOPLT10:
    case R_SPARC_PCPLT32:
    case R_SPARC_PCPLT22:
    case R_SPARC_PCPLT10:
    case R_SPARC_PLT64:
      if (sym.is_imported)
        sym.flags |= NEEDS_PLT;
      break;
    case R_SPARC_GOT13:
    case R_SPARC_GOT10:
    case R_SPARC_GOT22:
    case R_SPARC_GOTDATA_HIX22:
      sym.flags |= NEEDS_GOT;
      break;
    case R_SPARC_GOTDATA_OP_HIX22:
      // For local symbols this access is relaxed to a load-immediate at
      // apply time, so a GOT slot is needed only for imports.
      if (sym.is_imported)
        sym.flags |= NEEDS_GOT;
      break;
    // PC-relative references.
    case R_SPARC_DISP16:
    case R_SPARC_DISP32:
    case R_SPARC_DISP64:
    case R_SPARC_DISP8:
    case R_SPARC_PC10:
    case R_SPARC_PC22:
    case R_SPARC_PC_LM22:
    case R_SPARC_WDISP16:
    case R_SPARC_WDISP19:
    case R_SPARC_WDISP22:
    case R_SPARC_PC_HH22:
      scan_pcrel(ctx, sym, rel);
      break;
    case R_SPARC_TLS_GD_HI22:
      sym.flags |= NEEDS_TLSGD;
      break;
    case R_SPARC_TLS_LDM_HI22:
      ctx.needs_tlsld = true;
      break;
    case R_SPARC_TLS_IE_HI22:
      sym.flags |= NEEDS_GOTTP;
      break;
    case R_SPARC_TLS_GD_CALL:
    case R_SPARC_TLS_LDM_CALL:
      // These are calls to __tls_get_addr; in a static link it is
      // provided by our own replacement section instead of a PLT.
      if (!ctx.arg.is_static && ctx.extra.tls_get_addr_sym->is_imported)
        ctx.extra.tls_get_addr_sym->flags |= NEEDS_PLT;
      break;
    case R_SPARC_TLS_LE_HIX22:
    case R_SPARC_TLS_LE_LOX10:
      check_tlsle(ctx, sym, rel);
      break;
    // These relocation types need no preparation at scan time; they are
    // handled entirely at apply time.
    case R_SPARC_GOTDATA_OP_LOX10:
    case R_SPARC_GOTDATA_OP:
    case R_SPARC_GOTDATA_LOX10:
    case R_SPARC_TLS_GD_LO10:
    case R_SPARC_TLS_GD_ADD:
    case R_SPARC_TLS_LDM_LO10:
    case R_SPARC_TLS_LDM_ADD:
    case R_SPARC_TLS_LDO_HIX22:
    case R_SPARC_TLS_LDO_LOX10:
    case R_SPARC_TLS_LDO_ADD:
    case R_SPARC_TLS_IE_ADD:
    case R_SPARC_TLS_IE_LD:
    case R_SPARC_TLS_IE_LDX:
    case R_SPARC_TLS_IE_LO10:
    case R_SPARC_SIZE32:
      break;
    default:
      Fatal(ctx) << *this << ": scan_relocations: " << rel;
    }
  }
}
// __tls_get_addr is not defined by libc.a, so we can't use that function
// in statically-linked executables. This section provides a replacement.
void SparcTlsGetAddrSection::copy_buf(Context<E> &ctx) {
  // Template of the replacement routine. The first two instructions are
  // patched below with the size of the static TLS block.
  static const ub32 code[] = {
    0x0300'0000, // sethi %hi(TP_SIZE), %g1
    0x8210'6000, // or %g1, %lo(TP_SIZE), %g1
    0x8221'c001, // sub %g7, %g1, %g1
    0xd05a'2008, // ldx [ %o0 + 8 ], %o0
    0x81c3'e008, // retl
    0x9000'4008, // add %g1, %o0, %o0
  };

  assert(this->shdr.sh_size == sizeof(code));

  ub32 *out = (ub32 *)(ctx.buf + this->shdr.sh_offset);
  memcpy(out, code, sizeof(code));

  // Patch the sethi/or pair with the distance from the start of the TLS
  // image to the thread pointer.
  u64 tp_size = ctx.tp_addr - ctx.tls_begin;
  out[0] |= bits(tp_size, 31, 10);
  out[1] |= bits(tp_size, 9, 0);
}
} // namespace mold::elf

773
third_party/mold/elf/arch-x86-64.cc vendored Normal file
View file

@ -0,0 +1,773 @@
// clang-format off
// Supporting x86-64 is straightforward. Unlike its predecessor, i386,
// x86-64 supports PC-relative addressing for position-independent code.
// Being CISC, its instructions are variable in size. Branch instructions
// take 4-byte offsets, so we don't need range extension thunks.
//
// The psABI specifies %r11 as neither caller- nor callee-saved. It's
// intentionally left out so that we can use it as a scratch register in
// PLT.
//
// Thread Pointer (TP) is stored not to a general-purpose register but to
// FS segment register. A segment register is a 64-bit register which can
// be used as a base address for memory access. Each thread has a unique
// FS value, and they access their thread-local variables relative to FS
// as %fs:offset_from_tp.
//
// The value of a segment register itself is not generally readable from
// the user space. As a workaround, libc initializes %fs:0 (the first word
// referenced by FS) to the value of %fs itself. So we can obtain TP just
// by `mov %fs:0, %rax` if we need it.
//
// For historical reasons, TP points past the end of the TLS block on x86.
// This is contrary to other psABIs which usually use the beginning of the
// TLS block as TP (with some addend). As a result, offsets from TP to
// thread-local variables (TLVs) in the main executable are all negative.
//
// https://github.com/rui314/psabi/blob/main/x86-64.pdf
// https://github.com/rui314/psabi/blob/main/i386.pdf
#include "third_party/mold/elf/mold.h"
namespace mold::elf {
using E = X86_64;
// This is a security-enhanced version of the regular PLT. The PLT
// header and each PLT entry starts with endbr64 for the Intel's
// control-flow enforcement security mechanism.
//
// Note that our IBT-enabled PLT instruction sequence is different
// from the one used in GNU ld. GNU's IBTPLT implementation uses two
// separate sections (.plt and .plt.sec) in which one PLT entry takes
// 32 bytes in total. Our IBTPLT consists of just .plt and each entry
// is 16 bytes long.
//
// Our PLT entry clobbers %r11, but that's fine because the resolver
// function (_dl_runtime_resolve) clobbers %r11 anyway.
// Write the 32-byte PLT header. It pushes the second GOTPLT word (the
// link map) and jumps through the third (the lazy resolver).
template <>
void write_plt_header(Context<E> &ctx, u8 *buf) {
  static const u8 entry[] = {
    0xf3, 0x0f, 0x1e, 0xfa, // endbr64
    0x41, 0x53,             // push %r11
    0xff, 0x35, 0, 0, 0, 0, // push GOTPLT+8(%rip)
    0xff, 0x25, 0, 0, 0, 0, // jmp *GOTPLT+16(%rip)
    0xcc, 0xcc, 0xcc, 0xcc, // (padding)
    0xcc, 0xcc, 0xcc, 0xcc, // (padding)
    0xcc, 0xcc, 0xcc, 0xcc, // (padding)
    0xcc, 0xcc,             // (padding)
  };

  memcpy(buf, entry, sizeof(entry));

  // Fill in the two RIP-relative displacements. Each displacement is
  // measured from the end of its own instruction: the push ends at
  // offset 12 and targets GOTPLT+8, the jmp ends at offset 18 and
  // targets GOTPLT+16.
  u64 gotplt = ctx.gotplt->shdr.sh_addr;
  u64 plt = ctx.plt->shdr.sh_addr;
  *(ul32 *)(buf + 8) = gotplt - plt - 4;
  *(ul32 *)(buf + 14) = gotplt - plt - 2;
}
// Write one 16-byte PLT entry for `sym`.
template <>
void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  static const u8 entry[] = {
    0xf3, 0x0f, 0x1e, 0xfa, // endbr64
    0x41, 0xbb, 0, 0, 0, 0, // mov $index_in_relplt, %r11d
    0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOTPLT
  };

  memcpy(buf, entry, sizeof(entry));

  // %r11d carries the index of this entry's .rela.plt record so the
  // resolver knows which symbol to bind.
  *(ul32 *)(buf + 6) = sym.get_plt_idx(ctx);

  // RIP-relative displacement to the symbol's GOTPLT slot, measured
  // from the end of this 16-byte entry.
  *(ul32 *)(buf + 12) = sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx) - 16;
}
// Write one 16-byte .plt.got entry: a direct jump through the symbol's
// regular GOT slot (used when the symbol already has a GOT entry).
template <>
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  static const u8 entry[] = {
    0xf3, 0x0f, 0x1e, 0xfa, // endbr64
    0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
    0xcc, 0xcc, 0xcc, 0xcc, // (padding)
    0xcc, 0xcc,             // (padding)
  };

  memcpy(buf, entry, sizeof(entry));

  // The jmp ends at offset 10, so bias the GOT displacement by -10.
  *(ul32 *)(buf + 6) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 10;
}
// Apply one relocation within the output .eh_frame section. .eh_frame
// relocations are applied here rather than in apply_reloc_alloc because
// the section's records are re-laid-out by the linker; `offset` is the
// record's position in the output section and `val` the resolved value.
template <>
void EhFrameSection<E>::apply_reloc(Context<E> &ctx, const ElfRel<E> &rel,
                                    u64 offset, u64 val) {
  u8 *loc = ctx.buf + this->shdr.sh_offset + offset;

  switch (rel.r_type) {
  case R_NONE:
    break;
  case R_X86_64_32:
    *(ul32 *)loc = val;
    break;
  case R_X86_64_64:
    *(ul64 *)loc = val;
    break;
  case R_X86_64_PC32:
    // PC-relative: subtract the address of the relocated place.
    *(ul32 *)loc = val - this->shdr.sh_addr - offset;
    break;
  case R_X86_64_PC64:
    *(ul64 *)loc = val - this->shdr.sh_addr - offset;
    break;
  default:
    Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel;
  }
}
// Given the two opcode bytes of `call/jmp *foo@GOT(%rip)`, return the
// opcode bytes of the equivalent direct `call/jmp foo` (prefixed with a
// nop to keep the instruction length), or 0 if not rewritable.
static u32 relax_gotpcrelx(u8 *loc) {
  u32 op = (loc[0] << 8) | loc[1];
  if (op == 0xff15)
    return 0x90e8; // call *0(%rip) -> call 0
  if (op == 0xff25)
    return 0x90e9; // jmp *0(%rip) -> jmp 0
  return 0;
}
// Given the first three bytes of `mov foo@GOT(%rip), %reg`, return the
// first three bytes of the equivalent `lea foo(%rip), %reg`, or 0 if
// the instruction is not of that form.
//
// A rewritable instruction is a REX.W mov (prefix 0x48 for %rax-%rdi or
// 0x4c for %r8-%r15, opcode 0x8b) with a RIP-relative operand, i.e. a
// ModRM byte with mod=00, r/m=101. The destination register lives in
// ModRM.reg, so swapping the opcode for lea (0x8d) and keeping the REX
// prefix and ModRM byte preserves the register.
static u32 relax_rex_gotpcrelx(u8 *loc) {
  bool is_mov = (loc[0] == 0x48 || loc[0] == 0x4c) && loc[1] == 0x8b;
  bool is_riprel = (loc[2] & 0b1100'0111) == 0b0000'0101;
  if (is_mov && is_riprel)
    return (loc[0] << 16) | (0x8d << 8) | loc[2];
  return 0;
}
// Given the first three bytes of `mov foo@gottpoff(%rip), %reg`, return
// the first three bytes of the equivalent `mov $foo@tpoff, %reg`, or 0
// if the instruction is not of that form.
//
// A rewritable instruction is a REX.W mov (prefix 0x48 or 0x4c, opcode
// 0x8b) with a RIP-relative operand (ModRM mod=00, r/m=101).
static u32 relax_gottpoff(u8 *loc) {
  bool is_mov = (loc[0] == 0x48 || loc[0] == 0x4c) && loc[1] == 0x8b;
  bool is_riprel = (loc[2] & 0b1100'0111) == 0b0000'0101;
  if (!is_mov || !is_riprel)
    return 0;

  // `mov $imm32, %reg` (0xc7 /0) encodes the destination in ModRM.rm,
  // so the register number moves from ModRM.reg to ModRM.rm and the
  // REX.R bit becomes REX.B (0x4c -> 0x49).
  u32 rex = (loc[0] == 0x48) ? 0x48 : 0x49;
  u32 reg = (loc[2] >> 3) & 0b111;
  return (rex << 16) | (0xc7 << 8) | (0xc0 | reg);
}
// Given the first three bytes of `lea foo@tlsdesc(%rip), %reg`, return
// the first three bytes of the equivalent `mov $foo@tpoff, %reg`, or 0
// if the instruction is not of that form.
//
// A rewritable instruction is a REX.W lea (prefix 0x48 or 0x4c, opcode
// 0x8d) with a RIP-relative operand (ModRM mod=00, r/m=101).
static u32 relax_gotpc32_tlsdesc(u8 *loc) {
  bool is_lea = (loc[0] == 0x48 || loc[0] == 0x4c) && loc[1] == 0x8d;
  bool is_riprel = (loc[2] & 0b1100'0111) == 0b0000'0101;
  if (!is_lea || !is_riprel)
    return 0;

  // `mov $imm32, %reg` (0xc7 /0) holds the destination in ModRM.rm, so
  // the register number moves from ModRM.reg to ModRM.rm and the REX.R
  // bit becomes REX.B (0x4c -> 0x49).
  u32 rex = (loc[0] == 0x48) ? 0x48 : 0x49;
  u32 reg = (loc[2] >> 3) & 0b111;
  return (rex << 16) | (0xc7 << 8) | (0xc0 | reg);
}
// Rewrite a function call to __tls_get_addr to a cheaper instruction
// sequence. We can do this when we know the thread-local variable's TP-
// relative address at link-time.
//
// `loc` points at the 4-byte displacement field of the leading lea (so
// the instruction itself starts at loc - 4 or loc - 3 depending on the
// form), `rel` is the relocation on the following call, and `val` is
// the variable's TP-relative address.
static void relax_gd_to_le(u8 *loc, ElfRel<E> rel, u64 val) {
  switch (rel.r_type) {
  case R_X86_64_PLT32:
  case R_X86_64_PC32:
  case R_X86_64_GOTPCREL:
  case R_X86_64_GOTPCRELX: {
    // The original instructions are the following:
    //
    //  66 48 8d 3d 00 00 00 00    lea foo@tlsgd(%rip), %rdi
    //  66 66 48 e8 00 00 00 00    call __tls_get_addr
    //
    // or
    //
    //  66 48 8d 3d 00 00 00 00    lea foo@tlsgd(%rip), %rdi
    //  66 48 ff 15 00 00 00 00    call *__tls_get_addr@GOT(%rip)
    static const u8 insn[] = {
      0x64, 0x48, 0x8b, 0x04, 0x25, 0, 0, 0, 0, // mov %fs:0, %rax
      0x48, 0x81, 0xc0, 0, 0, 0, 0,             // add $tp_offset, %rax
    };
    // The replacement overwrites both instructions starting at the lea
    // (4 bytes before loc); the add's immediate lands at loc + 8.
    memcpy(loc - 4, insn, sizeof(insn));
    *(ul32 *)(loc + 8) = val;
    break;
  }
  case R_X86_64_PLTOFF64: {
    // The original instructions are the following:
    //
    //  48 8d 3d 00 00 00 00          lea foo@tlsgd(%rip), %rdi
    //  48 b8 00 00 00 00 00 00 00 00 movabs __tls_get_addr, %rax
    //  48 01 d8                      add %rbx, %rax
    //  ff d0                         call *%rax
    static const u8 insn[] = {
      0x64, 0x48, 0x8b, 0x04, 0x25, 0, 0, 0, 0, // mov %fs:0, %rax
      0x48, 0x81, 0xc0, 0, 0, 0, 0,             // add $tp_offset, %rax
      0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,       // nop
    };
    // Here the lea is 7 bytes, so the sequence starts at loc - 3 and
    // the add's immediate lands at loc + 9; a 6-byte nop pads the rest.
    memcpy(loc - 3, insn, sizeof(insn));
    *(ul32 *)(loc + 9) = val;
    break;
  }
  default:
    unreachable();
  }
}
// Rewrite a General-Dynamic __tls_get_addr call to an Initial-Exec
// access: read TP from %fs:0, then add the TP offset loaded from the
// symbol's GOT entry. `val` is the GOT entry's address minus P (the
// address of loc); the stored displacement is further biased by the
// distance from the rewritten sequence's start to the end of the add.
static void relax_gd_to_ie(u8 *loc, ElfRel<E> rel, u64 val) {
  switch (rel.r_type) {
  case R_X86_64_PLT32:
  case R_X86_64_PC32:
  case R_X86_64_GOTPCREL:
  case R_X86_64_GOTPCRELX: {
    static const u8 insn[] = {
      0x64, 0x48, 0x8b, 0x04, 0x25, 0, 0, 0, 0, // mov %fs:0, %rax
      0x48, 0x03, 0x05, 0, 0, 0, 0,             // add foo@gottpoff(%rip), %rax
    };
    // Sequence starts at loc - 4; the add's displacement field is at
    // loc + 8, and the add ends at loc + 12, hence the -12 bias.
    memcpy(loc - 4, insn, sizeof(insn));
    *(ul32 *)(loc + 8) = val - 12;
    break;
  }
  case R_X86_64_PLTOFF64: {
    static const u8 insn[] = {
      0x64, 0x48, 0x8b, 0x04, 0x25, 0, 0, 0, 0, // mov %fs:0, %rax
      0x48, 0x03, 0x05, 0, 0, 0, 0,             // add foo@gottpoff(%rip), %rax
      0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,       // nop
    };
    // Sequence starts at loc - 3; the add's displacement field is at
    // loc + 9 and the add ends at loc + 13.
    memcpy(loc - 3, insn, sizeof(insn));
    *(ul32 *)(loc + 9) = val - 13;
    break;
  }
  default:
    unreachable();
  }
}
// Rewrite a function call to __tls_get_addr to a cheaper instruction
// sequence. The difference from relax_gd_to_le is that we are
// materializing a Dynamic Thread Pointer for the current ELF module
// instead of an address for a particular thread-local variable.
//
// `loc` points at the displacement field of the 7-byte lea (so the
// sequence starts at loc - 3), `rel` is the relocation on the following
// call, and `val` is the size of the static TLS block (TP minus the
// start of the TLS image), which is subtracted from TP to recover the
// module's TLS base.
static void relax_ld_to_le(u8 *loc, ElfRel<E> rel, u64 val) {
  switch (rel.r_type) {
  case R_X86_64_PLT32:
  case R_X86_64_PC32: {
    // The original instructions are the following:
    //
    //  48 8d 3d 00 00 00 00   lea foo@tlsld(%rip), %rdi
    //  e8 00 00 00 00         call __tls_get_addr
    static const u8 insn[] = {
      0x31, 0xc0,                   // xor %eax, %eax
      0x64, 0x48, 0x8b, 0x00,       // mov %fs:(%rax), %rax
      0x48, 0x2d, 0, 0, 0, 0,       // sub $tls_size, %rax
    };
    // The sub's immediate lands at loc + 5 (offset 8 from loc - 3).
    memcpy(loc - 3, insn, sizeof(insn));
    *(ul32 *)(loc + 5) = val;
    break;
  }
  case R_X86_64_GOTPCREL:
  case R_X86_64_GOTPCRELX: {
    // The original instructions are the following:
    //
    //  48 8d 3d 00 00 00 00   lea foo@tlsld(%rip), %rdi
    //  ff 15 00 00 00 00      call *__tls_get_addr@GOT(%rip)
    static const u8 insn[] = {
      0x31, 0xc0,                   // xor %eax, %eax
      0x64, 0x48, 0x8b, 0x00,       // mov %fs:(%rax), %rax
      0x48, 0x2d, 0, 0, 0, 0,       // sub $tls_size, %rax
      0x90,                         // nop
    };
    memcpy(loc - 3, insn, sizeof(insn));
    *(ul32 *)(loc + 5) = val;
    break;
  }
  case R_X86_64_PLTOFF64: {
    // The original instructions are the following:
    //
    //  48 8d 3d 00 00 00 00           lea foo@tlsld(%rip), %rdi
    //  48 b8 00 00 00 00 00 00 00 00  movabs __tls_get_addr@GOTOFF, %rax
    //  48 01 d8                       add %rbx, %rax
    //  ff d0                          call *%rax
    static const u8 insn[] = {
      0x31, 0xc0,                   // xor %eax, %eax
      0x64, 0x48, 0x8b, 0x00,       // mov %fs:(%rax), %rax
      0x48, 0x2d, 0, 0, 0, 0,       // sub $tls_size, %rax
      0x0f, 0x1f, 0x44, 0x00, 0x00, // nop
      0x0f, 0x1f, 0x44, 0x00, 0x00, // nop
    };
    memcpy(loc - 3, insn, sizeof(insn));
    *(ul32 *)(loc + 5) = val;
    break;
  }
  default:
    unreachable();
  }
}
// Apply relocations to SHF_ALLOC sections (i.e. sections that are
// mapped to memory at runtime) based on the result of
// scan_relocations().
template <>
void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);

  // Pointer into .rela.dyn where this section's dynamic relocations
  // (if any) are to be emitted.
  ElfRel<E> *dynrel = nullptr;
  if (ctx.reldyn)
    dynrel = (ElfRel<E> *)(ctx.buf + ctx.reldyn->shdr.sh_offset +
                           file.reldyn_offset + this->reldyn_offset);

  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE)
      continue;

    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;

    // Report a relocated value that does not fit in [lo, hi).
    auto check = [&](i64 val, i64 lo, i64 hi) {
      if (val < lo || hi <= val)
        Error(ctx) << *this << ": relocation " << rel << " against "
                   << sym << " out of range: " << val << " is not in ["
                   << lo << ", " << hi << ")";
    };

    // Store a 32-bit value with a zero-extension range check.
    auto write32 = [&](u64 val) {
      check(val, 0, 1LL << 32);
      *(ul32 *)loc = val;
    };

    // Store a 32-bit value with a sign-extension range check.
    auto write32s = [&](u64 val) {
      check(val, -(1LL << 31), 1LL << 31);
      *(ul32 *)loc = val;
    };

    // Standard psABI relocation inputs: S = symbol value, A = addend,
    // P = place being relocated, G = GOT slot offset from GOTPLT,
    // GOTPLT = address of .got.plt.
    u64 S = sym.get_addr(ctx);
    u64 A = rel.r_addend;
    u64 P = get_addr() + rel.r_offset;
    u64 G = sym.get_got_addr(ctx) - ctx.gotplt->shdr.sh_addr;
    u64 GOTPLT = ctx.gotplt->shdr.sh_addr;

    switch (rel.r_type) {
    case R_X86_64_8:
      check(S + A, 0, 1 << 8);
      *loc = S + A;
      break;
    case R_X86_64_16:
      check(S + A, 0, 1 << 16);
      *(ul16 *)loc = S + A;
      break;
    case R_X86_64_32:
      write32(S + A);
      break;
    case R_X86_64_32S:
      write32s(S + A);
      break;
    case R_X86_64_64:
      // Word-size absolute value; may be emitted as a dynamic
      // relocation through `dynrel` instead of resolved statically.
      apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel);
      break;
    case R_X86_64_PC8:
      check(S + A - P, -(1 << 7), 1 << 7);
      *loc = S + A - P;
      break;
    case R_X86_64_PC16:
      check(S + A - P, -(1 << 15), 1 << 15);
      *(ul16 *)loc = S + A - P;
      break;
    case R_X86_64_PC32:
    case R_X86_64_PLT32:
      write32s(S + A - P);
      break;
    case R_X86_64_PC64:
      *(ul64 *)loc = S + A - P;
      break;
    case R_X86_64_GOT32:
      write32s(G + A);
      break;
    case R_X86_64_GOT64:
      *(ul64 *)loc = G + A;
      break;
    case R_X86_64_GOTOFF64:
    case R_X86_64_PLTOFF64:
      *(ul64 *)loc = S + A - GOTPLT;
      break;
    case R_X86_64_GOTPC32:
      write32s(GOTPLT + A - P);
      break;
    case R_X86_64_GOTPC64:
      *(ul64 *)loc = GOTPLT + A - P;
      break;
    case R_X86_64_GOTPCREL:
      // G + GOTPLT is the absolute address of the symbol's GOT slot.
      write32s(G + GOTPLT + A - P);
      break;
    case R_X86_64_GOTPCREL64:
      *(ul64 *)loc = G + GOTPLT + A - P;
      break;
    case R_X86_64_GOTPCRELX:
      // We always want to relax GOTPCRELX relocs even if --no-relax
      // was given because some static PIE runtime code depends on these
      // relaxations.
      if (!sym.is_imported && !sym.is_ifunc() && sym.is_relative()) {
        u32 insn = relax_gotpcrelx(loc - 2);
        i64 val = S + A - P;
        // Relax only if the instruction is rewritable and the direct
        // displacement fits in a signed 32-bit field.
        if (insn && (i32)val == val) {
          loc[-2] = insn >> 8;
          loc[-1] = insn;
          *(ul32 *)loc = val;
          break;
        }
      }
      write32s(G + GOTPLT + A - P);
      break;
    case R_X86_64_REX_GOTPCRELX:
      if (!sym.is_imported && !sym.is_ifunc() && sym.is_relative()) {
        u32 insn = relax_rex_gotpcrelx(loc - 3);
        i64 val = S + A - P;
        if (insn && (i32)val == val) {
          loc[-3] = insn >> 16;
          loc[-2] = insn >> 8;
          loc[-1] = insn;
          *(ul32 *)loc = val;
          break;
        }
      }
      write32s(G + GOTPLT + A - P);
      break;
    case R_X86_64_TLSGD:
      // The relaxed forms also consume the next relocation (the one on
      // the following __tls_get_addr call), hence the extra i++.
      if (sym.has_tlsgd(ctx)) {
        write32s(sym.get_tlsgd_addr(ctx) + A - P);
      } else if (sym.has_gottp(ctx)) {
        relax_gd_to_ie(loc, rels[i + 1], sym.get_gottp_addr(ctx) - P);
        i++;
      } else {
        relax_gd_to_le(loc, rels[i + 1], S - ctx.tp_addr);
        i++;
      }
      break;
    case R_X86_64_TLSLD:
      if (ctx.got->has_tlsld(ctx)) {
        write32s(ctx.got->get_tlsld_addr(ctx) + A - P);
      } else {
        relax_ld_to_le(loc, rels[i + 1], ctx.tp_addr - ctx.tls_begin);
        i++;
      }
      break;
    case R_X86_64_DTPOFF32:
      write32s(S + A - ctx.dtp_addr);
      break;
    case R_X86_64_DTPOFF64:
      *(ul64 *)loc = S + A - ctx.dtp_addr;
      break;
    case R_X86_64_TPOFF32:
      write32s(S + A - ctx.tp_addr);
      break;
    case R_X86_64_TPOFF64:
      *(ul64 *)loc = S + A - ctx.tp_addr;
      break;
    case R_X86_64_GOTTPOFF:
      if (sym.has_gottp(ctx)) {
        write32s(sym.get_gottp_addr(ctx) + A - P);
      } else {
        // scan_relocations() relaxed this access, so relax_gottpoff()
        // is known to succeed here; rewrite the load to a mov-immediate
        // of the TP offset. A == -4 because the displacement field sits
        // 4 bytes before the end of the instruction.
        u32 insn = relax_gottpoff(loc - 3);
        loc[-3] = insn >> 16;
        loc[-2] = insn >> 8;
        loc[-1] = insn;
        write32s(S - ctx.tp_addr);
        assert(A == -4);
      }
      break;
    case R_X86_64_GOTPC32_TLSDESC:
      if (sym.has_tlsdesc(ctx)) {
        write32s(sym.get_tlsdesc_addr(ctx) + A - P);
      } else {
        u32 insn = relax_gotpc32_tlsdesc(loc - 3);
        loc[-3] = insn >> 16;
        loc[-2] = insn >> 8;
        loc[-1] = insn;
        write32s(S - ctx.tp_addr);
        assert(A == -4);
      }
      break;
    case R_X86_64_SIZE32:
      write32(sym.esym().st_size + A);
      break;
    case R_X86_64_SIZE64:
      *(ul64 *)loc = sym.esym().st_size + A;
      break;
    case R_X86_64_TLSDESC_CALL:
      if (!sym.has_tlsdesc(ctx)) {
        // call *(%rax) -> nop
        loc[0] = 0x66;
        loc[1] = 0x90;
      }
      break;
    default:
      unreachable();
    }
  }
}
// This function is responsible for applying relocations against
// non-SHF_ALLOC sections (i.e. sections that are not mapped to memory
// at runtime).
//
// Relocations against non-SHF_ALLOC sections are much easier to
// handle than that against SHF_ALLOC sections. It is because, since
// they are not mapped to memory, they don't contain any variable or
// function and never need PLT or GOT. Non-SHF_ALLOC sections are
// mostly debug info sections.
//
// Relocations against non-SHF_ALLOC sections are not scanned by
// scan_relocations.
template <>
void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
  std::span<const ElfRel<E>> rels = get_rels(ctx);

  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;

    Symbol<E> &sym = *file.symbols[rel.r_sym];
    u8 *loc = base + rel.r_offset;

    // Report a relocated value that does not fit in [lo, hi).
    auto check = [&](i64 val, i64 lo, i64 hi) {
      if (val < lo || hi <= val)
        Error(ctx) << *this << ": relocation " << rel << " against "
                   << sym << " out of range: " << val << " is not in ["
                   << lo << ", " << hi << ")";
    };

    // Store a 32-bit value with a zero-extension range check.
    auto write32 = [&](u64 val) {
      check(val, 0, 1LL << 32);
      *(ul32 *)loc = val;
    };

    // Store a 32-bit value with a sign-extension range check.
    auto write32s = [&](u64 val) {
      check(val, -(1LL << 31), 1LL << 31);
      *(ul32 *)loc = val;
    };

    // If the relocation refers to a mergeable section fragment, resolve
    // against the fragment's address and addend instead of the symbol's.
    SectionFragment<E> *frag;
    i64 frag_addend;
    std::tie(frag, frag_addend) = get_fragment(ctx, rel);

    u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
    u64 A = frag ? frag_addend : (i64)rel.r_addend;

    switch (rel.r_type) {
    case R_X86_64_8:
      check(S + A, 0, 1 << 8);
      *loc = S + A;
      break;
    case R_X86_64_16:
      check(S + A, 0, 1 << 16);
      *(ul16 *)loc = S + A;
      break;
    case R_X86_64_32:
      write32(S + A);
      break;
    case R_X86_64_32S:
      write32s(S + A);
      break;
    case R_X86_64_64:
      // A dead (garbage-collected) fragment may yield a tombstone value.
      if (std::optional<u64> val = get_tombstone(sym, frag))
        *(ul64 *)loc = *val;
      else
        *(ul64 *)loc = S + A;
      break;
    case R_X86_64_DTPOFF32:
      if (std::optional<u64> val = get_tombstone(sym, frag))
        *(ul32 *)loc = *val;
      else
        write32s(S + A - ctx.dtp_addr);
      break;
    case R_X86_64_DTPOFF64:
      if (std::optional<u64> val = get_tombstone(sym, frag))
        *(ul64 *)loc = *val;
      else
        *(ul64 *)loc = S + A - ctx.dtp_addr;
      break;
    case R_X86_64_GOTOFF64:
      *(ul64 *)loc = S + A - ctx.gotplt->shdr.sh_addr;
      break;
    case R_X86_64_GOTPC64:
      // PC-relative relocation doesn't make sense for non-memory-allocated
      // section, but GCC 6.3.0 seems to create this reloc for
      // _GLOBAL_OFFSET_TABLE_.
      *(ul64 *)loc = ctx.gotplt->shdr.sh_addr + A;
      break;
    case R_X86_64_SIZE32:
      write32(sym.esym().st_size + A);
      break;
    case R_X86_64_SIZE64:
      *(ul64 *)loc = sym.esym().st_size + A;
      break;
    default:
      Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: "
                 << rel;
      break;
    }
  }
}
// Linker has to create data structures in an output file to apply
// some type of relocations. For example, if a relocation refers a GOT
// or a PLT entry of a symbol, linker has to create an entry in .got
// or in .plt for that symbol. In order to fix the file layout, we
// need to scan relocations.
template <>
void InputSection<E>::scan_relocations(Context<E> &ctx) {
  assert(shdr().sh_flags & SHF_ALLOC);

  // Reserve space in .rela.dyn for dynamic relocations requested so far.
  this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
  std::span<const ElfRel<E>> rels = get_rels(ctx);

  // Scan relocations
  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
      continue;

    Symbol<E> &sym = *file.symbols[rel.r_sym];
    // `loc` points into the input section's bytes so the relaxation
    // helpers can inspect the instruction being relocated.
    u8 *loc = (u8 *)(contents.data() + rel.r_offset);

    // An ifunc is resolved at runtime, so any reference to it needs
    // both a GOT slot and a PLT entry regardless of relocation type.
    if (sym.is_ifunc())
      sym.flags |= NEEDS_GOT | NEEDS_PLT;

    switch (rel.r_type) {
    case R_X86_64_8:
    case R_X86_64_16:
    case R_X86_64_32:
    case R_X86_64_32S:
      scan_absrel(ctx, sym, rel);
      break;
    case R_X86_64_64:
      scan_dyn_absrel(ctx, sym, rel);
      break;
    case R_X86_64_PC8:
    case R_X86_64_PC16:
    case R_X86_64_PC32:
    case R_X86_64_PC64:
      scan_pcrel(ctx, sym, rel);
      break;
    case R_X86_64_GOT32:
    case R_X86_64_GOT64:
    case R_X86_64_GOTPC32:
    case R_X86_64_GOTPC64:
    case R_X86_64_GOTPCREL:
    case R_X86_64_GOTPCREL64:
    case R_X86_64_GOTPCRELX:
    case R_X86_64_REX_GOTPCRELX:
      sym.flags |= NEEDS_GOT;
      break;
    case R_X86_64_PLT32:
    case R_X86_64_PLTOFF64:
      if (sym.is_imported)
        sym.flags |= NEEDS_PLT;
      break;
    case R_X86_64_TLSGD:
      // A TLSGD relocation must be paired with the relocation on the
      // following __tls_get_addr call; when we relax at apply time we
      // consume that pair, so skip the next relocation here too (i++).
      if (rel.r_addend != -4)
        Fatal(ctx) << *this << ": bad r_addend for R_X86_64_TLSGD";

      if (i + 1 == rels.size())
        Fatal(ctx) << *this << ": TLSGD reloc must be followed by PLT or GOTPCREL";

      if (u32 ty = rels[i + 1].r_type;
          ty != R_X86_64_PLT32 && ty != R_X86_64_PC32 &&
          ty != R_X86_64_PLTOFF64 && ty != R_X86_64_GOTPCREL &&
          ty != R_X86_64_GOTPCRELX)
        Fatal(ctx) << *this << ": TLSGD reloc must be followed by PLT or GOTPCREL";

      if (ctx.arg.is_static ||
          (ctx.arg.relax && !sym.is_imported && !ctx.arg.shared)) {
        // We always relax if -static because libc.a doesn't contain
        // __tls_get_addr().
        i++;
      } else if (ctx.arg.relax && !sym.is_imported && ctx.arg.shared &&
                 !ctx.arg.z_dlopen) {
        // In a shared object we can only relax GD to IE (not LE),
        // which still needs a GOT entry holding the TP offset.
        sym.flags |= NEEDS_GOTTP;
        i++;
      } else {
        sym.flags |= NEEDS_TLSGD;
      }
      break;
    case R_X86_64_TLSLD:
      if (rel.r_addend != -4)
        Fatal(ctx) << *this << ": bad r_addend for R_X86_64_TLSLD";

      if (i + 1 == rels.size())
        Fatal(ctx) << *this << ": TLSLD reloc must be followed by PLT or GOTPCREL";

      if (u32 ty = rels[i + 1].r_type;
          ty != R_X86_64_PLT32 && ty != R_X86_64_PC32 &&
          ty != R_X86_64_PLTOFF64 && ty != R_X86_64_GOTPCREL &&
          ty != R_X86_64_GOTPCRELX)
        Fatal(ctx) << *this << ": TLSLD reloc must be followed by PLT or GOTPCREL";

      // We always relax if -static because libc.a doesn't contain
      // __tls_get_addr().
      if (ctx.arg.is_static || (ctx.arg.relax && !ctx.arg.shared))
        i++;
      else
        ctx.needs_tlsld = true;
      break;
    case R_X86_64_GOTTPOFF: {
      if (rel.r_addend != -4)
        Fatal(ctx) << *this << ": bad r_addend for R_X86_64_GOTTPOFF";

      // Relax IE to LE only when the instruction form allows it;
      // otherwise keep the GOT entry holding the TP offset.
      bool do_relax = ctx.arg.relax && !ctx.arg.shared &&
                      !sym.is_imported && relax_gottpoff(loc - 3);
      if (!do_relax)
        sym.flags |= NEEDS_GOTTP;
      break;
    }
    case R_X86_64_GOTPC32_TLSDESC: {
      if (rel.r_addend != -4)
        Fatal(ctx) << *this << ": bad r_addend for R_X86_64_GOTPC32_TLSDESC";

      if (relax_gotpc32_tlsdesc(loc - 3) == 0)
        Fatal(ctx) << *this << ": GOTPC32_TLSDESC relocation is used"
                   << " against an invalid code sequence";

      if (!relax_tlsdesc(ctx, sym))
        sym.flags |= NEEDS_TLSDESC;
      break;
    }
    case R_X86_64_TPOFF32:
    case R_X86_64_TPOFF64:
      check_tlsle(ctx, sym, rel);
      break;
    // These relocation types need no preparation at scan time.
    case R_X86_64_GOTOFF64:
    case R_X86_64_DTPOFF32:
    case R_X86_64_DTPOFF64:
    case R_X86_64_SIZE32:
    case R_X86_64_SIZE64:
    case R_X86_64_TLSDESC_CALL:
      break;
    default:
      Error(ctx) << *this << ": unknown relocation: " << rel;
    }
  }
}
} // namespace mold::elf

1278
third_party/mold/elf/cmdline.cc vendored Normal file

File diff suppressed because it is too large Load diff

555
third_party/mold/elf/dwarf.cc vendored Normal file
View file

@ -0,0 +1,555 @@
// clang-format off
// This file contains code to read DWARF debug info to create .gdb_index.
//
// .gdb_index is an optional section to speed up GNU debugger. It contains
// two maps: 1) a map from function/variable/type names to compunits, and
// 2) a map from function address ranges to compunits. gdb uses these
// maps to quickly find a compunit given a name or an instruction pointer.
//
// (Terminology: a compilation unit, which often abbreviated as compunit
// or cu, is a unit of debug info. An input .debug_info section usually
// contains one compunit, and thus an output .debug_info contains as
// many compunits as the number of input files.)
//
// .gdb_index is not mandatory. All the information in .gdb_index is
// also in other debug info sections. You can actually create an
// executable without .gdb_index and later add it using `gdb-add-index`
// post-processing tool that comes with gdb.
//
// The mapping from names to compunits is 1:n while the mapping from
// address ranges to compunits is 1:1. That is, two object files may
// define the same type name (with the same definition), while there
// should be no two functions that overlap with each other in memory.
//
// .gdb_index contains an on-disk hash table for names, so gdb can
// lookup names without loading all strings into memory and construct an
// in-memory hash table.
//
// Names are in .debug_gnu_pubnames and .debug_gnu_pubtypes input
// sections. These sections are created if `-ggnu-pubnames` is given.
// Besides names, these sections contain attributes for each name so
// that gdb can distinguish type names from function names, for example.
//
// A compunit contains one or more function address ranges. If an
// object file is compiled without -ffunction-sections, it contains
// only one .text section and therefore contains a single address range.
// Such range is typically stored directly to the compunit.
//
// If an object file is compiled with -ffunction-sections, it contains
// more than one .text section, and it has as many address ranges as
// the number of .text sections. Such discontiguous address ranges are
// stored to .debug_ranges in DWARF 2/3/4/5 and
// .debug_rnglists/.debug_addr in DWARF 5.
//
// .debug_info section contains DWARF debug info. Although we don't need
// to parse the whole .debug_info section to read address ranges, we
// have to do a little bit. DWARF is complicated and often handled using
// a library such as libdwarf. But we don't use any library because we
// don't want to add an extra run-time dependency just for --gdb-index.
//
// This page explains the format of .gdb_index:
// https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html
#include "third_party/mold/elf/mold.h"
namespace mold::elf {
// The hash function for .gdb_index.
static u32 gdb_hash(std::string_view name) {
  // Fold each character into the accumulator as h = h * 67 + c - 113,
  // lower-casing ASCII uppercase letters first so the hash is
  // case-insensitive for A-Z.
  u32 h = 0;
  for (u8 c : name) {
    u8 folded = ('A' <= c && c <= 'Z') ? ('a' + c - 'A') : c;
    h = h * 67 + folded - 113;
  }
  return h;
}
// Split .debug_info into so-called "compilation units". A .debug_info
// section usually contains one compunit unless it was created by `ld -r`.
// This is for --gdb-index.
template <typename E>
std::vector<std::string_view>
read_compunits(Context<E> &ctx, ObjectFile<E> &file) {
  // Slice this file's .debug_info contents into per-compunit chunks.
  // Each chunk starts with a 4-byte unit length that excludes itself,
  // so a chunk occupies (length + 4) bytes.
  file.debug_info->uncompress(ctx);
  std::string_view rest = file.debug_info->contents;
  std::vector<std::string_view> cus;

  while (!rest.empty()) {
    if (rest.size() < 4)
      Fatal(ctx) << *file.debug_info << ": corrupted .debug_info";
    // An all-ones initial length marks the 64-bit DWARF format.
    if (*(U32<E> *)rest.data() == 0xffff'ffff)
      Fatal(ctx) << *file.debug_info << ": --gdb-index: DWARF64 not supported";

    i64 size = *(U32<E> *)rest.data() + 4;
    cus.push_back(rest.substr(0, size));
    rest = rest.substr(size);
  }
  return cus;
}
// Parses .debug_gnu_pubnames and .debug_gnu_pubtypes. These sections
// start with a 14 bytes header followed by (4-byte offset, 1-byte type,
// null-terminated string) tuples.
//
// The 4-byte offset is an offset into .debug_info that contains details
// about the name. The 1-byte type is a type of the corresponding name
// (e.g. function, variable or datatype). The string is a name of a
// function, a variable or a type.
template <typename E>
std::vector<GdbIndexName> read_pubnames(Context<E> &ctx, ObjectFile<E> &file) {
  std::vector<GdbIndexName> vec;

  // Translate an offset into this file's .debug_info contribution into a
  // global compunit index. The offset must land exactly on a compunit
  // boundary; anything else is treated as corruption.
  auto get_cu_idx = [&](InputSection<E> &isec, i64 offset) {
    i64 off = 0;
    for (i64 i = 0; i < file.compunits.size(); i++) {
      if (offset == off)
        return file.compunits_idx + i;
      off += file.compunits[i].size();
    }
    Fatal(ctx) << isec << ": corrupted debug_info_offset";
  };

  // Parse one pubnames/pubtypes section: a sequence of sets, each with a
  // 14-byte header followed by (offset, type, name) tuples.
  auto read = [&](InputSection<E> &isec) {
    isec.uncompress(ctx);
    std::string_view contents = isec.contents;

    while (!contents.empty()) {
      if (contents.size() < 14)
        Fatal(ctx) << isec << ": corrupted header";

      // Bytes 0-3: set length excluding the length field itself.
      u32 len = *(U32<E> *)contents.data() + 4;
      // Bytes 6-9: offset of the corresponding CU in .debug_info.
      u32 debug_info_offset = *(U32<E> *)(contents.data() + 6);
      u32 cu_idx = get_cu_idx(isec, debug_info_offset);

      std::string_view data = contents.substr(14, len - 14);
      contents = contents.substr(len);

      while (!data.empty()) {
        u32 offset = *(U32<E> *)data.data();
        data = data.substr(4);
        if (offset == 0) // a zero offset terminates the tuple list
          break;

        u8 type = data[0];
        data = data.substr(1);

        // The name is a null-terminated string following the type byte
        // (string_view construction from a char* stops at the NUL).
        std::string_view name = data.data();
        data = data.substr(name.size() + 1);
        // Pack the 1-byte attribute and CU index into one word; this is
        // the encoding later emitted into .gdb_index.
        vec.push_back({name, gdb_hash(name), (type << 24) | cu_idx});
      }
    }
  };

  if (file.debug_pubnames)
    read(*file.debug_pubnames);
  if (file.debug_pubtypes)
    read(*file.debug_pubtypes);

  // Uniquify elements because GCC 11 seems to emit one record for each
  // comdat group which results in having a lot of duplicate records.
  auto less = [](const GdbIndexName &a, const GdbIndexName &b) {
    return std::tuple{a.hash, a.attr, a.name} <
           std::tuple{b.hash, b.attr, b.name};
  };

  auto equal = [](const GdbIndexName &a, const GdbIndexName &b) {
    return std::tuple{a.hash, a.attr, a.name} ==
           std::tuple{b.hash, b.attr, b.name};
  };

  std::sort(vec.begin(), vec.end(), less);
  vec.erase(std::unique(vec.begin(), vec.end(), equal), vec.end());
  return vec;
}
// Return a readable copy of a chunk's bytes: the chunk's own
// uncompressed buffer if it has one, otherwise its slice of the
// output file image.
template <typename E>
static u8 *get_buffer(Context<E> &ctx, Chunk<E> *chunk) {
  u8 *buf = chunk->get_uncompressed_data();
  return buf ? buf : ctx.buf + chunk->shdr.sh_offset;
}
// Try to find a compilation unit from .debug_info and its
// corresponding record from .debug_abbrev and returns them.
// Returns {pointer past the CU header into .debug_info, pointer past the
// matching abbrev entry's tag/has_children bytes, DWARF version}.
template <typename E>
static std::tuple<u8 *, u8 *, u32>
find_compunit(Context<E> &ctx, ObjectFile<E> &file, i64 offset) {
  // Read .debug_info to find the record at a given offset.
  u8 *cu = get_buffer(ctx, ctx.debug_info) + offset;
  u32 dwarf_version = *(U16<E> *)(cu + 4); // version lives at offset 4
  u32 abbrev_offset;

  // Skip a header. Header layout (and thus the abbrev-offset position)
  // differs between DWARF <=4 and DWARF 5.
  switch (dwarf_version) {
  case 2:
  case 3:
  case 4:
    abbrev_offset = *(U32<E> *)(cu + 6);
    if (u32 address_size = cu[10]; address_size != sizeof(Word<E>))
      Fatal(ctx) << file << ": --gdb-index: unsupported address size "
                 << address_size;
    cu += 11;
    break;
  case 5: {
    abbrev_offset = *(U32<E> *)(cu + 8);
    if (u32 address_size = cu[7]; address_size != sizeof(Word<E>))
      Fatal(ctx) << file << ": --gdb-index: unsupported address size "
                 << address_size;
    // DWARF 5 adds a unit-type byte; skeleton/split units carry an
    // extra 8-byte DWO id, hence the larger skip.
    switch (u32 unit_type = cu[6]; unit_type) {
    case DW_UT_compile:
    case DW_UT_partial:
      cu += 12;
      break;
    case DW_UT_skeleton:
    case DW_UT_split_compile:
      cu += 20;
      break;
    default:
      Fatal(ctx) << file << ": --gdb-index: unknown DW_UT_* value: 0x"
                 << std::hex << unit_type;
    }
    break;
  }
  default:
    Fatal(ctx) << file << ": --gdb-index: unknown DWARF version: "
               << dwarf_version;
  }

  // The first ULEB after the header is the abbrev code of the root DIE.
  u32 abbrev_code = read_uleb(cu);

  // Find a .debug_abbrev record corresponding to the .debug_info record.
  // We assume the .debug_info record at a given offset is of
  // DW_TAG_compile_unit which describes a compunit.
  u8 *abbrev = get_buffer(ctx, ctx.debug_abbrev) + abbrev_offset;

  for (;;) {
    u32 code = read_uleb(abbrev);
    if (code == 0)
      Fatal(ctx) << file << ": --gdb-index: .debug_abbrev does not contain"
                 << " a record for the first .debug_info record";

    if (code == abbrev_code) {
      // Found a record
      u64 abbrev_tag = read_uleb(abbrev);
      if (abbrev_tag != DW_TAG_compile_unit && abbrev_tag != DW_TAG_skeleton_unit)
        Fatal(ctx) << file << ": --gdb-index: the first entry's tag is not"
                   << " DW_TAG_compile_unit/DW_TAG_skeleton_unit but 0x"
                   << std::hex << abbrev_tag;
      break;
    }

    // Skip an uninteresting record: its tag, has_children byte, and
    // (name, form) attribute pairs up to the (0, 0) terminator.
    read_uleb(abbrev); // tag
    abbrev++; // has_children byte
    for (;;) {
      u64 name = read_uleb(abbrev);
      u64 form = read_uleb(abbrev);
      if (name == 0 && form == 0)
        break;
      if (form == DW_FORM_implicit_const)
        read_uleb(abbrev); // the constant value is stored in the abbrev
    }
  }

  abbrev++; // skip has_children byte
  return {cu, abbrev, dwarf_version};
}
// Estimate the number of address ranges contained in a given file.
// It may over-estimate but never under-estimate.
template <typename E>
i64 estimate_address_areas(Context<E> &ctx, ObjectFile<E> &file) {
  // Upper-bound the number of address ranges this file can contribute.
  // Over-estimating is acceptable; under-estimating is not.

  // Each CU contributes at most one directly-stored address range.
  i64 count = file.compunits.size();

  // DWARF 2-4: .debug_ranges is a flat array of [begin, end) word
  // pairs, of which one pair is a null terminator we don't count.
  if (file.debug_ranges)
    count += file.debug_ranges->sh_size / sizeof(Word<E>) / 2 - 1;

  // DWARF 5: .debug_rnglists holds variable-length entries; the
  // smallest possible entry is 3 bytes (a 1-byte code plus two
  // 1-byte ULEB128 operands).
  if (file.debug_rnglists)
    count += file.debug_rnglists->sh_size / 3;

  return count;
}
// .debug_info contains variable-length fields. This class reads them.
template <typename E>
class DebugInfoReader {
public:
  DebugInfoReader(Context<E> &ctx, ObjectFile<E> &file, u8 *cu)
    : ctx(ctx), file(file), cu(cu) {}

  // Decode one attribute value of the given DW_FORM_* code and advance
  // `cu` past it. Non-scalar forms yield a dummy 0.
  u64 read(u64 form);

  Context<E> &ctx;     // used for fatal-error reporting
  ObjectFile<E> &file; // named in error messages
  u8 *cu;              // read cursor into .debug_info
};
// Read value of the given DW_FORM_* form. If a value is not scalar,
// returns a dummy value 0.
template <typename E>
u64 DebugInfoReader<E>::read(u64 form) {
  switch (form) {
  case DW_FORM_flag_present:
    // Stored implicitly; occupies no bytes in .debug_info.
    return 0;
  // 1-byte forms
  case DW_FORM_data1:
  case DW_FORM_flag:
  case DW_FORM_strx1:
  case DW_FORM_addrx1:
  case DW_FORM_ref1:
    return *cu++;
  // 2-byte forms
  case DW_FORM_data2:
  case DW_FORM_strx2:
  case DW_FORM_addrx2:
  case DW_FORM_ref2: {
    u64 val = *(U16<E> *)cu;
    cu += 2;
    return val;
  }
  // 3-byte forms
  case DW_FORM_strx3:
  case DW_FORM_addrx3: {
    u64 val = *(U24<E> *)cu;
    cu += 3;
    return val;
  }
  // 4-byte forms
  case DW_FORM_data4:
  case DW_FORM_strp:
  case DW_FORM_sec_offset:
  case DW_FORM_line_strp:
  case DW_FORM_strx4:
  case DW_FORM_addrx4:
  case DW_FORM_ref4: {
    u64 val = *(U32<E> *)cu;
    cu += 4;
    return val;
  }
  // 8-byte forms
  case DW_FORM_data8:
  case DW_FORM_ref8: {
    u64 val = *(U64<E> *)cu;
    cu += 8;
    return val;
  }
  // Target-address-sized forms
  case DW_FORM_addr:
  case DW_FORM_ref_addr: {
    u64 val = *(Word<E> *)cu;
    cu += sizeof(Word<E>);
    return val;
  }
  // ULEB128-encoded forms (read_uleb advances the cursor)
  case DW_FORM_strx:
  case DW_FORM_addrx:
  case DW_FORM_udata:
  case DW_FORM_ref_udata:
  case DW_FORM_loclistx:
  case DW_FORM_rnglistx:
    return read_uleb(cu);
  // Inline null-terminated string; its contents are not needed here,
  // so just skip past it.
  case DW_FORM_string:
    cu += strlen((char *)cu) + 1;
    return 0;
  default:
    Fatal(ctx) << file << ": --gdb-index: unhandled debug info form: 0x"
               << std::hex << form;
    return 0;
  }
}
// Read a range list from .debug_ranges starting at the given offset.
template <typename E>
static std::vector<u64>
read_debug_range(Context<E> &ctx, ObjectFile<E> &file, Word<E> *range) {
  std::vector<u64> vec;
  u64 base = 0;

  // The list is a sequence of (begin, end) word pairs terminated by a
  // (0, 0) pair. A pair whose first word is all-ones (so that +1 wraps
  // to 0 at the target's word width) selects a new base address that is
  // added to all subsequent pairs.
  for (i64 i = 0; range[i] || range[i + 1]; i += 2) {
    if (range[i] + 1 == 0) {
      // base address selection entry
      base = range[i + 1];
    } else {
      vec.push_back(range[i] + base);
      vec.push_back(range[i + 1] + base);
    }
  }
  return vec;
}
// Read a range list from .debug_rnglists starting at the given offset.
template <typename E>
static std::vector<u64>
read_rnglist_range(Context<E> &ctx, ObjectFile<E> &file, u8 *rnglist,
                   Word<E> *addrx) {
  std::vector<u64> vec;
  u64 base = 0;

  // Each entry is a 1-byte DW_RLE_* code followed by code-specific
  // operands. The *x ("indexed") kinds take ULEB128 indices into
  // .debug_addr via `addrx`; the others carry values inline.
  // NOTE(review): an unrecognized code byte falls through the switch
  // and the next byte is interpreted as a new code — confirm whether a
  // Fatal() would be preferable for corrupt input.
  for (;;) {
    switch (*rnglist++) {
    case DW_RLE_end_of_list:
      return vec;
    case DW_RLE_base_addressx:
      base = addrx[read_uleb(rnglist)];
      break;
    case DW_RLE_startx_endx:
      vec.push_back(addrx[read_uleb(rnglist)]);
      vec.push_back(addrx[read_uleb(rnglist)]);
      break;
    case DW_RLE_startx_length:
      vec.push_back(addrx[read_uleb(rnglist)]);
      vec.push_back(vec.back() + read_uleb(rnglist)); // start + length
      break;
    case DW_RLE_offset_pair:
      vec.push_back(base + read_uleb(rnglist));
      vec.push_back(base + read_uleb(rnglist));
      break;
    case DW_RLE_base_address:
      base = *(Word<E> *)rnglist;
      rnglist += sizeof(Word<E>);
      break;
    case DW_RLE_start_end:
      vec.push_back(*(Word<E> *)rnglist);
      rnglist += sizeof(Word<E>);
      vec.push_back(*(Word<E> *)rnglist);
      rnglist += sizeof(Word<E>);
      break;
    case DW_RLE_start_length:
      vec.push_back(*(Word<E> *)rnglist);
      rnglist += sizeof(Word<E>);
      vec.push_back(vec.back() + read_uleb(rnglist)); // start + length
      break;
    }
  }
}
// Returns a list of address ranges explained by a compunit at the
// `offset` in an output .debug_info section.
//
// .debug_info contains DWARF debug info records, so this function
// parses DWARF. If a designated compunit contains multiple ranges, the
// ranges are read from .debug_ranges (or .debug_rnglists for DWARF5).
// Otherwise, a range is read directly from .debug_info (or possibly
// from .debug_addr for DWARF5).
template <typename E>
std::vector<u64>
read_address_areas(Context<E> &ctx, ObjectFile<E> &file, i64 offset) {
  u8 *cu;
  u8 *abbrev;
  u32 dwarf_version;
  std::tie(cu, abbrev, dwarf_version) = find_compunit(ctx, file, offset);

  DebugInfoReader<E> reader{ctx, file, cu};

  // An attribute's DW_FORM_* code and its raw value as decoded from
  // .debug_info. form == 0 means "attribute not seen".
  struct Record {
    u64 form = 0;
    u64 value = 0;
  };

  Record low_pc;
  Record high_pc;
  Record ranges;
  std::optional<u64> rnglists_base;
  Word<E> *addrx = nullptr;

  // Read all interesting debug records of the root DIE. The abbrev
  // entry dictates the (name, form) pairs; the reader consumes the
  // corresponding bytes from .debug_info even for attributes we ignore.
  for (;;) {
    u64 name = read_uleb(abbrev);
    u64 form = read_uleb(abbrev);
    if (name == 0 && form == 0)
      break;

    u64 val = reader.read(form);

    switch (name) {
    case DW_AT_low_pc:
      low_pc = {form, val};
      break;
    case DW_AT_high_pc:
      high_pc = {form, val};
      break;
    case DW_AT_rnglists_base:
      rnglists_base = val;
      break;
    case DW_AT_addr_base:
      addrx = (Word<E> *)(get_buffer(ctx, ctx.debug_addr) + val);
      break;
    case DW_AT_ranges:
      ranges = {form, val};
      break;
    }
  }

  // Handle non-contiguous address ranges.
  if (ranges.form) {
    if (dwarf_version <= 4) {
      Word<E> *range_begin =
        (Word<E> *)(get_buffer(ctx, ctx.debug_ranges) + ranges.value);
      return read_debug_range(ctx, file, range_begin);
    }

    assert(dwarf_version == 5);
    u8 *buf = get_buffer(ctx, ctx.debug_rnglists);
    if (ranges.form == DW_FORM_sec_offset)
      return read_rnglist_range(ctx, file, buf + ranges.value, addrx);

    // DW_FORM_rnglistx: the value is an index into an offset table that
    // starts at DW_AT_rnglists_base.
    if (!rnglists_base)
      Fatal(ctx) << file << ": --gdb-index: missing DW_AT_rnglists_base";
    u8 *base = buf + *rnglists_base;
    return read_rnglist_range(ctx, file, base + *(U32<E> *)base, addrx);
  }

  // Handle a contiguous address range.
  if (low_pc.form && high_pc.form) {
    u64 lo;

    switch (low_pc.form) {
    case DW_FORM_addr:
      lo = low_pc.value;
      break;
    case DW_FORM_addrx:
    case DW_FORM_addrx1:
    case DW_FORM_addrx2:
    case DW_FORM_addrx3: // decoded by DebugInfoReader; was missing here
    case DW_FORM_addrx4:
      lo = addrx[low_pc.value];
      break;
    default:
      // Fixed: this message previously printed high_pc.form.
      Fatal(ctx) << file << ": --gdb-index: unhandled form for DW_AT_low_pc: 0x"
                 << std::hex << low_pc.form;
    }

    switch (high_pc.form) {
    case DW_FORM_addr:
      return {lo, high_pc.value};
    case DW_FORM_addrx:
    case DW_FORM_addrx1:
    case DW_FORM_addrx2:
    case DW_FORM_addrx3: // decoded by DebugInfoReader; was missing here
    case DW_FORM_addrx4:
      return {lo, addrx[high_pc.value]};
    // Constant forms give the size of the range, not its end address.
    case DW_FORM_udata:
    case DW_FORM_data1:
    case DW_FORM_data2:
    case DW_FORM_data4:
    case DW_FORM_data8:
      return {lo, lo + high_pc.value};
    default:
      Fatal(ctx) << file << ": --gdb-index: unhandled form for DW_AT_high_pc: 0x"
                 << std::hex << high_pc.form;
    }
  }

  return {};
}
using E = MOLD_TARGET;
template std::vector<std::string_view> read_compunits(Context<E> &, ObjectFile<E> &);
template std::vector<GdbIndexName> read_pubnames(Context<E> &, ObjectFile<E> &);
template i64 estimate_address_areas(Context<E> &, ObjectFile<E> &);
template std::vector<u64> read_address_areas(Context<E> &, ObjectFile<E> &, i64);
} // namespace mold::elf

922
third_party/mold/elf/elf.cc vendored Normal file
View file

@ -0,0 +1,922 @@
// clang-format off
#include "third_party/mold/elf/mold.h"
namespace mold::elf {
template <>
std::string rel_to_string<X86_64>(u32 r_type) {
  // Map an x86-64 relocation type to its symbolic name for diagnostics.
  // The stringized token is always identical to the case label.
#define CASE(x) case x: return #x
  switch (r_type) {
  CASE(R_X86_64_NONE);
  CASE(R_X86_64_64);
  CASE(R_X86_64_PC32);
  CASE(R_X86_64_GOT32);
  CASE(R_X86_64_PLT32);
  CASE(R_X86_64_COPY);
  CASE(R_X86_64_GLOB_DAT);
  CASE(R_X86_64_JUMP_SLOT);
  CASE(R_X86_64_RELATIVE);
  CASE(R_X86_64_GOTPCREL);
  CASE(R_X86_64_32);
  CASE(R_X86_64_32S);
  CASE(R_X86_64_16);
  CASE(R_X86_64_PC16);
  CASE(R_X86_64_8);
  CASE(R_X86_64_PC8);
  CASE(R_X86_64_DTPMOD64);
  CASE(R_X86_64_DTPOFF64);
  CASE(R_X86_64_TPOFF64);
  CASE(R_X86_64_TLSGD);
  CASE(R_X86_64_TLSLD);
  CASE(R_X86_64_DTPOFF32);
  CASE(R_X86_64_GOTTPOFF);
  CASE(R_X86_64_TPOFF32);
  CASE(R_X86_64_PC64);
  CASE(R_X86_64_GOTOFF64);
  CASE(R_X86_64_GOTPC32);
  CASE(R_X86_64_GOT64);
  CASE(R_X86_64_GOTPCREL64);
  CASE(R_X86_64_GOTPC64);
  CASE(R_X86_64_GOTPLT64);
  CASE(R_X86_64_PLTOFF64);
  CASE(R_X86_64_SIZE32);
  CASE(R_X86_64_SIZE64);
  CASE(R_X86_64_GOTPC32_TLSDESC);
  CASE(R_X86_64_TLSDESC_CALL);
  CASE(R_X86_64_TLSDESC);
  CASE(R_X86_64_IRELATIVE);
  CASE(R_X86_64_GOTPCRELX);
  CASE(R_X86_64_REX_GOTPCRELX);
  }
#undef CASE
  return "unknown (" + std::to_string(r_type) + ")";
}
template <>
std::string rel_to_string<I386>(u32 r_type) {
  // Map an i386 relocation type to its symbolic name for diagnostics.
  // The stringized token is always identical to the case label.
#define CASE(x) case x: return #x
  switch (r_type) {
  CASE(R_386_NONE);
  CASE(R_386_32);
  CASE(R_386_PC32);
  CASE(R_386_GOT32);
  CASE(R_386_PLT32);
  CASE(R_386_COPY);
  CASE(R_386_GLOB_DAT);
  CASE(R_386_JUMP_SLOT);
  CASE(R_386_RELATIVE);
  CASE(R_386_GOTOFF);
  CASE(R_386_GOTPC);
  CASE(R_386_32PLT);
  CASE(R_386_TLS_TPOFF);
  CASE(R_386_TLS_IE);
  CASE(R_386_TLS_GOTIE);
  CASE(R_386_TLS_LE);
  CASE(R_386_TLS_GD);
  CASE(R_386_TLS_LDM);
  CASE(R_386_16);
  CASE(R_386_PC16);
  CASE(R_386_8);
  CASE(R_386_PC8);
  CASE(R_386_TLS_GD_32);
  CASE(R_386_TLS_GD_PUSH);
  CASE(R_386_TLS_GD_CALL);
  CASE(R_386_TLS_GD_POP);
  CASE(R_386_TLS_LDM_32);
  CASE(R_386_TLS_LDM_PUSH);
  CASE(R_386_TLS_LDM_CALL);
  CASE(R_386_TLS_LDM_POP);
  CASE(R_386_TLS_LDO_32);
  CASE(R_386_TLS_IE_32);
  CASE(R_386_TLS_LE_32);
  CASE(R_386_TLS_DTPMOD32);
  CASE(R_386_TLS_DTPOFF32);
  CASE(R_386_TLS_TPOFF32);
  CASE(R_386_SIZE32);
  CASE(R_386_TLS_GOTDESC);
  CASE(R_386_TLS_DESC_CALL);
  CASE(R_386_TLS_DESC);
  CASE(R_386_IRELATIVE);
  CASE(R_386_GOT32X);
  }
#undef CASE
  return "unknown (" + std::to_string(r_type) + ")";
}
template <>
std::string rel_to_string<ARM64>(u32 r_type) {
  // Map an AArch64 relocation type to its symbolic name for diagnostics.
  // The stringized token is always identical to the case label.
#define CASE(x) case x: return #x
  switch (r_type) {
  CASE(R_AARCH64_NONE);
  CASE(R_AARCH64_ABS64);
  CASE(R_AARCH64_ABS32);
  CASE(R_AARCH64_ABS16);
  CASE(R_AARCH64_PREL64);
  CASE(R_AARCH64_PREL32);
  CASE(R_AARCH64_PREL16);
  CASE(R_AARCH64_MOVW_UABS_G0);
  CASE(R_AARCH64_MOVW_UABS_G0_NC);
  CASE(R_AARCH64_MOVW_UABS_G1);
  CASE(R_AARCH64_MOVW_UABS_G1_NC);
  CASE(R_AARCH64_MOVW_UABS_G2);
  CASE(R_AARCH64_MOVW_UABS_G2_NC);
  CASE(R_AARCH64_MOVW_UABS_G3);
  CASE(R_AARCH64_MOVW_SABS_G0);
  CASE(R_AARCH64_MOVW_SABS_G1);
  CASE(R_AARCH64_MOVW_SABS_G2);
  CASE(R_AARCH64_LD_PREL_LO19);
  CASE(R_AARCH64_ADR_PREL_LO21);
  CASE(R_AARCH64_ADR_PREL_PG_HI21);
  CASE(R_AARCH64_ADR_PREL_PG_HI21_NC);
  CASE(R_AARCH64_ADD_ABS_LO12_NC);
  CASE(R_AARCH64_LDST8_ABS_LO12_NC);
  CASE(R_AARCH64_TSTBR14);
  CASE(R_AARCH64_CONDBR19);
  CASE(R_AARCH64_JUMP26);
  CASE(R_AARCH64_CALL26);
  CASE(R_AARCH64_LDST16_ABS_LO12_NC);
  CASE(R_AARCH64_LDST32_ABS_LO12_NC);
  CASE(R_AARCH64_LDST64_ABS_LO12_NC);
  CASE(R_AARCH64_MOVW_PREL_G0);
  CASE(R_AARCH64_MOVW_PREL_G0_NC);
  CASE(R_AARCH64_MOVW_PREL_G1);
  CASE(R_AARCH64_MOVW_PREL_G1_NC);
  CASE(R_AARCH64_MOVW_PREL_G2);
  CASE(R_AARCH64_MOVW_PREL_G2_NC);
  CASE(R_AARCH64_MOVW_PREL_G3);
  CASE(R_AARCH64_LDST128_ABS_LO12_NC);
  CASE(R_AARCH64_ADR_GOT_PAGE);
  CASE(R_AARCH64_LD64_GOT_LO12_NC);
  CASE(R_AARCH64_LD64_GOTPAGE_LO15);
  CASE(R_AARCH64_PLT32);
  CASE(R_AARCH64_TLSGD_ADR_PREL21);
  CASE(R_AARCH64_TLSGD_ADR_PAGE21);
  CASE(R_AARCH64_TLSGD_ADD_LO12_NC);
  CASE(R_AARCH64_TLSGD_MOVW_G1);
  CASE(R_AARCH64_TLSGD_MOVW_G0_NC);
  CASE(R_AARCH64_TLSLD_ADR_PREL21);
  CASE(R_AARCH64_TLSLD_ADR_PAGE21);
  CASE(R_AARCH64_TLSLD_ADD_LO12_NC);
  CASE(R_AARCH64_TLSLD_MOVW_G1);
  CASE(R_AARCH64_TLSLD_MOVW_G0_NC);
  CASE(R_AARCH64_TLSLD_LD_PREL19);
  CASE(R_AARCH64_TLSLD_MOVW_DTPREL_G2);
  CASE(R_AARCH64_TLSLD_MOVW_DTPREL_G1);
  CASE(R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC);
  CASE(R_AARCH64_TLSLD_MOVW_DTPREL_G0);
  CASE(R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC);
  CASE(R_AARCH64_TLSLD_ADD_DTPREL_HI12);
  CASE(R_AARCH64_TLSLD_ADD_DTPREL_LO12);
  CASE(R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC);
  CASE(R_AARCH64_TLSLD_LDST8_DTPREL_LO12);
  CASE(R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC);
  CASE(R_AARCH64_TLSLD_LDST16_DTPREL_LO12);
  CASE(R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC);
  CASE(R_AARCH64_TLSLD_LDST32_DTPREL_LO12);
  CASE(R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC);
  CASE(R_AARCH64_TLSLD_LDST64_DTPREL_LO12);
  CASE(R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC);
  CASE(R_AARCH64_TLSIE_MOVW_GOTTPREL_G1);
  CASE(R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC);
  CASE(R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21);
  CASE(R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC);
  CASE(R_AARCH64_TLSIE_LD_GOTTPREL_PREL19);
  CASE(R_AARCH64_TLSLE_MOVW_TPREL_G2);
  CASE(R_AARCH64_TLSLE_MOVW_TPREL_G1);
  CASE(R_AARCH64_TLSLE_MOVW_TPREL_G1_NC);
  CASE(R_AARCH64_TLSLE_MOVW_TPREL_G0);
  CASE(R_AARCH64_TLSLE_MOVW_TPREL_G0_NC);
  CASE(R_AARCH64_TLSLE_ADD_TPREL_HI12);
  CASE(R_AARCH64_TLSLE_ADD_TPREL_LO12);
  CASE(R_AARCH64_TLSLE_ADD_TPREL_LO12_NC);
  CASE(R_AARCH64_TLSLE_LDST8_TPREL_LO12);
  CASE(R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC);
  CASE(R_AARCH64_TLSLE_LDST16_TPREL_LO12);
  CASE(R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC);
  CASE(R_AARCH64_TLSLE_LDST32_TPREL_LO12);
  CASE(R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC);
  CASE(R_AARCH64_TLSLE_LDST64_TPREL_LO12);
  CASE(R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC);
  CASE(R_AARCH64_TLSDESC_ADR_PAGE21);
  CASE(R_AARCH64_TLSDESC_LD64_LO12);
  CASE(R_AARCH64_TLSDESC_ADD_LO12);
  CASE(R_AARCH64_TLSDESC_CALL);
  CASE(R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC);
  CASE(R_AARCH64_COPY);
  CASE(R_AARCH64_GLOB_DAT);
  CASE(R_AARCH64_JUMP_SLOT);
  CASE(R_AARCH64_RELATIVE);
  CASE(R_AARCH64_TLS_DTPMOD64);
  CASE(R_AARCH64_TLS_DTPREL64);
  CASE(R_AARCH64_TLS_TPREL64);
  CASE(R_AARCH64_TLSDESC);
  CASE(R_AARCH64_IRELATIVE);
  }
#undef CASE
  return "unknown (" + std::to_string(r_type) + ")";
}
template <>
std::string rel_to_string<ARM32>(u32 r_type) {
  // Map an ARM32 relocation type to its symbolic name for diagnostics.
  // The stringized token is always identical to the case label.
#define CASE(x) case x: return #x
  switch (r_type) {
  CASE(R_ARM_NONE);
  CASE(R_ARM_PC24);
  CASE(R_ARM_ABS32);
  CASE(R_ARM_REL32);
  CASE(R_ARM_LDR_PC_G0);
  CASE(R_ARM_ABS16);
  CASE(R_ARM_ABS12);
  CASE(R_ARM_THM_ABS5);
  CASE(R_ARM_ABS8);
  CASE(R_ARM_SBREL32);
  CASE(R_ARM_THM_CALL);
  CASE(R_ARM_THM_PC8);
  CASE(R_ARM_BREL_ADJ);
  CASE(R_ARM_TLS_DESC);
  CASE(R_ARM_THM_SWI8);
  CASE(R_ARM_XPC25);
  CASE(R_ARM_THM_XPC22);
  CASE(R_ARM_TLS_DTPMOD32);
  CASE(R_ARM_TLS_DTPOFF32);
  CASE(R_ARM_TLS_TPOFF32);
  CASE(R_ARM_COPY);
  CASE(R_ARM_GLOB_DAT);
  CASE(R_ARM_JUMP_SLOT);
  CASE(R_ARM_RELATIVE);
  CASE(R_ARM_GOTOFF32);
  CASE(R_ARM_BASE_PREL);
  CASE(R_ARM_GOT_BREL);
  CASE(R_ARM_PLT32);
  CASE(R_ARM_CALL);
  CASE(R_ARM_JUMP24);
  CASE(R_ARM_THM_JUMP24);
  CASE(R_ARM_BASE_ABS);
  CASE(R_ARM_ALU_PCREL_7_0);
  CASE(R_ARM_ALU_PCREL_15_8);
  CASE(R_ARM_ALU_PCREL_23_15);
  CASE(R_ARM_LDR_SBREL_11_0_NC);
  CASE(R_ARM_ALU_SBREL_19_12_NC);
  CASE(R_ARM_ALU_SBREL_27_20_CK);
  CASE(R_ARM_TARGET1);
  CASE(R_ARM_SBREL31);
  CASE(R_ARM_V4BX);
  CASE(R_ARM_TARGET2);
  CASE(R_ARM_PREL31);
  CASE(R_ARM_MOVW_ABS_NC);
  CASE(R_ARM_MOVT_ABS);
  CASE(R_ARM_MOVW_PREL_NC);
  CASE(R_ARM_MOVT_PREL);
  CASE(R_ARM_THM_MOVW_ABS_NC);
  CASE(R_ARM_THM_MOVT_ABS);
  CASE(R_ARM_THM_MOVW_PREL_NC);
  CASE(R_ARM_THM_MOVT_PREL);
  CASE(R_ARM_THM_JUMP19);
  CASE(R_ARM_THM_JUMP6);
  CASE(R_ARM_THM_ALU_PREL_11_0);
  CASE(R_ARM_THM_PC12);
  CASE(R_ARM_ABS32_NOI);
  CASE(R_ARM_REL32_NOI);
  CASE(R_ARM_ALU_PC_G0_NC);
  CASE(R_ARM_ALU_PC_G0);
  CASE(R_ARM_ALU_PC_G1_NC);
  CASE(R_ARM_ALU_PC_G1);
  CASE(R_ARM_ALU_PC_G2);
  CASE(R_ARM_LDR_PC_G1);
  CASE(R_ARM_LDR_PC_G2);
  CASE(R_ARM_LDRS_PC_G0);
  CASE(R_ARM_LDRS_PC_G1);
  CASE(R_ARM_LDRS_PC_G2);
  CASE(R_ARM_LDC_PC_G0);
  CASE(R_ARM_LDC_PC_G1);
  CASE(R_ARM_LDC_PC_G2);
  CASE(R_ARM_ALU_SB_G0_NC);
  CASE(R_ARM_ALU_SB_G0);
  CASE(R_ARM_ALU_SB_G1_NC);
  CASE(R_ARM_ALU_SB_G1);
  CASE(R_ARM_ALU_SB_G2);
  CASE(R_ARM_LDR_SB_G0);
  CASE(R_ARM_LDR_SB_G1);
  CASE(R_ARM_LDR_SB_G2);
  CASE(R_ARM_LDRS_SB_G0);
  CASE(R_ARM_LDRS_SB_G1);
  CASE(R_ARM_LDRS_SB_G2);
  CASE(R_ARM_LDC_SB_G0);
  CASE(R_ARM_LDC_SB_G1);
  CASE(R_ARM_LDC_SB_G2);
  CASE(R_ARM_MOVW_BREL_NC);
  CASE(R_ARM_MOVT_BREL);
  CASE(R_ARM_MOVW_BREL);
  CASE(R_ARM_THM_MOVW_BREL_NC);
  CASE(R_ARM_THM_MOVT_BREL);
  CASE(R_ARM_THM_MOVW_BREL);
  CASE(R_ARM_TLS_GOTDESC);
  CASE(R_ARM_TLS_CALL);
  CASE(R_ARM_TLS_DESCSEQ);
  CASE(R_ARM_THM_TLS_CALL);
  CASE(R_ARM_PLT32_ABS);
  CASE(R_ARM_GOT_ABS);
  CASE(R_ARM_GOT_PREL);
  CASE(R_ARM_GOT_BREL12);
  CASE(R_ARM_GOTOFF12);
  CASE(R_ARM_GOTRELAX);
  CASE(R_ARM_GNU_VTENTRY);
  CASE(R_ARM_GNU_VTINHERIT);
  CASE(R_ARM_THM_JUMP11);
  CASE(R_ARM_THM_JUMP8);
  CASE(R_ARM_TLS_GD32);
  CASE(R_ARM_TLS_LDM32);
  CASE(R_ARM_TLS_LDO32);
  CASE(R_ARM_TLS_IE32);
  CASE(R_ARM_TLS_LE32);
  CASE(R_ARM_TLS_LDO12);
  CASE(R_ARM_TLS_LE12);
  CASE(R_ARM_TLS_IE12GP);
  CASE(R_ARM_PRIVATE_0);
  CASE(R_ARM_PRIVATE_1);
  CASE(R_ARM_PRIVATE_2);
  CASE(R_ARM_PRIVATE_3);
  CASE(R_ARM_PRIVATE_4);
  CASE(R_ARM_PRIVATE_5);
  CASE(R_ARM_PRIVATE_6);
  CASE(R_ARM_PRIVATE_7);
  CASE(R_ARM_PRIVATE_8);
  CASE(R_ARM_PRIVATE_9);
  CASE(R_ARM_PRIVATE_10);
  CASE(R_ARM_PRIVATE_11);
  CASE(R_ARM_PRIVATE_12);
  CASE(R_ARM_PRIVATE_13);
  CASE(R_ARM_PRIVATE_14);
  CASE(R_ARM_PRIVATE_15);
  CASE(R_ARM_ME_TOO);
  CASE(R_ARM_THM_TLS_DESCSEQ16);
  CASE(R_ARM_THM_TLS_DESCSEQ32);
  CASE(R_ARM_THM_BF16);
  CASE(R_ARM_THM_BF12);
  CASE(R_ARM_THM_BF18);
  CASE(R_ARM_IRELATIVE);
  }
#undef CASE
  return "unknown (" + std::to_string(r_type) + ")";
}
// Returns the symbolic name of a RISC-V relocation type for use in
// diagnostics. Unrecognized values are rendered as "unknown (<n>)".
template <>
std::string rel_to_string<RV64LE>(u32 r_type) {
#define REL_NAME(x) case x: return #x;
  switch (r_type) {
  REL_NAME(R_RISCV_NONE)
  REL_NAME(R_RISCV_32)
  REL_NAME(R_RISCV_64)
  REL_NAME(R_RISCV_RELATIVE)
  REL_NAME(R_RISCV_COPY)
  REL_NAME(R_RISCV_JUMP_SLOT)
  REL_NAME(R_RISCV_TLS_DTPMOD32)
  REL_NAME(R_RISCV_TLS_DTPMOD64)
  REL_NAME(R_RISCV_TLS_DTPREL32)
  REL_NAME(R_RISCV_TLS_DTPREL64)
  REL_NAME(R_RISCV_TLS_TPREL32)
  REL_NAME(R_RISCV_TLS_TPREL64)
  REL_NAME(R_RISCV_BRANCH)
  REL_NAME(R_RISCV_JAL)
  REL_NAME(R_RISCV_CALL)
  REL_NAME(R_RISCV_CALL_PLT)
  REL_NAME(R_RISCV_GOT_HI20)
  REL_NAME(R_RISCV_TLS_GOT_HI20)
  REL_NAME(R_RISCV_TLS_GD_HI20)
  REL_NAME(R_RISCV_PCREL_HI20)
  REL_NAME(R_RISCV_PCREL_LO12_I)
  REL_NAME(R_RISCV_PCREL_LO12_S)
  REL_NAME(R_RISCV_HI20)
  REL_NAME(R_RISCV_LO12_I)
  REL_NAME(R_RISCV_LO12_S)
  REL_NAME(R_RISCV_TPREL_HI20)
  REL_NAME(R_RISCV_TPREL_LO12_I)
  REL_NAME(R_RISCV_TPREL_LO12_S)
  REL_NAME(R_RISCV_TPREL_ADD)
  REL_NAME(R_RISCV_ADD8)
  REL_NAME(R_RISCV_ADD16)
  REL_NAME(R_RISCV_ADD32)
  REL_NAME(R_RISCV_ADD64)
  REL_NAME(R_RISCV_SUB8)
  REL_NAME(R_RISCV_SUB16)
  REL_NAME(R_RISCV_SUB32)
  REL_NAME(R_RISCV_SUB64)
  REL_NAME(R_RISCV_ALIGN)
  REL_NAME(R_RISCV_RVC_BRANCH)
  REL_NAME(R_RISCV_RVC_JUMP)
  REL_NAME(R_RISCV_RVC_LUI)
  REL_NAME(R_RISCV_RELAX)
  REL_NAME(R_RISCV_SUB6)
  REL_NAME(R_RISCV_SET6)
  REL_NAME(R_RISCV_SET8)
  REL_NAME(R_RISCV_SET16)
  REL_NAME(R_RISCV_SET32)
  REL_NAME(R_RISCV_32_PCREL)
  REL_NAME(R_RISCV_IRELATIVE)
  REL_NAME(R_RISCV_PLT32)
  REL_NAME(R_RISCV_SET_ULEB128)
  REL_NAME(R_RISCV_SUB_ULEB128)
  }
#undef REL_NAME
  return "unknown (" + std::to_string(r_type) + ")";
}
// All RISC-V targets (32/64-bit, either endianness) share the same
// relocation numbering, so the RV64LE table is reused here.
template <>
std::string rel_to_string<RV64BE>(u32 r_type) {
return rel_to_string<RV64LE>(r_type);
}
// RV32 uses the same relocation numbering as RV64; reuse its table.
template <>
std::string rel_to_string<RV32LE>(u32 r_type) {
return rel_to_string<RV64LE>(r_type);
}
// Big-endian RV32 also shares the common RISC-V relocation numbering.
template <>
std::string rel_to_string<RV32BE>(u32 r_type) {
return rel_to_string<RV64LE>(r_type);
}
// Returns the symbolic name of a 32-bit PowerPC relocation type.
// Unrecognized values are rendered as "unknown (<n>)".
template <>
std::string rel_to_string<PPC32>(u32 r_type) {
#define REL_NAME(x) case x: return #x;
  switch (r_type) {
  REL_NAME(R_PPC_NONE)
  REL_NAME(R_PPC_ADDR32)
  REL_NAME(R_PPC_ADDR24)
  REL_NAME(R_PPC_ADDR16)
  REL_NAME(R_PPC_ADDR16_LO)
  REL_NAME(R_PPC_ADDR16_HI)
  REL_NAME(R_PPC_ADDR16_HA)
  REL_NAME(R_PPC_ADDR14)
  REL_NAME(R_PPC_ADDR14_BRTAKEN)
  REL_NAME(R_PPC_ADDR14_BRNTAKEN)
  REL_NAME(R_PPC_REL24)
  REL_NAME(R_PPC_REL14)
  REL_NAME(R_PPC_REL14_BRTAKEN)
  REL_NAME(R_PPC_REL14_BRNTAKEN)
  REL_NAME(R_PPC_GOT16)
  REL_NAME(R_PPC_GOT16_LO)
  REL_NAME(R_PPC_GOT16_HI)
  REL_NAME(R_PPC_GOT16_HA)
  REL_NAME(R_PPC_PLTREL24)
  REL_NAME(R_PPC_COPY)
  REL_NAME(R_PPC_GLOB_DAT)
  REL_NAME(R_PPC_JMP_SLOT)
  REL_NAME(R_PPC_RELATIVE)
  REL_NAME(R_PPC_LOCAL24PC)
  REL_NAME(R_PPC_UADDR32)
  REL_NAME(R_PPC_UADDR16)
  REL_NAME(R_PPC_REL32)
  REL_NAME(R_PPC_PLT32)
  REL_NAME(R_PPC_PLTREL32)
  REL_NAME(R_PPC_PLT16_LO)
  REL_NAME(R_PPC_PLT16_HI)
  REL_NAME(R_PPC_PLT16_HA)
  REL_NAME(R_PPC_SDAREL16)
  REL_NAME(R_PPC_SECTOFF)
  REL_NAME(R_PPC_SECTOFF_LO)
  REL_NAME(R_PPC_SECTOFF_HI)
  REL_NAME(R_PPC_SECTOFF_HA)
  REL_NAME(R_PPC_ADDR30)
  REL_NAME(R_PPC_TLS)
  REL_NAME(R_PPC_DTPMOD32)
  REL_NAME(R_PPC_TPREL16)
  REL_NAME(R_PPC_TPREL16_LO)
  REL_NAME(R_PPC_TPREL16_HI)
  REL_NAME(R_PPC_TPREL16_HA)
  REL_NAME(R_PPC_TPREL32)
  REL_NAME(R_PPC_DTPREL16)
  REL_NAME(R_PPC_DTPREL16_LO)
  REL_NAME(R_PPC_DTPREL16_HI)
  REL_NAME(R_PPC_DTPREL16_HA)
  REL_NAME(R_PPC_DTPREL32)
  REL_NAME(R_PPC_GOT_TLSGD16)
  REL_NAME(R_PPC_GOT_TLSGD16_LO)
  REL_NAME(R_PPC_GOT_TLSGD16_HI)
  REL_NAME(R_PPC_GOT_TLSGD16_HA)
  REL_NAME(R_PPC_GOT_TLSLD16)
  REL_NAME(R_PPC_GOT_TLSLD16_LO)
  REL_NAME(R_PPC_GOT_TLSLD16_HI)
  REL_NAME(R_PPC_GOT_TLSLD16_HA)
  REL_NAME(R_PPC_GOT_TPREL16)
  REL_NAME(R_PPC_GOT_TPREL16_LO)
  REL_NAME(R_PPC_GOT_TPREL16_HI)
  REL_NAME(R_PPC_GOT_TPREL16_HA)
  REL_NAME(R_PPC_GOT_DTPREL16)
  REL_NAME(R_PPC_GOT_DTPREL16_LO)
  REL_NAME(R_PPC_GOT_DTPREL16_HI)
  REL_NAME(R_PPC_GOT_DTPREL16_HA)
  REL_NAME(R_PPC_TLSGD)
  REL_NAME(R_PPC_TLSLD)
  REL_NAME(R_PPC_PLTSEQ)
  REL_NAME(R_PPC_PLTCALL)
  REL_NAME(R_PPC_IRELATIVE)
  REL_NAME(R_PPC_REL16)
  REL_NAME(R_PPC_REL16_LO)
  REL_NAME(R_PPC_REL16_HI)
  REL_NAME(R_PPC_REL16_HA)
  }
#undef REL_NAME
  return "unknown (" + std::to_string(r_type) + ")";
}
// Returns the symbolic name of a 64-bit PowerPC relocation type.
// Unrecognized values are rendered as "unknown (<n>)".
template <>
std::string rel_to_string<PPC64V1>(u32 r_type) {
#define REL_NAME(x) case x: return #x;
  switch (r_type) {
  REL_NAME(R_PPC64_NONE)
  REL_NAME(R_PPC64_ADDR32)
  REL_NAME(R_PPC64_ADDR24)
  REL_NAME(R_PPC64_ADDR16)
  REL_NAME(R_PPC64_ADDR16_LO)
  REL_NAME(R_PPC64_ADDR16_HI)
  REL_NAME(R_PPC64_ADDR16_HA)
  REL_NAME(R_PPC64_ADDR14)
  REL_NAME(R_PPC64_ADDR14_BRTAKEN)
  REL_NAME(R_PPC64_ADDR14_BRNTAKEN)
  REL_NAME(R_PPC64_REL24)
  REL_NAME(R_PPC64_REL14)
  REL_NAME(R_PPC64_REL14_BRTAKEN)
  REL_NAME(R_PPC64_REL14_BRNTAKEN)
  REL_NAME(R_PPC64_GOT16)
  REL_NAME(R_PPC64_GOT16_LO)
  REL_NAME(R_PPC64_GOT16_HI)
  REL_NAME(R_PPC64_GOT16_HA)
  REL_NAME(R_PPC64_COPY)
  REL_NAME(R_PPC64_GLOB_DAT)
  REL_NAME(R_PPC64_JMP_SLOT)
  REL_NAME(R_PPC64_RELATIVE)
  REL_NAME(R_PPC64_REL32)
  REL_NAME(R_PPC64_PLT16_LO)
  REL_NAME(R_PPC64_PLT16_HI)
  REL_NAME(R_PPC64_PLT16_HA)
  REL_NAME(R_PPC64_ADDR64)
  REL_NAME(R_PPC64_ADDR16_HIGHER)
  REL_NAME(R_PPC64_ADDR16_HIGHERA)
  REL_NAME(R_PPC64_ADDR16_HIGHEST)
  REL_NAME(R_PPC64_ADDR16_HIGHESTA)
  REL_NAME(R_PPC64_REL64)
  REL_NAME(R_PPC64_TOC16)
  REL_NAME(R_PPC64_TOC16_LO)
  REL_NAME(R_PPC64_TOC16_HI)
  REL_NAME(R_PPC64_TOC16_HA)
  REL_NAME(R_PPC64_TOC)
  REL_NAME(R_PPC64_ADDR16_DS)
  REL_NAME(R_PPC64_ADDR16_LO_DS)
  REL_NAME(R_PPC64_GOT16_DS)
  REL_NAME(R_PPC64_GOT16_LO_DS)
  REL_NAME(R_PPC64_PLT16_LO_DS)
  REL_NAME(R_PPC64_TOC16_DS)
  REL_NAME(R_PPC64_TOC16_LO_DS)
  REL_NAME(R_PPC64_TLS)
  REL_NAME(R_PPC64_DTPMOD64)
  REL_NAME(R_PPC64_TPREL16)
  REL_NAME(R_PPC64_TPREL16_LO)
  REL_NAME(R_PPC64_TPREL16_HI)
  REL_NAME(R_PPC64_TPREL16_HA)
  REL_NAME(R_PPC64_TPREL64)
  REL_NAME(R_PPC64_DTPREL16)
  REL_NAME(R_PPC64_DTPREL16_LO)
  REL_NAME(R_PPC64_DTPREL16_HI)
  REL_NAME(R_PPC64_DTPREL16_HA)
  REL_NAME(R_PPC64_DTPREL64)
  REL_NAME(R_PPC64_GOT_TLSGD16)
  REL_NAME(R_PPC64_GOT_TLSGD16_LO)
  REL_NAME(R_PPC64_GOT_TLSGD16_HI)
  REL_NAME(R_PPC64_GOT_TLSGD16_HA)
  REL_NAME(R_PPC64_GOT_TLSLD16)
  REL_NAME(R_PPC64_GOT_TLSLD16_LO)
  REL_NAME(R_PPC64_GOT_TLSLD16_HI)
  REL_NAME(R_PPC64_GOT_TLSLD16_HA)
  REL_NAME(R_PPC64_GOT_TPREL16_DS)
  REL_NAME(R_PPC64_GOT_TPREL16_LO_DS)
  REL_NAME(R_PPC64_GOT_TPREL16_HI)
  REL_NAME(R_PPC64_GOT_TPREL16_HA)
  REL_NAME(R_PPC64_GOT_DTPREL16_DS)
  REL_NAME(R_PPC64_GOT_DTPREL16_LO_DS)
  REL_NAME(R_PPC64_GOT_DTPREL16_HI)
  REL_NAME(R_PPC64_GOT_DTPREL16_HA)
  REL_NAME(R_PPC64_TPREL16_DS)
  REL_NAME(R_PPC64_TPREL16_LO_DS)
  REL_NAME(R_PPC64_TPREL16_HIGHER)
  REL_NAME(R_PPC64_TPREL16_HIGHERA)
  REL_NAME(R_PPC64_TPREL16_HIGHEST)
  REL_NAME(R_PPC64_TPREL16_HIGHESTA)
  REL_NAME(R_PPC64_DTPREL16_DS)
  REL_NAME(R_PPC64_DTPREL16_LO_DS)
  REL_NAME(R_PPC64_DTPREL16_HIGHER)
  REL_NAME(R_PPC64_DTPREL16_HIGHERA)
  REL_NAME(R_PPC64_DTPREL16_HIGHEST)
  REL_NAME(R_PPC64_DTPREL16_HIGHESTA)
  REL_NAME(R_PPC64_TLSGD)
  REL_NAME(R_PPC64_TLSLD)
  REL_NAME(R_PPC64_ADDR16_HIGH)
  REL_NAME(R_PPC64_ADDR16_HIGHA)
  REL_NAME(R_PPC64_TPREL16_HIGH)
  REL_NAME(R_PPC64_TPREL16_HIGHA)
  REL_NAME(R_PPC64_DTPREL16_HIGH)
  REL_NAME(R_PPC64_DTPREL16_HIGHA)
  REL_NAME(R_PPC64_REL24_NOTOC)
  REL_NAME(R_PPC64_PLTSEQ)
  REL_NAME(R_PPC64_PLTCALL)
  REL_NAME(R_PPC64_PLTSEQ_NOTOC)
  REL_NAME(R_PPC64_PLTCALL_NOTOC)
  REL_NAME(R_PPC64_PCREL_OPT)
  REL_NAME(R_PPC64_PCREL34)
  REL_NAME(R_PPC64_GOT_PCREL34)
  REL_NAME(R_PPC64_PLT_PCREL34)
  REL_NAME(R_PPC64_PLT_PCREL34_NOTOC)
  REL_NAME(R_PPC64_TPREL34)
  REL_NAME(R_PPC64_DTPREL34)
  REL_NAME(R_PPC64_GOT_TLSGD_PCREL34)
  REL_NAME(R_PPC64_GOT_TLSLD_PCREL34)
  REL_NAME(R_PPC64_GOT_TPREL_PCREL34)
  REL_NAME(R_PPC64_IRELATIVE)
  REL_NAME(R_PPC64_REL16)
  REL_NAME(R_PPC64_REL16_LO)
  REL_NAME(R_PPC64_REL16_HI)
  REL_NAME(R_PPC64_REL16_HA)
  }
#undef REL_NAME
  return "unknown (" + std::to_string(r_type) + ")";
}
// ELFv2 (little-endian ppc64) uses the same relocation numbering as
// ELFv1, so the PPC64V1 table is reused.
template <>
std::string rel_to_string<PPC64V2>(u32 r_type) {
return rel_to_string<PPC64V1>(r_type);
}
// Returns the symbolic name of a SPARC relocation type.
// Unrecognized values are rendered as "unknown (<n>)".
template <>
std::string rel_to_string<SPARC64>(u32 r_type) {
#define REL_NAME(x) case x: return #x;
  switch (r_type) {
  REL_NAME(R_SPARC_NONE)
  REL_NAME(R_SPARC_8)
  REL_NAME(R_SPARC_16)
  REL_NAME(R_SPARC_32)
  REL_NAME(R_SPARC_DISP8)
  REL_NAME(R_SPARC_DISP16)
  REL_NAME(R_SPARC_DISP32)
  REL_NAME(R_SPARC_WDISP30)
  REL_NAME(R_SPARC_WDISP22)
  REL_NAME(R_SPARC_HI22)
  REL_NAME(R_SPARC_22)
  REL_NAME(R_SPARC_13)
  REL_NAME(R_SPARC_LO10)
  REL_NAME(R_SPARC_GOT10)
  REL_NAME(R_SPARC_GOT13)
  REL_NAME(R_SPARC_GOT22)
  REL_NAME(R_SPARC_PC10)
  REL_NAME(R_SPARC_PC22)
  REL_NAME(R_SPARC_WPLT30)
  REL_NAME(R_SPARC_COPY)
  REL_NAME(R_SPARC_GLOB_DAT)
  REL_NAME(R_SPARC_JMP_SLOT)
  REL_NAME(R_SPARC_RELATIVE)
  REL_NAME(R_SPARC_UA32)
  REL_NAME(R_SPARC_PLT32)
  REL_NAME(R_SPARC_HIPLT22)
  REL_NAME(R_SPARC_LOPLT10)
  REL_NAME(R_SPARC_PCPLT32)
  REL_NAME(R_SPARC_PCPLT22)
  REL_NAME(R_SPARC_PCPLT10)
  REL_NAME(R_SPARC_10)
  REL_NAME(R_SPARC_11)
  REL_NAME(R_SPARC_64)
  REL_NAME(R_SPARC_OLO10)
  REL_NAME(R_SPARC_HH22)
  REL_NAME(R_SPARC_HM10)
  REL_NAME(R_SPARC_LM22)
  REL_NAME(R_SPARC_PC_HH22)
  REL_NAME(R_SPARC_PC_HM10)
  REL_NAME(R_SPARC_PC_LM22)
  REL_NAME(R_SPARC_WDISP16)
  REL_NAME(R_SPARC_WDISP19)
  REL_NAME(R_SPARC_7)
  REL_NAME(R_SPARC_5)
  REL_NAME(R_SPARC_6)
  REL_NAME(R_SPARC_DISP64)
  REL_NAME(R_SPARC_PLT64)
  REL_NAME(R_SPARC_HIX22)
  REL_NAME(R_SPARC_LOX10)
  REL_NAME(R_SPARC_H44)
  REL_NAME(R_SPARC_M44)
  REL_NAME(R_SPARC_L44)
  REL_NAME(R_SPARC_REGISTER)
  REL_NAME(R_SPARC_UA64)
  REL_NAME(R_SPARC_UA16)
  REL_NAME(R_SPARC_TLS_GD_HI22)
  REL_NAME(R_SPARC_TLS_GD_LO10)
  REL_NAME(R_SPARC_TLS_GD_ADD)
  REL_NAME(R_SPARC_TLS_GD_CALL)
  REL_NAME(R_SPARC_TLS_LDM_HI22)
  REL_NAME(R_SPARC_TLS_LDM_LO10)
  REL_NAME(R_SPARC_TLS_LDM_ADD)
  REL_NAME(R_SPARC_TLS_LDM_CALL)
  REL_NAME(R_SPARC_TLS_LDO_HIX22)
  REL_NAME(R_SPARC_TLS_LDO_LOX10)
  REL_NAME(R_SPARC_TLS_LDO_ADD)
  REL_NAME(R_SPARC_TLS_IE_HI22)
  REL_NAME(R_SPARC_TLS_IE_LO10)
  REL_NAME(R_SPARC_TLS_IE_LD)
  REL_NAME(R_SPARC_TLS_IE_LDX)
  REL_NAME(R_SPARC_TLS_IE_ADD)
  REL_NAME(R_SPARC_TLS_LE_HIX22)
  REL_NAME(R_SPARC_TLS_LE_LOX10)
  REL_NAME(R_SPARC_TLS_DTPMOD32)
  REL_NAME(R_SPARC_TLS_DTPMOD64)
  REL_NAME(R_SPARC_TLS_DTPOFF32)
  REL_NAME(R_SPARC_TLS_DTPOFF64)
  REL_NAME(R_SPARC_TLS_TPOFF32)
  REL_NAME(R_SPARC_TLS_TPOFF64)
  REL_NAME(R_SPARC_GOTDATA_HIX22)
  REL_NAME(R_SPARC_GOTDATA_LOX10)
  REL_NAME(R_SPARC_GOTDATA_OP_HIX22)
  REL_NAME(R_SPARC_GOTDATA_OP_LOX10)
  REL_NAME(R_SPARC_GOTDATA_OP)
  REL_NAME(R_SPARC_IRELATIVE)
  }
#undef REL_NAME
  return "unknown (" + std::to_string(r_type) + ")";
}
// Returns the symbolic name of an s390x relocation type.
// Unrecognized values are rendered as "unknown (<n>)".
template <>
std::string rel_to_string<S390X>(u32 r_type) {
#define REL_NAME(x) case x: return #x;
  switch (r_type) {
  REL_NAME(R_390_NONE)
  REL_NAME(R_390_8)
  REL_NAME(R_390_12)
  REL_NAME(R_390_16)
  REL_NAME(R_390_32)
  REL_NAME(R_390_PC32)
  REL_NAME(R_390_GOT12)
  REL_NAME(R_390_GOT32)
  REL_NAME(R_390_PLT32)
  REL_NAME(R_390_COPY)
  REL_NAME(R_390_GLOB_DAT)
  REL_NAME(R_390_JMP_SLOT)
  REL_NAME(R_390_RELATIVE)
  REL_NAME(R_390_GOTOFF32)
  REL_NAME(R_390_GOTPC)
  REL_NAME(R_390_GOT16)
  REL_NAME(R_390_PC16)
  REL_NAME(R_390_PC16DBL)
  REL_NAME(R_390_PLT16DBL)
  REL_NAME(R_390_PC32DBL)
  REL_NAME(R_390_PLT32DBL)
  REL_NAME(R_390_GOTPCDBL)
  REL_NAME(R_390_64)
  REL_NAME(R_390_PC64)
  REL_NAME(R_390_GOT64)
  REL_NAME(R_390_PLT64)
  REL_NAME(R_390_GOTENT)
  REL_NAME(R_390_GOTOFF16)
  REL_NAME(R_390_GOTOFF64)
  REL_NAME(R_390_GOTPLT12)
  REL_NAME(R_390_GOTPLT16)
  REL_NAME(R_390_GOTPLT32)
  REL_NAME(R_390_GOTPLT64)
  REL_NAME(R_390_GOTPLTENT)
  REL_NAME(R_390_PLTOFF16)
  REL_NAME(R_390_PLTOFF32)
  REL_NAME(R_390_PLTOFF64)
  REL_NAME(R_390_TLS_LOAD)
  REL_NAME(R_390_TLS_GDCALL)
  REL_NAME(R_390_TLS_LDCALL)
  REL_NAME(R_390_TLS_GD32)
  REL_NAME(R_390_TLS_GD64)
  REL_NAME(R_390_TLS_GOTIE12)
  REL_NAME(R_390_TLS_GOTIE32)
  REL_NAME(R_390_TLS_GOTIE64)
  REL_NAME(R_390_TLS_LDM32)
  REL_NAME(R_390_TLS_LDM64)
  REL_NAME(R_390_TLS_IE32)
  REL_NAME(R_390_TLS_IE64)
  REL_NAME(R_390_TLS_IEENT)
  REL_NAME(R_390_TLS_LE32)
  REL_NAME(R_390_TLS_LE64)
  REL_NAME(R_390_TLS_LDO32)
  REL_NAME(R_390_TLS_LDO64)
  REL_NAME(R_390_TLS_DTPMOD)
  REL_NAME(R_390_TLS_DTPOFF)
  REL_NAME(R_390_TLS_TPOFF)
  REL_NAME(R_390_20)
  REL_NAME(R_390_GOT20)
  REL_NAME(R_390_GOTPLT20)
  REL_NAME(R_390_TLS_GOTIE20)
  REL_NAME(R_390_IRELATIVE)
  REL_NAME(R_390_PC12DBL)
  REL_NAME(R_390_PLT12DBL)
  REL_NAME(R_390_PC24DBL)
  REL_NAME(R_390_PLT24DBL)
  }
#undef REL_NAME
  return "unknown (" + std::to_string(r_type) + ")";
}
// Returns the symbolic name of an m68k relocation type.
// Unrecognized values are rendered as "unknown (<n>)".
template <>
std::string rel_to_string<M68K>(u32 r_type) {
#define REL_NAME(x) case x: return #x;
  switch (r_type) {
  REL_NAME(R_68K_NONE)
  REL_NAME(R_68K_32)
  REL_NAME(R_68K_16)
  REL_NAME(R_68K_8)
  REL_NAME(R_68K_PC32)
  REL_NAME(R_68K_PC16)
  REL_NAME(R_68K_PC8)
  REL_NAME(R_68K_GOTPCREL32)
  REL_NAME(R_68K_GOTPCREL16)
  REL_NAME(R_68K_GOTPCREL8)
  REL_NAME(R_68K_GOTOFF32)
  REL_NAME(R_68K_GOTOFF16)
  REL_NAME(R_68K_GOTOFF8)
  REL_NAME(R_68K_PLT32)
  REL_NAME(R_68K_PLT16)
  REL_NAME(R_68K_PLT8)
  REL_NAME(R_68K_PLTOFF32)
  REL_NAME(R_68K_PLTOFF16)
  REL_NAME(R_68K_PLTOFF8)
  REL_NAME(R_68K_COPY)
  REL_NAME(R_68K_GLOB_DAT)
  REL_NAME(R_68K_JMP_SLOT)
  REL_NAME(R_68K_RELATIVE)
  REL_NAME(R_68K_TLS_GD32)
  REL_NAME(R_68K_TLS_GD16)
  REL_NAME(R_68K_TLS_GD8)
  REL_NAME(R_68K_TLS_LDM32)
  REL_NAME(R_68K_TLS_LDM16)
  REL_NAME(R_68K_TLS_LDM8)
  REL_NAME(R_68K_TLS_LDO32)
  REL_NAME(R_68K_TLS_LDO16)
  REL_NAME(R_68K_TLS_LDO8)
  REL_NAME(R_68K_TLS_IE32)
  REL_NAME(R_68K_TLS_IE16)
  REL_NAME(R_68K_TLS_IE8)
  REL_NAME(R_68K_TLS_LE32)
  REL_NAME(R_68K_TLS_LE16)
  REL_NAME(R_68K_TLS_LE8)
  REL_NAME(R_68K_TLS_DTPMOD32)
  REL_NAME(R_68K_TLS_DTPREL32)
  REL_NAME(R_68K_TLS_TPREL32)
  }
#undef REL_NAME
  return "unknown (" + std::to_string(r_type) + ")";
}
// Returns the symbolic name of a SuperH (SH-4) relocation type.
// Unrecognized values are rendered as "unknown (<n>)".
template <>
std::string rel_to_string<SH4>(u32 r_type) {
#define REL_NAME(x) case x: return #x;
  switch (r_type) {
  REL_NAME(R_SH_NONE)
  REL_NAME(R_SH_DIR32)
  REL_NAME(R_SH_REL32)
  REL_NAME(R_SH_DIR8WPN)
  REL_NAME(R_SH_IND12W)
  REL_NAME(R_SH_DIR8WPL)
  REL_NAME(R_SH_DIR8WPZ)
  REL_NAME(R_SH_DIR8BP)
  REL_NAME(R_SH_DIR8W)
  REL_NAME(R_SH_DIR8L)
  REL_NAME(R_SH_TLS_GD_32)
  REL_NAME(R_SH_TLS_LD_32)
  REL_NAME(R_SH_TLS_LDO_32)
  REL_NAME(R_SH_TLS_IE_32)
  REL_NAME(R_SH_TLS_LE_32)
  REL_NAME(R_SH_TLS_DTPMOD32)
  REL_NAME(R_SH_TLS_DTPOFF32)
  REL_NAME(R_SH_TLS_TPOFF32)
  REL_NAME(R_SH_GOT32)
  REL_NAME(R_SH_PLT32)
  REL_NAME(R_SH_COPY)
  REL_NAME(R_SH_GLOB_DAT)
  REL_NAME(R_SH_JMP_SLOT)
  REL_NAME(R_SH_RELATIVE)
  REL_NAME(R_SH_GOTOFF)
  REL_NAME(R_SH_GOTPC)
  REL_NAME(R_SH_GOTPLT32)
  }
#undef REL_NAME
  return "unknown (" + std::to_string(r_type) + ")";
}
// Returns the symbolic name of an Alpha relocation type.
// Unrecognized values are rendered as "unknown (<n>)".
template <>
std::string rel_to_string<ALPHA>(u32 r_type) {
#define REL_NAME(x) case x: return #x;
  switch (r_type) {
  REL_NAME(R_ALPHA_NONE)
  REL_NAME(R_ALPHA_REFLONG)
  REL_NAME(R_ALPHA_REFQUAD)
  REL_NAME(R_ALPHA_GPREL32)
  REL_NAME(R_ALPHA_LITERAL)
  REL_NAME(R_ALPHA_LITUSE)
  REL_NAME(R_ALPHA_GPDISP)
  REL_NAME(R_ALPHA_BRADDR)
  REL_NAME(R_ALPHA_HINT)
  REL_NAME(R_ALPHA_SREL16)
  REL_NAME(R_ALPHA_SREL32)
  REL_NAME(R_ALPHA_SREL64)
  REL_NAME(R_ALPHA_GPRELHIGH)
  REL_NAME(R_ALPHA_GPRELLOW)
  REL_NAME(R_ALPHA_GPREL16)
  REL_NAME(R_ALPHA_COPY)
  REL_NAME(R_ALPHA_GLOB_DAT)
  REL_NAME(R_ALPHA_JMP_SLOT)
  REL_NAME(R_ALPHA_RELATIVE)
  REL_NAME(R_ALPHA_BRSGP)
  REL_NAME(R_ALPHA_TLSGD)
  REL_NAME(R_ALPHA_TLSLDM)
  REL_NAME(R_ALPHA_DTPMOD64)
  REL_NAME(R_ALPHA_GOTDTPREL)
  REL_NAME(R_ALPHA_DTPREL64)
  REL_NAME(R_ALPHA_DTPRELHI)
  REL_NAME(R_ALPHA_DTPRELLO)
  REL_NAME(R_ALPHA_DTPREL16)
  REL_NAME(R_ALPHA_GOTTPREL)
  REL_NAME(R_ALPHA_TPREL64)
  REL_NAME(R_ALPHA_TPRELHI)
  REL_NAME(R_ALPHA_TPRELLO)
  REL_NAME(R_ALPHA_TPREL16)
  }
#undef REL_NAME
  return "unknown (" + std::to_string(r_type) + ")";
}
} // namespace mold::elf

2053
third_party/mold/elf/elf.h vendored Normal file

File diff suppressed because it is too large Load diff

180
third_party/mold/elf/gc-sections.cc vendored Normal file
View file

@ -0,0 +1,180 @@
// clang-format off
// This file implements a mark-sweep garbage collector for -gc-sections.
// In this algorithm, vertices are sections and edges are relocations.
// Any section that is reachable from a root section is considered alive.
#include "third_party/mold/elf/mold.h"
// MISSING #include <tbb/concurrent_vector.h>
// MISSING #include <tbb/parallel_for_each.h>
namespace mold::elf {
template <typename E>
static bool should_keep(const InputSection<E> &isec) {
u32 type = isec.shdr().sh_type;
u32 flags = isec.shdr().sh_flags;
std::string_view name = isec.name();
return (flags & SHF_GNU_RETAIN) ||
type == SHT_NOTE ||
type == SHT_INIT_ARRAY ||
type == SHT_FINI_ARRAY ||
type == SHT_PREINIT_ARRAY ||
(is_arm32<E> && type == SHT_ARM_EXIDX) ||
name.starts_with(".ctors") ||
name.starts_with(".dtors") ||
name.starts_with(".init") ||
name.starts_with(".fini") ||
is_c_identifier(name);
}
// Atomically claims a live section for visiting. Returns true exactly
// once per section: the first time a live, not-yet-visited section is
// passed in. Null or dead sections are never claimed.
template <typename E>
static bool mark_section(InputSection<E> *isec) {
  if (!isec || !isec->is_alive)
    return false;
  return !isec->is_visited.test_and_set();
}
// Traverses the relocation graph from `isec`, which must already be
// marked as visited. Newly discovered sections are either visited
// recursively (only up to a small depth, which also bounds stack
// usage) or handed to the TBB feeder for parallel processing.
template <typename E>
static void visit(Context<E> &ctx, InputSection<E> *isec,
tbb::feeder<InputSection<E> *> &feeder, i64 depth) {
assert(isec->is_visited);
// If this is a text section, .eh_frame may contain records
// describing how to handle exceptions for that function.
// We want to keep associated .eh_frame records.
for (FdeRecord<E> &fde : isec->get_fdes())
for (const ElfRel<E> &rel : fde.get_rels(isec->file).subspan(1))
if (Symbol<E> *sym = isec->file.symbols[rel.r_sym])
if (mark_section(sym->get_input_section()))
feeder.add(sym->get_input_section())
for (const ElfRel<E> &rel : isec->get_rels(ctx)) {
Symbol<E> &sym = *isec->file.symbols[rel.r_sym];
// A symbol can refer either to a section fragment or to an input
// section. Mark a fragment as alive.
if (SectionFragment<E> *frag = sym.get_frag()) {
frag->is_alive = true;
continue;
}
// Mark a section alive. For better performance, we don't call
// `feeder.add` too often.
if (mark_section(sym.get_input_section())) {
if (depth < 3)
visit(ctx, sym.get_input_section(), feeder, depth + 1);
else
feeder.add(sym.get_input_section());
}
}
}
// Collects the initial set of GC roots into `rootset`: sections that
// must be kept regardless of reachability (see should_keep), sections
// containing exported symbols, sections referenced by root symbols
// such as the entry point and --undefined/--require-defined symbols,
// and everything referenced from .eh_frame CIE records.
template <typename E>
static void collect_root_set(Context<E> &ctx,
tbb::concurrent_vector<InputSection<E> *> &rootset) {
Timer t(ctx, "collect_root_set");
// Adds a section to the root set the first time it is seen.
auto enqueue_section = [&](InputSection<E> *isec) {
if (mark_section(isec))
rootset.push_back(isec);
};
// Marks the fragment or section a symbol refers to as a root.
auto enqueue_symbol = [&](Symbol<E> *sym) {
if (sym) {
if (SectionFragment<E> *frag = sym->get_frag())
frag->is_alive = true;
else
enqueue_section(sym->get_input_section());
}
};
// Add sections that are not subject to garbage collection.
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
for (std::unique_ptr<InputSection<E>> &isec : file->sections) {
if (!isec || !isec->is_alive)
continue;
// --gc-sections discards only SHF_ALLOC sections. If you want to
// reduce the amount of non-memory-mapped segments, you should
// use `strip` command, compile without debug info or use
// --strip-all linker option.
u32 flags = isec->shdr().sh_flags;
if (!(flags & SHF_ALLOC))
isec->is_visited = true;
if (should_keep(*isec))
enqueue_section(isec.get());
}
});
// Add sections containing exported symbols.
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
for (Symbol<E> *sym : file->symbols)
if (sym->file == file && sym->is_exported)
enqueue_symbol(sym);
});
// Add sections referenced by root symbols.
enqueue_symbol(get_symbol(ctx, ctx.arg.entry));
for (std::string_view name : ctx.arg.undefined)
enqueue_symbol(get_symbol(ctx, name));
for (std::string_view name : ctx.arg.require_defined)
enqueue_symbol(get_symbol(ctx, name));
// .eh_frame consists of variable-length records called CIE and FDE
// records, and they are a unit of inclusion or exclusion.
// We just keep all CIEs and everything that are referenced by them.
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
for (CieRecord<E> &cie : file->cies)
for (const ElfRel<E> &rel : cie.get_rels())
enqueue_symbol(file->symbols[rel.r_sym]);
});
}
// Mark all reachable sections
template <typename E>
static void mark(Context<E> &ctx,
tbb::concurrent_vector<InputSection<E> *> &rootset) {
Timer t(ctx, "mark");
tbb::parallel_for_each(rootset, [&](InputSection<E> *isec,
tbb::feeder<InputSection<E> *> &feeder) {
visit(ctx, isec, feeder, 0);
});
}
// Remove unreachable sections
template <typename E>
static void sweep(Context<E> &ctx) {
Timer t(ctx, "sweep");
static Counter counter("garbage_sections");
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
for (std::unique_ptr<InputSection<E>> &isec : file->sections) {
if (isec && isec->is_alive && !isec->is_visited) {
if (ctx.arg.print_gc_sections)
SyncOut(ctx) << "removing unused section " << *isec;
isec->kill();
counter++;
}
}
});
}
// Entry point for --gc-sections: a mark-sweep pass over input sections,
// where relocations are the edges of the reachability graph.
template <typename E>
void gc_sections(Context<E> &ctx) {
  Timer t(ctx, "gc");

  tbb::concurrent_vector<InputSection<E> *> roots;
  collect_root_set(ctx, roots);
  mark(ctx, roots);
  sweep(ctx);
}
// Explicit instantiation for the target selected via the MOLD_TARGET
// macro — presumably this file is compiled once per target; confirm
// against the build setup.
using E = MOLD_TARGET;
template void gc_sections(Context<E> &ctx);
} // namespace mold::elf

615
third_party/mold/elf/icf.cc vendored Normal file
View file

@ -0,0 +1,615 @@
// clang-format off
// This file implements the Identical Code Folding feature which can
// reduce the output file size of a typical program by a few percent.
// ICF identifies read-only input sections that happen to be identical
// and thus can be used interchangeably. ICF leaves one of them and discards
// the others.
//
// ICF is usually used in combination with -ffunction-sections and
// -fdata-sections compiler options, so that object files have one section
// for each function or variable instead of having one large .text or .data.
// The unit of ICF merging is section.
//
// Two sections are considered identical by ICF if they have the exact
// same contents, metadata such as section flags, exception handling
// records, and relocations. The last one is interesting because two
// relocations are considered identical if they point to the _same_
// section in terms of ICF.
//
// To see what that means, consider two sections, A and B, which are
// identical except for one pair of relocations. Say, A has a relocation to
// section C, and B has a relocation to D. In this case, A and B are
// considered identical if C and D are considered identical. C and D can be
// either really the same section or two different sections that are
// considered identical by ICF. Below is an example of such inputs, A, B, C
// and D:
//
// void A() { C(); }
// void B() { D(); }
// void C() { A(); }
// void D() { B(); }
//
// If we assume A and B are mergeable, we can merge C and D, which makes A
// and B mergeable. There's no contradiction in our assumption, so we can
// conclude that A and B as well as C and D are mergeable.
//
// This problem boils down to one in graph theory. Input to ICF can be
// considered as a directed graph in which vertices are sections and edges
// are relocations. Vertices have labels (section contents, etc.), and so
// are edges (relocation offsets, etc.). Two vertices are considered
// identical if and only if their (possibly infinite) unfoldings into
// regular trees are equal. Given this formulation, we want to find as
// many identical vertices as possible.
//
// Just like a lot of problems with graphs, this problem doesn't have a
// straightforward "optimal" solution, and we need to resort to heuristics.
//
// mold approaches this problem by hashing program trees with increasing depth
// on each iteration.
// For example, when we start, we only hash individual functions with
// their call into other functions omitted. From the second iteration, we
// put the function they call into the hash by appending the hash of those
// functions from the previous iteration. This means that the nth iteration
// hashes call chain up to (n-1) levels deep.
// We use a cryptographic hash function, so the unique number of hashes will
// only monotonically increase as we take deeper trees into account with
// iterations (otherwise, that means we have found a hash collision). We stop
// when the unique number of hashes stop increasing; this is based on the fact
// that once we observe an iteration with the same amount of unique hashes as
// the previous iteration, it will remain unchanged for further iterations.
// This is provable, but here we omit the proof for brevity.
//
// When compared to other approaches, mold's approach has a relatively cheaper
// cost per iteration, and as a bonus, is highly parallelizable.
// For Chromium, mold's ICF finishes in less than 1 second with 20 threads,
// whereas lld takes 5 seconds and gold takes 50 seconds under the same
// conditions.
#include "third_party/mold/elf/mold.h"
// MISSING #include "../common/sha.h"
#include "third_party/libcxx/array"
#include "third_party/libcxx/cstdio"
// MISSING #include <tbb/concurrent_unordered_map.h>
// MISSING #include <tbb/concurrent_vector.h>
// MISSING #include <tbb/enumerable_thread_specific.h>
// MISSING #include <tbb/parallel_for.h>
// MISSING #include <tbb/parallel_for_each.h>
// MISSING #include <tbb/parallel_sort.h>
// ICF digests are truncated SHA-256 hashes: 16 bytes are plenty to
// make accidental collisions practically impossible.
static constexpr int64_t HASH_SIZE = 16;

typedef std::array<uint8_t, HASH_SIZE> Digest;

namespace std {
template<> struct hash<Digest> {
  size_t operator()(const Digest &k) const {
    // The digest bytes are already uniformly distributed, so the
    // first 8 bytes serve directly as the hash value. memcpy avoids
    // the misaligned, strict-aliasing-violating `*(int64_t *)&k[0]`
    // pointer cast (std::array<uint8_t, N> has alignment 1).
    int64_t h;
    memcpy(&h, k.data(), sizeof(h));
    return h;
  }
};
}
namespace mold::elf {
// Assigns an index (`icf_idx`) to each distinct CIE so that FDEs can
// later be compared via small integer indices instead of full CIE
// comparisons. CIEs that compare equal share an index. The quadratic
// scan is fine in practice because there are only a handful of
// distinct CIEs.
template <typename E>
static void uniquify_cies(Context<E> &ctx) {
  Timer t(ctx, "uniquify_cies");
  std::vector<CieRecord<E> *> cies;

  for (ObjectFile<E> *file : ctx.objs) {
    for (CieRecord<E> &cie : file->cies) {
      // Reuse the index of an existing identical CIE, if any.
      for (i64 i = 0; i < cies.size(); i++) {
        if (cie.equals(*cies[i])) {
          cie.icf_idx = i;
          goto found;
        }
      }
      // First occurrence of this CIE; give it a fresh index.
      cie.icf_idx = cies.size();
      cies.push_back(&cie);
    found:;
    }
  }
}
// Returns true if the given section is a candidate for ICF merging:
// allocated, executable (or data with --ignore-data-address-equality),
// read-only, non-empty, not init/fini machinery, and not
// address-significant.
template <typename E>
static bool is_eligible(Context<E> &ctx, InputSection<E> &isec) {
  const ElfShdr<E> &shdr = isec.shdr();
  std::string_view name = isec.name();

  bool is_alloc = (shdr.sh_flags & SHF_ALLOC);
  // Data sections are foldable only when the user opted in.
  bool is_exec = (shdr.sh_flags & SHF_EXECINSTR) ||
                 ctx.arg.ignore_data_address_equality;
  bool is_relro = (name == ".data.rel.ro" ||
                   name.starts_with(".data.rel.ro."));
  // RELRO sections become read-only after startup, so treat them as
  // read-only for ICF purposes.
  bool is_readonly = !(shdr.sh_flags & SHF_WRITE) || is_relro;
  bool is_bss = (shdr.sh_type == SHT_NOBITS);
  bool is_empty = (shdr.sh_size == 0);
  bool is_init = (shdr.sh_type == SHT_INIT_ARRAY || name == ".init");
  bool is_fini = (shdr.sh_type == SHT_FINI_ARRAY || name == ".fini");
  // Sections with C-identifier names get __start_/__stop_ symbols,
  // so folding them could change observable behavior.
  bool is_enumerable = is_c_identifier(name);
  // Unless --icf=all, keep sections whose address was taken.
  bool is_addr_taken = !ctx.arg.icf_all && isec.address_significant;

  return is_alloc && is_exec && is_readonly && !is_bss && !is_empty &&
         !is_init && !is_fini && !is_enumerable && !is_addr_taken;
}
// Finalizes a SHA-256 computation and truncates the 32-byte result
// to the first HASH_SIZE bytes, which is all ICF needs.
static Digest digest_final(SHA256Hash &sha) {
  u8 full[SHA256_SIZE];
  sha.finish(full);

  Digest out;
  memcpy(out.data(), full, HASH_SIZE);
  return out;
}
// A section is a "leaf" of the ICF graph if it has no outgoing
// references: no relocations, and at most the initial (CIE-pointing)
// relocation in each of its FDEs.
template <typename E>
static bool is_leaf(Context<E> &ctx, InputSection<E> &isec) {
  if (!isec.get_rels(ctx).empty())
    return false;

  for (FdeRecord<E> &fde : isec.get_fdes())
    if (fde.get_rels(isec.file).size() > 1)
      return false;

  return true;
}
// Hash functor for leaf sections: combines the hash of the section
// contents with the hashes of its FDE bodies. The first 8 bytes of
// each FDE (record length and CIE offset) are skipped because they
// differ even between otherwise identical records.
template <typename E>
struct LeafHasher {
  size_t operator()(InputSection<E> *isec) const {
    u64 h = hash_string(isec->contents);
    for (FdeRecord<E> &fde : isec->get_fdes()) {
      u64 h2 = hash_string(fde.get_contents(isec->file).substr(8));
      h = combine_hash(h, h2);
    }
    return h;
  }
};
// Equality functor for leaf sections: identical contents and pairwise
// identical FDE bodies (again ignoring each FDE's first 8 bytes).
// Must be consistent with LeafHasher above.
template <typename E>
struct LeafEq {
  bool operator()(InputSection<E> *a, InputSection<E> *b) const {
    if (a->contents != b->contents)
      return false;

    std::span<FdeRecord<E>> x = a->get_fdes();
    std::span<FdeRecord<E>> y = b->get_fdes();
    if (x.size() != y.size())
      return false;

    for (i64 i = 0; i < x.size(); i++)
      if (x[i].get_contents(a->file).substr(8) !=
          y[i].get_contents(b->file).substr(8))
        return false;
    return true;
  }
};
// Early merge of leaf nodes, which can be processed without constructing the
// entire graph. This reduces the vertex count and improves memory efficiency.
// Merges identical leaf sections up front, before the iterative
// propagation. Each group of identical leaves is pointed at a single
// leader (the member with the lowest priority).
template <typename E>
static void merge_leaf_nodes(Context<E> &ctx) {
  Timer t(ctx, "merge_leaf_nodes");

  static Counter eligible("icf_eligibles");
  static Counter non_eligible("icf_non_eligibles");
  static Counter leaf("icf_leaf_nodes");

  // Maps each leaf section to the chosen representative of its
  // equivalence class.
  tbb::concurrent_unordered_map<InputSection<E> *, InputSection<E> *,
                                LeafHasher<E>, LeafEq<E>> map;

  // First pass: classify every live section as leaf, eligible inner
  // node, or ineligible, and register leaves in `map`.
  tbb::parallel_for((i64)0, (i64)ctx.objs.size(), [&](i64 i) {
    for (std::unique_ptr<InputSection<E>> &isec : ctx.objs[i]->sections) {
      if (!isec || !isec->is_alive)
        continue;

      if (!is_eligible(ctx, *isec)) {
        non_eligible++;
        continue;
      }

      if (is_leaf(ctx, *isec)) {
        leaf++;
        isec->icf_leaf = true;
        auto [it, inserted] = map.insert({isec.get(), isec.get()});
        // Keep the lowest-priority member as the group leader.
        if (!inserted && isec->get_priority() < it->second->get_priority())
          it->second = isec.get();
      } else {
        eligible++;
        isec->icf_eligible = true;
      }
    }
  });

  // Second pass: point every leaf at its group leader.
  tbb::parallel_for((i64)0, (i64)ctx.objs.size(), [&](i64 i) {
    for (std::unique_ptr<InputSection<E>> &isec : ctx.objs[i]->sections) {
      if (isec && isec->is_alive && isec->icf_leaf) {
        auto it = map.find(isec.get());
        assert(it != map.end());
        isec->leader = it->second;
      }
    }
  });
}
// Computes the initial (depth-0) digest of a section: its contents,
// flags, FDEs and relocations, with referenced symbols folded in by
// category. Symbols in not-yet-merged eligible sections contribute a
// placeholder; the propagation rounds later disambiguate them.
template <typename E>
static Digest compute_digest(Context<E> &ctx, InputSection<E> &isec) {
  SHA256Hash sha;

  auto hash = [&](auto val) {
    sha.update((u8 *)&val, sizeof(val));
  };

  // Hash the length as well so concatenations cannot collide.
  auto hash_string = [&](std::string_view str) {
    hash(str.size());
    sha.update((u8 *)str.data(), str.size());
  };

  // Each symbol kind is prefixed with a distinct tag byte so the
  // kinds cannot collide with one another.
  auto hash_symbol = [&](Symbol<E> &sym) {
    InputSection<E> *isec = sym.get_input_section();

    if (!sym.file) {
      // No owning file: identify by the Symbol object itself.
      hash('1');
      hash((u64)&sym);
    } else if (SectionFragment<E> *frag = sym.get_frag()) {
      hash('2');
      hash((u64)frag);
    } else if (!isec) {
      // Absolute-valued symbol; sym.value below distinguishes it.
      hash('3');
    } else if (isec->leader) {
      // Already merged: hash the leader so members of one group agree.
      hash('4');
      hash((u64)isec->leader);
    } else if (isec->icf_eligible) {
      // Eligible but unresolved: placeholder; the edge hashes added
      // by propagate() will tell such targets apart.
      hash('5');
    } else {
      hash('6');
      hash((u64)isec);
    }
    hash(sym.value);
  };

  hash_string(isec.contents);
  hash(isec.shdr().sh_flags);
  hash(isec.get_fdes().size());
  hash(isec.get_rels(ctx).size());

  for (FdeRecord<E> &fde : isec.get_fdes()) {
    hash(isec.file.cies[fde.cie_idx].icf_idx);

    // Bytes 0 to 4 contain the length of this record, and
    // bytes 4 to 8 contain an offset to CIE.
    hash_string(fde.get_contents(isec.file).substr(8));

    hash(fde.get_rels(isec.file).size());

    // subspan(1) skips the first relocation, which points to the CIE.
    for (const ElfRel<E> &rel : fde.get_rels(isec.file).subspan(1)) {
      hash_symbol(*isec.file.symbols[rel.r_sym]);
      hash(rel.r_type);
      hash(rel.r_offset - fde.input_offset);
      hash(get_addend(isec.file.cies[fde.cie_idx].input_section, rel));
    }
  }

  for (i64 i = 0; i < isec.get_rels(ctx).size(); i++) {
    const ElfRel<E> &rel = isec.get_rels(ctx)[i];
    hash(rel.r_offset);
    hash(rel.r_type);
    hash(get_addend(isec, rel));
    hash_symbol(*isec.file.symbols[rel.r_sym]);
  }

  return digest_final(sha);
}
// Collects all ICF-eligible sections into one flat vector and assigns
// each its index (`icf_idx`) into that vector. A counting pass plus a
// prefix sum lets the fill pass run in parallel without locks.
// Note: assumes ctx.objs is non-empty (guaranteed by icf_sections).
template <typename E>
static std::vector<InputSection<E> *> gather_sections(Context<E> &ctx) {
  Timer t(ctx, "gather_sections");

  // Count the number of input sections for each input file.
  std::vector<i64> num_sections(ctx.objs.size());

  tbb::parallel_for((i64)0, (i64)ctx.objs.size(), [&](i64 i) {
    for (std::unique_ptr<InputSection<E>> &isec : ctx.objs[i]->sections)
      if (isec && isec->is_alive && isec->icf_eligible)
        num_sections[i]++;
  });

  // Exclusive prefix sum: starting slot for each file's sections.
  std::vector<i64> section_indices(ctx.objs.size());
  for (i64 i = 0; i < ctx.objs.size() - 1; i++)
    section_indices[i + 1] = section_indices[i] + num_sections[i];

  std::vector<InputSection<E> *> sections(
    section_indices.back() + num_sections.back());

  // Fill `sections` contents.
  tbb::parallel_for((i64)0, (i64)ctx.objs.size(), [&](i64 i) {
    i64 idx = section_indices[i];
    for (std::unique_ptr<InputSection<E>> &isec : ctx.objs[i]->sections)
      if (isec && isec->is_alive && isec->icf_eligible)
        sections[idx++] = isec.get();
  });

  // Record each section's position in the flat array.
  tbb::parallel_for((i64)0, (i64)sections.size(), [&](i64 i) {
    sections[i]->icf_idx = i;
  });

  return sections;
}
// Computes the depth-0 digest of every section in `sections`, in
// parallel. result[i] is the digest of sections[i].
template <typename E>
static std::vector<Digest>
compute_digests(Context<E> &ctx, std::span<InputSection<E> *> sections) {
  Timer t(ctx, "compute_digests");

  i64 count = (i64)sections.size();
  std::vector<Digest> result(count);
  tbb::parallel_for((i64)0, count, [&](i64 idx) {
    result[idx] = compute_digest(ctx, *sections[idx]);
  });
  return result;
}
// Build a graph, treating every function as a vertex and every function call
// as an edge. See the description at the top for a more detailed formulation.
// We use u32 indices here to improve cache locality.
// Builds the flattened edge array of the ICF graph: `edges` holds the
// icf_idx of every eligible section referenced by each section's
// relocations, and `edge_indices[i]` is where section i's edges start.
template <typename E>
static void gather_edges(Context<E> &ctx,
                         std::span<InputSection<E> *> sections,
                         std::vector<u32> &edges,
                         std::vector<u32> &edge_indices) {
  Timer t(ctx, "gather_edges");

  if (sections.empty())
    return;

  // Counting pass: how many outgoing edges each section has, so that
  // the fill pass below can write into a preallocated flat array.
  std::vector<i64> num_edges(sections.size());
  edge_indices.resize(sections.size());

  tbb::parallel_for((i64)0, (i64)sections.size(), [&](i64 i) {
    InputSection<E> &isec = *sections[i];
    assert(isec.icf_eligible);

    for (i64 j = 0; j < isec.get_rels(ctx).size(); j++) {
      const ElfRel<E> &rel = isec.get_rels(ctx)[j];
      Symbol<E> &sym = *isec.file.symbols[rel.r_sym];
      if (!sym.get_frag())
        if (InputSection<E> *isec = sym.get_input_section())
          if (isec->icf_eligible)
            num_edges[i]++;
    }
  });

  // Exclusive prefix sum over the per-section edge counts.
  for (i64 i = 0; i < num_edges.size() - 1; i++)
    edge_indices[i + 1] = edge_indices[i] + num_edges[i];

  edges.resize(edge_indices.back() + num_edges.back());

  // Fill pass.
  // NOTE(review): unlike the counting pass above, this pass does not
  // skip symbols with `sym.get_frag()`. If a symbol could report both
  // a fragment and an input section, the counts would diverge and
  // overflow `edges` -- confirm that get_frag() and
  // get_input_section() are mutually exclusive on Symbol.
  tbb::parallel_for((i64)0, (i64)num_edges.size(), [&](i64 i) {
    InputSection<E> &isec = *sections[i];
    i64 idx = edge_indices[i];
    for (ElfRel<E> &rel : isec.get_rels(ctx)) {
      Symbol<E> &sym = *isec.file.symbols[rel.r_sym];
      if (InputSection<E> *isec = sym.get_input_section())
        if (isec->icf_eligible)
          edges[idx++] = isec->icf_idx;
    }
  });
}
// Runs one round of hash propagation: each vertex's new hash is the
// hash of its initial digest concatenated with the current hashes of
// every vertex it points to. Returns how many vertices changed in
// this round.
template <typename E>
static i64 propagate(std::span<std::vector<Digest>> digests,
                     std::span<u32> edges, std::span<u32> edge_indices,
                     bool &slot, BitVector &converged,
                     tbb::affinity_partitioner &ap) {
  static Counter round("icf_round");
  round++;

  i64 num_digests = digests[0].size();
  tbb::enumerable_thread_specific<i64> changed;

  tbb::parallel_for((i64)0, num_digests, [&](i64 i) {
    if (converged.get(i))
      return;

    SHA256Hash sha;
    // digests[2] holds the immutable single-vertex (depth-0) hash.
    sha.update(digests[2][i].data(), HASH_SIZE);

    i64 begin = edge_indices[i];
    i64 end = (i + 1 == num_digests) ? edges.size() : edge_indices[i + 1];

    // Fold in the neighbors' hashes from the previous round (`slot`).
    for (i64 j : edges.subspan(begin, end - begin))
      sha.update(digests[slot][j].data(), HASH_SIZE);

    digests[!slot][i] = digest_final(sha);

    if (digests[slot][i] == digests[!slot][i]) {
      // This node has converged. Skip further iterations as it will
      // yield the same hash.
      converged.set(i);
    } else {
      changed.local()++;
    }
  }, ap);

  // Flip which of digests[0]/digests[1] holds the current hashes.
  slot = !slot;
  return changed.combine(std::plus());
}
// Counts distinct hash values in `digests` by sorting a copy and
// counting boundaries between adjacent distinct values. (The result
// is classes - 1 for non-empty input, but only its change between
// rounds matters to the caller.)
template <typename E>
static i64 count_num_classes(std::span<Digest> digests,
                             tbb::affinity_partitioner &ap) {
  std::vector<Digest> vec(digests.begin(), digests.end());
  tbb::parallel_sort(vec);

  tbb::enumerable_thread_specific<i64> num_classes;
  tbb::parallel_for((i64)0, (i64)vec.size() - 1, [&](i64 i) {
    if (vec[i] != vec[i + 1])
      num_classes.local()++;
  }, ap);
  return num_classes.combine(std::plus());
}
// Implements --print-icf-sections: reports, for each ICF group, the
// section that was kept and the identical sections folded into it,
// plus the total number of bytes saved.
template <typename E>
static void print_icf_sections(Context<E> &ctx) {
  tbb::concurrent_vector<InputSection<E> *> leaders;
  tbb::concurrent_unordered_multimap<InputSection<E> *, InputSection<E> *> map;

  // Partition live merged sections into leaders and followers.
  tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
    for (std::unique_ptr<InputSection<E>> &isec : file->sections) {
      if (isec && isec->is_alive && isec->leader) {
        if (isec.get() == isec->leader)
          leaders.push_back(isec.get());
        else
          map.insert({isec->leader, isec.get()});
      }
    }
  });

  // Report groups in a deterministic (priority) order.
  tbb::parallel_sort(leaders.begin(), leaders.end(),
                     [&](InputSection<E> *a, InputSection<E> *b) {
                       return a->get_priority() < b->get_priority();
                     });

  i64 saved_bytes = 0;

  for (InputSection<E> *leader : leaders) {
    auto [begin, end] = map.equal_range(leader);
    // Singleton groups had nothing folded into them; skip.
    if (begin == end)
      continue;

    SyncOut(ctx) << "selected section " << *leader;

    i64 n = 0;
    for (auto it = begin; it != end; it++) {
      SyncOut(ctx) << " removing identical section " << *it->second;
      n++;
    }
    saved_bytes += leader->contents.size() * n;
  }

  SyncOut(ctx) << "ICF saved " << saved_bytes << " bytes";
}
// Entry point of Identical Code Folding. See the comment at the top
// of this file for the overall algorithm.
template <typename E>
void icf_sections(Context<E> &ctx) {
  Timer t(ctx, "icf");
  if (ctx.objs.empty())
    return;

  uniquify_cies(ctx);
  merge_leaf_nodes(ctx);

  // Prepare for the propagation rounds.
  std::vector<InputSection<E> *> sections = gather_sections(ctx);

  // We allocate 3 arrays to store hashes for each vertex.
  //
  // Index 0 and 1 are used for tree hashes from the previous
  // iteration and the current iteration. They switch roles every
  // iteration. See `slot` below.
  //
  // Index 2 stores the initial, single-vertex hash. This is combined
  // with hashes from the connected vertices to form the tree hash
  // described above.
  std::vector<std::vector<Digest>> digests(3);
  digests[0] = compute_digests<E>(ctx, sections);
  digests[1].resize(digests[0].size());
  digests[2] = digests[0];

  std::vector<u32> edges;
  std::vector<u32> edge_indices;
  gather_edges<E>(ctx, sections, edges, edge_indices);

  BitVector converged(digests[0].size());
  bool slot = 0;

  // Execute the propagation rounds until convergence is obtained.
  {
    Timer t(ctx, "propagate");
    tbb::affinity_partitioner ap;

    // A cheap test that the graph hasn't converged yet.
    // The loop after this one uses a strict condition, but it's expensive
    // as it requires sorting the entire hash collection.
    //
    // Nodes with a cycle downstream (i.e. recursive functions and
    // functions that call recursive functions) will keep changing
    // with the iterations. Nodes that don't (i.e. non-recursive
    // functions) will stop changing as soon as the propagation depth
    // reaches the call tree depth.
    // Here, we test whether we have reached sufficient depth for the latter,
    // which is a necessary (but not sufficient) condition for convergence.
    i64 num_changed = -1;
    for (;;) {
      i64 n = propagate<E>(digests, edges, edge_indices, slot, converged, ap);
      if (n == num_changed)
        break;
      num_changed = n;
    }

    // Run the pass until the unique number of hashes stop increasing, at which
    // point we have achieved convergence (proof omitted for brevity).
    i64 num_classes = -1;
    for (;;) {
      // count_num_classes requires sorting which is O(n log n), so do a little
      // more work beforehand to amortize that log factor.
      for (i64 i = 0; i < 10; i++)
        propagate<E>(digests, edges, edge_indices, slot, converged, ap);

      i64 n = count_num_classes<E>(digests[slot], ap);
      if (n == num_classes)
        break;
      num_classes = n;
    }
  }

  // Group sections by SHA digest.
  {
    Timer t(ctx, "group");
    auto *map = new tbb::concurrent_unordered_map<Digest, InputSection<E> *>;
    std::span<Digest> digest = digests[slot];

    // Choose the lowest-priority member of each group as its leader.
    tbb::parallel_for((i64)0, (i64)sections.size(), [&](i64 i) {
      InputSection<E> *isec = sections[i];
      auto [it, inserted] = map->insert({digest[i], isec});
      if (!inserted && isec->get_priority() < it->second->get_priority())
        it->second = isec;
    });

    tbb::parallel_for((i64)0, (i64)sections.size(), [&](i64 i) {
      auto it = map->find(digest[i]);
      assert(it != map->end());
      sections[i]->leader = it->second;
    });

    // Since free'ing the map is slow, postpone it.
    ctx.on_exit.push_back([=] { delete map; });
  }

  if (ctx.arg.print_icf_sections)
    print_icf_sections(ctx);

  // Eliminate duplicate sections.
  // Symbols pointing to eliminated sections will be redirected on the fly when
  // exporting to the symtab.
  {
    Timer t(ctx, "sweep");
    static Counter eliminated("icf_eliminated");
    tbb::parallel_for_each(ctx.objs, [](ObjectFile<E> *file) {
      for (std::unique_ptr<InputSection<E>> &isec : file->sections) {
        if (isec && isec->is_alive && isec->is_killed_by_icf()) {
          isec->kill();
          eliminated++;
        }
      }
    });
  }
}
using E = MOLD_TARGET;
template void icf_sections(Context<E> &ctx);
} // namespace mold::elf

1497
third_party/mold/elf/input-files.cc vendored Normal file

File diff suppressed because it is too large Load diff

498
third_party/mold/elf/input-sections.cc vendored Normal file
View file

@ -0,0 +1,498 @@
// clang-format off
#include "third_party/mold/elf/mold.h"
#include "third_party/libcxx/limits"
// MISSING #include <zlib.h>
// MISSING #include <zstd.h>
namespace mold::elf {
// How a relocation against a symbol should be handled, as chosen by
// the decision tables below and consumed by scan_rel / apply_absrel.
typedef enum {
  NONE, ERROR, COPYREL, DYN_COPYREL, PLT, CPLT, DYN_CPLT, DYNREL, BASEREL, IFUNC,
} Action;
// Returns true if this CIE is identical to `other`: same contents and
// pairwise identical relocations (same record-relative offset, type,
// target symbol, and addend).
template <typename E>
bool CieRecord<E>::equals(const CieRecord<E> &other) const {
  if (get_contents() != other.get_contents())
    return false;

  std::span<const ElfRel<E>> x = get_rels();
  std::span<const ElfRel<E>> y = other.get_rels();
  if (x.size() != y.size())
    return false;

  for (i64 i = 0; i < x.size(); i++) {
    // Compare offsets relative to each record's own start.
    if (x[i].r_offset - input_offset != y[i].r_offset - other.input_offset ||
        x[i].r_type != y[i].r_type ||
        file.symbols[x[i].r_sym] != other.file.symbols[y[i].r_sym] ||
        get_addend(input_section, x[i]) != get_addend(other.input_section, y[i]))
      return false;
  }
  return true;
}
// Converts an alignment value to its power-of-two exponent.
// An alignment of 0 maps to 0 (i.e. treated like an alignment of 1).
static i64 to_p2align(u64 alignment) {
  return alignment ? std::countr_zero(alignment) : 0;
}
// Constructs an InputSection for section `shndx` of `file`, recording
// its (possibly still compressed) contents and its size and alignment
// (as a power-of-two exponent).
template <typename E>
InputSection<E>::InputSection(Context<E> &ctx, ObjectFile<E> &file,
                              std::string_view name, i64 shndx)
  : file(file), shndx(shndx) {
  if (shndx < file.elf_sections.size())
    contents = {(char *)file.mf->data + shdr().sh_offset, (size_t)shdr().sh_size};

  if (shdr().sh_flags & SHF_COMPRESSED) {
    // For compressed sections, the real size and alignment come from
    // the compression header, not the section header.
    ElfChdr<E> &chdr = *(ElfChdr<E> *)&contents[0];
    sh_size = chdr.ch_size;
    p2align = to_p2align(chdr.ch_addralign);
  } else {
    sh_size = shdr().sh_size;
    p2align = to_p2align(shdr().sh_addralign);
  }

  // Sections may have been compressed. We usually uncompress them
  // directly into the mmap'ed output file, but we want to uncompress
  // early for REL-type ELF types to read relocation addends from
  // section contents. For RELA-type, we don't need to do this because
  // addends are in relocations.
  //
  // SH-4 stores addends to sections despite being RELA, which is a
  // special (and buggy) case.
  if constexpr (!E::is_rela || is_sh4<E>)
    uncompress(ctx);
}
// Decompresses this section into a freshly allocated buffer and
// repoints `contents` at it. No-op for uncompressed sections or if
// decompression already happened.
template <typename E>
void InputSection<E>::uncompress(Context<E> &ctx) {
  if (!(shdr().sh_flags & SHF_COMPRESSED) || uncompressed)
    return;

  u8 *buf = new u8[sh_size];
  uncompress_to(ctx, buf);
  contents = std::string_view((char *)buf, sh_size);
  // Hand ownership of the buffer to ctx so it outlives this section.
  ctx.string_pool.emplace_back(buf);
  uncompressed = true;
}
// Writes this section's uncompressed contents into `buf`, which must
// hold at least `sh_size` bytes. Supports zlib- and zstd-compressed
// sections; plain sections are just copied.
template <typename E>
void InputSection<E>::uncompress_to(Context<E> &ctx, u8 *buf) {
  if (!(shdr().sh_flags & SHF_COMPRESSED) || uncompressed) {
    memcpy(buf, contents.data(), contents.size());
    return;
  }

  if (contents.size() < sizeof(ElfChdr<E>))
    Fatal(ctx) << *this << ": corrupted compressed section";

  // The payload follows the compression header.
  ElfChdr<E> &hdr = *(ElfChdr<E> *)&contents[0];
  std::string_view data = contents.substr(sizeof(ElfChdr<E>));

  switch (hdr.ch_type) {
  case ELFCOMPRESS_ZLIB: {
    unsigned long size = sh_size;
    if (::uncompress(buf, &size, (u8 *)data.data(), data.size()) != Z_OK)
      Fatal(ctx) << *this << ": uncompress failed";
    assert(size == sh_size);
    break;
  }
  case ELFCOMPRESS_ZSTD:
    if (ZSTD_decompress(buf, sh_size, (u8 *)data.data(), data.size()) != sh_size)
      Fatal(ctx) << *this << ": ZSTD_decompress failed";
    break;
  default:
    Fatal(ctx) << *this << ": unsupported compression type: 0x"
               << std::hex << hdr.ch_type;
  }
}
// Looks up the action for a relocation against `sym` in a 3x4
// decision table indexed by [output kind][symbol kind].
template <typename E>
static Action get_rel_action(Context<E> &ctx, Symbol<E> &sym,
                             const Action table[3][4]) {
  // Row: 0 = shared object, 1 = PIE, 2 = position-dependent exec.
  auto get_output_type = [&] {
    if (ctx.arg.shared)
      return 0;
    if (ctx.arg.pie)
      return 1;
    return 2;
  };

  // Column: 0 = absolute, 1 = local, 2 = imported data,
  // 3 = imported code.
  auto get_sym_type = [&] {
    if (sym.is_absolute())
      return 0;
    if (!sym.is_imported)
      return 1;
    if (sym.get_type() != STT_FUNC)
      return 2;
    return 3;
  };

  return table[get_output_type()][get_sym_type()];
}
// Carries out the given Action for one relocation during the scan
// pass: reports errors, requests PLT entries / copy relocations via
// symbol flags, and counts the dynamic relocations to be emitted.
template <typename E>
static void scan_rel(Context<E> &ctx, InputSection<E> &isec, Symbol<E> &sym,
                     const ElfRel<E> &rel, Action action) {
  bool writable = (isec.shdr().sh_flags & SHF_WRITE);

  auto error = [&] {
    std::string msg = sym.is_absolute() ? "-fno-PIC" : "-fPIC";
    Error(ctx) << isec << ": " << rel << " relocation at offset 0x"
               << std::hex << rel.r_offset << " against symbol `"
               << sym << "' can not be used; recompile with " << msg;
  };

  // A dynamic relocation in a read-only section makes the output a
  // TEXTREL: an error under -z text, a warning with --warn-textrel.
  auto check_textrel = [&] {
    if (!writable) {
      if (ctx.arg.z_text) {
        error();
      } else if (ctx.arg.warn_textrel) {
        Warn(ctx) << isec << ": relocation against symbol `" << sym
                  << "' in read-only section";
      }
      ctx.has_textrel = true;
    }
  };

  auto copyrel = [&] {
    assert(sym.is_imported);
    if (sym.esym().st_visibility == STV_PROTECTED) {
      Error(ctx) << isec
                 << ": cannot make copy relocation for protected symbol '" << sym
                 << "', defined in " << *sym.file << "; recompile with -fPIC";
    }
    sym.flags |= NEEDS_COPYREL;
  };

  auto dynrel = [&] {
    check_textrel();
    isec.file.num_dynrel++;
  };

  switch (action) {
  case NONE:
    break;
  case ERROR:
    // Such relocations are always invalid in this output type.
    error();
    break;
  case COPYREL:
    // Copy relocations can be disabled with -z nocopyreloc.
    if (!ctx.arg.z_copyreloc)
      error();
    copyrel();
    break;
  case DYN_COPYREL:
    // Prefer a dynamic relocation; fall back to a copy relocation
    // only for read-only locations when copyreloc is allowed.
    if (writable || !ctx.arg.z_copyreloc)
      dynrel();
    else
      copyrel();
    break;
  case PLT:
    sym.flags |= NEEDS_PLT;
    break;
  case CPLT:
    // Canonical PLT: the PLT entry's address becomes the symbol's
    // address program-wide.
    sym.flags |= NEEDS_CPLT;
    break;
  case DYN_CPLT:
    if (writable)
      dynrel();
    else
      sym.flags |= NEEDS_CPLT;
    break;
  case DYNREL:
    dynrel();
    break;
  case BASEREL:
    check_textrel();
    // RELR-packable relocations don't consume a .rel[a].dyn slot.
    if (!isec.is_relr_reloc(ctx, rel))
      isec.file.num_dynrel++;
    break;
  case IFUNC:
    dynrel();
    ctx.num_ifunc_dynrels++;
    break;
  default:
    unreachable();
  }
}
// Decision table for PC-relative relocations.
template <typename E>
static Action get_pcrel_action(Context<E> &ctx, Symbol<E> &sym) {
  // This is for PC-relative relocations (e.g. R_X86_64_PC32).
  // We cannot promote them to dynamic relocations because the dynamic
  // linker generally does not support PC-relative relocations.
  constexpr static Action table[3][4] = {
    // Absolute  Local  Imported data  Imported code
    {  ERROR,    NONE,  ERROR,         PLT  },   // Shared object
    {  ERROR,    NONE,  COPYREL,       PLT  },   // Position-independent exec
    {  NONE,     NONE,  COPYREL,       CPLT },   // Position-dependent exec
  };
  return get_rel_action(ctx, sym, table);
}
// Decision table for sub-word-size absolute relocations.
template <typename E>
static Action get_absrel_action(Context<E> &ctx, Symbol<E> &sym) {
  // This is a decision table for absolute relocations that is smaller
  // than the word size (e.g. R_X86_64_32). Since the dynamic linker
  // generally does not support dynamic relocations smaller than the
  // word size, we need to report an error if a relocation cannot be
  // resolved at link-time.
  constexpr static Action table[3][4] = {
    // Absolute  Local  Imported data  Imported code
    {  NONE,     ERROR, ERROR,         ERROR },  // Shared object
    {  NONE,     ERROR, ERROR,         ERROR },  // Position-independent exec
    {  NONE,     NONE,  COPYREL,       CPLT  },  // Position-dependent exec
  };
  return get_rel_action(ctx, sym, table);
}
// Decision table for word-size absolute relocations.
template <typename E>
static Action get_dyn_absrel_action(Context<E> &ctx, Symbol<E> &sym) {
  // IFUNC targets always need an irelative relocation.
  if (sym.is_ifunc())
    return IFUNC;

  // This is a decision table for absolute relocations for the word
  // size data (e.g. R_X86_64_64). Unlike the absrel_table, we can emit
  // a dynamic relocation if we cannot resolve an address at link-time.
  constexpr static Action table[3][4] = {
    // Absolute  Local    Imported data  Imported code
    {  NONE,     BASEREL, DYNREL,        DYNREL   },  // Shared object
    {  NONE,     BASEREL, DYNREL,        DYNREL   },  // Position-independent exec
    {  NONE,     NONE,    DYN_COPYREL,   DYN_CPLT },  // Position-dependent exec
  };
  return get_rel_action(ctx, sym, table);
}
// Decision table for relocations against PPC64 .toc entries.
template <typename E>
static Action get_ppc64_toc_action(Context<E> &ctx, Symbol<E> &sym) {
  if (sym.is_ifunc())
    return IFUNC;

  // As a special case, we do not create copy relocations nor canonical
  // PLTs for .toc sections. PPC64's .toc is a compiler-generated
  // GOT-like section, and no user-generated code directly uses values
  // in it.
  constexpr static Action table[3][4] = {
    // Absolute  Local    Imported data  Imported code
    {  NONE,     BASEREL, DYNREL,        DYNREL },  // Shared object
    {  NONE,     BASEREL, DYNREL,        DYNREL },  // Position-independent exec
    {  NONE,     NONE,    DYNREL,        DYNREL },  // Position-dependent exec
  };
  return get_rel_action(ctx, sym, table);
}
// Scan-pass handler for PC-relative relocations (e.g. R_X86_64_PC32).
template <typename E>
void InputSection<E>::scan_pcrel(Context<E> &ctx, Symbol<E> &sym,
                                 const ElfRel<E> &rel) {
  scan_rel(ctx, *this, sym, rel, get_pcrel_action(ctx, sym));
}
// Scan-pass handler for sub-word absolute relocations (e.g. R_X86_64_32).
template <typename E>
void InputSection<E>::scan_absrel(Context<E> &ctx, Symbol<E> &sym,
                                  const ElfRel<E> &rel) {
  scan_rel(ctx, *this, sym, rel, get_absrel_action(ctx, sym));
}
// Scan-pass handler for word-size absolute relocations (e.g. R_X86_64_64).
template <typename E>
void InputSection<E>::scan_dyn_absrel(Context<E> &ctx, Symbol<E> &sym,
                                      const ElfRel<E> &rel) {
  scan_rel(ctx, *this, sym, rel, get_dyn_absrel_action(ctx, sym));
}
// Scan-pass handler for relocations against PPC64 .toc entries.
template <typename E>
void InputSection<E>::scan_toc_rel(Context<E> &ctx, Symbol<E> &sym,
                                   const ElfRel<E> &rel) {
  scan_rel(ctx, *this, sym, rel, get_ppc64_toc_action(ctx, sym));
}
// TLS local-exec relocations assume the definition lives in the main
// executable, so they cannot be used when building a shared object.
template <typename E>
void InputSection<E>::check_tlsle(Context<E> &ctx, Symbol<E> &sym,
                                  const ElfRel<E> &rel) {
  if (ctx.arg.shared)
    Error(ctx) << *this << ": relocation " << rel << " against `" << sym
               << "` can not be used when making a shared object;"
               << " recompile with -fPIC";
}
// Applies a word-size absolute relocation at apply time, emitting a
// dynamic relocation into `dynrel` whenever the final address cannot
// be fixed at link time. S = symbol value, A = addend, P = place
// (address being relocated).
template <typename E>
static void apply_absrel(Context<E> &ctx, InputSection<E> &isec,
                         Symbol<E> &sym, const ElfRel<E> &rel, u8 *loc,
                         u64 S, i64 A, u64 P, ElfRel<E> *&dynrel,
                         Action action) {
  bool writable = (isec.shdr().sh_flags & SHF_WRITE);

  auto apply_dynrel = [&] {
    *dynrel++ = ElfRel<E>(P, E::R_ABS, sym.get_dynsym_idx(ctx), A);
    // Optionally pre-store the addend at the relocated location.
    if (ctx.arg.apply_dynamic_relocs)
      *(Word<E> *)loc = A;
  };

  switch (action) {
  case COPYREL:
  case CPLT:
  case NONE:
    // Address is fully known at link time.
    *(Word<E> *)loc = S + A;
    break;
  case BASEREL:
    // Base-relative: either packed into .relr or emitted as an
    // ordinary R_RELATIVE.
    if (isec.is_relr_reloc(ctx, rel)) {
      *(Word<E> *)loc = S + A;
    } else {
      *dynrel++ = ElfRel<E>(P, E::R_RELATIVE, 0, S + A);
      if (ctx.arg.apply_dynamic_relocs)
        *(Word<E> *)loc = S + A;
    }
    break;
  case DYN_COPYREL:
    // Must mirror the decision made by scan_rel's DYN_COPYREL case.
    if (writable || !ctx.arg.z_copyreloc)
      apply_dynrel();
    else
      *(Word<E> *)loc = S + A;
    break;
  case DYN_CPLT:
    // Must mirror the decision made by scan_rel's DYN_CPLT case.
    if (writable)
      apply_dynrel();
    else
      *(Word<E> *)loc = S + A;
    break;
  case DYNREL:
    apply_dynrel();
    break;
  case IFUNC:
    if constexpr (supports_ifunc<E>) {
      u64 addr = sym.get_addr(ctx, NO_PLT) + A;
      *dynrel++ = ElfRel<E>(P, E::R_IRELATIVE, 0, addr);
      if (ctx.arg.apply_dynamic_relocs)
        *(Word<E> *)loc = addr;
    } else {
      unreachable();
    }
    break;
  default:
    unreachable();
  }
}
// Apply-pass handler for word-size absolute relocations.
template <typename E>
void InputSection<E>::apply_dyn_absrel(Context<E> &ctx, Symbol<E> &sym,
                                       const ElfRel<E> &rel, u8 *loc,
                                       u64 S, i64 A, u64 P,
                                       ElfRel<E> *&dynrel) {
  apply_absrel(ctx, *this, sym, rel, loc, S, A, P, dynrel,
               get_dyn_absrel_action(ctx, sym));
}
// Apply-pass handler for relocations against PPC64 .toc entries.
template <typename E>
void InputSection<E>::apply_toc_rel(Context<E> &ctx, Symbol<E> &sym,
                                    const ElfRel<E> &rel, u8 *loc,
                                    u64 S, i64 A, u64 P,
                                    ElfRel<E> *&dynrel) {
  apply_absrel(ctx, *this, sym, rel, loc, S, A, P, dynrel,
               get_ppc64_toc_action(ctx, sym));
}
// Copies this section to the output buffer (uncompressing if needed),
// then applies relocations unless producing a relocatable output.
template <typename E>
void InputSection<E>::write_to(Context<E> &ctx, u8 *buf) {
  if (shdr().sh_type == SHT_NOBITS || sh_size == 0)
    return;

  // Copy data
  if constexpr (is_riscv<E>)
    // RISC-V uses a dedicated copy path (see copy_contents_riscv).
    copy_contents_riscv(ctx, buf);
  else
    uncompress_to(ctx, buf);

  // Apply relocations
  if (!ctx.arg.relocatable) {
    if (shdr().sh_flags & SHF_ALLOC)
      apply_reloc_alloc(ctx, buf);
    else
      apply_reloc_nonalloc(ctx, buf);
  }
}
// Get the name of a function containing a given offset, by scanning
// the file's symbol table for an STT_FUNC symbol whose [value,
// value + size) range covers `offset`. Returns "" if none matches.
template <typename E>
std::string_view InputSection<E>::get_func_name(Context<E> &ctx, i64 offset) const {
  for (const ElfSym<E> &esym : file.elf_syms) {
    if (esym.st_shndx == shndx && esym.st_type == STT_FUNC &&
        esym.st_value <= offset && offset < esym.st_value + esym.st_size) {
      std::string_view name = file.symbol_strtab.data() + esym.st_name;
      if (ctx.arg.demangle)
        return demangle(name);
      return name;
    }
  }
  return "";
}
// Test if the symbol a given relocation refers to has already been
// resolved. If not, record that error and return true.
template <typename E>
bool InputSection<E>::record_undef_error(Context<E> &ctx, const ElfRel<E> &rel) {
  // If a relocation refers to a linker-synthesized symbol for a
  // section fragment, it's always been resolved.
  if (file.elf_syms.size() <= rel.r_sym)
    return false;

  Symbol<E> &sym = *file.symbols[rel.r_sym];
  const ElfSym<E> &esym = file.elf_syms[rel.r_sym];

  // If a symbol is defined in a comdat group, and the comdat group is
  // discarded, the symbol may not have an owner. It is technically a
  // violation of the One Definition Rule, so it is a programmer's fault.
  if (!sym.file) {
    Error(ctx) << *this << ": " << sym << " refers to a discarded COMDAT section"
               << " probably due to an ODR violation";
    return true;
  }

  // Records an error message for this reference, keyed by symbol name
  // so messages for the same symbol are aggregated.
  auto record = [&] {
    std::stringstream ss;
    if (std::string_view source = file.get_source_name(); !source.empty())
      ss << ">>> referenced by " << source << "\n";
    else
      ss << ">>> referenced by " << *this << "\n";
    ss << ">>> " << file;
    if (std::string_view func = get_func_name(ctx, rel.r_offset); !func.empty())
      ss << ":(" << func << ")";

    typename decltype(ctx.undef_errors)::accessor acc;
    ctx.undef_errors.insert(acc, {sym.name(), {}});
    acc->second.push_back(ss.str());
  };

  // A non-weak undefined symbol must be promoted to an imported
  // symbol or resolved to a defined symbol. Otherwise, it's an
  // undefined symbol error.
  //
  // Every ELF file has an absolute local symbol as its first symbol.
  // Referring to that symbol is always valid.
  bool is_undef = esym.is_undef() && !esym.is_weak() && sym.sym_idx;
  if (!sym.is_imported && is_undef && sym.esym().is_undef()) {
    record();
    return true;
  }

  // If a protected/hidden undefined symbol is resolved to other .so,
  // it's handled as if no symbols were found.
  if (sym.file->is_dso &&
      (sym.visibility == STV_PROTECTED || sym.visibility == STV_HIDDEN)) {
    record();
    return true;
  }

  return false;
}
using E = MOLD_TARGET;
template struct CieRecord<E>;
template class InputSection<E>;
} // namespace mold::elf

85
third_party/mold/elf/jobs.cc vendored Normal file
View file

@ -0,0 +1,85 @@
// clang-format off
#include "third_party/mold/elf/mold.h"
#ifndef _WIN32
#include "libc/calls/calls.h"
#include "libc/calls/struct/flock.h"
#include "libc/calls/weirdtypes.h"
#include "libc/sysv/consts/at.h"
#include "libc/sysv/consts/f.h"
#include "libc/sysv/consts/fd.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/posix.h"
#include "libc/sysv/consts/s.h"
#include "libc/sysv/consts/splice.h"
#include "third_party/musl/passwd.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/stat.h"
#include "libc/calls/struct/stat.macros.h"
#include "libc/calls/struct/timespec.h"
#include "libc/calls/weirdtypes.h"
#include "libc/sysv/consts/s.h"
#include "libc/sysv/consts/utime.h"
#include "libc/time/time.h"
#include "libc/calls/makedev.h"
#include "libc/calls/weirdtypes.h"
#include "libc/thread/thread.h"
#include "libc/calls/typedef/u.h"
#include "libc/calls/weirdtypes.h"
#include "libc/intrin/newbie.h"
#include "libc/sock/select.h"
#include "libc/sysv/consts/endian.h"
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/runtime/pathconf.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/sysconf.h"
#include "libc/sysv/consts/f.h"
#include "libc/sysv/consts/fileno.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/ok.h"
#include "libc/time/time.h"
#include "third_party/getopt/getopt.h"
#include "third_party/musl/crypt.h"
#include "third_party/musl/lockf.h"
#endif
namespace mold::elf {
// If MOLD_JOBS=1, serialize concurrent mold invocations machine-wide
// by taking an exclusive POSIX lock on ~/.mold-lock. Failures to
// create or lock the file are silently ignored (best effort); on
// success the descriptor is stashed in ctx so release_global_lock can
// unlock it.
template <typename E>
void acquire_global_lock(Context<E> &ctx) {
#ifndef _WIN32
  char *jobs = getenv("MOLD_JOBS");
  if (!jobs || std::string(jobs) != "1")
    return;

  char *home = getenv("HOME");
  if (!home)
    home = getpwuid(getuid())->pw_dir;

  std::string path = std::string(home) + "/.mold-lock";
  int fd = open(path.c_str(), O_WRONLY | O_CREAT | O_CLOEXEC, 0600);
  if (fd == -1)
    return;

  if (lockf(fd, F_LOCK, 0) == -1) {
    // Don't leak the descriptor when locking fails.
    close(fd);
    return;
  }

  ctx.global_lock_fd = fd;
#endif
}
// Releases the advisory lock taken by acquire_global_lock(), if any.
// Closing the descriptor is what drops the lockf() lock.
template <typename E>
void release_global_lock(Context<E> &ctx) {
#ifndef _WIN32
  if (!ctx.global_lock_fd)
    return;
  close(*ctx.global_lock_fd);
#endif
}
// Explicit instantiations for the build-time-selected target.
using E = MOLD_TARGET;
template void acquire_global_lock(Context<E> &);
template void release_global_lock(Context<E> &);
} // namespace mold::elf

425
third_party/mold/elf/linker-script.cc vendored Normal file
View file

@ -0,0 +1,425 @@
// clang-format off
// On Linux, /usr/lib/x86_64-linux-gnu/libc.so is not actually
// a shared object file but an ASCII text file containing a linker
// script to include a "real" libc.so file. Therefore, we need to
// support a (very limited) subset of the linker script language.
#include "third_party/mold/elf/mold.h"
#include "third_party/libcxx/cctype"
#include "third_party/libcxx/iomanip"
namespace mold::elf {
// The script file currently being parsed. Used only for diagnostics
// (file name, line and column in SyntaxError). thread_local so scripts
// can be parsed from multiple threads independently.
template <typename E>
static thread_local MappedFile<Context<E>> *current_file;

// Defined later in this file; forward-declared because
// parse_linker_script() must handle the VERSION command.
template <typename E>
void read_version_script(Context<E> &ctx, std::span<std::string_view> &tok);
// Returns the entire line of `input` that contains the byte at `pos`,
// without the trailing newline. `pos` must point into `input`.
static std::string_view get_line(std::string_view input, const char *pos) {
  assert(input.data() <= pos);
  assert(pos < input.data() + input.size());

  size_t off = pos - input.data();

  // The line begins just after the previous '\n', or at offset 0.
  size_t begin = input.rfind('\n', off);
  begin = (begin == std::string_view::npos) ? 0 : begin + 1;

  // The line ends at the next '\n', or at end-of-input.
  size_t end = input.find('\n', off);
  if (end == std::string_view::npos)
    end = input.size();

  return input.substr(begin, end - begin);
}
// Reports a linker-script syntax error and terminates the process.
// The constructor prints a "file:lineno: " header, the offending source
// line, and a caret pointing at the error column; callers may stream
// additional detail via operator<<. `errpos` must be a view into
// current_file<E>'s contents.
template <typename E>
class SyntaxError {
public:
  SyntaxError(Context<E> &ctx, std::string_view errpos) : out(ctx) {
    std::string_view contents = current_file<E>->get_contents();
    std::string_view line = get_line(contents, errpos.data());
    // Count the newlines preceding `line` to get a 1-based line number.
    i64 lineno = 1;
    for (i64 i = 0; contents.data() + i < line.data(); i++)
      if (contents[i] == '\n')
        lineno++;
    i64 column = errpos.data() - line.data();
    std::stringstream ss;
    ss << current_file<E>->name << ":" << lineno << ": ";
    // Indent the caret so it lines up under the error column, accounting
    // for the "mold: " prefix that the Fatal stream prepends.
    i64 indent = (i64)ss.tellp() + strlen("mold: ");
    ss << line << "\n" << std::setw(indent + column) << " " << "^ ";
    out << ss.str();
  }
  template <class T> SyntaxError &operator<<(T &&val) {
    out << std::forward<T>(val);
    return *this;
  }
  // Destroying the Fatal member terminates the process, so a SyntaxError
  // never returns to its creator once it goes out of scope.
  [[noreturn]] ~SyntaxError() = default;
  Fatal<Context<E>> out;
};
// Splits a linker script into tokens. Whitespace, "/*...*/" comments and
// "#" line comments are skipped; a double-quoted string becomes a single
// token (quotes included — unquote() strips them later); maximal runs of
// identifier/glob characters form one token; any other byte is a
// one-character token. All tokens are views into `input`, which must
// therefore outlive the returned vector.
template <typename E>
static std::vector<std::string_view>
tokenize(Context<E> &ctx, std::string_view input) {
  std::vector<std::string_view> vec;
  while (!input.empty()) {
    // Cast to unsigned char: passing a plain (possibly negative) char to
    // isspace() is undefined behavior for bytes >= 0x80.
    if (isspace((unsigned char)input[0])) {
      input = input.substr(1);
      continue;
    }
    if (input.starts_with("/*")) {
      i64 pos = input.find("*/", 2);
      if (pos == std::string_view::npos)
        SyntaxError(ctx, input) << "unclosed comment";
      input = input.substr(pos + 2);
      continue;
    }
    if (input[0] == '#') {
      i64 pos = input.find("\n", 1);
      if (pos == std::string_view::npos)
        break;
      input = input.substr(pos + 1);
      continue;
    }
    if (input[0] == '"') {
      i64 pos = input.find('"', 1);
      if (pos == std::string_view::npos)
        SyntaxError(ctx, input) << "unclosed string literal";
      // Keep the surrounding quotes as part of the token.
      vec.push_back(input.substr(0, pos + 1));
      input = input.substr(pos + 1);
      continue;
    }
    i64 pos = input.find_first_not_of(
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
        "0123456789_.$/\\~=+[]*?-!^:");
    if (pos == 0)
      pos = 1;                 // a lone punctuation char such as "(" or "{"
    else if (pos == input.npos)
      pos = input.size();      // the rest of the input is one token
    vec.push_back(input.substr(0, pos));
    input = input.substr(pos);
  }
  return vec;
}
// Consumes the expected token `str` from the front of `tok` and returns
// the remaining tokens. Terminates with a diagnostic if the next token is
// missing (Fatal) or different (SyntaxError) — both error paths do not
// return, which is why falling through to subspan(1) is safe.
template <typename E>
static std::span<std::string_view>
skip(Context<E> &ctx, std::span<std::string_view> tok, std::string_view str) {
  if (tok.empty())
    Fatal(ctx) << current_file<E>->name << ": expected '" << str
               << "', but got EOF";
  if (tok[0] != str)
    SyntaxError(ctx, tok[0]) << "expected '" << str << "'";
  return tok.subspan(1);
}
// Strips a surrounding pair of double quotes from a token, if present;
// unquoted tokens are returned unchanged.
static std::string_view unquote(std::string_view s) {
  if (s.empty() || s.front() != '"')
    return s;
  assert(s.back() == '"');
  return s.substr(1, s.size() - 2);
}
// Handles OUTPUT_FORMAT(...): the contents are irrelevant to us, so we
// simply consume every token up to and including the matching ")".
template <typename E>
static std::span<std::string_view>
read_output_format(Context<E> &ctx, std::span<std::string_view> tok) {
  tok = skip(ctx, tok, "(");
  while (!tok.empty()) {
    if (tok[0] == ")")
      return tok.subspan(1);
    tok = tok.subspan(1);
  }
  Fatal(ctx) << current_file<E>->name << ": expected ')', but got EOF";
  return tok; // unreachable — Fatal terminates
}
// Returns true if `path` is located inside ctx.arg.sysroot.
template <typename E>
static bool is_in_sysroot(Context<E> &ctx, std::string path) {
  std::string relative = to_abs_path(path)
                             .lexically_relative(to_abs_path(ctx.arg.sysroot))
                             .string();
  // "." would denote the sysroot directory itself and "../..." escapes
  // it; everything else is inside.
  return !(relative == "." || relative.starts_with("../"));
}
// Resolves a file path appearing in a linker script to an opened file,
// mimicking GNU ld's lookup rules: sysroot re-prefixing, "=" prefix,
// "-l" shorthand, then the -L library search path. On failure the final
// SyntaxError terminates the process, so the missing return is fine.
template <typename E>
static MappedFile<Context<E>> *resolve_path(Context<E> &ctx, std::string_view tok) {
  std::string str(unquote(tok));
  // GNU ld prepends the sysroot if a pathname starts with '/' and the
  // script being processed is in the sysroot. We do the same.
  if (str.starts_with('/') && is_in_sysroot(ctx, current_file<E>->name))
    return MappedFile<Context<E>>::must_open(ctx, ctx.arg.sysroot + str);
  // A "=" prefix explicitly requests a sysroot-relative path.
  if (str.starts_with('=')) {
    std::string path;
    if (ctx.arg.sysroot.empty())
      path = str.substr(1);
    else
      path = ctx.arg.sysroot + str.substr(1);
    return MappedFile<Context<E>>::must_open(ctx, path);
  }
  // "-lfoo" searches for the library just like the command-line option.
  if (str.starts_with("-l"))
    return find_library(ctx, str.substr(2));
  // Try the path as-is first, then relative to each -L directory.
  if (MappedFile<Context<E>> *mf = open_library(ctx, str))
    return mf;
  for (std::string_view dir : ctx.arg.library_paths) {
    std::string path = std::string(dir) + "/" + str;
    if (MappedFile<Context<E>> *mf = open_library(ctx, path))
      return mf;
  }
  SyntaxError(ctx, tok) << "library not found: " << str;
}
// Handles the parenthesized body of a GROUP or INPUT command. Each
// operand is resolved and loaded immediately via read_file(). An
// AS_NEEDED(...) sub-group is processed recursively with ctx.as_needed
// temporarily forced on. Returns the tokens after the closing ")".
template <typename E>
static std::span<std::string_view>
read_group(Context<E> &ctx, std::span<std::string_view> tok) {
  tok = skip(ctx, tok, "(");
  while (!tok.empty() && tok[0] != ")") {
    if (tok[0] == "AS_NEEDED") {
      // Save and restore the flag so nested groups behave correctly.
      bool orig = ctx.as_needed;
      ctx.as_needed = true;
      tok = read_group(ctx, tok.subspan(1));
      ctx.as_needed = orig;
      continue;
    }
    MappedFile<Context<E>> *mf = resolve_path(ctx, tok[0]);
    read_file(ctx, mf);
    tok = tok.subspan(1);
  }
  if (tok.empty())
    Fatal(ctx) << current_file<E>->name << ": expected ')', but got EOF";
  return tok.subspan(1);
}
// Parses a (very limited subset of a) linker script and executes its
// commands. Recognized constructs: OUTPUT_FORMAT(...), INPUT/GROUP(...),
// VERSION { ... }, "alias = target;" symbol assignments, and stray
// semicolons. Anything else is a fatal syntax error.
template <typename E>
void parse_linker_script(Context<E> &ctx, MappedFile<Context<E>> *mf) {
  current_file<E> = mf;
  std::vector<std::string_view> vec = tokenize(ctx, mf->get_contents());
  std::span<std::string_view> tok = vec;
  while (!tok.empty()) {
    if (tok[0] == "OUTPUT_FORMAT") {
      tok = read_output_format(ctx, tok.subspan(1));
    } else if (tok[0] == "INPUT" || tok[0] == "GROUP") {
      tok = read_group(ctx, tok.subspan(1));
    } else if (tok[0] == "VERSION") {
      tok = tok.subspan(1);
      tok = skip(ctx, tok, "{");
      read_version_script(ctx, tok);
      tok = skip(ctx, tok, "}");
    } else if (tok.size() > 3 && tok[1] == "=" && tok[3] == ";") {
      // "alias = target;" — record a defsym pair.
      ctx.arg.defsyms.emplace_back(get_symbol(ctx, unquote(tok[0])),
                                   get_symbol(ctx, unquote(tok[2])));
      tok = tok.subspan(4);
    } else if (tok[0] == ";") {
      tok = tok.subspan(1);
    } else {
      SyntaxError(ctx, tok[0]) << "unknown linker script token";
    }
  }
}
// Guesses which target machine a linker script is for, either from an
// explicit OUTPUT_FORMAT(bfd-name) or by probing the first file named by
// an INPUT/GROUP command. Returns "" if the type cannot be determined.
template <typename E>
std::string_view
get_script_output_type(Context<E> &ctx, MappedFile<Context<E>> *mf) {
  current_file<E> = mf;
  std::vector<std::string_view> vec = tokenize(ctx, mf->get_contents());
  std::span<std::string_view> tok = vec;
  // Only the two BFD names below are recognized here.
  if (tok.size() >= 3 && tok[0] == "OUTPUT_FORMAT" && tok[1] == "(") {
    if (tok[2] == "elf64-x86-64")
      return X86_64::target_name;
    if (tok[2] == "elf32-i386")
      return I386::target_name;
  }
  // Otherwise, open the first input file and ask what it is.
  if (tok.size() >= 3 && (tok[0] == "INPUT" || tok[0] == "GROUP") &&
      tok[1] == "(")
    if (MappedFile<Context<E>> *mf =
        MappedFile<Context<E>>::open(ctx, std::string(unquote(tok[2]))))
      return get_machine_type(ctx, mf);
  return "";
}
// Consumes a version-script section label ("global:" / "local:") from the
// front of `tok`. The tokenizer may deliver it either as one "label:"
// token or as "label" followed by ":". Returns true iff it was consumed.
static bool read_label(std::span<std::string_view> &tok,
                       std::string label) {
  if (!tok.empty() && tok[0] == label + ":") {
    tok = tok.subspan(1);
    return true;
  }
  if (tok.size() > 1 && tok[0] == label && tok[1] == ":") {
    tok = tok.subspan(2);
    return true;
  }
  return false;
}
// Parses the body of one version block: "{ global: pat; local: pat; }".
// Patterns seen while in the "global" part are recorded with `ver_idx`;
// patterns in the "local" part get VER_NDX_LOCAL. extern "C" / "C++"
// sub-blocks recurse with the demangling flag (`is_cpp`) adjusted. A bare
// "*" sets the default version instead of adding a pattern. Stops at the
// closing "}" without consuming it.
template <typename E>
static void
read_version_script_commands(Context<E> &ctx, std::span<std::string_view> &tok,
                             std::string_view ver_str, u16 ver_idx, bool is_cpp) {
  // A block starts in the "global" part until a "local:" label is seen.
  bool is_global = true;
  while (!tok.empty() && tok[0] != "}") {
    if (read_label(tok, "global")) {
      is_global = true;
      continue;
    }
    if (read_label(tok, "local")) {
      is_global = false;
      continue;
    }
    if (tok[0] == "extern") {
      tok = tok.subspan(1);
      if (!tok.empty() && tok[0] == "\"C\"") {
        tok = tok.subspan(1);
        tok = skip(ctx, tok, "{");
        read_version_script_commands(ctx, tok, ver_str, ver_idx, false);
      } else {
        tok = skip(ctx, tok, "\"C++\"");
        tok = skip(ctx, tok, "{");
        read_version_script_commands(ctx, tok, ver_str, ver_idx, true);
      }
      tok = skip(ctx, tok, "}");
      tok = skip(ctx, tok, ";");
      continue;
    }
    if (tok[0] == "*") {
      // "*" sets the default version for otherwise-unmatched symbols.
      ctx.default_version = (is_global ? ver_idx : (u32)VER_NDX_LOCAL);
      ctx.default_version_from_version_script = true;
    } else if (is_global) {
      ctx.version_patterns.push_back({unquote(tok[0]), current_file<E>->name,
                                      ver_str, ver_idx, is_cpp});
    } else {
      ctx.version_patterns.push_back({unquote(tok[0]), current_file<E>->name,
                                      ver_str, VER_NDX_LOCAL, is_cpp});
    }
    tok = tok.subspan(1);
    // Tolerate a missing ";" before the closing brace.
    if (!tok.empty() && tok[0] == "}")
      return;
    tok = skip(ctx, tok, ";");
  }
}
// Parses a whole version script: a sequence of version blocks, either
// anonymous ("{ ... };") or named ("VER_1 { ... };"), each optionally
// followed by a parent-version name before the ";". Stops at EOF or at a
// "}" belonging to an enclosing VERSION command.
template <typename E>
void read_version_script(Context<E> &ctx, std::span<std::string_view> &tok) {
  // New version indices start after the reserved ones and after any
  // versions already defined on the command line.
  u16 next_ver = VER_NDX_LAST_RESERVED + ctx.arg.version_definitions.size() + 1;
  while (!tok.empty() && tok[0] != "}") {
    std::string_view ver_str;
    u16 ver_idx;
    if (tok[0] == "{") {
      // An anonymous block uses the "global" pseudo-version.
      ver_str = "global";
      ver_idx = VER_NDX_GLOBAL;
    } else {
      ver_str = tok[0];
      ver_idx = next_ver++;
      ctx.arg.version_definitions.push_back(std::string(tok[0]));
      tok = tok.subspan(1);
    }
    tok = skip(ctx, tok, "{");
    read_version_script_commands(ctx, tok, ver_str, ver_idx, false);
    tok = skip(ctx, tok, "}");
    // An optional token before ";" names the parent version (e.g.
    // "} VER_1;"). We accept and ignore it.
    if (!tok.empty() && tok[0] != ";")
      tok = tok.subspan(1);
    tok = skip(ctx, tok, ";");
  }
}
// Parses a standalone version-script file (--version-script=FILE).
template <typename E>
void parse_version_script(Context<E> &ctx, MappedFile<Context<E>> *mf) {
  current_file<E> = mf;
  std::vector<std::string_view> tokens = tokenize(ctx, mf->get_contents());
  std::span<std::string_view> rest = tokens;
  read_version_script(ctx, rest);
  if (!rest.empty())
    SyntaxError(ctx, rest[0]) << "trailing garbage token";
}
// Parses the body of a --dynamic-list file. Every pattern is recorded
// with the "global" version (VER_NDX_GLOBAL). extern "C" / "C++" blocks
// recurse with the demangling flag (`is_cpp`) adjusted, and a bare "*"
// sets the default version. Stops at the closing "}".
template <typename E>
void read_dynamic_list_commands(Context<E> &ctx, std::span<std::string_view> &tok,
                                bool is_cpp) {
  while (!tok.empty() && tok[0] != "}") {
    if (tok[0] == "extern") {
      tok = tok.subspan(1);
      if (!tok.empty() && tok[0] == "\"C\"") {
        tok = tok.subspan(1);
        tok = skip(ctx, tok, "{");
        read_dynamic_list_commands(ctx, tok, false);
      } else {
        tok = skip(ctx, tok, "\"C++\"");
        tok = skip(ctx, tok, "{");
        read_dynamic_list_commands(ctx, tok, true);
      }
      tok = skip(ctx, tok, "}");
      tok = skip(ctx, tok, ";");
      continue;
    }
    if (tok[0] == "*")
      ctx.default_version = VER_NDX_GLOBAL;
    else
      ctx.version_patterns.push_back({unquote(tok[0]), current_file<E>->name,
                                      "global", VER_NDX_GLOBAL, is_cpp});
    tok = skip(ctx, tok.subspan(1), ";");
  }
}
// Parses a --dynamic-list file, whose top level is "{ pattern; ... };".
template <typename E>
void parse_dynamic_list(Context<E> &ctx, MappedFile<Context<E>> *mf) {
  current_file<E> = mf;
  std::vector<std::string_view> tokens = tokenize(ctx, mf->get_contents());
  std::span<std::string_view> rest = tokens;
  rest = skip(ctx, rest, "{");
  read_dynamic_list_commands(ctx, rest, false);
  rest = skip(ctx, rest, "}");
  rest = skip(ctx, rest, ";");
  if (!rest.empty())
    SyntaxError(ctx, rest[0]) << "trailing garbage token";
}
// Explicit instantiations for the build-time-selected target.
using E = MOLD_TARGET;
template void parse_linker_script(Context<E> &, MappedFile<Context<E>> *);
template std::string_view get_script_output_type(Context<E> &, MappedFile<Context<E>> *);
template void parse_version_script(Context<E> &, MappedFile<Context<E>> *);
template void parse_dynamic_list(Context<E> &, MappedFile<Context<E>> *);
} // namespace mold::elf

739
third_party/mold/elf/lto-unix.cc vendored Normal file
View file

@ -0,0 +1,739 @@
// clang-format off
// This file handles the linker plugin to support LTO (Link-Time
// Optimization).
//
// LTO is a technique to do whole-program optimization to a program. Since
// a linker sees the whole program as opposed to a single compilation
// unit, it in theory can do some optimizations that cannot be done in the
// usual separate compilation model. For example, LTO should be able to
// inline functions that are defined in other compilation unit.
//
// In GCC and Clang, all you have to do to enable LTO is adding the
// `-flto` flag to the compiler and the linker command lines. If `-flto`
// is given, the compiler generates a file that contains not machine code
// but the compiler's IR (intermediate representation). In GCC, the output
// is an ELF file which wraps GCC's IR. In LLVM, it's not even an ELF file
// but just a raw LLVM IR file.
//
// Here is what we have to do if at least one input file is not a usual
// ELF file but an IR object file:
//
// 1. Read symbols both from usual ELF files and from IR object files and
// resolve symbols as usual.
//
// 2. Pass all IR objects to the compiler backend. The compiler backend
// compiles the IRs and returns a few big ELF object files as a
// result.
//
// 3. Parse the returned ELF files and overwrite IR object symbols with
// the returned ones, discarding IR object files.
//
// 4. Continue the rest of the linking process as usual.
//
// When gcc or clang inovkes ld, they pass `-plugin linker-plugin.so` to
// the linker. The given .so file provides a way to call the compiler
// backend.
//
// The linker plugin API is documented at
// https://gcc.gnu.org/wiki/whopr/driver, though the document is a bit
// outdated.
//
// Frankly, the linker plugin API is peculiar and is not very easy to use.
// For some reason, the API functions don't return the result of a
// function call as a return value but instead calls other function with
// the result as its argument to "return" the result.
//
// For example, the first thing you need to do after dlopen()'ing a linker
// plugin .so is to call `onload` function with a list of callback
// functions. `onload` calls callbacks to notify about the pointers to
// other functions the linker plugin provides. I don't know why `onload`
// can't just return a list of functions or why the linker plugin can't
// define not only `onload` but other functions, but that's what it is.
//
// Here is the steps to use the linker plugin:
//
// 1. dlopen() the linker plugin .so and call `onload` to obtain pointers
// to other functions provided by the plugin.
//
// 2. Call `claim_file_hook` with an IR object file to read its symbol
// table. `claim_file_hook` calls the `add_symbols` callback to
// "return" a list of symbols.
//
// 3. `claim_file_hook` returns LDPT_OK only when the plugin wants to
// handle a given file. Since we pass only IR object files to the
// plugin in mold, it always returns LDPT_OK in our case.
//
// 4. Once we made a decision as to which object file to include into the
// output file, we call `all_symbols_read_hook` to compile IR objects
// into a few big ELF files. That function calls the `get_symbols`
// callback to ask us about the symbol resolution results. (The
// compiler backend needs to know whether an undefined symbol in an IR
// object was resolved to a regular object file or a shared object to
// do whole program optimization, for example.)
//
// 5. `all_symbols_read_hook` "returns" the result by calling the
// `add_input_file` callback. The callback is called with a path to an
// LTO'ed ELF file. We parse that ELF file and override symbols
// defined by IR objects with the ELF file's ones.
//
// 6. Lastly, we call `cleanup_hook` to remove temporary files created by
// the compiler backend.
#include "third_party/mold/elf/mold.h"
#include "third_party/mold/elf/lto.h"
#include "third_party/libcxx/cstdarg"
#include "third_party/libcxx/cstring"
#include "libc/runtime/dlfcn.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/flock.h"
#include "libc/calls/weirdtypes.h"
#include "libc/sysv/consts/at.h"
#include "libc/sysv/consts/f.h"
#include "libc/sysv/consts/fd.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/posix.h"
#include "libc/sysv/consts/s.h"
#include "libc/sysv/consts/splice.h"
#include "third_party/libcxx/sstream"
// MISSING #include <tbb/parallel_for_each.h>
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/runtime/pathconf.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/sysconf.h"
#include "libc/sysv/consts/f.h"
#include "libc/sysv/consts/fileno.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/ok.h"
#include "libc/time/time.h"
#include "third_party/getopt/getopt.h"
#include "third_party/musl/crypt.h"
#include "third_party/musl/lockf.h"
#if 0
# define LOG std::cerr
#else
# define LOG std::ostringstream()
#endif
namespace mold::elf {
// Global variables
// We store LTO-related information to global variables,
// as the LTO plugin is not thread-safe by design anyway.

// Linker context, stashed here so plain-C-style plugin callbacks can reach it.
template <typename E> static Context<E> *gctx;
// ELF objects the plugin "returned" via add_input_file().
template <typename E> static std::vector<ObjectFile<E> *> lto_objects;
// 0 = plugin not loaded, 1 = reading symbols, 2 = LTO compilation running.
static int phase = 0;
// Scratch buffer filled by add_symbols() during claim_file_hook().
static std::vector<PluginSymbol> plugin_symbols;
// Plugin entry points registered through the onload() transfer vector.
static ClaimFileHandler *claim_file_hook;
static AllSymbolsReadHandler *all_symbols_read_hook;
static CleanupHandler *cleanup_hook;
// Set when the plugin negotiates linker API v1 (see get_api_version()).
static bool is_gcc_linker_api_v1 = false;
// Event handlers
// Plugin callback: formats a printf-style diagnostic from the plugin and
// routes it to the matching mold channel. LDPL_ERROR/LDPL_FATAL terminate
// the link via Fatal.
template <typename E>
static PluginStatus message(PluginLevel level, const char *fmt, ...) {
  LOG << "message\n";
  Context<E> &ctx = *gctx<E>;
  // Messages longer than this buffer are silently truncated by vsnprintf.
  char buf[1000];
  va_list ap;
  va_start(ap, fmt);
  vsnprintf(buf, sizeof(buf), fmt, ap);
  va_end(ap);
  switch (level) {
  case LDPL_INFO:
    SyncOut(ctx) << buf;
    break;
  case LDPL_WARNING:
    Warn(ctx) << buf;
    break;
  case LDPL_ERROR:
  case LDPL_FATAL:
    Fatal(ctx) << buf;
  }
  return LDPS_OK;
}
// Plugin callback: remembers the plugin's claim_file function.
template <typename E>
static PluginStatus register_claim_file_hook(ClaimFileHandler fn) {
  claim_file_hook = fn;
  LOG << "register_claim_file_hook\n";
  return LDPS_OK;
}
// Plugin callback: remembers the plugin's all_symbols_read function.
template <typename E>
static PluginStatus
register_all_symbols_read_hook(AllSymbolsReadHandler fn) {
  all_symbols_read_hook = fn;
  LOG << "register_all_symbols_read_hook\n";
  return LDPS_OK;
}
// Plugin callback: remembers the plugin's cleanup function, invoked later
// from lto_cleanup().
template <typename E>
static PluginStatus register_cleanup_hook(CleanupHandler fn) {
  cleanup_hook = fn;
  LOG << "register_cleanup_hook\n";
  return LDPS_OK;
}
// Plugin callback: receives the symbol table of the file currently being
// claimed. Valid only during phase 1 (symbol reading); read_lto_object()
// consumes `plugin_symbols` right after claim_file_hook() returns.
static PluginStatus
add_symbols(void *handle, int nsyms, const PluginSymbol *psyms) {
  LOG << "add_symbols: " << nsyms << "\n";
  assert(phase == 1);
  plugin_symbols.assign(psyms, psyms + nsyms);
  return LDPS_OK;
}
// Plugin callback: the compiler backend "returns" an LTO-compiled ELF
// file by calling this with its path. We parse it like a regular input
// object so its symbols override the ones from the IR files.
template <typename E>
static PluginStatus add_input_file(const char *path) {
  LOG << "add_input_file: " << path << "\n";
  Context<E> &ctx = *gctx<E>;
  // Give LTO outputs increasing priorities starting at 100.
  static i64 file_priority = 100;
  MappedFile<Context<E>> *mf = MappedFile<Context<E>>::must_open(ctx, path);
  ObjectFile<E> *file = ObjectFile<E>::create(ctx, mf, "", false);
  ctx.obj_pool.emplace_back(file);
  lto_objects<E>.push_back(file);
  file->priority = file_priority++;
  file->is_alive = true;
  file->parse(ctx);
  file->resolve_symbols(ctx);
  return LDPS_OK;
}
// Plugin callback: no-op stub — mold never hands input files back
// through this interface.
static PluginStatus
get_input_file(const void *handle, struct PluginInputFile *file) {
  LOG << "get_input_file\n";
  return LDPS_OK;
}
// Plugin callback: the plugin is done with a claimed file, so close its
// descriptor (if still open) to conserve fds.
template <typename E>
static PluginStatus release_input_file(const void *handle) {
  LOG << "release_input_file\n";
  ObjectFile<E> &obj = *(ObjectFile<E> *)handle;
  if (obj.mf->fd == -1)
    return LDPS_OK;
  close(obj.mf->fd);
  obj.mf->fd = -1;
  return LDPS_OK;
}
// Plugin callback: no-op stub; mold does not let the plugin add libraries.
static PluginStatus add_input_library(const char *path) {
  LOG << "add_input_library\n";
  return LDPS_OK;
}
// Plugin callback: no-op stub; extra search paths from the plugin are ignored.
static PluginStatus set_extra_library_path(const char *path) {
  LOG << "set_extra_library_path\n";
  return LDPS_OK;
}
// Plugin callback: exposes the raw mapped bytes of a claimed file.
template <typename E>
static PluginStatus get_view(const void *handle, const void **view) {
  LOG << "get_view\n";
  ObjectFile<E> *file = (ObjectFile<E> *)handle;
  *view = (void *)file->mf->data;
  return LDPS_OK;
}
// The section-related callbacks below are no-op stubs: mold does not
// implement the plugin's section-inspection/reordering interface.
static PluginStatus
get_input_section_count(const void *handle, int *count) {
  LOG << "get_input_section_count\n";
  return LDPS_OK;
}
static PluginStatus
get_input_section_type(const PluginSection section, int *type) {
  LOG << "get_input_section_type\n";
  return LDPS_OK;
}
static PluginStatus
get_input_section_name(const PluginSection section,
                       char **section_name) {
  LOG << "get_input_section_name\n";
  return LDPS_OK;
}
static PluginStatus
get_input_section_contents(const PluginSection section,
                           const char **section_contents,
                           size_t *len) {
  LOG << "get_input_section_contents\n";
  return LDPS_OK;
}
static PluginStatus
update_section_order(const PluginSection *section_list,
                     int num_sections) {
  LOG << "update_section_order\n";
  return LDPS_OK;
}
static PluginStatus allow_section_ordering() {
  LOG << "allow_section_ordering\n";
  return LDPS_OK;
}
// Registered for LDPT_GET_SYMBOLS but expected never to be called; mold
// relies on the v2/v3 flavors below.
static PluginStatus
get_symbols_v1(const void *handle, int nsyms, PluginSymbol *psyms) {
  unreachable();
}
// get_symbols teaches the LTO plugin as to how we have resolved symbols.
// The plugin uses the symbol resolution info to optimize the program.
//
// For example, if a definition in an IR file is not referenced by
// non-IR objects at all, the plugin may choose to completely inline
// that definition within the IR objects and remove the symbol from the
// LTO result. On the other hand, if a definition is referenced by a
// non-IR object, it has to keep the symbol in the LTO result.
template <typename E>
static PluginStatus
get_symbols(const void *handle, int nsyms, PluginSymbol *psyms, bool is_v2) {
  ObjectFile<E> &file = *(ObjectFile<E> *)handle;
  assert(file.is_lto_obj);
  // If file is an archive member which was not chosen to be included in
  // the final result, we need to make the plugin ignore all its symbols.
  // Only the v1/v3 protocols can express this (LDPS_NO_SYMS).
  if (!file.is_alive) {
    assert(!is_v2);
    for (int i = 0; i < nsyms; i++)
      psyms[i].resolution = LDPR_PREEMPTED_REG;
    return LDPS_NO_SYMS;
  }
  // Maps mold's resolution state for one symbol onto the plugin's
  // LDPR_* categories.
  auto get_resolution = [&](ElfSym<E> &esym, Symbol<E> &sym) {
    if (!sym.file)
      return LDPR_UNDEF;
    if (sym.file == &file) {
      // This IR file holds the chosen definition.
      if (sym.referenced_by_regular_obj)
        return LDPR_PREVAILING_DEF;
      if (sym.is_exported)
        return is_v2 ? LDPR_PREVAILING_DEF : LDPR_PREVAILING_DEF_IRONLY_EXP;
      return LDPR_PREVAILING_DEF_IRONLY;
    }
    if (sym.file->is_dso)
      return LDPR_RESOLVED_DYN;
    if (((ObjectFile<E> *)sym.file)->is_lto_obj && !sym.is_wrapped)
      return esym.is_undef() ? LDPR_RESOLVED_IR : LDPR_PREEMPTED_IR;
    return esym.is_undef() ? LDPR_RESOLVED_EXEC : LDPR_PREEMPTED_REG;
  };
  // Set the symbol resolution results to psyms. Index 0 of
  // elf_syms/symbols is the null symbol, hence the +1 offset.
  for (i64 i = 0; i < nsyms; i++) {
    ElfSym<E> &esym = file.elf_syms[i + 1];
    Symbol<E> &sym = *file.symbols[i + 1];
    psyms[i].resolution = get_resolution(esym, sym);
  }
  return LDPS_OK;
}
// This function restarts mold itself with `--:lto-pass2` and
// `--:ignore-ir-file` flags. We do this as a workaround for the old
// linker plugins that do not support the get_symbols_v3 API.
//
// get_symbols_v1 and get_symbols_v2 don't provide a way to ignore an
// object file we previously passed to the linker plugin. So we can't
// "unload" object files in archives that we ended up not choosing to
// include into the final output.
//
// As a workaround, we restart the linker with a list of object files
// the linker has to ignore, so that it won't read the object files
// from archives next time.
//
// This is an ugly hack and should be removed once GCC adopts the v3 API.
// Re-executes mold with --:lto-pass2 plus an --:ignore-ir-file= flag for
// every IR object that ended up excluded, so the second pass will not
// feed those files to the plugin again. Never returns (execv or _exit).
template <typename E>
static void restart_process(Context<E> &ctx) {
  std::vector<const char *> args;
  // The strdup'ed strings are deliberately never freed; the process
  // image is replaced by execv below.
  for (std::string_view arg : ctx.cmdline_args)
    args.push_back(strdup(std::string(arg).c_str()));
  for (std::unique_ptr<ObjectFile<E>> &file : ctx.obj_pool)
    if (file->is_lto_obj && !file->is_alive)
      args.push_back(strdup(("--:ignore-ir-file=" +
                             file->mf->get_identifier()).c_str()));
  args.push_back("--:lto-pass2");
  args.push_back(nullptr);
  // Flush C++ streams; buffered output would otherwise be lost across execv.
  std::cout << std::flush;
  std::cerr << std::flush;
  std::string self = get_self_path();
  execv(self.c_str(), (char * const *)args.data());
  std::cerr << "execv failed: " << errno_string() << "\n";
  _exit(1);
}
// Wrappers selecting the protocol flavor for get_symbols(). Note that
// only v2 passes is_v2=true: the LDPS_NO_SYMS path for dropped archive
// members asserts !is_v2, i.e. it is reachable only via v3 (and v1).
template <typename E>
static PluginStatus
get_symbols_v2(const void *handle, int nsyms, PluginSymbol *psyms) {
  LOG << "get_symbols_v2\n";
  return get_symbols<E>(handle, nsyms, psyms, true);
}
template <typename E>
static PluginStatus
get_symbols_v3(const void *handle, int nsyms, PluginSymbol *psyms) {
  LOG << "get_symbols_v3\n";
  return get_symbols<E>(handle, nsyms, psyms, false);
}
// The callbacks below are no-op stubs: mold does not implement the
// plugin's unique-segment, section-size, or wrap-symbol interfaces.
static PluginStatus allow_unique_segment_for_sections() {
  LOG << "allow_unique_segment_for_sections\n";
  return LDPS_OK;
}
static PluginStatus
unique_segment_for_sections(const char *segment_name,
                            uint64_t flags,
                            uint64_t align,
                            const PluginSection *section_list,
                            int num_sections) {
  LOG << "unique_segment_for_sections\n";
  return LDPS_OK;
}
static PluginStatus
get_input_section_alignment(const PluginSection section,
                            int *addralign) {
  LOG << "get_input_section_alignment\n";
  return LDPS_OK;
}
static PluginStatus
get_input_section_size(const PluginSection section, uint64_t *size) {
  LOG << "get_input_section_size\n";
  return LDPS_OK;
}
template <typename E>
static PluginStatus
register_new_input_hook(NewInputHandler fn) {
  LOG << "register_new_input_hook\n";
  return LDPS_OK;
}
static PluginStatus
get_wrap_symbols(uint64_t *num_symbols, const char ***wrap_symbols) {
  LOG << "get_wrap_symbols\n";
  return LDPS_OK;
}
// Plugin callback (GCC linker API): negotiates the plugin API version and
// reports the linker's identity. mold supports V0 and V1; requesting a
// higher minimum is fatal.
template <typename E>
static PluginLinkerAPIVersion
get_api_version(const char *plugin_identifier,
                unsigned plugin_version,
                int minimal_api_supported,
                int maximal_api_supported,
                const char **linker_identifier,
                const char **linker_version) {
  if (LAPI_V1 < minimal_api_supported)
    Fatal(*gctx<E>) << "LTO plugin does not support V0 or V1 API";
  // `static` so the buffer outlives this call: we hand the plugin a raw
  // pointer via *linker_version, and a function-local string would leave
  // it dangling as soon as we return.
  static std::string version = mold_version + "\0"s;
  *linker_identifier = "mold";
  *linker_version = version.data();
  if (LAPI_V1 <= maximal_api_supported) {
    is_gcc_linker_api_v1 = true;
    return LAPI_V1;
  }
  return LAPI_V0;
}
// dlopens the linker plugin named by -plugin and calls its onload()
// entry point with the full transfer vector of mold's callbacks and
// options. Must be called exactly once (read_lto_object guards this with
// std::call_once); advances the global state machine to phase 1.
template <typename E>
static void load_plugin(Context<E> &ctx) {
  assert(phase == 0);
  phase = 1;
  gctx<E> = &ctx;
  void *handle = dlopen(ctx.arg.plugin.c_str(), RTLD_NOW | RTLD_GLOBAL);
  if (!handle)
    Fatal(ctx) << "could not open plugin file: " << dlerror();
  OnloadFn *onload = (OnloadFn *)dlsym(handle, "onload");
  if (!onload)
    Fatal(ctx) << "failed to load plugin " << ctx.arg.plugin << ": "
               << dlerror();
  // Copies a transient string into context-owned storage so the C string
  // we hand the plugin stays valid after this function returns.
  auto save = [&](std::string_view str) {
    return save_string(ctx, std::string(str).c_str()).data();
  };
  // Build the transfer vector ("tv") that onload() consumes; it is a
  // list of (tag, value) pairs terminated by LDPT_NULL.
  std::vector<PluginTagValue> tv;
  tv.emplace_back(LDPT_MESSAGE, message<E>);
  // Tell the plugin what kind of output we are producing.
  if (ctx.arg.shared)
    tv.emplace_back(LDPT_LINKER_OUTPUT, LDPO_DYN);
  else if (ctx.arg.pie)
    tv.emplace_back(LDPT_LINKER_OUTPUT, LDPO_PIE);
  else
    tv.emplace_back(LDPT_LINKER_OUTPUT, LDPO_EXEC);
  // Forward every -plugin-opt argument verbatim.
  for (std::string_view opt : ctx.arg.plugin_opt)
    tv.emplace_back(LDPT_OPTION, save(opt));
  tv.emplace_back(LDPT_REGISTER_CLAIM_FILE_HOOK, register_claim_file_hook<E>);
  tv.emplace_back(LDPT_REGISTER_ALL_SYMBOLS_READ_HOOK,
                  register_all_symbols_read_hook<E>);
  tv.emplace_back(LDPT_REGISTER_CLEANUP_HOOK, register_cleanup_hook<E>);
  tv.emplace_back(LDPT_ADD_SYMBOLS, add_symbols);
  tv.emplace_back(LDPT_GET_SYMBOLS, get_symbols_v1);
  tv.emplace_back(LDPT_ADD_INPUT_FILE, add_input_file<E>);
  tv.emplace_back(LDPT_GET_INPUT_FILE, get_input_file);
  tv.emplace_back(LDPT_RELEASE_INPUT_FILE, release_input_file<E>);
  tv.emplace_back(LDPT_ADD_INPUT_LIBRARY, add_input_library);
  tv.emplace_back(LDPT_OUTPUT_NAME, save(ctx.arg.output));
  tv.emplace_back(LDPT_SET_EXTRA_LIBRARY_PATH, set_extra_library_path);
  tv.emplace_back(LDPT_GET_VIEW, get_view<E>);
  tv.emplace_back(LDPT_GET_INPUT_SECTION_COUNT, get_input_section_count);
  tv.emplace_back(LDPT_GET_INPUT_SECTION_TYPE, get_input_section_type);
  tv.emplace_back(LDPT_GET_INPUT_SECTION_NAME, get_input_section_name);
  tv.emplace_back(LDPT_GET_INPUT_SECTION_CONTENTS, get_input_section_contents);
  tv.emplace_back(LDPT_UPDATE_SECTION_ORDER, update_section_order);
  tv.emplace_back(LDPT_ALLOW_SECTION_ORDERING, allow_section_ordering);
  tv.emplace_back(LDPT_ADD_SYMBOLS_V2, add_symbols);
  tv.emplace_back(LDPT_GET_SYMBOLS_V2, get_symbols_v2<E>);
  tv.emplace_back(LDPT_ALLOW_UNIQUE_SEGMENT_FOR_SECTIONS,
                  allow_unique_segment_for_sections);
  tv.emplace_back(LDPT_UNIQUE_SEGMENT_FOR_SECTIONS, unique_segment_for_sections);
  tv.emplace_back(LDPT_GET_SYMBOLS_V3, get_symbols_v3<E>);
  tv.emplace_back(LDPT_GET_INPUT_SECTION_ALIGNMENT, get_input_section_alignment);
  tv.emplace_back(LDPT_GET_INPUT_SECTION_SIZE, get_input_section_size);
  tv.emplace_back(LDPT_REGISTER_NEW_INPUT_HOOK, register_new_input_hook<E>);
  tv.emplace_back(LDPT_GET_WRAP_SYMBOLS, get_wrap_symbols);
  tv.emplace_back(LDPT_GET_API_VERSION, get_api_version<E>);
  tv.emplace_back(LDPT_NULL, 0);
  // onload() "returns" the plugin's functions by invoking the register_*
  // callbacks above; its direct return value only signals success.
  [[maybe_unused]] PluginStatus status = onload(tv.data());
  assert(status == LDPS_OK);
}
// Converts a plugin-reported symbol into a mold ELF symbol. Definedness,
// binding, type, visibility, and size are mapped; all other fields stay
// zeroed. Defined symbols get SHN_ABS as a placeholder section index —
// they are replaced by real ELF symbols after the LTO compilation.
template <typename E>
static ElfSym<E> to_elf_sym(PluginSymbol &psym) {
  ElfSym<E> esym;
  memset(&esym, 0, sizeof(esym));
  // Definedness and binding.
  switch (psym.def) {
  case LDPK_DEF:
    esym.st_shndx = SHN_ABS;
    break;
  case LDPK_WEAKDEF:
    esym.st_shndx = SHN_ABS;
    esym.st_bind = STB_WEAK;
    break;
  case LDPK_UNDEF:
    esym.st_shndx = SHN_UNDEF;
    break;
  case LDPK_WEAKUNDEF:
    esym.st_shndx = SHN_UNDEF;
    esym.st_bind = STB_WEAK;
    break;
  case LDPK_COMMON:
    esym.st_shndx = SHN_COMMON;
    break;
  }
  // Symbol type.
  switch (psym.symbol_type) {
  case LDST_UNKNOWN:
    break;
  case LDST_FUNCTION:
    esym.st_type = STT_FUNC;
    break;
  case LDST_VARIABLE:
    esym.st_type = STT_OBJECT;
    break;
  };
  // Visibility.
  switch (psym.visibility) {
  case LDPV_DEFAULT:
    break;
  case LDPV_PROTECTED:
    esym.st_visibility = STV_PROTECTED;
    break;
  case LDPV_INTERNAL:
    esym.st_visibility = STV_INTERNAL;
    break;
  case LDPV_HIDDEN:
    esym.st_visibility = STV_HIDDEN;
    break;
  }
  esym.st_size = psym.size;
  return esym;
}
// Returns true if a given linker plugin looks like LLVM's one.
// Returns false if it's GCC. Note this is a file-name heuristic: a
// renamed LLVMgold.so would be misdetected as GCC's plugin.
template <typename E>
static bool is_llvm(Context<E> &ctx) {
  return ctx.arg.plugin.ends_with("LLVMgold.so");
}
// Returns true if a given linker plugin supports the get_symbols_v3 API.
// Any version of LLVM and GCC 12 or newer support it.
template <typename E>
static bool supports_v3_api(Context<E> &ctx) {
  return is_gcc_linker_api_v1 || is_llvm(ctx);
}
// Reads the symbol table of an IR object file by handing the file to the
// linker plugin (loading the plugin first if needed) and turning the
// symbols it reports back into a mold ObjectFile.
template <typename E>
ObjectFile<E> *read_lto_object(Context<E> &ctx, MappedFile<Context<E>> *mf) {
  // V0 API's claim_file is not thread-safe.
  static std::mutex mu;
  std::unique_lock lock(mu, std::defer_lock);
  if (!is_gcc_linker_api_v1)
    lock.lock();
  if (ctx.arg.plugin.empty())
    Fatal(ctx) << mf->name << ": don't know how to handle this LTO object file "
               << "because no -plugin option was given. Please make sure you "
               << "added -flto not only for creating object files but also for "
               << "creating the final executable.";
  // dlopen the linker plugin file
  static std::once_flag flag;
  std::call_once(flag, [&] { load_plugin(ctx); });
  // Create mold's object instance
  ObjectFile<E> *obj = new ObjectFile<E>;
  ctx.obj_pool.emplace_back(obj);
  obj->filename = mf->name;
  obj->symbols.push_back(new Symbol<E>);  // slot 0 is the null symbol
  obj->first_global = 1;
  obj->is_lto_obj = true;
  obj->mf = mf;
  // Create plugin's object instance. For an archive member, the plugin
  // gets the archive's fd plus an offset into it.
  PluginInputFile file = {};
  MappedFile<Context<E>> *mf2 = mf->parent ? mf->parent : mf;
  file.name = save_string(ctx, mf2->name).data();
  if (mf2->fd == -1)
    mf2->fd = open(file.name, O_RDONLY);
  file.fd = mf2->fd;
  if (file.fd == -1)
    Fatal(ctx) << "cannot open " << file.name << ": " << errno_string();
  if (mf->parent)
    obj->archive_name = mf->parent->name;
  file.offset = mf->get_offset();
  file.filesize = mf->size;
  file.handle = (void *)obj;
  LOG << "read_lto_symbols: " << mf->name << "\n";
  // claim_file_hook() calls add_symbols() which initializes `plugin_symbols`
  int claimed = false;
  claim_file_hook(&file, &claimed);
  if (!claimed)
    Fatal(ctx) << mf->name << ": not claimed by the LTO plugin;"
               << " please make sure you are using the same compiler of the"
               << " same version for all object files";
  // It looks like GCC doesn't need fd after claim_file_hook() while
  // LLVM needs it and takes the ownership of fd. To prevent "too many
  // open files" issue, we close fd only for GCC. This is ugly, though.
  if (!is_llvm(ctx)) {
    close(mf2->fd);
    mf2->fd = -1;
  }
  // Initialize object symbols
  // NOTE(review): this vector is heap-allocated and never freed on
  // purpose, it appears — obj->elf_syms seems to retain a view into it,
  // so it must outlive this function. Confirm before "fixing" the leak.
  std::vector<ElfSym<E>> *esyms = new std::vector<ElfSym<E>>(1);
  for (PluginSymbol &psym : plugin_symbols) {
    esyms->push_back(to_elf_sym<E>(psym));
    obj->symbols.push_back(get_symbol(ctx, save_string(ctx, psym.name)));
  }
  obj->elf_syms = *esyms;
  obj->has_symver.resize(esyms->size());
  plugin_symbols.clear();
  return obj;
}
// Entry point
// Drive link-time optimization: tell the plugin which symbols must
// survive, then invoke its all_symbols_read hook (which hands back the
// compiled ELF objects via add_input_file()/add_input_library()).
template <typename E>
std::vector<ObjectFile<E> *> do_lto(Context<E> &ctx) {
  Timer t(ctx, "do_lto");

  if (!ctx.arg.lto_pass2 && !supports_v3_api(ctx))
    restart_process(ctx);

  assert(phase == 1);
  phase = 2;

  // Mark every symbol that a non-LTO object resolves to an LTO object,
  // so the plugin knows regular code references it.
  tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *obj) {
    if (obj->is_lto_obj)
      return;

    for (i64 idx = obj->first_global; idx < (i64)obj->symbols.size(); idx++) {
      Symbol<E> &sym = *obj->symbols[idx];
      bool defined_in_lto_obj =
        sym.file && !sym.file->is_dso && ((ObjectFile<E> *)sym.file)->is_lto_obj;
      if (defined_in_lto_obj) {
        std::scoped_lock lock(sym.mu);
        sym.referenced_by_regular_obj = true;
      }
    }
  });

  // Symbols named by --wrap (plus their __wrap_/__real_ aliases) must
  // also stay visible from regular object files.
  for (std::string_view name : ctx.arg.wrap) {
    get_symbol(ctx, name)->referenced_by_regular_obj = true;

    std::string_view wrap_name = save_string(ctx, "__wrap_" + std::string(name));
    std::string_view real_name = save_string(ctx, "__real_" + std::string(name));

    get_symbol(ctx, wrap_name)->referenced_by_regular_obj = true;
    get_symbol(ctx, real_name)->referenced_by_regular_obj = true;
  }

  // all_symbols_read_hook() calls add_input_file() and add_input_library()
  LOG << "all symbols read\n";
  if (PluginStatus status = all_symbols_read_hook(); status != LDPS_OK)
    Fatal(ctx) << "LTO: all_symbols_read_hook returns " << status;

  return lto_objects<E>;
}
// Invoke the plugin's registered cleanup hook, if it installed one.
template <typename E>
void lto_cleanup(Context<E> &ctx) {
  Timer timer(ctx, "lto_cleanup");
  if (cleanup_hook)
    cleanup_hook();
}
// Explicit instantiations for the concrete target (MOLD_TARGET expands
// to one target type per translation unit, e.g. X86_64).
using E = MOLD_TARGET;
template ObjectFile<E> *read_lto_object(Context<E> &, MappedFile<Context<E>> *);
template std::vector<ObjectFile<E> *> do_lto(Context<E> &);
template void lto_cleanup(Context<E> &);
} // namespace mold::elf

26
third_party/mold/elf/lto-win32.cc vendored Normal file
View file

@ -0,0 +1,26 @@
// clang-format off
#include "third_party/mold/elf/mold.h"
#include "third_party/mold/elf/lto.h"
namespace mold::elf {
// Windows build stub: mold's LTO support is built on the Unix linker
// plugin mechanism, so encountering an LTO object here is fatal.
template <typename E>
ObjectFile<E> *read_lto_object(Context<E> &ctx, MappedFile<Context<E>> *mf) {
  Fatal(ctx) << "LTO is not supported on Windows";
}
// Windows build stub: no LTO objects can ever exist, so there is
// nothing to compile; return an empty list.
template <typename E>
std::vector<ObjectFile<E> *> do_lto(Context<E> &ctx) {
  return {};
}
// Windows build stub: no plugin was loaded, so nothing to clean up.
template <typename E>
void lto_cleanup(Context<E> &ctx) {}
// Explicit instantiations for the concrete target (MOLD_TARGET expands
// to one target type per translation unit).
using E = MOLD_TARGET;
template ObjectFile<E> *read_lto_object(Context<E> &, MappedFile<Context<E>> *);
template std::vector<ObjectFile<E> *> do_lto(Context<E> &);
template void lto_cleanup(Context<E> &);
} // namespace mold::elf

6
third_party/mold/elf/lto.cc vendored Normal file
View file

@ -0,0 +1,6 @@
// clang-format off
#ifdef _WIN32
#include "third_party/mold/elf/lto-win32.cc"
#else
#include "third_party/mold/elf/lto-unix.cc"
#endif

167
third_party/mold/elf/lto.h vendored Normal file
View file

@ -0,0 +1,167 @@
// clang-format off
#pragma once
// MISSING #include "../common/integers.h"
namespace mold {
// Status code returned by the plugin and by linker-provided callbacks.
// These definitions mirror the GNU ld linker plugin interface
// (plugin-api.h). Enumerator order fixes the ABI values exchanged with
// compiler plugins — never reorder or insert in the middle.
enum PluginStatus {
  LDPS_OK,
  LDPS_NO_SYMS,
  LDPS_BAD_HANDLE,
  LDPS_ERR,
};

// Tag for each entry of the transfer vector handed to the plugin's
// onload() entry point; the tag selects how the entry's payload
// (int or pointer) is interpreted.
enum PluginTag {
  LDPT_NULL,
  LDPT_API_VERSION,
  LDPT_GOLD_VERSION,
  LDPT_LINKER_OUTPUT,
  LDPT_OPTION,
  LDPT_REGISTER_CLAIM_FILE_HOOK,
  LDPT_REGISTER_ALL_SYMBOLS_READ_HOOK,
  LDPT_REGISTER_CLEANUP_HOOK,
  LDPT_ADD_SYMBOLS,
  LDPT_GET_SYMBOLS,
  LDPT_ADD_INPUT_FILE,
  LDPT_MESSAGE,
  LDPT_GET_INPUT_FILE,
  LDPT_RELEASE_INPUT_FILE,
  LDPT_ADD_INPUT_LIBRARY,
  LDPT_OUTPUT_NAME,
  LDPT_SET_EXTRA_LIBRARY_PATH,
  LDPT_GNU_LD_VERSION,
  LDPT_GET_VIEW,
  LDPT_GET_INPUT_SECTION_COUNT,
  LDPT_GET_INPUT_SECTION_TYPE,
  LDPT_GET_INPUT_SECTION_NAME,
  LDPT_GET_INPUT_SECTION_CONTENTS,
  LDPT_UPDATE_SECTION_ORDER,
  LDPT_ALLOW_SECTION_ORDERING,
  LDPT_GET_SYMBOLS_V2,
  LDPT_ALLOW_UNIQUE_SEGMENT_FOR_SECTIONS,
  LDPT_UNIQUE_SEGMENT_FOR_SECTIONS,
  LDPT_GET_SYMBOLS_V3,
  LDPT_GET_INPUT_SECTION_ALIGNMENT,
  LDPT_GET_INPUT_SECTION_SIZE,
  LDPT_REGISTER_NEW_INPUT_HOOK,
  LDPT_GET_WRAP_SYMBOLS,
  LDPT_ADD_SYMBOLS_V2,
  LDPT_GET_API_VERSION,
};

// Plugin API version this linker implements (LDPT_API_VERSION payload).
enum PluginApiVersion {
  LD_PLUGIN_API_VERSION = 1,
};
// One entry of the onload() transfer vector: a tag plus either an
// integer or a pointer payload, selected by the tag.
struct PluginTagValue {
  PluginTagValue(PluginTag tag, int val) : tag(tag), val(val) {}

  template <typename T>
  PluginTagValue(PluginTag tag, T *ptr) : tag(tag), ptr((void *)ptr) {}

  PluginTag tag;
  union {
    int val;
    void *ptr;
  };
};

// Kind of output being produced, reported via LDPT_LINKER_OUTPUT.
enum PluginOutputFileType {
  LDPO_REL,
  LDPO_EXEC,
  LDPO_DYN,
  LDPO_PIE,
};

// Describes an input file passed to the plugin's claim-file hook.
struct PluginInputFile {
  const char *name;
  i32 fd;          // open descriptor; LLVM's plugin takes ownership of it
  u64 offset;      // byte offset of the member when inside an archive
  u64 filesize;
  void *handle;    // linker-private cookie (mold stores the ObjectFile *)
};
// Identifies one section of a claimed input file.
struct PluginSection {
  const void *handle;
  u32 shndx;
};

// Symbol record exchanged with the plugin (add_symbols/get_symbols).
// NOTE(review): the endianness-dependent ordering of the four u8
// fields appears intended to match the packed-int field layout of
// GNU plugin-api.h on each host byte order — confirm against that
// header before changing anything here.
struct PluginSymbol {
  char *name;
  char *version;
#ifdef __LITTLE_ENDIAN__
  u8 def;            // a PluginSymbolKind value
  u8 symbol_type;    // a PluginSymbolType value
  u8 section_kind;   // a PluginSymbolSectionKind value
  u8 padding;
#else
  u8 padding;
  u8 section_kind;
  u8 symbol_type;
  u8 def;
#endif
  i32 visibility;    // a PluginSymbolVisibility value
  u64 size;
  char *comdat_key;
  i32 resolution;    // a PluginSymbolResolution value
};
// Definition strength of a plugin symbol (PluginSymbol::def).
// Enumerator order fixes the ABI values — do not reorder.
enum PluginSymbolKind {
  LDPK_DEF,
  LDPK_WEAKDEF,
  LDPK_UNDEF,
  LDPK_WEAKUNDEF,
  LDPK_COMMON,
};

// ELF-style visibility of a plugin symbol (PluginSymbol::visibility).
enum PluginSymbolVisibility {
  LDPV_DEFAULT,
  LDPV_PROTECTED,
  LDPV_INTERNAL,
  LDPV_HIDDEN,
};

// Rough type of a plugin symbol (PluginSymbol::symbol_type).
enum PluginSymbolType {
  LDST_UNKNOWN,
  LDST_FUNCTION,
  LDST_VARIABLE,
};

// Section classification of a plugin symbol (PluginSymbol::section_kind).
enum PluginSymbolSectionKind {
  LDSSK_DEFAULT,
  LDSSK_BSS,
};

// How the linker resolved a symbol; reported back to the plugin
// through get_symbols (PluginSymbol::resolution).
enum PluginSymbolResolution {
  LDPR_UNKNOWN,
  LDPR_UNDEF,
  LDPR_PREVAILING_DEF,
  LDPR_PREVAILING_DEF_IRONLY,
  LDPR_PREEMPTED_REG,
  LDPR_PREEMPTED_IR,
  LDPR_RESOLVED_IR,
  LDPR_RESOLVED_EXEC,
  LDPR_RESOLVED_DYN,
  LDPR_PREVAILING_DEF_IRONLY_EXP,
};

// Severity for the plugin's LDPT_MESSAGE callback.
enum PluginLevel {
  LDPL_INFO,
  LDPL_WARNING,
  LDPL_ERROR,
  LDPL_FATAL,
};

// Linker-side API revision, reported via LDPT_GET_API_VERSION.
enum PluginLinkerAPIVersion {
  LAPI_V0,
  LAPI_V1,
};
// Function types for the plugin's onload() entry point and for the
// hooks it registers through the transfer vector.
typedef PluginStatus OnloadFn(PluginTagValue *tv);
typedef PluginStatus ClaimFileHandler(const PluginInputFile *, int *);
typedef PluginStatus AllSymbolsReadHandler();
typedef PluginStatus CleanupHandler();
typedef PluginStatus NewInputHandler(const PluginInputFile *);
} // namespace mold

812
third_party/mold/elf/main.cc vendored Normal file
View file

@ -0,0 +1,812 @@
// clang-format off
#include "third_party/mold/elf/mold.h"
// MISSING #include "../common/archive-file.h"
// MISSING #include "../common/cmdline.h"
// MISSING #include "../common/output-file.h"
#include "third_party/libcxx/cstring"
#include "third_party/libcxx/functional"
#include "third_party/libcxx/iomanip"
#include "third_party/libcxx/map"
#include "third_party/libcxx/regex"
#include "libc/calls/calls.h"
#include "libc/calls/sigtimedwait.h"
#include "libc/calls/struct/sigaction.h"
#include "libc/calls/struct/siginfo.h"
#include "libc/sysv/consts/sa.h"
#include "libc/sysv/consts/sicode.h"
#include "libc/sysv/consts/ss.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/stat.h"
#include "libc/calls/struct/stat.macros.h"
#include "libc/calls/struct/timespec.h"
#include "libc/calls/weirdtypes.h"
#include "libc/sysv/consts/s.h"
#include "libc/sysv/consts/utime.h"
#include "libc/time/time.h"
#include "libc/calls/makedev.h"
#include "libc/calls/weirdtypes.h"
#include "libc/thread/thread.h"
#include "libc/calls/typedef/u.h"
#include "libc/calls/weirdtypes.h"
#include "libc/intrin/newbie.h"
#include "libc/sock/select.h"
#include "libc/sysv/consts/endian.h"
// MISSING #include <tbb/global_control.h>
// MISSING #include <tbb/parallel_for_each.h>
#include "third_party/libcxx/unordered_set"
#ifdef _WIN32
// MISSING #include <direct.h>
// On Windows the POSIX chdir() is spelled _chdir() in the CRT, so map
// the name the code below uses onto the CRT one (upstream mold does
// `#define chdir _chdir`; the transcribed direction was reversed and
// would leave chdir() unresolved on a real MSVC build).
# define chdir _chdir
#else
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/runtime/pathconf.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/sysconf.h"
#include "libc/sysv/consts/f.h"
#include "libc/sysv/consts/fileno.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/ok.h"
#include "libc/time/time.h"
#include "third_party/getopt/getopt.h"
#include "third_party/musl/crypt.h"
#include "third_party/musl/lockf.h"
#endif
namespace mold::elf {
// Read the beginning of a given file and returns its machine type
// (e.g. EM_X86_64 or EM_386).
template <typename E>
std::string_view get_machine_type(Context<E> &ctx, MappedFile<Context<E>> *mf) {
  // Decode e_machine from a raw ELF header. The ElfEhdr<I386> /
  // ElfEhdr<M68K> casts merely select a little-endian or big-endian
  // view of the header fields; any LE/BE target type would do.
  auto get_elf_type = [&](u8 *buf) -> std::string_view {
    bool is_le = (((ElfEhdr<I386> *)buf)->e_ident[EI_DATA] == ELFDATA2LSB);
    bool is_64;
    u32 e_machine;

    if (is_le) {
      auto &ehdr = *(ElfEhdr<I386> *)buf;
      is_64 = (ehdr.e_ident[EI_CLASS] == ELFCLASS64);
      e_machine = ehdr.e_machine;
    } else {
      auto &ehdr = *(ElfEhdr<M68K> *)buf;
      is_64 = (ehdr.e_ident[EI_CLASS] == ELFCLASS64);
      e_machine = ehdr.e_machine;
    }

    switch (e_machine) {
    case EM_386:
      return I386::target_name;
    case EM_X86_64:
      return X86_64::target_name;
    case EM_ARM:
      return ARM32::target_name;
    case EM_AARCH64:
      return ARM64::target_name;
    case EM_RISCV:
      // RISC-V comes in four flavors: {32,64}-bit x {little,big}-endian.
      if (is_le)
        return is_64 ? RV64LE::target_name : RV32LE::target_name;
      return is_64 ? RV64BE::target_name : RV32BE::target_name;
    case EM_PPC:
      return PPC32::target_name;
    case EM_PPC64:
      // Little-endian PPC64 uses the ELFv2 ABI, big-endian ELFv1.
      return is_le ? PPC64V2::target_name : PPC64V1::target_name;
    case EM_S390X:
      return S390X::target_name;
    case EM_SPARC64:
      return SPARC64::target_name;
    case EM_68K:
      return M68K::target_name;
    case EM_SH:
      return SH4::target_name;
    case EM_ALPHA:
      return ALPHA::target_name;
    default:
      return "";
    }
  };

  switch (get_file_type(ctx, mf)) {
  case FileType::ELF_OBJ:
  case FileType::ELF_DSO:
  case FileType::GCC_LTO_OBJ:
    return get_elf_type(mf->data);
  case FileType::AR:
    // For an archive, the first ELF object member decides the type.
    for (MappedFile<Context<E>> *child : read_fat_archive_members(ctx, mf))
      if (get_file_type(ctx, child) == FileType::ELF_OBJ)
        return get_elf_type(child->data);
    return "";
  case FileType::THIN_AR:
    for (MappedFile<Context<E>> *child : read_thin_archive_members(ctx, mf))
      if (get_file_type(ctx, child) == FileType::ELF_OBJ)
        return get_elf_type(child->data);
    return "";
  case FileType::TEXT:
    // A linker script may declare its output format; use that.
    return get_script_output_type(ctx, mf);
  default:
    return "";
  }
}
// Abort with a diagnostic unless `mf` is for the machine type we are
// linking for (ctx.arg.emulation).
template <typename E>
static void
check_file_compatibility(Context<E> &ctx, MappedFile<Context<E>> *mf) {
  std::string_view detected = get_machine_type(ctx, mf);
  if (detected == ctx.arg.emulation)
    return;
  Fatal(ctx) << mf->name << ": incompatible file type: "
             << ctx.arg.emulation << " is expected but got " << detected;
}
// Create an ObjectFile for a regular ELF relocatable file and schedule
// its parsing on the worker task group. `archive_name` is empty for a
// file given directly on the command line.
template <typename E>
static ObjectFile<E> *new_object_file(Context<E> &ctx, MappedFile<Context<E>> *mf,
                                      std::string archive_name) {
  static Counter num_parsed_objs("parsed_objs");
  num_parsed_objs++;

  check_file_compatibility(ctx, mf);

  // Archive members (without --whole-archive) and files between
  // --start-lib/--end-lib are treated as library members.
  bool lazy = ctx.in_lib || (!archive_name.empty() && !ctx.whole_archive);

  ObjectFile<E> *obj = ObjectFile<E>::create(ctx, mf, archive_name, lazy);
  obj->priority = ctx.file_priority++;
  ctx.tg.run([obj, &ctx] { obj->parse(ctx); });
  if (ctx.arg.trace)
    SyncOut(ctx) << "trace: " << *obj;
  return obj;
}
// Create an ObjectFile backed by the LTO plugin for a GCC/LLVM IR file.
// Returns nullptr when the file is listed in --ignore-ir-file.
template <typename E>
static ObjectFile<E> *new_lto_obj(Context<E> &ctx, MappedFile<Context<E>> *mf,
                                  std::string archive_name) {
  static Counter num_parsed_lto_objs("parsed_lto_objs");
  num_parsed_lto_objs++;

  if (ctx.arg.ignore_ir_file.count(mf->get_identifier()))
    return nullptr;

  ObjectFile<E> *obj = read_lto_object(ctx, mf);
  obj->priority = ctx.file_priority++;
  obj->archive_name = archive_name;
  obj->is_in_lib = ctx.in_lib || (!archive_name.empty() && !ctx.whole_archive);
  obj->is_alive = !obj->is_in_lib;
  ctx.has_lto_object = true;
  if (ctx.arg.trace)
    SyncOut(ctx) << "trace: " << *obj;
  return obj;
}
// Create a SharedFile for a DSO input and schedule its parsing on the
// worker task group.
template <typename E>
static SharedFile<E> *
new_shared_file(Context<E> &ctx, MappedFile<Context<E>> *mf) {
  check_file_compatibility(ctx, mf);

  SharedFile<E> *dso = SharedFile<E>::create(ctx, mf);
  dso->priority = ctx.file_priority++;
  ctx.tg.run([dso, &ctx] { dso->parse(ctx); });
  if (ctx.arg.trace)
    SyncOut(ctx) << "trace: " << *dso;
  return dso;
}
// Load one input file, dispatching on its detected type. Archives are
// expanded member by member; linker scripts are parsed (which may load
// further files). DSOs and archives are recorded in ctx.visited so
// that a path given twice is only read once; plain object files are
// deliberately not registered there.
template <typename E>
void read_file(Context<E> &ctx, MappedFile<Context<E>> *mf) {
  if (ctx.visited.contains(mf->name))
    return;

  switch (get_file_type(ctx, mf)) {
  case FileType::ELF_OBJ:
    ctx.objs.push_back(new_object_file(ctx, mf, ""));
    return;
  case FileType::ELF_DSO:
    ctx.dsos.push_back(new_shared_file(ctx, mf));
    ctx.visited.insert(mf->name);
    return;
  case FileType::AR:
  case FileType::THIN_AR:
    for (MappedFile<Context<E>> *child : read_archive_members(ctx, mf)) {
      switch (get_file_type(ctx, child)) {
      case FileType::ELF_OBJ:
        ctx.objs.push_back(new_object_file(ctx, child, mf->name));
        break;
      case FileType::GCC_LTO_OBJ:
      case FileType::LLVM_BITCODE:
        // new_lto_obj() returns nullptr for --ignore-ir-file entries.
        if (ObjectFile<E> *file = new_lto_obj(ctx, child, mf->name))
          ctx.objs.push_back(file);
        break;
      case FileType::ELF_DSO:
        Warn(ctx) << mf->name << "(" << child->name
                  << "): shared object file in an archive is ignored";
        break;
      default:
        // Unknown members (e.g. archive metadata) are silently skipped.
        break;
      }
    }
    ctx.visited.insert(mf->name);
    return;
  case FileType::TEXT:
    parse_linker_script(ctx, mf);
    return;
  case FileType::GCC_LTO_OBJ:
  case FileType::LLVM_BITCODE:
    if (ObjectFile<E> *file = new_lto_obj(ctx, mf, ""))
      ctx.objs.push_back(file);
    return;
  default:
    Fatal(ctx) << mf->name << ": unknown file type";
  }
}
// Infer the output machine type from the first positional argument
// that names an openable file of a recognizable type. Fatal if no
// file reveals one (the user must then pass -m explicitly).
template <typename E>
static std::string_view
deduce_machine_type(Context<E> &ctx, std::span<std::string> args) {
  for (std::string_view arg : args) {
    if (arg.starts_with('-'))
      continue;
    auto *mf = MappedFile<Context<E>>::open(ctx, std::string(arg));
    if (!mf)
      continue;
    std::string_view target = get_machine_type(ctx, mf);
    if (!target.empty())
      return target;
  }
  Fatal(ctx) << "-m option is missing";
}
// Open a library candidate. Returns nullptr if the path cannot be
// opened, or if the file is for a different architecture (with a
// warning), so the caller can keep searching.
template <typename E>
MappedFile<Context<E>> *open_library(Context<E> &ctx, std::string path) {
  MappedFile<Context<E>> *mf = MappedFile<Context<E>>::open(ctx, path);
  if (!mf)
    return nullptr;

  std::string_view machine = get_machine_type(ctx, mf);
  if (!machine.empty() && machine != E::target_name) {
    Warn(ctx) << path << ": skipping incompatible file " << machine
              << " " << (int)E::e_machine;
    return nullptr;
  }
  return mf;
}
// Resolve a -l<name> option. "-l:foo" looks for the literal filename
// "foo"; otherwise each -L directory is tried for lib<name>.so (unless
// linking statically) and then lib<name>.a. Fatal when nothing matches.
template <typename E>
MappedFile<Context<E>> *find_library(Context<E> &ctx, std::string name) {
  // The "-l:filename" form requests an exact-filename search.
  if (name.starts_with(':')) {
    for (std::string_view dir : ctx.arg.library_paths) {
      std::string candidate = std::string(dir) + "/" + name.substr(1);
      if (MappedFile<Context<E>> *mf = open_library(ctx, candidate))
        return mf;
    }
    Fatal(ctx) << "library not found: " << name;
  }

  for (std::string_view dir : ctx.arg.library_paths) {
    std::string base = std::string(dir) + "/lib" + name;
    if (!ctx.is_static) {
      if (MappedFile<Context<E>> *mf = open_library(ctx, base + ".so"))
        return mf;
    }
    if (MappedFile<Context<E>> *mf = open_library(ctx, base + ".a"))
      return mf;
  }
  Fatal(ctx) << "library not found: " << name;
}
// Resolve `name` either as given or relative to each -L directory.
// Returns nullptr when the file is nowhere to be found.
template <typename E>
MappedFile<Context<E>> *find_from_search_paths(Context<E> &ctx, std::string name) {
  if (MappedFile<Context<E>> *mf = MappedFile<Context<E>>::open(ctx, name))
    return mf;

  for (std::string_view dir : ctx.arg.library_paths) {
    std::string path = std::string(dir) + "/" + name;
    if (MappedFile<Context<E>> *mf = MappedFile<Context<E>>::open(ctx, path))
      return mf;
  }
  return nullptr;
}
// Walk the positional command line in order, toggling reader state
// (--as-needed, --Bstatic, ...) and loading every named input file.
// Note: remove_prefix() both tests for and strips the prefix from
// `arg` in place, so on a match `arg` is the option's value.
template <typename E>
static void read_input_files(Context<E> &ctx, std::span<std::string> args) {
  Timer t(ctx, "read_input_files");

  // Snapshots of (as_needed, whole_archive, is_static, in_lib) saved
  // by --push-state and restored by --pop-state.
  std::vector<std::tuple<bool, bool, bool, bool>> state;
  ctx.is_static = ctx.arg.is_static;

  while (!args.empty()) {
    std::string_view arg = args[0];
    args = args.subspan(1);

    if (arg == "--as-needed") {
      ctx.as_needed = true;
    } else if (arg == "--no-as-needed") {
      ctx.as_needed = false;
    } else if (arg == "--whole-archive") {
      ctx.whole_archive = true;
    } else if (arg == "--no-whole-archive") {
      ctx.whole_archive = false;
    } else if (arg == "--Bstatic") {
      ctx.is_static = true;
    } else if (arg == "--Bdynamic") {
      ctx.is_static = false;
    } else if (arg == "--start-lib") {
      ctx.in_lib = true;
    } else if (arg == "--end-lib") {
      ctx.in_lib = false;
    } else if (remove_prefix(arg, "--version-script=")) {
      MappedFile<Context<E>> *mf = find_from_search_paths(ctx, std::string(arg));
      if (!mf)
        Fatal(ctx) << "--version-script: file not found: " << arg;
      parse_version_script(ctx, mf);
    } else if (remove_prefix(arg, "--dynamic-list=")) {
      MappedFile<Context<E>> *mf = find_from_search_paths(ctx, std::string(arg));
      if (!mf)
        Fatal(ctx) << "--dynamic-list: file not found: " << arg;
      parse_dynamic_list(ctx, mf);
    } else if (remove_prefix(arg, "--export-dynamic-symbol=")) {
      // "*" exports everything; otherwise record a glob pattern.
      if (arg == "*")
        ctx.default_version = VER_NDX_GLOBAL;
      else
        ctx.version_patterns.push_back({arg, "--export-dynamic-symbol",
                                        "global", VER_NDX_GLOBAL, false});
    } else if (remove_prefix(arg, "--export-dynamic-symbol-list=")) {
      MappedFile<Context<E>> *mf = find_from_search_paths(ctx, std::string(arg));
      if (!mf)
        Fatal(ctx) << "--export-dynamic-symbol-list: file not found: " << arg;
      parse_dynamic_list(ctx, mf);
    } else if (arg == "--push-state") {
      state.push_back({ctx.as_needed, ctx.whole_archive, ctx.is_static,
                       ctx.in_lib});
    } else if (arg == "--pop-state") {
      if (state.empty())
        Fatal(ctx) << "no state pushed before popping";
      std::tie(ctx.as_needed, ctx.whole_archive, ctx.is_static, ctx.in_lib) =
        state.back();
      state.pop_back();
    } else if (remove_prefix(arg, "-l")) {
      MappedFile<Context<E>> *mf = find_library(ctx, std::string(arg));
      mf->given_fullpath = false;
      read_file(ctx, mf);
    } else {
      read_file(ctx, MappedFile<Context<E>>::must_open(ctx, std::string(arg)));
    }
  }

  if (ctx.objs.empty())
    Fatal(ctx) << "no input files";

  // Wait for the parse tasks queued by the new_*_file() helpers.
  ctx.tg.wait();
}
// Since elf_main is a template, we can't run it without a type parameter.
// We speculatively run elf_main with X86_64, and if the speculation was
// wrong, re-run it with an actual machine type.
// Re-enter the linker with the elf_main instantiation matching the
// deduced target. Called when the speculative X86_64 run discovered
// the inputs are actually for another machine.
template <typename E>
static int redo_main(int argc, char **argv, std::string_view target) {
  if (target == I386::target_name)
    return elf_main<I386>(argc, argv);
  if (target == ARM64::target_name)
    return elf_main<ARM64>(argc, argv);
  if (target == ARM32::target_name)
    return elf_main<ARM32>(argc, argv);
  if (target == RV64LE::target_name)
    return elf_main<RV64LE>(argc, argv);
  if (target == RV64BE::target_name)
    return elf_main<RV64BE>(argc, argv);
  if (target == RV32LE::target_name)
    return elf_main<RV32LE>(argc, argv);
  if (target == RV32BE::target_name)
    return elf_main<RV32BE>(argc, argv);
  if (target == PPC32::target_name)
    return elf_main<PPC32>(argc, argv);
  if (target == PPC64V1::target_name)
    return elf_main<PPC64V1>(argc, argv);
  if (target == PPC64V2::target_name)
    return elf_main<PPC64V2>(argc, argv);
  if (target == S390X::target_name)
    return elf_main<S390X>(argc, argv);
  if (target == SPARC64::target_name)
    return elf_main<SPARC64>(argc, argv);
  if (target == M68K::target_name)
    return elf_main<M68K>(argc, argv);
  if (target == SH4::target_name)
    return elf_main<SH4>(argc, argv);
  if (target == ALPHA::target_name)
    return elf_main<ALPHA>(argc, argv);
  // `target` always comes from get_machine_type(), which only returns
  // one of the names handled above (or "", caught earlier).
  unreachable();
}
// Top-level driver of the ELF linker for target E. Runs the whole
// pipeline in strict phase order: command-line parsing, input loading,
// symbol resolution (including LTO), section layout, relocation, and
// output-file writing. Returns the process exit status.
template <typename E>
int elf_main(int argc, char **argv) {
  Context<E> ctx;

  // Process -run option first. process_run_subcommand() does not return.
  if (argc >= 2 && (argv[1] == "-run"sv || argv[1] == "--run"sv)) {
#if defined(_WIN32) || defined(__APPLE__)
    Fatal(ctx) << "-run is supported only on Unix";
#endif
    process_run_subcommand(ctx, argc, argv);
  }

  // Parse non-positional command line options
  ctx.cmdline_args = expand_response_files(ctx, argv);
  std::vector<std::string> file_args = parse_nonpositional_args(ctx);

  // If no -m option is given, deduce it from input files.
  if (ctx.arg.emulation.empty())
    ctx.arg.emulation = deduce_machine_type(ctx, file_args);

  // Redo if -m is not x86-64. We speculatively started as X86_64.
  if constexpr (is_x86_64<E>)
    if (ctx.arg.emulation != X86_64::target_name)
      return redo_main<E>(argc, argv, ctx.arg.emulation);

  Timer t_all(ctx, "all");

  install_signal_handler();

  // Honor --directory before any input file is opened.
  if (!ctx.arg.directory.empty())
    if (chdir(ctx.arg.directory.c_str()) == -1)
      Fatal(ctx) << "chdir failed: " << ctx.arg.directory
                 << ": " << errno_string();

  // Fork a subprocess unless --no-fork is given.
  std::function<void()> on_complete;

#if !defined(_WIN32) && !defined(__APPLE__)
  if (ctx.arg.fork)
    on_complete = fork_child();
#endif

  acquire_global_lock(ctx);

  // Cap TBB's worker count at --thread-count.
  tbb::global_control tbb_cont(tbb::global_control::max_allowed_parallelism,
                               ctx.arg.thread_count);

  // Handle --wrap options if any.
  for (std::string_view name : ctx.arg.wrap)
    get_symbol(ctx, name)->is_wrapped = true;

  // Handle --retain-symbols-file options if any.
  if (ctx.arg.retain_symbols_file)
    for (std::string_view name : *ctx.arg.retain_symbols_file)
      get_symbol(ctx, name)->write_to_symtab = true;

  for (std::string_view arg : ctx.arg.trace_symbol)
    get_symbol(ctx, arg)->is_traced = true;

  // Parse input files
  read_input_files(ctx, file_args);

  // Uniquify shared object files by soname
  {
    std::unordered_set<std::string_view> seen;
    std::erase_if(ctx.dsos, [&](SharedFile<E> *file) {
      return !seen.insert(file->soname).second;
    });
  }

  Timer t_total(ctx, "total");
  Timer t_before_copy(ctx, "before_copy");

  // Apply -exclude-libs
  apply_exclude_libs(ctx);

  // Create a dummy file containing linker-synthesized symbols.
  if (!ctx.arg.relocatable)
    create_internal_file(ctx);

  // resolve_symbols is 4 things in 1 phase:
  //
  // - Determine the set of object files to extract from archives.
  // - Remove redundant COMDAT sections (e.g. duplicate inline functions).
  // - Finally, the actual symbol resolution.
  // - LTO, which requires preliminary symbol resolution before running
  //   and a follow-up re-resolution after the LTO objects are emitted.
  //
  // These passes have complex interactions and unfortunately have to be
  // put together in a single phase.
  resolve_symbols(ctx);

  // "Kill" .eh_frame input sections after symbol resolution.
  kill_eh_frame_sections(ctx);

  // Resolve mergeable section pieces to merge them.
  resolve_section_pieces(ctx);

  // Handle --relocatable. Since the linker's behavior is quite different
  // from the normal one when the option is given, the logic is implemented
  // to a separate file.
  if (ctx.arg.relocatable) {
    combine_objects(ctx);
    return 0;
  }

  // Create .bss sections for common symbols.
  convert_common_symbols(ctx);

  // Apply version scripts.
  apply_version_script(ctx);

  // Parse symbol version suffixes (e.g. "foo@ver1").
  parse_symbol_version(ctx);

  // Set is_imported and is_exported bits for each symbol.
  compute_import_export(ctx);

  // Read address-significant section information.
  if (ctx.arg.icf && !ctx.arg.icf_all)
    mark_addrsig(ctx);

  // Garbage-collect unreachable sections.
  if (ctx.arg.gc_sections)
    gc_sections(ctx);

  // Merge identical read-only sections.
  if (ctx.arg.icf)
    icf_sections(ctx);

  // Compute sizes of sections containing mergeable strings.
  compute_merged_section_sizes(ctx);

  // Create linker-synthesized sections such as .got or .plt.
  create_synthetic_sections(ctx);

  // Make sure that there's no duplicate symbol
  if (!ctx.arg.allow_multiple_definition)
    check_duplicate_symbols(ctx);

  // Warn if symbols with different types are defined under the same name.
  check_symbol_types(ctx);

  if constexpr (is_ppc64v1<E>)
    ppc64v1_rewrite_opd(ctx);

  // Bin input sections into output sections.
  create_output_sections(ctx);

  // Add synthetic symbols such as __ehdr_start or __end.
  add_synthetic_symbols(ctx);

  // Beyond this point, no new files will be added to ctx.objs
  // or ctx.dsos.

  // Handle `-z cet-report`.
  if (ctx.arg.z_cet_report != CET_REPORT_NONE)
    check_cet_errors(ctx);

  // Handle `-z execstack-if-needed`.
  if (ctx.arg.z_execstack_if_needed)
    for (ObjectFile<E> *file : ctx.objs)
      if (file->needs_executable_stack)
        ctx.arg.z_execstack = true;

  // If we are linking a .so file, remaining undefined symbols does
  // not cause a linker error. Instead, they are treated as if they
  // were imported symbols.
  //
  // If we are linking an executable, weak undefs are converted to
  // weakly imported symbols so that they'll have another chance to be
  // resolved.
  claim_unresolved_symbols(ctx);

  // Beyond this point, no new symbols will be added to the result.

  // Handle --print-dependencies
  if (ctx.arg.print_dependencies)
    print_dependencies(ctx);

  // Handle -repro
  if (ctx.arg.repro)
    write_repro_file(ctx);

  // Handle --require-defined
  for (std::string_view name : ctx.arg.require_defined)
    if (!get_symbol(ctx, name)->file)
      Error(ctx) << "--require-defined: undefined symbol: " << name;

  // .init_array and .fini_array contents have to be sorted by
  // a special rule. Sort them.
  sort_init_fini(ctx);

  // Likewise, .ctors and .dtors have to be sorted. They are rare
  // because they are superseded by .init_array/.fini_array, though.
  sort_ctor_dtor(ctx);

  // Handle --shuffle-sections
  if (ctx.arg.shuffle_sections != SHUFFLE_SECTIONS_NONE)
    shuffle_sections(ctx);

  // Copy string referred by .dynamic to .dynstr.
  for (SharedFile<E> *file : ctx.dsos)
    ctx.dynstr->add_string(file->soname);
  for (std::string_view str : ctx.arg.auxiliary)
    ctx.dynstr->add_string(str);
  for (std::string_view str : ctx.arg.filter)
    ctx.dynstr->add_string(str);
  if (!ctx.arg.rpaths.empty())
    ctx.dynstr->add_string(ctx.arg.rpaths);
  if (!ctx.arg.soname.empty())
    ctx.dynstr->add_string(ctx.arg.soname);

  if constexpr (is_ppc64v1<E>)
    ppc64v1_scan_symbols(ctx);

  // Scan relocations to find symbols that need entries in .got, .plt,
  // .got.plt, .dynsym, .dynstr, etc.
  scan_relocations(ctx);

  // Compute sizes of output sections while assigning offsets
  // within an output section to input sections.
  compute_section_sizes(ctx);

  // Sort sections by section attributes so that we'll have to
  // create as few segments as possible.
  sort_output_sections(ctx);

  // If --packed_dyn_relocs=relr was given, base relocations are stored
  // to a .relr.dyn section in a compressed form. Construct a compressed
  // relocations now so that we can fix section sizes and file layout.
  if (ctx.arg.pack_dyn_relocs_relr)
    construct_relr(ctx);

  // Reserve a space for dynamic symbol strings in .dynstr and sort
  // .dynsym contents if necessary. Beyond this point, no symbol will
  // be added to .dynsym.
  ctx.dynsym->finalize(ctx);

  // Print reports about undefined symbols, if needed.
  if (ctx.arg.unresolved_symbols == UNRESOLVED_ERROR)
    report_undef_errors(ctx);

  // Fill .gnu.version_d section contents.
  if (ctx.verdef)
    ctx.verdef->construct(ctx);

  // Fill .gnu.version_r section contents.
  ctx.verneed->construct(ctx);

  // Compute .symtab and .strtab sizes for each file.
  create_output_symtab(ctx);

  // .eh_frame is a special section from the linker's point of view,
  // as its contents are parsed and reconstructed by the linker,
  // unlike other sections that are regarded as opaque bytes.
  // Here, we construct output .eh_frame contents.
  ctx.eh_frame->construct(ctx);

  // Handle --gdb-index.
  if (ctx.arg.gdb_index)
    ctx.gdb_index->construct(ctx);

  // If --emit-relocs is given, we'll copy relocation sections from input
  // files to an output file.
  if (ctx.arg.emit_relocs)
    create_reloc_sections(ctx);

  // Compute the section header values for all sections.
  compute_section_headers(ctx);

  // Assign offsets to output sections
  i64 filesize = set_osec_offsets(ctx);

  // On RISC-V, branches are encode using multiple instructions so
  // that they can jump to anywhere in ±2 GiB by default. They may
  // be replaced with shorter instruction sequences if destinations
  // are close enough. Do this optimization.
  if constexpr (is_riscv<E>)
    filesize = riscv_resize_sections(ctx);

  // At this point, memory layout is fixed.

  // Set actual addresses to linker-synthesized symbols.
  fix_synthetic_symbols(ctx);

  // Beyond this, you can assume that symbol addresses including their
  // GOT or PLT addresses have a correct final value.

  // If --compress-debug-sections is given, compress .debug_* sections
  // using zlib.
  if (ctx.arg.compress_debug_sections != COMPRESS_NONE)
    filesize = compress_debug_sections(ctx);

  // At this point, both memory and file layouts are fixed.

  t_before_copy.stop();

  // Create an output file
  ctx.output_file =
    OutputFile<Context<E>>::open(ctx, ctx.arg.output, filesize, 0777);
  ctx.buf = ctx.output_file->buf;

  Timer t_copy(ctx, "copy");

  // Copy input sections to the output file and apply relocations.
  copy_chunks(ctx);

  // Some part of .gdb_index couldn't be computed until other debug
  // sections are complete. We have complete debug sections now, so
  // write the rest of .gdb_index.
  if (ctx.gdb_index)
    ctx.gdb_index->write_address_areas(ctx);

  // Dynamic linker works better with sorted .rela.dyn section,
  // so we sort them.
  ctx.reldyn->sort(ctx);

  // Zero-clear paddings between sections
  clear_padding(ctx);

  // .note.gnu.build-id section contains a cryptographic hash of the
  // entire output file. Now that we wrote everything except build-id,
  // we can compute it.
  if (ctx.buildid)
    ctx.buildid->write_buildid(ctx);

  t_copy.stop();
  ctx.checkpoint();

  // Close the output file. This is the end of the linker's main job.
  ctx.output_file->close(ctx);

  // Handle --dependency-file
  if (!ctx.arg.dependency_file.empty())
    write_dependency_file(ctx);

  if (ctx.has_lto_object)
    lto_cleanup(ctx);

  t_total.stop();
  t_all.stop();

  if (ctx.arg.print_map)
    print_map(ctx);

  // Show stats numbers
  if (ctx.arg.stats)
    show_stats(ctx);

  if (ctx.arg.perf)
    print_timer_records(ctx.timer_records);

  std::cout << std::flush;
  std::cerr << std::flush;
  if (on_complete)
    on_complete();

  release_global_lock(ctx);

  if (ctx.arg.quick_exit)
    _exit(0);

  for (std::function<void()> &fn : ctx.on_exit)
    fn();
  ctx.checkpoint();
  return 0;
}
// Explicit instantiations for this translation unit's target.
using E = MOLD_TARGET;

template void read_file(Context<E> &, MappedFile<Context<E>> *);
template MappedFile<Context<E>> *open_library(Context<E> &, std::string);

#ifdef MOLD_X86_64
// The X86_64 translation unit owns main(); every other target's
// elf_main lives in its own translation unit and is reached through
// redo_main() after the machine type is deduced.
extern template int elf_main<I386>(int, char **);
extern template int elf_main<ARM32>(int, char **);
extern template int elf_main<ARM64>(int, char **);
extern template int elf_main<RV32BE>(int, char **);
extern template int elf_main<RV32LE>(int, char **);
extern template int elf_main<RV64LE>(int, char **);
extern template int elf_main<RV64BE>(int, char **);
extern template int elf_main<PPC32>(int, char **);
extern template int elf_main<PPC64V1>(int, char **);
extern template int elf_main<PPC64V2>(int, char **);
extern template int elf_main<S390X>(int, char **);
extern template int elf_main<SPARC64>(int, char **);
extern template int elf_main<M68K>(int, char **);
extern template int elf_main<SH4>(int, char **);
extern template int elf_main<ALPHA>(int, char **);

// Speculatively start as x86-64; elf_main redoes itself if wrong.
int main(int argc, char **argv) {
  return elf_main<X86_64>(argc, argv);
}
#else
template int elf_main<E>(int, char **);
#endif
} // namespace mold::elf

117
third_party/mold/elf/mapfile.cc vendored Normal file
View file

@ -0,0 +1,117 @@
// clang-format off
#include "third_party/mold/elf/mold.h"
#include "third_party/libcxx/fstream"
#include "third_party/libcxx/iomanip"
#include "third_party/libcxx/ios"
#include "third_party/libcxx/sstream"
// MISSING #include <tbb/parallel_for_each.h>
#include "third_party/libcxx/unordered_map"
namespace mold::elf {
// Maps each input section to the symbols defined inside it.
// Concurrent so it can be filled from parallel loops below.
template <typename E>
using Map =
    tbb::concurrent_hash_map<InputSection<E> *, std::vector<Symbol<E> *>>;
// Open the file named by --Map for writing; fatal if it cannot be
// created. Returns the owning stream pointer.
template <typename E>
static std::unique_ptr<std::ofstream> open_output_file(Context<E> &ctx) {
  // make_unique + construct-and-open replaces the raw `new` plus a
  // separate open() call; behavior (including errno on failure) is
  // unchanged.
  auto file = std::make_unique<std::ofstream>(ctx.arg.Map.c_str());
  if (!file->is_open())
    Fatal(ctx) << "cannot open " << ctx.arg.Map << ": " << errno_string();
  return file;
}
// Build the section-to-symbols map in parallel, then sort each
// section's symbol list by address so the mapfile prints them in
// layout order.
template <typename E>
static Map<E> get_map(Context<E> &ctx) {
  Map<E> map;

  tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
    for (Symbol<E> *sym : file->symbols) {
      // Only symbols this file defines; STT_SECTION symbols carry no
      // useful name for the mapfile.
      if (sym->file != file || sym->get_type() == STT_SECTION)
        continue;

      if (InputSection<E> *isec = sym->get_input_section()) {
        assert(file == &isec->file);
        // The accessor holds exclusive access to the entry, so the
        // push_back is safe against concurrent writers.
        typename Map<E>::accessor acc;
        map.insert(acc, {isec, {}});
        acc->second.push_back(sym);
      }
    }
  });

  if (map.size() <= 1)
    return map;

  // Sort each per-section symbol vector by symbol value (address).
  tbb::parallel_for(map.range(), [](const typename Map<E>::range_type &range) {
    for (auto it = range.begin(); it != range.end(); it++) {
      std::vector<Symbol<E> *> &vec = it->second;
      sort(vec, [](Symbol<E> *a, Symbol<E> *b) { return a->value < b->value; });
    }
  });
  return map;
}
// Emit a link map (--print-map / --Map): for each output chunk its
// address, size and alignment, followed by every member input section
// and the symbols defined in it.
template <typename E>
void print_map(Context<E> &ctx) {
  std::ostream *out = &std::cout;
  std::unique_ptr<std::ofstream> file;

  // --Map=<file> redirects the map from stdout to that file.
  if (!ctx.arg.Map.empty()) {
    file = open_output_file(ctx);
    out = file.get();
  }

  // Construct a section-to-symbol map.
  Map<E> map = get_map(ctx);

  // Print a mapfile.
  *out << " VMA Size Align Out In Symbol\n";
  for (Chunk<E> *osec : ctx.chunks) {
    *out << std::showbase
         << std::setw(18) << std::hex << (u64)osec->shdr.sh_addr << std::dec
         << std::setw(11) << (u64)osec->shdr.sh_size
         << std::setw(6) << (u64)osec->shdr.sh_addralign
         << " " << osec->name << "\n";

    // Only OUTPUT_SECTION chunks have input-section members to list.
    if (osec->kind() != OUTPUT_SECTION)
      continue;

    // Format each member's lines in parallel into per-member string
    // buffers, then emit them sequentially in order.
    std::span<InputSection<E> *> members = ((OutputSection<E> *)osec)->members;
    std::vector<std::string> bufs(members.size());

    tbb::parallel_for((i64)0, (i64)members.size(), [&](i64 i) {
      InputSection<E> *mem = members[i];
      std::ostringstream ss;
      opt_demangle = ctx.arg.demangle;
      u64 addr = osec->shdr.sh_addr + mem->offset;

      ss << std::showbase
         << std::setw(18) << std::hex << addr << std::dec
         << std::setw(11) << (u64)mem->sh_size
         << std::setw(6) << (1 << (u64)mem->p2align)
         << " " << *mem << "\n";

      typename Map<E>::const_accessor acc;
      if (map.find(acc, mem))
        for (Symbol<E> *sym : acc->second)
          ss << std::showbase
             << std::setw(18) << std::hex << sym->get_addr(ctx) << std::dec
             << " 0 0 "
             << *sym << "\n";

      bufs[i] = ss.str();
    });

    for (std::string &str : bufs)
      *out << str;
  }
}
using E = MOLD_TARGET;
template void print_map(Context<E> &ctx);
} // namespace mold::elf

171
third_party/mold/elf/mold-wrapper.c vendored Normal file
View file

@ -0,0 +1,171 @@
// clang-format off
#define _GNU_SOURCE 1
#if !defined(__OpenBSD__) && !defined(__FreeBSD__)
#include "libc/mem/alloca.h"
#endif
#include "libc/runtime/dlfcn.h"
#include "libc/calls/weirdtypes.h"
#include "libc/stdio/posix_spawn.h"
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
#include "libc/calls/calls.h"
#include "libc/calls/termios.h"
#include "libc/fmt/conv.h"
#include "libc/limits.h"
#include "libc/mem/alg.h"
#include "libc/mem/alloca.h"
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/rand.h"
#include "libc/stdio/temp.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/exit.h"
#include "third_party/getopt/getopt.h"
#include "third_party/musl/crypt.h"
#include "third_party/musl/rand48.h"
#include "libc/mem/alg.h"
#include "libc/mem/mem.h"
#include "libc/str/str.h"
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/runtime/pathconf.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/sysconf.h"
#include "libc/sysv/consts/f.h"
#include "libc/sysv/consts/fileno.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/ok.h"
#include "libc/time/time.h"
#include "third_party/getopt/getopt.h"
#include "third_party/musl/crypt.h"
#include "third_party/musl/lockf.h"
extern char **environ;
// Returns the path to the real mold executable from $MOLD_PATH.
// The wrapper cannot work without it, so bail out if it is unset.
static char *get_mold_path() {
  char *p = getenv("MOLD_PATH");
  if (!p) {
    fprintf(stderr, "MOLD_PATH is not set\n");
    exit(1);
  }
  return p;
}
// Prints a debug message to stderr, but only when the user opted in
// by setting $MOLD_WRAPPER_DEBUG. fmt is a printf-style format string.
static void debug_print(const char *fmt, ...) {
  if (getenv("MOLD_WRAPPER_DEBUG")) {
    va_list args;
    va_start(args, fmt);
    fprintf(stderr, "mold-wrapper.so: ");
    vfprintf(stderr, fmt, args);
    fflush(stderr);
    va_end(args);
  }
}
// Counts the NULL-terminated arguments remaining in *ap without
// consuming them (a copy of the va_list is walked instead).
static int count_args(va_list *ap) {
  va_list probe;
  va_copy(probe, *ap);
  int n = 0;
  while (va_arg(probe, char *) != NULL)
    n++;
  va_end(probe);
  return n;
}
// Fills argv with arg0 followed by the NULL-terminated arguments
// consumed from *ap; argv must have room for them plus the trailing NULL.
static void copy_args(char **argv, const char *arg0, va_list *ap) {
  char *cur;
  int n = 1;
  while ((cur = va_arg(*ap, char *)) != NULL)
    argv[n++] = cur;
  ((const char **)argv)[0] = arg0;
  ((const char **)argv)[n] = NULL;
}
// Returns true if the basename of `path` is one of the well-known
// system linker names that mold should stand in for.
static bool is_ld(const char *path) {
  const char *base = strrchr(path, '/');
  base = base ? base + 1 : path;

  static const char *const names[] = {
    "ld", "ld.lld", "ld.gold", "ld.bfd", "ld.mold",
  };
  for (size_t i = 0; i < sizeof(names) / sizeof(names[0]); i++)
    if (!strcmp(base, names[i]))
      return true;
  return false;
}
// Wrapper for execvpe(3). If the program looks like a linker, run mold
// instead. The given environment is merged into this process's
// environment via putenv() so that we can delegate to execvp(), which
// performs the $PATH search for us.
int execvpe(const char *file, char *const *argv, char *const *envp) {
  debug_print("execvpe %s\n", file);

  if (!strcmp(file, "ld") || is_ld(file))
    file = get_mold_path();

  // Merge envp into our own environment; execvp below uses `environ`.
  for (int i = 0; envp[i]; i++)
    putenv(envp[i]);

  // Look up the real execvp(3) and give it the correct type. The
  // original code cast the execvp symbol to execvpe's three-argument
  // type and passed a third argument, which only worked by accident of
  // the calling convention.
  typeof(execvp) *real = dlsym(RTLD_NEXT, "execvp");
  return real(file, argv);
}
int execve(const char *path, char *const *argv, char *const *envp) {
debug_print("execve %s\n", path);
if (is_ld(path))
path = get_mold_path();
typeof(execve) *real = dlsym(RTLD_NEXT, "execve");
return real(path, argv, envp);
}
// Wrapper for execl(3): gathers the variadic argument list into a
// stack-allocated argv and forwards to our execve() wrapper.
int execl(const char *path, const char *arg0, ...) {
  va_list args;
  va_start(args, arg0);
  int nargs = count_args(&args);
  char **vec = alloca((nargs + 2) * sizeof(char *));
  copy_args(vec, arg0, &args);
  va_end(args);
  return execve(path, vec, environ);
}
// Wrapper for execlp(3): like execl(), but forwards to execvpe() so
// that the file is searched for in $PATH.
int execlp(const char *file, const char *arg0, ...) {
  va_list args;
  va_start(args, arg0);
  int nargs = count_args(&args);
  char **vec = alloca((nargs + 2) * sizeof(char *));
  copy_args(vec, arg0, &args);
  va_end(args);
  return execvpe(file, vec, environ);
}
// Wrapper for execle(3): the environment pointer trails the
// NULL-terminated argument list, so read it after copy_args() has
// consumed the arguments.
int execle(const char *path, const char *arg0, ...) {
  va_list args;
  va_start(args, arg0);
  char **vec = alloca((count_args(&args) + 2) * sizeof(char *));
  copy_args(vec, arg0, &args);
  char **envp = va_arg(args, char **);
  va_end(args);
  return execve(path, vec, envp);
}
// Wrapper for execv(3): forwards to our execve() wrapper with the
// current environment, so the ld-to-mold substitution applies.
int execv(const char *path, char *const *argv) {
  return execve(path, argv, environ);
}
// Wrapper for execvp(3): forwards to our execvpe() wrapper with the
// current environment, so the ld-to-mold substitution applies.
int execvp(const char *file, char *const *argv) {
  return execvpe(file, argv, environ);
}
int posix_spawn(pid_t *pid, const char *path,
const posix_spawn_file_actions_t *file_actions,
const posix_spawnattr_t *attrp,
char *const *argv, char *const *envp) {
debug_print("posix_spawn %s\n", path);
if (is_ld(path))
path = get_mold_path();
typeof(posix_spawn) *real = dlsym(RTLD_NEXT, "posix_spawn");
return real(pid, path, file_actions, attrp, argv, envp);
}

2852
third_party/mold/elf/mold.h vendored Normal file

File diff suppressed because it is too large Load diff

3153
third_party/mold/elf/output-chunks.cc vendored Normal file

File diff suppressed because it is too large Load diff

2653
third_party/mold/elf/passes.cc vendored Normal file

File diff suppressed because it is too large Load diff

198
third_party/mold/elf/relocatable.cc vendored Normal file
View file

@ -0,0 +1,198 @@
// clang-format off
// This file implements -r or --relocatable. That option forces the linker
// to combine input object files into another single large object file.
// Since the behavior of the linker when the option is given is quite
// different from that of the normal execution mode, we separate code for
// the feature into this separate file.
//
// The --relocatable option isn't used very often. After all, if you want
// to combine object files into a single file, you could use `ar`.
// However, some programs use it in a creative manner which is hard to be
// substituted with static archives, so we need to support this option in
// the same way as GNU ld does. A notable example is GHC (Glasgow Haskell
// Compiler). GHC has its own dynamic linker which can load a .o file (as
// opposed to a .so) into memory. GHC's module is not a shared object file
// but a combined object file.
//
// There are many different ways to combine object files into a single file.
// The simplest approach would be to just copy all sections from input files
// to an output file as-is with a few exceptions for singleton sections such
// as the symbol table or the string table. That works, but that's not
// compatible with GNU ld.
//
// To be compatible with GNU ld, we need to do the following:
//
// - Regular sections containing opaque data (e.g. ".text" or ".data")
// are just copied as-is. Two sections with the same name are merged.
//
// - .symtab, .strtab and .shstrtab are merged.
//
// - COMDAT groups are uniquified.
//
// - Relocations are copied, but we need to fix symbol indices.
#include "third_party/mold/elf/mold.h"
// MISSING #include <tbb/parallel_for.h>
// MISSING #include <tbb/parallel_for_each.h>
namespace mold::elf {
// Create linker-synthesized sections needed for a relocatable (-r)
// output: headers, .eh_frame (+ its relocations), symbol/string tables
// and the .note.gnu.property section.
template <typename E>
static void r_create_synthetic_sections(Context<E> &ctx) {
  // Register a chunk with the context (both the ordered chunk list and
  // the owning pool) and hand it back to the caller.
  auto add = [&](auto *chunk) {
    ctx.chunks.push_back(chunk);
    ctx.chunk_pool.emplace_back(chunk);
    return chunk;
  };

  ctx.ehdr = add(new OutputEhdr<E>(0));
  ctx.shdr = add(new OutputShdr<E>);
  ctx.eh_frame = add(new EhFrameSection<E>);
  ctx.eh_frame_reloc = add(new EhFrameRelocSection<E>);
  ctx.strtab = add(new StrtabSection<E>);
  ctx.symtab = add(new SymtabSection<E>);
  ctx.shstrtab = add(new ShstrtabSection<E>);
  ctx.note_property = add(new NotePropertySection<E>);
}
// Create SHT_GROUP (i.e. comdat group) sections. We uniquify comdat
// sections by signature. We want to propagate input comdat groups as
// output comdat groups if they are still alive after uniquification.
template <typename E>
static void create_comdat_group_sections(Context<E> &ctx) {
  Timer t(ctx, "create_comdat_group_sections");

  // One result vector per input file so the parallel loop below can
  // append without synchronization; merged sequentially afterwards.
  std::vector<std::vector<Chunk<E> *>> vec{ctx.objs.size()};

  tbb::parallel_for((i64)0, (i64)ctx.objs.size(), [&](i64 i) {
    ObjectFile<E> &file = *ctx.objs[i];
    for (ComdatGroupRef<E> &ref : file.comdat_groups) {
      // Skip groups whose uniquification was won by another file.
      if (ref.group->owner != file.priority)
        continue;

      // The group's signature symbol, referenced by the group section's
      // sh_info field.
      Symbol<E> *sym = file.symbols[file.elf_sections[ref.sect_idx].sh_info];
      assert(sym);

      // Translate each member input section to its output-file chunk.
      std::vector<Chunk<E> *> members;
      for (u32 j : ref.members) {
        const ElfShdr<E> &shdr = file.elf_sections[j];
        if (shdr.sh_type == (E::is_rela ? SHT_RELA : SHT_REL)) {
          // A relocation member maps to the output relocation section of
          // the section it applies to (identified by sh_info).
          InputSection<E> &isec = *file.sections[shdr.sh_info];
          members.push_back(isec.output_section->reloc_sec.get());
        } else {
          InputSection<E> &isec = *file.sections[j];
          members.push_back(isec.output_section);
        }
      }

      vec[i].push_back(new ComdatGroupSection<E>(*sym, std::move(members)));
    }
  });

  // Register all newly created group sections with the context.
  for (std::vector<Chunk<E> *> &vec2 : vec) {
    for (Chunk<E> *chunk : vec2) {
      ctx.chunks.push_back(chunk);
      ctx.chunk_pool.emplace_back(chunk);
    }
  }
}
// Unresolved undefined symbols in the -r mode are simply propagated to an
// output file as undefined symbols. This function guarantees that
// unresolved undefined symbols belongs to some input file.
template <typename E>
static void r_claim_unresolved_symbols(Context<E> &ctx) {
  Timer t(ctx, "r_claim_unresolved_symbols");
  tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
    if (!file->is_alive)
      return;

    // Only global symbols can be claimed; locals always belong to
    // their defining file.
    for (i64 i = file->first_global; i < file->elf_syms.size(); i++) {
      const ElfSym<E> &esym = file->elf_syms[i];
      Symbol<E> &sym = *file->symbols[i];
      if (!esym.is_undef())
        continue;

      // Symbols are shared between files; take the per-symbol lock
      // before inspecting or modifying resolution state.
      std::scoped_lock lock(sym.mu);

      // Leave the symbol alone if it is already resolved to a
      // definition, or already claimed by a file with an equal or
      // higher priority (smaller value).
      if (sym.file &&
          (!sym.esym().is_undef() || sym.file->priority <= file->priority))
        continue;

      // Claim the undefined symbol for this file.
      sym.file = file;
      sym.origin = 0;
      sym.value = 0;
      sym.sym_idx = i;
    }
  });
}
// Set output section in-file offsets. Output section memory addresses
// are left as zero. Returns the resulting file size.
template <typename E>
static u64 r_set_osec_offsets(Context<E> &ctx) {
  u64 off = 0;
  for (Chunk<E> *chunk : ctx.chunks) {
    // Respect each chunk's alignment requirement, then lay it out
    // immediately after the previous one.
    off = align_to(off, chunk->shdr.sh_addralign);
    chunk->shdr.sh_offset = off;
    off += chunk->shdr.sh_size;
  }
  return off;
}
// Entry point for the -r/--relocatable mode: combine the input object
// files into one relocatable object. The steps below form an
// order-dependent pipeline; do not reorder them.
template <typename E>
void combine_objects(Context<E> &ctx) {
  compute_merged_section_sizes(ctx);
  create_output_sections(ctx);
  r_create_synthetic_sections(ctx);
  r_claim_unresolved_symbols(ctx);
  compute_section_sizes(ctx);
  sort_output_sections(ctx);
  create_output_symtab(ctx);
  ctx.eh_frame->construct(ctx);
  create_reloc_sections(ctx);
  create_comdat_group_sections(ctx);
  compute_section_headers(ctx);

  // Assign file offsets; memory addresses stay zero in a relocatable
  // output (see r_set_osec_offsets).
  i64 filesize = r_set_osec_offsets(ctx);

  ctx.output_file =
    OutputFile<Context<E>>::open(ctx, ctx.arg.output, filesize, 0666);
  ctx.buf = ctx.output_file->buf;

  copy_chunks(ctx);
  clear_padding(ctx);
  ctx.output_file->close(ctx);
  ctx.checkpoint();

  // Optional post-link reports.
  if (ctx.arg.print_map)
    print_map(ctx);
  if (ctx.arg.stats)
    show_stats(ctx);
  if (ctx.arg.perf)
    print_timer_records(ctx.timer_records);

  // Skip static destructors for a faster exit when requested.
  if (ctx.arg.quick_exit)
    _exit(0);
}
using E = MOLD_TARGET;
template void combine_objects(Context<E> &);
} // namespace mold::elf

166
third_party/mold/elf/subprocess.cc vendored Normal file
View file

@ -0,0 +1,166 @@
// clang-format off
#if !defined(_WIN32) && !defined(__APPLE__)
#include "third_party/mold/elf/mold.h"
// MISSING #include "config.h"
#include "third_party/libcxx/filesystem"
#include "libc/calls/calls.h"
#include "libc/calls/sigtimedwait.h"
#include "libc/calls/struct/sigaction.h"
#include "libc/calls/struct/siginfo.h"
#include "libc/sysv/consts/sa.h"
#include "libc/sysv/consts/sicode.h"
#include "libc/sysv/consts/ss.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/stat.h"
#include "libc/calls/struct/stat.macros.h"
#include "libc/calls/struct/timespec.h"
#include "libc/calls/weirdtypes.h"
#include "libc/sysv/consts/s.h"
#include "libc/sysv/consts/utime.h"
#include "libc/time/time.h"
#include "libc/calls/struct/itimerval.h"
#include "libc/calls/struct/timeval.h"
#include "libc/calls/weirdtypes.h"
#include "libc/sock/select.h"
#include "libc/sysv/consts/clock.h"
#include "libc/sysv/consts/itimer.h"
#include "libc/time/struct/timezone.h"
#include "libc/time/time.h"
#include "libc/calls/makedev.h"
#include "libc/calls/weirdtypes.h"
#include "libc/thread/thread.h"
#include "libc/calls/typedef/u.h"
#include "libc/calls/weirdtypes.h"
#include "libc/intrin/newbie.h"
#include "libc/sock/select.h"
#include "libc/sysv/consts/endian.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/siginfo.h"
#include "libc/calls/weirdtypes.h"
#include "libc/sysv/consts/w.h"
#include "libc/sysv/consts/waitid.h"
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/runtime/pathconf.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/sysconf.h"
#include "libc/sysv/consts/f.h"
#include "libc/sysv/consts/fileno.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/ok.h"
#include "libc/time/time.h"
#include "third_party/getopt/getopt.h"
#include "third_party/musl/crypt.h"
#include "third_party/musl/lockf.h"
namespace mold::elf {
#ifdef MOLD_X86_64
// Exiting from a program with large memory usage is slow --
// it may take a few hundred milliseconds. To hide the latency,
// we fork a child and let it do the actual linking work.
//
// Returns (in the child) a callback that signals success to the parent
// through a pipe; the parent exits as soon as that byte arrives, or
// mirrors the child's exit status / termination signal otherwise.
std::function<void()> fork_child() {
  int pipefd[2];
  if (pipe(pipefd) == -1) {
    perror("pipe");
    exit(1);
  }

  pid_t pid = fork();
  if (pid == -1) {
    perror("fork");
    exit(1);
  }

  if (pid > 0) {
    // Parent. Wait for either the success byte or child termination.
    close(pipefd[1]);

    char buf[1];
    if (read(pipefd[0], buf, 1) == 1)
      _exit(0);

    // Pipe closed without a byte: the child failed. Propagate how.
    int status;
    waitpid(pid, &status, 0);
    if (WIFEXITED(status))
      _exit(WEXITSTATUS(status));
    if (WIFSIGNALED(status))
      raise(WTERMSIG(status));
    _exit(1);
  }

  // Child. The returned callback writes one byte to tell the parent it
  // may exit immediately.
  close(pipefd[0]);
  return [=] {
    char buf[] = {1};
    [[maybe_unused]] int n = write(pipefd[1], buf, 1);
    assert(n == 1);
  };
}
#endif
// Locates mold-wrapper.so, trying in order: the directory containing
// the mold executable, $(MOLD_LIBDIR)/mold (/usr/local/lib/mold by
// default), and ../lib/mold relative to the executable. Fatal if the
// file is found in none of them.
template <typename E>
static std::string find_dso(Context<E> &ctx, std::filesystem::path self) {
  std::filesystem::path candidates[] = {
    self.parent_path() / "mold-wrapper.so",
    MOLD_LIBDIR "/mold/mold-wrapper.so",
    self.parent_path() / "../lib/mold/mold-wrapper.so",
  };

  for (std::filesystem::path &p : candidates) {
    std::error_code ec;
    if (std::filesystem::is_regular_file(p, ec) && !ec)
      return p;
  }

  Fatal(ctx) << "mold-wrapper.so is missing";
}
// Implements `mold -run <command> ...`: runs <command> with
// mold-wrapper.so LD_PRELOADed so that any exec of a system linker
// inside it is redirected to this mold binary. Never returns.
template <typename E>
[[noreturn]]
void process_run_subcommand(Context<E> &ctx, int argc, char **argv) {
  assert(argv[1] == "-run"s || argv[1] == "--run"s);

  if (!argv[2])
    Fatal(ctx) << "-run: argument missing";

  // Get the mold-wrapper.so path
  std::string self = get_self_path();
  std::string dso_path = find_dso(ctx, self);

  // Set environment variables. strdup() is intentional: putenv() keeps
  // the pointer, so the strings must outlive this scope.
  putenv(strdup(("LD_PRELOAD=" + dso_path).c_str()));
  putenv(strdup(("MOLD_PATH=" + self).c_str()));

  // If ld, ld.lld or ld.gold is specified, run mold itself
  if (std::string cmd = filepath(argv[2]).filename();
      cmd == "ld" || cmd == "ld.lld" || cmd == "ld.gold") {
    std::vector<char *> args;
    args.push_back(argv[0]);
    args.insert(args.end(), argv + 3, argv + argc);
    args.push_back(nullptr);
    execv(self.c_str(), args.data());
    Fatal(ctx) << "mold -run failed: " << self << ": " << errno_string();
  }

  // Execute a given command
  execvp(argv[2], argv + 2);
  Fatal(ctx) << "mold -run failed: " << argv[2] << ": " << errno_string();
}
using E = MOLD_TARGET;
template void process_run_subcommand(Context<E> &, int, char **);
} // namespace mold::elf
#endif

318
third_party/mold/elf/thunks.cc vendored Normal file
View file

@ -0,0 +1,318 @@
// clang-format off
// RISC instructions are usually up to 4 bytes long, so the immediates of
// their branch instructions are naturally smaller than 32 bits. This is
// contrary to x86-64 on which branch instructions take 4 bytes immediates
// and can jump to anywhere within PC ± 2 GiB.
//
// In fact, ARM32's branch instructions can jump only within ±16 MiB and
// ARM64's ±128 MiB, for example. If a branch target is further than that,
// we need to let it branch to a linker-synthesized code sequence that
// construct a full 32 bit address in a register and jump there. That
// linker-synthesized code is called "thunk".
//
// The function in this file creates thunks.
//
// Note that although thunks play an important role in an executable, they
// don't take up too much space in it. For example, among the clang-16's
// text segment whose size is ~300 MiB on ARM64, thunks in total occupy
// only ~30 KiB or 0.01%. Of course the number depends on an ISA; we would
// need more thunks on ARM32 whose branch range is shorter than ARM64.
// That said, the total size of thunks still isn't that much. Therefore,
// we don't need to try too hard to reduce thunk size to the absolute
// minimum.
#if MOLD_ARM32 || MOLD_ARM64 || MOLD_PPC32 || MOLD_PPC64V1 || MOLD_PPC64V2
#include "third_party/mold/elf/mold.h"
// MISSING #include <tbb/parallel_for.h>
// MISSING #include <tbb/parallel_for_each.h>
namespace mold::elf {
// Returns a branch reach in bytes for a given target.
template <typename E>
static consteval i64 max_distance() {
  if constexpr (is_arm64<E>) {
    // ARM64's branch has a 26-bit immediate that is implicitly padded
    // with two zero bits (all instructions are 4-byte aligned), giving
    // an effective 28-bit operand: [-2^27, 2^27), i.e. PC ± 128 MiB.
    return 1 << 27;
  } else if constexpr (is_arm32<E>) {
    // ARM32's Thumb branch has a 24-bit immediate with 2-byte
    // instruction alignment, effectively 25 bits: [-2^24, 2^24), i.e.
    // PC ± 16 MiB. Non-Thumb branches reach twice as far, but we
    // conservatively use the Thumb limitation.
    return 1 << 24;
  } else {
    // PPC's branch has a 24-bit immediate with 4-byte instruction
    // alignment: [-2^25, 2^25), i.e. PC ± 32 MiB.
    assert(is_ppc<E>);
    return 1 << 25;
  }
}
// We create thunks for each 12.8/1.6/3.2 MiB code block for
// ARM64/ARM32/PPC, respectively (one tenth of the branch reach,
// leaving ample margin for the thunks themselves).
template <typename E>
static constexpr i64 batch_size = max_distance<E>() / 10;

// We assume that a single thunk group is smaller than 100 KiB.
static constexpr i64 max_thunk_size = 102400;
// Returns true if a given relocation is of type used for function calls.
template <typename E>
static bool needs_thunk_rel(const ElfRel<E> &r) {
  u32 ty = r.r_type;

  if constexpr (is_arm64<E>) {
    return ty == R_AARCH64_CALL26 || ty == R_AARCH64_JUMP26;
  } else if constexpr (is_arm32<E>) {
    // Both ARM and Thumb branch/call relocations are candidates.
    return ty == R_ARM_CALL || ty == R_ARM_THM_CALL ||
           ty == R_ARM_JUMP24 || ty == R_ARM_THM_JUMP24 ||
           ty == R_ARM_PLT32;
  } else if constexpr (is_ppc32<E>) {
    return ty == R_PPC_LOCAL24PC || ty == R_PPC_PLTREL24 || ty == R_PPC_REL24;
  } else {
    static_assert(is_ppc64<E>);
    return ty == R_PPC64_REL24_NOTOC || ty == R_PPC64_REL24;
  }
}
// Returns true if the branch at `rel` inside `isec` can reach `sym`
// directly, i.e. without going through a range-extension thunk.
template <typename E>
static bool is_reachable(Context<E> &ctx, InputSection<E> &isec,
                         Symbol<E> &sym, const ElfRel<E> &rel) {
  // We create thunks with a pessimistic assumption that all
  // out-of-section relocations would be out-of-range.
  InputSection<E> *isec2 = sym.get_input_section();
  if (!isec2 || isec.output_section != isec2->output_section)
    return false;

  // Even if the target is the same section, we branch to its PLT
  // if it has one. So a symbol with a PLT is also considered an
  // out-of-section reference.
  if (sym.has_plt(ctx))
    return false;

  // If the target section is in the same output section but
  // hasn't got any address yet, that's unreachable.
  if (isec2->offset == -1)
    return false;

  // Thumb and ARM B instructions cannot be converted to BX, so we
  // always have to make them jump to a thunk to switch processor mode
  // even if their destinations are within their ranges.
  if constexpr (is_arm32<E>) {
    bool is_thumb = sym.get_addr(ctx) & 1;
    if ((rel.r_type == R_ARM_THM_JUMP24 && !is_thumb) ||
        (rel.r_type == R_ARM_JUMP24 && is_thumb) ||
        (rel.r_type == R_ARM_PLT32 && is_thumb))
      return false;
  }

  // PowerPC before Power9 lacks PC-relative load/store instructions.
  // Functions compiled for Power9 or earlier assume that r2 points to
  // GOT+0x8000, while those for Power10 uses r2 as a scratch register.
  // We need a thunk to recompute r2 for interworking.
  if constexpr (is_ppc64v2<E>) {
    if (rel.r_type == R_PPC64_REL24 && !sym.esym().preserves_r2())
      return false;
    if (rel.r_type == R_PPC64_REL24_NOTOC && sym.esym().uses_toc())
      return false;
  }

  // Compute a distance between the relocated place and the symbol
  // and check if they are within reach.
  i64 S = sym.get_addr(ctx, NO_OPD);
  i64 A = get_addend(isec, rel);
  i64 P = isec.get_addr() + rel.r_offset;
  i64 val = S + A - P;
  return -max_distance<E>() <= val && val < max_distance<E>();
}
// Forgets the thunk assignment of every symbol routed through `thunk`,
// so that later batches may assign those symbols to a newer thunk.
template <typename E>
static void reset_thunk(RangeExtensionThunk<E> &thunk) {
  for (i64 i = 0; i < (i64)thunk.symbols.size(); i++) {
    Symbol<E> &sym = *thunk.symbols[i];
    sym.extra.thunk_idx = -1;
    sym.extra.thunk_sym_idx = -1;
    sym.flags = 0;
  }
}
// Scan relocations to collect symbols that need thunks.
// Runs concurrently for many input sections feeding the same thunk.
template <typename E>
static void scan_rels(Context<E> &ctx, InputSection<E> &isec,
                      RangeExtensionThunk<E> &thunk) {
  std::span<const ElfRel<E>> rels = isec.get_rels(ctx);
  std::vector<RangeExtensionRef> &range_extn = isec.extra.range_extn;
  range_extn.resize(rels.size());

  for (i64 i = 0; i < rels.size(); i++) {
    const ElfRel<E> &rel = rels[i];
    if (!needs_thunk_rel(rel))
      continue;

    // Skip if the symbol is undefined. apply_reloc() will report an error.
    Symbol<E> &sym = *isec.file.symbols[rel.r_sym];
    if (!sym.file)
      continue;

    // Skip if the destination is within reach.
    if (is_reachable(ctx, isec, sym, rel))
      continue;

    // This relocation needs a thunk. If the symbol is already in a
    // previous thunk, reuse it.
    if (sym.extra.thunk_idx != -1) {
      range_extn[i].thunk_idx = sym.extra.thunk_idx;
      range_extn[i].sym_idx = sym.extra.thunk_sym_idx;
      continue;
    }

    // Otherwise, add the symbol to the current thunk if it's not
    // added already. sym_idx stays -1 here; it is fixed up after the
    // thunk's symbol list is finalized and sorted.
    range_extn[i].thunk_idx = thunk.thunk_idx;
    range_extn[i].sym_idx = -1;

    // sym.flags acts as an atomic "already queued" latch: exchange()
    // returns the previous value, so only the first thread to flip it
    // appends the symbol to the thunk.
    if (sym.flags.exchange(-1) == 0) {
      std::scoped_lock lock(thunk.mu);
      thunk.symbols.push_back(&sym);
    }
  }
}
// Lays out the members of `osec` and inserts range-extension thunks so
// that every function-call relocation can reach its destination (or a
// thunk that can). Sets each member's offset and the section's final
// sh_size as a side effect.
template <typename E>
void create_range_extension_thunks(Context<E> &ctx, OutputSection<E> &osec) {
  std::span<InputSection<E> *> m = osec.members;
  if (m.empty())
    return;

  m[0]->offset = 0;

  // Initialize input sections with a dummy offset so that we can
  // distinguish sections that have got an address with the one who
  // haven't.
  tbb::parallel_for((i64)1, (i64)m.size(), [&](i64 i) {
    m[i]->offset = -1;
  });

  // We create thunks from the beginning of the section to the end.
  // We manage progress using four offsets which increase monotonically.
  // The locations they point to are always A <= B <= C <= D.
  //
  // Input sections between B and C are in the current batch.
  //
  // A is the input section with the smallest address than can reach
  // anywhere from the current batch.
  //
  // D is the input section with the largest address such that the thunk
  // is reachable from the current batch if it's inserted right before D.
  //
  // ................................ <input sections> ............
  //     A    B    C    D
  //                    ^ We insert a thunk for the current batch just before D
  //          <--->       The current batch, which is smaller than batch_size
  //     <--------->      Smaller than max_distance
  //          <--------->  Smaller than max_distance
  //     <-------------->  Reachable from the current batch
  i64 a = 0;
  i64 b = 0;
  i64 c = 0;
  i64 d = 0;
  i64 offset = 0;

  // Index into osec.thunks of the oldest thunk that is still reachable
  // from the current batch.
  i64 thunk_idx = 0;

  while (b < m.size()) {
    // Move D forward as far as we can jump from B to anywhere in a thunk at D.
    // This also assigns final offsets to the sections we move past.
    while (d < m.size() &&
           align_to(offset, 1 << m[d]->p2align) + m[d]->sh_size + max_thunk_size <
           m[b]->offset + max_distance<E>()) {
      offset = align_to(offset, 1 << m[d]->p2align);
      m[d]->offset = offset;
      offset += m[d]->sh_size;
      d++;
    }

    // Move C forward so that C is apart from B by BATCH_SIZE. We want
    // to make sure that there's at least one section between B and C
    // to ensure progress.
    c = b + 1;
    while (c < m.size() &&
           m[c]->offset + m[c]->sh_size < m[b]->offset + batch_size<E>)
      c++;

    // Move A forward so that A is reachable from C.
    i64 c_offset = (c == m.size()) ? offset : m[c]->offset;
    while (a < m.size() && m[a]->offset + max_distance<E>() < c_offset)
      a++;

    // Erase references to out-of-range thunks.
    while (thunk_idx < osec.thunks.size() &&
           osec.thunks[thunk_idx]->offset < m[a]->offset)
      reset_thunk(*osec.thunks[thunk_idx++]);

    // Create a thunk for input sections between B and C and place it at D.
    offset = align_to(offset, RangeExtensionThunk<E>::alignment);
    RangeExtensionThunk<E> *thunk =
      new RangeExtensionThunk<E>(osec, osec.thunks.size(), offset);
    osec.thunks.emplace_back(thunk);

    // Scan relocations between B and C to collect symbols that need thunks.
    tbb::parallel_for_each(m.begin() + b, m.begin() + c,
                           [&](InputSection<E> *isec) {
      scan_rels(ctx, *isec, *thunk);
    });

    // Now that we know the number of symbols in the thunk, we can compute
    // its size.
    assert(thunk->size() < max_thunk_size);
    offset += thunk->size();

    // Sort symbols added to the thunk to make the output deterministic.
    sort(thunk->symbols, [](Symbol<E> *a, Symbol<E> *b) {
      return std::tuple{a->file->priority, a->sym_idx} <
             std::tuple{b->file->priority, b->sym_idx};
    });

    // Assign offsets within the thunk to the symbols.
    for (i64 i = 0; i < thunk->symbols.size(); i++) {
      Symbol<E> &sym = *thunk->symbols[i];
      sym.extra.thunk_idx = thunk->thunk_idx;
      sym.extra.thunk_sym_idx = i;
    }

    // Scan relocations again to fix symbol offsets in the last thunk.
    tbb::parallel_for_each(m.begin() + b, m.begin() + c,
                           [&](InputSection<E> *isec) {
      std::span<Symbol<E> *> syms = isec->file.symbols;
      std::span<const ElfRel<E>> rels = isec->get_rels(ctx);
      std::span<RangeExtensionRef> range_extn = isec->extra.range_extn;

      for (i64 i = 0; i < rels.size(); i++)
        if (range_extn[i].thunk_idx == thunk->thunk_idx)
          range_extn[i].sym_idx = syms[rels[i].r_sym]->extra.thunk_sym_idx;
    });

    // Move B forward to point to the beginning of the next batch.
    b = c;
  }

  while (thunk_idx < osec.thunks.size())
    reset_thunk(*osec.thunks[thunk_idx++]);

  osec.shdr.sh_size = offset;
}
using E = MOLD_TARGET;
static_assert(max_thunk_size / E::thunk_size < INT16_MAX);
template void create_range_extension_thunks(Context<E> &, OutputSection<E> &);
} // namespace mold::elf
#endif

215
third_party/mold/elf/tls.cc vendored Normal file
View file

@ -0,0 +1,215 @@
// clang-format off
// This file contains helper functions for thread-local storage (TLS).
// TLS is probably the most obscure feature the linker has to support,
// so I'll explain it in detail in this comment.
//
// TLS is a per-thread storage. Thread-local variables (TLVs) are in a TLS
// so that each thread has its own set of thread-local variables. Taking
// an address of a TLV returns a unique value for each thread. For example,
// `&foo` for the following code returns different pointer values for
// different threads.
//
// thread_local int foo;
//
// TLV is a relatively new feature. C for example didn't provide the
// official support for it through the keyword `thread_local` until C11.
// TLV needs a coordination between the compiler, the linker and the
// runtime to work correctly.
//
// An ELF exectuable or a shared library using TLV contains a "TLS template
// image" in the PT_TLS segment. For each newly created thread including the
// initial one, the runtime allocates a contiguous memory for an executable
// and its depending shared libraries and copies template images there. That
// per-thread memory is called the "TLS block". After allocating and
// initializing a TLS block, the runtime sets a register to refer to the TLS
// block, so that the thread-local variables are accessible relative to the
// register.
//
// The register referring to the per-thread storage is called the Thread
// Pointer (TP). TP is part of the thread's context. When the kernel
// scheduler switches threads, TP is saved and restored automatically just
// like other registers are.
//
// The TLS template image is read-only. It contains TLVs' initial values
// for new threads, and no one writes to it at runtime.
//
// Now, let's think about how to access a TLV. We need to know the TLV's
// address to access it which can be done in several different ways as
// follows:
//
// 1. If we are creating an executable, we know the exact size of the TLS
// template image we are creating, and we know where the TP will be
// set to after the template is copied to the TLS block. Therefore,
// the TP-relative address of a TLV in the main executable is known at
// link-time. That means, computing a TLV's address can be as easy as
// `add %dst, %tp, <link-time constant>`.
//
// 2. If we are creating a shared library, we don't exactly know where
// its TLS template image will be copied to in terms of the
// TP-relative address, because we don't know how large the main
// executable's and other libraries' TLS template images are. Only the
// runtime knows the exact TP-relative address.
//
// We can solve the problem with an indirection. Specifically, for
// each TLV whose TP-relative address is only known at process startup
// time, we create a GOT entry to store its TP-relative address. We
// also emit a dynamic relocation to let the runtime to fill the GOT
// entry with a TP-relative address.
//
// Computing a TLV address in this scheme needs at least two machine
// instructions in most ISAs; first instruction loads a value from a
// GOT entry, and the second one adds the loaded value to TP.
//
// 3. Now, think about libraries that you dynamically load with dlopen.
// The TLS block for such library has to be allocated separately from
// the initial TLS block, so we now have two or more discontiguous
// TLS blocks. There's no easy formula to compute an address of a TLV
// in a separate TLS block.
//
// The address of a TLV in a separate TLS block can be obtained by
// calling a libc-provided function, __tls_get_addr(). The function
// takes two arguments; a module ID to identify the ELF file and the
// TLV's offset within the ELF file's TLS template image. Accessing a
// TLV is sometimes compiled to a function call! The module ID and the
// offset are usually stored to GOT as two consecutive words.
//
// The last access method is the most generic, so the compiler emits such
// code by default. But that's the most expensive one, so the linker
// rewrites instructions if possible so that 3) is relaxed to 2) or even
// to 1).
//
// 1) is called the Local Exec access model. 2) is Initial Exec, and 3) is
// General Dynamic.
//
// There's another little trick that the compiler can use if it knows two
// TLVs are in the same ELF file (usually in the same file as the code is).
// In this case, we can call __tls_get_addr() only once with a module ID and
// the offset 0 to obtain the base address of the ELF file's TLS block. The
// base address obtained this way is sometimes called Dynamic Thread Pointer
// or DTP. We can then compute TLVs' addresses by adding their DTP-relative
// addresses to DTP. This access model is called the Local Dynamic.
//
//
// === TLS Descriptor access model ===
//
// As described above, there are arguably too many different TLS access
// models from the most generic one you can use in any ELF file to the most
// efficient one you can use only when building a main executable. Compiling
// source code with an appropriate TLS access model is bothersome. To solve
// the problem, a new TLS access model was proposed. That is called the TLS
// Descriptor (TLSDESC) model.
//
// For a TLV compiled with TLSDESC, we allocate two consecutive GOT slots
// and create a TLSDESC dynamic relocation for them. The dynamic linker
// sets a function pointer to the first GOT slot and its argument to the
// second slot.
//
// To access the TLV, we call the function pointer with the argument we
// read from the second GOT slot. The function returns the TLV's
// TP-relative address.
//
// The runtime chooses the best access method depending on the situation
// and sets a pointer to the most efficient code to the first GOT slot.
// For example, if a TLV's TP-relative address is known at process startup
// time, the runtime sets that address to the second GOT slot and set a
// function that just returns its argument to the first GOT slot.
//
// With TLSDESC, the compiler can always emit the same code for TLVs
// without sacrificing runtime performance.
//
// TLSDESC is better than the traditional, non-TLSDESC TLS access models.
// It's the default on ARM64, but on other targets, TLSDESC is
// unfortunately either optional or even not supported at all. So we still
// need to support both the traditional TLS models and the TLSDESC model.
#include "third_party/mold/elf/mold.h"
namespace mold::elf {
template <typename E>
static ElfPhdr<E> *get_tls_segment(Context<E> &ctx) {
  // Scan the program headers for the PT_TLS segment. An output file has
  // at most one; return null if there is none (or no PHDR table at all).
  if (!ctx.phdr)
    return nullptr;
  for (ElfPhdr<E> &seg : ctx.phdr->phdrs)
    if (seg.p_type == PT_TLS)
      return &seg;
  return nullptr;
}
template <typename E>
u64 get_tls_begin(Context<E> &ctx) {
  // The TLS template image starts at the PT_TLS segment's virtual
  // address; without a TLS segment the begin address is simply 0.
  ElfPhdr<E> *seg = get_tls_segment(ctx);
  return seg ? seg->p_vaddr : 0;
}
// Returns the TP address which can be used for efficient TLV accesses in
// the main executable. TP at runtime refers to a per-process TLS block
// whose address is not known at link-time. So the address returned from
// this function is the TP if the TLS template image were a TLS block.
//
// Returns 0 if the output file doesn't have a PT_TLS segment.
template <typename E>
u64 get_tp_addr(Context<E> &ctx) {
ElfPhdr<E> *phdr = get_tls_segment(ctx);
if (!phdr)
return 0;
// On x86, SPARC and s390x, TP (%gs on i386, %fs on x86-64, %g7 on SPARC
// and %a0/%a1 on s390x) refers to past the end of the TLS block for
// historical reasons. TLVs are accessed with negative offsets from TP.
if constexpr (is_x86<E> || is_sparc<E> || is_s390x<E>)
return align_to(phdr->p_vaddr + phdr->p_memsz, phdr->p_align);
// On ARM, SH4 and Alpha, the runtime appends two words at the beginning
// of TLV template image when copying TLVs to the TLS block, so we need
// to offset it.
if constexpr (is_arm<E> || is_sh4<E> || is_alpha<E>)
return align_down(phdr->p_vaddr - sizeof(Word<E>) * 2, phdr->p_align);
// On PPC and m68k, TP is 0x7000 (28 KiB) past the beginning of the TLV
// block to maximize the addressable range for load/store instructions
// with 16-bit signed immediates. It's not exactly 0x8000 (32 KiB) off
// because there's a small implementation-defined piece of data before
// the TLV block, and the runtime wants to access them efficiently too.
if constexpr (is_ppc<E> || is_m68k<E>)
return phdr->p_vaddr + 0x7000;
// RISC-V just uses the beginning of the main executable's TLV block as
// TP. RISC-V load/store instructions usually take 12-bit signed
// immediates, so the beginning of TLV ± 2 KiB is accessible with a
// single load/store instruction.
assert(is_riscv<E>);
return phdr->p_vaddr;
}
// Returns the address __tls_get_addr() would return if it's called
// with offset 0, i.e. the Dynamic Thread Pointer (DTP).
//
// Returns 0 if the output file doesn't have a PT_TLS segment.
template <typename E>
u64 get_dtp_addr(Context<E> &ctx) {
ElfPhdr<E> *phdr = get_tls_segment(ctx);
if (!phdr)
return 0;
// On PPC64 and m68k, R_DTPOFF is resolved to the address 0x8000 (32
// KiB) past the start of the TLS block. The bias maximizes the
// accessible range for load/store instructions with 16-bit signed
// immediates. That is, if the offset were right at the beginning of
// the start of the TLS block, the half of addressable space (negative
// immediates) would have been wasted.
if constexpr (is_ppc<E> || is_m68k<E>)
return phdr->p_vaddr + 0x8000;
// On RISC-V, the bias is 0x800 as the load/store instructions in the
// ISA usually have a 12-bit immediate.
if constexpr (is_riscv<E>)
return phdr->p_vaddr + 0x800;
// On other targets, DTP simply refers to the beginning of the TLS block.
return phdr->p_vaddr;
}
using E = MOLD_TARGET;
template u64 get_tls_begin<E>(Context<E> &);
template u64 get_tp_addr<E>(Context<E> &);
template u64 get_dtp_addr<E>(Context<E> &);
} // namespace mold::elf

15
third_party/mold/fake_tbb.h vendored Normal file
View file

@ -0,0 +1,15 @@
#ifndef __TBB_FAKE_H
#define __TBB_FAKE_H
// Minimal serial drop-in replacements for the oneTBB entry points mold
// uses. Real TBB distributes iterations across worker threads; these
// fallbacks run every iteration on the calling thread so the program
// computes the same result, just without parallelism. (The previous
// empty stubs silently skipped all the work.)
namespace tbb {
// Serial tbb::parallel_for_each: applies f to each element of
// [first, last) in order.
template<typename InputIterator, typename Function>
void parallel_for_each(InputIterator first, InputIterator last, const Function& f) {
  for (; first != last; ++first)
    f(*first);
}
// Serial range overload: applies body to each element of rng in order.
template<typename Range, typename Body>
void parallel_for_each(Range& rng, const Body& body) {
  for (auto& elem : rng)
    body(elem);
}
}
#endif

37
third_party/mold/filepath.cc vendored Normal file
View file

@ -0,0 +1,37 @@
// clang-format off
#include "third_party/mold/common.h"
#include "third_party/libcxx/filesystem"
#include "libc/calls/calls.h"
#include "libc/calls/struct/stat.h"
#include "libc/calls/struct/stat.macros.h"
#include "libc/calls/struct/timespec.h"
#include "libc/calls/weirdtypes.h"
#include "libc/sysv/consts/s.h"
#include "libc/sysv/consts/utime.h"
#include "libc/time/time.h"
namespace mold {
std::string get_realpath(std::string_view path) {
  // Resolve one level of symlink. If `path` is not a symlink (or can't
  // be read as one), hand back the input unchanged.
  std::error_code ec;
  std::filesystem::path target = std::filesystem::read_symlink(path, ec);
  if (ec)
    return std::string(path);
  // Interpret a relative link target against the link's own directory,
  // then lexically normalize away the "/.." hop.
  std::filesystem::path resolved = filepath(path) / ".." / target;
  return resolved.lexically_normal().string();
}
// Removes redundant '/..' or '/.' from a given path.
// The transformation is done purely by lexical processing.
// This function does not access file system.
std::string path_clean(std::string_view path) {
  std::filesystem::path p = filepath(path);
  return p.lexically_normal().string();
}
// Converts a path to an absolute, lexically-normalized form. Relative
// paths are anchored at the current working directory.
std::filesystem::path to_abs_path(std::filesystem::path path) {
  if (!path.is_absolute())
    path = std::filesystem::current_path() / path;
  return path.lexically_normal();
}
} // namespace mold

194
third_party/mold/filetype.h vendored Normal file
View file

@ -0,0 +1,194 @@
// clang-format off
#pragma once
#include "third_party/mold/common.h"
// MISSING #include "../elf/elf.h"
namespace mold {
// Classification of an input file, as determined by get_file_type()
// below from the file's leading magic bytes.
enum class FileType {
UNKNOWN,
EMPTY,          // zero-byte file
ELF_OBJ,        // ELF relocatable object
ELF_DSO,        // ELF shared object
MACH_OBJ,       // Mach-O relocatable object
MACH_EXE,       // Mach-O executable
MACH_DYLIB,     // Mach-O dynamic library
MACH_BUNDLE,    // Mach-O bundle
MACH_UNIVERSAL, // Mach-O fat/universal binary
AR,             // ar(1) archive
THIN_AR,        // thin ar(1) archive
TAPI,           // macOS text-based API stub (.tbd)
TEXT,           // file starting with printable characters
GCC_LTO_OBJ,    // GCC LTO object (FAT or slim)
LLVM_BITCODE,   // LLVM bitcode (LLVM LTO object)
};
template <typename MappedFile>
bool is_text_file(MappedFile *mf) {
u8 *data = mf->data;
return mf->size >= 4 && isprint(data[0]) && isprint(data[1]) &&
isprint(data[2]) && isprint(data[3]);
}
// Returns true if a given ELF relocatable file is a GCC LTO object.
// Inspects section headers and the symbol table, so `mf` must contain a
// complete ELF image.
template <typename E, typename Context, typename MappedFile>
inline bool is_gcc_lto_obj(Context &ctx, MappedFile *mf) {
using namespace mold::elf;
const char *data = mf->get_contents().data();
ElfEhdr<E> &ehdr = *(ElfEhdr<E> *)data;
ElfShdr<E> *sh_begin = (ElfShdr<E> *)(data + ehdr.e_shoff);
std::span<ElfShdr<E>> shdrs{(ElfShdr<E> *)(data + ehdr.e_shoff), ehdr.e_shnum};
// e_shstrndx is a 16-bit field. If .shstrtab's section index is
// too large, the actual number is stored to sh_link field.
i64 shstrtab_idx = (ehdr.e_shstrndx == SHN_XINDEX)
? sh_begin->sh_link : ehdr.e_shstrndx;
for (ElfShdr<E> &sec : shdrs) {
// GCC FAT LTO objects contain both regular ELF sections and GCC-
// specific LTO sections, so that they can be linked as LTO objects if
// the LTO linker plugin is available and falls back as regular
// objects otherwise. GCC FAT LTO object can be identified by the
// presence of `.gnu.lto_.symtab` section.
if (!ctx.arg.plugin.empty()) {
std::string_view name = data + shdrs[shstrtab_idx].sh_offset + sec.sh_name;
if (name.starts_with(".gnu.lto_.symtab."))
return true;
}
if (sec.sh_type != SHT_SYMTAB)
continue;
// GCC non-FAT LTO object contains only section symbols followed by
// a common symbol whose name is `__gnu_lto_slim` (or `__gnu_lto_v1`
// for older GCC releases).
std::span<ElfSym<E>> elf_syms{(ElfSym<E> *)(data + sec.sh_offset),
(size_t)sec.sh_size / sizeof(ElfSym<E>)};
// These symbol types don't rule out an LTO object; skip over them.
auto skip = [](u8 type) {
return type == STT_NOTYPE || type == STT_FILE || type == STT_SECTION;
};
// Index 0 is the mandatory null symbol, so start scanning at 1.
i64 i = 1;
while (i < elf_syms.size() && skip(elf_syms[i].st_type))
i++;
if (i < elf_syms.size() && elf_syms[i].st_shndx == SHN_COMMON) {
std::string_view name =
data + shdrs[sec.sh_link].sh_offset + elf_syms[i].st_name;
if (name.starts_with("__gnu_lto_"))
return true;
}
break;
}
return false;
}
// Sniffs the type of an input file from its leading magic bytes.
// Little- and big-endian ELF relocatable files are additionally probed
// for GCC LTO sections so they can be routed to the LTO plugin.
template <typename Context, typename MappedFile>
FileType get_file_type(Context &ctx, MappedFile *mf) {
using namespace elf;
std::string_view data = mf->get_contents();
if (data.empty())
return FileType::EMPTY;
if (data.starts_with("\177ELF")) {
u8 byte_order = ((ElfEhdr<I386> *)data.data())->e_ident[EI_DATA];
if (byte_order == ELFDATA2LSB) {
// I386/X86_64 here stand in for "any 32/64-bit little-endian
// target"; only the header layout matters for sniffing.
auto &ehdr = *(ElfEhdr<I386> *)data.data();
if (ehdr.e_type == ET_REL) {
if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
if (is_gcc_lto_obj<I386>(ctx, mf))
return FileType::GCC_LTO_OBJ;
} else {
if (is_gcc_lto_obj<X86_64>(ctx, mf))
return FileType::GCC_LTO_OBJ;
}
return FileType::ELF_OBJ;
}
if (ehdr.e_type == ET_DYN)
return FileType::ELF_DSO;
} else {
// Likewise M68K/SPARC64 stand in for 32/64-bit big-endian targets.
auto &ehdr = *(ElfEhdr<M68K> *)data.data();
if (ehdr.e_type == ET_REL) {
if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
if (is_gcc_lto_obj<M68K>(ctx, mf))
return FileType::GCC_LTO_OBJ;
} else {
if (is_gcc_lto_obj<SPARC64>(ctx, mf))
return FileType::GCC_LTO_OBJ;
}
return FileType::ELF_OBJ;
}
if (ehdr.e_type == ET_DYN)
return FileType::ELF_DSO;
}
return FileType::UNKNOWN;
}
// Mach-O (64-bit little-endian magic); byte 12 holds the filetype.
if (data.starts_with("\xcf\xfa\xed\xfe")) {
switch (*(ul32 *)(data.data() + 12)) {
case 1: // MH_OBJECT
return FileType::MACH_OBJ;
case 2: // MH_EXECUTE
return FileType::MACH_EXE;
case 6: // MH_DYLIB
return FileType::MACH_DYLIB;
case 8: // MH_BUNDLE
return FileType::MACH_BUNDLE;
}
return FileType::UNKNOWN;
}
if (data.starts_with("!<arch>\n"))
return FileType::AR;
if (data.starts_with("!<thin>\n"))
return FileType::THIN_AR;
if (data.starts_with("--- !tapi-tbd"))
return FileType::TAPI;
if (data.starts_with("\xca\xfe\xba\xbe"))
return FileType::MACH_UNIVERSAL;
if (is_text_file(mf))
return FileType::TEXT;
// LLVM bitcode: raw magic, or wrapped in a bitcode wrapper header.
if (data.starts_with("\xde\xc0\x17\x0b"))
return FileType::LLVM_BITCODE;
if (data.starts_with("BC\xc0\xde"))
return FileType::LLVM_BITCODE;
return FileType::UNKNOWN;
}
// Human-readable name of a FileType, mainly for diagnostics.
inline std::string filetype_to_string(FileType type) {
  const char *name = "UNKNOWN";
  switch (type) {
  case FileType::UNKNOWN:        name = "UNKNOWN"; break;
  case FileType::EMPTY:          name = "EMPTY"; break;
  case FileType::ELF_OBJ:        name = "ELF_OBJ"; break;
  case FileType::ELF_DSO:        name = "ELF_DSO"; break;
  case FileType::MACH_EXE:       name = "MACH_EXE"; break;
  case FileType::MACH_OBJ:       name = "MACH_OBJ"; break;
  case FileType::MACH_DYLIB:     name = "MACH_DYLIB"; break;
  case FileType::MACH_BUNDLE:    name = "MACH_BUNDLE"; break;
  case FileType::MACH_UNIVERSAL: name = "MACH_UNIVERSAL"; break;
  case FileType::AR:             name = "AR"; break;
  case FileType::THIN_AR:        name = "THIN_AR"; break;
  case FileType::TAPI:           name = "TAPI"; break;
  case FileType::TEXT:           name = "TEXT"; break;
  case FileType::GCC_LTO_OBJ:    name = "GCC_LTO_OBJ"; break;
  case FileType::LLVM_BITCODE:   name = "LLVM_BITCODE"; break;
  }
  return name;
}
// Allows streaming a FileType directly into log/error messages.
inline std::ostream &operator<<(std::ostream &out, FileType type) {
  return out << filetype_to_string(type);
}
} // namespace mold

150
third_party/mold/glob.cc vendored Normal file
View file

@ -0,0 +1,150 @@
// clang-format off
#include "third_party/mold/common.h"
#include "third_party/libcxx/cstring"
namespace mold {
// Compiles a glob pattern into a flat sequence of matching elements
// (literal runs, '?', '*', and '[...]' character classes). Returns
// nullopt for a malformed pattern (unclosed bracket, dangling
// backslash, or an inverted range like "[z-a]").
std::optional<Glob> Glob::compile(std::string_view pat) {
std::vector<Element> vec;
while (!pat.empty()) {
u8 c = pat[0];
pat = pat.substr(1);
switch (c) {
case '[': {
// Here are a few bracket pattern examples:
//
// [abc]: a, b or c
// [$\]!]: $, ] or !
// [a-czg-i]: a, b, c, z, g, h, or i
// [^a-z]: Any character except lowercase letters
vec.push_back({BRACKET});
std::bitset<256> &bitset = vec.back().bitset;
bool negate = false;
if (!pat.empty() && pat[0] == '^') {
negate = true;
pat = pat.substr(1);
}
bool closed = false;
while (!pat.empty()) {
if (pat[0] == ']') {
pat = pat.substr(1);
closed = true;
break;
}
// A backslash escapes the next character (e.g. "\]").
if (pat[0] == '\\') {
pat = pat.substr(1);
if (pat.empty())
return {};
}
// "x-y" adds the inclusive character range [x, y].
if (pat.size() >= 3 && pat[1] == '-') {
u8 start = pat[0];
u8 end = pat[2];
pat = pat.substr(3);
// The range's upper bound may itself be escaped ("[a-\]]").
if (end == '\\') {
if (pat.empty())
return {};
end = pat[0];
pat = pat.substr(1);
}
if (end < start)
return {};
for (i64 i = start; i <= end; i++)
bitset[i] = true;
} else {
bitset[(u8)pat[0]] = true;
pat = pat.substr(1);
}
}
// An unclosed bracket makes the whole pattern malformed.
if (!closed)
return {};
if (negate)
bitset.flip();
break;
}
case '?':
vec.push_back({QUESTION});
break;
case '*':
vec.push_back({STAR});
break;
default:
// Consecutive literal characters accumulate into one STRING element.
if (vec.empty() || vec.back().kind != STRING)
vec.push_back({STRING});
vec.back().str += c;
break;
}
}
return {Glob{std::move(vec)}};
}
// Returns true if `str` matches this compiled pattern in its entirety.
bool Glob::match(std::string_view str) {
return do_match(str, elements);
}
bool Glob::do_match(std::string_view str, std::span<Element> elements) {
while (!elements.empty()) {
Element &e = elements[0];
elements = elements.subspan(1);
switch (e.kind) {
case STRING:
if (str.empty() || !str.starts_with(e.str))
return false;
str = str.substr(e.str.size());
break;
case STAR:
if (elements.empty())
return true;
// Patterns like "*foo*bar*" should be much more common than more
// complex ones like "*foo*[abc]*" or "*foo**?bar*", so we optimize
// the former case here.
if (elements[0].kind == STRING) {
for (;;) {
size_t pos = str.find(elements[0].str);
if (pos == str.npos)
break;
if (do_match(str.substr(pos + elements[0].str.size()),
elements.subspan(1)))
return true;
str = str.substr(pos + 1);
}
return false;
}
// Other cases are handled here.
for (i64 j = 0; j < str.size(); j++)
if (do_match(str.substr(j), elements))
return true;
return false;
case QUESTION:
if (str.empty())
return false;
str = str.substr(1);
break;
case BRACKET:
if (str.empty() || !e.bitset[str[0]])
return false;
str = str.substr(1);
break;
}
}
return str.empty();
}
} // namespace mold

21
third_party/mold/hyperloglog.cc vendored Normal file
View file

@ -0,0 +1,21 @@
// clang-format off
// This file implements HyperLogLog algorithm, which estimates
// the number of unique items in a given multiset.
//
// For more info, read
// https://engineering.fb.com/2018/12/13/data-infrastructure/hyperloglog
#include "third_party/mold/common.h"
#include "third_party/libcxx/cmath"
namespace mold {
// Estimates the number of distinct items observed so far using the
// standard HyperLogLog formula: a bias-corrected, normalized harmonic
// mean of 2^bucket over all registers.
i64 HyperLogLog::get_cardinality() const {
  double harmonic = 0;
  for (i64 reg : buckets)
    harmonic += pow(2, -reg);
  return ALPHA * NBUCKETS * NBUCKETS / harmonic;
}
} // namespace mold

222
third_party/mold/integers.h vendored Normal file
View file

@ -0,0 +1,222 @@
// clang-format off
// This file defines integral types for file input/output. We need to use
// these types instead of the plain integers (such as uint32_t or int32_t)
// when reading from/writing to an mmap'ed file area for the following
// reasons:
//
// 1. mold is always a cross linker and should not depend on what host it
// is running on. Users should be able to run mold on a big-endian
// SPARC machine to create a little-endian RV64 binary, for example.
//
// 2. Even though data members in all ELF data structures are naturally
// aligned, they are not guaranteed to be aligned on memory. Because
// archive file (.a file) aligns each member only to a 2 byte boundary,
// anything larger than 2 bytes may be unaligned in an mmap'ed memory.
// Unaligned access is an undefined behavior in C/C++, so we shouldn't
// cast an arbitrary pointer to a uint32_t, for example, to read a
// 32-bits value.
//
// The data types defined in this file don't depend on host byte order and
// don't do unaligned access.
#pragma once
#include "third_party/libcxx/bit"
#include "third_party/libcxx/cstdint"
#include "third_party/libcxx/cstring"
#if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
# define __LITTLE_ENDIAN__ 1
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
# define __BIG_ENDIAN__ 1
# else
# error "unknown host byte order"
# endif
#endif
namespace mold {
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int8_t i8;
typedef int16_t i16;
typedef int32_t i32;
typedef int64_t i64;
// Byte-swaps a 2-, 4- or 8-byte integer. The size dispatch is resolved
// at compile time, so each instantiation reduces to one bswap builtin.
template <typename T>
static inline T bswap(T val) {
  if constexpr (sizeof(T) == 2)
    return __builtin_bswap16(val);
  else if constexpr (sizeof(T) == 4)
    return __builtin_bswap32(val);
  else if constexpr (sizeof(T) == 8)
    return __builtin_bswap64(val);
  else
    __builtin_unreachable();
}
// An integer of SIZE bytes stored in little-endian byte order at an
// arbitrary (possibly unaligned) address. All access goes through
// memcpy or byte-wise reads, so overlaying this type on mmap'ed file
// data is well-defined; values are byte-swapped on big-endian hosts.
// SIZE == 3 provides a packed 24-bit variant.
template <typename T, int SIZE = sizeof(T)>
class LittleEndian {
public:
LittleEndian() = default;
LittleEndian(T x) { *this = x; }
// Read: decode the stored bytes into a native-endian value.
operator T() const {
if constexpr (sizeof(T) == SIZE) {
T x;
memcpy(&x, val, sizeof(T));
if constexpr (std::endian::native == std::endian::big)
x = bswap(x);
return x;
} else {
// 24-bit variant: assemble the value from three bytes, LSB first.
static_assert(SIZE == 3);
return (val[2] << 16) | (val[1] << 8) | val[0];
}
}
// Write: encode a native-endian value into the stored bytes.
LittleEndian &operator=(T x) {
if constexpr (sizeof(T) == SIZE) {
if constexpr (std::endian::native == std::endian::big)
x = bswap(x);
memcpy(val, &x, sizeof(T));
} else {
static_assert(SIZE == 3);
val[2] = x >> 16;
val[1] = x >> 8;
val[0] = x;
}
return *this;
}
// The operators below are read-modify-write conveniences; they are
// not atomic.
LittleEndian &operator++() {
return *this = *this + 1;
}
LittleEndian operator++(int) {
T ret = *this;
*this = *this + 1;
return ret;
}
LittleEndian &operator--() {
return *this = *this - 1;
}
LittleEndian operator--(int) {
T ret = *this;
*this = *this - 1;
return ret;
}
LittleEndian &operator+=(T x) {
return *this = *this + x;
}
LittleEndian &operator-=(T x) {
return *this = *this - x;
}
LittleEndian &operator&=(T x) {
return *this = *this & x;
}
LittleEndian &operator|=(T x) {
return *this = *this | x;
}
private:
// Raw storage, always SIZE bytes, no alignment requirement.
u8 val[SIZE];
};
// Aliases: i/u = signed/unsigned, l = little-endian, number = bit width.
using il16 = LittleEndian<i16>;
using il32 = LittleEndian<i32>;
using il64 = LittleEndian<i64>;
using ul16 = LittleEndian<u16>;
using ul24 = LittleEndian<u32, 3>;
using ul32 = LittleEndian<u32>;
using ul64 = LittleEndian<u64>;
// Big-endian counterpart of LittleEndian above: an integer of SIZE
// bytes stored big-endian at an arbitrary (possibly unaligned)
// address; values are byte-swapped on little-endian hosts.
// SIZE == 3 provides a packed 24-bit variant.
template <typename T, int SIZE = sizeof(T)>
class BigEndian {
public:
BigEndian() = default;
BigEndian(T x) { *this = x; }
// Read: decode the stored bytes into a native-endian value.
operator T() const {
if constexpr (sizeof(T) == SIZE) {
T x;
memcpy(&x, val, sizeof(T));
if constexpr (std::endian::native == std::endian::little)
x = bswap(x);
return x;
} else {
// 24-bit variant: assemble the value from three bytes, MSB first.
static_assert(SIZE == 3);
return (val[0] << 16) | (val[1] << 8) | val[2];
}
}
// Write: encode a native-endian value into the stored bytes.
BigEndian &operator=(T x) {
if constexpr (sizeof(T) == SIZE) {
if constexpr (std::endian::native == std::endian::little)
x = bswap(x);
memcpy(val, &x, sizeof(T));
} else {
static_assert(SIZE == 3);
val[0] = x >> 16;
val[1] = x >> 8;
val[2] = x;
}
return *this;
}
// Read-modify-write conveniences; not atomic.
BigEndian &operator++() {
return *this = *this + 1;
}
BigEndian operator++(int) {
T ret = *this;
*this = *this + 1;
return ret;
}
BigEndian &operator--() {
return *this = *this - 1;
}
BigEndian operator--(int) {
T ret = *this;
*this = *this - 1;
return ret;
}
BigEndian &operator+=(T x) {
return *this = *this + x;
}
BigEndian &operator-=(T x) {
return *this = *this - x;
}
BigEndian &operator&=(T x) {
return *this = *this & x;
}
BigEndian &operator|=(T x) {
return *this = *this | x;
}
private:
// Raw storage, always SIZE bytes, no alignment requirement.
u8 val[SIZE];
};
// Aliases: i/u = signed/unsigned, b = big-endian, number = bit width.
using ib16 = BigEndian<i16>;
using ib32 = BigEndian<i32>;
using ib64 = BigEndian<i64>;
using ub16 = BigEndian<u16>;
using ub24 = BigEndian<u32, 3>;
using ub32 = BigEndian<u32>;
using ub64 = BigEndian<u64>;
} // namespace mold

188
third_party/mold/main.cc vendored Normal file
View file

@ -0,0 +1,188 @@
// clang-format off
#include "third_party/mold/common.h"
// MISSING #include "config.h"
#include "third_party/libcxx/cstring"
#include "third_party/libcxx/filesystem"
#include "libc/calls/calls.h"
#include "libc/calls/sigtimedwait.h"
#include "libc/calls/struct/sigaction.h"
#include "libc/calls/struct/siginfo.h"
#include "libc/sysv/consts/sa.h"
#include "libc/sysv/consts/sicode.h"
#include "libc/sysv/consts/ss.h"
// MISSING #include <tbb/global_control.h>
#ifdef USE_SYSTEM_MIMALLOC
// MISSING #include <mimalloc-new-delete.h>
#endif
#ifdef __FreeBSD__
// MISSING #include <sys/sysctl.h>
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/runtime/pathconf.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/sysconf.h"
#include "libc/sysv/consts/f.h"
#include "libc/sysv/consts/fileno.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/ok.h"
#include "libc/time/time.h"
#include "third_party/getopt/getopt.h"
#include "third_party/musl/crypt.h"
#include "third_party/musl/lockf.h"
#endif
namespace mold {
std::string mold_version_string = MOLD_VERSION;
namespace elf {
int main(int argc, char **argv);
}
namespace macho {
int main(int argc, char **argv);
}
// Builds the full version banner, e.g.
// "mold 1.x (<git-hash>; compatible with GNU ld)". The git hash is
// omitted when unknown; "sold" builds get a distinct product name.
static std::string get_mold_version() {
  std::string base = MOLD_IS_SOLD ? "mold (sold) " : "mold ";
  std::string suffix = mold_git_hash.empty()
      ? std::string(" (compatible with GNU ld)")
      : " (" + mold_git_hash + "; compatible with GNU ld)";
  return base + MOLD_VERSION + suffix;
}
// Removes the temporary output file, if one is currently live, so that
// aborted runs don't leave a half-written ".mold-XXXXXX" file behind.
void cleanup() {
if (output_tmpfile)
unlink(output_tmpfile);
}
// Formats the current errno value as a message string.
std::string errno_string() {
  // strerror is not thread-safe, so guard it with a lock.
  static std::mutex mu;
  std::lock_guard<std::mutex> lock(mu);
  return strerror(errno);
}
// Returns the path of the mold executable itself
std::string get_self_path() {
#ifdef __FreeBSD__
// /proc may not be mounted on FreeBSD. The proper way to get the
// current executable's path is to use sysctl(2).
int mib[4];
mib[0] = CTL_KERN;
mib[1] = KERN_PROC;
mib[2] = KERN_PROC_PATHNAME;
mib[3] = -1;
// First call queries the required buffer size; second fills it in.
size_t size;
sysctl(mib, 4, NULL, &size, NULL, 0);
std::string path;
path.resize(size);
sysctl(mib, 4, path.data(), &size, NULL, 0);
return path;
#else
// On Linux-like systems /proc/self/exe is a symlink to the binary.
return std::filesystem::read_symlink("/proc/self/exe").string();
#endif
}
// mold mmap's an output file, and the mmap succeeds even if there's
// no enough space left on the filesystem. The actual disk blocks are
// not allocated on the mmap call but when the program writes to it
// for the first time.
//
// If a disk becomes full as a result of a write to an mmap'ed memory
// region, the failure of the write is reported as a SIGBUS or structured
// exception with code EXCEPTION_IN_PAGE_ERROR on Windows. This
// signal handler catches that signal and prints out a user-friendly
// error message. Without this, it is very hard to realize that the
// disk might be full.
#ifdef _WIN32
// Windows analogue of the POSIX handler below: an
// EXCEPTION_IN_PAGE_ERROR whose faulting address falls inside the
// output mapping means a write to the memory-mapped output file failed,
// which is almost always a full disk.
static LONG WINAPI vectored_handler(_EXCEPTION_POINTERS *exception_info) {
static std::mutex mu;
std::scoped_lock lock{mu};
PEXCEPTION_RECORD exception_record = exception_info->ExceptionRecord;
ULONG_PTR *exception_information = exception_record->ExceptionInformation;
// For EXCEPTION_IN_PAGE_ERROR, ExceptionInformation[1] holds the
// faulting data address.
if (exception_record->ExceptionCode == EXCEPTION_IN_PAGE_ERROR &&
(ULONG_PTR)output_buffer_start <= exception_information[1] &&
exception_information[1] < (ULONG_PTR)output_buffer_end) {
const char msg[] = "mold: failed to write to an output file. Disk full?\n";
(void)!write(_fileno(stderr), msg, sizeof(msg) - 1);
}
cleanup();
_exit(1);
}
// Registers the vectored handler above for the whole process.
void install_signal_handler() {
AddVectoredExceptionHandler(0, vectored_handler);
}
#else
// Fatal-signal handler. A SIGSEGV/SIGBUS whose faulting address lies
// inside the output mapping almost always means the disk filled up
// while writing the mmap'ed output, so print a friendly hint.
//
// NOTE(review): strictly speaking only write(2)/_exit(2) are
// async-signal-safe; locking a std::mutex here is not — presumably
// tolerated because the process exits immediately. Confirm.
static void sighandler(int signo, siginfo_t *info, void *ucontext) {
static std::mutex mu;
std::scoped_lock lock{mu};
switch (signo) {
case SIGSEGV:
case SIGBUS:
if (output_buffer_start <= info->si_addr &&
info->si_addr < output_buffer_end) {
const char msg[] = "mold: failed to write to an output file. Disk full?\n";
(void)!write(STDERR_FILENO, msg, sizeof(msg) - 1);
}
break;
case SIGABRT: {
const char msg[] =
"mold: aborted\n"
"mold: If mold failed due to a spurious failure of pthread_create, "
"it's likely because of https://github.com/oneapi-src/oneTBB/pull/824. "
"You should ensure that you are using 2021.9.0 or newer version of libtbb.\n";
(void)!write(STDERR_FILENO, msg, sizeof(msg) - 1);
break;
}
}
_exit(1);
}
// Installs sighandler() (with siginfo so the faulting address is
// available) for the signals we care about.
//
// NOTE(review): sighandler() has a SIGSEGV case, but SIGSEGV is not
// registered here — confirm whether that's intentional (some platforms
// report mmap write failures as SIGSEGV rather than SIGBUS).
void install_signal_handler() {
struct sigaction action;
action.sa_sigaction = sighandler;
sigemptyset(&action.sa_mask);
action.sa_flags = SA_SIGINFO;
sigaction(SIGABRT, &action, NULL);
sigaction(SIGINT, &action, NULL);
sigaction(SIGTERM, &action, NULL);
sigaction(SIGBUS, &action, NULL);
}
#endif
// Picks the worker-thread count used when the user doesn't specify one.
i64 get_default_thread_count() {
  // mold doesn't scale well above 32 threads, so cap the parallelism
  // level that TBB reports as available.
  int avail = tbb::global_control::active_value(
      tbb::global_control::max_allowed_parallelism);
  return avail < 32 ? avail : 32;
}
} // namespace mold
// Entry point. In "sold" builds, dispatch on the invoked program name:
// invocations named "ld64" or "ld64.*" run the Mach-O linker; all other
// invocations run the ELF linker.
int main(int argc, char **argv) {
mold::mold_version = mold::get_mold_version();
#if MOLD_IS_SOLD
std::string cmd = mold::filepath(argv[0]).filename().string();
if (cmd == "ld64" || cmd.starts_with("ld64."))
return mold::macho::main(argc, argv);
#endif
return mold::elf::main(argc, argv);
}

61
third_party/mold/mold.mk vendored Normal file
View file

@ -0,0 +1,61 @@
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘

# Build rules for the vendored mold linker.
PKGS += THIRD_PARTY_MOLD

private CPPFLAGS += -std=c++20

THIRD_PARTY_MOLD_ARTIFACTS += THIRD_PARTY_MOLD_A
THIRD_PARTY_MOLD = $(THIRD_PARTY_MOLD_A_DEPS) $(THIRD_PARTY_MOLD_A)
THIRD_PARTY_MOLD_A = o/$(MODE)/third_party/mold/mold.a
THIRD_PARTY_MOLD_FILES := $(wildcard third_party/mold/*)
THIRD_PARTY_MOLD_HDRS = $(filter %.h,$(THIRD_PARTY_MOLD_FILES))
THIRD_PARTY_MOLD_SRCS = $(filter %.cc,$(THIRD_PARTY_MOLD_FILES))
THIRD_PARTY_MOLD_OBJS = $(THIRD_PARTY_MOLD_SRCS:%.cc=o/$(MODE)/%.o)

THIRD_PARTY_MOLD_A_DIRECTDEPS = \
	THIRD_PARTY_LIBCXX \
	THIRD_PARTY_XXHASH

THIRD_PARTY_MOLD_A_DEPS := \
	$(call uniq,$(foreach x,$(THIRD_PARTY_MOLD_A_DIRECTDEPS),$($(x))))

# Flags mirror upstream's build configuration:
# https://github.com/rui314/mold/blob/d4d93d7fb72dd19c44aafa4dd5397e35787d33ad/CMakeLists.txt#L62
# Note: the flags list must not end with a trailing backslash, or the
# following variable assignment would be spliced into CPPFLAGS.
$(THIRD_PARTY_MOLD_OBJS): private \
	CPPFLAGS += \
		-std=gnu++20 \
		-fno-exceptions \
		-fno-unwind-tables \
		-fno-asynchronous-unwind-tables \
		-Wno-sign-compare \
		-Wno-unused-function

THIRD_PARTY_MOLD_CHECKS = \
	$(THIRD_PARTY_MOLD_A).pkg \
	$(THIRD_PARTY_MOLD_HDRS:%=o/$(MODE)/%.ok)

$(THIRD_PARTY_MOLD_A): \
		third_party/mold/ \
		$(THIRD_PARTY_MOLD_A).pkg \
		$(THIRD_PARTY_MOLD_OBJS)

$(THIRD_PARTY_MOLD_A).pkg: \
		$(THIRD_PARTY_MOLD_OBJS) \
		$(foreach x,$(THIRD_PARTY_MOLD_A_DIRECTDEPS),$($(x)_A).pkg)

# NOTE(review): the third_party/awk object and AWK checks referenced
# below look like copy-paste from the awk package; confirm intent.
o/$(MODE)/third_party/mold/mold.com.dbg: \
		$(THIRD_PARTY_MOLD) \
		o/$(MODE)/third_party/awk/main.o \
		$(CRT) \
		$(APE_NO_MODIFY_SELF)
	@$(APELINK)

THIRD_PARTY_MOLD_COMS = o/$(MODE)/third_party/mold/mold.com
THIRD_PARTY_MOLD_BINS = $(THIRD_PARTY_MOLD_COMS) $(THIRD_PARTY_MOLD_COMS:%=%.dbg)
THIRD_PARTY_MOLD_LIBS = $(THIRD_PARTY_MOLD_A)
$(THIRD_PARTY_MOLD_OBJS): $(BUILD_FILES) third_party/mold/mold.mk

.PHONY: o/$(MODE)/third_party/mold
o/$(MODE)/third_party/mold: \
		$(THIRD_PARTY_MOLD_BINS) \
		$(THIRD_PARTY_AWK_CHECKS)

167
third_party/mold/multi-glob.cc vendored Normal file
View file

@ -0,0 +1,167 @@
// clang-format off
// This file implements the Aho-Corasick algorithm to match multiple
// glob patterns to symbol strings as quickly as possible.
//
// Here are some examples of glob patterns:
//
// qt_private_api_tag*
// *16QAccessibleCache*
// *32QAbstractFileIconProviderPrivate*
// *17QPixmapIconEngine*
//
// `*` is a wildcard that matches any substring. We sometimes have
// hundreds of glob patterns and have to match them against millions
// of symbol strings.
//
// Aho-Corasick cannot handle complex patterns such as `*foo*bar*`.
// We handle such patterns with the Glob class. Glob is relatively
// slow, but complex patterns are rare in practice, so it should be
// OK.
#include "third_party/mold/common.h"
#include "third_party/libcxx/queue"
#include "third_party/libcxx/regex"
namespace mold {
// Returns the smallest value among all registered patterns that match
// `str`, or nullopt if none matches. The Aho-Corasick automaton is
// lazily compiled on first call.
std::optional<u32> MultiGlob::find(std::string_view str) {
std::call_once(once, [&] { compile(); });
u32 val = UINT32_MAX;
if (root) {
// Match against simple glob patterns
TrieNode *node = root.get();
// Advance the automaton by one input byte, following suffix links
// on mismatch, and record the best value seen along the way.
auto walk = [&](u8 c) {
for (;;) {
if (node->children[c]) {
node = node->children[c].get();
val = std::min(val, node->value);
return;
}
if (!node->suffix_link)
return;
node = node->suffix_link;
}
};
// '\0' is the begin/end-of-string marker inserted by handle_stars().
walk('\0');
for (u8 c : str)
walk(c);
walk('\0');
}
// Match against complex glob patterns
for (std::pair<Glob, u32> &glob : globs)
if (glob.first.match(str))
val = std::min(val, glob.second);
if (val == UINT32_MAX)
return {};
return val;
}
static bool is_simple_pattern(std::string_view pat) {
static std::regex re(R"(\*?[^*[?]+\*?)", std::regex_constants::optimize);
return std::regex_match(pat.begin(), pat.end(), re);
}
// Converts a simple pattern into the string stored in the trie.
// Aho-Corasick can do only substring matching, so anchoring is encoded
// with explicit '\0' markers: "foo" -> "\0foo\0", "*foo" -> "foo\0",
// "foo*" -> "\0foo", and "*foo*" -> "foo".
static std::string handle_stars(std::string_view pat) {
  std::string str(pat);
  bool leading = str.starts_with('*');
  bool trailing = str.ends_with('*');
  std::string mark(1, '\0');
  if (leading && trailing)
    return str.substr(1, str.size() - 2);
  if (leading)
    return str.substr(1) + mark;
  if (trailing)
    return mark + str.substr(0, str.size() - 1);
  return mark + str + mark;
}
// Registers a glob pattern with an associated value. Returns false if
// the pattern can't be compiled. When multiple patterns match the same
// string, find() reports the smallest associated value. Must not be
// called after the automaton has been compiled (i.e. after find()).
bool MultiGlob::add(std::string_view pat, u32 val) {
assert(!is_compiled);
assert(!pat.empty());
strings.push_back(std::string(pat));
// Complex glob pattern
if (!is_simple_pattern(pat)) {
if (std::optional<Glob> glob = Glob::compile(pat)) {
globs.push_back({std::move(*glob), val});
return true;
}
return false;
}
// Simple glob pattern: insert the anchored literal into the trie.
if (!root)
root.reset(new TrieNode);
TrieNode *node = root.get();
for (u8 c : handle_stars(pat)) {
if (!node->children[c])
node->children[c].reset(new TrieNode);
node = node->children[c].get();
}
// Keep the minimum if several patterns end at the same node.
node->value = std::min(node->value, val);
return true;
}
// Finalizes the Aho-Corasick automaton. Invoked exactly once (via
// std::call_once in find()) after all patterns have been added.
void MultiGlob::compile() {
is_compiled = true;
if (root) {
fix_suffix_links(*root);
fix_values();
}
}
// Computes Aho-Corasick suffix links, depth-first: each child's link
// points at the node for the longest proper suffix of its string that
// also exists in the trie (falling back to the root).
void MultiGlob::fix_suffix_links(TrieNode &node) {
for (i64 i = 0; i < 256; i++) {
if (!node.children[i])
continue;
TrieNode &child = *node.children[i];
// Walk the parent's suffix chain until a node with an edge for
// byte i is found, or the chain runs out (then link to root).
TrieNode *cur = node.suffix_link;
for (;;) {
if (!cur) {
child.suffix_link = root.get();
break;
}
if (cur->children[i]) {
child.suffix_link = cur->children[i].get();
break;
}
cur = cur->suffix_link;
}
fix_suffix_links(child);
}
}
// Propagates pattern values through the automaton in BFS order so each
// node also holds the minimum value reachable via its suffix link.
// This lets find() take the minimum while walking, without chasing
// suffix links for every matched position.
void MultiGlob::fix_values() {
std::queue<TrieNode *> queue;
queue.push(root.get());
do {
TrieNode *node = queue.front();
queue.pop();
for (std::unique_ptr<TrieNode> &child : node->children) {
if (!child)
continue;
// BFS order guarantees the suffix-link target (which is strictly
// shorter) has already been fixed up.
child->value = std::min(child->value, child->suffix_link->value);
queue.push(child.get());
}
} while (!queue.empty());
}
} // namespace mold

203
third_party/mold/output-file-unix.h vendored Normal file
View file

@ -0,0 +1,203 @@
// clang-format off
#include "third_party/mold/common.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/flock.h"
#include "libc/calls/weirdtypes.h"
#include "libc/sysv/consts/at.h"
#include "libc/sysv/consts/f.h"
#include "libc/sysv/consts/fd.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/posix.h"
#include "libc/sysv/consts/s.h"
#include "libc/sysv/consts/splice.h"
#include "third_party/libcxx/filesystem"
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/runtime/runtime.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/mlock.h"
#include "libc/sysv/consts/msync.h"
#include "libc/sysv/consts/posix.h"
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/consts/madv.h"
#include "libc/sysv/consts/mfd.h"
#include "libc/sysv/consts/mremap.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/stat.h"
#include "libc/calls/struct/stat.macros.h"
#include "libc/calls/struct/timespec.h"
#include "libc/calls/weirdtypes.h"
#include "libc/sysv/consts/s.h"
#include "libc/sysv/consts/utime.h"
#include "libc/time/time.h"
#include "libc/calls/makedev.h"
#include "libc/calls/weirdtypes.h"
#include "libc/thread/thread.h"
#include "libc/calls/typedef/u.h"
#include "libc/calls/weirdtypes.h"
#include "libc/intrin/newbie.h"
#include "libc/sock/select.h"
#include "libc/sysv/consts/endian.h"
namespace mold {
// Reads the process umask without changing it. umask(2) has no pure
// "get" operation, so set a scratch mask and immediately restore the
// original value.
inline u32 get_umask() {
  u32 saved = umask(0);
  umask(saved);
  return saved;
}
// Creates a unique temporary file (".mold-XXXXXX") in the output
// file's directory, sized to `filesize` and chmod'ed to `perm` masked
// by the umask. Returns the open fd and the temp path (stored in ctx's
// string pool, so it outlives this call). The caller later rename()s
// the temp file over `path`. Fatal()s on unrecoverable errors.
template <typename Context>
static std::pair<i64, char *>
open_or_create_file(Context &ctx, std::string path, i64 filesize, i64 perm) {
std::string tmpl = filepath(path).parent_path() / ".mold-XXXXXX";
char *path2 = (char *)save_string(ctx, tmpl).data();
i64 fd = mkstemp(path2);
if (fd == -1)
Fatal(ctx) << "cannot open " << path2 << ": " << errno_string();
// Reuse an existing file if exists and writable because on Linux,
// writing to an existing file is much faster than creating a fresh
// file and writing to it.
if (ctx.overwrite_output_file && rename(path.c_str(), path2) == 0) {
::close(fd);
fd = ::open(path2, O_RDWR | O_CREAT, perm);
if (fd != -1 && !ftruncate(fd, filesize) && !fchmod(fd, perm & ~get_umask()))
return {fd, path2};
// Reusing the old file failed; fall back to a fresh file under the
// same temporary name.
unlink(path2);
fd = ::open(path2, O_RDWR | O_CREAT, perm);
if (fd == -1)
Fatal(ctx) << "cannot open " << path2 << ": " << errno_string();
}
if (ftruncate(fd, filesize))
Fatal(ctx) << "ftruncate failed: " << errno_string();
if (fchmod(fd, (perm & ~get_umask())) == -1)
Fatal(ctx) << "fchmod failed: " << errno_string();
return {fd, path2};
}
// Output file backed by an mmap of a temporary file in the destination
// directory. On close(), the temp file is atomically rename()d over
// the real output path.
template <typename Context>
class MemoryMappedOutputFile : public OutputFile<Context> {
public:
MemoryMappedOutputFile(Context &ctx, std::string path, i64 filesize, i64 perm)
: OutputFile<Context>(path, filesize, true) {
i64 fd;
std::tie(fd, output_tmpfile) = open_or_create_file(ctx, path, filesize, perm);
this->buf = (u8 *)mmap(nullptr, filesize, PROT_READ | PROT_WRITE,
MAP_SHARED, fd, 0);
if (this->buf == MAP_FAILED)
Fatal(ctx) << path << ": mmap failed: " << errno_string();
// The mapping remains valid after the fd is closed.
::close(fd);
// Publish the mapping's bounds for the disk-full signal handler.
mold::output_buffer_start = this->buf;
mold::output_buffer_end = this->buf + filesize;
}
~MemoryMappedOutputFile() {
if (fd2 != -1)
::close(fd2);
}
void close(Context &ctx) override {
Timer t(ctx, "close_file");
if (!this->is_unmapped)
munmap(this->buf, this->filesize);
// If an output file already exists, open a file and then remove it.
// This is the fastest way to unlink a file, as it does not make the
// system to immediately release disk blocks occupied by the file.
fd2 = ::open(this->path.c_str(), O_RDONLY);
if (fd2 != -1)
unlink(this->path.c_str());
if (rename(output_tmpfile, this->path.c_str()) == -1)
Fatal(ctx) << this->path << ": rename failed: " << errno_string();
output_tmpfile = nullptr;
}
private:
// Holds the replaced output file open (its blocks get released when
// this object is destroyed and the fd is closed).
int fd2 = -1;
};
// An OutputFile that buffers the whole output in an anonymous memory
// mapping and writes it out in one go on close(). Used when the output
// is not a regular file (stdout, character devices, ...).
template <typename Context>
class MallocOutputFile : public OutputFile<Context> {
public:
  MallocOutputFile(Context &ctx, std::string path, i64 filesize, i64 perm)
      : OutputFile<Context>(path, filesize, false), perm(perm) {
    this->buf = (u8 *)mmap(NULL, filesize, PROT_READ | PROT_WRITE,
                           MAP_SHARED | MAP_ANONYMOUS, -1, 0);
    if (this->buf == MAP_FAILED)
      Fatal(ctx) << "mmap failed: " << errno_string();
  }

  void close(Context &ctx) override {
    Timer t(ctx, "close_file");

    // "-" denotes the standard output.
    if (this->path == "-") {
      fwrite(this->buf, this->filesize, 1, stdout);
      fclose(stdout);
      return;
    }

    i64 fd = ::open(this->path.c_str(), O_RDWR | O_CREAT, perm);
    if (fd == -1)
      Fatal(ctx) << "cannot open " << this->path << ": " << errno_string();

    FILE *fp = fdopen(fd, "w");
    // fdopen can fail (e.g. on allocation failure); writing through a
    // null FILE* would crash, so fail loudly instead.
    if (!fp)
      Fatal(ctx) << this->path << ": fdopen failed: " << errno_string();
    // Detect short writes (e.g. disk full) instead of silently
    // producing a truncated output file. The filesize guard avoids a
    // false error for a zero-byte output.
    if (this->filesize && fwrite(this->buf, this->filesize, 1, fp) != 1)
      Fatal(ctx) << this->path << ": fwrite failed: " << errno_string();
    fclose(fp);
  }

private:
  i64 perm; // permission bits for the output file
};
// Factory: opens the output file, choosing a memory-mapped file for
// regular files and a malloc-style buffer for everything else
// (stdout or non-regular files such as devices).
template <typename Context>
std::unique_ptr<OutputFile<Context>>
OutputFile<Context>::open(Context &ctx, std::string path, i64 filesize, i64 perm) {
  Timer t(ctx, "open_file");

  // With --chroot, absolute output paths are relative to the chroot dir.
  if (path.starts_with('/') && !ctx.arg.chroot.empty())
    path = ctx.arg.chroot + "/" + path_clean(path);

  // "-" means the standard output; anything that stat() reports as a
  // non-regular file also cannot be mmap'ed for writing.
  bool is_special = false;
  if (path == "-") {
    is_special = true;
  } else {
    struct stat st;
    if (stat(path.c_str(), &st) == 0 && (st.st_mode & S_IFMT) != S_IFREG)
      is_special = true;
  }

  OutputFile<Context> *file;
  if (is_special)
    file = new MallocOutputFile(ctx, path, filesize, perm);
  else
    file = new MemoryMappedOutputFile(ctx, path, filesize, perm);

#ifdef MADV_HUGEPAGE
  // Enable transparent huge page for an output memory-mapped file.
  // On Linux, it has an effect only on tmpfs mounted with `huge=advise`,
  // but it can make the linker ~10% faster. You can try it by creating
  // a tmpfs with the following commands
  //
  //   $ mkdir tmp
  //   $ sudo mount -t tmpfs -o size=2G,huge=advise none tmp
  //
  // and then specifying a path under the directory as an output file.
  madvise(file->buf, filesize, MADV_HUGEPAGE);
#endif

  // --filler pre-fills the output buffer with a byte value.
  if (ctx.arg.filler != -1)
    memset(file->buf, ctx.arg.filler, filesize);
  return std::unique_ptr<OutputFile>(file);
}
} // namespace mold

85
third_party/mold/output-file-win32.h vendored Normal file
View file

@ -0,0 +1,85 @@
// clang-format off
#include "third_party/mold/common.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/flock.h"
#include "libc/calls/weirdtypes.h"
#include "libc/sysv/consts/at.h"
#include "libc/sysv/consts/f.h"
#include "libc/sysv/consts/fd.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/posix.h"
#include "libc/sysv/consts/s.h"
#include "libc/sysv/consts/splice.h"
#include "third_party/libcxx/filesystem"
#include "libc/nt/accounting.h"
#include "libc/nt/automation.h"
#include "libc/nt/console.h"
#include "libc/nt/debug.h"
#include "libc/nt/dll.h"
#include "libc/nt/enum/keyaccess.h"
#include "libc/nt/enum/regtype.h"
#include "libc/nt/errors.h"
#include "libc/nt/events.h"
#include "libc/nt/files.h"
#include "libc/nt/ipc.h"
#include "libc/nt/memory.h"
#include "libc/nt/paint.h"
#include "libc/nt/process.h"
#include "libc/nt/registry.h"
#include "libc/nt/synchronization.h"
#include "libc/nt/thread.h"
#include "libc/nt/windows.h"
#include "libc/nt/winsock.h"
namespace mold {
// Windows OutputFile: buffers the whole output on the heap and writes
// it out in one go on close().
template <typename Context>
class MallocOutputFile : public OutputFile<Context> {
public:
  MallocOutputFile(Context &ctx, std::string path, i64 filesize, i64 perm)
      : OutputFile<Context>(path, filesize, false), perm(perm) {
    this->buf = (u8 *)malloc(filesize);
    if (!this->buf)
      Fatal(ctx) << "malloc failed";
  }

  void close(Context &ctx) override {
    Timer t(ctx, "close_file");

    // "-" denotes the standard output. Free the buffer on this path
    // too; the original leaked it here while freeing it below.
    if (this->path == "-") {
      fwrite(this->buf, this->filesize, 1, stdout);
      fclose(stdout);
      free(this->buf);
      return;
    }

    i64 fd = ::open(this->path.c_str(), O_RDWR | O_CREAT, perm);
    if (fd == -1)
      Fatal(ctx) << "cannot open " << this->path << ": " << errno_string();

    FILE *fp = fdopen(fd, "w");
    // Guard against fdopen failure rather than writing through a null
    // FILE*.
    if (!fp)
      Fatal(ctx) << this->path << ": fdopen failed: " << errno_string();
    // Detect short writes instead of silently truncating the output.
    if (this->filesize && fwrite(this->buf, this->filesize, 1, fp) != 1)
      Fatal(ctx) << this->path << ": fwrite failed: " << errno_string();
    fclose(fp);
    free(this->buf);
  }

private:
  i64 perm; // permission bits for the output file
};
// Factory for the Windows build: every output goes through the
// heap-buffered MallocOutputFile and is flushed on close().
template <typename Context>
std::unique_ptr<OutputFile<Context>>
OutputFile<Context>::open(Context &ctx, std::string path, i64 filesize, i64 perm) {
  Timer t(ctx, "open_file");

  // With --chroot, absolute output paths are relative to the chroot dir.
  if (!ctx.arg.chroot.empty() && path.starts_with('/'))
    path = ctx.arg.chroot + "/" + path_clean(path);

  auto *file = new MallocOutputFile(ctx, path, filesize, perm);

  // --filler pre-fills the output buffer with a byte value.
  if (ctx.arg.filler != -1)
    memset(file->buf, ctx.arg.filler, filesize);
  return std::unique_ptr<OutputFile<Context>>(file);
}
} // namespace mold

6
third_party/mold/output-file.h vendored Normal file
View file

@ -0,0 +1,6 @@
// clang-format off
// Dispatch header: picks the platform-specific OutputFile
// implementation at compile time.
#if _WIN32
#include "third_party/mold/output-file-win32.h"
#else
#include "third_party/mold/output-file-unix.h"
#endif

140
third_party/mold/perf.cc vendored Normal file
View file

@ -0,0 +1,140 @@
// clang-format off
#include "third_party/mold/common.h"
#include "third_party/libcxx/functional"
#include "third_party/libcxx/iomanip"
#include "third_party/libcxx/ios"
#ifndef _WIN32
#include "libc/calls/calls.h"
#include "libc/calls/struct/rlimit.h"
#include "libc/calls/struct/rusage.h"
#include "libc/calls/weirdtypes.h"
#include "libc/sysv/consts/prio.h"
#include "libc/sysv/consts/rlim.h"
#include "libc/sysv/consts/rlimit.h"
#include "libc/sysv/consts/rusage.h"
#include "libc/calls/struct/itimerval.h"
#include "libc/calls/struct/timeval.h"
#include "libc/calls/weirdtypes.h"
#include "libc/sock/select.h"
#include "libc/sysv/consts/clock.h"
#include "libc/sysv/consts/itimer.h"
#include "libc/time/struct/timezone.h"
#include "libc/time/time.h"
#endif
namespace mold {
// Sums the per-thread counter slots into a single total.
i64 Counter::get_value() {
  return values.combine(std::plus());
}
// Prints all registered counters to stdout, largest value first,
// one "name=value" per line.
void Counter::print() {
  sort(instances, [](Counter *a, Counter *b) {
    return a->get_value() > b->get_value();
  });

  for (Counter *c : instances)
    std::cout << std::setw(20) << std::right << c->name
              << "=" << c->get_value() << "\n";
}
// Returns a monotonic timestamp in nanoseconds.
static i64 now_nsec() {
#ifdef _WIN32
  return (i64)std::chrono::steady_clock::now().time_since_epoch().count();
#else
  struct timespec ts;
  clock_gettime(CLOCK_MONOTONIC, &ts);
  return (i64)ts.tv_sec * 1'000'000'000 + ts.tv_nsec;
#endif
}
// Returns the CPU time this process has consumed so far, in
// nanoseconds, as a {user, system} pair.
static std::pair<i64, i64> get_usage() {
#ifdef _WIN32
  auto to_nsec = [](FILETIME t) -> i64 {
    // FILETIME is a 64-bit count of 100ns ticks split into two 32-bit
    // halves. Note the parentheses: `+`/`|` bind tighter than `<<`, so
    // the original `high << 32 + low` actually computed
    // `high << (32 + low)`.
    return (i64)((((u64)t.dwHighDateTime << 32) | (u64)t.dwLowDateTime) * 100);
  };

  FILETIME creation, exit, kernel, user;
  GetProcessTimes(GetCurrentProcess(), &creation, &exit, &kernel, &user);
  return {to_nsec(user), to_nsec(kernel)};
#else
  auto to_nsec = [](struct timeval t) -> i64 {
    return (i64)t.tv_sec * 1'000'000'000 + t.tv_usec * 1'000;
  };

  struct rusage ru;
  getrusage(RUSAGE_SELF, &ru);
  return {to_nsec(ru.ru_utime), to_nsec(ru.ru_stime)};
#endif
}
// Starts a timing span: samples the wall clock and CPU usage now
// (stop() later converts them into elapsed amounts) and registers
// this record as a child of `parent`, if any, for the report tree.
TimerRecord::TimerRecord(std::string name, TimerRecord *parent)
    : name(name), parent(parent) {
  start = now_nsec();
  std::tie(user, sys) = get_usage();
  if (parent)
    parent->children.push_back(this);
}
// Finalizes the record: converts the absolute start-time samples taken
// in the constructor into elapsed user/sys durations and records the
// end timestamp. Idempotent — only the first call has an effect.
void TimerRecord::stop() {
  if (stopped)
    return;
  stopped = true;

  auto [cur_user, cur_sys] = get_usage();
  end = now_nsec();
  user = cur_user - user;
  sys = cur_sys - sys;
}
// Recursively prints one timer record and its children, indenting two
// spaces per nesting level. Columns are user, sys and wall-clock time
// in seconds.
static void print_rec(TimerRecord &rec, i64 indent) {
  printf(" % 8.3f % 8.3f % 8.3f %s%s\n",
         ((double)rec.user / 1'000'000'000),
         ((double)rec.sys / 1'000'000'000),
         (((double)rec.end - rec.start) / 1'000'000'000),
         std::string(indent * 2, ' ').c_str(),
         rec.name.c_str());

  // Show children in the order they started.
  sort(rec.children, [](TimerRecord *a, TimerRecord *b) {
    return a->start < b->start;
  });

  for (TimerRecord *child : rec.children)
    print_rec(*child, indent + 1);
}
// Prints a hierarchical timing report for all collected records.
void print_timer_records(
    tbb::concurrent_vector<std::unique_ptr<TimerRecord>> &records) {
  // Finalize every record, newest first.
  for (i64 i = records.size() - 1; i >= 0; i--)
    records[i]->stop();

  // Adopt each record that has no parent into the most recently
  // started earlier record whose [start, end] interval fully contains
  // it, so the report forms a proper tree.
  for (i64 i = 0; i < records.size(); i++) {
    TimerRecord &inner = *records[i];
    if (inner.parent)
      continue;
    for (i64 j = i - 1; j >= 0; j--) {
      TimerRecord &outer = *records[j];
      if (outer.start <= inner.start && inner.end <= outer.end) {
        inner.parent = &outer;
        outer.children.push_back(&inner);
        break;
      }
    }
  }

  std::cout << " User System Real Name\n";
  // Print only roots; print_rec recurses into children.
  for (std::unique_ptr<TimerRecord> &rec : records)
    if (!rec->parent)
      print_rec(*rec, 0);
  std::cout << std::flush;
}
} // namespace mold

82
third_party/mold/sha.h vendored Normal file
View file

@ -0,0 +1,82 @@
// clang-format off
#pragma once
#include "third_party/libcxx/cstdint"
// Byte type used by the hashing helpers below.
typedef uint8_t u8;
// Size of a SHA-256 digest in bytes.
static constexpr int64_t SHA256_SIZE = 32;
#ifdef _WIN32
// On Windows, we use Microsoft CNG.
// MISSING #include <Windows.h>
// MISSING #include <bcrypt.h>
// MISSING #include <ntstatus.h>
// Returns a process-wide CNG algorithm provider handle for SHA-256,
// opened lazily and exactly once (thread-safe via std::call_once).
inline static BCRYPT_ALG_HANDLE get_sha256_handle() {
  static std::once_flag once;
  static BCRYPT_ALG_HANDLE alg;
  std::call_once(once, [&] {
    BCryptOpenAlgorithmProvider(&alg, BCRYPT_SHA256_ALGORITHM, nullptr, 0);
  });
  return alg;
}
// One-shot SHA-256: hashes `len` bytes at `in` into the 32-byte
// buffer at `out` using the shared CNG provider.
inline void sha256_hash(u8 *in, size_t len, u8 *out) {
  BCryptHash(get_sha256_handle(), nullptr, 0, in, len, out, SHA256_SIZE);
}
class SHA256Hash {
public:
SHA256Hash() {
BCryptCreateHash(get_sha256_handle(), &handle, nullptr, 0, nullptr, 0, 0);
}
void update(u8 *data, size_t len) {
BCryptHashData(handle, data, len, 0);
}
void finish(u8 *out) {
BCryptFinishHash(handle, out, SHA256_SIZE, 0);
}
private:
BCRYPT_HASH_HANDLE handle;
};
#else
// On Unix, we use OpenSSL or the Apple's OpenSSL-compatible API.
#ifdef __APPLE__
# define COMMON_DIGEST_FOR_OPENSSL
// MISSING #include <CommonCrypto/CommonDigest.h>
# define SHA256(data, len, md) CC_SHA256(data, len, md)
#else
# define OPENSSL_SUPPRESS_DEPRECATED 1
// MISSING #include <openssl/sha.h>
#endif
// One-shot SHA-256: hashes `len` bytes at `in` into the 32-byte
// buffer at `out` (SHA256 is OpenSSL's, or CC_SHA256 on Apple).
inline void sha256_hash(u8 *in, size_t len, u8 *out) {
  SHA256(in, len, out);
}
// Incremental SHA-256 hasher on top of the OpenSSL SHA256_* API
// (or Apple's CommonCrypto compatibility macros).
class SHA256Hash {
public:
  SHA256Hash() {
    SHA256_Init(&ctx);
  }

  // Feeds `len` more bytes into the running hash.
  void update(u8 *data, size_t len) {
    SHA256_Update(&ctx, data, len);
  }

  // Writes the final 32-byte digest to `out`.
  void finish(u8 *out) {
    SHA256_Final(out, &ctx);
  }

private:
  SHA256_CTX ctx;
};
#endif

113
third_party/mold/tar.cc vendored Normal file
View file

@ -0,0 +1,113 @@
// clang-format off
#include "third_party/mold/common.h"
namespace mold {
// A tar file consists of one or more Ustar header followed by data.
// Each Ustar header represents a single file in an archive.
//
// tar is an old file format, and its `name` field is only 100 bytes long.
// If `name` is longer than 100 bytes, we can emit a PAX header before a
// Ustar header to store a long filename.
//
// For simplicity, we always emit a PAX header even for a short filename.
// One 512-byte Ustar header block. The field layout and sizes are
// fixed by the tar format; numeric fields hold octal ASCII.
struct UstarHeader {
  UstarHeader() {
    // The struct is exactly 512 bytes of char arrays (no padding),
    // so byte-wise zeroing is well-defined.
    memset(this, 0, sizeof(*this));
  }

  void finalize() {
    // Per the format, the checksum is computed with the checksum field
    // itself filled with spaces.
    memset(checksum, ' ', sizeof(checksum));
    memcpy(magic, "ustar", 5);
    memcpy(version, "00", 2);

    // Compute checksum
    int sum = 0;
    for (i64 i = 0; i < sizeof(*this); i++)
      sum += ((u8 *)this)[i];

    // We need to convince the compiler that sum isn't too big to silence
    // -Werror=format-truncation.
    ASSUME(sum < 01'000'000);
    snprintf(checksum, sizeof(checksum), "%06o", sum);
  }

  char name[100];
  char mode[8];
  char uid[8];
  char gid[8];
  char size[12];
  char mtime[12];
  char checksum[8];
  char typeflag[1];
  char linkname[100];
  char magic[6];
  char version[2];
  char uname[32];
  char gname[32];
  char devmajor[8];
  char devminor[8];
  char prefix[155];
  char pad[12];
};
// Builds a PAX extended-header record "<len> path=<cleaned-path>\n"
// where <len> is the decimal length of the entire record, including
// the digits of <len> itself.
static std::string encode_path(std::string basedir, std::string path) {
  path = path_clean(basedir + "/" + path);

  // Construct a string which contains something like
  // "16 path=foo/bar\n" where 16 is the size of the string
  // including the size string itself.
  i64 len = std::string(" path=\n").size() + path.size();
  i64 total = std::to_string(len).size() + len;
  // Recompute once more in case adding the length digits changed the
  // number of digits (e.g. 98 -> 100).
  total = std::to_string(total).size() + len;
  return std::to_string(total) + " path=" + path + "\n";
}
// Creates a TarWriter that appends entries (with `basedir` prepended
// to their paths) to a fresh archive at `output_path`. Returns null
// if the file cannot be created.
std::unique_ptr<TarWriter>
TarWriter::open(std::string output_path, std::string basedir) {
  if (FILE *out = fopen(output_path.c_str(), "w"))
    return std::unique_ptr<TarWriter>(new TarWriter(out, basedir));
  return nullptr;
}
// Closes the archive stream, flushing buffered data.
TarWriter::~TarWriter() {
  fclose(out);
}
// Appends one file (PAX header + pathname + Ustar header + contents)
// to the archive, keeping everything aligned to BLOCK_SIZE and keeping
// a valid two-block terminator at the end of the file.
void TarWriter::append(std::string path, std::string_view data) {
  // Write PAX header
  static_assert(sizeof(UstarHeader) == BLOCK_SIZE);
  UstarHeader pax;
  std::string attr = encode_path(basedir, path);
  snprintf(pax.size, sizeof(pax.size), "%011zo", attr.size());
  pax.typeflag[0] = 'x';
  pax.finalize();
  fwrite(&pax, sizeof(pax), 1, out);

  // Write pathname, then pad up to the next block boundary.
  fwrite(attr.data(), attr.size(), 1, out);
  fseek(out, align_to(ftell(out), BLOCK_SIZE), SEEK_SET);

  // Write Ustar header
  UstarHeader ustar;
  memcpy(ustar.mode, "0000664", 8);
  snprintf(ustar.size, sizeof(ustar.size), "%011zo", data.size());
  ustar.finalize();
  fwrite(&ustar, sizeof(ustar), 1, out);

  // Write file contents, padded to the next block boundary.
  fwrite(data.data(), data.size(), 1, out);
  fseek(out, align_to(ftell(out), BLOCK_SIZE), SEEK_SET);

  // A tar file must ends with two empty blocks, so write such
  // terminator and seek back.
  u8 terminator[BLOCK_SIZE * 2] = {};
  fwrite(&terminator, BLOCK_SIZE * 2, 1, out);
  fseek(out, -BLOCK_SIZE * 2, SEEK_END);

  assert(ftell(out) % BLOCK_SIZE == 0);
}
} // namespace mold

View file

@ -0,0 +1,74 @@
# clang-format off
# NOTE: the original first line used a C++-style `//` comment, which is
# a parse error in CMake; CMake comments use `#`.
option(MOLD_ENABLE_QEMU_TESTS "Enable tests on non-native targets" OFF)
option(MOLD_ENABLE_QEMU_TESTS_RV32 "Enable tests for RV32" OFF)
option(MOLD_ENABLE_QEMU_TESTS_POWER10 "Enable tests for Power10" OFF)

# Registers the shell-script tests for one target triple. Tests run
# natively when the triple matches the host; otherwise only when the
# QEMU test options are enabled.
function(add_target TRIPLE)
  # Normalize the host processor name to match the triple prefixes.
  set(HOST ${CMAKE_HOST_SYSTEM_PROCESSOR})
  if(${HOST} MATCHES "amd64")
    set(HOST x86_64)
  elseif(${HOST} MATCHES "arm.*")
    set(HOST arm)
  elseif(${HOST} STREQUAL "ppc64")
    set(HOST powerpc64)
  endif()

  if(${TRIPLE} MATCHES "${HOST}-.*")
    set(IS_NATIVE 1)
  endif()

  # MACHINE is the architecture part of the triple.
  if(${TRIPLE} MATCHES "([^-]+)-.")
    set(MACHINE ${CMAKE_MATCH_1})
  endif()

  if(IS_NATIVE OR MOLD_ENABLE_QEMU_TESTS)
    # Generic tests are the *.sh files without an underscore;
    # machine-specific ones are named "<machine>_*.sh".
    file(GLOB ALL_TESTS RELATIVE ${CMAKE_CURRENT_LIST_DIR} CONFIGURE_DEPENDS
      "*.sh")
    list(FILTER ALL_TESTS EXCLUDE REGEX "_")

    file(GLOB TESTS RELATIVE ${CMAKE_CURRENT_LIST_DIR} CONFIGURE_DEPENDS
      "${MACHINE}_*.sh")
    list(APPEND TESTS ${ALL_TESTS})

    foreach(TEST IN LISTS TESTS)
      string(REGEX REPLACE "\\.sh$" "" TESTNAME "${MACHINE}-${TEST}")
      add_test(NAME ${TESTNAME}
        COMMAND bash -x ${CMAKE_CURRENT_LIST_DIR}/${TEST}
        WORKING_DIRECTORY ${mold_BINARY_DIR})

      if(IS_NATIVE)
        # Native tests may self-skip by printing "skipped".
        set_tests_properties(${TESTNAME} PROPERTIES
          SKIP_REGULAR_EXPRESSION "skipped")
      else()
        # Cross tests get the triple so common.inc can pick a QEMU.
        set_tests_properties(${TESTNAME} PROPERTIES
          ENVIRONMENT "TRIPLE=${TRIPLE}")
      endif()
    endforeach()
  endif()
endfunction()

add_target(x86_64-linux-gnu)
add_target(i686-linux-gnu)
add_target(aarch64-linux-gnu)
add_target(arm-linux-gnueabihf)
add_target(riscv64-linux-gnu)
add_target(powerpc-linux-gnu)
add_target(powerpc64-linux-gnu)
add_target(powerpc64le-linux-gnu)
add_target(sparc64-linux-gnu)
add_target(s390x-linux-gnu)
add_target(m68k-linux-gnu)
add_target(sh4-linux-gnu)
add_target(alpha-linux-gnu)

if(MOLD_ENABLE_QEMU_TESTS_RV32)
  add_target(riscv32-linux-gnu)
endif()

if(MOLD_ENABLE_QEMU_TESTS_POWER10)
  add_target(powerpc64le_power10-linux-gnu)
endif()

View file

@ -0,0 +1,30 @@
# clang-format off
#!/bin/bash
. $(dirname $0)/common.inc

# AArch64 only: place two mutually-calling sections 256MB apart and
# check the disassembly contains a range-extension thunk for fn1.
[ $MACHINE = aarch64 ] || skip

cat <<EOF | $CC -c -o $t/a.o -fPIC -xc -
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
void fn1();
void fn2();
__attribute__((section(".low"))) void fn1() { fn2(); }
__attribute__((section(".high"))) void fn2() { fn1(); }
int main() {
  fn1();
}
EOF

$CC -B. -o $t/exe $t/a.o \
  -Wl,--section-start=.low=0x10000000,--section-start=.high=0x20000000

$OBJDUMP -dr $t/exe | grep -Fq '<fn1$thunk>:'

29
third_party/mold/test/elf/abs-error.sh vendored Executable file
View file

@ -0,0 +1,29 @@
# clang-format off
#!/bin/bash
. $(dirname $0)/common.inc

# Linking a non-PIC reference to an absolute symbol into a PIE with
# -z text must fail with a "recompile with -fPIC" diagnostic.
[ $MACHINE = aarch64 ] && skip
[ $MACHINE = ppc64 ] && skip
[ $MACHINE = ppc64le ] && skip
[ $MACHINE = s390x ] && skip
[ $MACHINE = alpha ] && skip

cat <<EOF | $CC -fPIC -c -o $t/a.o -xassembler -
.globl foo
foo = 3;
EOF

cat <<EOF | $CC -fno-PIC -c -o $t/b.o -xc -
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
extern char foo;
int main() { printf("foo=%p\n", &foo); }
EOF

! $CC -B. -o $t/exe -pie $t/a.o $t/b.o -Wl,-z,text >& $t/log
grep -q 'recompile with -fPIC' $t/log

67
third_party/mold/test/elf/absolute-symbols.sh vendored Executable file
View file

@ -0,0 +1,67 @@
# clang-format off
#!/bin/bash
. $(dirname $0)/common.inc

# This test crashes only on qemu-sparc64 running on GitHub Actions,
# even though it works on a local x86-64 machine and on an actual
# SPARC machine.
[ $MACHINE = sparc64 ] && skip

# Writing through an absolute symbol placed in unmapped memory should
# fault at the absolute address; the SIGSEGV handler prints it.
cat <<EOF | $CC -o $t/a.o -c -x assembler -
.globl foo
foo = 0x800008
EOF

cat <<EOF | $CC -o $t/b.o -c -fno-PIC -xc -
#define _GNU_SOURCE 1
#include "libc/calls/calls.h"
#include "libc/calls/sigtimedwait.h"
#include "libc/calls/struct/sigaction.h"
#include "libc/calls/struct/siginfo.h"
#include "libc/sysv/consts/sa.h"
#include "libc/sysv/consts/sicode.h"
#include "libc/sysv/consts/ss.h"
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
#include "libc/calls/calls.h"
#include "libc/calls/termios.h"
#include "libc/fmt/conv.h"
#include "libc/limits.h"
#include "libc/mem/alg.h"
#include "libc/mem/alloca.h"
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/rand.h"
#include "libc/stdio/temp.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/exit.h"
#include "third_party/getopt/getopt.h"
#include "third_party/musl/crypt.h"
#include "third_party/musl/rand48.h"
// MISSING #include <ucontext.h>

void handler(int signum, siginfo_t *info, void *ptr) {
  printf("ip=%p\n", info->si_addr);
  exit(0);
}

extern volatile int foo;

int main() {
  struct sigaction act;
  act.sa_flags = SA_SIGINFO | SA_RESETHAND;
  act.sa_sigaction = handler;
  sigemptyset(&act.sa_mask);
  sigaction(SIGSEGV, &act, 0);
  foo = 5;
}
EOF

$CC -B. -o $t/exe -no-pie $t/a.o $t/b.o
$QEMU $t/exe | grep -q '^ip=0x80000.$'

View file

@ -0,0 +1,10 @@
# clang-format off
#!/bin/bash
. $(dirname $0)/common.inc

# Duplicate strong definitions must fail by default but be accepted
# with -allow-multiple-definition or -z muldefs.
echo 'int main() { return 0; }' | $CC -c -o $t/a.o -xc -
echo 'int main() { return 1; }' | $CC -c -o $t/b.o -xc -

! $CC -B. -o $t/exe $t/a.o $t/b.o 2> /dev/null || false
$CC -B. -o $t/exe $t/a.o $t/b.o -Wl,-allow-multiple-definition
$CC -B. -o $t/exe $t/a.o $t/b.o -Wl,-z,muldefs

35
third_party/mold/test/elf/ar-alignment.sh vendored Executable file
View file

@ -0,0 +1,35 @@
# clang-format off
#!/bin/bash
. $(dirname $0)/common.inc

# Archive members whose size is odd (a.o gets one extra byte appended)
# must still link correctly from a .a file.
cat <<EOF | $CC -o $t/a.o -c -xc -
int two() { return 2; }
EOF

head -c 1 /dev/zero >> $t/a.o

cat <<EOF | $CC -o $t/b.o -c -xc -
int three() { return 3; }
EOF

cat <<EOF | $CC -o $t/c.o -c -xc -
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
int two();
int three();
int main() {
  printf("%d\n", two() + three());
}
EOF

rm -f $t/d.a
ar rcs $t/d.a $t/a.o $t/b.o
$CC -B. -o $t/exe $t/c.o $t/d.a

View file

@ -0,0 +1,34 @@
# clang-format off
#!/bin/bash
. $(dirname $0)/common.inc

# ARM only: far-apart sections must get a range-extension thunk; check
# the thunk's instruction sequence in the disassembly.
[ $MACHINE = arm ] || skip

cat <<EOF | $CC -c -o $t/a.o -fPIC -xc -
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
void fn1();
void fn2();
__attribute__((section(".low"))) void fn1() { fn2(); }
__attribute__((section(".high"))) void fn2() { fn1(); }
int main() {
  fn1();
}
EOF

$CC -B. -o $t/exe $t/a.o \
  -Wl,--section-start=.low=0x10000000,--section-start=.high=0x20000000

$OBJDUMP -dr $t/exe | grep -F -A7 '<fn1$thunk>:' > $t/log
grep -Eq 'mov\s+ip, pc' $t/log
grep -Eq 'bx\s+ip' $t/log
grep -Eq 'add\s+ip, ip, pc' $t/log

View file

@ -0,0 +1,60 @@
# clang-format off
#!/bin/bash
. $(dirname $0)/common.inc

# ARM only: mixed Thumb (.low) and ARM (.high) code calling across a
# large address gap must run correctly at -O0 and -O2.
[ $MACHINE = arm ] || skip

echo 'int main() {}' | $CC -c -o /dev/null -xc - -O0 -mthumb >& /dev/null \
  || skip

cat <<EOF > $t/a.c
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
void fn3();
void fn4();
__attribute__((section(".low"))) void fn1() { printf(" fn1"); fn3(); }
__attribute__((section(".low"))) void fn2() { printf(" fn2"); fn4(); }
int main() {
  printf(" main");
  fn1();
  printf("\n");
}
EOF

cat <<EOF > $t/b.c
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
void fn1();
void fn2();
__attribute__((section(".high"))) void fn3() { printf(" fn3"); fn2(); }
__attribute__((section(".high"))) void fn4() { printf(" fn4"); }
EOF

$CC -c -o $t/c.o $t/a.c -O0 -mthumb
$CC -c -o $t/d.o $t/b.c -O0 -marm
$CC -B. -o $t/exe $t/c.o $t/d.o \
  -Wl,--section-start=.low=0x10000000,--section-start=.high=0x20000000
$QEMU $t/exe | grep -q 'main fn1 fn3 fn2 fn4'

$CC -c -o $t/e.o $t/a.c -O2 -mthumb
$CC -c -o $t/f.o $t/b.c -O2 -marm
$CC -B. -o $t/exe $t/e.o $t/f.o \
  -Wl,--section-start=.low=0x10000000,--section-start=.high=0x20000000
$QEMU $t/exe | grep -q 'main fn1 fn3 fn2 fn4'

View file

@ -0,0 +1,45 @@
# clang-format off
#!/bin/bash
. $(dirname $0)/common.inc

# ARM only: a Thumb object calling into an ARM object (and back) must
# interwork correctly.
[[ $MACHINE == arm* ]] || skip

echo 'int foo() { return 0; }' | $CC -o /dev/null -c -xc - -mthumb 2> /dev/null || skip

cat <<EOF | $CC -o $t/a.o -c -xc - -mthumb
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
int bar();
int foo() {
  printf(" foo");
  bar();
}
EOF

cat <<EOF | $CC -o $t/b.o -c -xc - -marm
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
int bar() {
  printf(" bar\n");
}
int foo();
int main() {
  printf("main");
  foo();
}
EOF

$CC -B. -o $t/exe $t/a.o $t/b.o
$QEMU $t/exe | grep -q 'main foo bar'

72
third_party/mold/test/elf/arm_tlsdesc.sh vendored Executable file
View file

@ -0,0 +1,72 @@
# clang-format off
#!/bin/bash
. $(dirname $0)/common.inc

# ARM only: TLS descriptor (-mtls-dialect=gnu2) access must work in
# ARM and Thumb mode, with and without linker relaxation, and with the
# sections forced far apart.
[ $MACHINE = arm ] || skip

echo 'int main() {}' | $GCC -c -o /dev/null -xc - -O0 -mthumb >& /dev/null \
  || skip

cat <<EOF > $t/a.c
extern _Thread_local int foo;

__attribute__((section(".low")))
int get_foo() {
  int y = foo;
  return y;
}

static _Thread_local int bar = 5;

__attribute__((section(".high")))
int get_bar() {
  return bar;
}
EOF

cat <<EOF > $t/b.c
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
_Thread_local int foo;
int get_foo();
int get_bar();
int main() {
  foo = 42;
  printf("%d %d\n", get_foo(), get_bar());
  return 0;
}
EOF

$GCC -fPIC -mtls-dialect=gnu2 -c -o $t/c.o $t/a.c -marm
$GCC -fPIC -mtls-dialect=gnu2 -c -o $t/d.o $t/b.c -marm
$CC -B. -o $t/exe1 $t/c.o $t/d.o
$QEMU $t/exe1 | grep -q '42 5'

$CC -B. -o $t/exe2 $t/c.o $t/d.o -Wl,-no-relax
$QEMU $t/exe2 | grep -q '42 5'

$CC -B. -o $t/exe3 $t/c.o $t/d.o -Wl,-no-relax \
  -Wl,--section-start=.low=0x10000000,--section-start=.high=0x20000000
$QEMU $t/exe3 | grep -q '42 5'

$GCC -fPIC -mtls-dialect=gnu2 -c -o $t/e.o $t/a.c -mthumb
$GCC -fPIC -mtls-dialect=gnu2 -c -o $t/f.o $t/b.c -mthumb
$CC -B. -o $t/exe4 $t/e.o $t/f.o
$QEMU $t/exe4 | grep -q '42 5'

$CC -B. -o $t/exe5 $t/e.o $t/f.o -Wl,-no-relax
$QEMU $t/exe5 | grep -q '42 5'

$CC -B. -o $t/exe6 $t/e.o $t/f.o -Wl,-no-relax \
  -Wl,--section-start=.low=0x10000000,--section-start=.high=0x20000000
$QEMU $t/exe6 | grep -q '42 5'

25
third_party/mold/test/elf/as-needed-dso.sh vendored Executable file
View file

@ -0,0 +1,25 @@
# clang-format off
#!/bin/bash
. $(dirname $0)/common.inc

# --as-needed must drop libfoo when only libbar is referenced, and keep
# it when listed explicitly on the command line.
cat <<EOF | $CC -o $t/libfoo.so -shared -fPIC -Wl,-soname,libfoo.so -xc -
int fn1() { return 42; }
EOF

cat <<EOF | $CC -o $t/libbar.so -shared -fPIC -Wl,-soname,libbar.so -xc -
int fn1();
int fn2() { return fn1(); }
EOF

cat <<EOF | $CC -o $t/a.o -c -xc -
int fn2();
int main() { fn2(); }
EOF

$CC -B. -o $t/exe1 $t/a.o -L$t -Wl,--as-needed -lbar -Wl,--allow-shlib-undefined
readelf -W --dynamic $t/exe1 > $t/log1
! grep -q libfoo $t/log1 || false

$CC -B. -o $t/exe2 $t/a.o -L$t -Wl,--as-needed -lbar -lfoo
readelf -W --dynamic $t/exe2 > $t/log2
grep -q libfoo $t/log2

32
third_party/mold/test/elf/as-needed-weak.sh vendored Executable file
View file

@ -0,0 +1,32 @@
# clang-format off
#!/bin/bash
. $(dirname $0)/common.inc

# A weakly-referenced DSO should be listed as DT_NEEDED without
# -as-needed but dropped with it.
cat <<EOF | $CC -fPIC -o $t/a.o -c -xc -
__attribute__((weak)) int fn1();
int main() {
  if (fn1)
    fn1();
}
EOF

cat <<EOF | $CC -o $t/libfoo.so -shared -fPIC -Wl,-soname,libfoo.so -xc -
int fn1() { return 42; }
EOF

cat <<EOF | $CC -o $t/libbar.so -shared -fPIC -Wl,-soname,libbar.so -xc -
int fn2() { return 42; }
EOF

$CC -o $t/exe1 $t/a.o -Wl,-no-as-needed -L$t -lbar -lfoo
readelf --dynamic $t/exe1 > $t/log1
grep -Fq 'Shared library: [libfoo.so]' $t/log1
grep -Fq 'Shared library: [libbar.so]' $t/log1

$CC -o $t/exe2 $t/a.o -Wl,-as-needed -L$t -lbar -lfoo
readelf --dynamic $t/exe2 > $t/log2
! grep -Fq 'Shared library: [libfoo.so]' $t/log2 || false
! grep -Fq 'Shared library: [libbar.so]' $t/log2 || false

30
third_party/mold/test/elf/as-needed.sh vendored Executable file
View file

@ -0,0 +1,30 @@
# clang-format off
#!/bin/bash
. $(dirname $0)/common.inc

# --no-as-needed records both DSOs as DT_NEEDED; --as-needed keeps only
# the one actually referenced (libfoo via fn1).
cat <<EOF | $CC -o $t/a.o -c -xc -
void fn1();
int main() {
  fn1();
}
EOF

cat <<EOF | $CC -o $t/b.so -shared -fPIC -Wl,-soname,libfoo.so -xc -
int fn1() { return 42; }
EOF

cat <<EOF | $CC -o $t/c.so -shared -fPIC -Wl,-soname,libbar.so -xc -
int fn2() { return 42; }
EOF

$CC -B. -o $t/exe $t/a.o -Wl,--no-as-needed $t/b.so $t/c.so
readelf --dynamic $t/exe > $t/readelf
grep -Fq 'Shared library: [libfoo.so]' $t/readelf
grep -Fq 'Shared library: [libbar.so]' $t/readelf

$CC -B. -o $t/exe $t/a.o -Wl,--as-needed $t/b.so $t/c.so
readelf --dynamic $t/exe > $t/readelf
grep -Fq 'Shared library: [libfoo.so]' $t/readelf
! grep -Fq 'Shared library: [libbar.so]' $t/readelf || false

39
third_party/mold/test/elf/as-needed2.sh vendored Executable file
View file

@ -0,0 +1,39 @@
# clang-format off
#!/bin/bash
. $(dirname $0)/common.inc

# --as-needed with transitive DSO dependencies: libbaz (needed) pulls
# libfoo (needed transitively) while libbar (unreferenced) is dropped.
cat <<EOF | $CC -shared -fPIC -o $t/libfoo.so -Wl,--soname,libfoo.so -xc -
int foo() { return 3; }
EOF

cat <<EOF | $CC -shared -fPIC -o $t/libbar.so -Wl,--soname,libbar.so -xc -
int bar() { return 3; }
EOF

cat <<EOF | $CC -fPIC -c -o $t/a.o -xc -
int foo();
int baz() { return foo(); }
EOF

$CC -B. -shared -o $t/libbaz.so -Wl,--soname,libbaz.so -L$t $t/a.o -lfoo

cat <<EOF | $CC -c -o $t/b.o -xc -
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
int baz();
int main() {
  printf("%d\n", baz());
}
EOF

$CC -B. -o $t/exe $t/b.o -L$t -Wl,--as-needed -lbaz -lbar -lfoo
readelf --dynamic $t/exe > $t/log
grep -q libbaz $t/log || false
! grep -q libbar $t/log || false
grep -q libfoo $t/log || false

16
third_party/mold/test/elf/auxiliary.sh vendored Executable file
View file

@ -0,0 +1,16 @@
# clang-format off
#!/bin/bash
. $(dirname $0)/common.inc

# -auxiliary and its alias -f should emit DT_AUXILIARY entries in a
# shared object.
cat <<EOF | $CC -o $t/a.o -c -x assembler -
.text
.globl _start
_start:
  nop
EOF

./mold -o $t/b.so $t/a.o -auxiliary foo -f bar -shared

readelf --dynamic $t/b.so > $t/log
grep -Fq 'Auxiliary library: [foo]' $t/log
grep -Fq 'Auxiliary library: [bar]' $t/log

43
third_party/mold/test/elf/bno-symbolic.sh vendored Executable file
View file

@ -0,0 +1,43 @@
# clang-format off
#!/bin/bash
. $(dirname $0)/common.inc

# -Bno-symbolic after -Bsymbolic should cancel it: the executable's
# definitions interpose the DSO's.
# GCC produces buggy code for this test case on s390x.
# https://sourceware.org/bugzilla/show_bug.cgi?id=29655
[ $MACHINE = s390x ] && $CC -v 2>&1 | grep -E '^gcc version 1[0-3]\.' && skip

cat <<EOF | $CC -c -fPIC -o$t/a.o -xc -
int foo = 4;

int get_foo() {
  return foo;
}

void *bar() {
  return bar;
}
EOF

$CC -B. -shared -fPIC -o $t/b.so $t/a.o -Wl,-Bsymbolic -Wl,-Bno-symbolic

cat <<EOF | $CC -c -o $t/c.o -xc - -fno-PIE
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
extern int foo;
int get_foo();
void *bar();

int main() {
  foo = 3;
  printf("%d %d %d\n", foo, get_foo(), bar == bar());
}
EOF

$CC -B. -no-pie -o $t/exe $t/c.o $t/b.so
$QEMU $t/exe | grep -q '3 3 1'

View file

@ -0,0 +1,34 @@
# clang-format off
#!/bin/bash
. $(dirname $0)/common.inc

# -Bsymbolic-functions binds function symbols inside the DSO but still
# lets data symbols (foo) be interposed by the executable.
cat <<EOF | $CC -c -o $t/a.o -fPIC -xc -
int foo = 4;
int get_foo() { return foo; }
void *bar() { return bar; }
EOF

$CC -B. -shared -o $t/b.so $t/a.o -Wl,-Bsymbolic-functions

cat <<EOF | $CC -c -o $t/c.o -xc - -fno-PIE
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
int foo = 3;
int x = 5;
int get_foo();
void *bar() { return &x; }

int main() {
  printf("%d %d %d\n", foo, get_foo(), bar == bar());
}
EOF

$CC -B. -no-pie -o $t/exe $t/c.o $t/b.so
$QEMU $t/exe | grep -q '3 3 0'

30
third_party/mold/test/elf/bsymbolic.sh vendored Executable file
View file

@ -0,0 +1,30 @@
# clang-format off
#!/bin/bash
. $(dirname $0)/common.inc

# -Bsymbolic binds all symbols inside the DSO: the executable's `foo`
# does not interpose the library's copy seen by get_foo().
cat <<EOF | $CC -c -fPIC -o$t/a.o -xc -
int foo = 4;
int get_foo() { return foo; }
EOF

$CC -B. -shared -fPIC -o $t/b.so $t/a.o -Wl,-Bsymbolic

cat <<EOF | $CC -c -o $t/c.o -xc - -fno-PIE
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
int foo = 3;
int get_foo();
int main() {
  printf("%d %d\n", foo, get_foo());
}
EOF

$CC -B. -no-pie -o $t/exe $t/c.o $t/b.so
$QEMU $t/exe | grep -q '3 4'

17
third_party/mold/test/elf/bug178.sh vendored Executable file
View file

@ -0,0 +1,17 @@
# clang-format off
#!/bin/bash
. $(dirname $0)/common.inc

# Verify that mold does not crash if no object file is included
# in the output. The resulting executable doesn't contain any
# meaningful code or data, so this is an edge case, though.
cat <<EOF | $CC -x assembler -c -o $t/a.o -
.globl foo
foo:
EOF

rm -f $t/a.a
ar rcs $t/a.a $t/a.o

./mold -o $t/exe $t/a.a

24
third_party/mold/test/elf/build-id.sh vendored Executable file
View file

@ -0,0 +1,24 @@
# clang-format off
#!/bin/bash
. $(dirname $0)/common.inc

# Exercise every -build-id flavor and check the note sizes/contents
# reported by readelf.
echo 'int main() { return 0; }' > $t/a.c

$CC -B. -o $t/exe $t/a.c -Wl,-build-id
readelf -n $t/exe | grep -qv 'GNU.*0x00000010.*NT_GNU_BUILD_ID'

$CC -B. -o $t/exe $t/a.c -Wl,-build-id=uuid
readelf -nW $t/exe |
  grep -Eq 'GNU.*0x00000010.*NT_GNU_BUILD_ID.*Build ID: ............4...[89abcdef]'

$CC -B. -o $t/exe $t/a.c -Wl,-build-id=md5
readelf -n $t/exe | grep -q 'GNU.*0x00000010.*NT_GNU_BUILD_ID'

$CC -B. -o $t/exe $t/a.c -Wl,-build-id=sha1
readelf -n $t/exe | grep -q 'GNU.*0x00000014.*NT_GNU_BUILD_ID'

$CC -B. -o $t/exe $t/a.c -Wl,-build-id=sha256
readelf -n $t/exe | grep -q 'GNU.*0x00000020.*NT_GNU_BUILD_ID'

$CC -B. -o $t/exe $t/a.c -Wl,-build-id=0xdeadbeefdeadbeef
readelf -n $t/exe | grep -q 'Build ID: deadbeefdeadbeef'

46
third_party/mold/test/elf/canonical-plt.sh vendored Executable file
View file

@@ -0,0 +1,46 @@
// clang-format off
#!/bin/bash
. $(dirname $0)/common.inc
# GCC produces buggy code for this test case on s390x.
# https://sourceware.org/bugzilla/show_bug.cgi?id=29655
[ $MACHINE = s390x ] && $CC -v 2>&1 | grep -E '^gcc version 1[0-3]\.' && skip
# DSO whose functions return their own addresses — used below to check that
# function-pointer equality holds across the executable/DSO boundary.
cat <<EOF | $CC -o $t/a.so -fPIC -shared -xc -
void *foo() {
return foo;
}
void *bar() {
return bar;
}
EOF
# PIC object that also takes bar's address.
cat <<EOF | $CC -o $t/b.o -c -xc - -fPIC
void *bar();
void *baz() {
return bar;
}
EOF
# Non-PIC main object: taking the address of a DSO function from non-PIC
# code makes the linker use a canonical PLT entry as the symbol's address.
cat <<EOF | $CC -o $t/c.o -c -xc - -fno-PIC
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
void *foo();
void *bar();
void *baz();
int main() {
printf("%d %d %d\n", foo == foo(), bar == bar(), bar == baz());
}
EOF
$CC -B. -no-pie -o $t/exe $t/a.so $t/b.o $t/c.o
# All three comparisons must hold: every reference to foo/bar — from the DSO,
# the PIC object, and the non-PIC main — resolves to one canonical address.
$QEMU $t/exe | grep -q '^1 1 1$'

8
third_party/mold/test/elf/cmdline.sh vendored Executable file
View file

@@ -0,0 +1,8 @@
// clang-format off
#!/bin/bash
. $(dirname $0)/common.inc
# Unknown command-line options must be diagnosed with a clear message, in
# both joined and separate -z forms and as single/double-dash long options.
# `|| true` keeps the expected non-zero exit from tripping the ERR trap.
{ ./mold -zfoo || true; } 2>&1 | grep -q 'unknown command line option: -zfoo'
{ ./mold -z foo || true; } 2>&1 | grep -q 'unknown command line option: -z foo'
{ ./mold -abcdefg || true; } 2>&1 | grep -q 'unknown command line option: -abcdefg'
{ ./mold --abcdefg || true; } 2>&1 | grep -q 'unknown command line option: --abcdefg'

View file

@@ -0,0 +1,20 @@
// clang-format off
#!/bin/bash
. $(dirname $0)/common.inc
# Object with an unresolved reference so every link below fails and mold
# emits a diagnostic; we then check for ANSI escape bytes (\033) in stderr
# depending on the --color-diagnostics mode. stderr is a file, not a tty.
cat <<EOF | $CC -o $t/a.o -c -xc -
int foo();
int main() { foo(); }
EOF
# Bare --color-diagnostics with non-tty stderr: no escape sequences expected.
! ./mold -o $t/exe $t/a.o --color-diagnostics 2> $t/log
! grep -q $'\033' $t/log || false
# =always colors even when stderr is not a terminal.
! ./mold -o $t/exe $t/a.o --color-diagnostics=always 2> $t/log
grep -q $'\033' $t/log
# =never must not color.
! ./mold -o $t/exe $t/a.o --color-diagnostics=never 2> $t/log
! grep -q $'\033' $t/log || false
# =auto with non-tty stderr must not color.
! ./mold -o $t/exe $t/a.o --color-diagnostics=auto 2> $t/log
! grep -q $'\033' $t/log || false

11
third_party/mold/test/elf/comment.sh vendored Executable file
View file

@@ -0,0 +1,11 @@
// clang-format off
#!/bin/bash
. $(dirname $0)/common.inc
cat <<EOF | $CC -c -o $t/a.o -xc -
int main() {}
EOF
$CC -B. -o $t/exe $t/a.o
# The linker records its own identification string in .comment; the pattern
# accepts either "mold" or "sold".
readelf -p .comment $t/exe | grep -q '[ms]old'
# .comment must carry the mergeable-strings section flags (readelf prints
# SHF_MERGE|SHF_STRINGS as "MS").
readelf -SW $t/exe | grep -Eq '\.comment.*\bMS\b'

53
third_party/mold/test/elf/common-archive.sh vendored Executable file
View file

@@ -0,0 +1,53 @@
// clang-format off
#!/bin/bash
. $(dirname $0)/common.inc
# Resolution of common symbols (-fcommon tentative definitions) against
# archive members.
# a.o: tentative foo and bar, a real undefined reference to baz, and a weak
# undefined two() (printed as -1 when no definition is linked in).
cat <<EOF | $CC -fcommon -xc -c -o $t/a.o -
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
int foo;
int bar;
extern int baz;
__attribute__((weak)) int two();
int main() {
printf("%d %d %d %d\n", foo, bar, baz, two ? two() : -1);
}
EOF
cat <<EOF | $CC -fcommon -xc -c -o $t/b.o -
int foo = 5;
EOF
cat <<EOF | $CC -fcommon -xc -c -o $t/c.o -
int bar;
int two() { return 2; }
EOF
cat <<EOF | $CC -fcommon -xc -c -o $t/d.o -
int baz;
EOF
rm -f $t/e.a
ar rcs $t/e.a $t/b.o $t/c.o $t/d.o
$CC -B. -o $t/exe $t/a.o $t/e.a
# Expected "5 0 0 -1": b.o's strong foo=5 wins over the tentative foo; c.o is
# NOT extracted (its bar is only tentative and a.o already has one, hence
# two() stays undefined -> -1); baz resolves to 0 via d.o's tentative def.
$QEMU $t/exe | grep -q '5 0 0 -1'
# f.o provides strong definitions of bar/baz and a strong two().
cat <<EOF | $CC -fcommon -xc -c -o $t/f.o -
int bar = 0;
int baz = 7;
int two() { return 2; }
EOF
rm -f $t/f.a
ar rcs $t/f.a $t/b.o $t/f.o
$CC -B. -o $t/exe $t/a.o $t/f.a
# Expected "5 0 7 2": f.o is extracted for its strong definitions, so baz=7
# and the weak two() now resolves to the real two() returning 2.
$QEMU $t/exe | grep -q '5 0 7 2'

38
third_party/mold/test/elf/common-ref.sh vendored Executable file
View file

@@ -0,0 +1,38 @@
// clang-format off
#!/bin/bash
. $(dirname $0)/common.inc
# a.o has only a tentative (-fcommon) definition of bar and prints it.
cat <<EOF | $CC -fcommon -xc -c -o $t/a.o -
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
int bar;
int main() {
printf("%d\n", bar);
}
EOF
# c.a holds only another tentative symbol (foo) that nothing references.
cat <<EOF | $CC -fcommon -xc -c -o $t/b.o -
int foo;
EOF
rm -f $t/c.a
ar rcs $t/c.a $t/b.o
# e.a's member carries a strong bar = 5 alongside its own tentative foo.
cat <<EOF | $CC -fcommon -xc -c -o $t/d.o -
int foo;
int bar = 5;
int get_foo() { return foo; }
EOF
rm -f $t/e.a
ar rcs $t/e.a $t/d.o
$CC -B. -o $t/exe $t/a.o $t/c.a $t/e.a
# The strong bar=5 in e.a must override a.o's tentative bar, i.e. the common
# symbol reference pulls in d.o; output must be 5, not 0.
$QEMU $t/exe | grep -q 5

92
third_party/mold/test/elf/common.inc vendored Normal file
View file

@@ -0,0 +1,92 @@
// clang-format off
# -*- mode: sh -*-
# Shared setup sourced by every ELF test script: selects host or cross
# toolchains, defines helper predicates, and installs ERR/EXIT traps so each
# test prints "OK", "skipped", or the failing command.
# Make sure all commands print out messages in English
export LC_ALL=C
# Map `uname -m`-style machine names onto the canonical names used for test
# directories and qemu binaries.
canonical_name() {
case $1 in
i?86) echo i386 ;;
arm*) echo arm ;;
powerpc) echo ppc ;;
powerpc64) echo ppc64 ;;
powerpc64le) echo ppc64le ;;
*) echo $1
esac
}
HOST=$(canonical_name $(uname -m))
# Set tool names
# No $TRIPLE: native run with the host toolchain; QEMU stays empty so test
# binaries are executed directly.
if [ "$TRIPLE" = "" ]; then
MACHINE=$HOST
TESTDIR=out/test/elf/$HOST
CC="${TEST_CC:-cc}"
CXX="${TEST_CXX:-c++}"
GCC="${TEST_GCC:-gcc}"
GXX="${TEST_GXX:-g++}"
OBJDUMP=objdump
OBJCOPY=objcopy
STRIP=strip
QEMU=
# Special case: the POWER10 flavor reuses the plain ppc64le triple but adds
# -mcpu=power10 to the compilers and -cpu power10 to qemu.
elif [ "$TRIPLE" = powerpc64le_power10-linux-gnu ]; then
TRIPLE=powerpc64le-linux-gnu
MACHINE=ppc64le
TESTDIR=out/test/elf/ppc64le-power10
CC="${TEST_CC:-$TRIPLE-gcc} -mcpu=power10"
CXX="${TEST_CXX:-$TRIPLE-g++} -mcpu=power10"
GCC="${TEST_GCC:-$TRIPLE-gcc} -mcpu=power10"
GXX="${TEST_GXX:-$TRIPLE-g++} -mcpu=power10"
OBJDUMP="$TRIPLE-objdump"
OBJCOPY="$TRIPLE-objcopy"
STRIP="$TRIPLE-strip"
QEMU="qemu-ppc64le -L /usr/$TRIPLE -cpu power10"
# Any other triple: generic cross toolchain plus qemu user-mode emulation.
else
MACHINE=$(canonical_name $(echo $TRIPLE | sed 's/-.*//'))
TESTDIR=out/test/elf/$MACHINE
CC="${TEST_CC:-$TRIPLE-gcc}"
CXX="${TEST_CXX:-$TRIPLE-g++}"
GCC="${TEST_GCC:-$TRIPLE-gcc}"
GXX="${TEST_GXX:-$TRIPLE-g++}"
OBJDUMP="$TRIPLE-objdump"
OBJCOPY="$TRIPLE-objcopy"
STRIP="$TRIPLE-strip"
QEMU="qemu-$MACHINE -L /usr/$TRIPLE"
fi
# Common functions
# True if the compiler accepts the given flags for a trivial program.
test_cflags() {
echo 'int main() {}' | $CC "$@" -o /dev/null -xc - >& /dev/null
}
# True if the toolchain can compile a GNU ifunc symbol.
supports_ifunc() {
echo 'void x() __attribute__((ifunc("y"))); void *y() { return 0; }' | \
$CC -c -o /dev/null -xc - >& /dev/null
}
# Report the test as skipped and exit successfully; the EXIT trap is cleared
# so on_exit does not also print OK.
skip() {
echo skipped
trap - EXIT
exit 0
}
# ERR trap handler: report the failing line/command and propagate its status.
on_error() {
code=$?
echo "command failed: $1: $BASH_COMMAND"
trap - EXIT
exit $code
}
# EXIT trap handler: reached only when no command failed.
on_exit() {
echo OK
exit 0
}
trap 'on_error $LINENO' ERR
trap on_exit EXIT
# Print out the startup message
testname=$(basename "$0" .sh)
echo -n "Testing $testname ... "
# Per-test scratch directory.
t=$TESTDIR/$testname
mkdir -p $t

33
third_party/mold/test/elf/common.sh vendored Executable file
View file

@@ -0,0 +1,33 @@
// clang-format off
#!/bin/bash
. $(dirname $0)/common.inc
# Two objects with overlapping -fcommon symbols: tentative definitions must
# merge with each other and lose to strong (initialized) definitions.
cat <<EOF | $CC -fcommon -xc -c -o $t/a.o -
int foo;
int bar;
int baz = 42;
EOF
cat <<EOF | $CC -fcommon -xc -c -o $t/b.o -
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
int foo;
int bar = 5;
int baz;
int main() {
printf("%d %d %d\n", foo, bar, baz);
}
EOF
$CC -B. -o $t/exe $t/a.o $t/b.o
# foo: two tentative defs -> 0; bar: strong 5 wins; baz: strong 42 wins.
$QEMU $t/exe | grep -q '0 5 42'
readelf --sections $t/exe > $t/log
# Merged common symbols must land in a NOBITS .common section.
grep -q '.common .*NOBITS' $t/log

View file

@@ -0,0 +1,29 @@
// clang-format off
#!/bin/bash
. $(dirname $0)/common.inc
# arm-linux-gnueabihf-objcopy crashes on x86-64
[ $MACHINE = arm ] && skip
[ $MACHINE = riscv32 ] && skip
command -v zstdcat >& /dev/null || skip
cat <<EOF | $CC -c -g -o $t/a.o -xc -
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
int main() {
printf("Hello world\n");
return 0;
}
EOF
$CC -B. -o $t/exe $t/a.o -Wl,--compress-debug-sections=zstd
# Dump the compressed .debug_info section and verify the payload is valid
# zstd data.
$OBJCOPY --dump-section .debug_info=$t/debug_info $t/exe
# Skip the first 24 bytes — presumably the Elf64_Chdr compression header
# that precedes the compressed stream (TODO confirm for 32-bit targets).
dd if=$t/debug_info of=$t/debug_info.zstd bs=24 skip=1 status=none
zstdcat $t/debug_info.zstd > /dev/null

View file

@@ -0,0 +1,25 @@
// clang-format off
#!/bin/bash
. $(dirname $0)/common.inc
command -v dwarfdump >& /dev/null || skip
cat <<EOF | $CC -c -g -o $t/a.o -xc -
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
int main() {
printf("Hello world\n");
return 0;
}
EOF
$CC -B. -o $t/exe $t/a.o -Wl,--compress-debug-sections=zlib
# dwarfdump must be able to read the output, and must report the debug
# sections as SHF_COMPRESSED.
dwarfdump $t/exe > $t/log
grep -Fq '.debug_info SHF_COMPRESSED' $t/log
grep -Fq '.debug_str SHF_COMPRESSED' $t/log

View file

@@ -0,0 +1,21 @@
// clang-format off
#!/bin/bash
. $(dirname $0)/common.inc
command -v dwarfdump >& /dev/null || skip
# Inputs are compiled with -gz=zlib, i.e. their debug sections arrive
# compressed; the linker must decompress and merge them.
cat <<EOF | $CXX -c -o $t/a.o -g -gz=zlib -xc++ -
int main() {
return 0;
}
EOF
cat <<EOF | $CXX -c -o $t/b.o -g -gz=zlib -xc++ -
int foo() {
return 0;
}
EOF
$CC -B. -o $t/exe $t/a.o $t/b.o
# The merged output must be parseable by dwarfdump and still contain a
# .debug_info section.
dwarfdump $t/exe > /dev/null
readelf --sections $t/exe | grep -Fq .debug_info

View file

@@ -0,0 +1,43 @@
// clang-format off
#!/bin/bash
. $(dirname $0)/common.inc
[ $MACHINE = ppc64 ] && skip
[ $MACHINE = ppc64le ] && skip
[ $MACHINE = alpha ] && skip
# Three DSOs export the same variable with different alignments (32/8/256).
cat <<EOF | $CC -fPIC -shared -o $t/a.so -xc -
__attribute__((aligned(32))) int foo = 5;
EOF
cat <<EOF | $CC -fPIC -shared -o $t/b.so -xc -
__attribute__((aligned(8))) int foo = 5;
EOF
cat <<EOF | $CC -fPIC -shared -o $t/c.so -xc -
__attribute__((aligned(256))) int foo = 5;
EOF
# Non-PIE main object that takes foo's address, forcing a copy relocation.
cat <<EOF | $CC -fno-PIE -o $t/d.o -c -xc -
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
extern int foo;
int main() { printf("%d %p\n", foo, &foo); }
EOF
# For each DSO, the .copyrel section's alignment (last column of readelf -S)
# must match the alignment of the symbol being copied.
$CC -B. -o $t/exe1 $t/d.o $t/a.so -no-pie
$QEMU $t/exe1 > /dev/null
readelf -W --sections $t/exe1 | grep -q '\.copyrel.* 32$'
$CC -B. -o $t/exe2 $t/d.o $t/b.so -no-pie
$QEMU $t/exe2 > /dev/null
readelf -W --sections $t/exe2 | grep -q '\.copyrel.* 8$'
$CC -B. -o $t/exe3 $t/d.o $t/c.so -no-pie
$QEMU $t/exe3 > /dev/null
readelf -W --sections $t/exe3 | grep -q '\.copyrel.* 256$'

View file

@@ -0,0 +1,22 @@
// clang-format off
#!/bin/bash
. $(dirname $0)/common.inc
[ $MACHINE = ppc64 ] && skip
[ $MACHINE = ppc64le ] && skip
[ $MACHINE = alpha ] && skip
# Non-PIE code referencing a data symbol exported by a DSO needs a copy
# relocation in the executable.
cat <<EOF | $CC -o $t/a.o -c -xc -fno-PIE -
extern int foo;
int main() {
return foo;
}
EOF
cat <<EOF | $CC -shared -o $t/b.so -xc -
__attribute__((visibility("protected"))) int foo;
EOF
# A protected symbol must not be copy-relocated (the DSO assumes it always
# accesses its own copy), so the link must fail with this exact diagnostic.
! $CC -B. $t/a.o $t/b.so -o $t/exe >& $t/log -no-pie || false
grep -Fq 'cannot make copy relocation for protected symbol' $t/log

53
third_party/mold/test/elf/copyrel-relro.sh vendored Executable file
View file

@@ -0,0 +1,53 @@
// clang-format off
#!/bin/bash
. $(dirname $0)/common.inc
# A copy-relocated const object must end up in read-only memory at run time:
# storing to `readonly` must SIGSEGV while storing to `readwrite` must not.
# The program records which store faulted via a SIGSEGV handler + longjmp.
cat <<EOF | $CC -o $t/a.o -c -xc -fno-PIE -
#include "libc/runtime/runtime.h"
#include "libc/calls/calls.h"
#include "libc/calls/sigtimedwait.h"
#include "libc/calls/struct/sigaction.h"
#include "libc/calls/struct/siginfo.h"
#include "libc/sysv/consts/sa.h"
#include "libc/sysv/consts/sicode.h"
#include "libc/sysv/consts/ss.h"
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
extern const char readonly[100];
extern char readwrite[100];
static int segv = 0;
static jmp_buf buf;
void handler(int sig) {
segv = 1;
longjmp(buf, 1);
}
int main() {
signal(SIGSEGV, handler);
readwrite[0] = 5;
int x = segv;
if (setjmp(buf) == 0)
*(char *)readonly = 5;
int y = segv;
printf("sigsegv %d %d\n", x, y);
}
EOF
# The DSO provides one const and one mutable array; both get copy relocations
# in the non-PIE executable.
cat <<EOF | $CC -fPIC -shared -o $t/b.so -xc -
const char readonly[100] = "abc";
char readwrite[100] = "abc";
EOF
$CC -B. $t/a.o $t/b.so -o $t/exe -no-pie
# "sigsegv 0 1": the readwrite store succeeded, the readonly store faulted.
$QEMU $t/exe | grep -q '^sigsegv 0 1$'

36
third_party/mold/test/elf/copyrel.sh vendored Executable file
View file

@@ -0,0 +1,36 @@
// clang-format off
#!/bin/bash
. $(dirname $0)/common.inc
# foo/bar/baz are aliases of one variable in the DSO; copy relocations
# requested from two different non-PIC objects must all resolve to a single
# copy in the executable.
cat <<EOF | $CC -fno-PIC -o $t/a.o -c -xc -
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/stdio/dprintf.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
extern int foo;
extern int *get_bar();
int main() {
printf("%d %d %d\n", foo, *get_bar(), &foo == get_bar());
return 0;
}
EOF
cat <<EOF | $CC -fno-PIC -o $t/b.o -c -xc -
extern int bar;
int *get_bar() { return &bar; }
EOF
cat <<EOF | $CC -fPIC -o $t/c.o -c -xc -
int foo = 42;
extern int bar __attribute__((alias("foo")));
extern int baz __attribute__((alias("foo")));
EOF
$CC -B. -shared -o $t/c.so $t/c.o
$CC -B. -no-pie -o $t/exe $t/a.o $t/b.o $t/c.so
# "42 42 1": foo and bar read the same value AND share one address, i.e. the
# aliases were copy-relocated to the same location.
$QEMU $t/exe | grep -q '42 42 1'

View file

@@ -0,0 +1,32 @@
// clang-format off
#!/bin/bash
. $(dirname $0)/common.inc
command -v dwarfdump >& /dev/null || skip
# Two translation units carry identical debug info for the same inline
# constructor (struct Foo); the linked debug info must remain parseable.
cat <<EOF | $CXX -c -o $t/a.o -g -xc++ -
extern const char *msg;
struct Foo {
Foo() { msg = "Hello world"; }
};
Foo x;
EOF
cat <<EOF | $CXX -c -o $t/b.o -g -xc++ -
extern const char *msg;
struct Foo {
Foo() { msg = "Hello world"; }
};
Foo y;
EOF
cat <<EOF | $CXX -o $t/c.o -c -xc++ -g -
#include "third_party/libcxx/cstdio"
const char *msg;
int main() { printf("%s\n", msg); }
EOF
# NOTE(review): no -B. here, so this link uses the default system linker
# rather than ./mold — confirm whether -B. was intended.
$CXX -o $t/exe $t/a.o $t/b.o $t/c.o -g
$QEMU $t/exe | grep -q 'Hello world'
# dwarfdump must walk the merged debug info without errors.
dwarfdump $t/exe > /dev/null

View file

@@ -0,0 +1,23 @@
// clang-format off
#!/bin/bash
. $(dirname $0)/common.inc
# Check that -g3 macro debug info (.debug_macro) survives linking: when two
# TUs share a macro table for a.h, the DW_MACRO_import entries in the linked
# output must not end up pointing at offset 0x0.
cat <<EOF > $t/a.h
#define A 23
#define B 99
EOF
# Fix: the vendoring tool had replaced these includes with
# "// MISSING #include \"a.h\"" placeholders, leaving A and B undefined —
# the sources could not compile and the shared macro table this test
# exercises was never created. Restore the include; a.h is found via -I$t.
cat <<EOF | $GCC -o $t/b.o -c -xc - -I$t -g3
#include "a.h"
extern int z();
int main () { return z() - 122; }
EOF
cat <<EOF | $GCC -o $t/c.o -c -xc - -I$t -g3
#include "a.h"
int z() { return A + B; }
EOF
$GCC -B. -o $t/exe $t/b.o $t/c.o
$OBJDUMP --dwarf=macro $t/exe > $t/log
# No DW_MACRO_import entry may have been rewritten to offset 0x0.
! grep 'DW_MACRO_import -.* 0x0$' $t/log || false

14
third_party/mold/test/elf/default-symver.sh vendored Executable file
View file

@@ -0,0 +1,14 @@
// clang-format off
#!/bin/bash
. $(dirname $0)/common.inc
cat <<EOF | $CC -o $t/a.o -c -xc -
void foo() {}
EOF
# With -default-symver and no soname, dynamic symbols are versioned with the
# output file's own name.
$CC -B. -o $t/b.so -shared $t/a.o -Wl,-default-symver
readelf --dyn-syms $t/b.so | grep -q ' foo@@b\.so$'
# With an explicit --soname, that soname becomes the default version string.
$CC -B. -o $t/b.so -shared $t/a.o \
-Wl,--soname=bar -Wl,-default-symver
readelf --dyn-syms $t/b.so | grep -q ' foo@@bar$'

Some files were not shown because too many files have changed in this diff Show more