Rewrite Cosmopolitan Ar

The build/bootstrap/ar.com program is now tinier. This change reduces
its size from 140kb to 53kb. Nothing was traded away. Cosmopolitan Ar
performance is now 2x better than llvm-ar largely thanks to using the
copy_file_range() system call. This change homebrews a new allocation
API that addresses the shortcomings of the C standard library design.
Using these new balloc() and reballoc() functions I managed to reduce
memory consumption so much that Cosmopolitan Ar should now use roughly
100x fewer bytes of peak resident memory compared to llvm-ar. Correct
behavior with better compatibility has been assured. Binary output is
now pretty much bit-identical to llvm-ar, as of this change. This can
and should be the living proof we need to show that a better world is
possible for software.
This commit is contained in:
Justine Tunney 2023-07-02 10:19:16 -07:00
parent 197aa0d465
commit 0c630d95b5
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
27 changed files with 916 additions and 341 deletions

View file

@ -1,27 +1,162 @@
#ifndef COSMOPOLITAN_LIBC_ELF_STRUCT_EHDR_H_
#define COSMOPOLITAN_LIBC_ELF_STRUCT_EHDR_H_
#include "libc/elf/def.h"
#include "libc/elf/scalar.h"
#define EI_NIDENT 16
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/*
 * ELF header.
 *
 * Identifies a 64-bit ELF image and records where its program header
 * and section header tables live within the file.
 */
typedef struct Elf64_Ehdr {

  /*
   * Leading bytes of ELF header.
   *
   * - `e_ident[0]` is always `127`
   * - `e_ident[1]` is always `'E'`
   * - `e_ident[2]` is always `'L'`
   * - `e_ident[3]` is always `'F'`
   *
   * - `e_ident[EI_CLASS]` is mandatory and should be:
   *
   *   - `ELFCLASS64` if it's an Elf64 image
   *   - `ELFCLASS32` if it's an Elf32 image
   *   - Otherwise we assume it's an Elf64 image
   *
   * - `e_ident[EI_DATA]` is advisory and could be:
   *
   *   - `ELFDATANONE` isn't strictly valid
   *   - `ELFDATA2LSB` for little-endian
   *   - `ELFDATA2MSB` for big-endian
   *
   * - `e_ident[EI_VERSION]` is advisory and should be:
   *
   *   - `EV_NONE` if it's zero or unspecified
   *   - `EV_CURRENT` for current ELF version (which is 1)
   *
   * - `e_ident[EI_OSABI]` is mandatory and could be:
   *
   *   - `ELFOSABI_NONE` is zero
   *   - `ELFOSABI_GNU` is for GNU
   *   - `ELFOSABI_SYSV` used by GNU
   *   - `ELFOSABI_LINUX` doesn't care
   *   - `ELFOSABI_FREEBSD` does care (recommended)
   *   - `ELFOSABI_NETBSD` doesn't care (see `PT_NOTE`)
   *   - `ELFOSABI_OPENBSD` doesn't care (see `PT_NOTE`)
   *
   * - `e_ident[EI_ABIVERSION]` is advisory
   *
   * The array holds `EI_NIDENT` (16) bytes.
   */
  unsigned char e_ident[EI_NIDENT];

  /*
   * ELF image type.
   *
   * This field is mandatory and should be one of:
   *
   * - `ET_REL` for `.o` object files
   * - `ET_DYN` for `.so` files and `-pie` executables
   * - `ET_EXEC` for statically-linked executables
   *
   */
  Elf64_Half e_type;

  /*
   * ELF machine type.
   *
   * This field is mandatory and could be one of:
   *
   * - `EM_M32` for Bellmac
   * - `EM_X86_64` for Amd64
   * - `EM_AARCH64` for Arm64
   * - `EM_PPC64` for Raptors
   * - `EM_RISCV` for Berkeley
   * - `EM_S390` for System/360
   *
   */
  Elf64_Half e_machine;

  /*
   * ELF version.
   *
   * This field is advisory and could be:
   *
   * - `EV_NONE` if it's zero or unspecified
   * - `EV_CURRENT` for current ELF version (which is 1)
   *
   * @see `e_ident[EI_VERSION]`
   */
  Elf64_Word e_version;

  /*
   * ELF executable entrypoint.
   *
   * Static executables should use this field to store the virtual
   * address of the _start() function. This field may be zero, for
   * unspecified.
   */
  Elf64_Addr e_entry;

  /*
   * `Elf64_Phdr` file offset.
   *
   * This field is mandatory. Object files should set it to zero.
   */
  Elf64_Off e_phoff;

  /*
   * `Elf64_Shdr` file offset.
   *
   * This field is advisory.
   */
  Elf64_Off e_shoff;

  /*
   * ELF flags.
   *
   * This field is advisory.
   */
  Elf64_Word e_flags;

  /*
   * `Elf64_Ehdr` size.
   *
   * This field is advisory and should be 64.
   */
  Elf64_Half e_ehsize;

  /*
   * `Elf64_Phdr` element size.
   *
   * This field *is* cared about and should be set to 56. Cosmopolitan
   * permits larger values for the pleasure of it.
   */
  Elf64_Half e_phentsize;

  /*
   * `Elf64_Phdr` array count.
   */
  Elf64_Half e_phnum;

  /*
   * `Elf64_Shdr` element size.
   *
   * This field is advisory and should be set to 64. Cosmopolitan
   * permits larger values for the pleasure of it.
   */
  Elf64_Half e_shentsize;

  /*
   * `Elf64_Shdr` count.
   *
   * This field is advisory.
   */
  Elf64_Half e_shnum;

  /*
   * Section header index of section name string table.
   */
  Elf64_Half e_shstrndx;
} Elf64_Ehdr;
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */

View file

@ -3,9 +3,51 @@
#include "libc/elf/scalar.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/*
 * ELF relocation.
 *
 * Relocations let us easily apply fixups to compiled object code. This
 * data structure represents one entry of a section whose `sh_type` is
 * `SHT_REL`.
 *
 * @see Elf64_Rela
 */
typedef struct Elf64_Rel {

  /*
   * Location to be modified.
   *
   * If `e_type` is `ET_REL` then this is a section data byte offset.
   *
   * If `e_type` isn't `ET_REL` then this is a virtual address.
   */
  Elf64_Addr r_offset;

  /*
   * Relocation type and symbol.
   *
   * This value may be created using:
   *
   *     r_info = ELF64_R_INFO(sym, type);
   *
   * This value may be read using:
   *
   *     Elf64_Word sym = ELF64_R_SYM(r_info);
   *     Elf64_Word type = ELF64_R_TYPE(r_info);
   *
   * Where `sym` is a symbol index, and `type` might be:
   *
   * - `R_X86_64_64`
   * - `R_X86_64_PC32`
   * - `R_X86_64_GOTPCRELX`
   * - `R_AARCH64_ABS64`
   *
   * Each relocation type specifies a mathematical formula that's used
   * to compute the appropriate value for the fixed-up object code. If
   * it needs an addend, then this struct doesn't have one, but it can
   * still be embedded by the compiler in the location to be modified.
   */
  Elf64_Xword r_info;
} Elf64_Rel;
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */

View file

@ -3,15 +3,55 @@
#include "libc/elf/scalar.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/*
 * ELF relocation w/ explicit addend.
 *
 * Relocations let us easily apply fixups to compiled object code. This
 * data structure represents one entry of a section whose `sh_type` is
 * `SHT_RELA`.
 *
 * @see Elf64_Rel
 */
typedef struct Elf64_Rela {

  /*
   * Location to be modified.
   *
   * If `e_type` is `ET_REL` then this is a section data byte offset.
   *
   * If `e_type` isn't `ET_REL` then this is a virtual address.
   */
  Elf64_Addr r_offset;

  /*
   * Relocation type and symbol.
   *
   * This value may be created using:
   *
   *     r_info = ELF64_R_INFO(sym, type);
   *
   * This value may be read using:
   *
   *     Elf64_Word sym = ELF64_R_SYM(r_info);
   *     Elf64_Word type = ELF64_R_TYPE(r_info);
   *
   * Where `sym` is a symbol index, and `type` might be:
   *
   * - `R_X86_64_64`
   * - `R_X86_64_PC32`
   * - `R_X86_64_GOTPCRELX`
   * - `R_AARCH64_ABS64`
   *
   */
  Elf64_Xword r_info;

  /*
   * Relocation parameter.
   *
   * Each relocation type has its own mathematical formula, which should
   * incorporate this value in its own unique way. The value is a signed
   * 64-bit integer.
   */
  Elf64_Sxword r_addend;
} Elf64_Rela;
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */

View file

@ -8,24 +8,44 @@
* @see https://docs.oracle.com/cd/E19683-01/816-1386/chapter6-94076/index.html
*/
typedef struct Elf64_Shdr {

  /*
   * Section name, as a byte offset into the section name string table
   * (the section selected by `e_shstrndx` in the ELF header).
   */
  Elf64_Word sh_name;

  /*
   * Section type.
   *
   * One of `SHT_{PROGBITS,NOBITS,STRTAB,SYMTAB,RELA,...}`.
   */
  Elf64_Word sh_type;

  /*
   * Section attribute bits.
   *
   * Bitwise OR of `SHF_{WRITE,ALLOC,EXECINSTR,MERGE,STRINGS,...}`.
   */
  Elf64_Xword sh_flags;

  /*
   * Virtual address of the section in the memory image, or zero if the
   * section isn't loaded.
   */
  Elf64_Addr sh_addr;

  /*
   * Byte offset of the section's contents from the start of the file.
   */
  Elf64_Off sh_offset;

  /*
   * Size of the section in bytes.
   */
  Elf64_Xword sh_size;

  /*
   * Index of linked section header.
   *
   * If `sh_type` is `SHT_RELA` then `sh_link` holds the section header
   * index of the associated symbol table.
   *
   * If `sh_type` is `SHT_SYMTAB` then `sh_link` holds the section
   * header index of the associated string table.
   */
  Elf64_Word sh_link;

  /*
   * Type-dependent extra information.
   *
   * If `sh_type` is `SHT_RELA` then `sh_info` contains the index of the
   * section to which relocations apply.
   *
   * If `sh_type` is `SHT_SYMTAB` or `SHT_DYNSYM` then `sh_info`
   * contains an index that's one greater than symbol table index of
   * last `STB_LOCAL` symbol.
   */
  Elf64_Word sh_info;

  /*
   * Required alignment of the section's address, in bytes.
   */
  Elf64_Xword sh_addralign;

  /*
   * Size in bytes of each entry, for sections that hold a table of
   * fixed-size entries (e.g. symbol tables); zero otherwise.
   */
  Elf64_Xword sh_entsize;
} Elf64_Shdr;
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */

View file

@ -4,18 +4,91 @@
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/*
* ELF symbol.
*
* Each field below describes one attribute of a single symbol: its
* name, type and binding, visibility, section, value, and size.
*/
typedef struct Elf64_Sym {
/*
* Symbol name.
*
* This value is a byte offset into the `.strtab` section. If this
* value is zero, then the symbol has no name.
*/
Elf64_Word st_name;
/*
* Symbol type and binding.
*
* This value may be created using:
*
* sym.st_info = ELF64_ST_INFO(bind, type);
*
* This value may be read using:
*
* int bind = ELF64_ST_BIND(sym.st_info);
* int type = ELF64_ST_TYPE(sym.st_info);
*
* Where `bind` is typically:
*
* - `STB_LOCAL`
* - `STB_GLOBAL`
* - `STB_WEAK`
*
* Where `type` is typically:
*
* - `STT_NOTYPE`
* - `STT_OBJECT`
* - `STT_FUNC`
* - `STT_SECTION`
* - `STT_FILE`
* - `STT_COMMON`
*/
uint8_t st_info;
/*
* Symbol visibility.
*
* This value should be accessed using:
*
* int visibility = ELF64_ST_VISIBILITY(sym.st_other);
*
* Where `visibility` is typically:
*
* - `STV_DEFAULT`
* - `STV_INTERNAL`
* - `STV_HIDDEN`
* - `STV_PROTECTED`
*/
uint8_t st_other;
/*
* Symbol section.
*
* If `st_shndx` is within `(SHN_UNDEF,SHN_LORESERVE)` then it holds
* an index into the section header table.
*
* Otherwise `st_shndx` is usually one of the following magic numbers:
*
* - `SHN_UNDEF` means symbol is undefined
* - `SHN_ABS` means symbol is a linker integer
* - `SHN_COMMON` means symbol is defined traditionally
*/
Elf64_Section st_shndx;
/*
* Symbol value.
*
* If `e_type` is `ET_REL` and `st_shndx` is `SHN_COMMON`, then
* `st_value` holds the required symbol alignment, or 1 if no
* alignment is required.
*
* If `e_type` is `ET_REL` and `st_shndx` is a section index, then
* `st_value` holds a byte offset into the section memory, i.e. a
* byte offset into GetElfSectionAddress(st_shndx).
*
* If `e_type` isn't `ET_REL` then `st_value` holds a virtual address.
*/
Elf64_Addr st_value;
/*
* Symbol size.
*
* Byte length of the symbol, optionally set by the `.size` directive.
*/
Elf64_Xword st_size;
} Elf64_Sym;
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */