diff --git a/build/bootstrap/ar.com b/build/bootstrap/ar.com index 4acd9ca56..d57a4284f 100755 Binary files a/build/bootstrap/ar.com and b/build/bootstrap/ar.com differ diff --git a/build/bootstrap/package.com b/build/bootstrap/package.com index cfda9f413..c404b3b2e 100755 Binary files a/build/bootstrap/package.com and b/build/bootstrap/package.com differ diff --git a/libc/ar.h b/libc/ar.h index 01b62e4c5..ffdbf4335 100644 --- a/libc/ar.h +++ b/libc/ar.h @@ -10,7 +10,8 @@ COSMOPOLITAN_C_START_ struct ar_hdr { char ar_name[16]; char ar_date[12]; - char ar_uid[6], ar_gid[6]; + char ar_uid[6]; + char ar_gid[6]; char ar_mode[8]; char ar_size[10]; char ar_fmag[2]; diff --git a/libc/calls/vdsofunc.greg.c b/libc/calls/vdsofunc.greg.c index 21a561b18..861514bab 100644 --- a/libc/calls/vdsofunc.greg.c +++ b/libc/calls/vdsofunc.greg.c @@ -18,6 +18,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/calls/calls.h" #include "libc/dce.h" +#include "libc/elf/def.h" #include "libc/elf/scalar.h" #include "libc/elf/struct/ehdr.h" #include "libc/elf/struct/phdr.h" diff --git a/libc/elf/elf.h b/libc/elf/elf.h index 985bcf445..ee901be34 100644 --- a/libc/elf/elf.h +++ b/libc/elf/elf.h @@ -11,16 +11,17 @@ COSMOPOLITAN_C_START_ │ cosmopolitan § executable linkable format ─╬─│┼ ╚────────────────────────────────────────────────────────────────────────────│*/ -char *GetElfStringTable(const Elf64_Ehdr *, size_t, const char *); -Elf64_Sym *GetElfSymbolTable(const Elf64_Ehdr *, size_t, int, Elf64_Xword *); -bool IsElf64Binary(const Elf64_Ehdr *, size_t); bool IsElfSymbolContent(const Elf64_Sym *); +bool IsElf64Binary(const Elf64_Ehdr *, size_t); +char *GetElfStringTable(const Elf64_Ehdr *, size_t, const char *); +Elf64_Sym *GetElfSymbols(const Elf64_Ehdr *, size_t, int, Elf64_Xword *); +Elf64_Shdr *GetElfSymbolTable(const Elf64_Ehdr *, size_t, int, Elf64_Xword *); Elf64_Phdr *GetElfProgramHeaderAddress(const Elf64_Ehdr *, size_t, Elf64_Half); 
Elf64_Shdr *GetElfSectionHeaderAddress(const Elf64_Ehdr *, size_t, Elf64_Half); +const char *GetElfString(const Elf64_Ehdr *, size_t, const char *, Elf64_Word); void *GetElfSectionAddress(const Elf64_Ehdr *, size_t, const Elf64_Shdr *); -char *GetElfSectionNameStringTable(const Elf64_Ehdr *, size_t); -char *GetElfString(const Elf64_Ehdr *, size_t, const char *, Elf64_Word); const char *GetElfSectionName(const Elf64_Ehdr *, size_t, Elf64_Shdr *); +char *GetElfSectionNameStringTable(const Elf64_Ehdr *, size_t); #endif /* COSMO */ COSMOPOLITAN_C_END_ diff --git a/libc/elf/getelfprogramheaderaddress.c b/libc/elf/getelfprogramheaderaddress.c index b31a2ecf5..84999587b 100644 --- a/libc/elf/getelfprogramheaderaddress.c +++ b/libc/elf/getelfprogramheaderaddress.c @@ -31,7 +31,7 @@ Elf64_Phdr *GetElfProgramHeaderAddress(const Elf64_Ehdr *elf, // size_t mapsize, // Elf64_Half i) { // - uint64_t off; + Elf64_Off off; if (i >= elf->e_phnum) return 0; if (elf->e_phoff <= 0) return 0; if (elf->e_phoff >= mapsize) return 0; diff --git a/libc/elf/getelfsectionaddress.c b/libc/elf/getelfsectionaddress.c index 9cce7b293..2db192053 100644 --- a/libc/elf/getelfsectionaddress.c +++ b/libc/elf/getelfsectionaddress.c @@ -16,25 +16,33 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/elf/def.h" #include "libc/elf/elf.h" +#include "libc/elf/scalar.h" #include "libc/stdckdint.h" /** - * Returns pointer to elf section file content. + * Returns pointer to ELF section file content. * - * This function shouldn't be used on the bss section. + * This function computes `elf + sh_offset` with safety checks. 
* - * @param elf points to the start of the executable image - * @param mapsize is the number of bytes past `elf` we can access - * @param shdr is from GetElfSectionHeaderAddress() and null-propagating - * @return pointer to content bytes, or null on error + * @param elf points to the start of the executable image data + * @param mapsize is the number of bytes of `elf` we can access + * @param shdr is from GetElfSectionHeaderAddress(), or null + * @return pointer to section data within image, or null if + * 1. `shdr` was null, or + * 2. `sh_size` was zero, or + * 3. `sh_type` was `SHT_NOBITS`, or + * 4. content wasn't contained within `[elf,elf+mapsize)`, or + * 5. an arithmetic overflow occurred */ void *GetElfSectionAddress(const Elf64_Ehdr *elf, // validated size_t mapsize, // validated const Elf64_Shdr *shdr) { // foreign - uint64_t last; + Elf64_Off last; if (!shdr) return 0; if (shdr->sh_size <= 0) return 0; + if (shdr->sh_type == SHT_NOBITS) return 0; if (ckd_add(&last, shdr->sh_offset, shdr->sh_size)) return 0; if (last > mapsize) return 0; return (char *)elf + shdr->sh_offset; diff --git a/libc/elf/getelfsectionheaderaddress.c b/libc/elf/getelfsectionheaderaddress.c index 2d54fc600..d1f9eb023 100644 --- a/libc/elf/getelfsectionheaderaddress.c +++ b/libc/elf/getelfsectionheaderaddress.c @@ -16,21 +16,29 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/elf/def.h" #include "libc/elf/elf.h" +#include "libc/elf/scalar.h" #include "libc/elf/struct/shdr.h" /** * Returns section header object at `elf.section[i]`. 
* - * @param elf points to the start of the executable image + * @param elf points to the start of the executable image data * @param mapsize is the number of bytes past `elf` we can access - * @param i is the section header index, starting at zero - * @return section header pointer, or null on error + * @param i is the index of the section header + * @return pointer to section header within image, or null if + * 1. `i` was a magic number, i.e. `i >= SHN_LORESERVE`, or + * 2. `e_shoff` was zero (image has no section headers), or + * 3. `e_shentsize` had fewer than the mandatory 64 bytes, or + * 4. section header wasn't contained by `[elf,elf+mapsize)`, or + * 5. an arithmetic overflow occurred */ Elf64_Shdr *GetElfSectionHeaderAddress(const Elf64_Ehdr *elf, // size_t mapsize, // Elf64_Half i) { // - uint64_t off; + Elf64_Off off; + if (i >= SHN_LORESERVE) return 0; if (i >= elf->e_shnum) return 0; if (elf->e_shoff <= 0) return 0; if (elf->e_shoff >= mapsize) return 0; diff --git a/libc/elf/getelfstring.c b/libc/elf/getelfstring.c index a2e5c6d79..89a5ce327 100644 --- a/libc/elf/getelfstring.c +++ b/libc/elf/getelfstring.c @@ -24,21 +24,32 @@ /** * Returns `strtab + i` from elf string table. 
* - * @param elf points to the start of the executable image + * @param elf points to the start of the executable image data * @param mapsize is the number of bytes past `elf` we can access * @param strtab is double-nul string list from GetElfStringTable() - * @param i is byte index into strtab where needed string starts - * @return pointer to nul terminated string, or null on error + * which may be null, in which case only the `!i` name is valid + * @param i is byte index into strtab where needed string starts or + * zero (no name) in which case empty string is always returned + * as a pointer to the read-only string literal, rather than in + * the elf image, since the elf spec permits an empty or absent + * string table section + * @return a const nul-terminated string pointer, otherwise null if + * 1. `i` was nonzero and `strtab` was null, or + * 2. `strtab+i` wasn't inside `[elf,elf+mapsize)`, or + * 3. a nul byte wasn't present within `[strtab+i,elf+mapsize)`, or + * 4. an arithmetic overflow occurred */ -char *GetElfString(const Elf64_Ehdr *elf, // validated - size_t mapsize, // validated - const char *strtab, // validated - Elf64_Word i) { // foreign +const char *GetElfString(const Elf64_Ehdr *elf, // validated + size_t mapsize, // validated + const char *strtab, // validated + Elf64_Word i) { // foreign const char *e; + if (!i) return ""; e = (const char *)elf; + if (!strtab) return 0; if (strtab < e) return 0; if (strtab >= e + mapsize) return 0; if (strtab + i >= e + mapsize) return 0; if (!memchr(strtab + i, 0, (e + mapsize) - (strtab + i))) return 0; - return (char *)strtab + i; + return (const char *)strtab + i; } diff --git a/libc/elf/getelfstringtable.c b/libc/elf/getelfstringtable.c index cc6d38292..623434ec8 100644 --- a/libc/elf/getelfstringtable.c +++ b/libc/elf/getelfstringtable.c @@ -25,10 +25,16 @@ /** * Returns pointer to elf string table. 
* - * @param elf points to the start of the executable image + * @param elf points to the start of the executable image data * @param mapsize is the number of bytes past `elf` we can access * @param section_name is usually `".strtab"`, `".dynstr"`, or null - * @return pointer to double-nul terminated string list or null on error + * @return pointer to string table within `elf` image, which should + * normally be a sequence of NUL-terminated strings whose first + * string is the empty string; otherwise NULL is returned, when + * either: (1) `section_name` is not found, (2) it did not have + * the `SHT_STRTAB` section type, (3) the section size was zero + * noting that the ELF spec does consider that legal, or lastly + * (4) an overflow or boundary violation occurred */ char *GetElfStringTable(const Elf64_Ehdr *elf, // size_t mapsize, // diff --git a/libc/elf/getelfsymbols.c b/libc/elf/getelfsymbols.c new file mode 100644 index 000000000..c78d8c0a5 --- /dev/null +++ b/libc/elf/getelfsymbols.c @@ -0,0 +1,58 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/elf/elf.h" +#include "libc/elf/scalar.h" +#include "libc/elf/struct/ehdr.h" +#include "libc/elf/struct/sym.h" + +/** + * Returns pointer to array of elf symbols. + * + * This is a shortcut composing GetElfSymbolTable() and + * GetElfSectionAddress(), that can be used as follows: + * + * Elf64_Xword i, n; + * Elf64_Sym *st = GetElfSymbols(map, size, SHT_SYMTAB, &n); + * for (i = 0; st && i < n; ++i) { + * // st[i] holds a symbol + * } + * + * The above code will iterate over the relocatable and/or + * statically-linked symbols defined by an ELF image. + * + * @param elf points to the start of the executable image data + * @param mapsize is the number of bytes past `elf` we can access + * @param section_type is usually `SHT_SYMTAB` or `SHT_DYNSYM` + * @param out_count optionally receives number of symbols + * @return pointer to array of elf symbols, otherwise null + */ +Elf64_Sym *GetElfSymbols(const Elf64_Ehdr *elf, // + size_t mapsize, // + int section_type, // + Elf64_Xword *out_count) { + Elf64_Sym *syms; + Elf64_Xword count; + if ((syms = GetElfSectionAddress( + elf, mapsize, + GetElfSymbolTable(elf, mapsize, section_type, &count))) && + out_count) { + *out_count = count; + } + return syms; +} diff --git a/libc/elf/getelfsymboltable.c b/libc/elf/getelfsymboltable.c index faa74cf1f..b4b92f3d6 100644 --- a/libc/elf/getelfsymboltable.c +++ b/libc/elf/getelfsymboltable.c @@ -1,7 +1,7 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ 
╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ Copyright 2023 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ @@ -20,29 +20,59 @@ #include "libc/elf/elf.h" #include "libc/elf/scalar.h" #include "libc/elf/struct/ehdr.h" +#include "libc/elf/struct/shdr.h" #include "libc/elf/struct/sym.h" /** - * Returns pointer to elf symbol table. + * Returns pointer to the elf section header for a symbol table. * - * @param elf points to the start of the executable image + * The easiest way to get the symbol table is: + * + * Elf64_Xword i, n; + * Elf64_Sym *st = GetElfSymbols(map, size, SHT_SYMTAB, &n); + * for (i = 0; st && i < n; ++i) { + * // st[i] holds a symbol + * } + * + * This API is more verbose than the GetElfSymbols() shortcut, however + * calling this the long way makes tricks like the following possible: + * + * Elf64_Xword i, n; + * Elf64_Shdr *sh = GetElfSymbolTable(map, size, SHT_SYMTAB, &n); + * Elf64_Sym *st = GetElfSectionAddress(map, size, sh); + * if (st) { + * for (i = sh->sh_info; i < n; ++i) { + * // st[i] holds a non-local symbol + * } + * } + * + * Our code here only cares about `STB_GLOBAL` and `STB_WEAK` symbols + * however `SHT_SYMTAB` usually has countless `STB_LOCAL` entries too + * that must be skipped over. The trick is that the ELF spec requires + * local symbols be ordered before global symbols, and that the index + * dividing the two be stored to `sh_info`. So, if we start iterating + * there, then we've cleverly avoided possibly dozens of page faults! 
+ * + * @param elf points to the start of the executable image data * @param mapsize is the number of bytes past `elf` we can access * @param section_type is usually `SHT_SYMTAB` or `SHT_DYNSYM` - * @param out_count optionally receives number of elements in res - * @return pointer to symbol array, or null on error + * @param out_count optionally receives number of symbols + * @return pointer to symbol table section header, otherwise null */ -Elf64_Sym *GetElfSymbolTable(const Elf64_Ehdr *elf, // - size_t mapsize, // - int section_type, // - Elf64_Xword *out_count) { +Elf64_Shdr *GetElfSymbolTable(const Elf64_Ehdr *elf, // + size_t mapsize, // + int section_type, // + Elf64_Xword *out_count) { int i; Elf64_Shdr *shdr; for (i = elf->e_shnum; i > 0; --i) { if ((shdr = GetElfSectionHeaderAddress(elf, mapsize, i - 1)) && // shdr->sh_entsize == sizeof(Elf64_Sym) && // shdr->sh_type == section_type) { - if (out_count) *out_count = shdr->sh_size / sizeof(Elf64_Sym); - return GetElfSectionAddress(elf, mapsize, shdr); + if (out_count) { + *out_count = shdr->sh_size / sizeof(Elf64_Sym); + } + return shdr; } } return 0; diff --git a/libc/elf/iselfsymbolcontent.c b/libc/elf/iselfsymbolcontent.c index e09aaa8f8..584b3fa7c 100644 --- a/libc/elf/iselfsymbolcontent.c +++ b/libc/elf/iselfsymbolcontent.c @@ -16,6 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/elf/def.h" #include "libc/elf/elf.h" bool IsElfSymbolContent(const Elf64_Sym *sym) { diff --git a/libc/elf/struct/ehdr.h b/libc/elf/struct/ehdr.h index bebf53030..73f023348 100644 --- a/libc/elf/struct/ehdr.h +++ b/libc/elf/struct/ehdr.h @@ -1,27 +1,162 @@ #ifndef COSMOPOLITAN_LIBC_ELF_STRUCT_EHDR_H_ #define COSMOPOLITAN_LIBC_ELF_STRUCT_EHDR_H_ -#include "libc/elf/def.h" #include "libc/elf/scalar.h" - -#define EI_NIDENT 16 - #if !(__ASSEMBLER__ + __LINKER__ + 0) +/* + * ELF header. + */ typedef struct Elf64_Ehdr { - unsigned char e_ident[EI_NIDENT]; + + /* + * Leading bytes of ELF header. + * + * - `e_ident[0]` is always `127` + * - `e_ident[1]` is always `'E'` + * - `e_ident[2]` is always `'L'` + * - `e_ident[3]` is always `'F'` + * + * - `e_ident[EI_CLASS]` is mandatory and should be: + * + * - `ELFCLASS64` if it's an Elf64 image + * - `ELFCLASS32` if it's an Elf32 image + * - Otherwise we assume it's an Elf64 image + * + * - `e_ident[EI_DATA]` is advisory and could be: + * + * - `ELFDATANONE` isn't strictly valid + * - `ELFDATA2LSB` for little-endian + * - `ELFDATA2MSB` for big-endian + * + * - `e_ident[EI_VERSION]` is advisory and should be: + * + * - `EV_NONE` if it's zero or unspecified + * - `EV_CURRENT` for current ELF version (which is 1) + * + * - `e_ident[EI_OSABI]` is mandatory and could be: + * + * - `ELFOSABI_NONE` is zero + * - `ELFOSABI_GNU` is for GNU + * - `ELFOSABI_SYSV` used by GNU + * - `ELFOSABI_LINUX` doesn't care + * - `ELFOSABI_FREEBSD` does care (recommended) + * - `ELFOSABI_NETBSD` doesn't care (see `PT_NOTE`) + * - `ELFOSABI_OPENBSD` doesn't care (see `PT_NOTE`) + * + * - `e_ident[EI_ABIVERSION]` is advisory + * + */ unsigned char e_ident[16]; + + /* + * ELF image type. 
+ * + * This field is mandatory and should be one of: + * + * - `ET_REL` for `.o` object files + * - `ET_DYN` for `.so` files and `-pie` executables + * - `ET_EXEC` for statically-linked executables + * + */ Elf64_Half e_type; + + /* + * ELF machine type. + * + * This field is mandatory and could be one of: + * + * - `EM_M32` for Bellmac + * - `EM_X86_64` for Amd64 + * - `EM_AARCH64` for Arm64 + * - `EM_PPC64` for Raptors + * - `EM_RISCV` for Berkeley + * - `EM_S390` for System/360 + * + */ Elf64_Half e_machine; + + /* + * ELF version. + * + * This field is advisory and could be: + * + * - `EV_NONE` if it's zero or unspecified + * - `EV_CURRENT` for current ELF version (which is 1) + * + * @see `e_ident[EI_VERSION]` + */ Elf64_Word e_version; + + /* + * ELF executable entrypoint. + * + * Static executables should use this field to store the virtual + * address of the _start() function. This field may be zero, for + * unspecified. + */ Elf64_Addr e_entry; + + /* + * `Elf64_Phdr` file offset. + * + * This field is mandatory. Object files should set it to zero. + */ Elf64_Off e_phoff; + + /* + * `Elf64_Shdr` file offset. + * + * This field is advisory. + */ Elf64_Off e_shoff; + + /* + * ELF flags. + * + * This field is advisory. + */ Elf64_Word e_flags; + + /* + * `Elf64_Ehdr` size. + * + * This field is advisory and should be 64. + */ Elf64_Half e_ehsize; + + /* + * `Elf64_Phdr` element size. + * + * This field *is* cared about and should be set to 56. Cosmopolitan + * permits larger values for the pleasure of it. + */ Elf64_Half e_phentsize; + + /* + * `Elf64_Phdr` array count. + */ Elf64_Half e_phnum; + + /* + * `Elf64_Shdr` element size. + * + * This field is advisory and should be set to 64. Cosmopolitan + * permits larger values for the pleasure of it. + */ Elf64_Half e_shentsize; + + /* + * `Elf64_Shdr` count. + * + * This field is advisory. + */ Elf64_Half e_shnum; + + /* + * Section header index of section name string table. 
+ */ Elf64_Half e_shstrndx; + } Elf64_Ehdr; #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ diff --git a/libc/elf/struct/rel.h b/libc/elf/struct/rel.h index 32b97d9cc..d9c3e2f4b 100644 --- a/libc/elf/struct/rel.h +++ b/libc/elf/struct/rel.h @@ -3,9 +3,51 @@ #include "libc/elf/scalar.h" #if !(__ASSEMBLER__ + __LINKER__ + 0) +/* + * ELF relocation. + * + * Relocations let us easily apply fixups to compiled object code. This + * data structure represents the contents of an `sh_type` w/ `SHT_REL`. + * + * @see Elf64_Rela + */ typedef struct Elf64_Rel { + + /* + * Location to be modified. + * + * If `e_type` is `ET_REL` then this is a section data byte offset. + * + * If `e_type` isn't `ET_REL` then this is a virtual address. + */ Elf64_Addr r_offset; - Elf64_Xword r_info; /** @see ELF64_R_{SYM,SIZE,INFO} */ + + /* + * Relocation type and symbol. + * + * This value may be created using: + * + * r_info = ELF64_R_INFO(sym, type); + * + * This value may be read using: + * + * Elf64_Word sym = ELF64_R_SYM(r_info); + * Elf64_Word type = ELF64_R_TYPE(r_info); + * + * Where `sym` is a symbol index, and `type` might be: + * + * - `R_X86_64_64` + * - `R_X86_64_PC32` + * - `R_X86_64_GOTPCRELX` + * - `R_AARCH64_ABS64` + * + * Each relocation type specifies a mathematical formula that's used + * to compute the appropriate value for the fixed-up object code. If + * it needs an addend, then this struct doesn't have one, but it can + * still be embedded by the compiler in the location to be modified. + */ + Elf64_Xword r_info; + } Elf64_Rel; #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ diff --git a/libc/elf/struct/rela.h b/libc/elf/struct/rela.h index 5fcbc8ea9..2d568c4da 100644 --- a/libc/elf/struct/rela.h +++ b/libc/elf/struct/rela.h @@ -3,15 +3,55 @@ #include "libc/elf/scalar.h" #if !(__ASSEMBLER__ + __LINKER__ + 0) +/* + * ELF relocation w/ explicit addend. + * + * Relocations let us easily apply fixups to compiled object code. 
This + * data structure represents the contents of an `sh_type` w/ `SHT_RELA` + * + * @see Elf64_Rel + */ typedef struct Elf64_Rela { - /*u64*/ Elf64_Addr r_offset; + /* - * ELF64_R_SYM(r_info) → sym - * ELF64_R_TYPE(r_info) → R_X86_64_{64,PC32,GOTPCRELX,...} - * ELF64_R_INFO(sym, type) → r_info + * Location to be modified. + * + * If `e_type` is `ET_REL` then this is a section data byte offset. + * + * If `e_type` isn't `ET_REL` then this is a virtual address. */ - /*u64*/ Elf64_Xword r_info; /* ELF64_R_{SYM,SIZE,INFO} */ - /*i64*/ Elf64_Sxword r_addend; + Elf64_Addr r_offset; + + /* + * Relocation type and symbol. + * + * This value may be created using: + * + * r_info = ELF64_R_INFO(sym, type); + * + * This value may be read using: + * + * Elf64_Word sym = ELF64_R_SYM(r_info); + * Elf64_Word type = ELF64_R_TYPE(r_info); + * + * Where `sym` is a symbol index, and `type` might be: + * + * - `R_X86_64_64` + * - `R_X86_64_PC32` + * - `R_X86_64_GOTPCRELX` + * - `R_AARCH64_ABS64` + * + */ + Elf64_Xword r_info; + + /* + * Relocation parameter. + * + * Each relocation type has its own mathematical formula, which should + * incorporate this value in its own unique way. + */ + Elf64_Sxword r_addend; + } Elf64_Rela; #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ diff --git a/libc/elf/struct/shdr.h b/libc/elf/struct/shdr.h index 42c02a778..2edff9f35 100644 --- a/libc/elf/struct/shdr.h +++ b/libc/elf/struct/shdr.h @@ -8,24 +8,44 @@ * @see https://docs.oracle.com/cd/E19683-01/816-1386/chapter6-94076/index.html */ typedef struct Elf64_Shdr { + Elf64_Word sh_name; - Elf64_Word sh_type; /* SHT_{PROGBITS,NOBITS,STRTAB,SYMTAB,RELA,...} */ + + Elf64_Word sh_type; /* SHT_{PROGBITS,NOBITS,STRTAB,SYMTAB,RELA,...} */ + Elf64_Xword sh_flags; /* SHF_{WRITE,ALLOC,EXECINSTR,MERGE,STRINGS,...} */ + Elf64_Addr sh_addr; + Elf64_Off sh_offset; + Elf64_Xword sh_size; + /* - * If SHT_RELA: Index of section of associated symbol table. 
- * If SHT_SYMTAB: Index of section of associated string table. + * Index of linked section header. + * + * If `sh_type` is `SHT_RELA` then `sh_link` holds the section header + * index of the associated symbol table. + * + * If `sh_type` is `SHT_SYMTAB` then `sh_link` holds the section + * header index of the associated string table. */ Elf64_Word sh_link; + /* - * If SHT_RELA: Index of section to which relocations apply. - * If SHT_SYMTAB: One greater than symbol table index of last local symbol. + * If `sh_type` is `SHT_RELA` then `sh_info` contains the index of the + * section to which relocations apply. + * + * If `sh_type` is `SHT_SYMTAB` or `SHT_DYNSYM` then `sh_info` + * contains an index that's one greater than symbol table index of + * last `STB_LOCAL` symbol. */ Elf64_Word sh_info; + Elf64_Xword sh_addralign; + Elf64_Xword sh_entsize; + } Elf64_Shdr; #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ diff --git a/libc/elf/struct/sym.h b/libc/elf/struct/sym.h index 94eed20a0..8137759e4 100644 --- a/libc/elf/struct/sym.h +++ b/libc/elf/struct/sym.h @@ -4,18 +4,91 @@ #if !(__ASSEMBLER__ + __LINKER__ + 0) typedef struct Elf64_Sym { + + /* + * Symbol name. + * + * This value is a byte offset into the `.strtab` section. If this + * value is zero, then the symbol has no name. + */ Elf64_Word st_name; - /* ELF64_ST_TYPE(st_info) → STT_{NOTYPE,OBJECT,FUNC,SECTION,FILE,COMMON,...} - * ELF64_ST_BIND(st_info) → STB_{LOCAL,GLOBAL,WEAK,...} */ + + /* + * Symbol type and binding. 
+ * + * This value may be created using: + * + * sym.st_info = ELF64_ST_INFO(bind, type); + * + * This value may be read using: + * + * int bind = ELF64_ST_BIND(sym.st_info); + * int type = ELF64_ST_TYPE(sym.st_info); + * + * Where `bind` is typically: + * + * - `STB_LOCAL` + * - `STB_GLOBAL` + * - `STB_WEAK` + * + * Where `type` is typically: + * + * - `STT_NOTYPE` + * - `STT_OBJECT` + * - `STT_FUNC` + * - `STT_SECTION` + * - `STT_FILE` + * - `STT_COMMON` + */ uint8_t st_info; - /* STV_{DEFAULT,INTERNAL,HIDDEN,PROTECTED} */ + + /* + * Symbol visibility. + * + * This value should be accessed using: + * + * int visibility = ELF64_ST_VISIBILITY(sym.st_other); + * + * Where `visibility` is typically: + * + * - `STV_DEFAULT` + * - `STV_INTERNAL` + * - `STV_HIDDEN` + * - `STV_PROTECTED` + */ uint8_t st_other; - /* SHN_UNDEF,
, SHN_ABS, SHN_COMMON, etc. */ + + /* + * Symbol section. + * + * If `st_shndx` is within `(SHN_UNDEF,SHN_LORESERVE)` then it holds + * an index into the section header table. + * + * Otherwise `st_shndx` is usually one of the following magic numbers: + * + * - `SHN_UNDEF` means symbol is undefined + * - `SHN_ABS` means symbol is a linker integer + * - `SHN_COMMON` means symbol is defined traditionally + */ Elf64_Section st_shndx; - /* byte offset into GetElfSectionAddress(st_shndx) */ + + /* + * Symbol value. + * + * If `e_type` is `ET_REL` and `st_shndx` is `SHN_COMMON`, then + * `st_value` holds the required symbol alignment, or ≤ 1 if no + * alignment is required. + * + * If `e_type` is `ET_REL` and `st_shndx` is a section index, then + * `st_value` holds a byte offset into the section memory. + * + * If `e_type` isn't `ET_REL` then `st_value` holds a virtual address. + */ Elf64_Addr st_value; + /* byte length optionally set by .size directive */ Elf64_Xword st_size; + } Elf64_Sym; #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ diff --git a/libc/elf/tinyelf.internal.h b/libc/elf/tinyelf.internal.h index 0c332768a..bbe4b1084 100644 --- a/libc/elf/tinyelf.internal.h +++ b/libc/elf/tinyelf.internal.h @@ -1,5 +1,6 @@ #ifndef COSMOPOLITAN_LIBC_ELF_TINYELF_INTERNAL_H_ #define COSMOPOLITAN_LIBC_ELF_TINYELF_INTERNAL_H_ +#include "libc/elf/def.h" #include "libc/elf/struct/ehdr.h" #include "libc/elf/struct/phdr.h" #include "libc/elf/struct/shdr.h" diff --git a/libc/stdio/fflushimpl.c b/libc/stdio/fflushimpl.c index 8fea0bf58..be578bc4e 100644 --- a/libc/stdio/fflushimpl.c +++ b/libc/stdio/fflushimpl.c @@ -18,6 +18,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/calls/calls.h" #include "libc/errno.h" +#include "libc/intrin/weaken.h" #include "libc/mem/mem.h" #include "libc/runtime/runtime.h" #include "libc/stdio/internal.h" @@ -27,8 +28,12 @@ int __fflush_impl(FILE *f) { size_t i; ssize_t rc; - free(f->getln); - 
f->getln = 0; + if (f->getln) { + if (_weaken(free)) { + _weaken(free)(f->getln); + } + f->getln = 0; + } if (f->beg && !f->end && (f->iomode & O_ACCMODE) != O_RDONLY) { for (i = 0; i < f->beg; i += rc) { if ((rc = write(f->fd, f->buf + i, f->beg - i)) == -1) { diff --git a/tool/build/ar.c b/tool/build/ar.c index d74ebfb83..079ba3cec 100644 --- a/tool/build/ar.c +++ b/tool/build/ar.c @@ -1,7 +1,7 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ Copyright 2023 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ @@ -16,345 +16,461 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/ar.h" #include "libc/assert.h" #include "libc/calls/calls.h" #include "libc/calls/struct/iovec.h" #include "libc/calls/struct/stat.h" #include "libc/elf/def.h" #include "libc/elf/elf.h" +#include "libc/elf/scalar.h" +#include "libc/elf/struct/sym.h" #include "libc/errno.h" -#include "libc/fmt/conv.h" #include "libc/fmt/itoa.h" #include "libc/fmt/libgen.h" +#include "libc/fmt/magnumstrs.internal.h" #include "libc/intrin/bits.h" -#include "libc/intrin/safemacros.internal.h" -#include "libc/log/check.h" -#include "libc/log/log.h" +#include "libc/intrin/bsr.h" +#include "libc/intrin/kprintf.h" +#include "libc/limits.h" #include "libc/macros.internal.h" -#include "libc/mem/arraylist2.internal.h" -#include "libc/mem/mem.h" #include "libc/runtime/runtime.h" -#include "libc/sock/sock.h" -#include "libc/stdio/stdio.h" +#include "libc/stdckdint.h" #include "libc/str/str.h" -#include 
"libc/sysv/consts/ex.h" -#include "libc/sysv/consts/madv.h" #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/prot.h" #include "libc/sysv/consts/s.h" -#include "libc/x/x.h" #include "tool/build/lib/getargs.h" /** - * @fileoverview System Five Static Archive Builder. + * @fileoverview cosmopolitan ar * - * GNU ar has a bug which causes it to take hundreds of milliseconds to - * build archives like ntdll.a and several minutes for cosmopolitan.a. - * This goes quadratically faster taking 1ms to do ntdll w/ hot cache. + * This static archiver is superior: * - * Compared to LLVM ar this tool goes 10x faster because it uses madvise - * and copy_file_range which give us the optimal page cached file system - * beahvior that a build environment needs. + * - Isn't "accidentally quadratic" like GNU ar + * - Goes 2x faster than LLVM ar while using 100x less memory + * - Can be built as a 52kb APE binary that works well on six OSes * - * This tool also adds a feature: it ignores directory parameters. This - * is important because good Makefiles on Linux will generally have the - * directory be a .a prerequisite so archives rebuild on file deletion. 
+ * This static archiver introduces handy features: + * + * - Arguments may be supplied in an `@args.txt` file + * - Directory arguments are ignored * * @see https://www.unix.com/man-page/opensolaris/3head/ar.h/ + * @see https://en.wikipedia.org/wiki/Ar_(Unix) */ -struct Args { - size_t i, n; - char **p; -}; +#define VERSION \ + "cosmopolitan ar v2.0\n" \ + "copyright 2023 justine tunney\n" \ + "https://github.com/jart/cosmopolitan\n" -struct String { - size_t i, n; - char *p; -}; +#define HEAP_SIZE (256L * 1024 * 1024) struct Ints { - size_t i, n; int *p; + size_t i; }; -struct Header { - char name[16]; - char date[12]; - char uid[6]; - char gid[6]; - char mode[8]; - char size[10]; - char fmag[2]; +struct Args { + char **p; + size_t i; }; -static void *Realloc(void *p, size_t n) { - void *q; - if (!(q = realloc(p, n))) { - fputs("error: ar: out of memory\n", stderr); - exit(1); +struct Bytes { + char *p; + size_t i; +}; + +static void SortChars(char *A, long n) { + long i, j, t; + for (i = 1; i < n; i++) { + t = A[i]; + j = i - 1; + while (j >= 0 && A[j] > t) { + A[j + 1] = A[j]; + j = j - 1; + } + A[j + 1] = t; } - return q; } -static void *Malloc(size_t n) { - return Realloc(0, n); +static wontreturn void Die(const char *path, const char *reason) { + tinyprint(2, path, ": ", reason, "\n", NULL); + exit(1); } -static void NewArgs(struct Args *l, size_t n) { - l->i = 0; - l->n = MAX(2, n); - l->p = Malloc(l->n * sizeof(*l->p)); - l->p[0] = 0; +static wontreturn void SysDie(const char *path, const char *func) { + const char *errstr; + if (!(errstr = _strerdoc(errno))) errstr = "Unknown error"; + tinyprint(2, path, ": ", func, ": ", errstr, "\n", NULL); + exit(1); } -static void NewInts(struct Ints *l, size_t n) { - l->i = 0; - l->n = MAX(2, n); - l->p = Malloc(l->n * sizeof(*l->p)); - l->p[0] = 0; +static wontreturn void ShowUsage(int rc, int fd) { + tinyprint(fd, VERSION, + "\n" + "USAGE\n" + "\n", + " ", program_invocation_name, " FLAGS ARCHIVE FILE...\n", + "\n" 
+ "FLAGS\n" + "\n" + " rcs create new archive with index\n" + " rcsD always deterministic\n" + " --help show usage\n" + " --version show program details\n" + "\n" + "ARGUMENTS\n" + "\n" + " ARCHIVE should be foo.a\n" + " FILE should be foo.o or @args.txt\n" + "\n", + NULL); + exit(rc); } -static void NewString(struct String *s, size_t n) { - s->i = 0; - s->n = MAX(2, n); - s->p = Malloc(s->n * sizeof(*s->p)); - s->p[0] = 0; +// allocates 𝑛 bytes of memory aligned on 𝑎 from .bss +// - avoids binary bloat of mmap() and malloc() +// - dies if out of memory or overflow occurs +// - new memory is always zero-initialized +// - can't be resized; use reballoc api +// - can't be freed or reclaimed +static void *balloc(size_t n, size_t a) { + size_t c; + int resizable; + uintptr_t h, p; + static size_t used; + static char heap[HEAP_SIZE]; + assert(a >= 1 && !(a & (a - 1))); + h = (uintptr_t)heap; + p = h + used; + if ((resizable = (ssize_t)n < 0)) { + n = ~n; + p += sizeof(c); + } + p += a - 1; + p &= -a; + if (n <= a) { + c = a; + } else if (!resizable) { + c = n; + } else { + c = 2ull << (__builtin_clzll(n - 1) ^ (sizeof(long long) * CHAR_BIT - 1)); + } + if (c < a || c > HEAP_SIZE || p + c > h + HEAP_SIZE) { + Die(program_invocation_name, "out of memory"); + } + used = p - h + c; + if (resizable) { + memcpy((char *)p - sizeof(c), &c, sizeof(c)); + } + return (void *)p; +} + +// reallocates 𝑛 𝑧-sized elements aligned on 𝑧 from .bss +// - avoids binary bloat of mmap() and realloc() +// - dies if out of memory or overflow occurs +// - new memory is always zero-initialized +// - abstracts multiply overflow check +// - shrinking always happens in-place +// - growing cost is always amortized +// - can't be freed or reclaimed +static void *reballoc(void *p, size_t n, size_t z) { + size_t c; + assert(n >= 0); + assert(z >= 1 && !(z & (z - 1))); + if (ckd_mul(&n, n, z)) n = HEAP_SIZE; + if (!p) return balloc(~n, z); + memcpy(&c, (char *)p - sizeof(c), sizeof(c)); + assert(c >= z 
&& c < HEAP_SIZE && !(c & (c - 1))); + if (n <= c) return p; + return memcpy(balloc(~n, z), p, c); +} + +static char *StrDup(const char *s) { + size_t n = strlen(s) + 1; + return memcpy(balloc(n, 1), s, n); +} + +static char *StrCat(const char *a, const char *b) { + char *p; + size_t n, m; + n = strlen(a); + m = strlen(b); + p = balloc(n + m + 1, 1); + memcpy(p, a, n); + memcpy(p + n, b, m + 1); + return p; } static void AppendInt(struct Ints *l, int i) { - assert(l->n > 1); - if (l->i + 1 >= l->n) { - do { - l->n += l->n >> 1; - } while (l->i + 1 >= l->n); - l->p = Realloc(l->p, l->n * sizeof(*l->p)); - } + l->p = reballoc(l->p, l->i + 2, sizeof(*l->p)); l->p[l->i++] = i; - l->p[l->i] = 0; } static void AppendArg(struct Args *l, char *s) { - assert(l->n > 1); - if (l->i + 1 >= l->n) { - do { - l->n += l->n >> 1; - } while (l->i + 1 >= l->n); - l->p = Realloc(l->p, l->n * sizeof(*l->p)); - } + l->p = reballoc(l->p, l->i + 2, sizeof(*l->p)); l->p[l->i++] = s; - l->p[l->i] = 0; } -static void MakeHeader(struct Header *h, const char *name, int ref, int mode, - int size) { +static void AppendBytes(struct Bytes *l, const char *s, size_t n) { + l->p = reballoc(l->p, l->i + n + 1, sizeof(*l->p)); + memcpy(l->p + l->i, s, n); + l->i += n; +} + +static int IsEqual(const char *a, const char *b) { + return !strcmp(a, b); +} + +static void MakeArHeader(struct ar_hdr *h, // + const char *name, // + int mode, // + size_t size) { // size_t n; - char ibuf[13], *p; + char b[21]; memset(h, ' ', sizeof(*h)); n = strlen(name); - memcpy(h->name, name, n); - if (ref != -1) { - FormatUint32(h->name + n, ref); + if (n > ARRAYLEN(h->ar_name)) { + Die(program_invocation_name, "ar_name overflow"); } - if (strcmp(name, "//") != 0) { - h->date[0] = '0'; - h->uid[0] = '0'; - h->gid[0] = '0'; - p = FormatOctal32(ibuf, mode & 0777, false); - CHECK_LE(p - ibuf, sizeof(h->mode)); - memcpy(h->mode, ibuf, p - ibuf); + memcpy(h->ar_name, name, n); + if (!IsEqual(name, "//")) { + h->ar_date[0] = '0'; + 
h->ar_uid[0] = '0'; + h->ar_gid[0] = '0'; + memcpy(h->ar_mode, b, FormatOctal32(b, mode & 0777, false) - b); } - h->fmag[0] = '`'; - h->fmag[1] = '\n'; - p = FormatUint32(ibuf, size); - CHECK_LE(p - ibuf, sizeof(h->size)); - memcpy(h->size, ibuf, p - ibuf); + if (size > 9999999999) { + Die(program_invocation_name, "ar_size overflow"); + } + memcpy(h->ar_size, b, FormatUint64(b, size) - b); + memcpy(h->ar_fmag, ARFMAG, sizeof(h->ar_fmag)); +} + +// copies data between file descriptors until end of file +// - assumes signal handlers aren't in play +// - uses copy_file_range() if possible +// - returns number of bytes exchanged +// - dies if operation fails +static int64_t CopyFileOrDie(const char *inpath, int infd, // + const char *outpath, int outfd) { + int64_t toto; + char buf[512]; + size_t exchanged; + ssize_t got, wrote; + enum { CFR, RW } mode; + for (mode = CFR, toto = 0;; toto += exchanged) { + if (mode == CFR) { + got = copy_file_range(infd, 0, outfd, 0, 4194304, 0); + if (!got) break; + if (got != -1) { + exchanged = got; + } else if (errno == EXDEV || // different partitions + errno == ENOSYS || // not linux or freebsd + errno == ENOTSUP || // probably a /zip file + errno == EOPNOTSUPP) { // technically the same + exchanged = 0; + mode = RW; + } else { + SysDie(inpath, "copy_file_range"); + } + } else { + got = read(infd, buf, sizeof(buf)); + if (!got) break; + if (got == -1) SysDie(inpath, "read"); + wrote = write(outfd, buf, got); + if (wrote == -1) SysDie(outpath, "write"); + if (wrote != got) Die(outpath, "posix violated"); + exchanged = wrote; + } + } + return toto; } int main(int argc, char *argv[]) { - FILE *f; - void *elf; - char *line; - char *strs; - ssize_t rc; - int *offsets; - size_t wrote; - size_t remain; - struct stat *st; - uint32_t outpos; - Elf64_Sym *syms; - const char *arg; - struct Args args; - uint64_t outsize; - uint8_t *tablebuf; - struct GetArgs ga; - struct Ints modes; - struct Ints names; - struct Ints sizes; - const char 
*reason; - struct iovec iov[7]; - const char *symname; - const char *outpath; - Elf64_Xword symcount; - struct Ints symnames; - struct String symbols; - struct String filenames; - struct Header *header1, *header2; - int i, j, fd, err, name, outfd, tablebufsize; + int fd, objectid; + struct ar_hdr header1; + struct ar_hdr header2; - // TODO(jart): Delete this. - if (argc == 2 && !strcmp(argv[1], "-n")) { - exit(0); +#ifndef NDEBUG + ShowCrashReports(); +#endif + + // handle hardcoded flags + if (argc == 2) { + if (IsEqual(argv[1], "-n")) { + exit(0); + } + if (IsEqual(argv[1], "-h") || // + IsEqual(argv[1], "-?") || // + IsEqual(argv[1], "--help")) { + ShowUsage(0, 1); + } + if (IsEqual(argv[1], "--version")) { + tinyprint(1, VERSION, NULL); + exit(0); + } } + // get flags and output path + if (argc < 3) { + ShowUsage(1, 2); + } + const char *flags = argv[1]; + const char *outpath = argv[2]; + // we only support one mode of operation, which is creating a new - // deterministic archive. this tool is so fast that we don't need - // database-like tools when editing static archives - if (!(argc > 2 && strcmp(argv[1], "rcsD") == 0)) { - fputs("usage: ", stderr); - if (argc > 0) fputs(argv[0], stderr); - fputs(" rcsD ARCHIVE FILE...", stderr); - exit(EX_USAGE); + // deterministic archive. 
computing the full archive goes so fast + // on modern systems that it isn't worth supporting the byzantine + // standard posix ar flags intended to improve cassette tape perf + SortChars(flags, strlen(flags)); + if (!IsEqual(flags, "crs") && // + !IsEqual(flags, "Dcrs")) { + tinyprint(2, program_invocation_name, ": flags should be rcsD\n", NULL); + ShowUsage(1, 2); } - outpath = argv[2]; - NewArgs(&args, 4); - st = Malloc(sizeof(struct stat)); - NewInts(&modes, 128); - NewInts(&names, 128); - NewInts(&sizes, 128); - NewInts(&symnames, 1024); - NewString(&symbols, 4096); - NewString(&filenames, 1024); + struct Args args = {reballoc(0, 4096, sizeof(char *))}; + struct Args names = {reballoc(0, 4096, sizeof(char *))}; + struct Ints modes = {reballoc(0, 4096, sizeof(int))}; + struct Ints longnames = {reballoc(0, 256, sizeof(int))}; + struct Ints sizes = {reballoc(0, 4096, sizeof(int))}; + struct Ints symnames = {reballoc(0, 16384, sizeof(int))}; + struct Bytes symbols = {reballoc(0, 131072, sizeof(char))}; + struct Bytes filenames = {reballoc(0, 16384, sizeof(char))}; + + // perform analysis pass on input files + struct GetArgs ga; getargs_init(&ga, argv + 3); - - // load global symbols and populate page cache - for (i = 0;; ++i) { - TryAgain: + for (objectid = 0;;) { + struct stat st; + const char *arg; if (!(arg = getargs_next(&ga))) break; - if (_endswith(arg, "/")) goto TryAgain; - if (_endswith(arg, ".pkg")) goto TryAgain; - CHECK_NE(-1, stat(arg, st), "%s", arg); - if (!st->st_size || S_ISDIR(st->st_mode)) goto TryAgain; - CHECK_NE(-1, (fd = open(arg, O_RDONLY)), "%s", arg); - CHECK_LT(st->st_size, 0x7ffff000); - AppendArg(&args, xstrdup(arg)); - AppendInt(&names, filenames.i); - AppendInt(&sizes, st->st_size); - AppendInt(&modes, st->st_mode); - CONCAT(&filenames.p, &filenames.i, &filenames.n, basename(arg), - strlen(basename(arg))); - CONCAT(&filenames.p, &filenames.i, &filenames.n, "/\n", 2); - CHECK_NE(MAP_FAILED, - (elf = mmap(0, st->st_size, PROT_READ, 
MAP_PRIVATE, fd, 0))); - CHECK(IsElf64Binary(elf, st->st_size), "%s", arg); - CHECK_NOTNULL((strs = GetElfStringTable(elf, st->st_size, ".strtab"))); - CHECK_NOTNULL( - (syms = GetElfSymbolTable(elf, st->st_size, SHT_SYMTAB, &symcount))); - for (j = 0; j < symcount; ++j) { - if (syms[j].st_shndx == SHN_UNDEF) continue; - if (syms[j].st_other == STV_INTERNAL) continue; - if (ELF64_ST_BIND(syms[j].st_info) == STB_LOCAL) continue; - symname = GetElfString(elf, st->st_size, strs, syms[j].st_name); - CONCAT(&symbols.p, &symbols.i, &symbols.n, symname, strlen(symname) + 1); - AppendInt(&symnames, i); + if (_endswith(arg, "/")) continue; + if (_endswith(arg, ".pkg")) continue; + if (stat(arg, &st)) SysDie(arg, "stat"); + if (S_ISDIR(st.st_mode)) continue; + if (!st.st_size) Die(arg, "file is empty"); + if (st.st_size > 0x7ffff000) Die(arg, "file too large"); + if ((fd = open(arg, O_RDONLY)) == -1) SysDie(arg, "open"); + AppendArg(&args, StrDup(arg)); + AppendInt(&sizes, st.st_size); + AppendInt(&modes, st.st_mode); + char bnbuf[PATH_MAX + 1]; + strlcpy(bnbuf, arg, sizeof(bnbuf)); + char *aname = StrCat(basename(bnbuf), "/"); + if (strlen(aname) <= sizeof(header1.ar_name)) { + AppendArg(&names, aname); + } else { + char ibuf[21]; + FormatUint64(ibuf, filenames.i); + AppendArg(&names, StrCat("/", ibuf)); + AppendBytes(&filenames, aname, strlen(aname)); + AppendBytes(&filenames, "\n", 1); } - CHECK_NE(-1, munmap(elf, st->st_size)); - close(fd); + size_t mapsize = st.st_size; + void *elf = mmap(0, mapsize, PROT_READ, MAP_PRIVATE, fd, 0); + if (elf == MAP_FAILED) SysDie(arg, "mmap"); + if (!IsElf64Binary(elf, mapsize)) Die(arg, "not an elf64 binary"); + char *strs = GetElfStringTable(elf, mapsize, ".strtab"); + if (!strs) Die(arg, "elf .strtab not found"); + Elf64_Xword symcount; + Elf64_Shdr *symsec = GetElfSymbolTable(elf, mapsize, SHT_SYMTAB, &symcount); + Elf64_Sym *syms = GetElfSectionAddress(elf, mapsize, symsec); + if (!syms) Die(arg, "elf symbol table not found"); + 
for (Elf64_Xword j = symsec->sh_info; j < symcount; ++j) { + if (!syms[j].st_name) continue; + if (syms[j].st_shndx == SHN_UNDEF) continue; + if (syms[j].st_shndx == SHN_COMMON) continue; + const char *symname = GetElfString(elf, mapsize, strs, syms[j].st_name); + if (!symname) Die(arg, "elf symbol name corrupted"); + AppendBytes(&symbols, symname, strlen(symname) + 1); + AppendInt(&symnames, objectid); + } + if (munmap(elf, mapsize)) SysDie(arg, "munmap"); + if (close(fd)) SysDie(arg, "close"); + ++objectid; } - APPEND(&filenames.p, &filenames.i, &filenames.n, "\n"); + getargs_destroy(&ga); // compute length of output archive - outsize = 0; - tablebufsize = 4 + symnames.i * 4; - tablebuf = Malloc(tablebufsize); - offsets = Malloc(args.i * 4); - header1 = Malloc(sizeof(struct Header)); - header2 = Malloc(sizeof(struct Header)); - iov[0].iov_base = "!\n"; - outsize += (iov[0].iov_len = 8); - iov[1].iov_base = header1; - outsize += (iov[1].iov_len = 60); + size_t outsize = 0; + struct iovec iov[8]; + int tablebufsize = 4 + symnames.i * 4; + char *tablebuf = balloc(tablebufsize, 1); + int *offsets = balloc(args.i * sizeof(int), sizeof(int)); + iov[0].iov_base = ARMAG; + outsize += (iov[0].iov_len = SARMAG); + iov[1].iov_base = &header1; + outsize += (iov[1].iov_len = sizeof(struct ar_hdr)); iov[2].iov_base = tablebuf; outsize += (iov[2].iov_len = tablebufsize); iov[3].iov_base = symbols.p; outsize += (iov[3].iov_len = symbols.i); - iov[4].iov_base = "\n"; + iov[4].iov_base = ""; outsize += (iov[4].iov_len = outsize & 1); - iov[5].iov_base = header2; - outsize += (iov[5].iov_len = 60); + iov[5].iov_base = &header2; + outsize += (iov[5].iov_len = filenames.i ? 
sizeof(struct ar_hdr) : 0); iov[6].iov_base = filenames.p; outsize += (iov[6].iov_len = filenames.i); - for (i = 0; i < args.i; ++i) { + iov[7].iov_base = "\n"; + outsize += (iov[7].iov_len = filenames.i & 1); + for (size_t i = 0; i < args.i; ++i) { outsize += outsize & 1; + if (outsize > INT_MAX) { + Die(outpath, "archive too large"); + } offsets[i] = outsize; - outsize += 60; + outsize += sizeof(struct ar_hdr); outsize += sizes.p[i]; } - CHECK_LE(outsize, 0x7ffff000); // serialize metadata - MakeHeader(header1, "/", -1, 0, tablebufsize + symbols.i); - MakeHeader(header2, "//", -1, 0, filenames.i); + MakeArHeader(&header1, "/", 0, tablebufsize + ROUNDUP(symbols.i, 2)); + MakeArHeader(&header2, "//", 0, ROUNDUP(filenames.i, 2)); WRITE32BE(tablebuf, symnames.i); - for (i = 0; i < symnames.i; ++i) { + for (size_t i = 0; i < symnames.i; ++i) { WRITE32BE(tablebuf + 4 + i * 4, offsets[symnames.p[i]]); } // write output archive - CHECK_NE(-1, (outfd = creat(outpath, 0644))); - ftruncate(outfd, outsize); - if ((outsize = writev(outfd, iov, ARRAYLEN(iov))) == -1) { - reason = "writev1 failed"; - goto fail; + int outfd; + if ((outfd = creat(outpath, 0644)) == -1) { + SysDie(outpath, "creat"); } - for (i = 0; i < args.i; ++i) { - if ((fd = open(args.p[i], O_RDONLY)) == -1) { - reason = "open failed"; - goto fail; + if (ftruncate(outfd, outsize)) { + SysDie(outpath, "ftruncate"); + } + if ((outsize = writev(outfd, iov, ARRAYLEN(iov))) == -1) { + SysDie(outpath, "writev[1]"); + } + for (size_t i = 0; i < args.i; ++i) { + const char *inpath = args.p[i]; + if ((fd = open(inpath, O_RDONLY)) == -1) { + SysDie(inpath, "open"); } iov[0].iov_base = "\n"; outsize += (iov[0].iov_len = outsize & 1); - iov[1].iov_base = header1; - outsize += (iov[1].iov_len = 60); - MakeHeader(header1, "/", names.p[i], modes.p[i], sizes.p[i]); + iov[1].iov_base = &header1; + outsize += (iov[1].iov_len = sizeof(struct ar_hdr)); + MakeArHeader(&header1, names.p[i], modes.p[i], sizes.p[i]); if 
(writev(outfd, iov, 2) == -1) { - reason = "writev2 failed"; - goto fail; + SysDie(outpath, "writev[2]"); } - outsize += (remain = sizes.p[i]); - if (copyfd(fd, outfd, remain) == -1) { - reason = "copy failed"; - goto fail; + outsize += sizes.p[i]; + if (CopyFileOrDie(inpath, fd, outpath, outfd) != sizes.p[i]) { + Die(inpath, "file size changed"); + } + if (close(fd)) { + SysDie(inpath, "close"); } - close(fd); } - close(outfd); + if (close(outfd)) { + SysDie(outpath, "close"); + } - for (i = 0; i < args.i; ++i) free(args.p[i]); - getargs_destroy(&ga); - free(filenames.p); - free(symnames.p); - free(symbols.p); - free(tablebuf); - free(modes.p); - free(names.p); - free(sizes.p); - free(offsets); - free(header1); - free(header2); - free(args.p); - free(st); return 0; - -fail: - err = errno; - unlink(outpath); - fputs("error: ar failed: ", stderr); - fputs(reason, stderr); - fputs(": ", stderr); - fputs(strerror(err), stderr); - fputs("\n", stderr); - return 1; } diff --git a/tool/build/fixupobj.c b/tool/build/fixupobj.c index e065fcca5..4e5bce1d5 100644 --- a/tool/build/fixupobj.c +++ b/tool/build/fixupobj.c @@ -20,6 +20,7 @@ #include "libc/calls/calls.h" #include "libc/calls/struct/stat.h" #include "libc/dce.h" +#include "libc/elf/def.h" #include "libc/elf/elf.h" #include "libc/elf/scalar.h" #include "libc/elf/struct/rela.h" @@ -358,7 +359,7 @@ static void FixupObject(void) { if (!IsElf64Binary(elf, esize)) { Die("not an elf64 binary"); } - if (!(syms = GetElfSymbolTable(elf, esize, SHT_SYMTAB, &symcount))) { + if (!(syms = GetElfSymbols(elf, esize, SHT_SYMTAB, &symcount))) { Die("missing elf symbol table"); } if (!(secstrs = GetElfSectionNameStringTable(elf, esize))) { diff --git a/tool/build/lib/elfwriter_yoink.c b/tool/build/lib/elfwriter_yoink.c index 489652d69..011151144 100644 --- a/tool/build/lib/elfwriter_yoink.c +++ b/tool/build/lib/elfwriter_yoink.c @@ -16,6 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF 
THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/elf/def.h" #include "libc/str/str.h" #include "tool/build/lib/elfwriter.h" diff --git a/tool/build/lib/elfwriter_zip.c b/tool/build/lib/elfwriter_zip.c index cb60ee4b6..8470f8205 100644 --- a/tool/build/lib/elfwriter_zip.c +++ b/tool/build/lib/elfwriter_zip.c @@ -17,6 +17,7 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/dos.internal.h" +#include "libc/elf/def.h" #include "libc/fmt/conv.h" #include "libc/limits.h" #include "libc/log/check.h" diff --git a/tool/build/lib/getargs.c b/tool/build/lib/getargs.c index b8a0663be..a4a17640c 100644 --- a/tool/build/lib/getargs.c +++ b/tool/build/lib/getargs.c @@ -16,17 +16,20 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "tool/build/lib/getargs.h" #include "libc/assert.h" #include "libc/calls/calls.h" -#include "libc/calls/struct/stat.h" +#include "libc/errno.h" +#include "libc/fmt/magnumstrs.internal.h" +#include "libc/macros.internal.h" #include "libc/runtime/runtime.h" +#include "libc/runtime/sysconf.h" #include "libc/stdio/stdio.h" #include "libc/str/str.h" #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/prot.h" #include "libc/sysv/errfuns.h" -#include "tool/build/lib/getargs.h" /** * @fileoverview Fast Command Line Argument Ingestion. @@ -72,6 +75,13 @@ #define IsSpace(c) ((255 & (c)) <= ' ') +static wontreturn void getargs_fail(const char *path, const char *reason) { + const char *errstr; + if (!(errstr = _strerdoc(errno))) errstr = "Unknown error"; + tinyprint(2, path, ": ", reason, ": ", errstr, "\n", NULL); + exit(1); +} + /** * Zeroes GetArgs object and sets its fields. 
* @param args is borrowed for the lifetime of the GetArgs object @@ -86,7 +96,9 @@ void getargs_init(struct GetArgs *ga, char **args) { * Releases memory associated with GetArgs object and zeroes it. */ void getargs_destroy(struct GetArgs *ga) { - if (ga->map) munmap(ga->map, ga->mapsize); + if (ga->map) { + if (munmap(ga->map, ga->mapsize)) notpossible; + } bzero(ga, sizeof(*ga)); } @@ -106,8 +118,8 @@ const char *getargs_next(struct GetArgs *ga) { char *p; size_t k; unsigned m; - struct stat st; - do { + ssize_t size; + for (;;) { if (ga->map) { for (; ga->j < ga->mapsize; ++ga->j) { if (!IsSpace(ga->map[ga->j])) { @@ -134,36 +146,39 @@ const char *getargs_next(struct GetArgs *ga) { break; } } - if (k) { - if (ga->j + k < ga->mapsize) { - ga->map[ga->j + k] = 0; - p = ga->map + ga->j; - ga->j += ++k; - return p; - } else { - eio(); - break; - } + if (k && ga->j + k < ga->mapsize) { + ga->map[ga->j + k] = 0; + p = ga->map + ga->j; + ga->j += ++k; + return p; } - if (munmap(ga->map, ga->mapsize) == -1) break; + if (munmap(ga->map, ga->mapsize)) notpossible; ga->map = 0; ga->mapsize = 0; ga->j = 0; } - if (!(p = ga->args[ga->i])) return 0; - ++ga->i; - if (*p != '@') return p; - ++p; - if ((fd = open((ga->path = p), O_RDONLY)) != -1) { - fstat(fd, &st); - if ((p = mmap(0, st.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, - 0)) != MAP_FAILED) { - ga->map = p; - ga->mapsize = st.st_size; - } - close(fd); + if (!(p = ga->args[ga->i])) { + return 0; } - } while (ga->map); - perror(ga->path); - exit(1); + ++ga->i; + if (*p != '@') { + return p; + } + ++p; + if ((fd = open((ga->path = p), O_RDONLY)) == -1) { + getargs_fail(ga->path, "open"); + } + if ((size = lseek(fd, 0, SEEK_END)) == -1) { + getargs_fail(ga->path, "lseek"); + } + if (size) { + p = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (p == MAP_FAILED) { + getargs_fail(ga->path, "mmap"); + } + ga->map = p; + ga->mapsize = ROUNDUP(size, 4096); + } + close(fd); + } } diff --git 
a/tool/build/package.c b/tool/build/package.c index 80e0a22f0..b1b9281bf 100644 --- a/tool/build/package.c +++ b/tool/build/package.c @@ -20,6 +20,7 @@ #include "libc/calls/struct/iovec.h" #include "libc/calls/struct/stat.h" #include "libc/dce.h" +#include "libc/elf/def.h" #include "libc/elf/elf.h" #include "libc/elf/struct/rela.h" #include "libc/elf/struct/shdr.h" @@ -529,8 +530,8 @@ static void OpenObject(struct Package *pkg, struct Object *obj, int oid) { if (!(obj->strs = GetElfStringTable(obj->elf, obj->size, ".strtab"))) { Die(path, "missing elf string table"); } - if (!(obj->syms = GetElfSymbolTable(obj->elf, obj->size, SHT_SYMTAB, - &obj->symcount))) { + if (!(obj->syms = + GetElfSymbols(obj->elf, obj->size, SHT_SYMTAB, &obj->symcount))) { Die(path, "missing elf symbol table"); } IndexSections(pkg, obj); diff --git a/tool/decode/elf.c b/tool/decode/elf.c index e0dd81a14..943d2cbe7 100644 --- a/tool/decode/elf.c +++ b/tool/decode/elf.c @@ -20,6 +20,7 @@ #include "libc/calls/calls.h" #include "libc/calls/struct/stat.h" #include "libc/elf/def.h" +#include "libc/elf/elf.h" #include "libc/elf/struct/rela.h" #include "libc/elf/struct/shdr.h" #include "libc/errno.h" @@ -273,8 +274,7 @@ static void printelfsymbol(Elf64_Sym *sym, char *strtab, char *shstrtab) { static void printelfsymboltable(void) { size_t i, symcount = 0; - Elf64_Sym *symtab = - GetElfSymbolTable(elf, st->st_size, SHT_SYMTAB, &symcount); + Elf64_Sym *symtab = GetElfSymbols(elf, st->st_size, SHT_SYMTAB, &symcount); if (!symtab) return; char *strtab = GetElfStringTable(elf, st->st_size, ".strtab"); char *shstrtab = GetElfSectionNameStringTable(elf, st->st_size); @@ -290,8 +290,7 @@ static void printelfsymboltable(void) { static void printelfdynsymboltable(void) { size_t i, symcount = 0; - Elf64_Sym *symtab = - GetElfSymbolTable(elf, st->st_size, SHT_DYNSYM, &symcount); + Elf64_Sym *symtab = GetElfSymbols(elf, st->st_size, SHT_DYNSYM, &symcount); if (!symtab) return; char *strtab = 
GetElfStringTable(elf, st->st_size, ".dynstr"); char *shstrtab = GetElfSectionNameStringTable(elf, st->st_size);