Make improvements

- Introduce portable sched_getcpu() api
- Support GCC's __target_clones__ feature
- Make fma() go faster on x86 in default mode
- Remove some asan checks from core libraries
- WinMain() now ensures $HOME and $USER are defined
This commit is contained in:
Justine Tunney 2024-02-01 03:39:46 -08:00
parent d5225a693b
commit 2ab9e9f7fd
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
192 changed files with 2809 additions and 932 deletions

View file

@ -39,7 +39,7 @@
/* maximum path size that cosmo can take */
#define PATHSIZE (PATH_MAX < 1024 ? PATH_MAX : 1024)
#define SYSLIB_MAGIC ('s' | 'l' << 8 | 'i' << 16 | 'b' << 24)
#define SYSLIB_VERSION 8
#define SYSLIB_VERSION 9 /* sync with libc/runtime/syslib.internal.h */
struct Syslib {
int magic;
@ -96,11 +96,16 @@ struct Syslib {
long (*sem_trywait)(int *);
long (*getrlimit)(int, struct rlimit *);
long (*setrlimit)(int, const struct rlimit *);
// v6 (2023-11-03)
/* v6 (2023-11-03) */
void *(*dlopen)(const char *, int);
void *(*dlsym)(void *, const char *);
int (*dlclose)(void *);
char *(*dlerror)(void);
/* MANDATORY (cosmo runtime won't load if version < 8)
---------------------------------------------------
OPTIONAL (cosmo lib should check __syslib->version) */
/* v9 (2024-01-31) */
int (*pthread_cpu_number_np)(size_t *);
};
#define ELFCLASS32 1
@ -660,9 +665,9 @@ __attribute__((__noreturn__)) static void Spawn(const char *exe, int fd,
size = (p[i].p_vaddr & (pagesz - 1)) + p[i].p_filesz;
if (prot1 & PROT_EXEC) {
#ifdef SIP_DISABLED
// if sip is disabled then we can load the executable segments
// off the binary into memory without needing to copy anything
// which provides considerably better performance for building
/* if sip is disabled then we can load the executable segments
off the binary into memory without needing to copy anything
which provides considerably better performance for building */
rc = sys_mmap(addr, size, prot1, flags, fd, p[i].p_offset & -pagesz);
if (rc < 0) {
if (rc == -EPERM) {
@ -674,12 +679,12 @@ __attribute__((__noreturn__)) static void Spawn(const char *exe, int fd,
}
}
#else
// the issue is that if sip is enabled then, attempting to map
// it with exec permission will cause xnu to phone home a hash
// of the entire file to apple intelligence as a one time cost
// which is literally minutes for executables holding big data
// since there's no public apple api for detecting sip we read
// as the default strategy which is slow but it works for both
/* the issue is that if sip is enabled, then attempting to map
it with exec permission will cause xnu to phone home a hash
of the entire file to apple intelligence as a one time cost,
which is literally minutes for executables holding big data;
since there's no public apple api for detecting sip, we read
as the default strategy, which is slow but it works for both */
rc = sys_mmap(addr, size, (prot1 = PROT_READ | PROT_WRITE),
MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
if (rc < 0) Pexit(exe, rc, "prog mmap anon");
@ -812,12 +817,10 @@ static const char *TryElf(struct ApeLoader *M, union ElfEhdrBuf *ebuf,
}
}
/*
* merge adjacent loads that are contiguous with equal protection,
* which prevents our program header overlap check from needlessly
* failing later on; it also shaves away a microsecond of latency,
* since every program header requires invoking at least 1 syscall
*/
/* merge adjacent loads that are contiguous with equal protection,
which prevents our program header overlap check from needlessly
failing later on; it also shaves away a microsecond of latency,
since every program header requires invoking at least 1 syscall */
for (i = 0; i + 1 < e->e_phnum;) {
if (p[i].p_type == PT_LOAD && p[i + 1].p_type == PT_LOAD &&
((p[i].p_flags & (PF_R | PF_W | PF_X)) ==
@ -944,6 +947,7 @@ int main(int argc, char **argv, char **envp) {
M->lib.dlsym = dlsym;
M->lib.dlclose = dlclose;
M->lib.dlerror = dlerror;
M->lib.pthread_cpu_number_np = pthread_cpu_number_np;
/* getenv("_") is close enough to at_execfn */
execfn = 0;

View file

@ -282,12 +282,9 @@ SECTIONS {
KEEP(*(SORT_BY_NAME(.init.*)))
KEEP(*(.init))
KEEP(*(.initepilogue))
KEEP(*(.pltprologue))
*(.plt)
KEEP(*(.pltepilogue))
KEEP(*(.pltgotprologue))
*(.plt.got)
KEEP(*(.pltgotepilogue))
*(.iplt)
*(.text.startup .text.startup.*)
*(.text.exit .text.exit.*)
*(.text.unlikely .text.*_unlikely .text.unlikely.*)
@ -323,7 +320,7 @@ SECTIONS {
/*BEGIN: Read Only Data */
.rodata . : {
.rodata ALIGN(CONSTANT(COMMONPAGESIZE)) : {
KEEP(*(.rodata.pytab.0));
KEEP(*(.rodata.pytab.1));
KEEP(*(.rodata.pytab.2));
@ -398,13 +395,12 @@ SECTIONS {
KEEP(*(SORT_BY_NAME(.sort.data.*)))
. += . > 0 ? CODE_GRANULE : 0;
KEEP(*(.gotprologue))
. = ALIGN(. != 0 ? __SIZEOF_POINTER__ : 0);
__got_start = .;
*(.got)
KEEP(*(.gotepilogue))
__got_end = .;
KEEP(*(.gotpltprologue))
*(.got.plt)
KEEP(*(.gotpltepilogue))
. = ALIGN(. != 0 ? __SIZEOF_POINTER__ : 0);
__init_array_start = .;
@ -430,6 +426,8 @@ SECTIONS {
KEEP(*(SORT_BY_NAME(.piro.data.sort.*)))
KEEP(*(.piro.pad.data))
KEEP(*(.dataepilogue))
*(.igot.plt)
. = ALIGN(. != 0 ? CONSTANT(COMMONPAGESIZE) : 0);
/*END: NT FORK COPYING */
_edata = .;
@ -528,6 +526,7 @@ SECTIONS {
#endif
*(__patchable_function_entries)
*(__mcount_loc)
*(.rela.dyn)
*(.discard)
*(.yoink)
}

View file

@ -166,13 +166,6 @@
(unsigned long)(255 & (S)[1]) << 010 | \
(unsigned long)(255 & (S)[0]) << 000)
/* Debugging aid: formats VAR with Utox() into a local 19-byte buffer
   and passes it to Print() on file descriptor 2, followed by the
   stringified source text of VAR and a newline. A variable named `os`
   must be in scope wherever the macro is expanded. Note the expansion
   is a bare brace block rather than a do/while(0) statement. */
#define DEBUG(VAR) \
{ \
char ibuf[19] = {0}; \
Utox(ibuf, VAR); \
Print(os, 2, ibuf, " " #VAR, "\n", 0l); \
}
struct ElfEhdr {
unsigned char e_ident[16];
unsigned short e_type;
@ -340,23 +333,6 @@ static char *GetEnv(char **p, const char *s) {
return 0;
}
/* Renders x into p as a NUL-terminated string: "0" when x is zero,
   otherwise "0x" followed by lowercase hex digits with no leading
   zeros. Returns a pointer to the NUL terminator that was written. */
static char *Utox(char p[19], unsigned long x) {
  const char *digits = "0123456789abcdef";
  int shift;
  if (!x) {
    p[0] = '0';
    p[1] = 0;
    return p + 1;
  }
  *p++ = '0';
  *p++ = 'x';
  /* number of significant bits in x, rounded up to whole nibbles */
  shift = (int)(sizeof(long) * 8) - __builtin_clzl(x);
  shift = (shift + 3) & -4;
  while (shift) {
    shift -= 4;
    *p++ = digits[(x >> shift) & 15];
  }
  *p = 0;
  return p;
}
static char *Utoa(char p[20], unsigned long x) {
char t;
unsigned long i, a, b;
@ -534,6 +510,53 @@ static long Print(int os, int fd, const char *s, ...) {
return Write(fd, b, n, os);
}
/* Minimal formatted output for the loader.

   The message is assembled in a 512-byte stack buffer and handed to
   Write() in a single call; output beyond 512 bytes is silently
   dropped.

   Supported directives:
     %s  NUL-terminated string argument (a null pointer prints nothing)
     %d  unsigned long argument, emitted as exactly 16 zero-padded
         lowercase hexadecimal digits (not decimal, despite the letter)
   Any other character following '%' is copied through verbatim, so
   "%%" produces a single '%'. A lone '%' at the very end of the
   format string is ignored.

   Returns the result of Write(fd, buffer, length, os). */
static long Printf(int os, int fd, const char *fmt, ...) {
  int i;
  char c;
  int k = 0;
  char b[512];
  const char *s;
  unsigned long d;
  __builtin_va_list va;
  __builtin_va_start(va, fmt);
  for (;;) {
    switch ((c = *fmt++)) {
      case '\0':
        __builtin_va_end(va);
        return Write(fd, b, k, os);
      case '%':
        switch ((c = *fmt++)) {
          case '\0':
            /* format ended right after '%': stop here instead of
               emitting a NUL byte and scanning past the terminator */
            __builtin_va_end(va);
            return Write(fd, b, k, os);
          case 's':
            for (s = __builtin_va_arg(va, const char *); s && *s; ++s) {
              if (k < 512) b[k++] = *s;
            }
            break;
          case 'd':
            d = __builtin_va_arg(va, unsigned long);
            /* walk the nibbles from most to least significant */
            for (i = 16; i--;) {
              c = "0123456789abcdef"[(d >> (i * 4)) & 15];
              if (k < 512) b[k++] = c;
            }
            break;
          default:
            if (k < 512) b[k++] = c;
            break;
        }
        break;
      default:
        if (k < 512) b[k++] = c;
        break;
    }
  }
}
static void Perror(int os, const char *thing, long rc, const char *reason) {
char ibuf[21];
ibuf[0] = 0;
@ -901,7 +924,7 @@ EXTERN_C __attribute__((__noreturn__)) void ApeLoader(long di, long *sp,
long *auxv, *ap, *endp, *sp2;
char *p, *pe, *exe, *prog, **argv, **envp;
(void)Utox;
(void)Printf;
/* detect freebsd */
if (SupportsXnu() && dl == XNU) {

View file

@ -18,10 +18,12 @@ extern unsigned char _tbss_end[] __attribute__((__weak__));
extern unsigned char _tls_align[] __attribute__((__weak__));
extern unsigned char __test_start[] __attribute__((__weak__));
extern unsigned char __ro[] __attribute__((__weak__));
extern uint8_t __data_start[] __attribute__((__weak__));
extern uint8_t __data_end[] __attribute__((__weak__));
extern uint8_t __bss_start[] __attribute__((__weak__));
extern uint8_t __bss_end[] __attribute__((__weak__));
extern unsigned char __data_start[] __attribute__((__weak__));
extern unsigned char __data_end[] __attribute__((__weak__));
extern unsigned char __bss_start[] __attribute__((__weak__));
extern unsigned char __bss_end[] __attribute__((__weak__));
extern unsigned long __got_start[] __attribute__((__weak__));
extern unsigned long __got_end[] __attribute__((__weak__));
extern unsigned char ape_phdrs[] __attribute__((__weak__));
COSMOPOLITAN_C_END_