Actually Portable Executable now supports Android. Cosmo's old mmap code required a 47-bit address space. The new implementation is very agnostic and supports both smaller address spaces (e.g. embedded) and even modern 56-bit PML5T paging for x86, which finally came true on Zen4 Threadripper. Cosmopolitan no longer requires UNIX systems to observe the Windows 64kb granularity; i.e. sysconf(_SC_PAGE_SIZE) will now report the host's native page size. This fixes a longstanding POSIX conformance issue concerning file mappings that overlap the end of file. Other aspects of conformance have been improved too, such as the subtleties of address assignment and the various subtleties surrounding MAP_FIXED and MAP_FIXED_NOREPLACE.

On Windows, mappings larger than 100 megabytes won't be broken down into thousands of independent 64kb mappings. Support for MAP_STACK is removed by this change; please use NewCosmoStack() instead. Stack overflow avoidance is now implemented using the POSIX thread APIs. Please use GetStackBottom() and GetStackAddr(), instead of the old error-prone GetStackAddr() and HaveStackMemory() APIs, which are removed.
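As a minimal sketch of what the page-size change means for portable code, the snippet below uses only standard POSIX calls (nothing in it is specific to Cosmopolitan's internals), and the sizes mentioned in the comments are illustrative examples rather than guarantees:

#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void) {
  // with the new memory manager this reports the host's native page
  // size (e.g. 4096 on most x86-64 hosts, 16384 on Apple Silicon)
  // rather than the 64kb Windows allocation granularity
  long pagesz = sysconf(_SC_PAGE_SIZE);
  printf("page size: %ld\n", pagesz);

  // a page-sized anonymous mapping; on UNIX hosts it no longer needs
  // to be rounded up to a 64kb boundary
  void *p = mmap(0, pagesz, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p != MAP_FAILED)
    munmap(p, pagesz);
  return 0;
}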
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2024 Justine Alexandra Roberts Tunney                              │
│                                                                              │
│ Permission to use, copy, modify, and/or distribute this software for         │
│ any purpose with or without fee is hereby granted, provided that the         │
│ above copyright notice and this permission notice appear in all copies.      │
│                                                                              │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
│ PERFORMANCE OF THIS SOFTWARE.                                                │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/dce.h"
|
||
#include "libc/intrin/magicu.h"
|
||
#include "libc/intrin/strace.internal.h"
|
||
#include "libc/intrin/weaken.h"
|
||
#include "libc/macros.internal.h"
|
||
#include "libc/nexgen32e/rdtscp.h"
|
||
#include "libc/nexgen32e/x86feature.h"
|
||
#include "libc/runtime/runtime.h"
|
||
#include "libc/thread/thread.h"
|
||
#include "third_party/dlmalloc/dlmalloc.h"
|
||
|
||
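// this wrapper shards dlmalloc across several mspace arenas; FOOTERS is
// required so each chunk records which arena owns it (letting free()
// and realloc() pass a null mspace argument), and MSPACES is required
// so more than one independent heap can exist at all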
#if !FOOTERS || !MSPACES
#error "threaded dlmalloc needs footers and mspaces"
#endif

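// each arena is a DEFAULT_GRANULARITY-sized region viewed three ways:
// as raw storage (mspace), as a malloc_state at its base, and as a
// malloc_state offset by two words of top_foot bookkeeping; the offset
// view presumably matches where init_heap() places the mspace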
union Heap {
  struct malloc_state mstate;
  struct {
    size_t top_foot[2];
    struct malloc_state m;
  };
  _Alignas(16) char mspace[DEFAULT_GRANULARITY];
};

static void init_heap(union Heap *heap, int locked);

static struct magicu magiu;
static unsigned g_heapslen;
static union Heap g_heaps[128];

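// with FOOTERS enabled, passing 0 as the mspace makes dlmalloc recover
// the owning arena from the chunk's footer, so free() and friends work
// on pointers allocated from any of the per-cpu heaps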
void dlfree(void *p) {
  return mspace_free(0, p);
}

size_t dlmalloc_usable_size(void *mem) {
  return mspace_usable_size(mem);
}

void *dlrealloc_in_place(void *p, size_t n) {
  return mspace_realloc_in_place(0, p, n);
}

int dlmallopt(int param_number, int value) {
  return mspace_mallopt(param_number, value);
}

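// trim and inspect_all act on the heap as a whole and have no single
// owning arena, so they visit every initialized heap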
int dlmalloc_trim(size_t pad) {
  int got_some = 0;
  for (unsigned i = 0; i < g_heapslen; ++i)
    got_some |= mspace_trim(&g_heaps[i].m, pad);
  return got_some;
}

size_t dlbulk_free(void *array[], size_t nelem) {
  for (size_t i = 0; i < nelem; ++i)
    mspace_free(0, array[i]);
  return 0;
}

void dlmalloc_inspect_all(void handler(void *start, void *end,
                                       size_t used_bytes, void *callback_arg),
                          void *arg) {
  for (unsigned i = 0; i < g_heapslen; ++i)
    mspace_inspect_all(&g_heaps[i].m, handler, arg);
}

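// returns the arena for the calling thread's current cpu: rdtscp's
// IA32_TSC_AUX value supplies the cpu number on x86-64, and the low
// byte of tpidr_el0 is used as the cpu hint on aarch64; that number is
// then scaled down to a heap index with a precomputed magic divide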
forceinline mstate get_arena(void) {
  unsigned cpu;
#ifdef __x86_64__
  unsigned tsc_aux;
  rdtscp(&tsc_aux);
  cpu = TSC_AUX_CORE(tsc_aux);
#else
  long tpidr_el0;
  asm("mrs\t%0,tpidr_el0" : "=r"(tpidr_el0));
  cpu = tpidr_el0 & 255;
#endif
  return &g_heaps[__magicu_div(cpu, magiu) % g_heapslen].m;
}

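// the _single variants pin every call to heap zero and are installed
// when sharding is impossible or unprofitable; the _threaded variants
// route each call through get_arena()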
static void *dlmalloc_single(size_t n) {
  return mspace_malloc(&g_heaps[0].m, n);
}

static void *dlmalloc_threaded(size_t n) {
  return mspace_malloc(get_arena(), n);
}

static void *dlcalloc_single(size_t n, size_t z) {
  return mspace_calloc(&g_heaps[0].m, n, z);
}

static void *dlcalloc_threaded(size_t n, size_t z) {
  return mspace_calloc(get_arena(), n, z);
}

static void *dlrealloc_single(void *p, size_t n) {
  return mspace_realloc(&g_heaps[0].m, p, n);
}

static void *dlrealloc_threaded(void *p, size_t n) {
  if (p)
    return mspace_realloc(0, p, n);
  else
    return mspace_malloc(get_arena(), n);
}

static void *dlmemalign_single(size_t a, size_t n) {
  return mspace_memalign(&g_heaps[0].m, a, n);
}

static void *dlmemalign_threaded(size_t a, size_t n) {
  return mspace_memalign(get_arena(), a, n);
}

static struct mallinfo dlmallinfo_single(void) {
  return mspace_mallinfo(&g_heaps[0].m);
}

static struct mallinfo dlmallinfo_threaded(void) {
  return mspace_mallinfo(get_arena());
}

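// minimal base-10 parser for the COSMOPOLITAN_HEAP_COUNT environment
// variable; no sign, whitespace, or overflow handling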
static int dlmalloc_atoi(const char *s) {
  int c, x = 0;
  while ((c = *s++)) {
    x *= 10;
    x += c - '0';
  }
  return x;
}

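// binds the malloc function pointers to the single-arena variants;
// uses_locks says whether heap zero still needs its own lock because
// threads may exist even though sharding isn't worthwhile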
static void use_single_heap(bool uses_locks) {
  g_heapslen = 1;
  dlmalloc = dlmalloc_single;
  dlcalloc = dlcalloc_single;
  dlrealloc = dlrealloc_single;
  dlmemalign = dlmemalign_single;
  dlmallinfo = dlmallinfo_single;
  init_heap(&g_heaps[0], uses_locks);
}

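// chooses the malloc configuration: an unlocked single heap when
// pthread_create isn't linked, a locked single heap when no cheap
// cpu-id source exists, and otherwise one arena per two cpus by
// default, overridable with COSMOPOLITAN_HEAP_COUNT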
static void threaded_dlmalloc(void) {
  int heaps, cpus;
  const char *var;

  if (!_weaken(pthread_create))
    return use_single_heap(false);

  if (!IsAarch64() && !X86_HAVE(RDTSCP))
    return use_single_heap(true);

  // determine how many independent heaps we should install
  // by default we do an approximation of one heap per core
  // this code makes the c++ stl go 164x faster on my ryzen
  cpus = __get_cpu_count();
  if (cpus == -1)
    heaps = 1;
  else if ((var = getenv("COSMOPOLITAN_HEAP_COUNT")))
    heaps = dlmalloc_atoi(var);
  else
    heaps = cpus >> 1;
  if (heaps <= 1)
    return use_single_heap(true);
  if (heaps > ARRAYLEN(g_heaps))
    heaps = ARRAYLEN(g_heaps);

  // find 𝑑 such that sched_getcpu() / 𝑑 is within [0,heaps)
  // turn 𝑑 into a fast magic that can divide by multiplying
  magiu = __magicu_get(cpus / heaps);

  // we need this too due to linux's cpu count affinity hack
  g_heapslen = heaps;

  // create the heaps
  for (size_t i = 0; i < g_heapslen; ++i)
    init_heap(&g_heaps[i], true);

  // install function pointers
  dlmalloc = dlmalloc_threaded;
  dlcalloc = dlcalloc_threaded;
  dlrealloc = dlrealloc_threaded;
  dlmemalign = dlmemalign_threaded;
  dlmallinfo = dlmallinfo_threaded;

  STRACE("created %d dlmalloc heaps for %d cpus", heaps, cpus);
}