Use better memory strategy on Windows

Rather than using the rollo global to pick addresses, we select them
randomly now using a conservative vaspace.
This commit is contained in:
Justine Tunney 2024-07-20 02:20:03 -07:00
parent 6a5d4ed65b
commit 2018cac11f
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
13 changed files with 113 additions and 124 deletions

View file

@ -30,7 +30,7 @@ struct Maps {
_Atomic(struct Map *) free;
size_t count;
size_t pages;
atomic_size_t rollo;
_Atomic(char *) pick;
struct Map stack;
struct Map guard;
};
@ -42,11 +42,12 @@ struct AddrSize {
extern struct Maps __maps;
void *randaddr(void);
void __maps_init(void);
bool __maps_lock(void);
void __maps_check(void);
void __maps_unlock(void);
void *__maps_randaddr(void);
void *__maps_pickaddr(size_t);
void __maps_add(struct Map *);
void __maps_free(struct Map *);
struct Map *__maps_alloc(void);

View file

@ -40,6 +40,7 @@
#include "libc/runtime/runtime.h"
#include "libc/runtime/stack.h"
#include "libc/runtime/zipos.internal.h"
#include "libc/stdio/rand.h"
#include "libc/stdio/sysparam.h"
#include "libc/sysv/consts/auxv.h"
#include "libc/sysv/consts/map.h"
@ -51,8 +52,8 @@
#include "libc/thread/tls.h"
#define MMDEBUG IsModeDbg()
#define WINBASE (1ul << 35) // 34 gb
#define WINMAXX ((1ul << 44) - WINBASE) // 17 tb
#define MAX_SIZE 0x0ff800000000ul
#define MAX_TRIES 10
#define MAP_FIXED_NOREPLACE_linux 0x100000
@ -373,6 +374,34 @@ static int __munmap(char *addr, size_t size) {
return rc;
}
// Returns a random, allocation-granularity-aligned address in the
// range [2**38, 2**39), i.e. bits are masked to 0x007fffffffff and
// bit 38 (0x004000000000) is forced on before alignment.
void *__maps_randaddr(void) {
  uintptr_t candidate = _rand64();
  candidate = (candidate & 0x007fffffffff) | 0x004000000000;
  candidate &= -__gransize;  // round down to granule boundary
  return (void *)candidate;
}
// Picks an unused address for a mapping of `size` bytes.
//
// First tries the cached hint in __maps.pick (the address just past the
// previous successful pick), falling back to a fresh random address.  A
// candidate is accepted when it doesn't overlap existing mappings; the
// next sequential address is then published as the hint for the next
// caller.  Returns 0 if no free range was found after MAX_TRIES.
//
// NOTE(review): the overlap check happens under __maps_lock() but the
// range isn't reserved before returning, so a concurrent caller can race
// for the same address — presumably the mmap retry loop handles that.
void *__maps_pickaddr(size_t size) {
  char *addr;
  for (int try = 0; try < MAX_TRIES; ++try) {
    // consume the cached hint (if any) so two threads don't share it
    addr = atomic_exchange_explicit(&__maps.pick, 0, memory_order_acq_rel);
    if (!addr)
      addr = __maps_randaddr();
    __maps_lock();
    bool overlaps = __maps_overlaps(addr, size, __pagesize);
    __maps_unlock();
    if (!overlaps) {
      // publish the next granule-aligned address after this mapping;
      // round size UP to the granule with the -__gransize mask
      // (masking with __gransize itself would discard the size).
      atomic_store_explicit(&__maps.pick,
                            addr + ((size + __gransize - 1) & -__gransize),
                            memory_order_release);
      return addr;
    }
  }
  return 0;
}
static void *__mmap_chunk(void *addr, size_t size, int prot, int flags, int fd,
int64_t off, int pagesz, int gransz) {
@ -409,6 +438,7 @@ static void *__mmap_chunk(void *addr, size_t size, int prot, int flags, int fd,
// obtain mapping from operating system
int olderr = errno;
int tries = MAX_TRIES;
struct DirectMap res;
TryAgain:
res = sys_mmap(addr, size, prot, sysflags, fd, off);
@ -418,10 +448,11 @@ TryAgain:
errno = EEXIST;
} else if (should_untrack) {
errno = ENOMEM;
} else {
addr += gransz;
} else if (--tries && (addr = __maps_pickaddr(size))) {
errno = olderr;
goto TryAgain;
} else {
errno = ENOMEM;
}
}
__maps_free(map);
@ -483,58 +514,15 @@ static void *__mmap_impl(char *addr, size_t size, int prot, int flags, int fd,
}
}
// mmap works fine on unix
if (!IsWindows())
// try to pick our own addresses on windows which are higher up in the
// vaspace. this is important so that conflicts are less likely, after
// forking when resurrecting mappings, because win32 has a strong pref
// with lower memory addresses which may get assigned to who knows wut
if (IsWindows() && !addr)
if (!(addr = __maps_pickaddr(size)))
return (void *)enomem();
return __mmap_chunk(addr, size, prot, flags, fd, off, pagesz, gransz);
// if the concept of pagesz wasn't exciting enough
if (!addr && !(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
size_t rollo, rollo2, slab = (size + gransz - 1) & -gransz;
rollo = atomic_load_explicit(&__maps.rollo, memory_order_relaxed);
for (;;) {
if ((rollo2 = rollo + slab) > WINMAXX) {
rollo = 0;
rollo2 = slab;
}
if (atomic_compare_exchange_weak_explicit(&__maps.rollo, &rollo, rollo2,
memory_order_acq_rel,
memory_order_relaxed)) {
addr = (char *)WINBASE + rollo;
break;
}
}
}
// windows forbids unmapping a subset of a map once it's made
if (size <= gransz || size > 100 * 1024 * 1024)
return __mmap_chunk(addr, size, prot, flags, fd, off, pagesz, gransz);
// so we create a separate map for each granule in the mapping
if (!(flags & MAP_FIXED)) {
while (__maps_overlaps(addr, size, pagesz)) {
if (flags & MAP_FIXED_NOREPLACE)
return (void *)eexist();
addr += gransz;
}
}
char *res = addr;
while (size) {
char *got;
size_t amt = MIN(size, gransz);
got = __mmap_chunk(addr, amt, prot, flags, fd, off, pagesz, gransz);
if (got != addr) {
if (got != MAP_FAILED)
__munmap(got, amt);
if (addr > res)
__munmap(res, addr - res);
errno = EAGAIN;
return MAP_FAILED;
}
size -= amt;
addr += amt;
off += amt;
}
return res;
}
static void *__mmap(char *addr, size_t size, int prot, int flags, int fd,
@ -552,7 +540,7 @@ static void *__mmap(char *addr, size_t size, int prot, int flags, int fd,
return (void *)enomem();
if (!size || (uintptr_t)addr + size < size)
return (void *)einval();
if (size > WINMAXX)
if (size > MAX_SIZE)
return (void *)enomem();
if (__maps.count * pagesz + size > __virtualmax)
return (void *)enomem();
@ -697,9 +685,9 @@ static void *__mremap(char *old_addr, size_t old_size, size_t new_size,
return (void *)einval();
// check for big size
if (old_size > WINMAXX)
if (old_size > MAX_SIZE)
return (void *)enomem();
if (new_size > WINMAXX)
if (new_size > MAX_SIZE)
return (void *)enomem();
// check for overflow

View file

@ -25,11 +25,9 @@
#include "libc/thread/thread.h"
#include "libc/thread/tls.h"
static struct {
int thepid;
uint128_t thepool;
pthread_spinlock_t lock;
} g_rand64;
static int _rand64_pid;
static unsigned __int128 _rand64_pool;
pthread_mutex_t _rand64_lock_obj = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
/**
* Returns nondeterministic random data.
@ -39,19 +37,17 @@ static struct {
* the case even across forks and threads, whose sequences will differ.
*
* @see rdseed(), rdrand(), rand(), random(), rngset()
* @note this function takes 5 cycles (30 if `__threaded`)
* @note this function is not intended for cryptography
* @note this function passes bigcrush and practrand
* @asyncsignalsafe
*/
uint64_t _rand64(void) {
void *p;
uint128_t s;
if (__threaded)
pthread_spin_lock(&g_rand64.lock);
if (__pid == g_rand64.thepid) {
s = g_rand64.thepool; // normal path
pthread_mutex_lock(&_rand64_lock_obj);
if (__pid == _rand64_pid) {
s = _rand64_pool; // normal path
} else {
if (!g_rand64.thepid) {
if (!_rand64_pid) {
if (AT_RANDOM && (p = (void *)__getauxval(AT_RANDOM).value)) {
// linux / freebsd kernel supplied entropy
memcpy(&s, p, 16);
@ -61,13 +57,13 @@ uint64_t _rand64(void) {
}
} else {
// blend another timestamp on fork contention
s = g_rand64.thepool ^ rdtsc();
s = _rand64_pool ^ rdtsc();
}
// blend the pid on startup and fork contention
s ^= __pid;
g_rand64.thepid = __pid;
_rand64_pid = __pid;
}
g_rand64.thepool = (s *= 15750249268501108917ull); // lemur64
pthread_spin_unlock(&g_rand64.lock);
_rand64_pool = (s *= 15750249268501108917ull); // lemur64
pthread_mutex_unlock(&_rand64_lock_obj);
return s >> 64;
}

View file

@ -1,26 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2024 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/maps.h"
// Returns a pseudorandom address suggestion.
//
// Advances a process-wide 64-bit linear congruential generator and maps
// its high bits onto a 256mb-aligned address: the top 16 bits of the
// state are kept (>> 48) and shifted up 28 bits.  Not thread safe, and
// the sequence is deterministic (state starts at 1).
void *randaddr(void) {
  static unsigned long state = 1;
  state = state * 6364136223846793005 + 1442695040888963407;
  return (void *)((state >> 48) << 28);
}

View file

@ -48,6 +48,7 @@
__static_yoink("_pthread_atfork");
extern pthread_mutex_t _rand64_lock_obj;
extern pthread_mutex_t _pthread_lock_obj;
static void _onfork_prepare(void) {
@ -56,10 +57,12 @@ static void _onfork_prepare(void) {
_pthread_lock();
__maps_lock();
__fds_lock();
pthread_mutex_lock(&_rand64_lock_obj);
LOCKTRACE("READY TO ROCK AND ROLL");
}
static void _onfork_parent(void) {
pthread_mutex_unlock(&_rand64_lock_obj);
__fds_unlock();
__maps_unlock();
_pthread_unlock();
@ -69,6 +72,7 @@ static void _onfork_parent(void) {
static void _onfork_child(void) {
__fds_lock_obj = (pthread_mutex_t)PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
_rand64_lock_obj = (pthread_mutex_t)PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
_pthread_lock_obj = (pthread_mutex_t)PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
atomic_store_explicit(&__maps.lock, 0, memory_order_relaxed);
atomic_store_explicit(&__get_tls()->tib_relock_maps, 0, memory_order_relaxed);

View file

@ -82,6 +82,8 @@ TEST(madvise, subPages) {
}
TEST(madvise, madvWillNeed_unmappedRegion) {
if (IsWindows())
return; // needs carving
char *p;
ASSERT_NE(MAP_FAILED, (p = mmap(0, getgransize() * 3, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)));
@ -96,6 +98,8 @@ TEST(madvise, madvWillNeed_unmappedRegion) {
}
TEST(madvise, madvFree_unmappedRegion) {
if (IsWindows())
return; // needs carving
char *p;
ASSERT_NE(MAP_FAILED, (p = mmap(0, getgransize() * 3, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)));

View file

@ -275,6 +275,8 @@ TEST(ksnprintf, testMisalignedPointer_wontFormat) {
}
TEST(ksnprintf, testUnterminatedOverrun_truncatesAtPageBoundary) {
if (IsWindows())
return; // needs carving
char *m;
char b[32];
int gran = getgransize();

View file

@ -41,7 +41,7 @@
void map_unmap_one_page(void) {
void *p;
if ((p = mmap(randaddr(), 1, PROT_READ | PROT_WRITE,
if ((p = mmap(__maps_randaddr(), 1, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)) == MAP_FAILED)
__builtin_trap();
if (munmap(p, 1))
@ -61,8 +61,8 @@ int main() {
int n = 10000;
kprintf("%20s creating %d sparse maps...\n", "", n);
for (int i = 0; i < n; ++i) {
if (mmap(randaddr(), 1, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
-1, 0) == MAP_FAILED)
if (mmap(__maps_randaddr(), 1, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0) == MAP_FAILED)
__builtin_trap();
}

View file

@ -112,20 +112,23 @@ TEST(mmap, fixedTaken) {
TEST(mmap, hint) {
char *p;
if (IsWindows())
return; // needs carving
// obtain four pages
ASSERT_NE(MAP_FAILED, (p = mmap(randaddr(), gransz * 4, PROT_READ,
ASSERT_NE(MAP_FAILED, (p = mmap(__maps_randaddr(), pagesz * 4, PROT_READ,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)));
// unmap two of those pages
EXPECT_SYS(0, 0, munmap(p + gransz, gransz));
EXPECT_SYS(0, 0, munmap(p + gransz * 3, gransz));
EXPECT_SYS(0, 0, munmap(p + pagesz, pagesz));
EXPECT_SYS(0, 0, munmap(p + pagesz * 3, pagesz));
// test AVAILABLE nonfixed nonzero addr is granted
// - posix doesn't mandate this behavior (but should)
// - freebsd always chooses for you (which has no acceptable workaround)
// - netbsd manual claims it'll be like freebsd, but is actually like openbsd
if (!IsFreebsd())
ASSERT_EQ(p + gransz, mmap(p + gransz, gransz, PROT_READ,
ASSERT_EQ(p + pagesz, mmap(p + pagesz, pagesz, PROT_READ,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
// test UNAVAILABLE nonfixed nonzero addr picks something nearby
@ -134,23 +137,26 @@ TEST(mmap, hint) {
// - freebsd goes about 16mb to the right
// - qemu-user is off the wall
/*if (!IsQemuUser()) {
q = mmap(p + gransz * 2, gransz, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1,
q = mmap(p + pagesz * 2, pagesz, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1,
0);
EXPECT_LE(ABS(q - (p + gransz * 2)), 128 * 1024 * 1024);
EXPECT_SYS(0, 0, munmap(q, gransz));
EXPECT_LE(ABS(q - (p + pagesz * 2)), 128 * 1024 * 1024);
EXPECT_SYS(0, 0, munmap(q, pagesz));
}*/
// clean up
EXPECT_SYS(0, 0, munmap(p, gransz * 4));
EXPECT_SYS(0, 0, munmap(p, pagesz * 4));
}
TEST(mprotect, punchHoleAndFillHole) {
char *p;
int count = __maps.count;
if (IsWindows())
return; // needs carving
// obtain memory
ASSERT_NE(MAP_FAILED,
(p = mmap(randaddr(), gransz * 3, PROT_READ | PROT_WRITE,
(p = mmap(__maps_randaddr(), gransz * 3, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)));
ASSERT_EQ((count += IsWindows() ? 3 : 1), __maps.count);

View file

@ -259,6 +259,8 @@ TEST(mprotect, weirdSize) {
}
TEST(mprotect, outerOverlap) {
if (IsWindows())
return; // needs carving
char *p;
int gransz = getgransize();
EXPECT_NE(MAP_FAILED, (p = mmap(0, gransz * 3, PROT_READ | PROT_EXEC,

View file

@ -42,7 +42,8 @@ TEST(mremap, dontMove_hasRoom_itMoves) {
return; // NetBSD requires MREMAP_MAYMOVE
char *p;
int pagesz = getpagesize();
ASSERT_NE(MAP_FAILED, (p = mmap(randaddr(), pagesz, PROT_READ | PROT_EXEC,
ASSERT_NE(MAP_FAILED,
(p = mmap(__maps_randaddr(), pagesz, PROT_READ | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)));
EXPECT_TRUE(testlib_memoryexists(p));
EXPECT_FALSE(testlib_memoryexists(p + pagesz));
@ -59,7 +60,8 @@ TEST(mremap, dontMove_noRoom_itFailsWithEnomem) {
return; // NetBSD requires MREMAP_MAYMOVE
char *p;
int pagesz = getpagesize();
ASSERT_NE(MAP_FAILED, (p = mmap(randaddr(), pagesz * 2, PROT_READ | PROT_EXEC,
ASSERT_NE(MAP_FAILED,
(p = mmap(__maps_randaddr(), pagesz * 2, PROT_READ | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)));
EXPECT_TRUE(testlib_memoryexists(p + pagesz * 0));
EXPECT_TRUE(testlib_memoryexists(p + pagesz * 1));
@ -77,7 +79,8 @@ TEST(mremap, dontMove_noRoom_itFailsWithEnomem) {
TEST(mremap, mayMove_noRoom_itRelocates) {
char *p, *p2;
int pagesz = getpagesize();
ASSERT_NE(MAP_FAILED, (p = mmap(randaddr(), pagesz * 2, PROT_READ | PROT_EXEC,
ASSERT_NE(MAP_FAILED,
(p = mmap(__maps_randaddr(), pagesz * 2, PROT_READ | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)));
EXPECT_TRUE(testlib_memoryexists(p + pagesz * 0));
EXPECT_TRUE(testlib_memoryexists(p + pagesz * 1));
@ -112,7 +115,7 @@ TEST(mremap, mayMove_noRoom_itRelocates) {
TEST(mremap, bench) {
#define N 10
long size = 1024 * 1024;
char *rollo = randaddr();
char *rollo = __maps_randaddr();
char *addr[N];
// create mappings

View file

@ -54,6 +54,8 @@ TEST(munmap, test) {
}
TEST(munmap, punchHoleInMemory) {
if (IsWindows())
return; // needs carving
char *p;
ASSERT_NE(MAP_FAILED, (p = mmap(0, gransz * 3, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)));
@ -72,6 +74,8 @@ TEST(munmap, punchHoleInMemory) {
}
TEST(munmap, memoryHasHole) {
if (IsWindows())
return; // needs carving
char *p;
ASSERT_NE(MAP_FAILED, (p = mmap(0, gransz * 3, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)));
@ -86,6 +90,8 @@ TEST(munmap, memoryHasHole) {
}
TEST(munmap, blanketFree) {
if (IsWindows())
return; // needs carving
char *p;
ASSERT_NE(MAP_FAILED, (p = mmap(0, gransz * 3, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)));
@ -104,6 +110,8 @@ TEST(munmap, blanketFree) {
}
TEST(munmap, trimLeft) {
if (IsWindows())
return; // needs carving
char *p;
ASSERT_NE(MAP_FAILED, (p = mmap(0, gransz * 2, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)));
@ -118,6 +126,8 @@ TEST(munmap, trimLeft) {
}
TEST(munmap, trimRight) {
if (IsWindows())
return; // needs carving
char *p;
ASSERT_NE(MAP_FAILED, (p = mmap(0, gransz * 2, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)));
@ -173,7 +183,7 @@ TEST(munmap, tinyFile_preciseUnmapSize) {
// clang-format off
TEST(munmap, tinyFile_mapThriceUnmapOnce) {
char *p = randaddr();
char *p = __maps_randaddr();
ASSERT_SYS(0, 3, open("doge", O_RDWR | O_CREAT | O_TRUNC, 0644));
ASSERT_SYS (0, 5, write(3, "hello", 5));
ASSERT_EQ(p+gransz*0, mmap(p+gransz*0, gransz, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0));

View file

@ -21,7 +21,6 @@
#include "libc/calls/calls.h"
#include "libc/calls/struct/sched_param.h"
#include "libc/calls/struct/sigaction.h"
#include "libc/calls/struct/sigaltstack.h"
#include "libc/calls/struct/siginfo.h"
#include "libc/dce.h"
#include "libc/errno.h"