Don't relocate file descriptor memory

This change fixes #496 where ASAN spotted a race condition that could
happen in multithreaded programs, with more than OPEN_MAX descriptors
when using ZipOS or Windows NT, which require tracking open file info
and this change fixes that table so it never relocates, thus allowing
us to continue to enjoy the benefits of avoiding locks while reading.
This commit is contained in:
Justine Tunney 2022-09-09 16:54:28 -07:00
parent c3208eb9d5
commit 3265324e00
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
35 changed files with 297 additions and 152 deletions

View file

@ -1,53 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/strace.internal.h"
#include "libc/runtime/memtrack.internal.h"
noasan bool AreMemoryIntervalsOk(const struct MemoryIntervals *mm) {
/* asan runtime depends on this function */
int i;
size_t wantsize;
for (i = 0; i < mm->i; ++i) {
if (mm->p[i].y < mm->p[i].x) {
STRACE("AreMemoryIntervalsOk() y should be >= x!");
return false;
}
wantsize = (size_t)(mm->p[i].y - mm->p[i].x) * FRAMESIZE;
if (!(wantsize < mm->p[i].size && mm->p[i].size <= wantsize + FRAMESIZE)) {
STRACE("AreMemoryIntervalsOk(%p) size is wrong!"
" %'zu not within %'zu .. %'zu",
(uintptr_t)mm->p[i].x << 16, mm->p[i].size, wantsize,
wantsize + FRAMESIZE);
return false;
}
if (i) {
if (mm->p[i].h != -1 || mm->p[i - 1].h != -1) {
if (mm->p[i].x <= mm->p[i - 1].y) {
return false;
}
} else {
if (!(mm->p[i - 1].y + 1 <= mm->p[i].x)) {
STRACE("AreMemoryIntervalsOk() out of order or overlap!");
return false;
}
}
}
}
return true;
}

View file

@ -79,14 +79,6 @@ o/$(MODE)/libc/calls/vdsofunc.greg.o: private \
-ffreestanding \
-fno-sanitize=address
# we can't use asan because:
# asan guard pages haven't been allocated yet
o/$(MODE)/libc/calls/directmap.o \
o/$(MODE)/libc/calls/directmap-nt.o: private \
OVERRIDE_COPTS += \
-ffreestanding \
-fno-sanitize=address
# we can't use asan because:
# ntspawn allocates 128kb of heap memory via win32
o/$(MODE)/libc/calls/ntspawn.o \
@ -144,12 +136,6 @@ o/$(MODE)/libc/calls/ioctl-siocgifconf-nt.o: private \
-ffunction-sections \
-fdata-sections
# we want small code size because:
# to keep .text.head under 4096 bytes
o/$(MODE)/libc/calls/mman.greg.o: private \
OVERRIDE_COPTS += \
-Os
# we always want -Os because:
# va_arg codegen is very bloated in default mode
o//libc/calls/open.o \

View file

@ -1,65 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/calls.h"
#include "libc/macros.internal.h"
#include "libc/runtime/directmap.internal.h"
#include "libc/runtime/pc.internal.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/errfuns.h"
static uint64_t sys_mmap_metal_break;
noasan struct DirectMap sys_mmap_metal(void *paddr, size_t size, int prot,
int flags, int fd, int64_t off) {
/* asan runtime depends on this function */
size_t i;
struct mman *mm;
struct DirectMap res;
uint64_t addr, page, *pte, *pml4t;
mm = (struct mman *)(BANE + 0x0500);
pml4t = __get_pml4t();
size = ROUNDUP(size, 4096);
addr = (uint64_t)paddr;
if (!(flags & MAP_FIXED)) {
for (i = 0; i < size; i += 4096) {
pte = __get_virtual(mm, pml4t, addr, false);
if (pte && (*pte & PAGE_V)) {
addr = MAX(addr, sys_mmap_metal_break) + i + 4096;
i = 0;
}
}
sys_mmap_metal_break = MAX(addr + size, sys_mmap_metal_break);
}
for (i = 0; i < size; i += 4096) {
page = __new_page(mm);
pte = __get_virtual(mm, pml4t, addr + i, true);
if (pte && page) {
__clear_page(BANE + page);
*pte = page | ((prot & PROT_WRITE) ? PAGE_RW : 0) | PAGE_U | PAGE_V;
} else {
addr = -1;
break;
}
}
res.addr = (void *)addr;
res.maphandle = -1;
return res;
}

View file

@ -1,99 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/calls/internal.h"
#include "libc/calls/state.internal.h"
#include "libc/nt/enum/filemapflags.h"
#include "libc/nt/enum/pageflags.h"
#include "libc/nt/memory.h"
#include "libc/nt/runtime.h"
#include "libc/nt/struct/processmemorycounters.h"
#include "libc/nt/struct/securityattributes.h"
#include "libc/runtime/directmap.internal.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/o.h"
textwindows struct DirectMap sys_mmap_nt(void *addr, size_t size, int prot,
int flags, int fd, int64_t off) {
size_t i;
bool iscow;
int64_t handle;
uint32_t oldprot;
struct DirectMap dm;
struct ProtectNt fl;
const struct NtSecurityAttributes *sec;
struct NtProcessMemoryCountersEx memcount;
if (fd != -1) {
handle = g_fds.p[fd].handle;
} else {
handle = kNtInvalidHandleValue;
}
if ((flags & MAP_TYPE) != MAP_SHARED) {
sec = 0; // MAP_PRIVATE isn't inherited across fork()
} else {
sec = &kNtIsInheritable; // MAP_SHARED gives us zero-copy fork()
}
// nt will whine under many circumstances if we change the execute bit
// later using mprotect(). the workaround is to always request execute
// and then virtualprotect() it away until we actually need it. please
// note that open-nt.c always requests an kNtGenericExecute accessmask
iscow = false;
if (handle != -1) {
if ((flags & MAP_TYPE) != MAP_SHARED) {
// windows has cow pages but they can't propagate across fork()
// that means we only get copy-on-write for the root process :(
fl = (struct ProtectNt){kNtPageExecuteWritecopy,
kNtFileMapCopy | kNtFileMapExecute};
iscow = true;
} else {
if ((g_fds.p[fd].flags & O_ACCMODE) == O_RDONLY) {
fl = (struct ProtectNt){kNtPageExecuteRead,
kNtFileMapRead | kNtFileMapExecute};
} else {
fl = (struct ProtectNt){kNtPageExecuteReadwrite,
kNtFileMapWrite | kNtFileMapExecute};
}
}
} else {
assert(flags & MAP_ANONYMOUS);
fl = (struct ProtectNt){kNtPageExecuteReadwrite,
kNtFileMapWrite | kNtFileMapExecute};
}
if ((dm.maphandle = CreateFileMapping(handle, sec, fl.flags1,
(size + off) >> 32, (size + off), 0))) {
if ((dm.addr = MapViewOfFileEx(dm.maphandle, fl.flags2, off >> 32, off,
size, addr))) {
if (VirtualProtect(addr, size, __prot2nt(prot, iscow), &oldprot)) {
return dm;
} else {
return dm;
UnmapViewOfFile(dm.addr);
}
}
CloseHandle(dm.maphandle);
}
dm.maphandle = kNtInvalidHandleValue;
dm.addr = (void *)(intptr_t)-1;
return dm;
}

View file

@ -1,53 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/strace.internal.h"
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/errno.h"
#include "libc/intrin/describeflags.internal.h"
#include "libc/nt/runtime.h"
#include "libc/runtime/directmap.internal.h"
#include "libc/runtime/memtrack.internal.h"
/**
* Obtains memory mapping directly from system.
*
* The mmap() function needs to track memory mappings in order to
* support Windows NT and Address Sanitizer. That memory tracking can be
* bypassed by calling this function. However the caller is responsible
* for passing the magic memory handle on Windows NT to CloseHandle().
*
* @asyncsignalsafe
* @threadsafe
*/
struct DirectMap sys_mmap(void *addr, size_t size, int prot, int flags, int fd,
int64_t off) {
struct DirectMap d;
if (!IsWindows() && !IsMetal()) {
d.addr = __sys_mmap(addr, size, prot, flags, fd, off, off);
d.maphandle = kNtInvalidHandleValue;
} else if (IsMetal()) {
d = sys_mmap_metal(addr, size, prot, flags, fd, off);
} else {
d = sys_mmap_nt(addr, size, prot, flags, fd, off);
}
KERNTRACE("sys_mmap(%.12p%s, %'zu, %s, %s, %d, %'ld) → {%.12p, %p}% m", addr,
DescribeFrame((intptr_t)addr >> 16), size, DescribeProtFlags(prot),
DescribeMapFlags(flags), fd, off, d.addr, d.maphandle);
return d;
}

View file

@ -0,0 +1,10 @@
#ifndef COSMOPOLITAN_LIBC_CALLS_EXTEND_INTERNAL_H_
#define COSMOPOLITAN_LIBC_CALLS_EXTEND_INTERNAL_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
void *_extend(void *, size_t, void *, intptr_t);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_CALLS_EXTEND_INTERNAL_H_ */

View file

@ -1,250 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/intrin/bits.h"
#include "libc/intrin/likely.h"
#include "libc/intrin/weaken.h"
#include "libc/calls/calls.h"
#include "libc/calls/strace.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/asan.internal.h"
#include "libc/log/libfatal.internal.h"
#include "libc/log/log.h"
#include "libc/macros.internal.h"
#include "libc/mem/mem.h"
#include "libc/runtime/directmap.internal.h"
#include "libc/runtime/memtrack.internal.h"
#include "libc/runtime/runtime.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/errfuns.h"
static void *MoveMemoryIntervals(struct MemoryInterval *d,
const struct MemoryInterval *s, int n) {
// asan runtime depends on this function
int i;
assert(n >= 0);
if (d > s) {
for (i = n; i--;) {
d[i] = s[i];
}
} else {
for (i = 0; i < n; ++i) {
d[i] = s[i];
}
}
return d;
}
static void RemoveMemoryIntervals(struct MemoryIntervals *mm, int i, int n) {
// asan runtime depends on this function
assert(i >= 0);
assert(i + n <= mm->i);
MoveMemoryIntervals(mm->p + i, mm->p + i + n, mm->i - (i + n));
mm->i -= n;
}
static bool ExtendMemoryIntervals(struct MemoryIntervals *mm) {
int prot, flags;
char *base, *shad;
size_t gran, size;
struct DirectMap dm;
gran = kMemtrackGran;
base = (char *)kMemtrackStart;
prot = PROT_READ | PROT_WRITE;
flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED;
// TODO(jart): These map handles should not leak across NT fork()
if (mm->p == mm->s) {
// TODO(jart): How can we detect ASAN mode under GREG?
if (1 || IsAsan()) {
shad = (char *)(((intptr_t)base >> 3) + 0x7fff8000);
dm = sys_mmap(shad, gran >> 3, prot, flags, -1, 0);
if (!dm.addr) return false;
}
dm = sys_mmap(base, gran, prot, flags, -1, 0);
if (!dm.addr) return false;
MoveMemoryIntervals(dm.addr, mm->p, mm->i);
mm->p = dm.addr;
mm->n = gran / sizeof(*mm->p);
} else {
size = ROUNDUP(mm->n * sizeof(*mm->p), gran);
base += size;
if (IsAsan()) {
shad = (char *)(((intptr_t)base >> 3) + 0x7fff8000);
dm = sys_mmap(shad, gran >> 3, prot, flags, -1, 0);
if (!dm.addr) return false;
}
dm = sys_mmap(base, gran, prot, flags, -1, 0);
if (!dm.addr) return false;
mm->n = (size + gran) / sizeof(*mm->p);
}
#if IsModeDbg()
assert(AreMemoryIntervalsOk(mm));
#endif
return true;
}
int CreateMemoryInterval(struct MemoryIntervals *mm, int i) {
// asan runtime depends on this function
int rc;
rc = 0;
assert(i >= 0);
assert(i <= mm->i);
assert(mm->n >= 0);
if (UNLIKELY(mm->i == mm->n) && !ExtendMemoryIntervals(mm)) return enomem();
MoveMemoryIntervals(mm->p + i + 1, mm->p + i, mm->i++ - i);
return 0;
}
static int PunchHole(struct MemoryIntervals *mm, int x, int y, int i) {
if (CreateMemoryInterval(mm, i) == -1) return -1;
mm->p[i + 0].size -= (size_t)(mm->p[i + 0].y - (x - 1)) * FRAMESIZE;
mm->p[i + 0].y = x - 1;
mm->p[i + 1].size -= (size_t)((y + 1) - mm->p[i + 1].x) * FRAMESIZE;
mm->p[i + 1].x = y + 1;
return 0;
}
int ReleaseMemoryIntervals(struct MemoryIntervals *mm, int x, int y,
void wf(struct MemoryIntervals *, int, int)) {
unsigned l, r;
#if IsModeDbg()
assert(y >= x);
assert(AreMemoryIntervalsOk(mm));
#endif
if (!mm->i) return 0;
// binary search for the lefthand side
l = FindMemoryInterval(mm, x);
if (l == mm->i) return 0;
if (y < mm->p[l].x) return 0;
// binary search for the righthand side
r = FindMemoryInterval(mm, y);
if (r == mm->i || (r > l && y < mm->p[r].x)) --r;
assert(r >= l);
assert(x <= mm->p[r].y);
// remove the middle of an existing map
//
// ----|mmmmmmmmmmmmmmmm|--------- before
// xxxxx
// ----|mmmm|-----|mmmmm|--------- after
//
// this isn't possible on windows because we track each
// 64kb segment on that platform using a separate entry
if (l == r && x > mm->p[l].x && y < mm->p[l].y) {
return PunchHole(mm, x, y, l);
}
// trim the right side of the lefthand map
//
// ----|mmmmmmm|-------------- before
// xxxxx
// ----|mmmm|----------------- after
//
if (x > mm->p[l].x && x <= mm->p[l].y) {
assert(y >= mm->p[l].y);
if (IsWindows()) return einval();
mm->p[l].size -= (size_t)(mm->p[l].y - (x - 1)) * FRAMESIZE;
mm->p[l].y = x - 1;
assert(mm->p[l].x <= mm->p[l].y);
++l;
}
// trim the left side of the righthand map
//
// ------------|mmmmm|-------- before
// xxxxx
// ---------------|mm|-------- after
//
if (y >= mm->p[r].x && y < mm->p[r].y) {
assert(x <= mm->p[r].x);
if (IsWindows()) return einval();
mm->p[r].size -= (size_t)((y + 1) - mm->p[r].x) * FRAMESIZE;
mm->p[r].x = y + 1;
assert(mm->p[r].x <= mm->p[r].y);
--r;
}
if (l <= r) {
if (IsWindows() && wf) {
wf(mm, l, r);
}
RemoveMemoryIntervals(mm, l, r - l + 1);
}
return 0;
}
int TrackMemoryInterval(struct MemoryIntervals *mm, int x, int y, long h,
int prot, int flags, bool readonlyfile, bool iscow,
long offset, long size) {
// asan runtime depends on this function
unsigned i;
#if IsModeDbg()
assert(y >= x);
assert(AreMemoryIntervalsOk(mm));
#endif
i = FindMemoryInterval(mm, x);
// try to extend the righthand side of the lefthand entry
// we can't do that if we're tracking independent handles
// we can't do that if it's a file map with a small size!
if (i && x == mm->p[i - 1].y + 1 && h == mm->p[i - 1].h &&
prot == mm->p[i - 1].prot && flags == mm->p[i - 1].flags &&
mm->p[i - 1].size ==
(size_t)(mm->p[i - 1].y - mm->p[i - 1].x) * FRAMESIZE + FRAMESIZE) {
mm->p[i - 1].size += (size_t)(y - mm->p[i - 1].y) * FRAMESIZE;
mm->p[i - 1].y = y;
// if we filled the hole then merge the two mappings
if (i < mm->i && y + 1 == mm->p[i].x && h == mm->p[i].h &&
prot == mm->p[i].prot && flags == mm->p[i].flags) {
mm->p[i - 1].y = mm->p[i].y;
mm->p[i - 1].size += mm->p[i].size;
RemoveMemoryIntervals(mm, i, 1);
}
}
// try to extend the lefthand side of the righthand entry
// we can't do that if we're creating a smaller file map!
else if (i < mm->i && y + 1 == mm->p[i].x && h == mm->p[i].h &&
prot == mm->p[i].prot && flags == mm->p[i].flags &&
size == (size_t)(y - x) * FRAMESIZE + FRAMESIZE) {
mm->p[i].size += (size_t)(mm->p[i].x - x) * FRAMESIZE;
mm->p[i].x = x;
}
// otherwise, create a new entry and memmove the items
else {
if (CreateMemoryInterval(mm, i) == -1) return -1;
mm->p[i].x = x;
mm->p[i].y = y;
mm->p[i].h = h;
mm->p[i].prot = prot;
mm->p[i].flags = flags;
mm->p[i].offset = offset;
mm->p[i].size = size;
mm->p[i].iscow = iscow;
mm->p[i].readonlyfile = readonlyfile;
}
return 0;
}

View file

@ -1,160 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
αcτµαlly pδrταblε εxεcµταblε § no-frills virtual memory management
*/
#include "ape/relocations.h"
#include "libc/elf/def.h"
#include "libc/elf/struct/phdr.h"
#include "libc/macros.internal.h"
#include "libc/nexgen32e/uart.internal.h"
#include "libc/runtime/e820.internal.h"
#include "libc/runtime/metalprintf.internal.h"
#include "libc/runtime/pc.internal.h"
#include "libc/runtime/runtime.h"
/**
* Allocates new page of physical memory.
*/
noasan texthead uint64_t __new_page(struct mman *mm) {
uint64_t p;
if (mm->pdpi == mm->e820n) {
/* TODO: reclaim free pages */
return 0;
}
while (mm->pdp >= mm->e820[mm->pdpi].addr + mm->e820[mm->pdpi].size) {
if (++mm->pdpi == mm->e820n) return 0;
mm->pdp = mm->e820[mm->pdpi].addr;
}
p = mm->pdp;
mm->pdp += 4096;
return p;
}
/**
* Returns pointer to page table entry for page at virtual address.
* Additional page tables are allocated if needed as a side-effect.
*/
noasan textreal uint64_t *__get_virtual(struct mman *mm, uint64_t *t,
int64_t vaddr, bool maketables) {
uint64_t *e, p;
unsigned char h;
for (h = 39;; h -= 9) {
e = t + ((vaddr >> h) & 511);
if (h == 12) return e;
if (!(*e & PAGE_V)) {
if (!maketables) return NULL;
if (!(p = __new_page(mm))) return NULL;
*e = p | PAGE_V | PAGE_RW;
}
t = (uint64_t *)(BANE + (*e & PAGE_TA));
}
}
/**
* Sorts, rounds, and filters BIOS memory map.
*/
static noasan textreal void __normalize_e820(struct mman *mm) {
uint64_t a, b;
uint64_t x, y;
unsigned i, j, n;
for (n = i = 0; mm->e820[i].size; ++i) {
mm->e820[n] = mm->e820[i];
x = mm->e820[n].addr;
y = mm->e820[n].addr + mm->e820[n].size;
a = ROUNDUP(x, 4096);
b = ROUNDDOWN(y, 4096) - a;
if (b > 0 && mm->e820[i].type == kMemoryUsable) {
mm->e820[n].addr = a;
mm->e820[n].size = b;
++n;
}
}
for (i = 1; i < n; ++i) {
for (j = i; j > 0 && mm->e820[i].addr < mm->e820[j - 1].addr; --j) {
mm->e820[j] = mm->e820[j - 1];
}
mm->e820[j] = mm->e820[i];
}
mm->pdp = MAX(0x80000, mm->e820[0].addr);
mm->pdpi = 0;
mm->e820n = n;
}
/**
* Identity maps all usable physical memory to its negative address.
*/
static noasan textreal void __invert_memory(struct mman *mm, uint64_t *pml4t) {
uint64_t i, j, *m, p, pe;
for (i = 0; i < mm->e820n; ++i) {
for (p = mm->e820[i].addr, pe = mm->e820[i].addr + mm->e820[i].size;
p + 0x200000 < pe; p += 4096) {
m = __get_virtual(mm, pml4t, BANE + p, true);
if (m && !(*m & PAGE_V)) {
*m = p | PAGE_V | PAGE_RW;
}
}
}
}
noasan textreal void __setup_mman(struct mman *mm, uint64_t *pml4t) {
__normalize_e820(mm);
__invert_memory(mm, pml4t);
}
/**
* Maps APE-defined ELF program headers into memory and clears BSS.
*/
noasan textreal void __map_phdrs(struct mman *mm, uint64_t *pml4t, uint64_t b) {
struct Elf64_Phdr *p;
uint64_t i, f, v, m, *e;
extern char ape_phdrs[] __attribute__((__weak__));
extern char ape_phdrs_end[] __attribute__((__weak__));
__setup_mman(mm, pml4t);
for (p = (struct Elf64_Phdr *)REAL(ape_phdrs), m = 0;
p < (struct Elf64_Phdr *)REAL(ape_phdrs_end); ++p) {
if (p->p_type == PT_LOAD || p->p_type == PT_GNU_STACK) {
f = PAGE_V | PAGE_U;
if (p->p_flags & PF_W) f |= PAGE_RW;
for (i = 0; i < p->p_memsz; i += 4096) {
if (i < p->p_filesz) {
v = b + p->p_offset + i;
m = MAX(m, v);
} else {
v = __clear_page(__new_page(mm));
}
*__get_virtual(mm, pml4t, p->p_vaddr + i, true) = (v & PAGE_TA) | f;
}
}
}
mm->pdp = MAX(mm->pdp, m);
}

View file

@ -18,6 +18,7 @@
*/
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "libc/calls/extend.internal.h"
#include "libc/calls/internal.h"
#include "libc/calls/state.internal.h"
#include "libc/calls/strace.internal.h"
@ -46,33 +47,11 @@ static volatile size_t mapsize;
* @asyncsignalsafe
*/
int __ensurefds_unlocked(int fd) {
uint64_t addr;
int prot, flags;
size_t size, chunk;
struct DirectMap dm;
bool relocate;
if (fd < g_fds.n) return fd;
STRACE("__ensurefds(%d) extending", fd);
size = mapsize;
chunk = FRAMESIZE;
if (IsAsan()) chunk *= 8;
addr = kMemtrackFdsStart + size;
prot = PROT_READ | PROT_WRITE;
flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED;
dm = sys_mmap((char *)addr, chunk, prot, flags, -1, 0);
TrackMemoryInterval(&_mmi, addr >> 16, (addr + chunk - 1) >> 16, dm.maphandle,
prot, flags, false, false, 0, chunk);
if (IsAsan()) {
addr = (addr >> 3) + 0x7fff8000;
dm = sys_mmap((char *)addr, FRAMESIZE, prot, flags, -1, 0);
TrackMemoryInterval(&_mmi, addr >> 16, addr >> 16, dm.maphandle, prot,
flags, false, false, 0, FRAMESIZE);
}
if (!size) {
g_fds.p = memcpy((char *)kMemtrackFdsStart, g_fds.__init_p,
sizeof(g_fds.__init_p));
}
g_fds.n = (size + chunk) / sizeof(*g_fds.p);
mapsize = size + chunk;
g_fds.n = fd + 1;
g_fds.e =
_extend(g_fds.p, g_fds.n * sizeof(*g_fds.p), g_fds.e, 0x6ff000000000);
return fd;
}

View file

@ -16,6 +16,7 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/calls.h"
#include "libc/calls/sig.internal.h"
#include "libc/calls/state.internal.h"
#include "libc/calls/strace.internal.h"

View file

@ -25,10 +25,9 @@ struct Fd {
};
struct Fds {
int f; /* lowest free slot */
size_t n; /* monotonic capacity */
struct Fd *p;
struct Fd __init_p[OPEN_MAX];
int f; /* lowest free slot */
size_t n;
struct Fd *p, *e;
};
COSMOPOLITAN_C_END_