Rewrite ZipOS

This reduces the virtual memory usage of Emacs for me by 30%. We now
have a simpler implementation that uses read(), rather mmap()ing the
whole executable.
This commit is contained in:
Justine Tunney 2023-10-03 07:27:25 -07:00
parent ff77f2a6af
commit b01282e23e
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
21 changed files with 408 additions and 421 deletions

View file

@ -16,25 +16,17 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/atomic.h"
#include "libc/calls/calls.h"
#include "libc/calls/metalfile.internal.h"
#include "libc/calls/struct/stat.h"
#include "libc/cosmo.h"
#include "libc/fmt/conv.h"
#include "libc/intrin/cmpxchg.h"
#include "libc/intrin/promises.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/macros.internal.h"
#include "libc/limits.h"
#include "libc/mem/alg.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/stack.h"
#include "libc/runtime/zipos.internal.h"
#include "libc/sysv/consts/auxv.h"
#include "libc/sysv/consts/f.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/posix.h"
#include "libc/sysv/consts/prot.h"
#include "libc/thread/thread.h"
#include "libc/zip.internal.h"
@ -42,118 +34,162 @@
__static_yoink(APE_COM_NAME);
#endif
struct ZiposPlanner {
uint8_t buf[kZipLookbehindBytes];
struct stat st;
};
static struct Zipos __zipos;
static atomic_uint __zipos_once;
static void __zipos_dismiss(uint8_t *map, const uint8_t *cdir, long pg) {
uint64_t i, n, c, ef, lf, mo, lo, hi;
// determine the byte range of zip file content (excluding central dir)
c = GetZipCdirOffset(cdir);
n = GetZipCdirRecords(cdir);
for (lo = c, hi = i = 0; i < n; ++i, c += ZIP_CFILE_HDRSIZE(map + c)) {
lf = GetZipCfileOffset(map + c);
if (lf < lo) lo = lf;
ef = lf + ZIP_LFILE_HDRSIZE(map + lf) + GetZipLfileCompressedSize(map + lf);
if (ef > hi) hi = ef;
}
// unmap the executable portion beneath the local files
mo = ROUNDDOWN(lo, FRAMESIZE);
if (mo) munmap(map, mo);
// this is supposed to reduce our rss usage but does it really?
lo = ROUNDDOWN(lo, pg);
hi = MIN(ROUNDUP(hi, pg), ROUNDDOWN(c, pg));
if (hi > lo) {
posix_madvise(map + lo, hi - lo, POSIX_MADV_DONTNEED);
}
static void __zipos_wipe(void) {
pthread_mutex_init(&__zipos.lock, 0);
}
static int __zipos_compare_names(const void *a, const void *b, void *c) {
const size_t *x = (const size_t *)a;
const size_t *y = (const size_t *)b;
struct Zipos *z = (struct Zipos *)c;
int xn = ZIP_CFILE_NAMESIZE(z->map + *x);
int yn = ZIP_CFILE_NAMESIZE(z->map + *y);
void __zipos_lock(void) {
pthread_mutex_lock(&__zipos.lock);
}
void __zipos_unlock(void) {
pthread_mutex_unlock(&__zipos.lock);
}
static int __zipos_compare(const void *a, const void *b, void *c) {
uint8_t *cdir = (uint8_t *)c;
const int *x = (const int *)a;
const int *y = (const int *)b;
int xn = ZIP_CFILE_NAMESIZE(cdir + *x);
int yn = ZIP_CFILE_NAMESIZE(cdir + *y);
int n = MIN(xn, yn);
if (n) {
int res =
memcmp(ZIP_CFILE_NAME(z->map + *x), ZIP_CFILE_NAME(z->map + *y), n);
int res = memcmp(ZIP_CFILE_NAME(cdir + *x), ZIP_CFILE_NAME(cdir + *y), n);
if (res) return res;
}
return xn - yn; // xn and yn are 16-bit
}
// creates binary searchable array of file offsets to cdir records
static void __zipos_generate_index(struct Zipos *zipos) {
size_t c, i;
zipos->records = GetZipCdirRecords(zipos->cdir);
zipos->index = _mapanon(zipos->records * sizeof(size_t));
for (i = 0, c = GetZipCdirOffset(zipos->cdir); i < zipos->records;
++i, c += ZIP_CFILE_HDRSIZE(zipos->map + c)) {
zipos->index[i] = c;
static dontinline int __zipos_plan(int fd, struct ZiposPlanner *p) {
// get file size and dev/inode
// this might fail if a bad fd was passed via environment
if (fstat(fd, &p->st)) {
return kZipErrorOpenFailed;
}
// read the last 64kb of file
// the zip file format magic can be anywhere in there
int amt;
int64_t off;
if (p->st.st_size <= kZipLookbehindBytes) {
off = 0;
amt = p->st.st_size;
} else {
off = p->st.st_size - kZipLookbehindBytes;
amt = p->st.st_size - off;
}
if (pread(fd, p->buf, amt, off) != amt) {
return kZipErrorReadFailed;
}
// search backwards for the end-of-central-directory record
// the eocd (cdir) says where the central directory (cfile) array is located
// we consistency check some legacy fields, to be extra sure that it is eocd
int cnt = 0;
for (int i = amt - MIN(kZipCdirHdrMinSize, kZipCdir64LocatorSize); i; --i) {
uint32_t magic = READ32LE(p->buf + i);
if (magic == kZipCdir64LocatorMagic && i + kZipCdir64LocatorSize <= amt &&
pread(fd, p->buf, kZipCdirHdrMinSize,
ZIP_LOCATE64_OFFSET(p->buf + i)) == kZipCdirHdrMinSize &&
READ32LE(p->buf) == kZipCdir64HdrMagic &&
ZIP_CDIR64_RECORDS(p->buf) == ZIP_CDIR64_RECORDSONDISK(p->buf) &&
ZIP_CDIR64_RECORDS(p->buf) && ZIP_CDIR64_SIZE(p->buf) <= INT_MAX) {
cnt = ZIP_CDIR64_RECORDS(p->buf);
off = ZIP_CDIR64_OFFSET(p->buf);
amt = ZIP_CDIR64_SIZE(p->buf);
break;
}
if (magic == kZipCdirHdrMagic && i + kZipCdirHdrMinSize <= amt &&
ZIP_CDIR_RECORDS(p->buf + i) == ZIP_CDIR_RECORDSONDISK(p->buf + i) &&
ZIP_CDIR_RECORDS(p->buf + i) && ZIP_CDIR_SIZE(p->buf + i) <= INT_MAX) {
cnt = ZIP_CDIR_RECORDS(p->buf + i);
off = ZIP_CDIR_OFFSET(p->buf + i);
amt = ZIP_CDIR_SIZE(p->buf + i);
break;
}
}
if (cnt <= 0) {
return kZipErrorEocdNotFound;
}
// we'll store the entire central directory in memory
// in addition to a file name index that's bisectable
void *memory;
int cdirsize = amt;
size_t indexsize = cnt * sizeof(int);
size_t memorysize = indexsize + cdirsize;
if (!(memory = _mapanon(memorysize))) {
return kZipErrorMapFailed;
}
int *index = memory;
uint8_t *cdir = (uint8_t *)memory + indexsize;
// read the central directory
if (pread(fd, cdir, cdirsize, off) != cdirsize) {
return kZipErrorReadFailed;
}
// generate our file name index
// smoothsort() isn't the fastest algorithm, but it guarantees
// o(logn), won't smash the stack and doesn't depend on malloc
smoothsort_r(zipos->index, zipos->records, sizeof(size_t),
__zipos_compare_names, zipos);
int entry_index, entry_offset;
for (entry_index = entry_offset = 0;
entry_index < cnt && entry_offset + kZipCfileHdrMinSize <= cdirsize &&
entry_offset + ZIP_CFILE_HDRSIZE(cdir + entry_offset) <= cdirsize;
++entry_index, entry_offset += ZIP_CFILE_HDRSIZE(cdir + entry_offset)) {
index[entry_index] = entry_offset;
}
if (cnt != entry_index) {
return kZipErrorZipCorrupt;
}
smoothsort_r(index, cnt, sizeof(int), __zipos_compare, cdir);
// finally populate the global singleton
__zipos.cnt = cnt;
__zipos.cdir = cdir;
__zipos.index = index;
__zipos.dev = p->st.st_ino;
__zipos.cdirsize = cdirsize;
return kZipOk;
}
static dontinline int __zipos_setup(int fd) {
// allocates 64kb on the stack as scratch memory
// this should only be used from the main thread
#pragma GCC push_options
#pragma GCC diagnostic ignored "-Wframe-larger-than="
struct ZiposPlanner p;
CheckLargeStackAllocation(&p, sizeof(p));
#pragma GCC pop_options
return __zipos_plan(fd, &p);
}
static void __zipos_init(void) {
char *endptr;
const char *s;
struct stat st;
int x, fd, err, msg;
uint8_t *map, *cdir;
const char *progpath;
if (!(s = getenv("COSMOPOLITAN_DISABLE_ZIPOS"))) {
// this environment variable may be a filename or file descriptor
if ((progpath = getenv("COSMOPOLITAN_INIT_ZIPOS")) &&
(x = strtol(progpath, &endptr, 10)) >= 0 && !*endptr) {
fd = x;
} else {
fd = -1;
}
if (fd != -1 || PLEDGED(RPATH)) {
if (fd == -1) {
if (!progpath) {
progpath = GetProgramExecutableName();
}
fd = open(progpath, O_RDONLY);
}
if (fd != -1) {
if (!fstat(fd, &st) && (map = mmap(0, st.st_size, PROT_READ, MAP_SHARED,
fd, 0)) != MAP_FAILED) {
if ((cdir = GetZipEocd(map, st.st_size, &err))) {
long pagesz = getauxval(AT_PAGESZ);
__zipos_dismiss(map, cdir, pagesz);
__zipos.map = map;
__zipos.cdir = cdir;
__zipos.dev = st.st_ino;
__zipos.pagesz = pagesz;
__zipos_generate_index(&__zipos);
msg = kZipOk;
} else {
munmap(map, st.st_size);
msg = !cdir ? err : kZipErrorRaceCondition;
}
} else {
msg = kZipErrorMapFailed;
}
close(fd);
} else {
msg = kZipErrorOpenFailed;
}
} else {
msg = -666;
}
} else {
progpath = 0;
msg = -777;
int status;
if (getenv("COSMOPOLITAN_DISABLE_ZIPOS")) return;
if (!(__zipos.progpath = getenv("COSMOPOLITAN_INIT_ZIPOS"))) {
__zipos.progpath = GetProgramExecutableName();
}
(void)msg;
STRACE("__zipos_get(%#s) → %d% m", progpath, msg);
int fd = open(__zipos.progpath, O_RDONLY);
if (fd != -1) {
if (!(status = __zipos_setup(fd))) {
__zipos_wipe();
pthread_atfork(__zipos_lock, __zipos_unlock, __zipos_wipe);
}
close(fd);
} else {
status = kZipErrorOpenFailed;
}
(void)status;
STRACE("__zipos_init(%#s) → %d% m", __zipos.progpath, status);
}
/**
@ -161,6 +197,6 @@ static void __zipos_init(void) {
* @asyncsignalsafe
*/
struct Zipos *__zipos_get(void) {
cosmo_once(&__zipos_once, __zipos_init);
return __zipos.cdir ? &__zipos : 0;
cosmo_once(&__zipos.once, __zipos_init);
return __zipos.cnt ? &__zipos : 0;
}