mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 03:27:39 +00:00
Add filesystem index to ZipOS
This change brings the /zip/... read-only filesystem into performance parity with the native Linux filesystem, which doesn't use compression. Imagine, therefore, how much faster this could be with bloom filtering rather than simple binary search, and if we used zstd instead of zlib.
This commit is contained in:
parent
7100b1cf91
commit
5b42c810a5
7 changed files with 93 additions and 25 deletions
|
@ -16,6 +16,7 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/kmalloc.h"
|
||||
#include "libc/assert.h"
|
||||
#include "libc/atomic.h"
|
||||
#include "libc/dce.h"
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_KMALLOC_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_KMALLOC_H_
|
||||
#ifdef _COSMO_SOURCE
|
||||
#define kmalloc __kmalloc
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
|
@ -11,4 +13,5 @@ void __kmalloc_unlock(void);
|
|||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* _COSMO_SOURCE */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_KMALLOC_H_ */
|
||||
|
|
|
@ -16,37 +16,66 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/runtime/zipos.internal.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/sysv/consts/s.h"
|
||||
#include "libc/sysv/errfuns.h"
|
||||
#include "libc/zip.internal.h"
|
||||
|
||||
static ssize_t __zipos_scan(struct Zipos *zipos, struct ZiposUri *name) {
|
||||
size_t len = name->len;
|
||||
ssize_t __zipos_scan(struct Zipos *zipos, struct ZiposUri *name) {
|
||||
|
||||
// strip trailing slash from search name
|
||||
int len = name->len;
|
||||
if (len && name->path[len - 1] == '/') {
|
||||
--len;
|
||||
}
|
||||
|
||||
// empty string means the /zip root directory
|
||||
if (!len) {
|
||||
return ZIPOS_SYNTHETIC_DIRECTORY;
|
||||
}
|
||||
bool found_subfile = false;
|
||||
size_t c = GetZipCdirOffset(zipos->cdir);
|
||||
size_t n = GetZipCdirRecords(zipos->cdir);
|
||||
for (size_t i = 0; i < n; ++i, c += ZIP_CFILE_HDRSIZE(zipos->map + c)) {
|
||||
const char *zname = ZIP_CFILE_NAME(zipos->map + c);
|
||||
size_t zsize = ZIP_CFILE_NAMESIZE(zipos->map + c);
|
||||
if ((len == zsize || (len + 1 == zsize && zname[len] == '/')) &&
|
||||
!memcmp(name->path, zname, len)) {
|
||||
return c;
|
||||
} else if (len + 1 < zsize && zname[len] == '/' &&
|
||||
!memcmp(name->path, zname, len)) {
|
||||
found_subfile = true;
|
||||
|
||||
// binary search for leftmost name in central directory
|
||||
int l = 0;
|
||||
int r = zipos->records;
|
||||
while (l < r) {
|
||||
int m = (l & r) + ((l ^ r) >> 1); // floor((a+b)/2)
|
||||
const char *xp = ZIP_CFILE_NAME(zipos->map + zipos->index[m]);
|
||||
const char *yp = name->path;
|
||||
int xn = ZIP_CFILE_NAMESIZE(zipos->map + zipos->index[m]);
|
||||
int yn = len;
|
||||
int n = MIN(xn, yn);
|
||||
int c;
|
||||
if (n) {
|
||||
if (!(c = memcmp(xp, yp, n))) {
|
||||
c = xn - yn; // xn and yn are 16-bit
|
||||
}
|
||||
} else {
|
||||
c = xn - yn;
|
||||
}
|
||||
if (c < 0) {
|
||||
l = m + 1;
|
||||
} else {
|
||||
r = m;
|
||||
}
|
||||
}
|
||||
if (found_subfile) {
|
||||
return ZIPOS_SYNTHETIC_DIRECTORY;
|
||||
|
||||
// return pointer to leftmost record if it matches
|
||||
if (l < zipos->records) {
|
||||
size_t cfile = zipos->index[l];
|
||||
const char *zname = ZIP_CFILE_NAME(zipos->map + cfile);
|
||||
int zsize = ZIP_CFILE_NAMESIZE(zipos->map + cfile);
|
||||
if ((len == zsize || (len + 1 == zsize && zname[len] == '/')) &&
|
||||
!memcmp(name->path, zname, len)) {
|
||||
return cfile;
|
||||
} else if (len + 1 < zsize && zname[len] == '/' &&
|
||||
!memcmp(name->path, zname, len)) {
|
||||
return ZIPOS_SYNTHETIC_DIRECTORY;
|
||||
}
|
||||
}
|
||||
|
||||
// otherwise return not found
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
|
|
@ -21,9 +21,11 @@
|
|||
#include "libc/calls/struct/stat.h"
|
||||
#include "libc/fmt/conv.h"
|
||||
#include "libc/intrin/cmpxchg.h"
|
||||
#include "libc/intrin/kmalloc.h"
|
||||
#include "libc/intrin/promises.internal.h"
|
||||
#include "libc/intrin/strace.internal.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/mem/alg.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/runtime/zipos.internal.h"
|
||||
#include "libc/sysv/consts/f.h"
|
||||
|
@ -37,27 +39,55 @@
|
|||
__static_yoink(APE_COM_NAME);
|
||||
#endif
|
||||
|
||||
static uint64_t __zipos_get_min_offset(const uint8_t *base,
|
||||
static uint64_t __zipos_get_min_offset(const uint8_t *map,
|
||||
const uint8_t *cdir) {
|
||||
uint64_t i, n, c, r, o;
|
||||
c = GetZipCdirOffset(cdir);
|
||||
n = GetZipCdirRecords(cdir);
|
||||
for (r = c, i = 0; i < n; ++i, c += ZIP_CFILE_HDRSIZE(base + c)) {
|
||||
o = GetZipCfileOffset(base + c);
|
||||
for (r = c, i = 0; i < n; ++i, c += ZIP_CFILE_HDRSIZE(map + c)) {
|
||||
o = GetZipCfileOffset(map + c);
|
||||
if (o < r) r = o;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static void __zipos_munmap_unneeded(const uint8_t *base, const uint8_t *cdir,
|
||||
const uint8_t *map) {
|
||||
static void __zipos_munmap_unneeded(const uint8_t *map, const uint8_t *cdir) {
|
||||
uint64_t n;
|
||||
n = __zipos_get_min_offset(base, cdir);
|
||||
n += base - map;
|
||||
n = __zipos_get_min_offset(map, cdir);
|
||||
n = ROUNDDOWN(n, FRAMESIZE);
|
||||
if (n) munmap(map, n);
|
||||
}
|
||||
|
||||
static int __zipos_compare_names(const void *a, const void *b, void *c) {
|
||||
const size_t *x = (const size_t *)a;
|
||||
const size_t *y = (const size_t *)b;
|
||||
struct Zipos *z = (struct Zipos *)c;
|
||||
int xn = ZIP_CFILE_NAMESIZE(z->map + *x);
|
||||
int yn = ZIP_CFILE_NAMESIZE(z->map + *y);
|
||||
int n = MIN(xn, yn);
|
||||
if (n) {
|
||||
int res =
|
||||
memcmp(ZIP_CFILE_NAME(z->map + *x), ZIP_CFILE_NAME(z->map + *y), n);
|
||||
if (res) return res;
|
||||
}
|
||||
return xn - yn; // xn and yn are 16-bit
|
||||
}
|
||||
|
||||
// creates binary searchable array of file offsets to cdir records
|
||||
static void __zipos_generate_index(struct Zipos *zipos) {
|
||||
size_t c, i;
|
||||
zipos->records = GetZipCdirRecords(zipos->cdir);
|
||||
zipos->index = kmalloc(zipos->records * sizeof(size_t));
|
||||
for (i = 0, c = GetZipCdirOffset(zipos->cdir); i < zipos->records;
|
||||
++i, c += ZIP_CFILE_HDRSIZE(zipos->map + c)) {
|
||||
zipos->index[i] = c;
|
||||
}
|
||||
// smoothsort() isn't the fastest algorithm, but it guarantees
|
||||
// O(n log n), won't smash the stack and doesn't depend on malloc
|
||||
smoothsort_r(zipos->index, zipos->records, sizeof(size_t),
|
||||
__zipos_compare_names, zipos);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns pointer to zip central directory of current executable.
|
||||
* @asyncsignalsafe
|
||||
|
@ -95,10 +125,11 @@ struct Zipos *__zipos_get(void) {
|
|||
(map = mmap(0, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0)) !=
|
||||
MAP_FAILED) {
|
||||
if ((cdir = GetZipEocd(map, st.st_size, &err))) {
|
||||
__zipos_munmap_unneeded(map, cdir, map);
|
||||
__zipos_munmap_unneeded(map, cdir);
|
||||
zipos.map = map;
|
||||
zipos.cdir = cdir;
|
||||
zipos.dev = st.st_ino;
|
||||
__zipos_generate_index(&zipos);
|
||||
msg = kZipOk;
|
||||
} else {
|
||||
munmap(map, st.st_size);
|
||||
|
|
|
@ -35,6 +35,8 @@ struct Zipos {
|
|||
uint8_t *map;
|
||||
uint8_t *cdir;
|
||||
uint64_t dev;
|
||||
size_t *index;
|
||||
size_t records;
|
||||
struct ZiposHandle *freelist;
|
||||
};
|
||||
|
||||
|
@ -45,6 +47,7 @@ void __zipos_free(struct ZiposHandle *);
|
|||
struct Zipos *__zipos_get(void) pureconst;
|
||||
size_t __zipos_normpath(char *, const char *, size_t);
|
||||
ssize_t __zipos_find(struct Zipos *, struct ZiposUri *);
|
||||
ssize_t __zipos_scan(struct Zipos *, struct ZiposUri *);
|
||||
ssize_t __zipos_parseuri(const char *, struct ZiposUri *);
|
||||
uint64_t __zipos_inode(struct Zipos *, int64_t, const void *, size_t);
|
||||
int __zipos_open(struct ZiposUri *, int);
|
||||
|
|
|
@ -382,7 +382,7 @@ static struct dirent *readdir_zipos(DIR *dir) {
|
|||
while (p.len && p.path[p.len - 1] == '/') --p.len;
|
||||
p.path[p.len] = 0;
|
||||
ent->d_ino = __zipos_inode(
|
||||
dir->zip.zipos, __zipos_find(dir->zip.zipos, &p), p.path, p.len);
|
||||
dir->zip.zipos, __zipos_scan(dir->zip.zipos, &p), p.path, p.len);
|
||||
} else {
|
||||
uint8_t *s = ZIP_CFILE_NAME(dir->zip.zipos->map + dir->zip.offset);
|
||||
size_t n = ZIP_CFILE_NAMESIZE(dir->zip.zipos->map + dir->zip.offset);
|
||||
|
|
|
@ -87,6 +87,7 @@ TEST(__zipos_normpath, overflows_willNulTerminate) {
|
|||
TEST(__zipos_normpath, vectors) {
|
||||
static const char V[][2][128] = {
|
||||
{"", ""},
|
||||
{"/", ""},
|
||||
{"/..", ""},
|
||||
{"/../", ""},
|
||||
{".", ""},
|
||||
|
|
Loading…
Reference in a new issue