Add filesystem index to ZipOS

This change brings the read-only /zip/... filesystem into performance
parity with the native Linux filesystem, which doesn't use compression.
Therefore, imagine how much faster this could be with bloom filtering
rather than simple binary search, and with zstd instead of zlib.
This commit is contained in:
Justine Tunney 2023-08-18 07:04:55 -07:00
parent 7100b1cf91
commit 5b42c810a5
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
7 changed files with 93 additions and 25 deletions

View file

@ -16,6 +16,7 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/kmalloc.h"
#include "libc/assert.h"
#include "libc/atomic.h"
#include "libc/dce.h"

View file

@ -1,5 +1,7 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_KMALLOC_H_
#define COSMOPOLITAN_LIBC_INTRIN_KMALLOC_H_
#ifdef _COSMO_SOURCE
#define kmalloc __kmalloc
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
@ -11,4 +13,5 @@ void __kmalloc_unlock(void);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* _COSMO_SOURCE */
#endif /* COSMOPOLITAN_LIBC_INTRIN_KMALLOC_H_ */

View file

@ -16,37 +16,66 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
#include "libc/runtime/zipos.internal.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/s.h"
#include "libc/sysv/errfuns.h"
#include "libc/zip.internal.h"
// Looks up `name` among the records of the zip central directory.
//
// Return values visible in this listing: the central directory offset
// of the matching record, ZIPOS_SYNTHETIC_DIRECTORY when the name is
// empty (the /zip root) or only occurs as a directory prefix of some
// record, and -1 when nothing matches.
//
// NOTE(review): the signature and the `len` declaration each appear
// twice, and a linear scan over the central directory is mixed with a
// binary search over zipos->index. This looks like the removed and the
// added lines of the hunk ("-16,37 +16,66") shown together -- confirm
// which lines are live before relying on the comments below.
static ssize_t __zipos_scan(struct Zipos *zipos, struct ZiposUri *name) {
size_t len = name->len;
ssize_t __zipos_scan(struct Zipos *zipos, struct ZiposUri *name) {
// strip trailing slash from search name
int len = name->len;
if (len && name->path[len - 1] == '/') {
--len;
}
// empty string means the /zip root directory
if (!len) {
return ZIPOS_SYNTHETIC_DIRECTORY;
}
bool found_subfile = false;
size_t c = GetZipCdirOffset(zipos->cdir);
size_t n = GetZipCdirRecords(zipos->cdir);
// linear scan: visit every central directory record in file order
for (size_t i = 0; i < n; ++i, c += ZIP_CFILE_HDRSIZE(zipos->map + c)) {
const char *zname = ZIP_CFILE_NAME(zipos->map + c);
size_t zsize = ZIP_CFILE_NAMESIZE(zipos->map + c);
// record name equals the search name, or the search name plus one '/'
if ((len == zsize || (len + 1 == zsize && zname[len] == '/')) &&
!memcmp(name->path, zname, len)) {
return c;
// record name continues past "name/", so it sits underneath the name
} else if (len + 1 < zsize && zname[len] == '/' &&
!memcmp(name->path, zname, len)) {
found_subfile = true;
// binary search for leftmost name in central directory
// (the lowest index slot whose record name compares >= the search name)
int l = 0;
int r = zipos->records;
while (l < r) {
int m = (l & r) + ((l ^ r) >> 1); // floor((l+r)/2) without forming l+r
const char *xp = ZIP_CFILE_NAME(zipos->map + zipos->index[m]);
const char *yp = name->path;
int xn = ZIP_CFILE_NAMESIZE(zipos->map + zipos->index[m]);
int yn = len;
int n = MIN(xn, yn);
int c;
// order by the shared prefix first; on a tie the shorter name is smaller
if (n) {
if (!(c = memcmp(xp, yp, n))) {
c = xn - yn; // xn and yn are 16-bit
}
} else {
c = xn - yn;
}
// record m sorts before the search name, so the answer is right of m
if (c < 0) {
l = m + 1;
} else {
r = m;
}
}
if (found_subfile) {
return ZIPOS_SYNTHETIC_DIRECTORY;
// return offset of leftmost record if it matches
if (l < zipos->records) {
size_t cfile = zipos->index[l];
const char *zname = ZIP_CFILE_NAME(zipos->map + cfile);
int zsize = ZIP_CFILE_NAMESIZE(zipos->map + cfile);
// NOTE(review): only slot l is inspected. A sibling such as "name-x" or
// "name.x" compares below "name/" and "name/x" because '-' and '.' are
// less than '/', so when such a sibling exists and there is no exact
// "name" record, slot l holds the sibling, both tests below fail, and -1
// is returned even though "name/..." records exist -- confirm.
if ((len == zsize || (len + 1 == zsize && zname[len] == '/')) &&
!memcmp(name->path, zname, len)) {
return cfile;
} else if (len + 1 < zsize && zname[len] == '/' &&
!memcmp(name->path, zname, len)) {
return ZIPOS_SYNTHETIC_DIRECTORY;
}
}
// otherwise return not found
return -1;
}

View file

@ -21,9 +21,11 @@
#include "libc/calls/struct/stat.h"
#include "libc/fmt/conv.h"
#include "libc/intrin/cmpxchg.h"
#include "libc/intrin/kmalloc.h"
#include "libc/intrin/promises.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/macros.internal.h"
#include "libc/mem/alg.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/zipos.internal.h"
#include "libc/sysv/consts/f.h"
@ -37,27 +39,55 @@
__static_yoink(APE_COM_NAME);
#endif
// Returns the lowest offset referenced by the central directory: the
// minimum of the central directory's own offset and the
// GetZipCfileOffset() value of each of its records.
//
// NOTE(review): the signature and the loop header each appear twice,
// once spelled with `base` and once with `map`; presumably these are
// the before/after lines of a parameter rename -- confirm.
static uint64_t __zipos_get_min_offset(const uint8_t *base,
static uint64_t __zipos_get_min_offset(const uint8_t *map,
const uint8_t *cdir) {
uint64_t i, n, c, r, o;
c = GetZipCdirOffset(cdir);
n = GetZipCdirRecords(cdir);
// c steps from record to record; r holds the smallest offset seen so far
for (r = c, i = 0; i < n; ++i, c += ZIP_CFILE_HDRSIZE(base + c)) {
o = GetZipCfileOffset(base + c);
for (r = c, i = 0; i < n; ++i, c += ZIP_CFILE_HDRSIZE(map + c)) {
o = GetZipCfileOffset(map + c);
if (o < r) r = o;
}
return r;
}
// Unmaps the front of the mapping that lies below the lowest offset
// reported by __zipos_get_min_offset().
//
// NOTE(review): two signatures and two computations of n appear here
// (a three-argument base/cdir/map form and a two-argument map/cdir
// form); presumably the before/after lines of the same change -- confirm.
static void __zipos_munmap_unneeded(const uint8_t *base, const uint8_t *cdir,
const uint8_t *map) {
static void __zipos_munmap_unneeded(const uint8_t *map, const uint8_t *cdir) {
uint64_t n;
n = __zipos_get_min_offset(base, cdir);
n += base - map;
n = __zipos_get_min_offset(map, cdir);
// presumably rounds n down to a multiple of FRAMESIZE -- confirm
n = ROUNDDOWN(n, FRAMESIZE);
// nothing to release when the rounded size is zero
if (n) munmap(map, n);
}
// Comparison callback handed to smoothsort_r(): orders two central
// directory record offsets by the names stored in those records.
//
// @param a points to the size_t offset of the first record
// @param b points to the size_t offset of the second record
// @param c is the struct Zipos whose map contains both records
// @return the memcmp() result over the shared name prefix when it is
//     nonzero, otherwise the difference between the two name lengths
static int __zipos_compare_names(const void *a, const void *b, void *c) {
  struct Zipos *zip = (struct Zipos *)c;
  const size_t *lhs = (const size_t *)a;
  const size_t *rhs = (const size_t *)b;
  int lhs_len = ZIP_CFILE_NAMESIZE(zip->map + *lhs);
  int rhs_len = ZIP_CFILE_NAMESIZE(zip->map + *rhs);
  int shared = MIN(lhs_len, rhs_len);
  int order = 0;
  // memcmp() is only consulted when both names are non-empty
  if (shared) {
    order = memcmp(ZIP_CFILE_NAME(zip->map + *lhs),
                   ZIP_CFILE_NAME(zip->map + *rhs), shared);
  }
  if (order) {
    return order;
  }
  // equal prefixes: the shorter name sorts first (lengths are 16-bit)
  return lhs_len - rhs_len;
}
// Builds zipos->index: one central directory record offset per record,
// sorted by record name so that lookups can binary search it.
//
// Stores the record count in zipos->records and a kmalloc()'d array of
// that many size_t offsets in zipos->index.
static void __zipos_generate_index(struct Zipos *zipos) {
  zipos->records = GetZipCdirRecords(zipos->cdir);
  zipos->index = kmalloc(zipos->records * sizeof(size_t));
  // collect the offsets in the order the records are laid out
  size_t offset = GetZipCdirOffset(zipos->cdir);
  size_t slot = 0;
  while (slot < zipos->records) {
    zipos->index[slot] = offset;
    offset += ZIP_CFILE_HDRSIZE(zipos->map + offset);
    ++slot;
  }
  // smoothsort isn't the fastest algorithm, but its worst case is
  // O(n log n), it won't smash the stack, and it doesn't depend on
  // malloc
  smoothsort_r(zipos->index, zipos->records, sizeof(size_t),
               __zipos_compare_names, zipos);
}
/**
* Returns pointer to zip central directory of current executable.
* @asyncsignalsafe
@ -95,10 +125,11 @@ struct Zipos *__zipos_get(void) {
(map = mmap(0, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0)) !=
MAP_FAILED) {
if ((cdir = GetZipEocd(map, st.st_size, &err))) {
__zipos_munmap_unneeded(map, cdir, map);
__zipos_munmap_unneeded(map, cdir);
zipos.map = map;
zipos.cdir = cdir;
zipos.dev = st.st_ino;
__zipos_generate_index(&zipos);
msg = kZipOk;
} else {
munmap(map, st.st_size);

View file

@ -35,6 +35,8 @@ struct Zipos {
uint8_t *map;
uint8_t *cdir;
uint64_t dev;
size_t *index;
size_t records;
struct ZiposHandle *freelist;
};
@ -45,6 +47,7 @@ void __zipos_free(struct ZiposHandle *);
struct Zipos *__zipos_get(void) pureconst;
size_t __zipos_normpath(char *, const char *, size_t);
ssize_t __zipos_find(struct Zipos *, struct ZiposUri *);
ssize_t __zipos_scan(struct Zipos *, struct ZiposUri *);
ssize_t __zipos_parseuri(const char *, struct ZiposUri *);
uint64_t __zipos_inode(struct Zipos *, int64_t, const void *, size_t);
int __zipos_open(struct ZiposUri *, int);

View file

@ -382,7 +382,7 @@ static struct dirent *readdir_zipos(DIR *dir) {
while (p.len && p.path[p.len - 1] == '/') --p.len;
p.path[p.len] = 0;
ent->d_ino = __zipos_inode(
dir->zip.zipos, __zipos_find(dir->zip.zipos, &p), p.path, p.len);
dir->zip.zipos, __zipos_scan(dir->zip.zipos, &p), p.path, p.len);
} else {
uint8_t *s = ZIP_CFILE_NAME(dir->zip.zipos->map + dir->zip.offset);
size_t n = ZIP_CFILE_NAMESIZE(dir->zip.zipos->map + dir->zip.offset);

View file

@ -87,6 +87,7 @@ TEST(__zipos_normpath, overflows_willNulTerminate) {
TEST(__zipos_normpath, vectors) {
static const char V[][2][128] = {
{"", ""},
{"/", ""},
{"/..", ""},
{"/../", ""},
{".", ""},