From 5b42c810a5d0fab1749e3ad6c8f9929f08b8d94d Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Fri, 18 Aug 2023 07:04:55 -0700 Subject: [PATCH] Add filesystem index to ZipOS This change brings the /zip/... read-only filesystem into performance parity with the native Linux filesystem, which doesn't use compression. Therefore, imagine how much faster this could be with bloom filtering rather than simple binary search, and if we used zstd instead of zlib. --- libc/intrin/kmalloc.c | 1 + libc/intrin/kmalloc.h | 3 ++ libc/runtime/zipos-find.c | 61 ++++++++++++++++++++++++++--------- libc/runtime/zipos-get.c | 47 ++++++++++++++++++++++----- libc/runtime/zipos.internal.h | 3 ++ libc/stdio/dirstream.c | 2 +- test/libc/stdio/zipdir_test.c | 1 + 7 files changed, 93 insertions(+), 25 deletions(-) diff --git a/libc/intrin/kmalloc.c b/libc/intrin/kmalloc.c index 342e047f0..16d826255 100644 --- a/libc/intrin/kmalloc.c +++ b/libc/intrin/kmalloc.c @@ -16,6 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/intrin/kmalloc.h" #include "libc/assert.h" #include "libc/atomic.h" #include "libc/dce.h" diff --git a/libc/intrin/kmalloc.h b/libc/intrin/kmalloc.h index 7e3d751ef..c97d8a47a 100644 --- a/libc/intrin/kmalloc.h +++ b/libc/intrin/kmalloc.h @@ -1,5 +1,7 @@ #ifndef COSMOPOLITAN_LIBC_INTRIN_KMALLOC_H_ #define COSMOPOLITAN_LIBC_INTRIN_KMALLOC_H_ +#ifdef _COSMO_SOURCE +#define kmalloc __kmalloc #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ @@ -11,4 +13,5 @@ void __kmalloc_unlock(void); COSMOPOLITAN_C_END_ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ +#endif /* _COSMO_SOURCE */ #endif /* COSMOPOLITAN_LIBC_INTRIN_KMALLOC_H_ */ diff --git a/libc/runtime/zipos-find.c b/libc/runtime/zipos-find.c index 71eb1417c..8cd028c31 100644 --- a/libc/runtime/zipos-find.c +++ b/libc/runtime/zipos-find.c @@ -16,37 +16,66 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/macros.internal.h" #include "libc/runtime/zipos.internal.h" #include "libc/str/str.h" #include "libc/sysv/consts/s.h" #include "libc/sysv/errfuns.h" #include "libc/zip.internal.h" -static ssize_t __zipos_scan(struct Zipos *zipos, struct ZiposUri *name) { - size_t len = name->len; +ssize_t __zipos_scan(struct Zipos *zipos, struct ZiposUri *name) { + + // strip trailing slash from search name + int len = name->len; if (len && name->path[len - 1] == '/') { --len; } + + // empty string means the /zip root directory if (!len) { return ZIPOS_SYNTHETIC_DIRECTORY; } - bool found_subfile = false; - size_t c = GetZipCdirOffset(zipos->cdir); - size_t n = GetZipCdirRecords(zipos->cdir); - for (size_t i = 0; i < n; ++i, c += ZIP_CFILE_HDRSIZE(zipos->map + c)) { - const char *zname = ZIP_CFILE_NAME(zipos->map + c); - size_t zsize = ZIP_CFILE_NAMESIZE(zipos->map + c); - if ((len == zsize || (len + 1 == zsize && zname[len] == '/')) && - !memcmp(name->path, zname, len)) { - return c; - } else if (len + 1 < zsize && zname[len] == '/' && - !memcmp(name->path, zname, len)) { - found_subfile = true; + + // binary search for leftmost name in central directory + int l = 0; + int r = zipos->records; + while (l < r) { + int m = (l & r) + ((l ^ r) >> 1); // floor((a+b)/2) + const char *xp = ZIP_CFILE_NAME(zipos->map + zipos->index[m]); + const char *yp = name->path; + int xn = ZIP_CFILE_NAMESIZE(zipos->map + zipos->index[m]); + int yn = len; + int n = MIN(xn, yn); + int c; + if (n) { + if (!(c = memcmp(xp, yp, n))) { + c = xn - yn; // xn and yn are 16-bit + } + } else { + c = xn - yn; + } + if (c < 0) { + l = m + 1; + } else { + r = m; } } - if (found_subfile) { - return ZIPOS_SYNTHETIC_DIRECTORY; + + // return pointer to leftmost record if it matches + if (l < zipos->records) { + size_t cfile = zipos->index[l]; + const char *zname = ZIP_CFILE_NAME(zipos->map + cfile); + int zsize = 
ZIP_CFILE_NAMESIZE(zipos->map + cfile); + if ((len == zsize || (len + 1 == zsize && zname[len] == '/')) && + !memcmp(name->path, zname, len)) { + return cfile; + } else if (len + 1 < zsize && zname[len] == '/' && + !memcmp(name->path, zname, len)) { + return ZIPOS_SYNTHETIC_DIRECTORY; + } } + + // otherwise return not found return -1; } diff --git a/libc/runtime/zipos-get.c b/libc/runtime/zipos-get.c index 843523c01..fc0b2be82 100644 --- a/libc/runtime/zipos-get.c +++ b/libc/runtime/zipos-get.c @@ -21,9 +21,11 @@ #include "libc/calls/struct/stat.h" #include "libc/fmt/conv.h" #include "libc/intrin/cmpxchg.h" +#include "libc/intrin/kmalloc.h" #include "libc/intrin/promises.internal.h" #include "libc/intrin/strace.internal.h" #include "libc/macros.internal.h" +#include "libc/mem/alg.h" #include "libc/runtime/runtime.h" #include "libc/runtime/zipos.internal.h" #include "libc/sysv/consts/f.h" @@ -37,27 +39,55 @@ __static_yoink(APE_COM_NAME); #endif -static uint64_t __zipos_get_min_offset(const uint8_t *base, +static uint64_t __zipos_get_min_offset(const uint8_t *map, const uint8_t *cdir) { uint64_t i, n, c, r, o; c = GetZipCdirOffset(cdir); n = GetZipCdirRecords(cdir); - for (r = c, i = 0; i < n; ++i, c += ZIP_CFILE_HDRSIZE(base + c)) { - o = GetZipCfileOffset(base + c); + for (r = c, i = 0; i < n; ++i, c += ZIP_CFILE_HDRSIZE(map + c)) { + o = GetZipCfileOffset(map + c); if (o < r) r = o; } return r; } -static void __zipos_munmap_unneeded(const uint8_t *base, const uint8_t *cdir, - const uint8_t *map) { +static void __zipos_munmap_unneeded(const uint8_t *map, const uint8_t *cdir) { uint64_t n; - n = __zipos_get_min_offset(base, cdir); - n += base - map; + n = __zipos_get_min_offset(map, cdir); n = ROUNDDOWN(n, FRAMESIZE); if (n) munmap(map, n); } +static int __zipos_compare_names(const void *a, const void *b, void *c) { + const size_t *x = (const size_t *)a; + const size_t *y = (const size_t *)b; + struct Zipos *z = (struct Zipos *)c; + int xn = 
ZIP_CFILE_NAMESIZE(z->map + *x); + int yn = ZIP_CFILE_NAMESIZE(z->map + *y); + int n = MIN(xn, yn); + if (n) { + int res = + memcmp(ZIP_CFILE_NAME(z->map + *x), ZIP_CFILE_NAME(z->map + *y), n); + if (res) return res; + } + return xn - yn; // xn and yn are 16-bit +} + +// creates binary searchable array of file offsets to cdir records +static void __zipos_generate_index(struct Zipos *zipos) { + size_t c, i; + zipos->records = GetZipCdirRecords(zipos->cdir); + zipos->index = kmalloc(zipos->records * sizeof(size_t)); + for (i = 0, c = GetZipCdirOffset(zipos->cdir); i < zipos->records; + ++i, c += ZIP_CFILE_HDRSIZE(zipos->map + c)) { + zipos->index[i] = c; + } + // smoothsort() isn't the fastest algorithm, but it guarantees + // O(n log n), won't smash the stack and doesn't depend on malloc + smoothsort_r(zipos->index, zipos->records, sizeof(size_t), + __zipos_compare_names, zipos); +} + /** * Returns pointer to zip central directory of current executable. * @asyncsignalsafe @@ -95,10 +125,11 @@ struct Zipos *__zipos_get(void) { (map = mmap(0, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0)) != MAP_FAILED) { if ((cdir = GetZipEocd(map, st.st_size, &err))) { - __zipos_munmap_unneeded(map, cdir, map); + __zipos_munmap_unneeded(map, cdir); zipos.map = map; zipos.cdir = cdir; zipos.dev = st.st_ino; + __zipos_generate_index(&zipos); msg = kZipOk; } else { munmap(map, st.st_size); diff --git a/libc/runtime/zipos.internal.h b/libc/runtime/zipos.internal.h index c8e21f602..fa9a008e2 100644 --- a/libc/runtime/zipos.internal.h +++ b/libc/runtime/zipos.internal.h @@ -35,6 +35,8 @@ struct Zipos { uint8_t *map; uint8_t *cdir; uint64_t dev; + size_t *index; + size_t records; struct ZiposHandle *freelist; }; @@ -45,6 +47,7 @@ void __zipos_free(struct ZiposHandle *); struct Zipos *__zipos_get(void) pureconst; size_t __zipos_normpath(char *, const char *, size_t); ssize_t __zipos_find(struct Zipos *, struct ZiposUri *); +ssize_t __zipos_scan(struct Zipos *, struct ZiposUri *); ssize_t 
__zipos_parseuri(const char *, struct ZiposUri *); uint64_t __zipos_inode(struct Zipos *, int64_t, const void *, size_t); int __zipos_open(struct ZiposUri *, int); diff --git a/libc/stdio/dirstream.c b/libc/stdio/dirstream.c index 9437dc81a..24d4e8ec2 100644 --- a/libc/stdio/dirstream.c +++ b/libc/stdio/dirstream.c @@ -382,7 +382,7 @@ static struct dirent *readdir_zipos(DIR *dir) { while (p.len && p.path[p.len - 1] == '/') --p.len; p.path[p.len] = 0; ent->d_ino = __zipos_inode( - dir->zip.zipos, __zipos_find(dir->zip.zipos, &p), p.path, p.len); + dir->zip.zipos, __zipos_scan(dir->zip.zipos, &p), p.path, p.len); } else { uint8_t *s = ZIP_CFILE_NAME(dir->zip.zipos->map + dir->zip.offset); size_t n = ZIP_CFILE_NAMESIZE(dir->zip.zipos->map + dir->zip.offset); diff --git a/test/libc/stdio/zipdir_test.c b/test/libc/stdio/zipdir_test.c index 4dcdc4d9f..b6ba5cb9b 100644 --- a/test/libc/stdio/zipdir_test.c +++ b/test/libc/stdio/zipdir_test.c @@ -87,6 +87,7 @@ TEST(__zipos_normpath, overflows_willNulTerminate) { TEST(__zipos_normpath, vectors) { static const char V[][2][128] = { {"", ""}, + {"/", ""}, {"/..", ""}, {"/../", ""}, {".", ""},