From 0ffc0dd4612047423b3118bc70dd3c60a94cda1d Mon Sep 17 00:00:00 2001 From: tkchia <tkchia@users.noreply.github.com> Date: Mon, 24 Jul 2023 02:12:22 +0800 Subject: [PATCH] Let lz4toasm accept LZ4 files that lack extracted-size field (#858) lz4toasm should now more easily accept LZ4 files output by compressor programs that do not support the extracted-size field, such as Stephan Brumme's smallz4. This patch also proposes to add a new lz4len() function to the libc: it parses an LZ4 compressed block to compute the unpacked content size, without really unpacking the block. Co-authored-by: tkchia <tkchia-cosmo@gmx.com> --- libc/nexgen32e/kompressor.h | 1 + libc/str/lz4len.c | 57 +++++++++++++++++++++++++++++++++++++ tool/build/lz4toasm.c | 3 +- 3 files changed, 59 insertions(+), 2 deletions(-) create mode 100644 libc/str/lz4len.c diff --git a/libc/nexgen32e/kompressor.h b/libc/nexgen32e/kompressor.h index ca863f2ca..aef5a9b1e 100644 --- a/libc/nexgen32e/kompressor.h +++ b/libc/nexgen32e/kompressor.h @@ -17,6 +17,7 @@ void rldecode(void *dest, const struct RlDecode *) _Hide; void rldecode2(void *dest, const struct RlDecode *) _Hide; const uint8_t *lz4check(const void *data) _Hide; void *lz4cpy(void *dest, const void *blockdata, size_t blocksize) _Hide; +size_t lz4len(const void *blockdata, size_t blocksize) _Hide; void *lz4decode(void *dest, const void *src) _Hide; COSMOPOLITAN_C_END_ diff --git a/libc/str/lz4len.c b/libc/str/lz4len.c new file mode 100644 index 000000000..6ef059245 --- /dev/null +++ b/libc/str/lz4len.c @@ -0,0 +1,57 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright 
/**
 * Returns the uncompressed content size for a compressed LZ4 block,
 * without actually decompressing it.
 *
 * The block is walked sequence by sequence: each token's high nibble
 * gives the literal run length and its low nibble the match length,
 * either of which is extended by following bytes while the nibble is
 * 15 and the extension byte is 255. Literal bytes and the two-byte
 * match offset are skipped over rather than interpreted, since only
 * the lengths contribute to the result.
 *
 * No byte outside `blockdata[0 .. blocksize)` is ever read. If the
 * data ends in the middle of a sequence, parsing stops and the total
 * accumulated so far is returned; a literal run that claims more
 * bytes than remain is still counted at its declared length.
 *
 * @param blockdata points to the compressed LZ4 block
 * @param blocksize is the byte length of blockdata
 * @return number of bytes the block would unpack to, or 0 if
 *     blocksize is 0
 * @see lz4cpy()
 */
size_t lz4len(const void *blockdata, size_t blocksize) {
  enum {
    kNibbleMax = 15,   /* nibble value meaning "length continues" */
    kExtendMore = 255, /* extension byte meaning "another follows" */
    kOffsetSize = 2,   /* bytes occupied by the match offset */
    kMinMatch = 4      /* implicit minimum added to each match length */
  };
  const unsigned char *ip, *ipe;
  size_t unpacklen = 0;
  if (!blocksize) return 0;
  ip = blockdata;
  ipe = ip + blocksize;
  while (ip < ipe) {
    unsigned token = *ip++;
    size_t length = token >> 4;
    size_t matchlen = token & kNibbleMax;
    unsigned char b;
    if (length == kNibbleMax) {
      do {
        if (ip == ipe) return unpacklen; /* truncated length field */
        b = *ip++;
        length += b;
      } while (b == kExtendMore);
    }
    unpacklen += length;
    /* Compare against the remaining byte count instead of advancing
       first, so the pointer never leaves the buffer. Reaching or
       passing the end here means this was the final sequence. */
    if (length >= (size_t)(ipe - ip)) break;
    ip += length;
    /* The match offset does not affect the unpacked size: skip it. */
    if ((size_t)(ipe - ip) < kOffsetSize) break;
    ip += kOffsetSize;
    if (matchlen == kNibbleMax) {
      do {
        if (ip == ipe) return unpacklen; /* truncated length field */
        b = *ip++;
        matchlen += b;
      } while (b == kExtendMore);
    }
    unpacklen += matchlen + kMinMatch;
  }
  return unpacklen;
}