From 0ffc0dd4612047423b3118bc70dd3c60a94cda1d Mon Sep 17 00:00:00 2001
From: tkchia <tkchia@users.noreply.github.com>
Date: Mon, 24 Jul 2023 02:12:22 +0800
Subject: [PATCH] Let lz4toasm accept LZ4 files that lack extracted-size field
 (#858)

lz4toasm should now more easily accept LZ4 files output by
compressor programs that do not support the extracted-size
field, such as Stephan Brumme's smallz4.

This patch also proposes to add a new lz4len() function to
the libc: it parses an LZ4 compressed block to compute the
unpacked content size, without really unpacking the block.

Co-authored-by: tkchia <tkchia-cosmo@gmx.com>
---
 libc/nexgen32e/kompressor.h |  1 +
 libc/str/lz4len.c           | 57 +++++++++++++++++++++++++++++++++++++
 tool/build/lz4toasm.c       |  3 +-
 3 files changed, 59 insertions(+), 2 deletions(-)
 create mode 100644 libc/str/lz4len.c

diff --git a/libc/nexgen32e/kompressor.h b/libc/nexgen32e/kompressor.h
index ca863f2ca..aef5a9b1e 100644
--- a/libc/nexgen32e/kompressor.h
+++ b/libc/nexgen32e/kompressor.h
@@ -17,6 +17,7 @@ void rldecode(void *dest, const struct RlDecode *) _Hide;
 void rldecode2(void *dest, const struct RlDecode *) _Hide;
 const uint8_t *lz4check(const void *data) _Hide;
 void *lz4cpy(void *dest, const void *blockdata, size_t blocksize) _Hide;
+size_t lz4len(const void *blockdata, size_t blocksize) _Hide;
 void *lz4decode(void *dest, const void *src) _Hide;
 
 COSMOPOLITAN_C_END_
diff --git a/libc/str/lz4len.c b/libc/str/lz4len.c
new file mode 100644
index 000000000..6ef059245
--- /dev/null
+++ b/libc/str/lz4len.c
@@ -0,0 +1,57 @@
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8                                :vi│
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2020 Justine Alexandra Roberts Tunney                              │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/intrin/bits.h"
+#include "libc/intrin/pushpop.internal.h"
+#include "libc/nexgen32e/kompressor.h"
+#include "libc/str/str.h"
+
+/**
+ * Returns the uncompressed content size for a compressed LZ4 block, without
+ * actually decompressing it: it sums each literal run and match length.
+ * NOTE(review): only one bounds test per sequence; malformed input unguarded.
+ * @see lz4cpy()
+ */
+size_t lz4len(const void *blockdata, size_t blocksize) {
+  unsigned char *ip, *ipe;  // read cursor and one-past-the-end of the block
+  unsigned token, length, fifteen, offset, matchlen;
+  size_t unpacklen = 0;  // running total of bytes the block would produce
+  for (ip = blockdata, ipe = ip + blocksize;;) {  // NOTE(review): drops const
+    token = *ip++;  // NOTE(review): read even if blocksize is 0 -- confirm
+    length = token >> 4;  // high nibble of token is the literal run length
+    fifteen = pushpop(15);  // pushpop() defined elsewhere; presumably 15
+    if (length == fifteen) {  // saturated nibble: extension bytes follow
+      do {
+        length += *ip;
+      } while (*ip++ == 255);  // a 255 byte means one more byte follows
+    }
+    ip += length;  // step over the literal bytes instead of copying them
+    unpacklen += length;
+    if (ip >= ipe) break;  // sole exit: literal run reached end of block
+    offset = READ16LE(ip);  // assigned but never used in this function
+    matchlen = token & fifteen;  // low nibble of token is the match length
+    ip += 2;  // step over the 16-bit match offset field
+    if (matchlen == fifteen) {  // same extension scheme as literal length
+      do {
+        matchlen += *ip;
+      } while (*ip++ == 255);
+    }
+    unpacklen += matchlen + 4;  // a match adds its stored length plus 4
+  }
+  return unpacklen;
+}
diff --git a/tool/build/lz4toasm.c b/tool/build/lz4toasm.c
index 58c6fca5e..7e65b3a9d 100644
--- a/tool/build/lz4toasm.c
+++ b/tool/build/lz4toasm.c
@@ -116,8 +116,7 @@ int main(int argc, char *argv[]) {
     if (LZ4_FRAME_BLOCKCONTENTSIZEFLAG(frame)) {
       extractedsize = LZ4_FRAME_BLOCKCONTENTSIZE(frame);
     } else {
-      fprintf(stderr, "error: need extractedsize\n");
-      exit(1);
+      extractedsize = lz4len(data, size);
     }
   }