Import C++ Standard Template Library

You can now use the hardest, fastest, and most dangerous language there is
with Cosmopolitan. So far about 75% of LLVM libcxx has been added. A few
breaking changes needed to be made to help this go smoothly.

- Rename nothrow to dontthrow
- Rename nodiscard to dontdiscard
- Add some libm functions, e.g. lgamma, nan, etc.
- Change intmax_t from int128 to int64 like everything else
- Introduce %jjd formatting directive for int128_t
- Introduce strtoi128(), strtou128(), etc.
- Rename bsrmax() to bsr128()
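
Here's a quick sketch of how the new int128 pieces fit together (a
minimal example, assuming strtoi128() mirrors strtol() and lives in
libc/fmt/conv.h, which this changelog doesn't spell out):

    #include "libc/fmt/conv.h"    // assumed home of strtoi128()
    #include "libc/stdio/stdio.h"

    int main(int argc, char *argv[]) {
      // parse a 128-bit decimal string and print it with the new %jjd
      int128_t x = strtoi128("170141183460469231731687303715884105727", 0, 10);
      printf("%jjd\n", x);  // %jd now formats the 64-bit intmax_t
      return 0;
    }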

Some of the templates that should be working currently are std::vector,
std::string, std::map, std::set, std::deque, etc.
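
For instance, a little smoke test along these lines should now build
(a minimal sketch; the include paths follow the third_party/libcxx
layout used by this codebase):

    #include "third_party/libcxx/map"
    #include "third_party/libcxx/string"
    #include "third_party/libcxx/vector"

    int main() {
      std::vector<std::string> words;
      words.push_back("hello");
      words.push_back("world");
      std::map<std::string, int> lengths;
      for (const auto &w : words) lengths[w] = (int)w.size();
      return lengths["hello"] == 5 ? 0 : 1;
    }
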
Justine Tunney 2022-03-22 05:51:41 -07:00
parent 5022f9e920
commit 868af3f950
286 changed files with 123987 additions and 507 deletions

third_party/smallz4/README.cosmo (new vendored file, 6 lines)

@@ -0,0 +1,6 @@
Source:
https://create.stephan-brumme.com/smallz4/
Date:
2022-03-22
License:
MIT

third_party/smallz4/smallz4.cc (new vendored file, 314 lines)

@@ -0,0 +1,314 @@
/*-*- mode:c++;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi
smallz4
Copyright (c) 2016-2019 Stephan Brumme. All rights reserved.
See https://create.stephan-brumme.com/smallz4/ │
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/stdio.h"
#include "libc/time/time.h"
#include "third_party/smallz4/smallz4.hh"
/// error handler
static void error(const char* msg, int code = 1) {
fprintf(stderr, "ERROR: %s\n", msg);
exit(code);
}
// ==================== user-specific I/O INTERFACE ====================
struct UserPtr {
// file handles
FILE* in;
FILE* out;
// the attributes below are just needed for verbose output
bool verbose;
uint64_t numBytesIn;
uint64_t numBytesOut;
uint64_t totalSize;
time_t starttime;
};
/// read several bytes and store them at "data", return the number of bytes
/// actually read (returns zero only when the end of data is reached)
size_t getBytesFromIn(void* data, size_t numBytes, void* userPtr) {
/// cast user-specific data
UserPtr* user = (UserPtr*)userPtr;
if (data && numBytes > 0) {
size_t actual = fread(data, 1, numBytes, user->in);
user->numBytesIn += actual;
return actual;
}
return 0;
}
/// show verbose info on STDERR
void verbose(const UserPtr& user) {
if (!user.verbose) return;
if (user.numBytesIn == 0) return;
// elapsed and estimated time in seconds
int duration = int(time(NULL) - user.starttime);
if (duration == 0) return;
int estimated = int(duration * user.totalSize / user.numBytesIn);
// display on STDERR
fprintf(stderr, "\r%lld bytes => %lld bytes (%d%%", user.numBytesIn,
user.numBytesOut, 100 * user.numBytesOut / user.numBytesIn);
if (estimated > 0) fprintf(stderr, ", %d%% done", 100 * duration / estimated);
fprintf(stderr, "), after %d seconds @ %d kByte/s", duration,
duration > 0 ? (user.numBytesIn / duration) / 1024 : 0);
if (estimated > 0)
fprintf(stderr, ", about %d seconds left ", estimated - duration);
}
/// write a block of bytes
void sendBytesToOut(const void* data, size_t numBytes, void* userPtr) {
/// cast user-specific data
UserPtr* user = (UserPtr*)userPtr;
if (data && numBytes > 0) {
fwrite(data, 1, numBytes, user->out);
user->numBytesOut += numBytes;
if (user->verbose) verbose(*user);
}
}
// ==================== COMMAND-LINE HANDLING ====================
// show simple help
static void showHelp(const char* program) {
printf(
"smalLZ4 %s%s: compressor with optimal parsing, fully compatible with "
"LZ4 by Yann Collet (see https://lz4.org)\n"
"\n"
"Basic usage:\n"
" %s [flags] [input] [output]\n"
"\n"
"This program writes to STDOUT if output isn't specified\n"
"and reads from STDIN if input isn't specified, either.\n"
"\n"
"Examples:\n"
" %s < abc.txt > abc.txt.lz4 # use STDIN and STDOUT\n"
" %s abc.txt > abc.txt.lz4 # read from file and write to STDOUT\n"
" %s abc.txt abc.txt.lz4 # read from and write to file\n"
" cat abc.txt | %s - abc.txt.lz4 # read from STDIN and write to file\n"
" %s -6 abc.txt abc.txt.lz4 # compression level 6 (instead of "
"default 9)\n"
" %s -f abc.txt abc.txt.lz4 # overwrite an existing file\n"
" %s -f7 abc.txt abc.txt.lz4 # compression level 7 and overwrite "
"an existing file\n"
"\n"
"Flags:\n"
" -0, -1 ... -9 Set compression level, default: 9 (see below)\n"
" -h Display this help message\n"
" -f Overwrite an existing file\n"
" -l Use LZ4 legacy file format\n"
" -D [FILE] Load dictionary\n"
" -v Verbose\n"
"\n"
"Compression levels:\n"
" -0 No compression\n"
" -1 ... -%d Greedy search, check 1 to %d matches\n"
" -%d ... -8 Lazy matching with optimal parsing, check %d to 8 "
"matches\n"
" -9 Optimal parsing, check all possible matches "
"(default)\n"
"\n"
"Written in 2016-2020 by Stephan Brumme "
"https://create.stephan-brumme.com/smallz4/\n",
smallz4::getVersion(), "", program, program, program, program, program,
program, program, program, smallz4::ShortChainsGreedy,
smallz4::ShortChainsGreedy, smallz4::ShortChainsGreedy + 1,
smallz4::ShortChainsGreedy + 1);
}
/// parse command-line
int main(int argc, const char* argv[]) {
// show help if no parameters and stdin isn't a pipe
if (argc == 1 && isatty(fileno(stdin)) != 0) {
showHelp(argv[0]);
return 0;
}
unsigned short maxChainLength =
65535; // "unlimited" because search window contains only 2^16 bytes
// overwrite output ?
bool overwrite = false;
// legacy format ? (not recommended, but smaller files if input < 8 MB)
bool useLegacy = false;
// preload dictionary from disk
const char* dictionary = NULL;
// default input/output streams
UserPtr user;
user.in = stdin;
user.out = stdout;
user.verbose = false;
user.numBytesIn = 0;
user.numBytesOut = 0;
user.totalSize = 0;
// parse flags
int nextArgument = 1;
bool skipArgument = false;
while (argc > nextArgument && argv[nextArgument][0] == '-') {
int argPos = 1;
while (argv[nextArgument][argPos] != '\0') {
switch (argv[nextArgument][argPos++]) {
// show help
case 'h':
showHelp(argv[0]);
return 0;
// force overwrite
case 'f':
overwrite = true;
break;
// old LZ4 format
case 'l':
useLegacy = true;
break;
// use dictionary
case 'D':
if (nextArgument + 1 >= argc) error("no dictionary filename found");
dictionary =
argv[nextArgument +
1]; // TODO: any flag immediately after -D causes an error
skipArgument = true;
break;
// display some info on STDERR while compressing
case 'v':
user.verbose = true;
break;
// set compression level
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
maxChainLength =
argv[nextArgument][1] - '0'; // "0" => 0, "1" => 1, ..., "8" => 8
break;
// unlimited hash chain length
case '9':
// default maxChainLength is already "unlimited"
break;
default:
error("unknown flag");
}
}
nextArgument++;
if (skipArgument) nextArgument++;
}
// input file is given as first parameter or stdin if no parameter is given
// (or "-")
if (argc > nextArgument && argv[nextArgument][0] != '-') {
user.in = fopen(argv[nextArgument], "rb");
if (!user.in) error("file not found");
nextArgument++;
}
// output file is given as second parameter or stdout if no parameter is given
// (or "-")
if (argc == nextArgument + 1 && argv[nextArgument][0] != '-') {
// check if file already exists
if (!overwrite && fopen(argv[nextArgument], "rb"))
error("output file already exists");
user.out = fopen(argv[nextArgument], "wb");
if (!user.out) error("cannot create file");
}
// basic check of legacy format's restrictions
if (useLegacy) {
if (dictionary != 0) error("legacy format doesn't support dictionaries");
if (maxChainLength == 0)
error("legacy format doesn't support uncompressed files");
}
// load dictionary
std::vector<unsigned char> preload;
if (dictionary != NULL) {
// open dictionary
FILE* dict = fopen(dictionary, "rb");
if (!dict) error("cannot open dictionary");
// get dictionary's filesize
fseek(dict, 0, SEEK_END);
size_t dictSize = ftell(dict);
// only the last 64k are relevant
const size_t Last64k = 65536;
size_t relevant = dictSize < Last64k ? 0 : dictSize - Last64k;
fseek(dict, (long)relevant, SEEK_SET);
if (dictSize > Last64k) dictSize = Last64k;
// read those bytes
preload.resize(dictSize);
fread(&preload[0], 1, dictSize, dict);
fclose(dict);
}
if (user.verbose) {
if (user.in != stdin) {
fseek(user.in, 0, SEEK_END);
user.totalSize = ftell(user.in);
fseek(user.in, 0, SEEK_SET);
}
user.starttime = time(NULL);
}
// and go !
smallz4::lz4(getBytesFromIn, sendBytesToOut, maxChainLength, preload,
useLegacy, &user);
if (user.verbose && user.numBytesIn > 0)
fprintf(stderr,
"\r%lld bytes => %lld bytes (%d%%) after %d seconds "
" \n",
user.numBytesIn, user.numBytesOut,
100 * user.numBytesOut / user.numBytesIn,
int(time(NULL) - user.starttime));
return 0;
}

third_party/smallz4/smallz4.hh (new vendored file, 807 lines)

@@ -0,0 +1,807 @@
#ifndef COSMOPOLITAN_THIRD_PARTY_SMALLZ4_SMALLZ4_H_
#define COSMOPOLITAN_THIRD_PARTY_SMALLZ4_SMALLZ4_H_
#include "third_party/libcxx/vector"
/**
* LZ4 compression with optimal parsing
*
* See smallz4.cc for a basic I/O interface; you can easily replace it
* with an in-memory version, in which case all you have to do is:
*
* smallz4::lz4(GET_BYTES, SEND_BYTES);
*
* For more advanced stuff, you can call lz4 with up to four parameters
* (incl. max chain length and a dictionary)
*/
class smallz4 {
public:
// read several bytes, see getBytesFromIn() in smallz4.cc for a basic
// implementation
typedef size_t (*GET_BYTES)(void* data, size_t numBytes, void* userPtr);
// write several bytes, see sendBytesToOut() in smallz4.cc for a basic
// implementation
typedef void (*SEND_BYTES)(const void* data, size_t numBytes, void* userPtr);
/// compress everything in input stream (accessed via getByte) and write to
/// output stream (via send)
static void lz4(GET_BYTES getBytes, SEND_BYTES sendBytes,
unsigned short maxChainLength = MaxChainLength,
bool useLegacyFormat = false, void* userPtr = NULL) {
lz4(getBytes, sendBytes, maxChainLength, std::vector<unsigned char>(),
useLegacyFormat, userPtr);
}
/// compress everything in input stream (accessed via getByte) and write to
/// output stream (via send)
static void lz4(
GET_BYTES getBytes, SEND_BYTES sendBytes, unsigned short maxChainLength,
const std::vector<unsigned char>& dictionary, // predefined dictionary
bool useLegacyFormat =
false, // old format is 7 bytes smaller if input < 8 MB
void* userPtr = NULL) {
smallz4 obj(maxChainLength);
obj.compress(getBytes, sendBytes, dictionary, useLegacyFormat, userPtr);
}
/// version string
static const char* const getVersion() {
return "1.5";
}
// compression level thresholds, made public because I display them in the
// help screen ...
enum {
/// greedy mode for short chains (compression level <= 3) instead of optimal
/// parsing / lazy evaluation
ShortChainsGreedy = 3,
/// lazy evaluation for medium-sized chains (compression level > 3 and <= 6)
ShortChainsLazy = 6
};
// ----- END OF PUBLIC INTERFACE -----
private:
// ----- constants and types -----
/// a block can be up to 4 MB, so uint32_t would suffice but uint64_t is quite
/// a bit faster on my x64 machine
typedef uint64_t Length;
/// matches must start within the most recent 64k
typedef uint16_t Distance;
enum {
/// each match's length must be >= 4
MinMatch = 4,
/// a literal needs one byte
JustLiteral = 1,
/// last match must not be closer than 12 bytes to the end
BlockEndNoMatch = 12,
/// last 5 bytes must be literals, no matching allowed
BlockEndLiterals = 5,
/// match finder's hash table size (2^HashBits entries, must be less than
/// 32)
HashBits = 20,
HashSize = 1 << HashBits,
/// input buffer size, can be any number but zero ;-)
BufferSize = 1024,
/// maximum match distance, must be power of 2 minus 1
MaxDistance = 65535,
/// marker for "no match"
EndOfChain = 0,
/// stop match finding after MaxChainLength steps (default is unlimited =>
/// optimal parsing)
MaxChainLength = MaxDistance,
/// significantly speed up parsing if the same byte is repeated a lot, may
/// cause sub-optimal compression
MaxSameLetter = 19 + 255 * 256, // was: 19 + 255,
/// maximum block size as defined in LZ4 spec: {
/// 0,0,0,0,64*1024,256*1024,1024*1024,4*1024*1024 } I only work with the
/// biggest maximum block size (7)
// note: xxhash header checksum is precalculated only for 7, too
MaxBlockSizeId = 7,
MaxBlockSize = 4 * 1024 * 1024,
/// legacy format has a fixed block size of 8 MB
MaxBlockSizeLegacy = 8 * 1024 * 1024,
/// number of literals and match length is encoded in several bytes, max.
/// 255 per byte
MaxLengthCode = 255
};
// ----- one and only variable ... -----
/// how many matches are checked in findLongestMatch, lower values yield
/// faster encoding at the cost of worse compression ratio
unsigned short maxChainLength;
// ----- code -----
/// match
struct Match {
/// length of match
Length length;
/// start of match
Distance distance;
};
/// create new compressor (only invoked by lz4)
explicit smallz4(unsigned short newMaxChainLength = MaxChainLength)
: maxChainLength(newMaxChainLength) // => no limit, but can be changed by
// setMaxChainLength
{
}
/// return true, if the four bytes at *a and *b match
inline static bool match4(const void* const a, const void* const b) {
return *(const uint32_t*)a == *(const uint32_t*)b;
}
/// simple hash function, input: 32 bits, output: HashBits bits (by default:
/// 20)
inline static uint32_t getHash32(uint32_t fourBytes) {
// taken from https://en.wikipedia.org/wiki/Linear_congruential_generator
const uint32_t HashMultiplier = 48271;
return ((fourBytes * HashMultiplier) >> (32 - HashBits)) & (HashSize - 1);
}
/// find longest match of data[pos] between data[begin] and data[end], use
/// match chain
Match findLongestMatch(const unsigned char* const data, uint64_t pos,
uint64_t begin, uint64_t end,
const Distance* const chain) const {
Match result;
result.length = JustLiteral; // assume a literal => one byte
// compression level: look only at the first n entries of the match chain
unsigned short stepsLeft = maxChainLength;
// findLongestMatch() shouldn't be called when maxChainLength = 0
// (uncompressed)
// pointer to position that is currently analyzed (which we try to find a
// great match for)
const unsigned char* const current = data + pos - begin;
// don't match beyond this point
const unsigned char* const stop = current + end - pos;
// get distance to previous match, abort if 0 => not existing
Distance distance = chain[pos & MaxDistance];
int64_t totalDistance = 0;
while (distance != EndOfChain) {
// chain goes too far back ?
totalDistance += distance;
if (totalDistance > MaxDistance) break; // can't match beyond 64k
// prepare next position
distance = chain[(pos - totalDistance) & MaxDistance];
// let's introduce a new pointer atLeast that points to the first "new"
// byte of a potential longer match
const unsigned char* const atLeast = current + result.length + 1;
// impossible to find a longer match because not enough bytes left ?
if (atLeast > stop) break;
// the idea is to split the comparison algorithm into 2 phases
// (1) scan backward from atLeast to current, abort if mismatch
// (2) scan forward until a mismatch is found and store length/distance
//     of this new best match
//        current                          atLeast
//           |                                |
//           -<<<<<<<<<<<< phase 1 <<<<<<<<<<<-
//                                            >>> phase 2 >>>
// main reason for phase 1:
// - both byte sequences start with the same bytes, quite likely they are
// very similar
// - there is a good chance that if they differ, then their last bytes
// differ
// => checking the last first increases the probability that a mismatch is
// detected as early as possible
// compare 4 bytes at once
const Length CheckAtOnce = 4;
// all bytes between current and atLeast shall be identical
const unsigned char* phase1 =
atLeast - CheckAtOnce; // minus 4 because match4 checks 4 bytes
while (phase1 > current && match4(phase1, phase1 - totalDistance))
phase1 -= CheckAtOnce;
// note: - the first four bytes always match
// - in the last iteration, phase1 points either at current + 1 or
// current + 2 or current + 3
// - therefore we compare a few bytes twice => but a check to skip
// these checks is more expensive
// mismatch ? (the while-loop was aborted)
if (phase1 > current) continue;
// we have a new best match, now scan forward
const unsigned char* phase2 = atLeast;
// fast loop: check four bytes at once
while (phase2 + CheckAtOnce <= stop &&
match4(phase2, phase2 - totalDistance))
phase2 += CheckAtOnce;
// slow loop: check the last 1/2/3 bytes
while (phase2 < stop && *phase2 == *(phase2 - totalDistance)) phase2++;
// store new best match
result.distance = Distance(totalDistance);
result.length = Length(phase2 - current);
// stop searching on lower compression levels
if (--stepsLeft == 0) break;
}
return result;
}
/// create shortest output
/** data points to block's begin; we need it to extract literals **/
static std::vector<unsigned char> selectBestMatches(
const std::vector<Match>& matches, const unsigned char* const data) {
// store encoded data
std::vector<unsigned char> result;
result.reserve(matches.size());
// indices of current run of literals
size_t literalsFrom = 0;
size_t numLiterals = 0;
bool lastToken = false;
// walk through the whole block
for (size_t offset = 0;
offset < matches.size();) // increment inside of loop
{
// get best cost-weighted match
Match match = matches[offset];
// if no match, then count literals instead
if (match.length <= JustLiteral) {
// first literal ? need to reset pointers of current sequence of
// literals
if (numLiterals == 0) literalsFrom = offset;
// add one more literal to current sequence
numLiterals++;
// next match
offset++;
// continue unless it's the last literal
if (offset < matches.size()) continue;
lastToken = true;
} else {
// skip unused matches
offset += match.length;
}
// store match length (4 is implied because it's the minimum match length)
int matchLength = int(match.length) - MinMatch;
// last token has zero length
if (lastToken) matchLength = 0;
// token consists of match length and number of literals, let's start with
// match length ...
unsigned char token =
(matchLength < 15) ? (unsigned char)matchLength : 15;
// >= 15 literals ? (extra bytes to store length)
if (numLiterals < 15) {
// add number of literals in higher four bits
token |= numLiterals << 4;
result.push_back(token);
} else {
// set all higher four bits, the following bytes will determine the
// exact number of literals
result.push_back(token | 0xF0);
// 15 is already encoded in token
int encodeNumLiterals = int(numLiterals) - 15;
// emit 255 until remainder is below 255
while (encodeNumLiterals >= MaxLengthCode) {
result.push_back(MaxLengthCode);
encodeNumLiterals -= MaxLengthCode;
}
// and the last byte (can be zero, too)
result.push_back((unsigned char)encodeNumLiterals);
}
// copy literals
if (numLiterals > 0) {
result.insert(result.end(), data + literalsFrom,
data + literalsFrom + numLiterals);
// last token doesn't have a match
if (lastToken) break;
// reset
numLiterals = 0;
}
// distance stored in 16 bits / little endian
result.push_back(match.distance & 0xFF);
result.push_back(match.distance >> 8);
// >= 15+4 bytes matched
if (matchLength >= 15) {
// 15 is already encoded in token
matchLength -= 15;
// emit 255 until remainder is below 255
while (matchLength >= MaxLengthCode) {
result.push_back(MaxLengthCode);
matchLength -= MaxLengthCode;
}
// and the last byte (can be zero, too)
result.push_back((unsigned char)matchLength);
}
}
return result;
}
/// walk backwards through all matches and compute number of compressed bytes
/// from current position to the end of the block
/** note: matches are modified (shortened length) if necessary **/
static void estimateCosts(std::vector<Match>& matches) {
const size_t blockEnd = matches.size();
// equals the number of bytes after compression
typedef uint32_t Cost;
// minimum cost from this position to the end of the current block
std::vector<Cost> cost(matches.size(), 0);
// "cost" represents the number of bytes needed
// the last bytes must always be literals
Length numLiterals = BlockEndLiterals;
// backwards optimal parsing
for (int64_t i = (int64_t)blockEnd - (1 + BlockEndLiterals); i >= 0;
i--) // ignore the last 5 bytes, they are always literals
{
// if encoded as a literal
numLiterals++;
Length bestLength = JustLiteral;
// such a literal "costs" 1 byte
Cost minCost = cost[i + 1] + JustLiteral;
// an extra length byte is required for every 255 literals
if (numLiterals >= 15) {
// same as: if ((numLiterals - 15) % MaxLengthCode == 0)
// but I try hard to avoid the slow modulo function
if (numLiterals == 15 || (numLiterals >= 15 + MaxLengthCode &&
(numLiterals - 15) % MaxLengthCode == 0))
minCost++;
}
// let's look at the longest match, almost always more efficient than
// plain literals
Match match = matches[i];
// very long self-referencing matches can slow down the program A LOT
if (match.length >= MaxSameLetter && match.distance == 1) {
// assume that longest match is always the best match
// NOTE: this assumption might not be optimal !
bestLength = match.length;
minCost =
cost[i + match.length] + 1 + 2 + 1 + Cost(match.length - 19) / 255;
} else {
// this is the core optimization loop
// overhead of encoding a match: token (1 byte) + offset (2 bytes) +
// sometimes extra bytes for long matches
Cost extraCost = 1 + 2;
Length nextCostIncrease = 18; // need one more byte for 19+ long
// matches (next increase: 19+255*x)
// try all match lengths (start with short ones)
for (Length length = MinMatch; length <= match.length; length++) {
// token (1 byte) + offset (2 bytes) + extra bytes for long matches
Cost currentCost = cost[i + length] + extraCost;
// better choice ?
if (currentCost <= minCost) {
// regarding the if-condition:
// "<" prefers literals and shorter matches
// "<=" prefers longer matches
// they should produce the same number of bytes (because of the same
// cost)
// ... but every now and then it doesn't !
// that's why: too many consecutive literals require an extra length
// byte (which we took into consideration a few lines above), but we
// only looked at literals beyond the current position; if there are
// many literals in front of the current position, then it may be
// better to emit a match with the same cost as the literals at the
// current position
// => it "breaks" the long chain of literals and removes the extra
// length byte
minCost = currentCost;
bestLength = length;
// performance-wise, a long match is usually faster during decoding
// than multiple short matches on the other hand, literals are
// faster than short matches as well (assuming same cost)
}
// very long matches need extra bytes for encoding match length
if (length == nextCostIncrease) {
extraCost++;
nextCostIncrease += MaxLengthCode;
}
}
}
// store lowest cost so far
cost[i] = minCost;
// and adjust best match
matches[i].length = bestLength;
// reset number of literals if a match was chosen
if (bestLength != JustLiteral) numLiterals = 0;
// note: if bestLength is smaller than the previous matches[i].length then
// there might be a closer match
// which could be more cache-friendly (=> faster decoding)
}
}
/// compress everything in input stream (accessed via getByte) and write to
/// output stream (via send), improve compression with a predefined dictionary
void compress(GET_BYTES getBytes, SEND_BYTES sendBytes,
const std::vector<unsigned char>& dictionary,
bool useLegacyFormat, void* userPtr) const {
// ==================== write header ====================
if (useLegacyFormat) {
// magic bytes
const unsigned char header[] = {0x02, 0x21, 0x4C, 0x18};
sendBytes(header, sizeof(header), userPtr);
} else {
// frame header
const unsigned char header[] = {
0x04, 0x22, 0x4D,
0x18, // magic bytes
1 << 6, // flags: no checksums, blocks depend on each other and no
// dictionary ID
MaxBlockSizeId << 4, // max blocksize
0xDF // header checksum (precomputed)
};
sendBytes(header, sizeof(header), userPtr);
}
// ==================== declarations ====================
// change read buffer size as you like
unsigned char buffer[BufferSize];
// read the file in chunks/blocks, data will contain only bytes which are
// relevant for the current block
std::vector<unsigned char> data;
// file position corresponding to data[0]
size_t dataZero = 0;
// last already read position
size_t numRead = 0;
// passthru data ? (but still wrap it in LZ4 format)
const bool uncompressed = (maxChainLength == 0);
// last time we saw a hash
const uint64_t NoLastHash = ~0; // = -1
std::vector<uint64_t> lastHash(HashSize, NoLastHash);
// previous position which starts with the same bytes
std::vector<Distance> previousHash(
MaxDistance + 1,
Distance(EndOfChain)); // long chains based on my simple hash
std::vector<Distance> previousExact(
MaxDistance + 1,
Distance(EndOfChain)); // shorter chains based on exact matching of the
// first four bytes
// these two containers are essential for match finding:
// 1. I compute a hash of four bytes
// 2. in lastHash is the location of the most recent block of four bytes
// with that same hash
// 3. due to hash collisions, several groups of four bytes may yield the
// same hash
// 4. so for each location I can look up the previous location of the same
// hash in previousHash
// 5. basically it's a chain of memory locations where potential matches
// start
// 6. I follow this hash chain until I find exactly the same four bytes I
// was looking for
// 7. then I switch to a sparser chain: previousExact
// 8. it's basically the same idea as previousHash but this time not the
// hash but the first four bytes must be identical
// 9. previousExact will be used by findLongestMatch: it compares all such
// strings and figures out which is the longest match
// And why do I have to do it in such a complicated way ?
// - well, there are 2^32 combinations of four bytes
// - so that there are 2^32 potential chains
// - most combinations just don't occur and occupy no space but I still have
// to keep their "entry point" (which are empty/invalid)
// - that would be at least 16 GBytes RAM (2^32 x 4 bytes)
// - my hashing algorithm reduces the 2^32 combinations to 2^20 hashes (see
// hashBits), that's about 8 MBytes RAM
// - thus only 2^20 entry points and at most 2^20 hash chains which is
// easily manageable
// ... in the end it's all about conserving memory !
// (total memory consumption of smallz4 is about 64 MBytes)
// first and last offset of a block (nextBlock is end-of-block plus 1)
uint64_t lastBlock = 0;
uint64_t nextBlock = 0;
bool parseDictionary = !dictionary.empty();
// main loop, processes one block per iteration
while (true) {
// ==================== start new block ====================
// first byte of the currently processed block (std::vector data may
// contain the last 64k of the previous block, too)
const unsigned char* dataBlock = NULL;
// prepend dictionary
if (parseDictionary) {
// resize dictionary to 64k (minus 1 because we can only match the last
// 65535 bytes of the dictionary => MaxDistance)
if (dictionary.size() < MaxDistance) {
// dictionary is smaller than 64k, prepend garbage data
size_t unused = MaxDistance - dictionary.size();
data.resize(unused, 0);
data.insert(data.end(), dictionary.begin(), dictionary.end());
} else
// copy only the most recent 64k of the dictionary
data.insert(data.end(),
dictionary.begin() + dictionary.size() - MaxDistance,
dictionary.end());
nextBlock = data.size();
numRead = data.size();
}
// read more bytes from input
size_t maxBlockSize = useLegacyFormat ? MaxBlockSizeLegacy : MaxBlockSize;
while (numRead - nextBlock < maxBlockSize) {
// buffer can be significantly smaller than MaxBlockSize, that's the
// only reason for this while-block
size_t incoming = getBytes(buffer, BufferSize, userPtr);
// no more data ?
if (incoming == 0) break;
// add bytes to buffer
numRead += incoming;
data.insert(data.end(), buffer, buffer + incoming);
}
// no more data ? => WE'RE DONE !
if (nextBlock == numRead) break;
// determine block borders
lastBlock = nextBlock;
nextBlock += maxBlockSize;
// not beyond end-of-file
if (nextBlock > numRead) nextBlock = numRead;
// pointer to first byte of the currently processed block (the std::vector
// container named data may contain the last 64k of the previous block,
// too)
dataBlock = &data[lastBlock - dataZero];
const uint64_t blockSize = nextBlock - lastBlock;
// ==================== full match finder ====================
// greedy mode is much faster but produces larger output
const bool isGreedy = (maxChainLength <= ShortChainsGreedy);
// lazy evaluation: if there is a match, then try running match finder on
// next position, too, but not after that
const bool isLazy = !isGreedy && (maxChainLength <= ShortChainsLazy);
// skip match finding on the next x bytes in greedy mode
Length skipMatches = 0;
// allow match finding on the next byte but skip afterwards (in lazy mode)
bool lazyEvaluation = false;
// the last literals of the previous block skipped matching, so they are
// missing from the hash chains
int64_t lookback = int64_t(dataZero);
if (lookback > BlockEndNoMatch && !parseDictionary)
lookback = BlockEndNoMatch;
if (parseDictionary) lookback = int64_t(dictionary.size());
// so let's go back a few bytes
lookback = -lookback;
// ... but not in legacy mode
if (useLegacyFormat || uncompressed) lookback = 0;
std::vector<Match> matches(uncompressed ? 0 : blockSize);
// find longest matches for each position (skip if level=0 which means
// "uncompressed")
int64_t i;
for (i = lookback;
i + BlockEndNoMatch <= int64_t(blockSize) && !uncompressed; i++) {
// detect self-matching
if (i > 0 && dataBlock[i] == dataBlock[i - 1]) {
Match prevMatch = matches[i - 1];
// predecessor had the same match ?
if (prevMatch.distance == 1 &&
prevMatch.length > MaxSameLetter) // TODO: handle very long
// self-referencing matches
{
// just copy predecessor without further (expensive) optimizations
matches[i].distance = 1;
matches[i].length = prevMatch.length - 1;
continue;
}
}
// read next four bytes
const uint32_t four = *(uint32_t*)(dataBlock + i);
// convert to a shorter hash
const uint32_t hash = getHash32(four);
// get most recent position of this hash
uint64_t lastHashMatch = lastHash[hash];
// and store current position
lastHash[hash] = i + lastBlock;
// remember: i could be negative, too
Distance prevIndex =
(i + MaxDistance + 1) &
MaxDistance; // actually the same as i & MaxDistance
// no predecessor / no hash chain available ?
if (lastHashMatch == NoLastHash) {
previousHash[prevIndex] = EndOfChain;
previousExact[prevIndex] = EndOfChain;
continue;
}
// most recent hash match too far away ?
uint64_t distance = lastHash[hash] - lastHashMatch;
if (distance > MaxDistance) {
previousHash[prevIndex] = EndOfChain;
previousExact[prevIndex] = EndOfChain;
continue;
}
// build hash chain, i.e. store distance to last pseudo-match
previousHash[prevIndex] = (Distance)distance;
// skip pseudo-matches (hash collisions) and build a second chain where
// the first four bytes must match exactly
uint32_t currentFour;
// check the hash chain
while (true) {
// read four bytes
currentFour =
*(uint32_t*)(&data[lastHashMatch -
dataZero]); // match may be found in the
// previous block, too
// match chain found, first 4 bytes are identical
if (currentFour == four) break;
// prevent accidentally hopping onto an old, wrong hash chain
if (hash != getHash32(currentFour)) break;
// try next pseudo-match
Distance next = previousHash[lastHashMatch & MaxDistance];
// end of the hash chain ?
if (next == EndOfChain) break;
// too far away ?
distance += next;
if (distance > MaxDistance) break;
// take another step along the hash chain ...
lastHashMatch -= next;
// closest match is out of range ?
if (lastHashMatch < dataZero) break;
}
// search aborted / failed ?
if (four != currentFour) {
// no matches for the first four bytes
previousExact[prevIndex] = EndOfChain;
continue;
}
// store distance to previous match
previousExact[prevIndex] = (Distance)distance;
// no matching if crossing block boundary, just update hash tables
if (i < 0) continue;
// skip match finding if in greedy mode
if (skipMatches > 0) {
skipMatches--;
if (!lazyEvaluation) continue;
lazyEvaluation = false;
}
// and after all that preparation ... finally look for the longest match
matches[i] = findLongestMatch(data.data(), i + lastBlock, dataZero,
nextBlock - BlockEndLiterals,
previousExact.data());
// no match finding needed for the next few bytes in greedy/lazy mode
if ((isLazy || isGreedy) && matches[i].length != JustLiteral) {
lazyEvaluation = (skipMatches == 0);
skipMatches = matches[i].length;
}
}
// last bytes are always literals
while (i < int(matches.size())) matches[i++].length = JustLiteral;
// dictionary is valid only to the first block
parseDictionary = false;
// ==================== estimate costs (number of compressed bytes)
// ====================
// not needed in greedy mode and/or very short blocks
if (matches.size() > BlockEndNoMatch &&
maxChainLength > ShortChainsGreedy)
estimateCosts(matches);
// ==================== select best matches ====================
std::vector<unsigned char> compressed =
selectBestMatches(matches, &data[lastBlock - dataZero]);
// ==================== output ====================
// did compression do harm ?
bool useCompression = compressed.size() < blockSize && !uncompressed;
// legacy format is always compressed
useCompression |= useLegacyFormat;
// block size
uint32_t numBytes =
uint32_t(useCompression ? compressed.size() : blockSize);
uint32_t numBytesTagged = numBytes | (useCompression ? 0 : 0x80000000);
unsigned char num1 = numBytesTagged & 0xFF;
sendBytes(&num1, 1, userPtr);
unsigned char num2 = (numBytesTagged >> 8) & 0xFF;
sendBytes(&num2, 1, userPtr);
unsigned char num3 = (numBytesTagged >> 16) & 0xFF;
sendBytes(&num3, 1, userPtr);
unsigned char num4 = (numBytesTagged >> 24) & 0xFF;
sendBytes(&num4, 1, userPtr);
if (useCompression)
sendBytes(compressed.data(), numBytes, userPtr);
else // uncompressed ? => copy input data
sendBytes(&data[lastBlock - dataZero], numBytes, userPtr);
// legacy format: no matching across blocks
if (useLegacyFormat) {
dataZero += data.size();
data.clear();
// clear hash tables
for (size_t i = 0; i < previousHash.size(); i++)
previousHash[i] = EndOfChain;
for (size_t i = 0; i < previousExact.size(); i++)
previousExact[i] = EndOfChain;
for (size_t i = 0; i < lastHash.size(); i++) lastHash[i] = NoLastHash;
} else {
// remove already processed data except for the last 64kb, which could be
// used for matches that cross into the next block
if (data.size() > MaxDistance) {
size_t remove = data.size() - MaxDistance;
dataZero += remove;
data.erase(data.begin(), data.begin() + remove);
}
}
}
// add an empty block
if (!useLegacyFormat) {
static const uint32_t zero = 0;
sendBytes(&zero, 4, userPtr);
}
}
};
#endif /* COSMOPOLITAN_THIRD_PARTY_SMALLZ4_SMALLZ4_H_ */
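
The doc comment above suggests swapping the file-based I/O in
smallz4.cc for an in-memory version. Here's a minimal sketch of that
idea (the MemIo struct and helper names are illustrative; only
smallz4::lz4() and the GET_BYTES/SEND_BYTES signatures come from the
header):

    #include "third_party/smallz4/smallz4.hh"

    struct MemIo {
      const unsigned char *src;        // input buffer
      size_t srcLen, srcPos;           // input size and read cursor
      std::vector<unsigned char> out;  // compressed bytes accumulate here
    };

    static size_t memGet(void *data, size_t numBytes, void *userPtr) {
      MemIo *io = (MemIo *)userPtr;
      size_t left = io->srcLen - io->srcPos;
      if (numBytes > left) numBytes = left;
      for (size_t i = 0; i < numBytes; i++)
        ((unsigned char *)data)[i] = io->src[io->srcPos++];
      return numBytes;  // zero only once the input is exhausted
    }

    static void memSend(const void *data, size_t numBytes, void *userPtr) {
      MemIo *io = (MemIo *)userPtr;
      const unsigned char *p = (const unsigned char *)data;
      io->out.insert(io->out.end(), p, p + numBytes);
    }

    // usage: compress buf[0..len) entirely in memory
    //   MemIo io = {buf, len, 0, {}};
    //   smallz4::lz4(memGet, memSend, 65535, false, &io);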

third_party/smallz4/smallz4.mk (new vendored file, 86 lines)

@@ -0,0 +1,86 @@
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
PKGS += THIRD_PARTY_SMALLZ4
THIRD_PARTY_SMALLZ4_SRCS = $(THIRD_PARTY_SMALLZ4_A_SRCS)
THIRD_PARTY_SMALLZ4_HDRS = $(THIRD_PARTY_SMALLZ4_A_HDRS)
THIRD_PARTY_SMALLZ4_BINS = $(THIRD_PARTY_SMALLZ4_COMS) $(THIRD_PARTY_SMALLZ4_COMS:%=%.dbg)
THIRD_PARTY_SMALLZ4_ARTIFACTS += THIRD_PARTY_SMALLZ4_A
THIRD_PARTY_SMALLZ4 = $(THIRD_PARTY_SMALLZ4_A_DEPS) $(THIRD_PARTY_SMALLZ4_A)
THIRD_PARTY_SMALLZ4_A = o/$(MODE)/third_party/smallz4/smallz4.a
THIRD_PARTY_SMALLZ4_A_FILES := $(wildcard third_party/smallz4/*)
THIRD_PARTY_SMALLZ4_A_HDRS = $(filter %.hh,$(THIRD_PARTY_SMALLZ4_A_FILES))
THIRD_PARTY_SMALLZ4_A_SRCS_S = $(filter %.S,$(THIRD_PARTY_SMALLZ4_A_FILES))
THIRD_PARTY_SMALLZ4_A_SRCS_C = $(filter %.c,$(THIRD_PARTY_SMALLZ4_A_FILES))
THIRD_PARTY_SMALLZ4_A_SRCS_CC = $(filter %.cc,$(THIRD_PARTY_SMALLZ4_A_FILES))
THIRD_PARTY_SMALLZ4_A_SRCS = \
$(THIRD_PARTY_SMALLZ4_A_SRCS_S) \
$(THIRD_PARTY_SMALLZ4_A_SRCS_C) \
$(THIRD_PARTY_SMALLZ4_A_SRCS_CC)
THIRD_PARTY_SMALLZ4_A_OBJS = \
$(THIRD_PARTY_SMALLZ4_A_SRCS_S:%.S=o/$(MODE)/%.o) \
$(THIRD_PARTY_SMALLZ4_A_SRCS_C:%.c=o/$(MODE)/%.o) \
$(THIRD_PARTY_SMALLZ4_A_SRCS_CC:%.cc=o/$(MODE)/%.o)
THIRD_PARTY_SMALLZ4_A_DIRECTDEPS = \
LIBC_FMT \
LIBC_INTRIN \
LIBC_LOG \
LIBC_MEM \
LIBC_NEXGEN32E \
LIBC_RUNTIME \
LIBC_CALLS \
LIBC_UNICODE \
LIBC_STDIO \
LIBC_STR \
LIBC_STUBS \
THIRD_PARTY_LIBCXX
THIRD_PARTY_SMALLZ4_A_DEPS := \
$(call uniq,$(foreach x,$(THIRD_PARTY_SMALLZ4_A_DIRECTDEPS),$($(x))))
THIRD_PARTY_SMALLZ4_A_CHECKS = \
$(THIRD_PARTY_SMALLZ4_A).pkg \
$(THIRD_PARTY_SMALLZ4_A_HDRS:%=o/$(MODE)/%.ok)
$(THIRD_PARTY_SMALLZ4_A): \
third_party/smallz4/ \
$(THIRD_PARTY_SMALLZ4_A).pkg \
$(THIRD_PARTY_SMALLZ4_A_OBJS)
$(THIRD_PARTY_SMALLZ4_A).pkg: \
$(THIRD_PARTY_SMALLZ4_A_OBJS) \
$(foreach x,$(THIRD_PARTY_SMALLZ4_A_DIRECTDEPS),$($(x)_A).pkg)
o/$(MODE)/third_party/smallz4/smallz4.com.dbg: \
$(THIRD_PARTY_SMALLZ4) \
o/$(MODE)/third_party/smallz4/smallz4.o \
$(CRT) \
$(APE)
@$(APELINK)
o/$(MODE)/third_party/smallz4/smallz4cat.com.dbg: \
$(THIRD_PARTY_SMALLZ4) \
o/$(MODE)/third_party/smallz4/smallz4cat.o \
$(CRT) \
$(APE)
@$(APELINK)
THIRD_PARTY_SMALLZ4_COMS = \
o/$(MODE)/third_party/smallz4/smallz4.com \
o/$(MODE)/third_party/smallz4/smallz4cat.com
THIRD_PARTY_SMALLZ4_LIBS = $(foreach x,$(THIRD_PARTY_SMALLZ4_ARTIFACTS),$($(x)))
THIRD_PARTY_SMALLZ4_SRCS = $(foreach x,$(THIRD_PARTY_SMALLZ4_ARTIFACTS),$($(x)_SRCS))
THIRD_PARTY_SMALLZ4_CHECKS = $(foreach x,$(THIRD_PARTY_SMALLZ4_ARTIFACTS),$($(x)_CHECKS))
THIRD_PARTY_SMALLZ4_OBJS = $(foreach x,$(THIRD_PARTY_SMALLZ4_ARTIFACTS),$($(x)_OBJS))
$(THIRD_PARTY_SMALLZ4_OBJS): $(BUILD_FILES) third_party/smallz4/smallz4.mk
.PHONY: o/$(MODE)/third_party/smallz4
o/$(MODE)/third_party/smallz4: \
$(THIRD_PARTY_SMALLZ4_BINS) \
$(THIRD_PARTY_SMALLZ4_CHECKS)

third_party/smallz4/smallz4cat.c (new vendored file, 356 lines)

@@ -0,0 +1,356 @@
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi
smallz4cat
Copyright (c) 2016-2019 Stephan Brumme. All rights reserved.
See https://create.stephan-brumme.com/smallz4/ │
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/calls/calls.h"
#include "libc/mem/mem.h"
#include "libc/runtime/gc.internal.h"
#include "libc/stdio/stdio.h"
/**
* @fileoverview shorter, more readable, albeit slower re-implementation
* of lz4cat ( https://github.com/Cyan4973/xxHash )
*
* Limitations:
*
* - Skippable frames and legacy frames are not implemented (and most
* likely never will be)
*
* - Checksums are not verified (see https://create.stephan-brumme.com/xxhash/
* for a simple implementation)
*
* Replace getByteFromIn() and sendToOut() by your own code if you need
* in-memory LZ4 decompression. Corrupted data causes a call to
* unlz4error().
*/
#define HISTORY_SIZE 65536 // don't change
#define READ_BUFFER_SIZE 1024 // change at will
static void unlz4error(const char* msg) {
fputs("ERROR: ", stderr);
fputs(msg, stderr);
fputc('\n', stderr);
exit(1);
}
typedef unsigned char (*GET_BYTE)(void*);
typedef void (*SEND_BYTES)(const unsigned char*, unsigned int, void*);
struct UserPtr {
// file handles
FILE* in;
FILE* out;
unsigned char readBuffer[READ_BUFFER_SIZE];
unsigned int pos;
unsigned int available;
};
/// read a single byte (with simple buffering)
static unsigned char getByteFromIn(void* userPtr) {
struct UserPtr* user = (struct UserPtr*)userPtr;
if (user->pos == user->available) {
user->pos = 0;
user->available = fread(user->readBuffer, 1, READ_BUFFER_SIZE, user->in);
if (user->available == 0) unlz4error("out of data");
}
return user->readBuffer[user->pos++];
}
/// write a block of bytes
static void sendBytesToOut(const unsigned char* data, unsigned int numBytes,
void* userPtr) {
/// cast user-specific data
struct UserPtr* user = (struct UserPtr*)userPtr;
if (data != NULL && numBytes > 0) fwrite(data, 1, numBytes, user->out);
}
/// decompress everything in input stream (accessed via getByte) and write to
/// output stream (via sendBytes)
void unlz4_userPtr(GET_BYTE getByte, SEND_BYTES sendBytes,
const char* dictionary, void* userPtr) {
// signature
unsigned char signature1 = getByte(userPtr);
unsigned char signature2 = getByte(userPtr);
unsigned char signature3 = getByte(userPtr);
unsigned char signature4 = getByte(userPtr);
unsigned int signature =
(signature4 << 24) | (signature3 << 16) | (signature2 << 8) | signature1;
unsigned char isModern = (signature == 0x184D2204);
unsigned char isLegacy = (signature == 0x184C2102);
if (!isModern && !isLegacy) unlz4error("invalid signature");
unsigned char hasBlockChecksum = false;
unsigned char hasContentSize = false;
unsigned char hasContentChecksum = false;
unsigned char hasDictionaryID = false;
if (isModern) {
// flags
unsigned char flags = getByte(userPtr);
hasBlockChecksum = flags & 16;
hasContentSize = flags & 8;
hasContentChecksum = flags & 4;
hasDictionaryID = flags & 1;
// only version 1 file format
unsigned char version = flags >> 6;
if (version != 1) unlz4error("only LZ4 file format version 1 supported");
// ignore blocksize
char numIgnore = 1;
// ignore, skip 8 bytes
if (hasContentSize) numIgnore += 8;
// ignore, skip 4 bytes
if (hasDictionaryID) numIgnore += 4;
// ignore header checksum (xxhash32 of everything up to this point & 0xFF)
numIgnore++;
// skip all those ignored bytes
while (numIgnore--) getByte(userPtr);
}
// contains the latest decoded data
unsigned char* history = gc(malloc(HISTORY_SIZE));
// next free position in history[]
unsigned int pos = 0;
// dictionary compression is a recently introduced feature, just move its
// contents to the buffer
if (dictionary != NULL) {
// open dictionary
FILE* dict = fopen(dictionary, "rb");
if (!dict) unlz4error("cannot open dictionary");
// get dictionary's filesize
fseek(dict, 0, SEEK_END);
long dictSize = ftell(dict);
// only the last 64k are relevant
long relevant = dictSize < 65536 ? 0 : dictSize - 65536;
fseek(dict, relevant, SEEK_SET);
if (dictSize > 65536) dictSize = 65536;
// read it and store it at the end of the buffer
fread(history + HISTORY_SIZE - dictSize, 1, dictSize, dict);
fclose(dict);
}
// parse all blocks until blockSize == 0
while (1) {
// block size
unsigned int blockSize = getByte(userPtr);
blockSize |= (unsigned int)getByte(userPtr) << 8;
blockSize |= (unsigned int)getByte(userPtr) << 16;
blockSize |= (unsigned int)getByte(userPtr) << 24;
// highest bit set ?
unsigned char isCompressed = isLegacy || (blockSize & 0x80000000) == 0;
if (isModern) blockSize &= 0x7FFFFFFF;
// stop after last block
if (blockSize == 0) break;
if (isCompressed) {
// decompress block
unsigned int blockOffset = 0;
unsigned int numWritten = 0;
while (blockOffset < blockSize) {
// get a token
unsigned char token = getByte(userPtr);
blockOffset++;
// determine number of literals
unsigned int numLiterals = token >> 4;
if (numLiterals == 15) {
// number of literals length encoded in more than 1 byte
unsigned char current;
do {
current = getByte(userPtr);
numLiterals += current;
blockOffset++;
} while (current == 255);
}
blockOffset += numLiterals;
// copy all those literals
if (pos + numLiterals < HISTORY_SIZE) {
// fast loop
while (numLiterals-- > 0) history[pos++] = getByte(userPtr);
} else {
// slow loop
while (numLiterals-- > 0) {
history[pos++] = getByte(userPtr);
// flush output buffer
if (pos == HISTORY_SIZE) {
sendBytes(history, HISTORY_SIZE, userPtr);
numWritten += HISTORY_SIZE;
pos = 0;
}
}
}
// last token has only literals
if (blockOffset == blockSize) break;
// match distance is encoded in two bytes (little endian)
unsigned int delta = getByte(userPtr);
delta |= (unsigned int)getByte(userPtr) << 8;
// zero isn't allowed
if (delta == 0) unlz4error("invalid offset");
blockOffset += 2;
// match length (always >= 4, therefore length is stored minus 4)
unsigned int matchLength = 4 + (token & 0x0F);
if (matchLength == 4 + 0x0F) {
unsigned char current;
do // match length encoded in more than 1 byte
{
current = getByte(userPtr);
matchLength += current;
blockOffset++;
} while (current == 255);
}
// copy match
unsigned int referencePos =
(pos >= delta) ? (pos - delta) : (HISTORY_SIZE + pos - delta);
// start and end within the current 64k block ?
if (pos + matchLength < HISTORY_SIZE &&
referencePos + matchLength < HISTORY_SIZE) {
// read/write continuous block (no wrap-around at the end of
// history[]), so a fast copy is possible
if (pos >= referencePos + matchLength ||
referencePos >= pos + matchLength) {
// non-overlapping
memcpy(history + pos, history + referencePos, matchLength);
pos += matchLength;
} else {
// overlapping, slower byte-wise copy
while (matchLength-- > 0) history[pos++] = history[referencePos++];
}
} else {
// either read or write wraps around at the end of history[]
while (matchLength-- > 0) {
// copy single byte
history[pos++] = history[referencePos++];
// cannot write anymore ? => wrap around
if (pos == HISTORY_SIZE) {
// flush output buffer
sendBytes(history, HISTORY_SIZE, userPtr);
numWritten += HISTORY_SIZE;
pos = 0;
}
// wrap-around of read location
referencePos %= HISTORY_SIZE;
}
}
}
// all legacy blocks must be completely filled - except for the last one
if (isLegacy && numWritten + pos < 8 * 1024 * 1024) break;
} else {
// copy uncompressed data and add to history, too (if next block is
// compressed and some matches refer to this block)
while (blockSize-- > 0) {
// copy a byte ...
history[pos++] = getByte(userPtr);
// ... until buffer is full => send to output
if (pos == HISTORY_SIZE) {
sendBytes(history, HISTORY_SIZE, userPtr);
pos = 0;
}
}
}
if (hasBlockChecksum) {
// ignore checksum, skip 4 bytes
getByte(userPtr);
getByte(userPtr);
getByte(userPtr);
getByte(userPtr);
}
}
if (hasContentChecksum) {
// ignore checksum, skip 4 bytes
getByte(userPtr);
getByte(userPtr);
getByte(userPtr);
getByte(userPtr);
}
// flush output buffer
sendBytes(history, pos, userPtr);
}
/// old interface where getByte and sendBytes use global file handles
void unlz4(GET_BYTE getByte, SEND_BYTES sendBytes, const char* dictionary) {
unlz4_userPtr(getByte, sendBytes, dictionary, NULL);
}
/// parse command-line
int main(int argc, const char* argv[]) {
// default input/output streams
struct UserPtr user = {.in = stdin,
.out = stdout,
.pos = 0, // initial input buffer is empty
.available = 0};
const char* dictionary = NULL;
// first command-line parameter is our input filename / but ignore "-" which
// stands for STDIN
int parameter;
for (parameter = 1; parameter < argc; parameter++) {
const char* current = argv[parameter];
// dictionary
if (current[0] == '-' && current[1] == 'D') {
if (parameter + 1 >= argc) unlz4error("no dictionary filename found");
dictionary = argv[++parameter];
continue;
}
// filename
// read from STDIN, default behavior
if (current[0] != '-' && current[1] != '\0') {
// already have a filename - at most one filename is allowed (except for
// dictionary) ?
if (user.in != stdin)
unlz4error("can only decompress one file at a time");
// get handle
user.in = fopen(current, "rb");
if (!user.in) unlz4error("file not found");
}
}
// and go !
unlz4_userPtr(getByteFromIn, sendBytesToOut, dictionary, &user);
return 0;
}
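
Per the fileoverview note, getByteFromIn() and sendBytesToOut() can be
replaced for in-memory decompression. A minimal sketch (the Mem struct
and the caller-provided output buffer are illustrative; only
unlz4_userPtr() and the callback typedefs come from this file):

    struct Mem {
      const unsigned char *in;  // compressed input
      unsigned int inPos, inLen;
      unsigned char *out;       // caller-provided output buffer
      unsigned int outPos;
    };

    static unsigned char memGetByte(void *userPtr) {
      struct Mem *m = (struct Mem *)userPtr;
      // a real implementation should call unlz4error() on truncated input
      return m->inPos < m->inLen ? m->in[m->inPos++] : 0;
    }

    static void memSendBytes(const unsigned char *data, unsigned int numBytes,
                             void *userPtr) {
      struct Mem *m = (struct Mem *)userPtr;
      unsigned int i;
      for (i = 0; i < numBytes; i++) m->out[m->outPos++] = data[i];
    }

    // usage:
    //   struct Mem m = {compressed, 0, compressedLen, output, 0};
    //   unlz4_userPtr(memGetByte, memSendBytes, NULL, &m);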

third_party/smallz4/stub.c (new vendored file, 20 lines)

@@ -0,0 +1,20 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
// file intentionally empty