Import C++ Standard Template Library

You can now use the hardest, fastest, and most dangerous language there is with Cosmopolitan. So far about 75% of LLVM libcxx has been added. A few breaking changes needed to be made to help this go smoothly.

- Rename nothrow to dontthrow
- Rename nodiscard to dontdiscard
- Add some libm functions, e.g. lgamma, nan, etc.
- Change intmax_t from int128 to int64 like everything else
- Introduce %jjd formatting directive for int128_t
- Introduce strtoi128(), strtou128(), etc.
- Rename bsrmax() to bsr128()

Some of the templates that should be working currently are std::vector, std::string, std::map, std::set, std::deque, etc.
2025-07-31 23:10:27 +00:00 · 2022-03-22 05:51:41 -07:00 · 2022-03-22 05:51:41 -07:00 · 868af3f950
commit 868af3f950
parent 5022f9e920
286 changed files with 123987 additions and 507 deletions
--- a/third_party/smallz4/smallz4.cc
+++ b/third_party/smallz4/smallz4.cc
@ -0,0 +1,314 @@
+/*-*- mode:c++;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
+│vi: set et ft=c ts=8 tw=8 fenc=utf-8                                       :vi│
+╚──────────────────────────────────────────────────────────────────────────────╝
+│                                                                              │
+│  smallz4                                                                     │
+│  Copyright (c) 2016-2019 Stephan Brumme. All rights reserved.                │
+│  See https://create.stephan-brumme.com/smallz4/                              │
+│                                                                              │
+│  Permission is hereby granted, free of charge, to any person obtaining       │
+│  a copy of this software and associated documentation files (the             │
+│  "Software"), to deal in the Software without restriction, including         │
+│  without limitation the rights to use, copy, modify, merge, publish,         │
+│  distribute, sublicense, and/or sell copies of the Software, and to          │
+│  permit persons to whom the Software is furnished to do so, subject to       │
+│  the following conditions:                                                   │
+│                                                                              │
+│  The above copyright notice and this permission notice shall be              │
+│  included in all copies or substantial portions of the Software.             │
+│                                                                              │
+│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
+│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
+│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
+│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
+│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
+│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
+│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
+│                                                                              │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/calls/calls.h"
+#include "libc/calls/weirdtypes.h"
+#include "libc/runtime/runtime.h"
+#include "libc/stdio/stdio.h"
+#include "libc/time/time.h"
+#include "third_party/smallz4/smallz4.hh"
+
+/// Report a fatal error and terminate the process.
+///
+/// Prints "ERROR: <msg>" followed by a newline to stderr, then calls
+/// exit(code), so control never returns to the caller.
+///
+/// @param msg   text printed after the "ERROR: " prefix
+/// @param code  status handed to exit(); defaults to 1
+static void error(const char* msg, int code = 1) {
+  fprintf(stderr, "ERROR: %s\n", msg);
+  exit(code);
+}
+
+// ==================== user-specific I/O INTERFACE ====================
+
+/// State handed to the I/O callbacks through their opaque userPtr argument.
+struct UserPtr {
+  // file handles
+  FILE* in;   // stream that getBytesFromIn() reads from
+  FILE* out;  // stream that sendBytesToOut() writes to
+  // the attributes below are just needed for verbose output
+  bool verbose;          // when false, verbose() prints nothing
+  uint64_t numBytesIn;   // running total of bytes read from `in`
+  uint64_t numBytesOut;  // running total of bytes passed to sendBytesToOut()
+  // size of the input, used for the progress estimate; main() fills it in only
+  // for verbose runs on a named input file and leaves it 0 otherwise
+  uint64_t totalSize;
+  // time() value taken just before compression; main() sets it only when
+  // verbose is enabled
+  time_t starttime;
+};
+
+/// Input callback: fill "data" with up to numBytes bytes from the input stream.
+///
+/// @param data      destination buffer; nothing is read when it is NULL
+/// @param numBytes  capacity of the buffer; nothing is read when it is 0
+/// @param userPtr   pointer to the UserPtr carrying the stream and counters
+/// @return number of bytes actually stored, as reported by fread(); 0 when
+///         nothing was requested or nothing could be read
+size_t getBytesFromIn(void* data, size_t numBytes, void* userPtr) {
+  // nothing to do without a buffer or with an empty request
+  if (data == NULL || numBytes == 0) return 0;
+
+  UserPtr* state = static_cast<UserPtr*>(userPtr);
+  const size_t got = fread(data, 1, numBytes, state->in);
+  // keep the running total that the verbose output reports
+  state->numBytesIn += got;
+  return got;
+}
+
+/// Print a one-line progress report on STDERR.
+///
+/// Does nothing unless user.verbose is set, at least one input byte has been
+/// read, and at least one full second has elapsed since user.starttime. The
+/// line starts with '\r' so that successive reports overwrite each other.
+///
+/// The counters are uint64_t, so every value is converted explicitly to the
+/// type its printf directive expects; handing a 64-bit value to "%d" is
+/// undefined behavior.
+void verbose(const UserPtr& user) {
+  if (!user.verbose) return;
+  if (user.numBytesIn == 0) return;
+
+  // elapsed time in seconds; a non-positive value (no full second yet, or the
+  // clock stepped backwards) would divide by zero or wrap in the unsigned
+  // arithmetic below
+  const int duration = int(time(NULL) - user.starttime);
+  if (duration <= 0) return;
+
+  // estimated total run time in seconds; 0 when user.totalSize is 0
+  const int estimated =
+      int(uint64_t(duration) * user.totalSize / user.numBytesIn);
+
+  const int ratio = int(100 * user.numBytesOut / user.numBytesIn);
+  const int kBytePerSec = int(user.numBytesIn / uint64_t(duration) / 1024);
+
+  // display on STDERR
+  fprintf(stderr, "\r%llu bytes => %llu bytes (%d%%",
+          (unsigned long long)user.numBytesIn,
+          (unsigned long long)user.numBytesOut, ratio);
+  if (estimated > 0) fprintf(stderr, ", %d%% done", 100 * duration / estimated);
+  fprintf(stderr, "), after %d seconds @ %d kByte/s", duration, kBytePerSec);
+  if (estimated > 0)
+    fprintf(stderr, ", about %d seconds left  ", estimated - duration);
+}
+
+/// Output callback: write a block of bytes to user->out and update the
+/// output counter.
+///
+/// A short write (disk full, closed pipe, ...) is reported through error(),
+/// which terminates the process. Ignoring the result of fwrite() would let a
+/// truncated archive go unnoticed.
+///
+/// @param data      bytes to write; the call is a no-op when NULL
+/// @param numBytes  number of bytes at data; the call is a no-op when 0
+/// @param userPtr   pointer to the UserPtr carrying the stream and counters
+void sendBytesToOut(const void* data, size_t numBytes, void* userPtr) {
+  /// cast user-specific data
+  UserPtr* user = (UserPtr*)userPtr;
+  if (!data || numBytes == 0) return;
+
+  if (fwrite(data, 1, numBytes, user->out) != numBytes)
+    error("cannot write to output");
+  user->numBytesOut += numBytes;
+
+  // verbose() returns immediately when user->verbose is off
+  verbose(*user);
+}
+
+// ==================== COMMAND-LINE HANDLING ====================
+
+/// Print the usage text to STDOUT.
+///
+/// @param program  name inserted into the usage line and every example
+///                 (main() passes argv[0])
+static void showHelp(const char* program) {
+  // boundary between the greedy levels and the lazy-matching levels
+  const auto greedyMax = smallz4::ShortChainsGreedy;
+  const auto lazyMin = smallz4::ShortChainsGreedy + 1;
+  // fills the second %s that directly follows the version number
+  const char* const versionSuffix = "";
+
+  printf(
+      "smalLZ4 %s%s: compressor with optimal parsing, fully compatible with "
+      "LZ4 by Yann Collet (see https://lz4.org)\n"
+      "\n"
+      "Basic usage:\n"
+      "  %s [flags] [input] [output]\n"
+      "\n"
+      "This program writes to STDOUT if output isn't specified\n"
+      "and reads from STDIN if input isn't specified, either.\n"
+      "\n"
+      "Examples:\n"
+      "  %s   < abc.txt > abc.txt.lz4    # use STDIN and STDOUT\n"
+      "  %s     abc.txt > abc.txt.lz4    # read from file and write to STDOUT\n"
+      "  %s     abc.txt   abc.txt.lz4    # read from and write to file\n"
+      "  cat abc.txt | %s - abc.txt.lz4  # read from STDIN and write to file\n"
+      "  %s -6  abc.txt   abc.txt.lz4    # compression level 6 (instead of "
+      "default 9)\n"
+      "  %s -f  abc.txt   abc.txt.lz4    # overwrite an existing file\n"
+      "  %s -f7 abc.txt   abc.txt.lz4    # compression level 7 and overwrite "
+      "an existing file\n"
+      "\n"
+      "Flags:\n"
+      "  -0, -1 ... -9   Set compression level, default: 9 (see below)\n"
+      "  -h              Display this help message\n"
+      "  -f              Overwrite an existing file\n"
+      "  -l              Use LZ4 legacy file format\n"
+      "  -D [FILE]       Load dictionary\n"
+      "  -v              Verbose\n"
+      "\n"
+      "Compression levels:\n"
+      " -0               No compression\n"
+      " -1 ... -%d        Greedy search, check 1 to %d matches\n"
+      " -%d ... -8        Lazy matching with optimal parsing, check %d to 8 "
+      "matches\n"
+      " -9               Optimal parsing, check all possible matches "
+      "(default)\n"
+      "\n"
+      "Written in 2016-2020 by Stephan Brumme "
+      "https://create.stephan-brumme.com/smallz4/\n",
+      // banner
+      smallz4::getVersion(), versionSuffix,
+      // usage line followed by the seven examples
+      program, program, program, program, program, program, program, program,
+      // "-1 ... -N", "check 1 to N", "-M ... -8", "check M to 8"
+      greedyMax, greedyMax, lazyMin, lazyMin);
+}
+
+/// Command-line entry point: parse the flags, open the streams, optionally
+/// load a dictionary, then compress.
+///
+/// Usage: program [flags] [input] [output]. A missing name or "-" selects
+/// STDIN / STDOUT. Returns 0 on success; every failure detected here goes
+/// through error(), which exits with status 1.
+int main(int argc, const char* argv[]) {
+  // show help if no parameters and stdin isn't a pipe
+  if (argc == 1 && isatty(fileno(stdin)) != 0) {
+    showHelp(argv[0]);
+    return 0;
+  }
+
+  unsigned short maxChainLength =
+      65535;  // "unlimited" because search window contains only 2^16 bytes
+
+  // overwrite output ?
+  bool overwrite = false;
+  // legacy format ? (not recommended, but smaller files if input < 8 MB)
+  bool useLegacy = false;
+  // preload dictionary from disk
+  const char* dictionary = NULL;
+
+  // default input/output streams
+  UserPtr user;
+  user.in = stdin;
+  user.out = stdout;
+  user.verbose = false;
+  user.numBytesIn = 0;
+  user.numBytesOut = 0;
+  user.totalSize = 0;
+  user.starttime = 0;
+
+  // parse flags
+  int nextArgument = 1;
+  while (argc > nextArgument && argv[nextArgument][0] == '-') {
+    const char* flags = argv[nextArgument];
+
+    // a lone "-" is not a flag group: it names STDIN as the input, so leave it
+    // for the input handling below (otherwise "- out" would open "out" for
+    // reading)
+    if (flags[1] == '\0') break;
+
+    // set by -D, whose filename is the following argument; it must start out
+    // false for every flag group, or each group after -D would swallow one
+    // more argument
+    bool skipArgument = false;
+
+    for (int argPos = 1; flags[argPos] != '\0'; argPos++) {
+      const char flag = flags[argPos];
+      switch (flag) {
+          // show help
+        case 'h':
+          showHelp(argv[0]);
+          return 0;
+
+          // force overwrite
+        case 'f':
+          overwrite = true;
+          break;
+
+          // old LZ4 format
+        case 'l':
+          useLegacy = true;
+          break;
+
+          // use dictionary
+        case 'D':
+          if (nextArgument + 1 >= argc) error("no dictionary filename found");
+          dictionary =
+              argv[nextArgument +
+                   1];  // TODO: any flag immediately after -D causes an error
+          skipArgument = true;
+          break;
+
+          // display some info on STDERR while compressing
+        case 'v':
+          user.verbose = true;
+          break;
+
+          // set compression level
+        case '0':
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+          // use the character just read, not flags[1]: in "-f7" the digit is
+          // the second flag of the group
+          maxChainLength = (unsigned short)(
+              flag - '0');  // "0" => 0, "1" => 1, ..., "8" => 8
+          break;
+
+          // unlimited hash chain length
+        case '9':
+          // default maxChainLength is already "unlimited"
+          break;
+
+        default:
+          error("unknown flag");
+      }
+    }
+
+    nextArgument++;
+    if (skipArgument) nextArgument++;
+  }
+
+  // input file is given as first parameter or stdin if no parameter is given
+  // (or "-"); after the loop above the argument either does not start with
+  // '-' or is exactly "-"
+  if (argc > nextArgument) {
+    const char* inputName = argv[nextArgument];
+    const bool useStdin = inputName[0] == '-' && inputName[1] == '\0';
+    if (!useStdin) {
+      user.in = fopen(inputName, "rb");
+      if (!user.in) error("file not found");
+    }
+    nextArgument++;
+  }
+
+  // output file is given as second parameter or stdout if no parameter is given
+  // (or "-")
+  if (argc == nextArgument + 1 && argv[nextArgument][0] != '-') {
+    // check if file already exists
+    if (!overwrite) {
+      FILE* existing = fopen(argv[nextArgument], "rb");
+      if (existing) {
+        fclose(existing);
+        error("output file already exists");
+      }
+    }
+
+    user.out = fopen(argv[nextArgument], "wb");
+    if (!user.out) error("cannot create file");
+  }
+
+  // basic check of legacy format's restrictions
+  if (useLegacy) {
+    if (dictionary != 0) error("legacy format doesn't support dictionaries");
+    if (maxChainLength == 0)
+      error("legacy format doesn't support uncompressed files");
+  }
+
+  // load dictionary
+  std::vector<unsigned char> preload;
+  if (dictionary != NULL) {
+    // open dictionary
+    FILE* dict = fopen(dictionary, "rb");
+    if (!dict) error("cannot open dictionary");
+
+    // get dictionary's filesize; ftell() reports failure as a negative value
+    fseek(dict, 0, SEEK_END);
+    const long dictEnd = ftell(dict);
+    if (dictEnd < 0) error("cannot read dictionary");
+    size_t dictSize = (size_t)dictEnd;
+    // only the last 64k are relevant
+    const size_t Last64k = 65536;
+    size_t relevant = dictSize < Last64k ? 0 : dictSize - Last64k;
+    fseek(dict, (long)relevant, SEEK_SET);
+    if (dictSize > Last64k) dictSize = Last64k;
+
+    // read those bytes; an empty dictionary is skipped because taking
+    // &preload[0] of an empty vector is undefined
+    preload.resize(dictSize);
+    if (dictSize > 0 && fread(&preload[0], 1, dictSize, dict) != dictSize)
+      error("cannot read dictionary");
+    fclose(dict);
+  }
+
+  if (user.verbose) {
+    if (user.in != stdin) {
+      // the input size feeds the progress estimate; leave it at 0 when it
+      // cannot be determined
+      fseek(user.in, 0, SEEK_END);
+      const long inputEnd = ftell(user.in);
+      if (inputEnd > 0) user.totalSize = (uint64_t)inputEnd;
+      fseek(user.in, 0, SEEK_SET);
+    }
+
+    user.starttime = time(NULL);
+  }
+
+  // and go !
+  smallz4::lz4(getBytesFromIn, sendBytesToOut, maxChainLength, preload,
+               useLegacy, &user);
+
+  // buffered data may only fail once it is flushed; do not report success for
+  // a truncated archive
+  if (fflush(user.out) != 0 || ferror(user.out))
+    error("cannot write to output");
+
+  // the counters are uint64_t: convert them to what the directives expect
+  if (user.verbose && user.numBytesIn > 0)
+    fprintf(stderr,
+            "\r%llu bytes => %llu bytes (%d%%) after %d seconds                "
+            "                                                      \n",
+            (unsigned long long)user.numBytesIn,
+            (unsigned long long)user.numBytesOut,
+            int(100 * user.numBytesOut / user.numBytesIn),
+            int(time(NULL) - user.starttime));
+
+  return 0;
+}