Import C++ Standard Template Library

You can now use the hardest, fastest, and most dangerous language there is with Cosmopolitan. So far about 75% of LLVM libcxx has been added. A few breaking changes needed to be made to help this go smoothly:

- Rename nothrow to dontthrow
- Rename nodiscard to dontdiscard
- Add some libm functions, e.g. lgamma, nan, etc.
- Change intmax_t from int128 to int64 like everything else
- Introduce %jjd formatting directive for int128_t
- Introduce strtoi128(), strtou128(), etc.
- Rename bsrmax() to bsr128()

Some of the templates that should be working currently are std::vector, std::string, std::map, std::set, std::deque, etc.
2025-09-10 10:43:48 +00:00 · 2022-03-22 05:51:41 -07:00 · 2022-03-22 05:51:41 -07:00 · 868af3f950
commit 868af3f950
parent 5022f9e920
286 changed files with 123987 additions and 507 deletions
--- a/third_party/smallz4/smallz4cat.c
+++ b/third_party/smallz4/smallz4cat.c
@ -0,0 +1,356 @@
+/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
+│vi: set et ft=c ts=8 tw=8 fenc=utf-8                                       :vi│
+╚──────────────────────────────────────────────────────────────────────────────╝
+│                                                                              │
+│  smallz4cat                                                                  │
+│  Copyright (c) 2016-2019 Stephan Brumme. All rights reserved.                │
+│  See https://create.stephan-brumme.com/smallz4/                              │
+│                                                                              │
+│  Permission is hereby granted, free of charge, to any person obtaining       │
+│  a copy of this software and associated documentation files (the             │
+│  "Software"), to deal in the Software without restriction, including         │
+│  without limitation the rights to use, copy, modify, merge, publish,         │
+│  distribute, sublicense, and/or sell copies of the Software, and to          │
+│  permit persons to whom the Software is furnished to do so, subject to       │
+│  the following conditions:                                                   │
+│                                                                              │
+│  The above copyright notice and this permission notice shall be              │
+│  included in all copies or substantial portions of the Software.             │
+│                                                                              │
+│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
+│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
+│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
+│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
+│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
+│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
+│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
+│                                                                              │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/calls/calls.h"
+#include "libc/mem/mem.h"
+#include "libc/runtime/gc.internal.h"
+#include "libc/stdio/stdio.h"
+
+/**
+ * @fileoverview shorter, more readable, albeit slower re-implementation
+ *     of lz4cat ( https://github.com/lz4/lz4 )
+ *
+ * Limitations:
+ *
+ * - Skippable frames are not implemented (and most likely never will
+ *   be); legacy frames are accepted, see unlz4_userPtr()
+ *
+ * - Checksums are not verified (see https://create.stephan-brumme.com/xxhash/
+ *   for a simple implementation)
+ *
+ * Replace getByteFromIn() and sendBytesToOut() with your own code if you
+ * need in-memory LZ4 decompression. Corrupted data causes a call to
+ * unlz4error().
+ */
+
+#define HISTORY_SIZE     65536  // bytes of decoded history kept; don't change
+#define READ_BUFFER_SIZE 1024   // bytes read from input per fread; change at will
+
+static void unlz4error(const char* msg) {
+  fputs("ERROR: ", stderr);
+  fputs(msg, stderr);
+  fputc('\n', stderr);
+  exit(1);
+}
+
+typedef unsigned char (*GET_BYTE)(void*);  // returns next input byte; arg is userPtr
+typedef void (*SEND_BYTES)(const unsigned char*, unsigned int, void*);  // takes data, byte count, userPtr
+
+struct UserPtr {  // state handed to getByteFromIn() and sendBytesToOut()
+  // file handles
+  FILE* in;   // stream getByteFromIn() reads from
+  FILE* out;  // stream sendBytesToOut() writes to
+  unsigned char readBuffer[READ_BUFFER_SIZE];  // most recent chunk read from in
+  unsigned int pos;        // index of the next unread byte in readBuffer
+  unsigned int available;  // number of valid bytes in readBuffer
+};
+
+/// hand out the next input byte; the read buffer is refilled from the
+/// input stream each time it has been used up, and unlz4error() is called
+/// once the stream delivers nothing more
+static unsigned char getByteFromIn(void* userPtr) {
+  struct UserPtr* state = userPtr;
+
+  // common case: unread bytes are still buffered
+  if (state->pos != state->available) {
+    return state->readBuffer[state->pos++];
+  }
+
+  // buffer drained => start over with a fresh chunk
+  state->pos = 0;
+  state->available =
+      fread(state->readBuffer, 1, sizeof(state->readBuffer), state->in);
+  if (state->available == 0) unlz4error("out of data");
+  return state->readBuffer[state->pos++];
+}
+
+/**
+ * Writes a block of decoded bytes to the output stream.
+ *
+ * Nothing happens when data is NULL or numBytes is zero. A short write
+ * terminates the program through unlz4error(): carrying on would leave
+ * truncated output behind a successful exit status.
+ *
+ * @param data bytes to write
+ * @param numBytes number of bytes stored at data
+ * @param userPtr the struct UserPtr whose out stream is written to
+ */
+static void sendBytesToOut(const unsigned char* data, unsigned int numBytes,
+                           void* userPtr) {
+  /// cast user-specific data
+  struct UserPtr* user = (struct UserPtr*)userPtr;
+  if (data == NULL || numBytes == 0) return;
+  if (fwrite(data, 1, numBytes, user->out) != numBytes) {
+    unlz4error("cannot write output");
+  }
+}
+
+/**
+ * Decompresses the whole LZ4 stream delivered by getByte and hands the
+ * decoded bytes to sendBytes.
+ *
+ * Streams starting with signature 0x184D2204 or 0x184C2102 are
+ * accepted, anything else is rejected. Block, header and content
+ * checksums are skipped, never verified. Output is passed on each time
+ * the HISTORY_SIZE byte history buffer has been filled and once more
+ * at the very end. Problems are reported through unlz4error().
+ *
+ * @param getByte callback returning the next input byte
+ * @param sendBytes callback receiving decoded bytes and their count
+ * @param dictionary path of a file whose last (up to) 64k are loaded
+ *     into the end of the history buffer, or NULL for no dictionary
+ * @param userPtr passed unchanged to both callbacks
+ */
+void unlz4_userPtr(GET_BYTE getByte, SEND_BYTES sendBytes,
+                   const char* dictionary, void* userPtr) {
+  // signature, stored little endian; each byte is widened before shifting
+  // so that a top byte >= 0x80 is never shifted into the sign bit of a
+  // promoted int
+  unsigned int signature = getByte(userPtr);
+  signature |= (unsigned int)getByte(userPtr) << 8;
+  signature |= (unsigned int)getByte(userPtr) << 16;
+  signature |= (unsigned int)getByte(userPtr) << 24;
+  unsigned char isModern = (signature == 0x184D2204);
+  unsigned char isLegacy = (signature == 0x184C2102);
+  if (!isModern && !isLegacy) unlz4error("invalid signature");
+
+  unsigned char hasBlockChecksum = false;
+  unsigned char hasContentSize = false;
+  unsigned char hasContentChecksum = false;
+  unsigned char hasDictionaryID = false;
+  if (isModern) {
+    // flags
+    unsigned char flags = getByte(userPtr);
+    hasBlockChecksum = flags & 16;
+    hasContentSize = flags & 8;
+    hasContentChecksum = flags & 4;
+    hasDictionaryID = flags & 1;
+
+    // only version 1 file format
+    unsigned char version = flags >> 6;
+    if (version != 1) unlz4error("only LZ4 file format version 1 supported");
+
+    // ignore blocksize
+    char numIgnore = 1;
+
+    // ignore, skip 8 bytes
+    if (hasContentSize) numIgnore += 8;
+    // ignore, skip 4 bytes
+    if (hasDictionaryID) numIgnore += 4;
+
+    // ignore header checksum (xxhash32 of everything up this point & 0xFF)
+    numIgnore++;
+
+    // skip all those ignored bytes
+    while (numIgnore--) getByte(userPtr);
+  }
+
+  // contains the latest decoded data; zero-filled so that a match which
+  // points at a part of the buffer that was never written copies zeros
+  // instead of uninitialized heap contents
+  void* window = calloc(1, HISTORY_SIZE);
+  if (!window) unlz4error("out of memory");
+  unsigned char* history = gc(window);
+  // next free position in history[], always < HISTORY_SIZE
+  unsigned int pos = 0;
+
+  // dictionary compression is a recently introduced feature, just move its
+  // contents to the buffer
+  if (dictionary != NULL) {
+    // open dictionary
+    FILE* dict = fopen(dictionary, "rb");
+    if (!dict) unlz4error("cannot open dictionary");
+
+    // get dictionary's filesize; a failing ftell() returns -1, which must
+    // not be used as a size
+    if (fseek(dict, 0, SEEK_END) != 0) unlz4error("cannot read dictionary");
+    long dictSize = ftell(dict);
+    if (dictSize < 0) unlz4error("cannot read dictionary");
+    // only the last 64k are relevant
+    long relevant = dictSize < HISTORY_SIZE ? 0 : dictSize - HISTORY_SIZE;
+    if (fseek(dict, relevant, SEEK_SET) != 0) {
+      unlz4error("cannot read dictionary");
+    }
+    if (dictSize > HISTORY_SIZE) dictSize = HISTORY_SIZE;
+    // read it and store it at the end of the buffer
+    if (fread(history + HISTORY_SIZE - dictSize, 1, dictSize, dict) !=
+        (size_t)dictSize) {
+      unlz4error("cannot read dictionary");
+    }
+    fclose(dict);
+  }
+
+  // parse all blocks until blockSize == 0
+  while (1) {
+    // block size
+    unsigned int blockSize = getByte(userPtr);
+    blockSize |= (unsigned int)getByte(userPtr) << 8;
+    blockSize |= (unsigned int)getByte(userPtr) << 16;
+    blockSize |= (unsigned int)getByte(userPtr) << 24;
+
+    // highest bit set ?
+    unsigned char isCompressed = isLegacy || (blockSize & 0x80000000) == 0;
+    if (isModern) blockSize &= 0x7FFFFFFF;
+
+    // stop after last block
+    if (blockSize == 0) break;
+
+    if (isCompressed) {
+      // decompress block
+      unsigned int blockOffset = 0;
+      unsigned int numWritten = 0;
+      while (blockOffset < blockSize) {
+        // get a token
+        unsigned char token = getByte(userPtr);
+        blockOffset++;
+
+        // determine number of literals
+        unsigned int numLiterals = token >> 4;
+        if (numLiterals == 15) {
+          // number of literals length encoded in more than 1 byte
+          unsigned char current;
+          do {
+            current = getByte(userPtr);
+            numLiterals += current;
+            blockOffset++;
+          } while (current == 255);
+        }
+
+        blockOffset += numLiterals;
+
+        // copy all those literals; the test is written as a subtraction
+        // because pos + numLiterals could wrap around for a huge length
+        // and then wrongly select the unchecked fast loop
+        if (numLiterals < HISTORY_SIZE - pos) {
+          // fast loop
+          while (numLiterals-- > 0) history[pos++] = getByte(userPtr);
+        } else {
+          // slow loop
+          while (numLiterals-- > 0) {
+            history[pos++] = getByte(userPtr);
+
+            // flush output buffer
+            if (pos == HISTORY_SIZE) {
+              sendBytes(history, HISTORY_SIZE, userPtr);
+              numWritten += HISTORY_SIZE;
+              pos = 0;
+            }
+          }
+        }
+
+        // last token has only literals
+        if (blockOffset == blockSize) break;
+
+        // match distance is encoded in two bytes (little endian)
+        unsigned int delta = getByte(userPtr);
+        delta |= (unsigned int)getByte(userPtr) << 8;
+        // zero isn't allowed
+        if (delta == 0) unlz4error("invalid offset");
+        blockOffset += 2;
+
+        // match length (always >= 4, therefore length is stored minus 4)
+        unsigned int matchLength = 4 + (token & 0x0F);
+        if (matchLength == 4 + 0x0F) {
+          unsigned char current;
+          do  // match length encoded in more than 1 byte
+          {
+            current = getByte(userPtr);
+            matchLength += current;
+            blockOffset++;
+          } while (current == 255);
+        }
+
+        // copy match
+        unsigned int referencePos =
+            (pos >= delta) ? (pos - delta) : (HISTORY_SIZE + pos - delta);
+        // start and end within the current 64k block ? (subtractions for
+        // the same wrap-around reason as with the literals)
+        if (matchLength < HISTORY_SIZE - pos &&
+            matchLength < HISTORY_SIZE - referencePos) {
+          // read/write continuous block (no wrap-around at the end of
+          // history[]) fast copy
+          if (pos >= referencePos + matchLength ||
+              referencePos >= pos + matchLength) {
+            // non-overlapping
+            memcpy(history + pos, history + referencePos, matchLength);
+            pos += matchLength;
+          } else {
+            // overlapping, slower byte-wise copy
+            while (matchLength-- > 0) history[pos++] = history[referencePos++];
+          }
+        } else {
+          // either read or write wraps around at the end of history[]
+          while (matchLength-- > 0) {
+            // copy single byte
+            history[pos++] = history[referencePos++];
+
+            // cannot write anymore ? => wrap around
+            if (pos == HISTORY_SIZE) {
+              // flush output buffer
+              sendBytes(history, HISTORY_SIZE, userPtr);
+              numWritten += HISTORY_SIZE;
+              pos = 0;
+            }
+            // wrap-around of read location
+            referencePos %= HISTORY_SIZE;
+          }
+        }
+      }
+
+      // all legacy blocks must be completely filled - except for the last one
+      if (isLegacy && numWritten + pos < 8 * 1024 * 1024) break;
+    } else {
+      // copy uncompressed data and add to history, too (if next block is
+      // compressed and some matches refer to this block)
+      while (blockSize-- > 0) {
+        // copy a byte ...
+        history[pos++] = getByte(userPtr);
+        // ... until buffer is full => send to output
+        if (pos == HISTORY_SIZE) {
+          sendBytes(history, HISTORY_SIZE, userPtr);
+          pos = 0;
+        }
+      }
+    }
+
+    if (hasBlockChecksum) {
+      // ignore checksum, skip 4 bytes
+      getByte(userPtr);
+      getByte(userPtr);
+      getByte(userPtr);
+      getByte(userPtr);
+    }
+  }
+
+  if (hasContentChecksum) {
+    // ignore checksum, skip 4 bytes
+    getByte(userPtr);
+    getByte(userPtr);
+    getByte(userPtr);
+    getByte(userPtr);
+  }
+
+  // flush output buffer
+  sendBytes(history, pos, userPtr);
+}
+
+/// legacy entry point without user data: forwards to unlz4_userPtr() with
+/// a NULL userPtr, so getByte and sendBytes have to locate their streams
+/// themselves (e.g. through global file handles)
+void unlz4(GET_BYTE getByte, SEND_BYTES sendBytes, const char* dictionary) {
+  void* noUserData = NULL;
+  unlz4_userPtr(getByte, sendBytes, dictionary, noUserData);
+}
+
+/**
+ * Command-line entry point: decompresses one file, or standard input
+ * when no file is named, to standard output.
+ *
+ * Arguments:
+ *
+ * - an argument starting with "-D" makes the following argument the
+ *   dictionary file
+ * - an argument that doesn't start with '-' is the input file, at most
+ *   one is allowed
+ * - any other argument, including a lone "-" and an empty string, is
+ *   ignored, which leaves standard input as the source
+ *
+ * @return 0 on success; every error ends the process via unlz4error()
+ */
+int main(int argc, const char* argv[]) {
+  // default input/output streams
+  struct UserPtr user = {.in = stdin,
+                         .out = stdout,
+                         .pos = 0,  // initial input buffer is empty
+                         .available = 0};
+
+  const char* dictionary = NULL;
+
+  for (int parameter = 1; parameter < argc; parameter++) {
+    const char* current = argv[parameter];
+
+    // dictionary
+    if (current[0] == '-' && current[1] == 'D') {
+      if (parameter + 1 >= argc) unlz4error("no dictionary filename found");
+      dictionary = argv[++parameter];
+      continue;
+    }
+
+    // "-" stands for STDIN, which is the default anyway. Looking at
+    // current[0] only never reads past the terminator of an empty
+    // argument and lets one-character filenames through.
+    if (current[0] == '-' || current[0] == '\0') continue;
+
+    // already have a filename - at most one filename is allowed (except for
+    // dictionary)
+    if (user.in != stdin) unlz4error("can only decompress one file at a time");
+
+    // open the argument currently examined; it is not necessarily argv[1],
+    // e.g. when a dictionary option precedes it
+    user.in = fopen(current, "rb");
+    if (!user.in) unlz4error("file not found");
+  }
+
+  // and go !
+  unlz4_userPtr(getByteFromIn, sendBytesToOut, dictionary, &user);
+
+  // a failed write must not end in exit status 0; the stream's error flag
+  // also covers writes that failed earlier on
+  if (fflush(user.out) != 0 || ferror(user.out)) {
+    unlz4error("cannot write output");
+  }
+  if (user.in != stdin) fclose(user.in);
+  return 0;
+}