mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-19 17:10:30 +00:00
Import C++ Standard Template Library
You can now use the hardest fastest and most dangerous language there is with Cosmopolitan. So far about 75% of LLVM libcxx has been added. A few breaking changes needed to be made to help this go smoothly. - Rename nothrow to dontthrow - Rename nodiscard to dontdiscard - Add some libm functions, e.g. lgamma, nan, etc. - Change intmax_t from int128 to int64 like everything else - Introduce %jjd formatting directive for int128_t - Introduce strtoi128(), strtou128(), etc. - Rename bsrmax() to bsr128() Some of the templates that should be working currently are std::vector, std::string, std::map, std::set, std::deque, etc.
This commit is contained in:
parent
5022f9e920
commit
868af3f950
286 changed files with 123987 additions and 507 deletions
356
third_party/smallz4/smallz4cat.c
vendored
Normal file
356
third_party/smallz4/smallz4cat.c
vendored
Normal file
|
@ -0,0 +1,356 @@
|
|||
/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||
│ │
|
||||
│ smallz4cat │
|
||||
│ Copyright (c) 2016-2019 Stephan Brumme. All rights reserved. │
|
||||
│ See https://create.stephan-brumme.com/smallz4/ │
|
||||
│ │
|
||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||
│ a copy of this software and associated documentation files (the │
|
||||
│ "Software"), to deal in the Software without restriction, including │
|
||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
||||
│ the following conditions: │
|
||||
│ │
|
||||
│ The above copyright notice and this permission notice shall be │
|
||||
│ included in all copies or substantial portions of the Software. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||
│ │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/runtime/gc.internal.h"
|
||||
#include "libc/stdio/stdio.h"
|
||||
|
||||
/**
|
||||
* @fileoverview shorter, more readable, albeit slower re-implementation
|
||||
* of lz4cat ( https://github.com/Cyan4973/xxHash )
|
||||
*
|
||||
* Limitations:
|
||||
*
|
||||
* - Skippable frames and legacy frames are not implemented (and most
|
||||
* likely never will)
|
||||
*
|
||||
* - Checksums are not verified (see https://create.stephan-brumme.com/xxhash/
|
||||
* for a simple implementation)
|
||||
*
|
||||
* Replace getByteFromIn() and sendToOut() by your own code if you need
|
||||
* in-memory LZ4 decompression. Corrupted data causes a call to
|
||||
* unlz4error().
|
||||
*/
|
||||
|
||||
#define HISTORY_SIZE 65536 // don't change
|
||||
#define READ_BUFFER_SIZE 1024 // change at will
|
||||
|
||||
static void unlz4error(const char* msg) {
|
||||
fputs("ERROR: ", stderr);
|
||||
fputs(msg, stderr);
|
||||
fputc('\n', stderr);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
typedef unsigned char (*GET_BYTE)(void*);
|
||||
typedef void (*SEND_BYTES)(const unsigned char*, unsigned int, void*);
|
||||
|
||||
struct UserPtr {
|
||||
// file handles
|
||||
FILE* in;
|
||||
FILE* out;
|
||||
unsigned char readBuffer[READ_BUFFER_SIZE];
|
||||
unsigned int pos;
|
||||
unsigned int available;
|
||||
};
|
||||
|
||||
/// read a single byte (with simple buffering)
|
||||
static unsigned char getByteFromIn(void* userPtr) {
|
||||
struct UserPtr* user = (struct UserPtr*)userPtr;
|
||||
if (user->pos == user->available) {
|
||||
user->pos = 0;
|
||||
user->available = fread(user->readBuffer, 1, READ_BUFFER_SIZE, user->in);
|
||||
if (user->available == 0) unlz4error("out of data");
|
||||
}
|
||||
return user->readBuffer[user->pos++];
|
||||
}
|
||||
|
||||
/// write a block of bytes
|
||||
static void sendBytesToOut(const unsigned char* data, unsigned int numBytes,
|
||||
void* userPtr) {
|
||||
/// cast user-specific data
|
||||
struct UserPtr* user = (struct UserPtr*)userPtr;
|
||||
if (data != NULL && numBytes > 0) fwrite(data, 1, numBytes, user->out);
|
||||
}
|
||||
|
||||
/// decompress everything in input stream (accessed via getByte) and write to
|
||||
/// output stream (via sendBytes)
|
||||
void unlz4_userPtr(GET_BYTE getByte, SEND_BYTES sendBytes,
|
||||
const char* dictionary, void* userPtr) {
|
||||
// signature
|
||||
unsigned char signature1 = getByte(userPtr);
|
||||
unsigned char signature2 = getByte(userPtr);
|
||||
unsigned char signature3 = getByte(userPtr);
|
||||
unsigned char signature4 = getByte(userPtr);
|
||||
unsigned int signature =
|
||||
(signature4 << 24) | (signature3 << 16) | (signature2 << 8) | signature1;
|
||||
unsigned char isModern = (signature == 0x184D2204);
|
||||
unsigned char isLegacy = (signature == 0x184C2102);
|
||||
if (!isModern && !isLegacy) unlz4error("invalid signature");
|
||||
|
||||
unsigned char hasBlockChecksum = false;
|
||||
unsigned char hasContentSize = false;
|
||||
unsigned char hasContentChecksum = false;
|
||||
unsigned char hasDictionaryID = false;
|
||||
if (isModern) {
|
||||
// flags
|
||||
unsigned char flags = getByte(userPtr);
|
||||
hasBlockChecksum = flags & 16;
|
||||
hasContentSize = flags & 8;
|
||||
hasContentChecksum = flags & 4;
|
||||
hasDictionaryID = flags & 1;
|
||||
|
||||
// only version 1 file format
|
||||
unsigned char version = flags >> 6;
|
||||
if (version != 1) unlz4error("only LZ4 file format version 1 supported");
|
||||
|
||||
// ignore blocksize
|
||||
char numIgnore = 1;
|
||||
|
||||
// ignore, skip 8 bytes
|
||||
if (hasContentSize) numIgnore += 8;
|
||||
// ignore, skip 4 bytes
|
||||
if (hasDictionaryID) numIgnore += 4;
|
||||
|
||||
// ignore header checksum (xxhash32 of everything up this point & 0xFF)
|
||||
numIgnore++;
|
||||
|
||||
// skip all those ignored bytes
|
||||
while (numIgnore--) getByte(userPtr);
|
||||
}
|
||||
|
||||
// contains the latest decoded data
|
||||
unsigned char* history = gc(malloc(HISTORY_SIZE));
|
||||
// next free position in history[]
|
||||
unsigned int pos = 0;
|
||||
|
||||
// dictionary compression is a recently introduced feature, just move its
|
||||
// contents to the buffer
|
||||
if (dictionary != NULL) {
|
||||
// open dictionary
|
||||
FILE* dict = fopen(dictionary, "rb");
|
||||
if (!dict) unlz4error("cannot open dictionary");
|
||||
|
||||
// get dictionary's filesize
|
||||
fseek(dict, 0, SEEK_END);
|
||||
long dictSize = ftell(dict);
|
||||
// only the last 64k are relevant
|
||||
long relevant = dictSize < 65536 ? 0 : dictSize - 65536;
|
||||
fseek(dict, relevant, SEEK_SET);
|
||||
if (dictSize > 65536) dictSize = 65536;
|
||||
// read it and store it at the end of the buffer
|
||||
fread(history + HISTORY_SIZE - dictSize, 1, dictSize, dict);
|
||||
fclose(dict);
|
||||
}
|
||||
|
||||
// parse all blocks until blockSize == 0
|
||||
while (1) {
|
||||
// block size
|
||||
unsigned int blockSize = getByte(userPtr);
|
||||
blockSize |= (unsigned int)getByte(userPtr) << 8;
|
||||
blockSize |= (unsigned int)getByte(userPtr) << 16;
|
||||
blockSize |= (unsigned int)getByte(userPtr) << 24;
|
||||
|
||||
// highest bit set ?
|
||||
unsigned char isCompressed = isLegacy || (blockSize & 0x80000000) == 0;
|
||||
if (isModern) blockSize &= 0x7FFFFFFF;
|
||||
|
||||
// stop after last block
|
||||
if (blockSize == 0) break;
|
||||
|
||||
if (isCompressed) {
|
||||
// decompress block
|
||||
unsigned int blockOffset = 0;
|
||||
unsigned int numWritten = 0;
|
||||
while (blockOffset < blockSize) {
|
||||
// get a token
|
||||
unsigned char token = getByte(userPtr);
|
||||
blockOffset++;
|
||||
|
||||
// determine number of literals
|
||||
unsigned int numLiterals = token >> 4;
|
||||
if (numLiterals == 15) {
|
||||
// number of literals length encoded in more than 1 byte
|
||||
unsigned char current;
|
||||
do {
|
||||
current = getByte(userPtr);
|
||||
numLiterals += current;
|
||||
blockOffset++;
|
||||
} while (current == 255);
|
||||
}
|
||||
|
||||
blockOffset += numLiterals;
|
||||
|
||||
// copy all those literals
|
||||
if (pos + numLiterals < HISTORY_SIZE) {
|
||||
// fast loop
|
||||
while (numLiterals-- > 0) history[pos++] = getByte(userPtr);
|
||||
} else {
|
||||
// slow loop
|
||||
while (numLiterals-- > 0) {
|
||||
history[pos++] = getByte(userPtr);
|
||||
|
||||
// flush output buffer
|
||||
if (pos == HISTORY_SIZE) {
|
||||
sendBytes(history, HISTORY_SIZE, userPtr);
|
||||
numWritten += HISTORY_SIZE;
|
||||
pos = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// last token has only literals
|
||||
if (blockOffset == blockSize) break;
|
||||
|
||||
// match distance is encoded in two bytes (little endian)
|
||||
unsigned int delta = getByte(userPtr);
|
||||
delta |= (unsigned int)getByte(userPtr) << 8;
|
||||
// zero isn't allowed
|
||||
if (delta == 0) unlz4error("invalid offset");
|
||||
blockOffset += 2;
|
||||
|
||||
// match length (always >= 4, therefore length is stored minus 4)
|
||||
unsigned int matchLength = 4 + (token & 0x0F);
|
||||
if (matchLength == 4 + 0x0F) {
|
||||
unsigned char current;
|
||||
do // match length encoded in more than 1 byte
|
||||
{
|
||||
current = getByte(userPtr);
|
||||
matchLength += current;
|
||||
blockOffset++;
|
||||
} while (current == 255);
|
||||
}
|
||||
|
||||
// copy match
|
||||
unsigned int referencePos =
|
||||
(pos >= delta) ? (pos - delta) : (HISTORY_SIZE + pos - delta);
|
||||
// start and end within the current 64k block ?
|
||||
if (pos + matchLength < HISTORY_SIZE &&
|
||||
referencePos + matchLength < HISTORY_SIZE) {
|
||||
// read/write continuous block (no wrap-around at the end of
|
||||
// history[]) fast copy
|
||||
if (pos >= referencePos + matchLength ||
|
||||
referencePos >= pos + matchLength) {
|
||||
// non-overlapping
|
||||
memcpy(history + pos, history + referencePos, matchLength);
|
||||
pos += matchLength;
|
||||
} else {
|
||||
// overlapping, slower byte-wise copy
|
||||
while (matchLength-- > 0) history[pos++] = history[referencePos++];
|
||||
}
|
||||
} else {
|
||||
// either read or write wraps around at the end of history[]
|
||||
while (matchLength-- > 0) {
|
||||
// copy single byte
|
||||
history[pos++] = history[referencePos++];
|
||||
|
||||
// cannot write anymore ? => wrap around
|
||||
if (pos == HISTORY_SIZE) {
|
||||
// flush output buffer
|
||||
sendBytes(history, HISTORY_SIZE, userPtr);
|
||||
numWritten += HISTORY_SIZE;
|
||||
pos = 0;
|
||||
}
|
||||
// wrap-around of read location
|
||||
referencePos %= HISTORY_SIZE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// all legacy blocks must be completely filled - except for the last one
|
||||
if (isLegacy && numWritten + pos < 8 * 1024 * 1024) break;
|
||||
} else {
|
||||
// copy uncompressed data and add to history, too (if next block is
|
||||
// compressed and some matches refer to this block)
|
||||
while (blockSize-- > 0) {
|
||||
// copy a byte ...
|
||||
history[pos++] = getByte(userPtr);
|
||||
// ... until buffer is full => send to output
|
||||
if (pos == HISTORY_SIZE) {
|
||||
sendBytes(history, HISTORY_SIZE, userPtr);
|
||||
pos = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (hasBlockChecksum) {
|
||||
// ignore checksum, skip 4 bytes
|
||||
getByte(userPtr);
|
||||
getByte(userPtr);
|
||||
getByte(userPtr);
|
||||
getByte(userPtr);
|
||||
}
|
||||
}
|
||||
|
||||
if (hasContentChecksum) {
|
||||
// ignore checksum, skip 4 bytes
|
||||
getByte(userPtr);
|
||||
getByte(userPtr);
|
||||
getByte(userPtr);
|
||||
getByte(userPtr);
|
||||
}
|
||||
|
||||
// flush output buffer
|
||||
sendBytes(history, pos, userPtr);
|
||||
}
|
||||
|
||||
/// old interface where getByte and sendBytes use global file handles
|
||||
void unlz4(GET_BYTE getByte, SEND_BYTES sendBytes, const char* dictionary) {
|
||||
unlz4_userPtr(getByte, sendBytes, dictionary, NULL);
|
||||
}
|
||||
|
||||
/// parse command-line
|
||||
int main(int argc, const char* argv[]) {
|
||||
// default input/output streams
|
||||
struct UserPtr user = {.in = stdin,
|
||||
.out = stdout,
|
||||
.pos = 0, // initial input buffer is empty
|
||||
.available = 0};
|
||||
|
||||
const char* dictionary = NULL;
|
||||
|
||||
// first command-line parameter is our input filename / but ignore "-" which
|
||||
// stands for STDIN
|
||||
int parameter;
|
||||
for (parameter = 1; parameter < argc; parameter++) {
|
||||
const char* current = argv[parameter];
|
||||
// dictionary
|
||||
if (current[0] == '-' && current[1] == 'D') {
|
||||
if (parameter + 1 >= argc) unlz4error("no dictionary filename found");
|
||||
dictionary = argv[++parameter];
|
||||
continue;
|
||||
}
|
||||
|
||||
// filename
|
||||
// read from STDIN, default behavior
|
||||
if (current[0] != '-' && current[1] != '\0') {
|
||||
// already have a filename - at most one filename is allowed (except for
|
||||
// dictionary) ?
|
||||
if (user.in != stdin)
|
||||
unlz4error("can only decompress one file at a time");
|
||||
// get handle
|
||||
user.in = fopen(argv[1], "rb");
|
||||
if (!user.in) unlz4error("file not found");
|
||||
}
|
||||
}
|
||||
|
||||
// and go !
|
||||
unlz4_userPtr(getByteFromIn, sendBytesToOut, dictionary, &user);
|
||||
return 0;
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue