cosmopolitan/third_party/smallz4/smallz4.cc
Justine Tunney 868af3f950 Import C++ Standard Template Library
You can now use the hardest fastest and most dangerous language there is
with Cosmopolitan. So far about 75% of LLVM libcxx has been added. A few
breaking changes needed to be made to help this go smoothly.

- Rename nothrow to dontthrow
- Rename nodiscard to dontdiscard
- Add some libm functions, e.g. lgamma, nan, etc.
- Change intmax_t from int128 to int64 like everything else
- Introduce %jjd formatting directive for int128_t
- Introduce strtoi128(), strtou128(), etc.
- Rename bsrmax() to bsr128()

Some of the templates that should be working currently are std::vector,
std::string, std::map, std::set, std::deque, etc.
2022-03-22 06:41:54 -07:00

314 lines
11 KiB
C++

/*-*- mode:c++;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│
│vi: set et ft=c ts=8 tw=8 fenc=utf-8 :vi│
╚──────────────────────────────────────────────────────────────────────────────╝
│ │
│ smallz4 │
│ Copyright (c) 2016-2019 Stephan Brumme. All rights reserved. │
│ See https://create.stephan-brumme.com/smallz4/ │
│ │
│ Permission is hereby granted, free of charge, to any person obtaining │
│ a copy of this software and associated documentation files (the │
│ "Software"), to deal in the Software without restriction, including │
│ without limitation the rights to use, copy, modify, merge, publish, │
│ distribute, sublicense, and/or sell copies of the Software, and to │
│ permit persons to whom the Software is furnished to do so, subject to │
│ the following conditions: │
│ │
│ The above copyright notice and this permission notice shall be │
│ included in all copies or substantial portions of the Software. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
│ │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/calls/calls.h"
#include "libc/calls/weirdtypes.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/stdio.h"
#include "libc/time/time.h"
#include "third_party/smallz4/smallz4.hh"
/// report a fatal problem: print "ERROR: <msg>" plus newline on STDERR,
/// then terminate the process with exit status "code" (1 unless specified)
static void error(const char* msg, int code = 1) {
  FILE* const sink = stderr;
  fprintf(sink, "ERROR: %s\n", msg);
  exit(code);
}
// ==================== user-specific I/O INTERFACE ====================
/// state shared by the I/O callbacks (passed around as an opaque void*)
struct UserPtr {
  // streams the callbacks read from / write to
  FILE *in, *out;
  // everything below only matters for the verbose progress display
  bool verbose;           // progress output on STDERR enabled ?
  uint64_t numBytesIn,    // bytes read from "in" so far
      numBytesOut,        // bytes written to "out" so far
      totalSize;          // size of the input, 0 if it was not determined
  time_t starttime;       // set right before compression starts (verbose only)
};
/// read several bytes and store at "data", return number of actually read bytes
/// (return only zero if end of data reached)
size_t getBytesFromIn(void* data, size_t numBytes, void* userPtr) {
/// cast user-specific data
UserPtr* user = (UserPtr*)userPtr;
if (data && numBytes > 0) {
size_t actual = fread(data, 1, numBytes, user->in);
user->numBytesIn += actual;
return actual;
}
return 0;
}
/// show verbose info on STDERR: bytes in/out, compression ratio, elapsed time,
/// throughput and - if the total input size is known - progress and an estimate
/// of the remaining time; prints nothing if verbose mode is off, no input was
/// consumed yet or less than a second has passed
void verbose(const UserPtr& user) {
  if (!user.verbose) return;
  if (user.numBytesIn == 0) return;

  // elapsed time in seconds; also bail out if the clock was stepped backwards,
  // a negative value would wrap around in the unsigned arithmetic below
  int duration = int(time(NULL) - user.starttime);
  if (duration <= 0) return;

  // estimated total time in seconds, stays 0 if totalSize is unknown (0)
  int estimated = int(duration * user.totalSize / user.numBytesIn);

  // every argument is converted to exactly the type its conversion specifier
  // expects: passing a uint64_t expression to %d is undefined behavior
  const unsigned long long bytesIn = user.numBytesIn;
  const unsigned long long bytesOut = user.numBytesOut;
  const int ratio = int(100 * user.numBytesOut / user.numBytesIn);
  const int kBytePerSecond = int((user.numBytesIn / duration) / 1024);

  // display on STDERR, "\r" overwrites the previous progress line
  fprintf(stderr, "\r%llu bytes => %llu bytes (%d%%", bytesIn, bytesOut, ratio);
  if (estimated > 0) fprintf(stderr, ", %d%% done", 100 * duration / estimated);
  fprintf(stderr, "), after %d seconds @ %d kByte/s", duration, kBytePerSecond);
  if (estimated > 0)
    fprintf(stderr, ", about %d seconds left ", estimated - duration);
}
/// output callback: write "numBytes" bytes starting at "data" to the output
/// stream, update the byte counter and refresh the verbose display;
/// calls without a buffer or with zero bytes are ignored.
/// A failed or short write terminates the program with exit status 1 because
/// continuing would silently produce a truncated file.
void sendBytesToOut(const void* data, size_t numBytes, void* userPtr) {
  /// cast user-specific data
  UserPtr* user = (UserPtr*)userPtr;
  if (data && numBytes > 0) {
    // fwrite returns the number of items written, anything less is an error
    if (fwrite(data, 1, numBytes, user->out) != numBytes) {
      fprintf(stderr, "ERROR: %s\n", "cannot write output");
      exit(1);
    }
    user->numBytesOut += numBytes;
    if (user->verbose) verbose(*user);
  }
}
// ==================== COMMAND-LINE HANDLING ====================
/// print the usage text on STDOUT; "program" is the name shown in the usage
/// line and in all examples
static void showHelp(const char* program) {
  const char* version = smallz4::getVersion();
  // boundary between the greedy and the lazy compression levels
  // (both values are printed with %d)
  const int lastGreedy = smallz4::ShortChainsGreedy;
  const int firstLazy = smallz4::ShortChainsGreedy + 1;

  printf(
      // banner: version plus an (empty) version suffix
      "smalLZ4 %s%s: compressor with optimal parsing, fully compatible with LZ4 by Yann Collet (see https://lz4.org)\n"
      "\n"
      "Basic usage:\n"
      " %s [flags] [input] [output]\n"
      "\n"
      "This program writes to STDOUT if output isn't specified\n"
      "and reads from STDIN if input isn't specified, either.\n"
      "\n"
      "Examples:\n"
      " %s < abc.txt > abc.txt.lz4 # use STDIN and STDOUT\n"
      " %s abc.txt > abc.txt.lz4 # read from file and write to STDOUT\n"
      " %s abc.txt abc.txt.lz4 # read from and write to file\n"
      " cat abc.txt | %s - abc.txt.lz4 # read from STDIN and write to file\n"
      " %s -6 abc.txt abc.txt.lz4 # compression level 6 (instead of default 9)\n"
      " %s -f abc.txt abc.txt.lz4 # overwrite an existing file\n"
      " %s -f7 abc.txt abc.txt.lz4 # compression level 7 and overwrite an existing file\n"
      "\n"
      "Flags:\n"
      " -0, -1 ... -9 Set compression level, default: 9 (see below)\n"
      " -h Display this help message\n"
      " -f Overwrite an existing file\n"
      " -l Use LZ4 legacy file format\n"
      " -D [FILE] Load dictionary\n"
      " -v Verbose\n"
      "\n"
      "Compression levels:\n"
      " -0 No compression\n"
      " -1 ... -%d Greedy search, check 1 to %d matches\n"
      " -%d ... -8 Lazy matching with optimal parsing, check %d to 8 matches\n"
      " -9 Optimal parsing, check all possible matches (default)\n"
      "\n"
      "Written in 2016-2020 by Stephan Brumme https://create.stephan-brumme.com/smallz4/\n",
      // banner
      version, "",
      // usage line
      program,
      // the seven examples
      program, program, program, program, program, program, program,
      // compression level ranges
      lastGreedy, lastGreedy, firstLazy, firstLazy);
}
/// parse command-line
int main(int argc, const char* argv[]) {
// show help if no parameters and stdin isn't a pipe
if (argc == 1 && isatty(fileno(stdin)) != 0) {
showHelp(argv[0]);
return 0;
}
unsigned short maxChainLength =
65535; // "unlimited" because search window contains only 2^16 bytes
// overwrite output ?
bool overwrite = false;
// legacy format ? (not recommended, but smaller files if input < 8 MB)
bool useLegacy = false;
// preload dictionary from disk
const char* dictionary = NULL;
// default input/output streams
UserPtr user;
user.in = stdin;
user.out = stdout;
user.verbose = false;
user.numBytesIn = 0;
user.numBytesOut = 0;
user.totalSize = 0;
// parse flags
int nextArgument = 1;
bool skipArgument = false;
while (argc > nextArgument && argv[nextArgument][0] == '-') {
int argPos = 1;
while (argv[nextArgument][argPos] != '\0') {
switch (argv[nextArgument][argPos++]) {
// show help
case 'h':
showHelp(argv[0]);
return 0;
// force overwrite
case 'f':
overwrite = true;
break;
// old LZ4 format
case 'l':
useLegacy = true;
break;
// use dictionary
case 'D':
if (nextArgument + 1 >= argc) error("no dictionary filename found");
dictionary =
argv[nextArgument +
1]; // TODO: any flag immediately after -D causes an error
skipArgument = true;
break;
// display some info on STDERR while compressing
case 'v':
user.verbose = true;
break;
// set compression level
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
maxChainLength =
argv[nextArgument][1] - '0'; // "0" => 0, "1" => 1, ..., "8" => 8
break;
// unlimited hash chain length
case '9':
// default maxChainLength is already "unlimited"
break;
default:
error("unknown flag");
}
}
nextArgument++;
if (skipArgument) nextArgument++;
}
// input file is given as first parameter or stdin if no parameter is given
// (or "-")
if (argc > nextArgument && argv[nextArgument][0] != '-') {
user.in = fopen(argv[nextArgument], "rb");
if (!user.in) error("file not found");
nextArgument++;
}
// output file is given as second parameter or stdout if no parameter is given
// (or "-")
if (argc == nextArgument + 1 && argv[nextArgument][0] != '-') {
// check if file already exists
if (!overwrite && fopen(argv[nextArgument], "rb"))
error("output file already exists");
user.out = fopen(argv[nextArgument], "wb");
if (!user.out) error("cannot create file");
}
// basic check of legacy format's restrictions
if (useLegacy) {
if (dictionary != 0) error("legacy format doesn't support dictionaries");
if (maxChainLength == 0)
error("legacy format doesn't support uncompressed files");
}
// load dictionary
std::vector<unsigned char> preload;
if (dictionary != NULL) {
// open dictionary
FILE* dict = fopen(dictionary, "rb");
if (!dict) error("cannot open dictionary");
// get dictionary's filesize
fseek(dict, 0, SEEK_END);
size_t dictSize = ftell(dict);
// only the last 64k are relevant
const size_t Last64k = 65536;
size_t relevant = dictSize < Last64k ? 0 : dictSize - Last64k;
fseek(dict, (long)relevant, SEEK_SET);
if (dictSize > Last64k) dictSize = Last64k;
// read those bytes
preload.resize(dictSize);
fread(&preload[0], 1, dictSize, dict);
fclose(dict);
}
if (user.verbose) {
if (user.in != stdin) {
fseek(user.in, 0, SEEK_END);
user.totalSize = ftell(user.in);
fseek(user.in, 0, SEEK_SET);
}
user.starttime = time(NULL);
}
// and go !
smallz4::lz4(getBytesFromIn, sendBytesToOut, maxChainLength, preload,
useLegacy, &user);
if (user.verbose && user.numBytesIn > 0)
fprintf(stderr,
"\r%lld bytes => %lld bytes (%d%%) after %d seconds "
" \n",
user.numBytesIn, user.numBytesOut,
100 * user.numBytesOut / user.numBytesIn,
int(time(NULL) - user.starttime));
return 0;
}