#ifndef COSMOPOLITAN_THIRD_PARTY_SMALLZ4_SMALLZ4_H_
#define COSMOPOLITAN_THIRD_PARTY_SMALLZ4_SMALLZ4_H_
#include "libc/bits/bits.h"
#include "third_party/libcxx/vector"
/**
* LZ4 compression with optimal parsing
*
* See smallz4.cc for a basic I/O interface. You can easily replace it
* with an in-memory version; then all you have to do is:
*
* smallz4::lz4(GET_BYTES, SEND_BYTES);
*
* For more advanced use, lz4() can be called with additional parameters
* (incl. max chain length and a dictionary)
*/
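/**
* A minimal in-memory sketch (the names MemIo, memGet and memSend are
* illustrative, not part of this header; needs <cstring> for memcpy):
* GET_BYTES drains a source buffer and SEND_BYTES appends to a
* destination vector, both sharing state through userPtr.
*
*     struct MemIo {
*       const unsigned char* src;
*       size_t len, pos;
*       std::vector<unsigned char> out;
*     };
*     size_t memGet(void* data, size_t numBytes, void* userPtr) {
*       MemIo* io = (MemIo*)userPtr;
*       size_t n = io->len - io->pos;
*       if (n > numBytes) n = numBytes;
*       memcpy(data, io->src + io->pos, n);
*       io->pos += n;
*       return n;
*     }
*     void memSend(const void* data, size_t numBytes, void* userPtr) {
*       MemIo* io = (MemIo*)userPtr;
*       const unsigned char* p = (const unsigned char*)data;
*       io->out.insert(io->out.end(), p, p + numBytes);
*     }
*
*     // compress input[0..inputLen) into io.out:
*     MemIo io = {input, inputLen, 0, {}};
*     smallz4::lz4(memGet, memSend, 65535, false, &io);
*/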
class smallz4 {
public:
// read several bytes, see getBytesFromIn() in smallz4.cc for a basic
// implementation
typedef size_t (*GET_BYTES)(void* data, size_t numBytes, void* userPtr);
// write several bytes, see sendBytesToOut() in smallz4.cc for a basic
// implementation
typedef void (*SEND_BYTES)(const void* data, size_t numBytes, void* userPtr);
/// compress everything in the input stream (accessed via getBytes) and
/// write to the output stream (via sendBytes)
static void lz4(GET_BYTES getBytes, SEND_BYTES sendBytes,
unsigned short maxChainLength = MaxChainLength,
bool useLegacyFormat = false, void* userPtr = NULL) {
lz4(getBytes, sendBytes, maxChainLength, std::vector<unsigned char>(),
useLegacyFormat, userPtr);
}
/// compress everything in the input stream (accessed via getBytes) and
/// write to the output stream (via sendBytes)
static void lz4(
GET_BYTES getBytes, SEND_BYTES sendBytes, unsigned short maxChainLength,
const std::vector<unsigned char>& dictionary, // predefined dictionary
bool useLegacyFormat =
false, // old format is 7 bytes smaller if input < 8 MB
void* userPtr = NULL) {
smallz4 obj(maxChainLength);
obj.compress(getBytes, sendBytes, dictionary, useLegacyFormat, userPtr);
}
/// version string
static const char* getVersion() {
return "1.5";
}
// compression level thresholds, made public because I display them in the
// help screen ...
enum {
/// greedy mode for short chains (compression level <= 3) instead of optimal
/// parsing / lazy evaluation
ShortChainsGreedy = 3,
/// lazy evaluation for medium-sized chains (compression level > 3 and <= 6)
ShortChainsLazy = 6
};
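// (the encoder effectively treats maxChainLength itself as the level:
// values <= ShortChainsGreedy run greedy matching, values up to
// ShortChainsLazy add lazy evaluation, and the default 65535 means
// "unlimited" => optimal parsing)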
// ----- END OF PUBLIC INTERFACE -----
private:
// ----- constants and types -----
/// a block can be up to 4 MB, so uint32_t would suffice but uint64_t is quite
/// a bit faster on my x64 machine
typedef uint64_t Length;
/// matches must start within the most recent 64k
typedef uint16_t Distance;
enum {
/// each match's length must be >= 4
MinMatch = 4,
/// a literal needs one byte
JustLiteral = 1,
/// last match must not be closer than 12 bytes to the end
BlockEndNoMatch = 12,
/// last 5 bytes must be literals, no matching allowed
BlockEndLiterals = 5,
/// match finder's hash table size (2^HashBits entries; HashBits must be
/// less than 32)
HashBits = 20,
HashSize = 1 << HashBits,
/// input buffer size, can be any number but zero ;-)
BufferSize = 1024,
/// maximum match distance, must be power of 2 minus 1
MaxDistance = 65535,
/// marker for "no match"
EndOfChain = 0,
/// stop match finding after MaxChainLength steps (default is unlimited =>
/// optimal parsing)
MaxChainLength = MaxDistance,
/// significantly speed up parsing if the same byte is repeated a lot, may
/// cause sub-optimal compression
MaxSameLetter = 19 + 255 * 256, // was: 19 + 255,
/// maximum block size as defined in LZ4 spec:
/// { 0,0,0,0,64*1024,256*1024,1024*1024,4*1024*1024 };
/// I only work with the biggest maximum block size (7)
// note: xxhash header checksum is precalculated only for 7, too
MaxBlockSizeId = 7,
MaxBlockSize = 4 * 1024 * 1024,
/// legacy format has a fixed block size of 8 MB
MaxBlockSizeLegacy = 8 * 1024 * 1024,
/// number of literals and match length is encoded in several bytes, max.
/// 255 per byte
MaxLengthCode = 255
};
// ----- one and only variable ... -----
/// how many matches are checked in findLongestMatch, lower values yield
/// faster encoding at the cost of worse compression ratio
unsigned short maxChainLength;
// ----- code -----
/// match
struct Match {
/// length of match
Length length;
/// start of match
Distance distance;
};
/// create new compressor (only invoked by lz4)
explicit smallz4(unsigned short newMaxChainLength = MaxChainLength)
: maxChainLength(newMaxChainLength) // => no limit by default; pass a
// smaller value for faster, greedier compression
{
}
/// return true, if the four bytes at *a and *b match
inline static bool match4(const void* const a, const void* const b) {
return READ32LE(a) == READ32LE(b);
}
/// simple hash function, input: 32 bits, output: HashBits bits (by default:
/// 20)
inline static uint32_t getHash32(uint32_t fourBytes) {
// taken from https://en.wikipedia.org/wiki/Linear_congruential_generator
const uint32_t HashMultiplier = 48271;
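// the multiplication wraps to 32 bits, so the shift already leaves a
// HashBits-bit value; the final mask is a no-op kept for safety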
return ((fourBytes * HashMultiplier) >> (32 - HashBits)) & (HashSize - 1);
}
/// find longest match of data[pos] between data[begin] and data[end], use
/// match chain
Match findLongestMatch(const unsigned char* const data, uint64_t pos,
uint64_t begin, uint64_t end,
const Distance* const chain) const {
Match result;
result.length = JustLiteral; // assume a literal => one byte
// compression level: look only at the first n entries of the match chain
unsigned short stepsLeft = maxChainLength;
// findLongestMatch() shouldn't be called when maxChainLength = 0
// (uncompressed)
// pointer to position that is currently analyzed (which we try to find a
// great match for)
const unsigned char* const current = data + pos - begin;
// don't match beyond this point
const unsigned char* const stop = current + end - pos;
// get distance to previous match, abort if 0 => not existing
Distance distance = chain[pos & MaxDistance];
int64_t totalDistance = 0;
while (distance != EndOfChain) {
// chain goes too far back ?
totalDistance += distance;
if (totalDistance > MaxDistance) break; // can't match beyond 64k
// prepare next position
distance = chain[(pos - totalDistance) & MaxDistance];
// let's introduce a new pointer atLeast that points to the first "new"
// byte of a potential longer match
const unsigned char* const atLeast = current + result.length + 1;
// impossible to find a longer match because not enough bytes left ?
if (atLeast > stop) break;
// the idea is to split the comparison algorithm into 2 phases
// (1) scan backward from atLeast to current, abort if mismatch
// (2) scan forward until a mismatch is found and store length/distance
//     of this new best match
// current                  atLeast
//    |                        |
//    -<<<<<<<< phase 1 <<<<<<<<
//                              >>> phase 2 >>>
// main reason for phase 1:
// - both byte sequences start with the same bytes, quite likely they are
// very similar
// - there is a good chance that if they differ, then their last bytes
// differ
// => checking the last first increases the probability that a mismatch is
// detected as early as possible
// compare 4 bytes at once
const Length CheckAtOnce = 4;
// all bytes between current and atLeast shall be identical
const unsigned char* phase1 =
atLeast - CheckAtOnce; // minus 4 because match4 checks 4 bytes
while (phase1 > current && match4(phase1, phase1 - totalDistance))
phase1 -= CheckAtOnce;
// note: - the first four bytes always match
// - in the last iteration, phase1 points either at current + 1 or
// current + 2 or current + 3
// - therefore we compare a few bytes twice => but a check to skip
// these checks is more expensive
// mismatch ? (the while-loop was aborted)
if (phase1 > current) continue;
// we have a new best match, now scan forward
const unsigned char* phase2 = atLeast;
// fast loop: check four bytes at once
while (phase2 + CheckAtOnce <= stop &&
match4(phase2, phase2 - totalDistance))
phase2 += CheckAtOnce;
// slow loop: check the last 1/2/3 bytes
while (phase2 < stop && *phase2 == *(phase2 - totalDistance)) phase2++;
// store new best match
result.distance = Distance(totalDistance);
result.length = Length(phase2 - current);
// stop searching on lower compression levels
if (--stepsLeft == 0) break;
}
return result;
}
/// create shortest output
/** data points to block's begin; we need it to extract literals **/
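// a worked example of the sequence layout produced below: 17 literals
// followed by a 21-byte match at distance 300 are encoded as
//   0xFF               token (literal nibble 15, match nibble 15)
//   0x02               remaining literal count (17 - 15)
//   <17 literal bytes>
//   0x2C 0x01          distance 300, little endian
//   0x02               remaining match length (21 - 4 - 15)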
static std::vector<unsigned char> selectBestMatches(
const std::vector<Match>& matches, const unsigned char* const data) {
// store encoded data
std::vector<unsigned char> result;
result.reserve(matches.size());
// indices of current run of literals
size_t literalsFrom = 0;
size_t numLiterals = 0;
bool lastToken = false;
// walk through the whole block
for (size_t offset = 0;
offset < matches.size();) // increment inside of loop
{
// get best cost-weighted match
Match match = matches[offset];
// if no match, then count literals instead
if (match.length <= JustLiteral) {
// first literal ? need to reset pointers of current sequence of
// literals
if (numLiterals == 0) literalsFrom = offset;
// add one more literal to current sequence
numLiterals++;
// next match
offset++;
// continue unless it's the last literal
if (offset < matches.size()) continue;
lastToken = true;
} else {
// skip unused matches
offset += match.length;
}
// store match length (4 is implied because it's the minimum match length)
int matchLength = int(match.length) - MinMatch;
// last token has zero length
if (lastToken) matchLength = 0;
// token consists of match length and number of literals, let's start with
// match length ...
unsigned char token =
(matchLength < 15) ? (unsigned char)matchLength : 15;
// >= 15 literals ? (extra bytes to store length)
if (numLiterals < 15) {
// add number of literals in higher four bits
token |= numLiterals << 4;
result.push_back(token);
} else {
// set all higher four bits, the following bytes will determine the
// exact number of literals
result.push_back(token | 0xF0);
// 15 is already encoded in token
int encodeNumLiterals = int(numLiterals) - 15;
// emit 255 until remainder is below 255
while (encodeNumLiterals >= MaxLengthCode) {
result.push_back(MaxLengthCode);
encodeNumLiterals -= MaxLengthCode;
}
// and the last byte (can be zero, too)
result.push_back((unsigned char)encodeNumLiterals);
}
// copy literals
if (numLiterals > 0) {
result.insert(result.end(), data + literalsFrom,
data + literalsFrom + numLiterals);
// last token doesn't have a match
if (lastToken) break;
// reset
numLiterals = 0;
}
// distance stored in 16 bits / little endian
result.push_back(match.distance & 0xFF);
result.push_back(match.distance >> 8);
// >= 15+4 bytes matched
if (matchLength >= 15) {
// 15 is already encoded in token
matchLength -= 15;
// emit 255 until remainder is below 255
while (matchLength >= MaxLengthCode) {
result.push_back(MaxLengthCode);
matchLength -= MaxLengthCode;
}
// and the last byte (can be zero, too)
result.push_back((unsigned char)matchLength);
}
}
return result;
}
/// walk backwards through all matches and compute number of compressed bytes
/// from current position to the end of the block
/** note: matches are modified (shortened length) if necessary **/
static void estimateCosts(std::vector<Match>& matches) {
const size_t blockEnd = matches.size();
// equals the number of bytes after compression
typedef uint32_t Cost;
// minimum cost from this position to the end of the current block
std::vector<Cost> cost(matches.size(), 0);
// "cost" represents the number of bytes needed
// the last bytes must always be literals
Length numLiterals = BlockEndLiterals;
// backwards optimal parsing
for (int64_t i = (int64_t)blockEnd - (1 + BlockEndLiterals); i >= 0;
i--) // ignore the last 5 bytes, they are always literals
{
// if encoded as a literal
numLiterals++;
Length bestLength = JustLiteral;
// such a literal "costs" 1 byte
Cost minCost = cost[i + 1] + JustLiteral;
// an extra length byte is required for every 255 literals
if (numLiterals >= 15) {
// same as: if ((numLiterals - 15) % MaxLengthCode == 0)
// but I try hard to avoid the slow modulo function
if (numLiterals == 15 || (numLiterals >= 15 + MaxLengthCode &&
(numLiterals - 15) % MaxLengthCode == 0))
minCost++;
}
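// e.g. 14 literals still fit into the token's nibble, the 15th literal
// triggers the first extra length byte, the next at 15 + 255 = 270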
// let's look at the longest match, almost always more efficient than
// plain literals
Match match = matches[i];
// very long self-referencing matches can slow down the program A LOT
if (match.length >= MaxSameLetter && match.distance == 1) {
// assume that longest match is always the best match
// NOTE: this assumption might not be optimal !
bestLength = match.length;
minCost =
cost[i + match.length] + 1 + 2 + 1 + Cost(match.length - 19) / 255;
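// (token + 2-byte offset + first extra length byte + one more byte per
// additional 255 bytes of match length)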
} else {
// this is the core optimization loop
// overhead of encoding a match: token (1 byte) + offset (2 bytes) +
// sometimes extra bytes for long matches
Cost extraCost = 1 + 2;
Length nextCostIncrease = 18; // need one more byte for 19+ long
// matches (next increase: 19+255*x)
// try all match lengths (start with short ones)
for (Length length = MinMatch; length <= match.length; length++) {
// token (1 byte) + offset (2 bytes) + extra bytes for long matches
Cost currentCost = cost[i + length] + extraCost;
// better choice ?
if (currentCost <= minCost) {
// regarding the if-condition:
// "<" prefers literals and shorter matches
// "<=" prefers longer matches
// they should produce the same number of bytes (because of the same
// cost) ... but every now and then they don't !
// the reason: too many consecutive literals require an extra length
// byte (which we took into consideration a few lines above), but we
// only looked at literals beyond the current position; if there are
// many literals in front of the current position, then it may be
// better to emit a match with the same cost as those literals at the
// current position
// => it "breaks" the long chain of literals and removes the extra
// length byte
minCost = currentCost;
bestLength = length;
// performance-wise, a long match is usually faster during decoding
// than multiple short matches on the other hand, literals are
// faster than short matches as well (assuming same cost)
}
// very long matches need extra bytes for encoding match length
if (length == nextCostIncrease) {
extraCost++;
nextCostIncrease += MaxLengthCode;
}
}
}
// store lowest cost so far
cost[i] = minCost;
// and adjust best match
matches[i].length = bestLength;
// reset number of literals if a match was chosen
if (bestLength != JustLiteral) numLiterals = 0;
// note: if bestLength is smaller than the previous matches[i].length then
// there might be a closer match
// which could be more cache-friendly (=> faster decoding)
}
}
/// compress everything in the input stream (accessed via getBytes) and write
/// to the output stream (via sendBytes), improve compression with a
/// predefined dictionary
void compress(GET_BYTES getBytes, SEND_BYTES sendBytes,
const std::vector<unsigned char>& dictionary,
bool useLegacyFormat, void* userPtr) const {
// ==================== write header ====================
if (useLegacyFormat) {
// magic bytes
const unsigned char header[] = {0x02, 0x21, 0x4C, 0x18};
sendBytes(header, sizeof(header), userPtr);
} else {
// frame header
const unsigned char header[] = {
0x04, 0x22, 0x4D,
0x18, // magic bytes
1 << 6, // flags: no checksums, blocks depend on each other and no
// dictionary ID
MaxBlockSizeId << 4, // max blocksize
0xDF // header checksum (precomputed)
};
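// note: per the LZ4 frame spec, the header checksum is the second byte
// of XXH32 over the frame descriptor, i.e.
// (XXH32(&header[4], 2, 0) >> 8) & 0xFF, hence it can be precomputed
// for this fixed descriptor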
sendBytes(header, sizeof(header), userPtr);
}
// ==================== declarations ====================
// change read buffer size as you like
unsigned char buffer[BufferSize];
// read the file in chunks/blocks, data will contain only bytes which are
// relevant for the current block
std::vector<unsigned char> data;
// file position corresponding to data[0]
size_t dataZero = 0;
// last already read position
size_t numRead = 0;
// passthru data ? (but still wrap it in LZ4 format)
const bool uncompressed = (maxChainLength == 0);
// last time we saw a hash
const uint64_t NoLastHash = ~0; // = -1
std::vector<uint64_t> lastHash(HashSize, NoLastHash);
// previous position which starts with the same bytes
std::vector<Distance> previousHash(
MaxDistance + 1,
Distance(EndOfChain)); // long chains based on my simple hash
std::vector<Distance> previousExact(
MaxDistance + 1,
Distance(EndOfChain)); // shorter chains based on exact matching of the
// first four bytes
// these two containers are essential for match finding:
// 1. I compute a hash of four bytes
// 2. lastHash contains the location of the most recent group of four
//    bytes with that same hash
// 3. due to hash collisions, several groups of four bytes may yield the
//    same hash
// 4. so for each location I can look up the previous location of the
//    same hash in previousHash
// 5. basically it's a chain of memory locations where potential matches
//    start
// 6. I follow this hash chain until I find exactly the same four bytes
//    I was looking for
// 7. then I switch to a sparser chain: previousExact
// 8. it's basically the same idea as previousHash, but this time not the
//    hash but the first four bytes must be identical
// 9. previousExact will be used by findLongestMatch: it compares all such
//    strings and figures out which is the longest match
// And why do I have to do it in such a complicated way ?
// - well, there are 2^32 combinations of four bytes
// - so there are 2^32 potential chains
// - most combinations just don't occur and occupy no space, but I still
//   have to keep their "entry point" (which is empty/invalid)
// - that would be at least 16 GBytes RAM (2^32 x 4 bytes)
// - my hashing algorithm reduces the 2^32 combinations to 2^20 hashes
//   (see HashBits), that's about 8 MBytes RAM
// - thus only 2^20 entry points and at most 2^20 hash chains which is
// easily manageable
// ... in the end it's all about conserving memory !
// (total memory consumption of smallz4 is about 64 MBytes)
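// a tiny worked example (positions are made up): say "abcd" occurs at
// positions 100, 220 and 500, and an unrelated "wxyz" at position 480
// happens to share the same hash h; after position 500 is processed:
//   lastHash[h] = 500
//   previousHash[500 & MaxDistance] = 20 (500 -> 480, hash collision)
//   previousHash[480 & MaxDistance] = 260 (480 -> 220)
//   previousExact[500 & MaxDistance] = 280 (500 -> 220, collision skipped)
//   previousExact[220 & MaxDistance] = 120 (220 -> 100)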
// first and last offset of a block (nextBlock is end-of-block plus 1)
uint64_t lastBlock = 0;
uint64_t nextBlock = 0;
bool parseDictionary = !dictionary.empty();
// main loop, processes one block per iteration
while (true) {
// ==================== start new block ====================
// first byte of the currently processed block (std::vector data may
// contain the last 64k of the previous block, too)
const unsigned char* dataBlock = NULL;
// prepend dictionary
if (parseDictionary) {
// resize dictionary to 64k (minus 1 because we can only match the last
// 65535 bytes of the dictionary => MaxDistance)
if (dictionary.size() < MaxDistance) {
// dictionary is smaller than 64k, prepend garbage data
size_t unused = MaxDistance - dictionary.size();
data.resize(unused, 0);
data.insert(data.end(), dictionary.begin(), dictionary.end());
} else
// copy only the most recent 64k of the dictionary
data.insert(data.end(),
dictionary.begin() + dictionary.size() - MaxDistance,
dictionary.end());
nextBlock = data.size();
numRead = data.size();
}
// read more bytes from input
size_t maxBlockSize = useLegacyFormat ? MaxBlockSizeLegacy : MaxBlockSize;
while (numRead - nextBlock < maxBlockSize) {
// buffer can be significantly smaller than MaxBlockSize, that's the
// only reason for this while-block
size_t incoming = getBytes(buffer, BufferSize, userPtr);
// no more data ?
if (incoming == 0) break;
// add bytes to buffer
numRead += incoming;
data.insert(data.end(), buffer, buffer + incoming);
}
// no more data ? => WE'RE DONE !
if (nextBlock == numRead) break;
// determine block borders
lastBlock = nextBlock;
nextBlock += maxBlockSize;
// not beyond end-of-file
if (nextBlock > numRead) nextBlock = numRead;
// pointer to first byte of the currently processed block (the std::vector
// container named data may contain the last 64k of the previous block,
// too)
dataBlock = &data[lastBlock - dataZero];
const uint64_t blockSize = nextBlock - lastBlock;
// ==================== full match finder ====================
// greedy mode is much faster but produces larger output
const bool isGreedy = (maxChainLength <= ShortChainsGreedy);
// lazy evaluation: if there is a match, then try running match finder on
// next position, too, but not after that
const bool isLazy = !isGreedy && (maxChainLength <= ShortChainsLazy);
// skip match finding on the next x bytes in greedy mode
Length skipMatches = 0;
// allow match finding on the next byte but skip afterwards (in lazy mode)
bool lazyEvaluation = false;
// the last literals of the previous block skipped matching, so they are
// missing from the hash chains
int64_t lookback = int64_t(dataZero);
if (lookback > BlockEndNoMatch && !parseDictionary)
lookback = BlockEndNoMatch;
if (parseDictionary) lookback = int64_t(dictionary.size());
// so let's go back a few bytes
lookback = -lookback;
// ... but not in legacy mode
if (useLegacyFormat || uncompressed) lookback = 0;
std::vector<Match> matches(uncompressed ? 0 : blockSize);
// find longest matches for each position (skip if level=0 which means
// "uncompressed")
int64_t i;
for (i = lookback;
i + BlockEndNoMatch <= int64_t(blockSize) && !uncompressed; i++) {
// detect self-matching
if (i > 0 && dataBlock[i] == dataBlock[i - 1]) {
Match prevMatch = matches[i - 1];
// predecessor had the same match ?
if (prevMatch.distance == 1 &&
prevMatch.length > MaxSameLetter) // TODO: handle very long
// self-referencing matches
{
// just copy predecessor without further (expensive) optimizations
matches[i].distance = 1;
matches[i].length = prevMatch.length - 1;
continue;
}
}
// read next four bytes
const uint32_t four = READ32LE(dataBlock + i);
// convert to a shorter hash
const uint32_t hash = getHash32(four);
// get most recent position of this hash
uint64_t lastHashMatch = lastHash[hash];
// and store current position
lastHash[hash] = i + lastBlock;
// remember: i could be negative, too
Distance prevIndex =
(i + MaxDistance + 1) &
MaxDistance; // actually the same as i & MaxDistance
// no predecessor / no hash chain available ?
if (lastHashMatch == NoLastHash) {
previousHash[prevIndex] = EndOfChain;
previousExact[prevIndex] = EndOfChain;
continue;
}
// most recent hash match too far away ?
uint64_t distance = lastHash[hash] - lastHashMatch;
if (distance > MaxDistance) {
previousHash[prevIndex] = EndOfChain;
previousExact[prevIndex] = EndOfChain;
continue;
}
// build hash chain, i.e. store distance to last pseudo-match
previousHash[prevIndex] = (Distance)distance;
// skip pseudo-matches (hash collisions) and build a second chain where
// the first four bytes must match exactly
uint32_t currentFour;
// check the hash chain
while (true) {
// read four bytes
currentFour = READ32LE(
&data[lastHashMatch - dataZero]); // match may be found in the
// previous block, too
// match chain found, first 4 bytes are identical
if (currentFour == four) break;
// prevent accidentally hopping onto an old, wrong hash chain
if (hash != getHash32(currentFour)) break;
// try next pseudo-match
Distance next = previousHash[lastHashMatch & MaxDistance];
// end of the hash chain ?
if (next == EndOfChain) break;
// too far away ?
distance += next;
if (distance > MaxDistance) break;
// take another step along the hash chain ...
lastHashMatch -= next;
// closest match is out of range ?
if (lastHashMatch < dataZero) break;
}
// search aborted / failed ?
if (four != currentFour) {
// no matches for the first four bytes
previousExact[prevIndex] = EndOfChain;
continue;
}
// store distance to previous match
previousExact[prevIndex] = (Distance)distance;
// no matching if crossing block boundary, just update hash tables
if (i < 0) continue;
// skip match finding if in greedy mode
if (skipMatches > 0) {
skipMatches--;
if (!lazyEvaluation) continue;
lazyEvaluation = false;
}
// and after all that preparation ... finally look for the longest match
matches[i] = findLongestMatch(data.data(), i + lastBlock, dataZero,
nextBlock - BlockEndLiterals,
previousExact.data());
// no match finding needed for the next few bytes in greedy/lazy mode
if ((isLazy || isGreedy) && matches[i].length != JustLiteral) {
lazyEvaluation = (skipMatches == 0);
skipMatches = matches[i].length;
}
}
// last bytes are always literals
while (i < int(matches.size())) matches[i++].length = JustLiteral;
// dictionary is valid only for the first block
parseDictionary = false;
// ==================== estimate costs (number of compressed bytes)
// ====================
// not needed in greedy mode and/or very short blocks
if (matches.size() > BlockEndNoMatch &&
maxChainLength > ShortChainsGreedy)
estimateCosts(matches);
// ==================== select best matches ====================
std::vector<unsigned char> compressed =
selectBestMatches(matches, &data[lastBlock - dataZero]);
// ==================== output ====================
// did compression do harm ?
bool useCompression = compressed.size() < blockSize && !uncompressed;
// legacy format is always compressed
useCompression |= useLegacyFormat;
// block size
uint32_t numBytes =
uint32_t(useCompression ? compressed.size() : blockSize);
uint32_t numBytesTagged = numBytes | (useCompression ? 0 : 0x80000000);
unsigned char num1 = numBytesTagged & 0xFF;
sendBytes(&num1, 1, userPtr);
unsigned char num2 = (numBytesTagged >> 8) & 0xFF;
sendBytes(&num2, 1, userPtr);
unsigned char num3 = (numBytesTagged >> 16) & 0xFF;
sendBytes(&num3, 1, userPtr);
unsigned char num4 = (numBytesTagged >> 24) & 0xFF;
sendBytes(&num4, 1, userPtr);
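// (e.g. a 1000-byte block stored uncompressed is tagged
// 1000 | 0x80000000 = 0x800003E8 and emitted as E8 03 00 80)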
if (useCompression)
sendBytes(compressed.data(), numBytes, userPtr);
else // uncompressed ? => copy input data
sendBytes(&data[lastBlock - dataZero], numBytes, userPtr);
// legacy format: no matching across blocks
if (useLegacyFormat) {
dataZero += data.size();
data.clear();
// clear hash tables
for (size_t i = 0; i < previousHash.size(); i++)
previousHash[i] = EndOfChain;
for (size_t i = 0; i < previousExact.size(); i++)
previousExact[i] = EndOfChain;
for (size_t i = 0; i < lastHash.size(); i++) lastHash[i] = NoLastHash;
} else {
// remove already processed data except for the last 64kb, which could
// be used for matches reaching into the next block
if (data.size() > MaxDistance) {
size_t remove = data.size() - MaxDistance;
dataZero += remove;
data.erase(data.begin(), data.begin() + remove);
}
}
}
// add an empty block
if (!useLegacyFormat) {
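// a zero block size is the frame's EndMark; the flag byte announced no
// content checksum, so nothing follows it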
static const uint32_t zero = 0;
sendBytes(&zero, 4, userPtr);
}
}
};
#endif /* COSMOPOLITAN_THIRD_PARTY_SMALLZ4_SMALLZ4_H_ */