Add Compress() and Uncompress() to redbean

This commit is contained in:
Justine Tunney 2022-05-16 16:49:20 -07:00
parent 59b6ae1cbd
commit 8bfb70ca3f
9 changed files with 218 additions and 23 deletions

View file

@ -5,8 +5,9 @@ COSMOPOLITAN_C_START_
char *sleb64(char *, int64_t);
char *zleb64(char *, int64_t);
char *uleb64(char *, uint64_t);
char *uleb64(char[hasatleast 10], uint64_t);
int unzleb64(const char *, size_t, int64_t *);
int unuleb64(char *, size_t, uint64_t *);
#ifndef __STRICT_ANSI__
char *sleb128(char *, int128_t);

View file

@ -36,7 +36,7 @@
* @param x is number
* @return p + i
*/
char *uleb64(char *p, uint64_t x) {
char *uleb64(char p[hasatleast 10], uint64_t x) {
int c;
for (;;) {
c = x & 127;

41
libc/fmt/unuleb64.c Normal file
View file

@ -0,0 +1,41 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/fmt/leb128.h"
/**
 * Decodes unsigned integer from array.
 *
 * Each input byte contributes its low seven bits, least significant
 * group first. A byte whose high bit is clear terminates the number.
 *
 * @param p is input array
 * @param n is capacity of p
 * @param x receives number, and is left untouched on error
 * @return bytes decoded or -1 on error, i.e. when no terminating byte
 *     is found within the first min(n, 10) bytes
 */
int unuleb64(char *p, size_t n, uint64_t *x) {
  int k;
  size_t i;
  uint64_t t;
  // the k < 64 bound caps decoding at ten bytes, because shifting a
  // 64-bit value by 64 or more is undefined behavior
  for (k = t = i = 0; i < n && k < 64; ++i, k += 7) {
    t |= (uint64_t)(p[i] & 127) << k;
    if (~p[i] & 128) {
      *x = t;
      return i + 1;
    }
  }
  return -1;
}

View file

@ -0,0 +1,72 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/fmt/leb128.h"
#include "libc/limits.h"
#include "libc/stdio/stdio.h"
#include "libc/testlib/testlib.h"
char p[10];
uint64_t x;
// Zero encodes to a single 0x00 byte and decodes back to zero.
// Decoding with a capacity of zero bytes must report an error.
TEST(uleb64, testZero) {
EXPECT_EQ(1, uleb64(p, 0) - p);
EXPECT_EQ(0, p[0]);
EXPECT_EQ(1, unuleb64(p, 10, &x));
EXPECT_EQ(0, x);
EXPECT_EQ(-1, unuleb64(p, 0, &x));
}
// One encodes to a single 0x01 byte and round-trips through the decoder.
TEST(uleb64, testOne) {
EXPECT_EQ(1, uleb64(p, 1) - p);
EXPECT_EQ(1, p[0]);
EXPECT_EQ(1, unuleb64(p, 10, &x));
EXPECT_EQ(1, x);
}
// 255 needs two bytes: 0xFF (low seven bits plus continuation flag)
// followed by 0x01, and it round-trips through the decoder.
TEST(uleb64, test255) {
EXPECT_EQ(2, uleb64(p, 255) - p);
// mask with 255 because char may be signed
EXPECT_EQ(255, p[0] & 255);
EXPECT_EQ(1, p[1]);
EXPECT_EQ(2, unuleb64(p, 10, &x));
EXPECT_EQ(255, x);
}
TEST(uleb64, testFFFF) {
EXPECT_EQ(3, uleb64(p, 0xFFFF) - p);
EXPECT_EQ(255, p[0] & 255);
EXPECT_EQ(255, p[1] & 255);
EXPECT_EQ(3, p[2] & 255);
}
// UINT64_MAX occupies the full ten bytes: nine 0xFF bytes followed by
// 0x01. It must round-trip, and a truncated buffer must be rejected.
TEST(uleb64, testMax) {
  int idx;
  EXPECT_EQ(10, uleb64(p, UINT64_MAX) - p);
  // first nine bytes each carry seven set bits plus the continuation flag
  for (idx = 0; idx < 9; ++idx) {
    EXPECT_EQ(255, p[idx] & 255);
  }
  // final byte holds the one remaining bit and ends the sequence
  EXPECT_EQ(1, p[9] & 255);
  EXPECT_EQ(10, unuleb64(p, 10, &x));
  EXPECT_EQ(UINT64_MAX, x);
  EXPECT_EQ(-1, unuleb64(p, 7, &x));
}

View file

@ -1308,6 +1308,39 @@ FUNCTIONS
the density of information. Cryptographic random should be in
the ballpark of 7.9 whereas plaintext will be more like 4.5.
Compress(uncompdata:str[, level:int]) → compdata:str
Compresses data using DEFLATE algorithm. The compression
format here is defined to be quick and handy for things like
database fields. For example:
>: Compress('hello')
"\x05\x86\xa6\x106x\x9c\xcbH\xcd\xc9\xc9\x07\x00\x06,\x02\x15"
>: Uncompress(Compress('hello'))
"hello"
`level` is the compression level, which defaults to 7. The max
is 9. Lower numbers go faster. Higher numbers go slower, but
have better compression ratios.
[implementation details]
The binary wire format is defined as follows:
1. uleb64 uncompressed byte size (1 to 10 bytes)
2. uint32_t crc32 (4 bytes; zlib polynomial)
3. data (created by zlib compress function)
Uncompress(compdata:str) → uncompdata:str
Uncompresses data using DEFLATE algorithm. This applies the
inverse transform of the Compress() function. See its docs for
further details on usage and encoding.
This function throws exceptions in the event that the value
couldn't be decoded. There's a crc32 check to make our check
of validity iron-clad. It's implemented using Intel CLMUL so
it has ludicrous speed performance as well.
Benchmark(func[, count[, maxattempts]])
└─→ nanos:real, ticks:int, overhead-ticks:int, tries:int

View file

@ -17,10 +17,12 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "dsp/scale/cdecimate2xuint8x8.h"
#include "libc/bits/bits.h"
#include "libc/bits/popcnt.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/rusage.h"
#include "libc/fmt/itoa.h"
#include "libc/fmt/leb128.h"
#include "libc/intrin/kprintf.h"
#include "libc/log/check.h"
#include "libc/log/log.h"
@ -55,6 +57,7 @@
#include "third_party/mbedtls/sha1.h"
#include "third_party/mbedtls/sha256.h"
#include "third_party/mbedtls/sha512.h"
#include "third_party/zlib/zlib.h"
#include "tool/net/lfuncs.h"
static int Rdpid(void) {
@ -155,7 +158,7 @@ int LuaDecimate(lua_State *L) {
unsigned char *p;
s = luaL_checklstring(L, 1, &n);
m = ROUNDUP(n, 16);
p = xmalloc(m);
CHECK_NOTNULL((p = LuaAlloc(L, m)));
bzero(p + n, m - n);
cDecimate2xUint8x8(m, p, (signed char[8]){-1, -3, 3, 17, 17, 3, -3, -1});
lua_pushlstring(L, (char *)p, (n + 1) >> 1);
@ -405,7 +408,7 @@ int LuaGetRandomBytes(lua_State *L) {
luaL_argerror(L, 1, "not in range 1..256");
unreachable;
}
p = xmalloc(n);
CHECK_NOTNULL((p = LuaAlloc(L, n)));
CHECK_EQ(n, getrandom(p, n, 0));
lua_pushlstring(L, p, n);
free(p);
@ -678,3 +681,49 @@ int LuaBenchmark(lua_State *L) {
lua_pushinteger(L, attempts);
return 4;
}
/**
 * Lua binding for Compress(uncompdata:str[, level:int]) → compdata:str
 *
 * The pushed string is laid out as follows:
 *
 * 1. uleb64 uncompressed byte size
 * 2. 32-bit little-endian crc32 of the uncompressed data
 * 3. output of zlib compress2()
 *
 * @return 1, the number of lua results pushed
 */
int LuaCompress(lua_State *L) {
  size_t n, m;
  char *q, *e;
  uint32_t crc;
  const char *p;
  int level, hdrlen;
  lua_Integer lvl;
  p = luaL_checklstring(L, 1, &n);
  // validate before narrowing to int and before allocating, since an
  // invalid level would make compress2() fail and trip the CHECK below,
  // taking down the whole process because of a bad script argument
  lvl = luaL_optinteger(L, 2, Z_DEFAULT_COMPRESSION);
  if (lvl != Z_DEFAULT_COMPRESSION &&
      (lvl < Z_NO_COMPRESSION || lvl > Z_BEST_COMPRESSION)) {
    luaL_argerror(L, 2, "compression level out of range");
    unreachable;
  }
  level = lvl;
  m = compressBound(n);
  // 10 is the longest uleb64 encoding and 4 is the crc32
  CHECK_NOTNULL((q = LuaAlloc(L, 10 + 4 + m)));
  crc = crc32_z(0, p, n);
  e = uleb64(q, n);
  e = WRITE32LE(e, crc);
  hdrlen = e - q;
  // compress2() updates m to the number of bytes it actually produced
  CHECK_EQ(Z_OK, compress2((unsigned char *)(q + hdrlen), &m,
                           (unsigned char *)p, n, level));
  lua_pushlstring(L, q, hdrlen + m);
  free(q);
  return 1;
}
/**
 * Lua binding for Uncompress(compdata:str) → uncompdata:str
 *
 * Parses the header written by LuaCompress() (uleb64 uncompressed size
 * followed by a 32-bit little-endian crc32), inflates the remainder, and
 * raises a lua error if the size or checksum doesn't match.
 *
 * @return 1, the number of lua results pushed
 */
int LuaUncompress(lua_State *L) {
  int rc;
  char *q;
  uint32_t crc;
  const char *p;
  size_t n, m, len;
  p = luaL_checklstring(L, 1, &n);
  if ((rc = unuleb64((char *)p, n, &m)) == -1 || n < rc + 4) {
    luaL_error(L, "compressed value too short to be valid");
    unreachable;
  }
  len = m;
  crc = READ32LE(p + rc);
  // the declared size comes from the input itself, so an allocation
  // failure is reported to the script rather than aborting the process;
  // at least one byte is requested so an empty payload can't yield null
  if (!(q = LuaAlloc(L, m ? m : 1))) {
    luaL_error(L, "out of memory");
    unreachable;
  }
  // only n - rc - 4 bytes remain after the header; passing n would let
  // zlib read past the end of the input string
  if (uncompress((void *)q, &m, (unsigned char *)p + rc + 4, n - rc - 4) !=
          Z_OK ||
      m != len || crc32_z(0, q, m) != crc) {
    free(q);
    luaL_error(L, "compressed value is corrupted");
    unreachable;
  }
  lua_pushlstring(L, q, m);
  free(q);
  return 1;
}

View file

@ -11,11 +11,15 @@ int LuaUnix(lua_State *);
int luaopen_argon2(lua_State *);
int luaopen_lsqlite3(lua_State *);
void *LuaRealloc(lua_State *, void *, size_t);
void *LuaAlloc(lua_State *, size_t);
int LuaBenchmark(lua_State *);
int LuaBin(lua_State *);
int LuaBsf(lua_State *);
int LuaBsr(lua_State *);
int LuaCategorizeIp(lua_State *);
int LuaCompress(lua_State *);
int LuaCrc32(lua_State *);
int LuaCrc32c(lua_State *);
int LuaDecimate(lua_State *);
@ -78,6 +82,7 @@ int LuaSha384(lua_State *);
int LuaSha512(lua_State *);
int LuaSleep(lua_State *);
int LuaSlurp(lua_State *);
int LuaUncompress(lua_State *);
int LuaUnderlong(lua_State *);
int LuaVisualizeControlCodes(lua_State *);

View file

@ -103,7 +103,7 @@ struct UnixErrno {
static lua_State *GL;
static void *LuaUnixRealloc(lua_State *L, void *p, size_t n) {
void *LuaRealloc(lua_State *L, void *p, size_t n) {
void *p2;
if ((p2 = realloc(p, n))) {
return p2;
@ -116,16 +116,8 @@ static void *LuaUnixRealloc(lua_State *L, void *p, size_t n) {
return p2;
}
static void *LuaUnixAllocRaw(lua_State *L, size_t n) {
return LuaUnixRealloc(L, 0, n);
}
static void *LuaUnixAlloc(lua_State *L, size_t n) {
void *p;
if ((p = LuaUnixAllocRaw(L, n))) {
bzero(p, n);
}
return p;
// Allocates n bytes by calling LuaRealloc() with a null old pointer.
// This function does not zero the returned memory. Callers in this
// file test the result for null before using it.
void *LuaAlloc(lua_State *L, size_t n) {
return LuaRealloc(L, 0, n);
}
static lua_Integer FixLimit(long x) {
@ -241,7 +233,7 @@ static char **ConvertLuaArrayToStringList(lua_State *L, int i) {
lua_len(L, i);
n = lua_tointeger(L, -1);
lua_pop(L, 1);
if ((p = LuaUnixAllocRaw(L, (n + 1) * sizeof(*p)))) {
if ((p = LuaAlloc(L, (n + 1) * sizeof(*p)))) {
for (j = 1; j <= n; ++j) {
lua_geti(L, i, j);
s = strdup(lua_tostring(L, -1));
@ -442,7 +434,7 @@ static int LuaUnixReadlink(lua_State *L) {
size_t got, bufsiz = 8192;
path = luaL_checkstring(L, 1);
dirfd = luaL_optinteger(L, 2, AT_FDCWD);
if ((buf = LuaUnixAllocRaw(L, bufsiz))) {
if ((buf = LuaAlloc(L, bufsiz))) {
if ((rc = readlinkat(dirfd, path, buf, bufsiz)) != -1) {
got = rc;
if (got < bufsiz) {
@ -543,7 +535,7 @@ static int LuaUnixCommandv(lua_State *L) {
char *pathbuf, *resolved;
olderr = errno;
prog = luaL_checkstring(L, 1);
if ((pathbuf = LuaUnixAllocRaw(L, PATH_MAX))) {
if ((pathbuf = LuaAlloc(L, PATH_MAX))) {
if ((resolved = commandv(prog, pathbuf, PATH_MAX))) {
lua_pushstring(L, resolved);
free(pathbuf);
@ -913,7 +905,7 @@ static int LuaUnixRead(lua_State *L) {
bufsiz = luaL_optinteger(L, 2, BUFSIZ);
offset = luaL_optinteger(L, 3, -1);
bufsiz = MIN(bufsiz, 0x7ffff000);
if ((buf = LuaUnixAllocRaw(L, bufsiz))) {
if ((buf = LuaAlloc(L, bufsiz))) {
if (offset == -1) {
rc = read(fd, buf, bufsiz);
} else {
@ -1246,7 +1238,7 @@ static int LuaUnixSiocgifconf(lua_State *L) {
struct ifreq *ifr;
struct ifconf conf;
olderr = errno;
if (!(data = LuaUnixAllocRaw(L, (n = 4096)))) {
if (!(data = LuaAlloc(L, (n = 4096)))) {
return SysretErrno(L, "siocgifconf", olderr);
}
if ((fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP)) == -1) {
@ -1345,7 +1337,7 @@ static int LuaUnixPoll(lua_State *L) {
lua_pushnil(L);
for (fds = 0, nfds = 0; lua_next(L, 1);) {
if (lua_isinteger(L, -2)) {
if ((fds2 = LuaUnixRealloc(L, fds, (nfds + 1) * sizeof(*fds)))) {
if ((fds2 = LuaRealloc(L, fds, (nfds + 1) * sizeof(*fds)))) {
fds2[nfds].fd = lua_tointeger(L, -2);
fds2[nfds].events = lua_tointeger(L, -1);
fds = fds2;
@ -1392,7 +1384,7 @@ static int LuaUnixRecvfrom(lua_State *L) {
bufsiz = luaL_optinteger(L, 2, 1500);
bufsiz = MIN(bufsiz, 0x7ffff000);
flags = luaL_optinteger(L, 3, 0);
if ((buf = LuaUnixAllocRaw(L, bufsiz))) {
if ((buf = LuaAlloc(L, bufsiz))) {
rc = recvfrom(fd, buf, bufsiz, flags, &sa, &addrsize);
if (rc != -1) {
got = rc;
@ -1423,7 +1415,7 @@ static int LuaUnixRecv(lua_State *L) {
bufsiz = luaL_optinteger(L, 2, 1500);
bufsiz = MIN(bufsiz, 0x7ffff000);
flags = luaL_optinteger(L, 3, 0);
if ((buf = LuaUnixAllocRaw(L, bufsiz))) {
if ((buf = LuaAlloc(L, bufsiz))) {
rc = recv(fd, buf, bufsiz, flags);
if (rc != -1) {
got = rc;

View file

@ -5029,6 +5029,7 @@ static const luaL_Reg kLuaFuncs[] = {
{"Bsf", LuaBsf}, //
{"Bsr", LuaBsr}, //
{"CategorizeIp", LuaCategorizeIp}, //
{"Compress", LuaCompress}, //
{"Crc32", LuaCrc32}, //
{"Crc32c", LuaCrc32c}, //
{"Decimate", LuaDecimate}, //
@ -5164,6 +5165,7 @@ static const luaL_Reg kLuaFuncs[] = {
{"Sleep", LuaSleep}, //
{"Slurp", LuaSlurp}, //
{"StoreAsset", LuaStoreAsset}, //
{"Uncompress", LuaUncompress}, //
{"Underlong", LuaUnderlong}, //
{"VisualizeControlCodes", LuaVisualizeControlCodes}, //
{"Write", LuaWrite}, //