Make sorted serialization faster

Redbean Lua and JSON serialization now goes faster because we're now
inserting object entries into a tree data structure rather than making
an array and sorting it at the end. For example, when serializing an
object with 10,000 entries this goes twice as fast. However, it still
goes slower than saying EncodeJson(x, {sorted=false}).
This commit is contained in:
Justine Tunney 2022-07-22 04:19:01 -07:00
parent 9de3d8f1e6
commit 84caee23ba
12 changed files with 122 additions and 224 deletions

View file

@ -13,14 +13,14 @@ struct critbit0 {
bool critbit0_contains(struct critbit0 *, const char *) dontthrow nosideeffect
paramsnonnull();
bool critbit0_insert(struct critbit0 *, const char *) paramsnonnull();
int critbit0_insert(struct critbit0 *, const char *) paramsnonnull();
bool critbit0_delete(struct critbit0 *, const char *) dontthrow paramsnonnull();
void critbit0_clear(struct critbit0 *) dontthrow paramsnonnull();
char *critbit0_get(struct critbit0 *, const char *);
intptr_t critbit0_allprefixed(struct critbit0 *, const char *,
intptr_t (*)(const char *, void *), void *)
paramsnonnull((1, 2, 3)) dontthrow;
bool critbit0_emplace(struct critbit0 *, char *, size_t) paramsnonnull();
int critbit0_emplace(struct critbit0 *, char *, size_t) paramsnonnull();
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */

View file

@ -58,7 +58,9 @@ intptr_t critbit0_allprefixed(struct critbit0 *t, const char *prefix,
if (q->byte < ulen) top = p;
}
for (size_t i = 0; i < ulen; ++i) {
if (p[i] != ubytes[i]) return 0;
if (p[i] != ubytes[i]) {
return 0;
}
}
return allprefixed_traverse(top, callback, arg);
}

View file

@ -23,18 +23,19 @@
/**
* Inserts 𝑢 into 𝑡 without copying.
* @param t tree
* @param u NUL-terminated string which must be 8+ byte aligned and
* becomes owned by the tree afterwards
* @return true if 𝑡 was mutated
*
* @param t is critical bit tree
* @param u is nul-terminated string which must be 8+ byte aligned
* and becomes owned by the tree afterwards
* @return 1 if 𝑡 was mutated, 0 if 𝑢 was already present, or -1 w/ errno
* @note h/t djb and agl
*/
bool critbit0_emplace(struct critbit0 *t, char *u, size_t ulen) {
int critbit0_emplace(struct critbit0 *t, char *u, size_t ulen) {
unsigned char *p = t->root;
if (!p) {
t->root = u;
t->count = 1;
return true;
return 1;
}
const unsigned char *const ubytes = (void *)u;
while (1 & (intptr_t)p) {
@ -49,39 +50,43 @@ bool critbit0_emplace(struct critbit0 *t, char *u, size_t ulen) {
for (newbyte = 0; newbyte < ulen; ++newbyte) {
if (p[newbyte] != ubytes[newbyte]) {
newotherbits = p[newbyte] ^ ubytes[newbyte];
goto different_byte_found;
goto DifferentByteFound;
}
}
if (p[newbyte] != 0) {
newotherbits = p[newbyte];
goto different_byte_found;
goto DifferentByteFound;
}
return false;
different_byte_found:
return 0;
DifferentByteFound:
newotherbits |= newotherbits >> 1;
newotherbits |= newotherbits >> 2;
newotherbits |= newotherbits >> 4;
newotherbits = (newotherbits & ~(newotherbits >> 1)) ^ 255;
unsigned char c = p[newbyte];
int newdirection = (1 + (newotherbits | c)) >> 8;
struct CritbitNode *newnode = malloc(sizeof(struct CritbitNode));
newnode->byte = newbyte;
newnode->otherbits = newotherbits;
newnode->child[1 - newdirection] = (void*)ubytes;
void **wherep = &t->root;
for (;;) {
unsigned char *wp = *wherep;
if (!(1 & (intptr_t)wp)) break;
struct CritbitNode *q = (void *)(wp - 1);
if (q->byte > newbyte) break;
if (q->byte == newbyte && q->otherbits > newotherbits) break;
unsigned char c2 = 0;
if (q->byte < ulen) c2 = ubytes[q->byte];
const int direction = (1 + (q->otherbits | c2)) >> 8;
wherep = q->child + direction;
struct CritbitNode *newnode;
if ((newnode = malloc(sizeof(struct CritbitNode)))) {
newnode->byte = newbyte;
newnode->otherbits = newotherbits;
newnode->child[1 - newdirection] = (void *)ubytes;
void **wherep = &t->root;
for (;;) {
unsigned char *wp = *wherep;
if (!(1 & (intptr_t)wp)) break;
struct CritbitNode *q = (void *)(wp - 1);
if (q->byte > newbyte) break;
if (q->byte == newbyte && q->otherbits > newotherbits) break;
unsigned char c2 = 0;
if (q->byte < ulen) c2 = ubytes[q->byte];
const int direction = (1 + (q->otherbits | c2)) >> 8;
wherep = q->child + direction;
}
newnode->child[newdirection] = *wherep;
*wherep = (void *)(1 + (char *)newnode);
t->count++;
return 1;
} else {
return -1;
}
newnode->child[newdirection] = *wherep;
*wherep = (void *)(1 + (char *)newnode);
t->count++;
return true;
}

View file

@ -25,10 +25,15 @@
* Inserts 𝑢 into 𝑡.
* @param t tree
* @param u NUL-terminated string
* @return true if 𝑡 was mutated
* @return 1 if 𝑡 was mutated, 0 if 𝑢 was already present, or -1 w/ errno
* @note h/t djb and agl
*/
bool critbit0_insert(struct critbit0 *t, const char *u) {
size_t ulen = strlen(u);
return critbit0_emplace(t, memcpy(malloc(ulen + 1), u, ulen + 1), ulen);
int critbit0_insert(struct critbit0 *t, const char *u) {
  int rc;
  char *p;
  size_t n;
  // emplace wants a string it can own, so hand it a private heap copy
  if (!(p = malloc((n = strlen(u)) + 1))) {
    return -1;
  }
  rc = critbit0_emplace(t, memcpy(p, u, n + 1), n);
  // emplace only stores the pointer in the tree when it returns 1; on 0
  // (string already present) or -1 (node allocation failed) the copy is
  // still ours and would leak if we didn't release it here
  if (rc != 1) {
    free(p);
  }
  return rc;
}

View file

@ -1,82 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/alg/alg.h"
#include "libc/stdio/append.internal.h"
#include "libc/stdio/strlist.internal.h"
#include "libc/str/str.h"
/**
 * Compares two slots of a string array, for use with qsort().
 *
 * @param p1 points to the first `char *` slot
 * @param p2 points to the second `char *` slot
 * @return strcmp() ordering of the two strings the slots point at
 */
static int CompareStrings(const void *p1, const void *p2) {
  // qsort passes pointers to the array slots; keeping the slots const
  // avoids discarding the qualifier that arrives on the void pointers
  const char *const *a = p1;
  const char *const *b = p2;
  return strcmp(*a, *b);
}
/**
 * Releases every string held by the list, then the slot array itself.
 *
 * The list is left zeroed afterwards.
 *
 * @param sl is list whose first `sl->i` slots are freed
 */
void FreeStrList(struct StrList *sl) {
  int k = 0;
  while (k < sl->i) {
    free(sl->p[k]);
    ++k;
  }
  free(sl->p);
  sl->n = 0;
  sl->i = 0;
  sl->p = 0;
}
/**
 * Adds a new slot to the end of the list.
 *
 * @param sl is list to extend
 * @return index of the new slot in `sl->p`, or -1 if the slot array
 *     couldn't be grown or the slot couldn't be initialized
 */
int AppendStrList(struct StrList *sl) {
  int n2;
  char **p2;
  if (sl->i == sl->n) {
    // out of slots: grow capacity by half, treating empty as two
    n2 = sl->n;
    if (!n2) n2 = 2;
    n2 += n2 >> 1;
    if (!(p2 = realloc(sl->p, n2 * sizeof(*p2)))) {
      return -1;  // sl->p wasn't overwritten so the list is still intact
    }
    sl->p = p2;
    sl->n = n2;
  }
  sl->p[sl->i] = 0;
  // presumably sets the slot up as an empty append buffer; since sl->i
  // isn't bumped on failure the caller never sees a half-made slot
  if (appendr(&sl->p[sl->i], 0) == -1) {
    return -1;
  }
  return sl->i++;
}
/**
 * Sorts the used slots of the list using CompareStrings() ordering.
 *
 * @param sl is list to sort in place; an empty list is left untouched
 */
void SortStrList(struct StrList *sl) {
  if (!sl->i) return;
  qsort(sl->p, sl->i, sizeof(sl->p[0]), CompareStrings);
}
/**
 * Appends every string in the list to `*buf`, separated by `sep`.
 *
 * @param sl is list whose used slots are joined in order
 * @param buf is append buffer receiving the output
 * @param sep is passed to appendw() between consecutive items
 * @return 0 on success, or -1 if an append call reported -1; when the
 *     list is empty and `*buf` is null, the result of appendr(buf, 0)
 */
int JoinStrList(struct StrList *sl, char **buf, uint64_t sep) {
  int k;
  if (!(*buf || sl->i)) {
    // nothing to join and no buffer yet
    return appendr(buf, 0);
  }
  k = 0;
  while (k < sl->i) {
    if (k && appendw(buf, sep) == -1) return -1;
    if (appends(buf, sl->p[k]) == -1) return -1;
    ++k;
  }
  return 0;
}

View file

@ -1,18 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_STDIO_STRLIST_H_
#define COSMOPOLITAN_LIBC_STDIO_STRLIST_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/* Growable array of heap-allocated strings. */
struct StrList {
  int i;    /* number of slots currently in use */
  int n;    /* number of slots allocated in p */
  char **p; /* slot array; used slots are freed by FreeStrList() */
};
/* frees all strings plus the slot array and zeroes the list */
void FreeStrList(struct StrList *) hidden;
/* adds a slot, returning its index or -1 on allocation failure */
int AppendStrList(struct StrList *) hidden;
/* sorts used slots in place by strcmp() order */
void SortStrList(struct StrList *) hidden;
/* appends all strings to the buffer with a separator between items */
int JoinStrList(struct StrList *, char **, uint64_t) hidden;
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_STDIO_STRLIST_H_ */

View file

@ -1,58 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/bits.h"
#include "libc/intrin/kprintf.h"
#include "libc/mem/mem.h"
#include "libc/runtime/gc.internal.h"
#include "libc/stdio/append.internal.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/strlist.internal.h"
#include "libc/testlib/testlib.h"
// list shared by the tests in this file
struct StrList sl;
// releases the shared list; presumably called by testlib after each
// test so that every test starts from an empty list (TODO confirm)
void TearDown(void) {
FreeStrList(&sl);
}
// adds "world" then "hello" and checks that sorting followed by joining
// yields them in strcmp() order with the separator in between
TEST(strlist, test) {
int i;
char *b = 0;
ASSERT_NE(-1, (i = AppendStrList(&sl)));
ASSERT_NE(-1, appends(&sl.p[i], "world"));
ASSERT_NE(-1, (i = AppendStrList(&sl)));
ASSERT_NE(-1, appends(&sl.p[i], "hello"));
SortStrList(&sl);
// presumably READ16LE packs the two separator bytes into one word
ASSERT_NE(-1, JoinStrList(&sl, &b, READ16LE(", ")));
EXPECT_STREQ("hello, world", b);
free(b);
}
// same as above but with digit strings and a single-byte separator
TEST(strlist, testNumbers) {
int i;
char *b = 0;
ASSERT_NE(-1, (i = AppendStrList(&sl)));
ASSERT_NE(-1, appends(&sl.p[i], "2"));
ASSERT_NE(-1, (i = AppendStrList(&sl)));
ASSERT_NE(-1, appends(&sl.p[i], "1"));
SortStrList(&sl);
ASSERT_NE(-1, JoinStrList(&sl, &b, ':'));
EXPECT_STREQ("1:2", b);
free(b);
}

View file

@ -115,6 +115,7 @@ THIRD_PARTY_LUA_A_OBJS = \
$(THIRD_PARTY_LUA_A_SRCS:%.c=o/$(MODE)/%.o)
THIRD_PARTY_LUA_A_DIRECTDEPS = \
LIBC_ALG \
LIBC_CALLS \
LIBC_FMT \
LIBC_INTRIN \

View file

@ -16,6 +16,7 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/alg/critbit0.h"
#include "libc/assert.h"
#include "libc/bits/bits.h"
#include "libc/bits/likely.h"
@ -27,7 +28,6 @@
#include "libc/runtime/gc.internal.h"
#include "libc/runtime/stack.h"
#include "libc/stdio/append.internal.h"
#include "libc/stdio/strlist.internal.h"
#include "libc/str/str.h"
#include "net/http/escape.h"
#include "third_party/double-conversion/wrapper.h"
@ -44,6 +44,11 @@ struct Serializer {
bool sorted;
};
// state handed to Join() while walking the tree of serialized entries
struct Joiner {
char **buf;  // output buffer; must stay first for `{buf}` initializers
int i;       // zero until the first entry has been written
};
static int Serialize(lua_State *, char **, int, struct Serializer *, int);
static int SerializeNull(lua_State *L, char **buf) {
@ -133,31 +138,48 @@ OnError:
return -1;
}
/**
 * Traversal callback that appends one serialized entry to the output.
 *
 * A comma is written before every entry except the first.
 *
 * @param elem is text of the entry to append
 * @param arg is the struct Joiner carrying the output buffer and state
 * @return 0 on success, or -1 if RETURN_ON_ERROR bails to OnError
 */
static intptr_t Join(const char *elem, void *arg) {
  struct Joiner *joiner = arg;
  if (joiner->i) {
    RETURN_ON_ERROR(appendw(joiner->buf, ','));
  } else {
    joiner->i = 1;  // first entry: no separator, just remember we began
  }
  RETURN_ON_ERROR(appends(joiner->buf, elem));
  return 0;
OnError:
  return -1;
}
static int SerializeSorted(lua_State *L, char **buf, struct Serializer *z,
int level) {
int i;
struct StrList sl = {0};
char *b = 0;
struct Joiner j = {buf};
struct critbit0 t = {0};
lua_pushnil(L);
while (lua_next(L, -2)) {
if (lua_type(L, -2) == LUA_TSTRING) {
RETURN_ON_ERROR(i = AppendStrList(&sl));
RETURN_ON_ERROR(SerializeString(L, sl.p + i, -2, z));
RETURN_ON_ERROR(appendw(sl.p + i, ':'));
RETURN_ON_ERROR(Serialize(L, sl.p + i, -1, z, level - 1));
RETURN_ON_ERROR(appendr(&b, 0));
RETURN_ON_ERROR(SerializeString(L, &b, -2, z));
RETURN_ON_ERROR(appendw(&b, ':'));
RETURN_ON_ERROR(Serialize(L, &b, -1, z, level - 1));
RETURN_ON_ERROR(critbit0_insert(&t, b));
lua_pop(L, 1);
} else {
z->reason = "json objects must only use string keys";
goto OnError;
}
}
SortStrList(&sl);
RETURN_ON_ERROR(appendw(buf, '{'));
RETURN_ON_ERROR(JoinStrList(&sl, buf, ','));
RETURN_ON_ERROR(critbit0_allprefixed(&t, "", Join, &j));
RETURN_ON_ERROR(appendw(buf, '}'));
FreeStrList(&sl);
critbit0_clear(&t);
free(b);
return 0;
OnError:
FreeStrList(&sl);
critbit0_clear(&t);
free(b);
return -1;
}

View file

@ -16,6 +16,7 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/alg/critbit0.h"
#include "libc/assert.h"
#include "libc/bits/bits.h"
#include "libc/fmt/itoa.h"
@ -24,7 +25,6 @@
#include "libc/mem/mem.h"
#include "libc/runtime/stack.h"
#include "libc/stdio/append.internal.h"
#include "libc/stdio/strlist.internal.h"
#include "libc/x/x.h"
#include "third_party/double-conversion/wrapper.h"
#include "third_party/lua/cosmo.h"
@ -39,6 +39,11 @@ struct Serializer {
bool sorted;
};
// state handed to Join() while walking the tree of serialized entries
struct Joiner {
char **buf;  // output buffer; must stay first for `{buf}` initializers
int i;       // zero until the first entry has been written
};
static int Serialize(lua_State *, char **, int, struct Serializer *, int);
static bool IsLuaIdentifier(lua_State *L, int idx) {
@ -298,37 +303,54 @@ OnError:
return -1;
}
/**
 * Traversal callback that appends one serialized entry to the output.
 *
 * The ", " separator is written before every entry except the first.
 *
 * @param elem is text of the entry to append
 * @param arg is the struct Joiner carrying the output buffer and state
 * @return 0 on success, or -1 if RETURN_ON_ERROR bails to OnError
 */
static intptr_t Join(const char *elem, void *arg) {
  struct Joiner *joiner = arg;
  if (joiner->i) {
    RETURN_ON_ERROR(appendw(joiner->buf, READ16LE(", ")));
  } else {
    joiner->i = 1;  // first entry: no separator, just remember we began
  }
  RETURN_ON_ERROR(appends(joiner->buf, elem));
  return 0;
OnError:
  return -1;
}
static int SerializeSorted(lua_State *L, char **buf, struct Serializer *z,
int depth) {
size_t n;
int i, rc;
char *b = 0;
const char *s;
struct StrList sl = {0};
struct Joiner j = {buf};
struct critbit0 t = {0};
lua_pushnil(L);
while (lua_next(L, -2)) {
RETURN_ON_ERROR(i = AppendStrList(&sl));
RETURN_ON_ERROR(appendr(&b, 0));
if (lua_type(L, -2) == LUA_TSTRING && IsLuaIdentifier(L, -2)) {
// use {𝑘=𝑣} syntax when 𝑘 is a legal lua identifier
s = lua_tolstring(L, -2, &n);
RETURN_ON_ERROR(appendd(sl.p + i, s, n));
RETURN_ON_ERROR(appendw(sl.p + i, '='));
RETURN_ON_ERROR(appendd(&b, s, n));
RETURN_ON_ERROR(appendw(&b, '='));
} else {
// use {[𝑘]=𝑣} otherwise
RETURN_ON_ERROR(appendw(sl.p + i, '['));
RETURN_ON_ERROR(Serialize(L, sl.p + i, -2, z, depth - 1));
RETURN_ON_ERROR(appendw(sl.p + i, ']' | '=' << 010));
RETURN_ON_ERROR(appendw(&b, '['));
RETURN_ON_ERROR(Serialize(L, &b, -2, z, depth - 1));
RETURN_ON_ERROR(appendw(&b, ']' | '=' << 010));
}
RETURN_ON_ERROR(Serialize(L, sl.p + i, -1, z, depth - 1));
RETURN_ON_ERROR(Serialize(L, &b, -1, z, depth - 1));
RETURN_ON_ERROR(critbit0_insert(&t, b));
lua_pop(L, 1);
}
SortStrList(&sl);
RETURN_ON_ERROR(appendw(buf, '{'));
RETURN_ON_ERROR(JoinStrList(&sl, buf, READ16LE(", ")));
RETURN_ON_ERROR(critbit0_allprefixed(&t, "", Join, &j));
RETURN_ON_ERROR(appendw(buf, '}'));
FreeStrList(&sl);
critbit0_clear(&t);
free(b);
return 0;
OnError:
FreeStrList(&sl);
critbit0_clear(&t);
free(b);
return -1;
}

View file

@ -41,7 +41,6 @@
#include "libc/sock/sock.h"
#include "libc/sock/struct/pollfd.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/strlist.internal.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/ioprio.h"
#include "libc/sysv/consts/map.h"

View file

@ -783,9 +783,9 @@ FUNCTIONS
- sorted: (bool=true) Lua uses hash tables so the order of
object keys is lost in a Lua table. So, by default, we use
`qsort(strcmp)` to impose a deterministic output order. If
you don't care about ordering then setting `sorted=false`
should yield a 1.6x performance boost in serialization.
`strcmp` to impose a deterministic output order. If you
don't care about ordering then setting `sorted=false`
should yield a performance boost in serialization.
This function will return an error if:
@ -840,9 +840,9 @@ FUNCTIONS
- sorted: (bool=true) Lua uses hash tables so the order of
object keys is lost in a Lua table. So, by default, we use
`qsort(strcmp)` to impose a deterministic output order. If
you don't care about ordering then setting `sorted=false`
should yield a 2x performance boost in serialization.
`strcmp` to impose a deterministic output order. If you
don't care about ordering then setting `sorted=false`
should yield a performance boost in serialization.
If a user data object has a `__repr` or `__tostring` meta
method, then that'll be used to encode the Lua code.