Make sorted serialization faster

Redbean Lua and JSON serialization now goes faster because we're
inserting object entries into a tree data structure rather than making
an array and sorting it at the end. For example, when serializing an
object with 10,000 entries this goes twice as fast. However, it still
goes slower than saying EncodeJson(x, {sorted=false}).
This commit is contained in:
Justine Tunney 2022-07-22 04:19:01 -07:00
parent 9de3d8f1e6
commit 84caee23ba
12 changed files with 122 additions and 224 deletions

View file

@ -13,14 +13,14 @@ struct critbit0 {
bool critbit0_contains(struct critbit0 *, const char *) dontthrow nosideeffect bool critbit0_contains(struct critbit0 *, const char *) dontthrow nosideeffect
paramsnonnull(); paramsnonnull();
bool critbit0_insert(struct critbit0 *, const char *) paramsnonnull(); int critbit0_insert(struct critbit0 *, const char *) paramsnonnull();
bool critbit0_delete(struct critbit0 *, const char *) dontthrow paramsnonnull(); bool critbit0_delete(struct critbit0 *, const char *) dontthrow paramsnonnull();
void critbit0_clear(struct critbit0 *) dontthrow paramsnonnull(); void critbit0_clear(struct critbit0 *) dontthrow paramsnonnull();
char *critbit0_get(struct critbit0 *, const char *); char *critbit0_get(struct critbit0 *, const char *);
intptr_t critbit0_allprefixed(struct critbit0 *, const char *, intptr_t critbit0_allprefixed(struct critbit0 *, const char *,
intptr_t (*)(const char *, void *), void *) intptr_t (*)(const char *, void *), void *)
paramsnonnull((1, 2, 3)) dontthrow; paramsnonnull((1, 2, 3)) dontthrow;
bool critbit0_emplace(struct critbit0 *, char *, size_t) paramsnonnull(); int critbit0_emplace(struct critbit0 *, char *, size_t) paramsnonnull();
COSMOPOLITAN_C_END_ COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */

View file

@ -58,7 +58,9 @@ intptr_t critbit0_allprefixed(struct critbit0 *t, const char *prefix,
if (q->byte < ulen) top = p; if (q->byte < ulen) top = p;
} }
for (size_t i = 0; i < ulen; ++i) { for (size_t i = 0; i < ulen; ++i) {
if (p[i] != ubytes[i]) return 0; if (p[i] != ubytes[i]) {
return 0;
}
} }
return allprefixed_traverse(top, callback, arg); return allprefixed_traverse(top, callback, arg);
} }

View file

@ -23,18 +23,19 @@
/** /**
* Inserts 𝑢 into 𝑡 without copying. * Inserts 𝑢 into 𝑡 without copying.
* @param t tree *
* @param u NUL-terminated string which must be 8+ byte aligned and * @param t is critical bit tree
* becomes owned by the tree afterwards * @param u is nul-terminated string which must be 8+ byte aligned
* @return true if 𝑡 was mutated * and becomes owned by the tree afterwards
* @return true if 𝑡 was mutated, or -1 w/ errno
* @note h/t djb and agl * @note h/t djb and agl
*/ */
bool critbit0_emplace(struct critbit0 *t, char *u, size_t ulen) { int critbit0_emplace(struct critbit0 *t, char *u, size_t ulen) {
unsigned char *p = t->root; unsigned char *p = t->root;
if (!p) { if (!p) {
t->root = u; t->root = u;
t->count = 1; t->count = 1;
return true; return 1;
} }
const unsigned char *const ubytes = (void *)u; const unsigned char *const ubytes = (void *)u;
while (1 & (intptr_t)p) { while (1 & (intptr_t)p) {
@ -49,22 +50,23 @@ bool critbit0_emplace(struct critbit0 *t, char *u, size_t ulen) {
for (newbyte = 0; newbyte < ulen; ++newbyte) { for (newbyte = 0; newbyte < ulen; ++newbyte) {
if (p[newbyte] != ubytes[newbyte]) { if (p[newbyte] != ubytes[newbyte]) {
newotherbits = p[newbyte] ^ ubytes[newbyte]; newotherbits = p[newbyte] ^ ubytes[newbyte];
goto different_byte_found; goto DifferentByteFound;
} }
} }
if (p[newbyte] != 0) { if (p[newbyte] != 0) {
newotherbits = p[newbyte]; newotherbits = p[newbyte];
goto different_byte_found; goto DifferentByteFound;
} }
return false; return 0;
different_byte_found: DifferentByteFound:
newotherbits |= newotherbits >> 1; newotherbits |= newotherbits >> 1;
newotherbits |= newotherbits >> 2; newotherbits |= newotherbits >> 2;
newotherbits |= newotherbits >> 4; newotherbits |= newotherbits >> 4;
newotherbits = (newotherbits & ~(newotherbits >> 1)) ^ 255; newotherbits = (newotherbits & ~(newotherbits >> 1)) ^ 255;
unsigned char c = p[newbyte]; unsigned char c = p[newbyte];
int newdirection = (1 + (newotherbits | c)) >> 8; int newdirection = (1 + (newotherbits | c)) >> 8;
struct CritbitNode *newnode = malloc(sizeof(struct CritbitNode)); struct CritbitNode *newnode;
if ((newnode = malloc(sizeof(struct CritbitNode)))) {
newnode->byte = newbyte; newnode->byte = newbyte;
newnode->otherbits = newotherbits; newnode->otherbits = newotherbits;
newnode->child[1 - newdirection] = (void *)ubytes; newnode->child[1 - newdirection] = (void *)ubytes;
@ -83,5 +85,8 @@ different_byte_found:
newnode->child[newdirection] = *wherep; newnode->child[newdirection] = *wherep;
*wherep = (void *)(1 + (char *)newnode); *wherep = (void *)(1 + (char *)newnode);
t->count++; t->count++;
return true; return 1;
} else {
return -1;
}
} }

View file

@ -25,10 +25,15 @@
* Inserts 𝑢 into 𝑡. * Inserts 𝑢 into 𝑡.
* @param t tree * @param t tree
* @param u NUL-terminated string * @param u NUL-terminated string
* @return true if 𝑡 was mutated * @return true if 𝑡 was mutated, or -1 w/ errno
* @note h/t djb and agl * @note h/t djb and agl
*/ */
int critbit0_insert(struct critbit0 *t, const char *u) {
  int rc;
  char *p;
  size_t n;
  // the tree stores pointers rather than copies, so hand it a private
  // heap duplicate of 𝑢 (including the nul terminator)
  if (!(p = malloc((n = strlen(u)) + 1))) {
    return -1;
  }
  // critbit0_emplace() only keeps the pointer when it reports a mutation
  // (1); when 𝑢 was already present (0) or the node allocation failed
  // (-1) the duplicate is still ours, and must be released or it leaks
  if ((rc = critbit0_emplace(t, memcpy(p, u, n + 1), n)) < 1) {
    free(p);
  }
  return rc;
}

View file

@ -1,82 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/alg/alg.h"
#include "libc/stdio/append.internal.h"
#include "libc/stdio/strlist.internal.h"
#include "libc/str/str.h"
/**
 * Orders two elements of a string-pointer array for qsort().
 *
 * @param p1 points at the first `char *` element
 * @param p2 points at the second `char *` element
 * @return strcmp() of the two strings the elements point to
 */
static int CompareStrings(const void *p1, const void *p2) {
  // qsort() hands us pointers to the array elements, i.e. pointers to
  // string pointers; keep them const at every level instead of quietly
  // dropping the qualifier that came with the void pointers
  const char *const *a = p1;
  const char *const *b = p2;
  return strcmp(*a, *b);
}
/**
 * Releases every string in the list and then the pointer array itself.
 *
 * The list is left zeroed afterwards.
 *
 * @param sl is list to tear down
 */
void FreeStrList(struct StrList *sl) {
  int k = 0;
  while (k < sl->i) {
    free(sl->p[k++]);
  }
  free(sl->p);
  *sl = (struct StrList){0};
}
/**
 * Adds a new, empty string entry to the end of the list.
 *
 * The pointer array is grown by roughly 1.5x (starting at three slots)
 * whenever it is full.
 *
 * @param sl is list to extend
 * @return index of the new entry, or -1 if memory couldn't be obtained,
 *     in which case the list's entry count is left unchanged
 */
int AppendStrList(struct StrList *sl) {
  int n2;
  char **p2;
  if (sl->i == sl->n) {
    n2 = sl->n;
    if (!n2) n2 = 2;
    n2 += n2 >> 1;
    if (!(p2 = realloc(sl->p, n2 * sizeof(*p2)))) {
      return -1;
    }
    sl->p = p2;
    sl->n = n2;
  }
  sl->p[sl->i] = 0;
  // don't publish the slot unless its buffer was actually set up;
  // otherwise a NULL entry would later reach appends() in JoinStrList()
  if (appendr(&sl->p[sl->i], 0) == -1) {
    return -1;
  }
  return sl->i++;
}
/**
 * Sorts the list's entries in place using CompareStrings().
 *
 * @param sl is list to sort; an empty list is left untouched
 */
void SortStrList(struct StrList *sl) {
  if (!sl->i) return;
  qsort(sl->p, sl->i, sizeof(char *), CompareStrings);
}
/**
 * Appends every list entry to 𝑏𝑢𝑓, placing 𝑠𝑒𝑝 between entries.
 *
 * @param sl is list whose entries get joined
 * @param buf is append buffer receiving the output
 * @param sep is separator handed to appendw() between entries
 * @return 0 on success, or -1 if an append failed; when the list is
 *     empty and *buf is still null, the result of appendr(buf, 0)
 */
int JoinStrList(struct StrList *sl, char **buf, uint64_t sep) {
  int k;
  if (!(*buf || sl->i)) {
    // nothing to join and no buffer yet
    return appendr(buf, 0);
  }
  for (k = 0; k < sl->i; ++k) {
    // the separator goes before every entry except the first
    if (k > 0 && appendw(buf, sep) == -1) return -1;
    if (appends(buf, sl->p[k]) == -1) return -1;
  }
  return 0;
}

View file

@ -1,18 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_STDIO_STRLIST_H_
#define COSMOPOLITAN_LIBC_STDIO_STRLIST_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/* growable array of heap-allocated strings */
struct StrList {
  int i;    /* number of entries currently in use */
  int n;    /* number of entries 𝑝 has room for */
  char **p; /* the entries themselves */
};
/* frees every entry plus the pointer array and zeroes the list */
void FreeStrList(struct StrList *) hidden;
/* adds an empty entry; returns its index, or -1 if growing fails */
int AppendStrList(struct StrList *) hidden;
/* sorts the entries in place by strcmp() order */
void SortStrList(struct StrList *) hidden;
/* appends all entries to the buffer with the separator in between;
   returns -1 if an append fails */
int JoinStrList(struct StrList *, char **, uint64_t) hidden;
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_STDIO_STRLIST_H_ */

View file

@ -1,58 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/bits.h"
#include "libc/intrin/kprintf.h"
#include "libc/mem/mem.h"
#include "libc/runtime/gc.internal.h"
#include "libc/stdio/append.internal.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/strlist.internal.h"
#include "libc/testlib/testlib.h"
// list shared by the tests below; starts out zeroed as a global
struct StrList sl;
// releases whatever entries a test left in the shared list
void TearDown(void) {
FreeStrList(&sl);
}
TEST(strlist, test) {
int i;
char *b = 0;
ASSERT_NE(-1, (i = AppendStrList(&sl)));
ASSERT_NE(-1, appends(&sl.p[i], "world"));
ASSERT_NE(-1, (i = AppendStrList(&sl)));
ASSERT_NE(-1, appends(&sl.p[i], "hello"));
SortStrList(&sl);
ASSERT_NE(-1, JoinStrList(&sl, &b, READ16LE(", ")));
EXPECT_STREQ("hello, world", b);
free(b);
}
TEST(strlist, testNumbers) {
int i;
char *b = 0;
ASSERT_NE(-1, (i = AppendStrList(&sl)));
ASSERT_NE(-1, appends(&sl.p[i], "2"));
ASSERT_NE(-1, (i = AppendStrList(&sl)));
ASSERT_NE(-1, appends(&sl.p[i], "1"));
SortStrList(&sl);
ASSERT_NE(-1, JoinStrList(&sl, &b, ':'));
EXPECT_STREQ("1:2", b);
free(b);
}

View file

@ -115,6 +115,7 @@ THIRD_PARTY_LUA_A_OBJS = \
$(THIRD_PARTY_LUA_A_SRCS:%.c=o/$(MODE)/%.o) $(THIRD_PARTY_LUA_A_SRCS:%.c=o/$(MODE)/%.o)
THIRD_PARTY_LUA_A_DIRECTDEPS = \ THIRD_PARTY_LUA_A_DIRECTDEPS = \
LIBC_ALG \
LIBC_CALLS \ LIBC_CALLS \
LIBC_FMT \ LIBC_FMT \
LIBC_INTRIN \ LIBC_INTRIN \

View file

@ -16,6 +16,7 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/alg/critbit0.h"
#include "libc/assert.h" #include "libc/assert.h"
#include "libc/bits/bits.h" #include "libc/bits/bits.h"
#include "libc/bits/likely.h" #include "libc/bits/likely.h"
@ -27,7 +28,6 @@
#include "libc/runtime/gc.internal.h" #include "libc/runtime/gc.internal.h"
#include "libc/runtime/stack.h" #include "libc/runtime/stack.h"
#include "libc/stdio/append.internal.h" #include "libc/stdio/append.internal.h"
#include "libc/stdio/strlist.internal.h"
#include "libc/str/str.h" #include "libc/str/str.h"
#include "net/http/escape.h" #include "net/http/escape.h"
#include "third_party/double-conversion/wrapper.h" #include "third_party/double-conversion/wrapper.h"
@ -44,6 +44,11 @@ struct Serializer {
bool sorted; bool sorted;
}; };
// state threaded through the Join() callback while walking the tree
struct Joiner {
char **buf;  // append buffer the entries are written to
int i;       // zero until the first entry has been written
};
static int Serialize(lua_State *, char **, int, struct Serializer *, int); static int Serialize(lua_State *, char **, int, struct Serializer *, int);
static int SerializeNull(lua_State *L, char **buf) { static int SerializeNull(lua_State *L, char **buf) {
@ -133,31 +138,48 @@ OnError:
return -1; return -1;
} }
/**
 * Tree-walk callback that appends one serialized entry to the output.
 *
 * A comma is written before every entry except the first one.
 *
 * @param elem is the serialized entry
 * @param arg is the `struct Joiner` carrying the output buffer
 * @return 0 on success, or -1 if appending failed
 */
static intptr_t Join(const char *elem, void *arg) {
  struct Joiner *joiner = arg;
  if (joiner->i) {
    RETURN_ON_ERROR(appendw(joiner->buf, ','));
  } else {
    // first entry: no separator, just remember we've started
    joiner->i = 1;
  }
  RETURN_ON_ERROR(appends(joiner->buf, elem));
  return 0;
OnError:
  return -1;
}
/**
 * Serializes the table at the top of the Lua stack as a JSON object
 * whose entries are emitted in the critbit tree's traversal order.
 *
 * Each `key:value` pair is rendered into a scratch buffer, copied into
 * a critbit tree, and the tree is then walked to join the entries.
 *
 * @param L is lua state with the table at the top of the stack
 * @param buf is append buffer receiving the output
 * @param z is serializer state; `reason` is set if a key isn't a string
 * @param level is remaining nesting budget passed down to Serialize()
 * @return 0 on success, or -1 on error
 */
static int SerializeSorted(lua_State *L, char **buf, struct Serializer *z,
                           int level) {
  char *b = 0;
  struct Joiner j = {buf};
  struct critbit0 t = {0};
  lua_pushnil(L);
  while (lua_next(L, -2)) {
    if (lua_type(L, -2) == LUA_TSTRING) {
      // the scratch buffer is reused for every entry; appendr(&b, 0)
      // presumably rewinds it to empty (TODO confirm against appendr)
      RETURN_ON_ERROR(appendr(&b, 0));
      RETURN_ON_ERROR(SerializeString(L, &b, -2, z));
      RETURN_ON_ERROR(appendw(&b, ':'));
      RETURN_ON_ERROR(Serialize(L, &b, -1, z, level - 1));
      // critbit0_insert() makes its own copy, so 𝑏 stays ours
      RETURN_ON_ERROR(critbit0_insert(&t, b));
      lua_pop(L, 1);
    } else {
      z->reason = "json objects must only use string keys";
      goto OnError;
    }
  }
  RETURN_ON_ERROR(appendw(buf, '{'));
  // the empty prefix matches every entry in the tree
  RETURN_ON_ERROR(critbit0_allprefixed(&t, "", Join, &j));
  RETURN_ON_ERROR(appendw(buf, '}'));
  critbit0_clear(&t);
  free(b);
  return 0;
OnError:
  critbit0_clear(&t);
  free(b);
  return -1;
}

View file

@ -16,6 +16,7 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/alg/critbit0.h"
#include "libc/assert.h" #include "libc/assert.h"
#include "libc/bits/bits.h" #include "libc/bits/bits.h"
#include "libc/fmt/itoa.h" #include "libc/fmt/itoa.h"
@ -24,7 +25,6 @@
#include "libc/mem/mem.h" #include "libc/mem/mem.h"
#include "libc/runtime/stack.h" #include "libc/runtime/stack.h"
#include "libc/stdio/append.internal.h" #include "libc/stdio/append.internal.h"
#include "libc/stdio/strlist.internal.h"
#include "libc/x/x.h" #include "libc/x/x.h"
#include "third_party/double-conversion/wrapper.h" #include "third_party/double-conversion/wrapper.h"
#include "third_party/lua/cosmo.h" #include "third_party/lua/cosmo.h"
@ -39,6 +39,11 @@ struct Serializer {
bool sorted; bool sorted;
}; };
// state threaded through the Join() callback while walking the tree
struct Joiner {
char **buf;  // append buffer the entries are written to
int i;       // zero until the first entry has been written
};
static int Serialize(lua_State *, char **, int, struct Serializer *, int); static int Serialize(lua_State *, char **, int, struct Serializer *, int);
static bool IsLuaIdentifier(lua_State *L, int idx) { static bool IsLuaIdentifier(lua_State *L, int idx) {
@ -298,37 +303,54 @@ OnError:
return -1; return -1;
} }
/**
 * Tree-walk callback that appends one serialized entry to the output.
 *
 * A ", " separator is written before every entry except the first one.
 *
 * @param elem is the serialized entry
 * @param arg is the `struct Joiner` carrying the output buffer
 * @return 0 on success, or -1 if appending failed
 */
static intptr_t Join(const char *elem, void *arg) {
  struct Joiner *joiner = arg;
  if (joiner->i) {
    RETURN_ON_ERROR(appendw(joiner->buf, READ16LE(", ")));
  } else {
    // first entry: no separator, just remember we've started
    joiner->i = 1;
  }
  RETURN_ON_ERROR(appends(joiner->buf, elem));
  return 0;
OnError:
  return -1;
}
/**
 * Serializes the table at the top of the Lua stack as a Lua table
 * constructor whose entries are emitted in the critbit tree's
 * traversal order.
 *
 * Each entry is rendered into a scratch buffer, copied into a critbit
 * tree, and the tree is then walked to join the entries with ", ".
 *
 * @param L is lua state with the table at the top of the stack
 * @param buf is append buffer receiving the output
 * @param z is serializer state passed through to Serialize()
 * @param depth is remaining nesting budget passed down to Serialize()
 * @return 0 on success, or -1 on error
 */
static int SerializeSorted(lua_State *L, char **buf, struct Serializer *z,
                           int depth) {
  size_t n;
  char *b = 0;
  const char *s;
  struct Joiner j = {buf};
  struct critbit0 t = {0};
  lua_pushnil(L);
  while (lua_next(L, -2)) {
    // the scratch buffer is reused for every entry; appendr(&b, 0)
    // presumably rewinds it to empty (TODO confirm against appendr)
    RETURN_ON_ERROR(appendr(&b, 0));
    if (lua_type(L, -2) == LUA_TSTRING && IsLuaIdentifier(L, -2)) {
      // use {𝑘=𝑣} syntax when 𝑘 is a legal lua identifier
      s = lua_tolstring(L, -2, &n);
      RETURN_ON_ERROR(appendd(&b, s, n));
      RETURN_ON_ERROR(appendw(&b, '='));
    } else {
      // use {[𝑘]=𝑣} otherwise
      RETURN_ON_ERROR(appendw(&b, '['));
      RETURN_ON_ERROR(Serialize(L, &b, -2, z, depth - 1));
      RETURN_ON_ERROR(appendw(&b, ']' | '=' << 010));
    }
    RETURN_ON_ERROR(Serialize(L, &b, -1, z, depth - 1));
    // critbit0_insert() makes its own copy, so 𝑏 stays ours
    // NOTE(review): the tree is a set, so two entries that serialize to
    // the exact same text collapse into one; confirm that's acceptable
    // for non-string keys
    RETURN_ON_ERROR(critbit0_insert(&t, b));
    lua_pop(L, 1);
  }
  RETURN_ON_ERROR(appendw(buf, '{'));
  // the empty prefix matches every entry in the tree
  RETURN_ON_ERROR(critbit0_allprefixed(&t, "", Join, &j));
  RETURN_ON_ERROR(appendw(buf, '}'));
  critbit0_clear(&t);
  free(b);
  return 0;
OnError:
  critbit0_clear(&t);
  free(b);
  return -1;
}

View file

@ -41,7 +41,6 @@
#include "libc/sock/sock.h" #include "libc/sock/sock.h"
#include "libc/sock/struct/pollfd.h" #include "libc/sock/struct/pollfd.h"
#include "libc/stdio/stdio.h" #include "libc/stdio/stdio.h"
#include "libc/stdio/strlist.internal.h"
#include "libc/str/str.h" #include "libc/str/str.h"
#include "libc/sysv/consts/ioprio.h" #include "libc/sysv/consts/ioprio.h"
#include "libc/sysv/consts/map.h" #include "libc/sysv/consts/map.h"

View file

@ -783,9 +783,9 @@ FUNCTIONS
- sorted: (bool=true) Lua uses hash tables so the order of - sorted: (bool=true) Lua uses hash tables so the order of
object keys is lost in a Lua table. So, by default, we use object keys is lost in a Lua table. So, by default, we use
`qsort(strcmp)` to impose a deterministic output order. If `strcmp` to impose a deterministic output order. If you
you don't care about ordering then setting `sorted=false` don't care about ordering then setting `sorted=false`
should yield a 1.6x performance boost in serialization. should yield a performance boost in serialization.
This function will return an error if: This function will return an error if:
@ -840,9 +840,9 @@ FUNCTIONS
- sorted: (bool=true) Lua uses hash tables so the order of - sorted: (bool=true) Lua uses hash tables so the order of
object keys is lost in a Lua table. So, by default, we use object keys is lost in a Lua table. So, by default, we use
`qsort(strcmp)` to impose a deterministic output order. If `strcmp` to impose a deterministic output order. If you
you don't care about ordering then setting `sorted=false` don't care about ordering then setting `sorted=false`
should yield a 2x performance boost in serialization. should yield a performance boost in serialization.
If a user data object has a `__repr` or `__tostring` meta If a user data object has a `__repr` or `__tostring` meta
method, then that'll be used to encode the Lua code. method, then that'll be used to encode the Lua code.