Further improve JSON serialization

This commit is contained in:
Justine Tunney 2022-07-11 23:06:49 -07:00
parent 4814b6bdf8
commit 6ee18986e4
20 changed files with 868 additions and 687 deletions

View file

@ -28,12 +28,12 @@ int EscapeLuaString(char *s, size_t len, char **buf) {
size_t i;
RETURN_ON_ERROR(appendw(buf, '"'));
for (i = 0; i < len; i++) {
if (' ' <= s[i] && s[i] <= 0x7e) {
RETURN_ON_ERROR(appendw(buf, s[i]));
} else if (s[i] == '\n') {
if (s[i] == '\n') {
RETURN_ON_ERROR(appendw(buf, '\\' | 'n' << 8));
} else if (s[i] == '\\' || s[i] == '\'' || s[i] == '\"') {
RETURN_ON_ERROR(appendw(buf, '\\' | s[i] << 8));
} else if (' ' <= s[i] && s[i] <= 0x7e) {
RETURN_ON_ERROR(appendw(buf, s[i]));
} else {
RETURN_ON_ERROR(
appendw(buf, '\\' | 'x' << 010 |

View file

@ -19,6 +19,8 @@
#include "libc/assert.h"
#include "libc/bits/bits.h"
#include "libc/fmt/itoa.h"
#include "libc/intrin/kprintf.h"
#include "libc/log/log.h"
#include "libc/log/rop.h"
#include "libc/mem/mem.h"
#include "libc/runtime/gc.internal.h"
@ -34,13 +36,14 @@
static int LuaEncodeJsonDataImpl(lua_State *L, char **buf, int level,
char *numformat, int idx,
struct LuaVisited *visited) {
struct LuaVisited *visited,
const char **reason) {
char *s;
int sli, rc;
bool isarray;
char ibuf[128];
size_t tbllen, i, z;
struct StrList sl = {0};
char ibuf[128], fmt[] = "%.14g";
if (level > 0) {
switch (lua_type(L, idx)) {
@ -56,7 +59,9 @@ static int LuaEncodeJsonDataImpl(lua_State *L, char **buf, int level,
case LUA_TSTRING:
s = lua_tolstring(L, idx, &z);
if (!(s = EscapeJsStringLiteral(s, z, &z))) goto OnError;
if (!(s = EscapeJsStringLiteral(s, z, &z))) {
goto OnError;
}
RETURN_ON_ERROR(appendw(buf, '"'));
RETURN_ON_ERROR(appendd(buf, s, z));
RETURN_ON_ERROR(appendw(buf, '"'));
@ -76,19 +81,28 @@ static int LuaEncodeJsonDataImpl(lua_State *L, char **buf, int level,
case LUA_TTABLE:
RETURN_ON_ERROR(rc = LuaPushVisit(visited, lua_topointer(L, idx)));
if (!rc) {
lua_pushvalue(L, idx); // table ref
tbllen = lua_rawlen(L, -1);
// encode tables with numeric indices and empty tables as arrays
isarray =
tbllen > 0 || // integer keys present
(lua_pushnil(L), !lua_next(L, -2)) || // no non-integer keys
(lua_pop(L, 2), false); // pop key/value pushed by lua_next
// create nearby reference to table at idx
lua_pushvalue(L, idx);
// fast way to tell if table is an array or object
if ((tbllen = lua_rawlen(L, -1)) > 0) {
isarray = true;
} else {
// the json parser inserts `[0]=false` in empty arrays
// so we can tell them apart from empty objects, which
// is needed in order to have `[]` roundtrip the parse
isarray = (lua_rawgeti(L, -1, 0) == LUA_TBOOLEAN &&
!lua_toboolean(L, -1));
lua_pop(L, 1);
}
// now serialize the table
if (isarray) {
for (i = 1; i <= tbllen; i++) {
RETURN_ON_ERROR(sli = AppendStrList(&sl));
lua_rawgeti(L, -1, i); // table/-2, value/-1
RETURN_ON_ERROR(LuaEncodeJsonDataImpl(L, &sl.p[sli], level - 1,
numformat, -1, visited));
RETURN_ON_ERROR(LuaEncodeJsonDataImpl(
L, &sl.p[sli], level - 1, numformat, -1, visited, reason));
lua_pop(L, 1);
}
} else {
@ -96,20 +110,15 @@ static int LuaEncodeJsonDataImpl(lua_State *L, char **buf, int level,
lua_pushnil(L); // push the first key
while (lua_next(L, -2)) {
if (lua_type(L, -2) != LUA_TSTRING) {
// json tables must be arrays or use string keys
*reason = "json objects must only use string keys";
goto OnError;
}
// the json parser inserts a `__json_object__` into empty
// objects, so we don't serialize `{}` as `[]` by mistake
// and as such, we should ignore it here, for readability
if (strcmp(luaL_checkstring(L, -2), "__json_object__")) {
RETURN_ON_ERROR(sli = AppendStrList(&sl));
RETURN_ON_ERROR(LuaEncodeJsonDataImpl(L, &sl.p[sli], level - 1,
numformat, -2, visited));
RETURN_ON_ERROR(appendw(&sl.p[sli], ':'));
RETURN_ON_ERROR(LuaEncodeJsonDataImpl(L, &sl.p[sli], level - 1,
numformat, -1, visited));
}
RETURN_ON_ERROR(sli = AppendStrList(&sl));
RETURN_ON_ERROR(LuaEncodeJsonDataImpl(
L, &sl.p[sli], level - 1, numformat, -2, visited, reason));
RETURN_ON_ERROR(appendw(&sl.p[sli], ':'));
RETURN_ON_ERROR(LuaEncodeJsonDataImpl(
L, &sl.p[sli], level - 1, numformat, -1, visited, reason));
lua_pop(L, 1); // table/-2, key/-1
}
// stack: table/-1, as the key was popped by lua_next
@ -121,18 +130,18 @@ static int LuaEncodeJsonDataImpl(lua_State *L, char **buf, int level,
RETURN_ON_ERROR(appendw(buf, isarray ? ']' : '}'));
LuaPopVisit(visited);
lua_pop(L, 1); // table ref
return 0;
} else {
// cyclic data structure
*reason = "won't serialize cyclic lua table";
goto OnError;
}
default:
// unsupported lua type
*reason = "unsupported lua type";
goto OnError;
}
} else {
// too much depth
*reason = "table has great depth";
goto OnError;
}
OnError:
@ -152,7 +161,12 @@ OnError:
int LuaEncodeJsonData(lua_State *L, char **buf, char *numformat, int idx) {
int rc;
struct LuaVisited visited = {0};
rc = LuaEncodeJsonDataImpl(L, buf, 64, numformat, idx, &visited);
const char *reason = "out of memory";
rc = LuaEncodeJsonDataImpl(L, buf, 64, numformat, idx, &visited, &reason);
free(visited.p);
if (rc == -1) {
lua_pushnil(L);
lua_pushstring(L, reason);
}
return rc;
}

View file

@ -57,6 +57,15 @@ static bool IsLuaArray(lua_State *L) {
return true;
}
/**
 * Appends a quoted `kind@pointer` placeholder for the value at `idx`.
 *
 * @param L is Lua interpreter state
 * @param buf receives the appended text
 * @param idx is the stack index passed to lua_topointer()
 * @param kind is the label written before the `@`
 * @return 0 on success, or -1 if appendf() reported failure
 */
static int LuaEncodeLuaOpaqueData(lua_State *L, char **buf, int idx,
                                  const char *kind) {
  const void *addr = lua_topointer(L, idx);
  if (appendf(buf, "\"%s@%p\"", kind, addr) == -1) {
    return -1;
  }
  return 0;
}
static int LuaEncodeLuaDataImpl(lua_State *L, char **buf, int level,
char *numformat, int idx,
struct LuaVisited *visited) {
@ -80,19 +89,13 @@ static int LuaEncodeLuaDataImpl(lua_State *L, char **buf, int level,
return 0;
case LUA_TFUNCTION:
RETURN_ON_ERROR(
appendf(buf, "\"%s@%p\"", "func", lua_topointer(L, idx)));
return 0;
return LuaEncodeLuaOpaqueData(L, buf, idx, "func");
case LUA_TLIGHTUSERDATA:
RETURN_ON_ERROR(
appendf(buf, "\"%s@%p\"", "light", lua_topointer(L, idx)));
return 0;
return LuaEncodeLuaOpaqueData(L, buf, idx, "light");
case LUA_TTHREAD:
RETURN_ON_ERROR(
appendf(buf, "\"%s@%p\"", "thread", lua_topointer(L, idx)));
return 0;
return LuaEncodeLuaOpaqueData(L, buf, idx, "thread");
case LUA_TUSERDATA:
if (luaL_callmeta(L, idx, "__repr")) {
@ -117,9 +120,7 @@ static int LuaEncodeLuaDataImpl(lua_State *L, char **buf, int level,
lua_pop(L, 1);
return 0;
}
RETURN_ON_ERROR(
appendf(buf, "\"%s@%p\"", "udata", lua_touserdata(L, idx)));
return 0;
return LuaEncodeLuaOpaqueData(L, buf, idx, "udata");
case LUA_TNUMBER:
if (lua_isinteger(L, idx)) {
@ -174,19 +175,16 @@ static int LuaEncodeLuaDataImpl(lua_State *L, char **buf, int level,
RETURN_ON_ERROR(appendw(buf, '}'));
FreeStrList(&sl);
LuaPopVisit(visited);
} else {
RETURN_ON_ERROR(
appendf(buf, "\"%s@%p\"", "cyclic", lua_topointer(L, idx)));
}
return 0;
return 0;
} else {
return LuaEncodeLuaOpaqueData(L, buf, idx, "cyclic");
}
default:
// unsupported lua type
goto OnError;
return LuaEncodeLuaOpaqueData(L, buf, idx, "unsupported");
}
} else {
// too much depth
goto OnError;
return LuaEncodeLuaOpaqueData(L, buf, idx, "greatdepth");
}
OnError:
FreeStrList(&sl);
@ -196,6 +194,13 @@ OnError:
/**
* Encodes Lua data structure as Lua code string.
*
* This serializer is intended primarily for describing the data
* structure. For example, it's used by the REPL where we need to be
* able to ignore errors when displaying data structures, since showing
* most things imperfectly is better than crashing. Therefore this isn't
* the kind of serializer you'd want to use to persist data in prod. Try
* using the JSON serializer for that purpose.
*
* @param L is Lua interpreter state
* @param buf receives encoded output string
* @param numformat controls double formatting
@ -207,5 +212,9 @@ int LuaEncodeLuaData(lua_State *L, char **buf, char *numformat, int idx) {
struct LuaVisited visited = {0};
rc = LuaEncodeLuaDataImpl(L, buf, 64, numformat, idx, &visited);
free(visited.p);
if (rc == -1) {
lua_pushnil(L);
lua_pushstring(L, "out of memory");
}
return rc;
}

View file

@ -17,29 +17,43 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/x/x.h"
#include "libc/mem/mem.h"
#include "third_party/lua/visitor.h"
int LuaPushVisit(struct LuaVisited *visited, const void *p) {
int i, n2;
const void **p2;
for (i = 0; i < visited->n; ++i) {
if (visited->p[i] == p) {
return 1;
static inline bool IsVisited(struct LuaVisited *v, const void *p) {
int i;
for (i = 0; i < v->i; ++i) {
if (v->p[i] == p) {
return true;
}
}
n2 = visited->n;
if ((p2 = realloc(visited->p, ++n2 * sizeof(*visited->p)))) {
visited->p = p2;
visited->n = n2;
} else {
return -1;
return false;
}
static inline int Visit(struct LuaVisited *v, const void *p) {
int n2;
const void **p2;
if (v->i == v->n) {
n2 = v->n;
if (!n2) n2 = 2;
n2 += n2 >> 1;
if ((p2 = realloc(v->p, n2 * sizeof(*p2)))) {
v->p = p2;
v->n = n2;
} else {
return -1;
}
}
visited->p[visited->n - 1] = p;
v->p[v->i++] = p;
return 0;
}
void LuaPopVisit(struct LuaVisited *visited) {
assert(visited->n > 0);
--visited->n;
/**
 * Records `p` as visited unless it is already on the list.
 *
 * @param v is the visited-pointer list
 * @param p is the pointer to look up and, if absent, record
 * @return 1 if IsVisited() already finds `p`, otherwise the result
 *     of Visit()
 */
int LuaPushVisit(struct LuaVisited *v, const void *p) {
  return IsVisited(v, p) ? 1 : Visit(v, p);
}
/**
 * Drops the most recently recorded entry by decrementing the count.
 *
 * Asserts that at least one entry is present.
 *
 * @param v is the visited-pointer list
 */
void LuaPopVisit(struct LuaVisited *v) {
  assert(v->i > 0);
  v->i -= 1;
}

View file

@ -4,7 +4,7 @@
COSMOPOLITAN_C_START_
struct LuaVisited {
int n;
int i, n;
const void **p;
};