Improve Lua and JSON serialization

This commit is contained in:
Justine Tunney 2022-07-12 23:31:06 -07:00
parent 3027d67037
commit e3cd476a9b
20 changed files with 1041 additions and 476 deletions

View file

@ -735,20 +735,52 @@ FUNCTIONS
the original ordering of fields. As such, they'll be sorted
by EncodeJson() and may not round-trip with original intent
EncodeJson(value[,options:table])
EncodeJson(value[, options:table])
├─→ json:str
├─→ true [if useoutput]
└─→ nil, error:str
Turns Lua data structure into a JSON string.
Turns Lua data structure into JSON string.
Tables with non-zero length (as reported by `#`) are encoded
as arrays and any non-array elements are ignored. Empty tables
are encoded as `{}` with the exception of the special empty
table `{[0]=false}` shall be encoded as `[]`. Arrays elements
are serialized in specified order. Object entries are sorted
ASCIIbetically using strcmp() on their string keys to ensure
deterministic order.
Since Lua uses tables for both hashmaps and arrays, we use a
simple fast algorithm for telling the two apart. Tables with
non-zero length (as reported by `#`) are encoded as arrays,
and any non-array elements are ignored. For example:
>: EncodeJson({2})
"[2]"
>: EncodeJson({[1]=2, ["hi"]=1})
"[2]"
If there are holes in your array, then the serialized array
will exclude everything after the first hole. If the beginning
of your array is a hole, then an error is returned.
>: EncodeJson({[1]=1, [3]=3})
"[1]"
>: EncodeJson({[2]=1, [3]=3})
"[]"
>: EncodeJson({[2]=1, [3]=3})
nil "json objects must only use string keys"
If the raw length of a table is reported as zero, then we
check for the magic element `[0]=false`. If it's present, then
your table will be serialized as empty array `[]`. That entry is
inserted by DecodeJson() automatically, only when encountering
empty arrays, and it's necessary in order to make empty arrays
round-trip. If raw length is zero and `[0]=false` is absent,
then your table will be serialized as an iterated object.
>: EncodeJson({})
"{}"
>: EncodeJson({[0]=false})
"[]"
>: EncodeJson({["hi"]=1})
"{\"hi\":1}"
>: EncodeJson({["hi"]=1, [0]=false})
"[]"
>: EncodeJson({["hi"]=1, [7]=false})
nil "json objects must only use string keys"
The following options may be used:
@ -756,38 +788,72 @@ FUNCTIONS
output buffer and returns `nil` value. This option is
ignored if used outside of request handling code.
This function will fail if:
- sorted: (bool=true) Lua uses hash tables so the order of
object keys is lost in a Lua table. So, by default, we use
`qsort(strcmp)` to impose a deterministic output order. If
you don't care about ordering then setting `sorted=false`
should yield a 1.6x performance boost in serialization.
This function will return an error if:
- `value` is cyclic
- `value` has depth greater than 64
- `value` contains functions, user data, or threads
- `value` is a table that blends string / non-string keys
- Your serializer runs out of C heap memory (setrlimit)
When arrays and objects are serialized, entries will be sorted
in a deterministic order.
We assume strings in `value` contain UTF-8. This serializer
currently does not produce UTF-8 output. The output format is
right now ASCII. Your UTF-8 data will be safely transcoded to
\uXXXX sequences which are UTF-16. Overlong encodings in your
input strings will be canonicalized rather than validated.
This parser does not support UTF-8
NaNs are serialized as `null` and Infinities are `null` which
is consistent with the v8 behavior.
EncodeLua(value[,options:table])
EncodeLua(value[, options:table])
├─→ luacode:str
├─→ true [if useoutput]
└─→ nil, error:str
Turns Lua data structure into Lua code string.
Since Lua uses tables as both hashmaps and arrays, tables will
only be serialized as an array with determinate order, if it's
an array in the strictest possible sense.
1. for all 𝑘=𝑣 in table, 𝑘 is an integer ≥1
2. no holes exist between MIN(𝑘) and MAX(𝑘)
3. if non-empty, MIN(𝑘) is 1
In all other cases, your table will be serialized as an object
which is iterated and displayed as a list of (possibly) sorted
entries that have equal signs.
>: EncodeLua({3, 2})
"{3, 2}"
>: EncodeLua({[1]=3, [2]=3})
"{3, 3}"
>: EncodeLua({[1]=3, [3]=3})
"{[1]=3, [3]=3}"
>: EncodeLua({["hi"]=1, [1]=2})
"{[1]=2, hi=1}"
The following options may be used:
- useoutput: (bool=false) encodes the result directly to the
output buffer and returns `nil` value. This option is
ignored if used outside of request handling code.
- sorted: (bool=true) Lua uses hash tables so the order of
object keys is lost in a Lua table. So, by default, we use
`qsort(strcmp)` to impose a deterministic output order. If
you don't care about ordering then setting `sorted=false`
should yield a 2x performance boost in serialization.
If a user data object has a `__repr` or `__tostring` meta
method, then that'll be used to encode the Lua code.
When tables are serialized, entries will be sorted in a
deterministic order. This makes `EncodeLua` a great fit for
writing unit tests, when tables contain regular normal data.
This serializer is designed primarily to describe data. For
example, it's used by the REPL where we need to be able to
ignore errors when displaying data structures, since showing
@ -802,10 +868,32 @@ FUNCTIONS
tables; however instead of failing, it embeds a string of
unspecified layout describing the cycle.
Integer literals are encoded as decimal. However if the int64
number is ≥256 and has a population count of 1 then we switch
to representing the number in hexadecimal, for readability.
Hex numbers have leading zeroes added in order to visualize
whether the number fits in a uint16, uint32, or int64. Also
some numbers can only be encoded expressionally. For example,
NaNs are serialized as `0/0`, and Infinity is `math.huge`.
>: 7000
7000
>: 0x100
0x0100
>: 0x10000
0x00010000
>: 0x100000000
0x0000000100000000
>: 0/0
0/0
>: 1.5e+9999
math.huge
>: -9223372036854775807 - 1
-9223372036854775807 - 1
The only failure return condition currently implemented is
when C runs out of heap memory.
EncodeLatin1(utf-8:str[,flags:int]) → iso-8859-1:str
Turns UTF-8 into ISO-8859-1 string.

View file

@ -511,9 +511,13 @@ static dontinline int LuaCoderImpl(lua_State *L,
void *p;
size_t n;
p = luaL_checklstring(L, 1, &n);
p = C(p, n, &n);
lua_pushlstring(L, p, n);
free(p);
if ((p = C(p, n, &n))) {
lua_pushlstring(L, p, n);
free(p);
} else {
luaL_error(L, "out of memory");
unreachable;
}
return 1;
}
@ -575,7 +579,17 @@ int LuaEscapeFragment(lua_State *L) {
}
int LuaEscapeLiteral(lua_State *L) {
return LuaCoder(L, EscapeJsStringLiteral);
char *p, *q = 0;
size_t n, y = 0;
p = luaL_checklstring(L, 1, &n);
if ((p = EscapeJsStringLiteral(&q, &y, p, n, &n))) {
lua_pushlstring(L, p, n);
free(q);
return 1;
} else {
luaL_error(L, "out of memory");
unreachable;
}
}
int LuaVisualizeControlCodes(lua_State *L) {

View file

@ -36,8 +36,6 @@
#define OBJECT_KEY 2
#define OBJECT_VAL 3
#define MAX_JSON_DEPTH 128
static struct DecodeJson Parse(struct lua_State *L, const char *p,
const char *e, int context, int depth) {
long x;
@ -47,7 +45,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
const char *reason;
struct DecodeJson r;
int A, B, C, D, c, d, i, u;
if (UNLIKELY(!--depth)) {
if (UNLIKELY(!depth)) {
return (struct DecodeJson){-1, "maximum depth exceeded"};
}
for (a = p, d = +1; p < e;) {
@ -154,7 +152,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
lua_newtable(L);
i = 0;
for (;;) {
r = Parse(L, p, e, ARRAY_VAL, depth);
r = Parse(L, p, e, ARRAY_VAL, depth - 1);
if (UNLIKELY(r.rc == -1)) {
lua_pop(L, 1);
return r;
@ -190,7 +188,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey;
lua_newtable(L);
for (;;) {
r = Parse(L, p, e, OBJECT_KEY, depth);
r = Parse(L, p, e, OBJECT_KEY, depth - 1);
if (r.rc == -1) {
lua_pop(L, 1);
return r;
@ -199,7 +197,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
if (!r.rc) {
break;
}
r = Parse(L, p, e, OBJECT_VAL, depth);
r = Parse(L, p, e, OBJECT_VAL, depth - 1);
if (r.rc == -1) {
lua_pop(L, 2);
return r;
@ -388,9 +386,10 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
* @return r.p is string describing error if `rc < 0`
*/
struct DecodeJson DecodeJson(struct lua_State *L, const char *p, size_t n) {
int depth = 64;
if (n == -1) n = p ? strlen(p) : 0;
if (lua_checkstack(L, MAX_JSON_DEPTH + MAX_JSON_DEPTH / 2)) {
return Parse(L, p, p + n, TOP_LEVEL, MAX_JSON_DEPTH);
if (lua_checkstack(L, depth * 4)) {
return Parse(L, p, p + n, TOP_LEVEL, depth);
} else {
return (struct DecodeJson){-1, "can't set stack depth"};
}

View file

@ -4221,11 +4221,11 @@ static int LuaLog(lua_State *L) {
}
static int LuaEncodeSmth(lua_State *L,
int Encoder(lua_State *, char **, char *, int)) {
int useoutput = false;
int maxdepth = 64;
char *numformat = "%.14g";
int Encoder(lua_State *, char **, int, bool)) {
char *p = 0;
int maxdepth = 64;
int sorted = true;
int useoutput = false;
if (lua_istable(L, 2)) {
lua_settop(L, 2); // discard any extra arguments
lua_getfield(L, 2, "useoutput");
@ -4233,11 +4233,11 @@ static int LuaEncodeSmth(lua_State *L,
if (ishandlingrequest && lua_isboolean(L, -1)) {
useoutput = lua_toboolean(L, -1);
}
lua_getfield(L, 2, "numformat");
numformat = luaL_optstring(L, -1, numformat);
lua_getfield(L, 2, "sorted");
sorted = lua_toboolean(L, -1);
}
lua_settop(L, 1); // keep the passed argument on top
if (Encoder(L, useoutput ? &outbuf : &p, numformat, -1) == -1) {
if (Encoder(L, useoutput ? &outbuf : &p, -1, sorted) == -1) {
free(p);
return 2;
}
@ -5352,7 +5352,7 @@ static void LuaPrint(lua_State *L) {
if (n > 0) {
for (i = 1; i <= n; i++) {
if (i > 1) appendw(&b, '\t');
LuaEncodeLuaData(L, &b, "g", i);
LuaEncodeLuaData(L, &b, i, true);
}
appendw(&b, '\n');
WRITE(1, b, appendz(b).i);