mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-06-26 22:38:30 +00:00
Further improve JSON serialization
This commit is contained in:
parent
4814b6bdf8
commit
6ee18986e4
20 changed files with 868 additions and 687 deletions
|
@ -674,7 +674,12 @@ FUNCTIONS
|
|||
encodebase64.c.
|
||||
|
||||
DecodeJson(input:str)
|
||||
├─→ value:*
|
||||
├─→ int64
|
||||
├─→ string
|
||||
├─→ double
|
||||
├─→ array
|
||||
├─→ object
|
||||
├─→ true
|
||||
└─→ nil, error:str
|
||||
|
||||
Turns JSON string into a Lua data structure.
|
||||
|
@ -687,6 +692,9 @@ FUNCTIONS
|
|||
even though that structure won't round-trip with `EncodeJson`
|
||||
since redbean won't generate invalid JSON (see Postel's Law).
|
||||
|
||||
This parser permits top-level values regardless of type, with
|
||||
the exception of `false`, `null`, and absent (empty) input.
|
||||
|
||||
EncodeJson(value[,options:table])
|
||||
├─→ json:str
|
||||
├─→ true [if useoutput]
|
||||
|
@ -695,10 +703,12 @@ FUNCTIONS
|
|||
Turns Lua data structure into a JSON string.
|
||||
|
||||
Tables with non-zero length (as reported by `#`) are encoded
|
||||
as arrays with non-array elements ignored. Empty tables are
|
||||
encoded as empty arrays. All other tables are encoded as
|
||||
objects with numerical keys converted to strings (so `{[3]=1}`
|
||||
is encoded as `{"3":1}`).
|
||||
as arrays and any non-array elements are ignored. Empty tables
|
||||
are encoded as `{}` with the exception of the special empty
|
||||
table `{[0]=false}`, which is encoded as `[]`. Array elements
|
||||
are serialized in specified order. Object entries are sorted
|
||||
ASCIIbetically using strcmp() on their string keys to ensure
|
||||
deterministic order.
|
||||
|
||||
The following options may be used:
|
||||
|
||||
|
@ -711,6 +721,7 @@ FUNCTIONS
|
|||
- `value` is cyclic
|
||||
- `value` has depth greater than 64
|
||||
- `value` contains functions, user data, or threads
|
||||
- `value` is table that blends string / non-string keys
|
||||
|
||||
When arrays and objects are serialized, entries will be sorted
|
||||
in a deterministic order.
|
||||
|
@ -728,22 +739,30 @@ FUNCTIONS
|
|||
output buffer and returns `nil` value. This option is
|
||||
ignored if used outside of request handling code.
|
||||
|
||||
This function will fail if:
|
||||
|
||||
- `value` has depth greater than 64
|
||||
|
||||
If a user data object has a `__repr` or `__tostring` meta
|
||||
method, then that'll be used to encode the Lua code.
|
||||
|
||||
When tables are serialized, entries will be sorted in a
|
||||
deterministic order. This makes `EncodeLua` a great fit for
|
||||
writing unit tests, when tables contain regular normal data.
|
||||
|
||||
This serializer is designed primarily to describe data. For
|
||||
example, it's used by the REPL where we need to be able to
|
||||
ignore errors when displaying data structures, since showing
|
||||
most things imperfectly is better than crashing. Therefore
|
||||
this isn't the kind of serializer you'd want to use to persist
|
||||
data in prod. Try using the JSON serializer for that purpose.
|
||||
|
||||
Non-encodable value types (e.g. threads, functions) will be
|
||||
represented as a string literal with the type name and pointer
|
||||
address. Note this is subject to change in the future.
|
||||
address. The string description is of an unspecified format
|
||||
that could most likely change. This encoder detects cyclic
|
||||
tables; however instead of failing, it embeds a string of
|
||||
unspecified layout describing the cycle.
|
||||
|
||||
This encoder detects cyclic tables, and encodes a string
|
||||
literal saying it's cyclic when cycles are encountered.
|
||||
The only failure return condition currently implemented is
|
||||
when C runs out of heap memory.
|
||||
|
||||
When tables are serialized, entries will be sorted in a
|
||||
deterministic order.
|
||||
|
||||
EncodeLatin1(utf-8:str[,flags:int]) → iso-8859-1:str
|
||||
Turns UTF-8 into ISO-8859-1 string.
|
||||
|
|
240
tool/net/ljson.c
240
tool/net/ljson.c
|
@ -23,34 +23,32 @@
|
|||
#include "libc/str/tpenc.h"
|
||||
#include "libc/str/utf16.h"
|
||||
#include "third_party/double-conversion/wrapper.h"
|
||||
#include "third_party/lua/cosmo.h"
|
||||
#include "third_party/lua/lauxlib.h"
|
||||
#include "third_party/lua/ltests.h"
|
||||
#include "third_party/lua/lua.h"
|
||||
#include "tool/net/ljson.h"
|
||||
|
||||
#define TOP_LEVEL 0
|
||||
#define ARRAY_VAL 1
|
||||
#define OBJECT_KEY 2
|
||||
#define OBJECT_VAL 3
|
||||
|
||||
#define MAX_JSON_DEPTH 128
|
||||
|
||||
struct Rc {
|
||||
int t;
|
||||
const char *p;
|
||||
};
|
||||
|
||||
static struct Rc Parse(struct lua_State *L, const char *p, const char *e) {
|
||||
static struct DecodeJson Parse(struct lua_State *L, const char *p,
|
||||
const char *e, int context, int depth) {
|
||||
long x;
|
||||
char w[4];
|
||||
struct Rc r;
|
||||
const char *a;
|
||||
luaL_Buffer b;
|
||||
struct DecodeJson r;
|
||||
int A, B, C, D, c, d, i, u;
|
||||
if (lua_gettop(L) >= MAX_JSON_DEPTH) {
|
||||
luaL_error(L, "maximum depth exceeded\n");
|
||||
return (struct Rc){-1, p};
|
||||
if (UNLIKELY(!--depth)) {
|
||||
return (struct DecodeJson){-1, "maximum depth exceeded"};
|
||||
}
|
||||
for (a = p, d = +1; p < e;) {
|
||||
switch ((c = *p++ & 255)) {
|
||||
default:
|
||||
luaL_error(L, "illegal character\n");
|
||||
return (struct Rc){-1, p};
|
||||
|
||||
case ' ': // spaces
|
||||
case '\n':
|
||||
case '\r':
|
||||
|
@ -59,53 +57,69 @@ static struct Rc Parse(struct lua_State *L, const char *p, const char *e) {
|
|||
break;
|
||||
|
||||
case ',': // present in list and object
|
||||
a = p;
|
||||
break;
|
||||
if (context == ARRAY_VAL || context == OBJECT_KEY) {
|
||||
a = p;
|
||||
break;
|
||||
} else {
|
||||
return (struct DecodeJson){-1, "unexpected ','"};
|
||||
}
|
||||
|
||||
case ':': // present only in object after key
|
||||
if (LUA_TSTRING != lua_type(L, -1)) {
|
||||
luaL_error(L, "unexpected ':'\n");
|
||||
return (struct Rc){-1, p};
|
||||
if (context == OBJECT_VAL) {
|
||||
a = p;
|
||||
break;
|
||||
} else {
|
||||
return (struct DecodeJson){-1, "unexpected ':'"};
|
||||
}
|
||||
a = p;
|
||||
break;
|
||||
|
||||
case 'n': // null
|
||||
if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey;
|
||||
if (UNLIKELY(context == TOP_LEVEL)) {
|
||||
return (struct DecodeJson){-1, "toplevel json can't be null"};
|
||||
}
|
||||
if (p + 3 <= e && READ32LE(p - 1) == READ32LE("null")) {
|
||||
lua_pushnil(L);
|
||||
return (struct Rc){1, p + 3};
|
||||
return (struct DecodeJson){1, p + 3};
|
||||
} else {
|
||||
goto IllegalCharacter;
|
||||
}
|
||||
luaL_error(L, "expecting null\n");
|
||||
return (struct Rc){-1, p};
|
||||
|
||||
case 't': // true
|
||||
if (p + 3 <= e && READ32LE(p - 1) == READ32LE("true")) {
|
||||
lua_pushboolean(L, true);
|
||||
return (struct Rc){1, p + 3};
|
||||
}
|
||||
luaL_error(L, "expecting true\n");
|
||||
return (struct Rc){-1, p};
|
||||
|
||||
case 'f': // false
|
||||
if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey;
|
||||
if (UNLIKELY(context == TOP_LEVEL)) {
|
||||
return (struct DecodeJson){-1, "toplevel json can't be false"};
|
||||
}
|
||||
if (p + 4 <= e && READ32LE(p) == READ32LE("alse")) {
|
||||
lua_pushboolean(L, false);
|
||||
return (struct Rc){1, p + 4};
|
||||
return (struct DecodeJson){1, p + 4};
|
||||
} else {
|
||||
goto IllegalCharacter;
|
||||
}
|
||||
|
||||
case 't': // true
|
||||
if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey;
|
||||
if (p + 3 <= e && READ32LE(p - 1) == READ32LE("true")) {
|
||||
lua_pushboolean(L, true);
|
||||
return (struct DecodeJson){1, p + 3};
|
||||
} else {
|
||||
goto IllegalCharacter;
|
||||
}
|
||||
luaL_error(L, "expecting false\n");
|
||||
return (struct Rc){-1, p};
|
||||
|
||||
case '-': // negative
|
||||
if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey;
|
||||
d = -1;
|
||||
break;
|
||||
|
||||
case '0': // zero or number
|
||||
if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey;
|
||||
if (p < e && (*p == '.' || *p == 'e' || *p == 'E')) {
|
||||
goto UseDubble;
|
||||
}
|
||||
lua_pushinteger(L, 0);
|
||||
return (struct Rc){1, p};
|
||||
return (struct DecodeJson){1, p};
|
||||
|
||||
case '1' ... '9': // integer
|
||||
if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey;
|
||||
for (x = (c - '0') * d; p < e; ++p) {
|
||||
c = *p & 255;
|
||||
if (isdigit(c)) {
|
||||
|
@ -120,67 +134,75 @@ static struct Rc Parse(struct lua_State *L, const char *p, const char *e) {
|
|||
}
|
||||
}
|
||||
lua_pushinteger(L, x);
|
||||
return (struct Rc){1, p};
|
||||
return (struct DecodeJson){1, p};
|
||||
|
||||
UseDubble: // number
|
||||
lua_pushnumber(L, StringToDouble(a, e - a, &c));
|
||||
return (struct Rc){1, a + c};
|
||||
return (struct DecodeJson){1, a + c};
|
||||
|
||||
case '[': // Array
|
||||
if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey;
|
||||
lua_newtable(L);
|
||||
i = 0;
|
||||
do {
|
||||
r = Parse(L, p, e);
|
||||
p = r.p;
|
||||
if (r.t) {
|
||||
lua_rawseti(L, -2, i++ + 1);
|
||||
for (;;) {
|
||||
r = Parse(L, p, e, ARRAY_VAL, depth);
|
||||
if (UNLIKELY(r.rc == -1)) {
|
||||
lua_pop(L, 1);
|
||||
return r;
|
||||
}
|
||||
} while (r.t);
|
||||
if (*(p - 1) != ']') {
|
||||
luaL_error(L, "invalid list\n");
|
||||
return (struct Rc){-1, p};
|
||||
p = r.p;
|
||||
if (!r.rc) {
|
||||
break;
|
||||
}
|
||||
lua_rawseti(L, -2, i++ + 1);
|
||||
}
|
||||
return (struct Rc){1, p};
|
||||
if (!i) {
|
||||
// we need this kludge so `[]` won't round-trip as `{}`
|
||||
lua_pushboolean(L, false);
|
||||
lua_rawseti(L, -2, 0);
|
||||
}
|
||||
return (struct DecodeJson){1, p};
|
||||
|
||||
case ']':
|
||||
if (context == ARRAY_VAL) {
|
||||
return (struct DecodeJson){0, p};
|
||||
} else {
|
||||
return (struct DecodeJson){-1, "unexpected ']'"};
|
||||
}
|
||||
|
||||
case '}':
|
||||
return (struct Rc){0, p};
|
||||
if (context == OBJECT_KEY) {
|
||||
return (struct DecodeJson){0, p};
|
||||
} else {
|
||||
return (struct DecodeJson){-1, "unexpected '}'"};
|
||||
}
|
||||
|
||||
case '{': // Object
|
||||
if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey;
|
||||
lua_newtable(L);
|
||||
i = 0;
|
||||
do {
|
||||
r = Parse(L, p, e);
|
||||
p = r.p;
|
||||
if (r.t) {
|
||||
if (LUA_TSTRING != lua_type(L, -1)) {
|
||||
/* json keys can only be strings */
|
||||
lua_settop(L, -2);
|
||||
break;
|
||||
}
|
||||
r = Parse(L, p, e);
|
||||
p = r.p;
|
||||
if (!r.t) {
|
||||
/* key provided but no value */
|
||||
lua_settop(L, -2);
|
||||
luaL_error(L, "key provided but no value\n");
|
||||
return (struct Rc){-1, p};
|
||||
}
|
||||
lua_settable(L, -3);
|
||||
++i;
|
||||
for (;;) {
|
||||
r = Parse(L, p, e, OBJECT_KEY, depth);
|
||||
if (r.rc == -1) {
|
||||
lua_pop(L, 1);
|
||||
return r;
|
||||
}
|
||||
} while (r.t);
|
||||
if (!i) {
|
||||
// we need this kludge so `{}` won't round-trip as `[]`
|
||||
lua_pushstring(L, "__json_object__");
|
||||
lua_pushboolean(L, true);
|
||||
p = r.p;
|
||||
if (!r.rc) {
|
||||
break;
|
||||
}
|
||||
r = Parse(L, p, e, OBJECT_VAL, depth);
|
||||
if (r.rc == -1) {
|
||||
lua_pop(L, 2);
|
||||
return r;
|
||||
}
|
||||
if (!r.rc) {
|
||||
lua_pop(L, 2);
|
||||
return (struct DecodeJson){-1, "unexpected eof in object"};
|
||||
}
|
||||
p = r.p;
|
||||
lua_settable(L, -3);
|
||||
}
|
||||
if (*(p - 1) != '}') {
|
||||
luaL_error(L, "invalid object\n");
|
||||
return (struct Rc){-1, p};
|
||||
}
|
||||
return (struct Rc){1, p};
|
||||
return (struct DecodeJson){1, p};
|
||||
|
||||
case '"': // string
|
||||
luaL_buffinit(L, &b);
|
||||
|
@ -193,15 +215,11 @@ static struct Rc Parse(struct lua_State *L, const char *p, const char *e) {
|
|||
case '\\':
|
||||
if (p < e) {
|
||||
switch ((c = *p++ & 255)) {
|
||||
case '0':
|
||||
case 'x':
|
||||
luaL_error(L, "invalid escaped character\n");
|
||||
return (struct Rc){-1, p};
|
||||
|
||||
default:
|
||||
goto InvalidEscapeCharacter;
|
||||
case '"':
|
||||
case '/':
|
||||
case '\\':
|
||||
default:
|
||||
goto AddByte;
|
||||
case 'b':
|
||||
c = '\b';
|
||||
|
@ -218,6 +236,16 @@ static struct Rc Parse(struct lua_State *L, const char *p, const char *e) {
|
|||
case 't':
|
||||
c = '\t';
|
||||
goto AddByte;
|
||||
case 'x':
|
||||
if (p + 2 <= e && //
|
||||
(A = kHexToInt[p[0] & 255]) != -1 && // HEX
|
||||
(B = kHexToInt[p[1] & 255]) != -1) { //
|
||||
c = A << 4 | B;
|
||||
p += 2;
|
||||
goto AddByte;
|
||||
} else {
|
||||
goto InvalidEscapeCharacter;
|
||||
}
|
||||
case 'u':
|
||||
if (p + 4 <= e && //
|
||||
(A = kHexToInt[p[0] & 255]) != -1 && //
|
||||
|
@ -276,6 +304,7 @@ static struct Rc Parse(struct lua_State *L, const char *p, const char *e) {
|
|||
}
|
||||
luaL_addlstring(&b, w, i);
|
||||
} else {
|
||||
goto InvalidEscapeCharacter;
|
||||
BadUnicode:
|
||||
// Echo invalid \uXXXX sequences
|
||||
// Rather than corrupting UTF-8!
|
||||
|
@ -283,18 +312,35 @@ static struct Rc Parse(struct lua_State *L, const char *p, const char *e) {
|
|||
}
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
goto InvalidEscapeCharacter;
|
||||
}
|
||||
break;
|
||||
case '"':
|
||||
goto FinishString;
|
||||
luaL_pushresult(&b);
|
||||
return (struct DecodeJson){1, p};
|
||||
}
|
||||
}
|
||||
FinishString:
|
||||
luaL_pushresult(&b);
|
||||
return (struct Rc){1, p};
|
||||
luaL_pushresultsize(&b, 0);
|
||||
lua_pop(L, 1);
|
||||
return (struct DecodeJson){-1, "unexpected eof in string"};
|
||||
|
||||
default:
|
||||
IllegalCharacter:
|
||||
return (struct DecodeJson){-1, "illegal character"};
|
||||
BadObjectKey:
|
||||
return (struct DecodeJson){-1, "object key must be string"};
|
||||
InvalidEscapeCharacter:
|
||||
luaL_pushresultsize(&b, 0);
|
||||
lua_pop(L, 1);
|
||||
return (struct DecodeJson){-1, "invalid escape character"};
|
||||
}
|
||||
}
|
||||
return (struct Rc){0, p};
|
||||
if (UNLIKELY(context == TOP_LEVEL)) {
|
||||
return (struct DecodeJson){0, 0};
|
||||
} else {
|
||||
return (struct DecodeJson){-1, "unexpected eof"};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -318,13 +364,15 @@ static struct Rc Parse(struct lua_State *L, const char *p, const char *e) {
|
|||
* @param L is Lua interpreter state
|
||||
* @param p is input string
|
||||
* @param n is byte length of `p` or -1 for automatic strlen()
|
||||
* @return 1 if value was pushed, 0 on end, or -1 on error
|
||||
* @return res.rc is 1 if value pushed, 0 on eof, otherwise -1
|
||||
* @return res.p is the advanced `p` pointer if `rc` isn't -1
|
||||
* @return res.p is string describing error if `rc` is -1
|
||||
*/
|
||||
int DecodeJson(struct lua_State *L, const char *p, size_t n) {
|
||||
struct DecodeJson DecodeJson(struct lua_State *L, const char *p, size_t n) {
|
||||
if (n == -1) n = p ? strlen(p) : 0;
|
||||
if(!lua_checkstack(L, MAX_JSON_DEPTH + MAX_JSON_DEPTH/2)) {
|
||||
luaL_error(L, "unable to set stack depth of %d\n", MAX_JSON_DEPTH + MAX_JSON_DEPTH/2);
|
||||
return -1;
|
||||
if (lua_checkstack(L, MAX_JSON_DEPTH + MAX_JSON_DEPTH / 2)) {
|
||||
return Parse(L, p, p + n, TOP_LEVEL, MAX_JSON_DEPTH);
|
||||
} else {
|
||||
return (struct DecodeJson){-1, "can't set stack depth"};
|
||||
}
|
||||
return Parse(L, p, p + n).t;
|
||||
}
|
||||
|
|
|
@ -4,7 +4,12 @@
|
|||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
int DecodeJson(struct lua_State *, const char *, size_t);
|
||||
struct DecodeJson {
|
||||
int rc;
|
||||
const char *p;
|
||||
};
|
||||
|
||||
struct DecodeJson DecodeJson(struct lua_State *, const char *, size_t);
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/atomic.h"
|
||||
#include "libc/bits/likely.h"
|
||||
#include "libc/bits/safemacros.internal.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/calls/ioctl.h"
|
||||
|
@ -4238,8 +4239,6 @@ static int LuaEncodeSmth(lua_State *L,
|
|||
lua_settop(L, 1); // keep the passed argument on top
|
||||
if (Encoder(L, useoutput ? &outbuf : &p, numformat, -1) == -1) {
|
||||
free(p);
|
||||
lua_pushnil(L);
|
||||
lua_pushstring(L, "serialization failed");
|
||||
return 2;
|
||||
}
|
||||
if (useoutput) {
|
||||
|
@ -4262,8 +4261,26 @@ static int LuaEncodeLua(lua_State *L) {
|
|||
static int LuaDecodeJson(lua_State *L) {
|
||||
size_t n;
|
||||
const char *p;
|
||||
struct DecodeJson r;
|
||||
p = luaL_checklstring(L, 1, &n);
|
||||
return DecodeJson(L, p, n);
|
||||
r = DecodeJson(L, p, n);
|
||||
if (UNLIKELY(!r.rc)) {
|
||||
lua_pushnil(L);
|
||||
lua_pushstring(L, "unexpected eof");
|
||||
return 2;
|
||||
}
|
||||
if (UNLIKELY(r.rc == -1)) {
|
||||
lua_pushnil(L);
|
||||
lua_pushstring(L, r.p);
|
||||
return 2;
|
||||
}
|
||||
r = DecodeJson(L, r.p, n - (r.p - p));
|
||||
if (UNLIKELY(r.rc)) {
|
||||
lua_pushnil(L);
|
||||
lua_pushstring(L, "junk after expression");
|
||||
return 2;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int LuaGetUrl(lua_State *L) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue