mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-04 12:44:52 +00:00
Make JSON parser nearly perfectly compliant (#483)
This commit is contained in:
parent
1d490fcb94
commit
0cea6c560f
5 changed files with 30 additions and 31 deletions
|
@ -177,3 +177,9 @@ break"]
|
||||||
-- https://www.json.org/JSON_checker/test.zip
|
-- https://www.json.org/JSON_checker/test.zip
|
||||||
-- JSON parsing sample test case: fail15.json
|
-- JSON parsing sample test case: fail15.json
|
||||||
assert(not DecodeJson(' ["Illegal backslash escape: \x15"] '))
|
assert(not DecodeJson(' ["Illegal backslash escape: \x15"] '))
|
||||||
|
|
||||||
|
-- https://www.json.org/JSON_checker/test.zip
|
||||||
|
-- JSON parsing sample test case: fail19.json
|
||||||
|
assert(not DecodeJson([[
|
||||||
|
{"Missing colon" null}
|
||||||
|
]]))
|
||||||
|
|
|
@ -1,11 +0,0 @@
|
||||||
-- json.org says the following test cases should be
|
|
||||||
-- considered as invalid JSON, but ljson.c is lenient.
|
|
||||||
-- we run these tests anyway just to ensure that
|
|
||||||
-- no segfaults occur while parsing these cases
|
|
||||||
|
|
||||||
-- [jart] we deviate from json.org because we don't care about colons
|
|
||||||
-- https://www.json.org/JSON_checker/test.zip
|
|
||||||
-- JSON parsing sample test case: fail19.json
|
|
||||||
assert(DecodeJson([[
|
|
||||||
{"Missing colon" null}
|
|
||||||
]]))
|
|
|
@ -300,3 +300,14 @@ assert(not DecodeJson(' [1,,2] '))
|
||||||
|
|
||||||
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_comma_and_number.json
|
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_comma_and_number.json
|
||||||
assert(not DecodeJson(' [,1] '))
|
assert(not DecodeJson(' [,1] '))
|
||||||
|
|
||||||
|
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_inner_array_no_comma.json
|
||||||
|
-- (added spaces between [[ and ]] so lua doesn't get confused)
|
||||||
|
assert(not DecodeJson([[
|
||||||
|
[ 3[ 4] ] ]]))
|
||||||
|
|
||||||
|
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_1_true_without_comma.json
|
||||||
|
assert(not DecodeJson(' [1 true] '))
|
||||||
|
|
||||||
|
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_missing_semicolon.json
|
||||||
|
assert(not DecodeJson(' {"a" "b"} '))
|
||||||
|
|
|
@ -27,19 +27,12 @@
|
||||||
--
|
--
|
||||||
|
|
||||||
-- [jart] these tests deviate from the expectations of the upstream test
|
-- [jart] these tests deviate from the expectations of the upstream test
|
||||||
-- suite. most of these failures are because we permit syntax
|
-- suite. most of these failures are because we're more permissive
|
||||||
-- like this since it saves bandwidth and makes the impl smaller.
|
-- about the encoding of double exponents and empty double fraction.
|
||||||
-- we're also more permissive about things like the encoding of
|
|
||||||
-- double exponents and empty double fraction.
|
|
||||||
assert(EncodeLua(DecodeJson('[0 1 2 3 4]')) == '{0, 1, 2, 3, 4}')
|
|
||||||
|
|
||||||
-- from fail1.lua
|
-- from fail1.lua
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_missing_semicolon.json
|
|
||||||
assert(DecodeJson(' {"a" "b"} '))
|
|
||||||
assert(EncodeLua(DecodeJson(' {"a" "b"} ')) == '{a="b"}')
|
|
||||||
|
|
||||||
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_real_without_fractional_part.json
|
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_real_without_fractional_part.json
|
||||||
assert(DecodeJson(' [1.] '))
|
assert(DecodeJson(' [1.] '))
|
||||||
assert(EncodeLua(DecodeJson(' [1.] ')) == EncodeLua({1.0}))
|
assert(EncodeLua(DecodeJson(' [1.] ')) == EncodeLua({1.0}))
|
||||||
|
@ -66,14 +59,3 @@ assert(EncodeLua(DecodeJson(' [-2.] ')) == '{-2.}')
|
||||||
|
|
||||||
-- lool
|
-- lool
|
||||||
assert(not DecodeJson(' [--2.] '))
|
assert(not DecodeJson(' [--2.] '))
|
||||||
|
|
||||||
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_inner_array_no_comma.json
|
|
||||||
-- (added spaces between [[ and ]] so lua doesn't get confused)
|
|
||||||
assert(DecodeJson([[
|
|
||||||
[ 3[ 4] ] ]]))
|
|
||||||
assert(EncodeLua(DecodeJson([[
|
|
||||||
[ 3[ 4] ] ]])) == '{3, {4}}')
|
|
||||||
|
|
||||||
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_1_true_without_comma.json
|
|
||||||
assert(DecodeJson(' [1 true] '))
|
|
||||||
assert(EncodeLua(DecodeJson(' [1 true] ')) == '{1, true}')
|
|
||||||
|
|
|
@ -81,6 +81,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
|
||||||
|
|
||||||
case 'n': // null
|
case 'n': // null
|
||||||
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
|
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
|
||||||
|
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
|
||||||
if (p + 3 <= e && READ32LE(p - 1) == READ32LE("null")) {
|
if (p + 3 <= e && READ32LE(p - 1) == READ32LE("null")) {
|
||||||
lua_pushnil(L);
|
lua_pushnil(L);
|
||||||
return (struct DecodeJson){1, p + 3};
|
return (struct DecodeJson){1, p + 3};
|
||||||
|
@ -90,6 +91,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
|
||||||
|
|
||||||
case 'f': // false
|
case 'f': // false
|
||||||
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
|
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
|
||||||
|
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
|
||||||
if (p + 4 <= e && READ32LE(p) == READ32LE("alse")) {
|
if (p + 4 <= e && READ32LE(p) == READ32LE("alse")) {
|
||||||
lua_pushboolean(L, false);
|
lua_pushboolean(L, false);
|
||||||
return (struct DecodeJson){1, p + 4};
|
return (struct DecodeJson){1, p + 4};
|
||||||
|
@ -99,6 +101,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
|
||||||
|
|
||||||
case 't': // true
|
case 't': // true
|
||||||
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
|
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
|
||||||
|
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
|
||||||
if (p + 3 <= e && READ32LE(p - 1) == READ32LE("true")) {
|
if (p + 3 <= e && READ32LE(p - 1) == READ32LE("true")) {
|
||||||
lua_pushboolean(L, true);
|
lua_pushboolean(L, true);
|
||||||
return (struct DecodeJson){1, p + 3};
|
return (struct DecodeJson){1, p + 3};
|
||||||
|
@ -108,9 +111,12 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
|
||||||
|
|
||||||
BadObjectKey:
|
BadObjectKey:
|
||||||
return (struct DecodeJson){-1, "object key must be string"};
|
return (struct DecodeJson){-1, "object key must be string"};
|
||||||
|
MissingPunctuation:
|
||||||
|
return (struct DecodeJson){-1, "missing ',' or ':'"};
|
||||||
|
|
||||||
case '-': // negative
|
case '-': // negative
|
||||||
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
|
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
|
||||||
|
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
|
||||||
if (p < e && isdigit(*p)) {
|
if (p < e && isdigit(*p)) {
|
||||||
d = -1;
|
d = -1;
|
||||||
break;
|
break;
|
||||||
|
@ -120,6 +126,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
|
||||||
|
|
||||||
case '0': // zero or number
|
case '0': // zero or number
|
||||||
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
|
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
|
||||||
|
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
|
||||||
if (p < e) {
|
if (p < e) {
|
||||||
if ((*p == '.' || *p == 'e' || *p == 'E')) {
|
if ((*p == '.' || *p == 'e' || *p == 'E')) {
|
||||||
goto UseDubble;
|
goto UseDubble;
|
||||||
|
@ -132,6 +139,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
|
||||||
|
|
||||||
case '1' ... '9': // integer
|
case '1' ... '9': // integer
|
||||||
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
|
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
|
||||||
|
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
|
||||||
for (x = (c - '0') * d; p < e; ++p) {
|
for (x = (c - '0') * d; p < e; ++p) {
|
||||||
c = *p & 255;
|
c = *p & 255;
|
||||||
if (isdigit(c)) {
|
if (isdigit(c)) {
|
||||||
|
@ -155,6 +163,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
|
||||||
|
|
||||||
case '[': // Array
|
case '[': // Array
|
||||||
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
|
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
|
||||||
|
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
|
||||||
lua_newtable(L);
|
lua_newtable(L);
|
||||||
i = 0;
|
i = 0;
|
||||||
r = Parse(L, p, e, ARRAY_SINGLE | ARRAY_END, depth - 1);
|
r = Parse(L, p, e, ARRAY_SINGLE | ARRAY_END, depth - 1);
|
||||||
|
@ -193,6 +202,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
|
||||||
|
|
||||||
case '{': // Object
|
case '{': // Object
|
||||||
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
|
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
|
||||||
|
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
|
||||||
lua_newtable(L);
|
lua_newtable(L);
|
||||||
r = Parse(L, p, e, OBJECT_KEY | OBJECT_END, depth - 1);
|
r = Parse(L, p, e, OBJECT_KEY | OBJECT_END, depth - 1);
|
||||||
for (;;) {
|
for (;;) {
|
||||||
|
@ -220,6 +230,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
|
||||||
return (struct DecodeJson){1, p};
|
return (struct DecodeJson){1, p};
|
||||||
|
|
||||||
case '"': // string
|
case '"': // string
|
||||||
|
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
|
||||||
luaL_buffinit(L, &b);
|
luaL_buffinit(L, &b);
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if (UNLIKELY(p >= e)) {
|
if (UNLIKELY(p >= e)) {
|
||||||
|
|
Loading…
Add table
Reference in a new issue