Make JSON parser nearly perfectly compliant (#483)

This commit is contained in:
Gautham 2022-07-13 20:08:23 +05:30 committed by GitHub
parent 1d490fcb94
commit 0cea6c560f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 30 additions and 31 deletions

View file

@ -177,3 +177,9 @@ break"]
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail15.json
assert(not DecodeJson(' ["Illegal backslash escape: \x15"] '))
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail19.json
-- the object key is followed directly by its value with no ':' in
-- between, so the decoder is expected to reject this input
assert(not DecodeJson([[
{"Missing colon" null}
]]))

View file

@ -1,11 +0,0 @@
-- json.org says the following test cases should be
-- considered as invalid JSON, but ljson.c is lenient.
-- we run these tests anyway just to ensure that
-- no segfaults occur while parsing these cases
-- [jart] we deviate from json.org because we don't care about colons
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail19.json
assert(DecodeJson([[
{"Missing colon" null}
]]))

View file

@ -300,3 +300,14 @@ assert(not DecodeJson(' [1,,2] '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_comma_and_number.json
assert(not DecodeJson(' [,1] '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_inner_array_no_comma.json
-- (added spaces between [[ and ]] so lua doesn't get confused)
assert(not DecodeJson([[
[ 3[ 4] ] ]]))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_1_true_without_comma.json
-- two array elements with no ',' between them must be rejected
assert(not DecodeJson(' [1 true] '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_missing_semicolon.json
-- (despite the upstream file name, the separator missing from this input
-- is the ':' between the key and its value)
assert(not DecodeJson(' {"a" "b"} '))

View file

@ -27,19 +27,12 @@
--
-- [jart] these tests deviate from the expectations of the upstream test
-- suite. most of these failures are because we permit syntax
-- like this since it saves bandwidth and makes the impl smaller.
-- we're also more permissive about things like the encoding of
-- double exponents and empty double fraction.
assert(EncodeLua(DecodeJson('[0 1 2 3 4]')) == '{0, 1, 2, 3, 4}')
-- suite. most of these failures are because we're more permissive
-- about the encoding of double exponents and empty double fraction.
-- from fail1.lua
--------------------------------------------------------------------------------
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_missing_semicolon.json
assert(DecodeJson(' {"a" "b"} '))
assert(EncodeLua(DecodeJson(' {"a" "b"} ')) == '{a="b"}')
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_real_without_fractional_part.json
assert(DecodeJson(' [1.] '))
assert(EncodeLua(DecodeJson(' [1.] ')) == EncodeLua({1.0}))
@ -66,14 +59,3 @@ assert(EncodeLua(DecodeJson(' [-2.] ')) == '{-2.}')
-- lool
assert(not DecodeJson(' [--2.] '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_inner_array_no_comma.json
-- (added spaces between [[ and ]] so lua doesn't get confused)
assert(DecodeJson([[
[ 3[ 4] ] ]]))
assert(EncodeLua(DecodeJson([[
[ 3[ 4] ] ]])) == '{3, {4}}')
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_1_true_without_comma.json
assert(DecodeJson(' [1 true] '))
assert(EncodeLua(DecodeJson(' [1 true] ')) == '{1, true}')

View file

@ -81,6 +81,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case 'n': // null
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
if (p + 3 <= e && READ32LE(p - 1) == READ32LE("null")) {
lua_pushnil(L);
return (struct DecodeJson){1, p + 3};
@ -90,6 +91,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case 'f': // false
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
if (p + 4 <= e && READ32LE(p) == READ32LE("alse")) {
lua_pushboolean(L, false);
return (struct DecodeJson){1, p + 4};
@ -99,6 +101,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case 't': // true
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
if (p + 3 <= e && READ32LE(p - 1) == READ32LE("true")) {
lua_pushboolean(L, true);
return (struct DecodeJson){1, p + 3};
@ -108,9 +111,12 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
BadObjectKey:
return (struct DecodeJson){-1, "object key must be string"};
MissingPunctuation:
return (struct DecodeJson){-1, "missing ',' or ':'"};
case '-': // negative
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
if (p < e && isdigit(*p)) {
d = -1;
break;
@ -120,6 +126,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case '0': // zero or number
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
if (p < e) {
if ((*p == '.' || *p == 'e' || *p == 'E')) {
goto UseDubble;
@ -132,6 +139,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case '1' ... '9': // integer
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
for (x = (c - '0') * d; p < e; ++p) {
c = *p & 255;
if (isdigit(c)) {
@ -155,6 +163,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case '[': // Array
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
lua_newtable(L);
i = 0;
r = Parse(L, p, e, ARRAY_SINGLE | ARRAY_END, depth - 1);
@ -193,6 +202,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case '{': // Object
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
lua_newtable(L);
r = Parse(L, p, e, OBJECT_KEY | OBJECT_END, depth - 1);
for (;;) {
@ -220,6 +230,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
return (struct DecodeJson){1, p};
case '"': // string
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
luaL_buffinit(L, &b);
for (;;) {
if (UNLIKELY(p >= e)) {