throw parsing errors for missing commas and colons

context is set to zero if a comma is found AFTER_VALUE or a colon is
found before OBJECT_VAL, so we add an UNLIKELY check before parsing each
value to confirm that these two flags are zero in context; if either is
still set, parsing fails with a "missing ',' or ':'" error.

with this change, ljson complies with all the tests from json.org, and
only deviates from JSONTestSuite's expected behavior when it comes to
parsing numbers.
This commit is contained in:
ahgamut 2022-07-13 16:05:29 +05:30
parent 12d9f7ade6
commit f13cdc18b0
5 changed files with 30 additions and 31 deletions

View file

@ -177,3 +177,9 @@ break"]
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail15.json
assert(not DecodeJson(' ["Illegal backslash escape: \x15"] '))
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail19.json
assert(not DecodeJson([[
{"Missing colon" null}
]]))

View file

@ -1,11 +0,0 @@
-- json.org says the following test cases should be
-- considered as invalid JSON, but ljson.c is lenient.
-- we run these tests anyway just to ensure that
-- no segfaults occur while parsing these cases
-- [jart] we deviate from json.org because we don't care about colons
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail19.json
assert(DecodeJson([[
{"Missing colon" null}
]]))

View file

@ -300,3 +300,14 @@ assert(not DecodeJson(' [1,,2] '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_comma_and_number.json
assert(not DecodeJson(' [,1] '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_inner_array_no_comma.json
-- (added spaces between [[ and ]] so lua doesn't get confused)
assert(not DecodeJson([[
[ 3[ 4] ] ]]))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_1_true_without_comma.json
assert(not DecodeJson(' [1 true] '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_missing_semicolon.json
assert(not DecodeJson(' {"a" "b"} '))

View file

@ -27,19 +27,12 @@
--
-- [jart] these tests deviate from the expectations of the upstream test
-- suite. most of these failures are because we permit syntax
-- like this since it saves bandwidth and makes the impl smaller.
-- we're also more permissive about things like the encoding of
-- double exponents and empty double fraction.
assert(EncodeLua(DecodeJson('[0 1 2 3 4]')) == '{0, 1, 2, 3, 4}')
-- suite. most of these failures are because we're more permissive
-- about the encoding of double exponents and empty double fraction.
-- from fail1.lua
--------------------------------------------------------------------------------
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_missing_semicolon.json
assert(DecodeJson(' {"a" "b"} '))
assert(EncodeLua(DecodeJson(' {"a" "b"} ')) == '{a="b"}')
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_real_without_fractional_part.json
assert(DecodeJson(' [1.] '))
assert(EncodeLua(DecodeJson(' [1.] ')) == EncodeLua({1.0}))
@ -66,14 +59,3 @@ assert(EncodeLua(DecodeJson(' [-2.] ')) == '{-2.}')
-- lool
assert(not DecodeJson(' [--2.] '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_inner_array_no_comma.json
-- (added spaces between [[ and ]] so lua doesn't get confused)
assert(DecodeJson([[
[ 3[ 4] ] ]]))
assert(EncodeLua(DecodeJson([[
[ 3[ 4] ] ]])) == '{3, {4}}')
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_1_true_without_comma.json
assert(DecodeJson(' [1 true] '))
assert(EncodeLua(DecodeJson(' [1 true] ')) == '{1, true}')

View file

@ -81,6 +81,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case 'n': // null
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
if (p + 3 <= e && READ32LE(p - 1) == READ32LE("null")) {
lua_pushnil(L);
return (struct DecodeJson){1, p + 3};
@ -90,6 +91,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case 'f': // false
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
if (p + 4 <= e && READ32LE(p) == READ32LE("alse")) {
lua_pushboolean(L, false);
return (struct DecodeJson){1, p + 4};
@ -99,6 +101,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case 't': // true
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
if (p + 3 <= e && READ32LE(p - 1) == READ32LE("true")) {
lua_pushboolean(L, true);
return (struct DecodeJson){1, p + 3};
@ -108,9 +111,12 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
BadObjectKey:
return (struct DecodeJson){-1, "object key must be string"};
MissingPunctuation:
return (struct DecodeJson){-1, "missing ',' or ':'"};
case '-': // negative
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
if (p < e && isdigit(*p)) {
d = -1;
break;
@ -120,6 +126,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case '0': // zero or number
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
if (p < e) {
if ((*p == '.' || *p == 'e' || *p == 'E')) {
goto UseDubble;
@ -132,6 +139,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case '1' ... '9': // integer
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
for (x = (c - '0') * d; p < e; ++p) {
c = *p & 255;
if (isdigit(c)) {
@ -155,6 +163,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case '[': // Array
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
lua_newtable(L);
i = 0;
r = Parse(L, p, e, ARRAY_SINGLE | ARRAY_END, depth - 1);
@ -193,6 +202,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case '{': // Object
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
lua_newtable(L);
r = Parse(L, p, e, OBJECT_KEY | OBJECT_END, depth - 1);
for (;;) {
@ -220,6 +230,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
return (struct DecodeJson){1, p};
case '"': // string
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
luaL_buffinit(L, &b);
for (;;) {
if (UNLIKELY(p >= e)) {