throw parsing errors for missing commas and colons

context is set to zero when a comma is found AFTER_VALUE or a colon is
found before OBJECT_VAL, so we add an UNLIKELY check before parsing each
value to confirm that these two flags are cleared in context; if either
is still set, parsing fails with a "missing ',' or ':'" error.

with this change, ljson passes all the tests from json.org, and only
deviates from JSONTestSuite's expected behavior when it comes to
parsing numbers.
This commit is contained in:
ahgamut 2022-07-13 16:05:29 +05:30
parent 12d9f7ade6
commit f13cdc18b0
5 changed files with 30 additions and 31 deletions

View file

@ -177,3 +177,9 @@ break"]
-- https://www.json.org/JSON_checker/test.zip -- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail15.json -- JSON parsing sample test case: fail15.json
assert(not DecodeJson(' ["Illegal backslash escape: \x15"] ')) assert(not DecodeJson(' ["Illegal backslash escape: \x15"] '))
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail19.json
assert(not DecodeJson([[
{"Missing colon" null}
]]))

View file

@ -1,11 +0,0 @@
-- json.org says the following test cases should be
-- considered as invalid JSON, but ljson.c is lenient.
-- we run these tests anyway just to ensure that
-- no segfault occurs while parsing these cases
-- [jart] we deviate from json.org because we don't care about colons
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail19.json
assert(DecodeJson([[
{"Missing colon" null}
]]))

View file

@ -300,3 +300,14 @@ assert(not DecodeJson(' [1,,2] '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_comma_and_number.json -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_comma_and_number.json
assert(not DecodeJson(' [,1] ')) assert(not DecodeJson(' [,1] '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_inner_array_no_comma.json
-- (added spaces between [[ and ]] so lua doesn't get confused)
assert(not DecodeJson([[
[ 3[ 4] ] ]]))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_1_true_without_comma.json
assert(not DecodeJson(' [1 true] '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_missing_semicolon.json
assert(not DecodeJson(' {"a" "b"} '))

View file

@ -27,19 +27,12 @@
-- --
-- [jart] these tests deviate from the expectations of the upstream test -- [jart] these tests deviate from the expectations of the upstream test
-- suite. most of these failures are because we permit syntax -- suite. most of these failures are because we're more permissive
-- like this since it saves bandwidth and makes the impl smaller. -- about the encoding of double exponents and empty double fraction.
-- we're also more permissive about things like the encoding of
-- double exponents and empty double fraction.
assert(EncodeLua(DecodeJson('[0 1 2 3 4]')) == '{0, 1, 2, 3, 4}')
-- from fail1.lua -- from fail1.lua
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_missing_semicolon.json
assert(DecodeJson(' {"a" "b"} '))
assert(EncodeLua(DecodeJson(' {"a" "b"} ')) == '{a="b"}')
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_real_without_fractional_part.json -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_real_without_fractional_part.json
assert(DecodeJson(' [1.] ')) assert(DecodeJson(' [1.] '))
assert(EncodeLua(DecodeJson(' [1.] ')) == EncodeLua({1.0})) assert(EncodeLua(DecodeJson(' [1.] ')) == EncodeLua({1.0}))
@ -66,14 +59,3 @@ assert(EncodeLua(DecodeJson(' [-2.] ')) == '{-2.}')
-- lool -- lool
assert(not DecodeJson(' [--2.] ')) assert(not DecodeJson(' [--2.] '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_inner_array_no_comma.json
-- (added spaces between [[ and ]] so lua doesn't get confused)
assert(DecodeJson([[
[ 3[ 4] ] ]]))
assert(EncodeLua(DecodeJson([[
[ 3[ 4] ] ]])) == '{3, {4}}')
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_1_true_without_comma.json
assert(DecodeJson(' [1 true] '))
assert(EncodeLua(DecodeJson(' [1 true] ')) == '{1, true}')

View file

@ -81,6 +81,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case 'n': // null case 'n': // null
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey; if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
if (p + 3 <= e && READ32LE(p - 1) == READ32LE("null")) { if (p + 3 <= e && READ32LE(p - 1) == READ32LE("null")) {
lua_pushnil(L); lua_pushnil(L);
return (struct DecodeJson){1, p + 3}; return (struct DecodeJson){1, p + 3};
@ -90,6 +91,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case 'f': // false case 'f': // false
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey; if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
if (p + 4 <= e && READ32LE(p) == READ32LE("alse")) { if (p + 4 <= e && READ32LE(p) == READ32LE("alse")) {
lua_pushboolean(L, false); lua_pushboolean(L, false);
return (struct DecodeJson){1, p + 4}; return (struct DecodeJson){1, p + 4};
@ -99,6 +101,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case 't': // true case 't': // true
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey; if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
if (p + 3 <= e && READ32LE(p - 1) == READ32LE("true")) { if (p + 3 <= e && READ32LE(p - 1) == READ32LE("true")) {
lua_pushboolean(L, true); lua_pushboolean(L, true);
return (struct DecodeJson){1, p + 3}; return (struct DecodeJson){1, p + 3};
@ -108,9 +111,12 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
BadObjectKey: BadObjectKey:
return (struct DecodeJson){-1, "object key must be string"}; return (struct DecodeJson){-1, "object key must be string"};
MissingPunctuation:
return (struct DecodeJson){-1, "missing ',' or ':'"};
case '-': // negative case '-': // negative
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey; if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
if (p < e && isdigit(*p)) { if (p < e && isdigit(*p)) {
d = -1; d = -1;
break; break;
@ -120,6 +126,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case '0': // zero or number case '0': // zero or number
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey; if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
if (p < e) { if (p < e) {
if ((*p == '.' || *p == 'e' || *p == 'E')) { if ((*p == '.' || *p == 'e' || *p == 'E')) {
goto UseDubble; goto UseDubble;
@ -132,6 +139,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case '1' ... '9': // integer case '1' ... '9': // integer
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey; if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
for (x = (c - '0') * d; p < e; ++p) { for (x = (c - '0') * d; p < e; ++p) {
c = *p & 255; c = *p & 255;
if (isdigit(c)) { if (isdigit(c)) {
@ -155,6 +163,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case '[': // Array case '[': // Array
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey; if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
lua_newtable(L); lua_newtable(L);
i = 0; i = 0;
r = Parse(L, p, e, ARRAY_SINGLE | ARRAY_END, depth - 1); r = Parse(L, p, e, ARRAY_SINGLE | ARRAY_END, depth - 1);
@ -193,6 +202,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case '{': // Object case '{': // Object
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey; if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
lua_newtable(L); lua_newtable(L);
r = Parse(L, p, e, OBJECT_KEY | OBJECT_END, depth - 1); r = Parse(L, p, e, OBJECT_KEY | OBJECT_END, depth - 1);
for (;;) { for (;;) {
@ -220,6 +230,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
return (struct DecodeJson){1, p}; return (struct DecodeJson){1, p};
case '"': // string case '"': // string
if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
luaL_buffinit(L, &b); luaL_buffinit(L, &b);
for (;;) { for (;;) {
if (UNLIKELY(p >= e)) { if (UNLIKELY(p >= e)) {