From f13cdc18b0d5d7eb9fa2bb16e1dcc6dc34b46b2f Mon Sep 17 00:00:00 2001 From: ahgamut <41098605+ahgamut@users.noreply.github.com> Date: Wed, 13 Jul 2022 16:05:29 +0530 Subject: [PATCH] throw parsing errors for missing commas and colons context will be set as zero if a comma is found AFTER_VALUE or a colon is found before OBJECT_VAL, so we add an UNLIKELY check before parsing each value to confirm that these two fields are set to zero in context. this change now means that ljson complies with all the tests from json.org, and only deviates from JSONTestSuite's expected behavior when it comes to parsing numbers. --- test/tool/net/jsonorg_fail_test.lua | 6 ++++++ test/tool/net/jsonorg_lenient_test.lua | 11 ---------- test/tool/net/jsontestsuite_fail1_test.lua | 11 ++++++++++ test/tool/net/jsontestsuite_lenient_test.lua | 22 ++------------------ tool/net/ljson.c | 11 ++++++++++ 5 files changed, 30 insertions(+), 31 deletions(-) delete mode 100644 test/tool/net/jsonorg_lenient_test.lua diff --git a/test/tool/net/jsonorg_fail_test.lua b/test/tool/net/jsonorg_fail_test.lua index 54f70708a..11527e683 100644 --- a/test/tool/net/jsonorg_fail_test.lua +++ b/test/tool/net/jsonorg_fail_test.lua @@ -177,3 +177,9 @@ break"] -- https://www.json.org/JSON_checker/test.zip -- JSON parsing sample test case: fail15.json assert(not DecodeJson(' ["Illegal backslash escape: \x15"] ')) + +-- https://www.json.org/JSON_checker/test.zip +-- JSON parsing sample test case: fail19.json +assert(not DecodeJson([[ +{"Missing colon" null} +]])) diff --git a/test/tool/net/jsonorg_lenient_test.lua b/test/tool/net/jsonorg_lenient_test.lua deleted file mode 100644 index 68513d2a9..000000000 --- a/test/tool/net/jsonorg_lenient_test.lua +++ /dev/null @@ -1,11 +0,0 @@ --- json.org says the following test cases should be --- considered as invalid JSON, but ljson.c is lenient. --- we run these tests anyway just to ensure that --- no segfaults occurs while parsing these cases - --- [jart] we deviate from json.org because we don't care about colons --- https://www.json.org/JSON_checker/test.zip --- JSON parsing sample test case: fail19.json -assert(DecodeJson([[ -{"Missing colon" null} -]])) diff --git a/test/tool/net/jsontestsuite_fail1_test.lua b/test/tool/net/jsontestsuite_fail1_test.lua index 15e5dd653..11ec6f968 100644 --- a/test/tool/net/jsontestsuite_fail1_test.lua +++ b/test/tool/net/jsontestsuite_fail1_test.lua @@ -300,3 +300,14 @@ assert(not DecodeJson(' [1,,2] ')) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_comma_and_number.json assert(not DecodeJson(' [,1] ')) + +-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_inner_array_no_comma.json +-- (added spaces between [[ and ]] so lua doesn't get confused) +assert(not DecodeJson([[ +[ 3[ 4] ] ]])) + +-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_1_true_without_comma.json +assert(not DecodeJson(' [1 true] ')) + +-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_missing_semicolon.json +assert(not DecodeJson(' {"a" "b"} ')) diff --git a/test/tool/net/jsontestsuite_lenient_test.lua b/test/tool/net/jsontestsuite_lenient_test.lua index b85bfe426..9a9d90bc3 100644 --- a/test/tool/net/jsontestsuite_lenient_test.lua +++ b/test/tool/net/jsontestsuite_lenient_test.lua @@ -27,19 +27,12 @@ -- -- [jart] these tests deviate from the expectations of the upstream test --- suite. most of these failures are because we permit syntax --- like this since it saves bandwidth and makes the impl smaller. --- we're also more permissive about things like the encoding of --- double exponents and empty double fraction. -assert(EncodeLua(DecodeJson('[0 1 2 3 4]')) == '{0, 1, 2, 3, 4}') +-- suite. most of these failures are because we're more permissive +-- about the encoding of double exponents and empty double fraction. -- from fail1.lua -------------------------------------------------------------------------------- --- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_missing_semicolon.json -assert(DecodeJson(' {"a" "b"} ')) -assert(EncodeLua(DecodeJson(' {"a" "b"} ')) == '{a="b"}') - -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_real_without_fractional_part.json assert(DecodeJson(' [1.] ')) assert(EncodeLua(DecodeJson(' [1.] ')) == EncodeLua({1.0})) @@ -66,14 +59,3 @@ assert(EncodeLua(DecodeJson(' [-2.] ')) == '{-2.}') -- lool assert(not DecodeJson(' [--2.] ')) - --- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_inner_array_no_comma.json --- (added spaces between [[ and ]] so lua doesn't get confused) -assert(DecodeJson([[ -[ 3[ 4] ] ]])) -assert(EncodeLua(DecodeJson([[ -[ 3[ 4] ] ]])) == '{3, {4}}') - --- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_1_true_without_comma.json -assert(DecodeJson(' [1 true] ')) -assert(EncodeLua(DecodeJson(' [1 true] ')) == '{1, true}') diff --git a/tool/net/ljson.c b/tool/net/ljson.c index 46260312f..8a0f3992f 100644 --- a/tool/net/ljson.c +++ b/tool/net/ljson.c @@ -81,6 +81,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p, case 'n': // null if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey; + if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation; if (p + 3 <= e && READ32LE(p - 1) == READ32LE("null")) { lua_pushnil(L); return (struct DecodeJson){1, p + 3}; @@ -90,6 +91,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p, case 'f': // false if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey; + if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation; if (p + 4 <= e && READ32LE(p) == READ32LE("alse")) { lua_pushboolean(L, false); return (struct DecodeJson){1, p + 4}; @@ -99,6 +101,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p, case 't': // true if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey; + if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation; if (p + 3 <= e && READ32LE(p - 1) == READ32LE("true")) { lua_pushboolean(L, true); return (struct DecodeJson){1, p + 3}; @@ -108,9 +111,12 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p, BadObjectKey: return (struct DecodeJson){-1, "object key must be string"}; + MissingPunctuation: + return (struct DecodeJson){-1, "missing ',' or ':'"}; case '-': // negative if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey; + if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation; if (p < e && isdigit(*p)) { d = -1; break; @@ -120,6 +126,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p, case '0': // zero or number if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey; + if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation; if (p < e) { if ((*p == '.' || *p == 'e' || *p == 'E')) { goto UseDubble; @@ -132,6 +139,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p, case '1' ... '9': // integer if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey; + if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation; for (x = (c - '0') * d; p < e; ++p) { c = *p & 255; if (isdigit(c)) { @@ -155,6 +163,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p, case '[': // Array if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey; + if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation; lua_newtable(L); i = 0; r = Parse(L, p, e, ARRAY_SINGLE | ARRAY_END, depth - 1); @@ -193,6 +202,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p, case '{': // Object if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey; + if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation; lua_newtable(L); r = Parse(L, p, e, OBJECT_KEY | OBJECT_END, depth - 1); for (;;) { @@ -220,6 +230,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p, return (struct DecodeJson){1, p}; case '"': // string + if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation; luaL_buffinit(L, &b); for (;;) { if (UNLIKELY(p >= e)) {