Make ljson more strict (#482)

This commit is contained in:
Gautham 2022-07-13 15:09:19 +05:30 committed by GitHub
parent 30cc2c8dc1
commit 12d9f7ade6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 116 additions and 131 deletions

View file

@ -130,3 +130,50 @@ assert(not DecodeJson([[
[ "line\
break"]
]]))
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail4.json
assert(not DecodeJson([[
[ "extra comma",]
]]))
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail5.json
assert(not DecodeJson([[
[ "double extra comma",,]
]]))
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail6.json
assert(not DecodeJson([[
[ , "<-- missing value"]
]]))
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail9.json
assert(not DecodeJson([[
{"Extra comma": true,}
]]))
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail20.json
assert(not DecodeJson([[
{"Double colon":: null}
]]))
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail25.json
assert(not DecodeJson([[
[ " tab character in string "]
]]))
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail27.json
assert(not DecodeJson([[
[ "line
break"]
]]))
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail15.json
assert(not DecodeJson(' ["Illegal backslash escape: \x15"] '))

View file

@ -3,62 +3,9 @@
-- we run these tests anyway just to ensure that
-- no segfaults occur while parsing these cases
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail4.json
assert(DecodeJson([[
[ "extra comma",]
]]))
-- [jart] we deviate from json.org because we don't care about commas
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail5.json
assert(DecodeJson([[
[ "double extra comma",,]
]]))
-- [jart] we deviate from json.org because we don't care about commas
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail6.json
assert(DecodeJson([[
[ , "<-- missing value"]
]]))
-- [jart] we deviate from json.org because we don't care about commas
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail9.json
assert(DecodeJson([[
{"Extra comma": true,}
]]))
-- [jart] we deviate from json.org because we don't care about colons
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail19.json
assert(DecodeJson([[
{"Missing colon" null}
]]))
-- [jart] we deviate from json.org because we don't care about colons
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail20.json
assert(DecodeJson([[
{"Double colon":: null}
]]))
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail25.json
assert(not DecodeJson([[
[ " tab character in string "]
]]))
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail27.json
assert(not DecodeJson([[
[ "line
break"]
]]))
-- https://www.json.org/JSON_checker/test.zip
-- JSON parsing sample test case: fail15.json
assert(not DecodeJson([[
[ "Illegal backslash escape: \x15"]
]]))

View file

@ -269,3 +269,34 @@ assert(not DecodeJson(' [- 1] '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_just_minus.json
assert(not DecodeJson(' [-] '))
-- [jart] v8 permits the \xb9 but doesn't permit the trailing comma
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_lone_continuation_byte_in_key_and_trailing_comma.json
assert(not DecodeJson(" {\"\xb9\":\"0\",} "))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_double_colon.json
assert(not DecodeJson(' {"x"::"b"} '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_number_and_several_commas.json
assert(not DecodeJson(' [1,,] '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_number_and_comma.json
assert(not DecodeJson(' [1,] '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_missing_value.json
assert(not DecodeJson(' [ , ""] '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_just_comma.json
assert(not DecodeJson(' [,] '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_extra_comma.json
assert(not DecodeJson(' ["",] '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_double_extra_comma.json
assert(not DecodeJson(' ["x",,] '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_double_comma.json
assert(not DecodeJson(' [1,,2] '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_comma_and_number.json
assert(not DecodeJson(' [,1] '))

View file

@ -177,3 +177,12 @@ assert(not DecodeJson(" [\"\t\"] "))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_string_unescaped_newline.json
assert(not DecodeJson(" [\"new\nline\"] "))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_two_commas_in_a_row.json
assert(not DecodeJson(' {"a":"b",,"c":"d"} '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_trailing_comma.json
assert(not DecodeJson(' {"id":0,} '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_several_trailing_commas.json
assert(not DecodeJson(' {"id":0,,,,,} '))

View file

@ -33,38 +33,13 @@
-- double exponents and empty double fraction.
assert(EncodeLua(DecodeJson('[0 1 2 3 4]')) == '{0, 1, 2, 3, 4}')
-- from fail4.lua
--------------------------------------------------------------------------------
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_two_commas_in_a_row.json
assert(DecodeJson(' {"a":"b",,"c":"d"} '))
assert(EncodeLua(DecodeJson(' {"a":"b",,"c":"d"} ')) == '{a="b", c="d"}')
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_trailing_comma.json
assert(DecodeJson(' {"id":0,} '))
assert(EncodeLua(DecodeJson(' {"id":0,} ')) == '{id=0}')
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_several_trailing_commas.json
assert(DecodeJson(' {"id":0,,,,,} '))
assert(EncodeLua(DecodeJson(' {"id":0,,,,,} ')) == '{id=0}')
-- from fail1.lua
--------------------------------------------------------------------------------
-- [jart] v8 permits the \xb9 but doesn't permit the trailing comma
-- therefore this succeeds because we don't care about commas
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_lone_continuation_byte_in_key_and_trailing_comma.json
assert(DecodeJson(" {\"\xb9\":\"0\",} "))
assert(EncodeLua(DecodeJson(" {\"\xb9\":\"0\",} ")) == '{["\\xb9"]="0"}')
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_missing_semicolon.json
assert(DecodeJson(' {"a" "b"} '))
assert(EncodeLua(DecodeJson(' {"a" "b"} ')) == '{a="b"}')
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_double_colon.json
assert(DecodeJson(' {"x"::"b"} '))
assert(EncodeLua(DecodeJson(' {"x"::"b"} ')) == '{x="b"}')
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_real_without_fractional_part.json
assert(DecodeJson(' [1.] '))
assert(EncodeLua(DecodeJson(' [1.] ')) == EncodeLua({1.0}))
@ -92,22 +67,6 @@ assert(EncodeLua(DecodeJson(' [-2.] ')) == '{-2.}')
-- lool
assert(not DecodeJson(' [--2.] '))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_number_and_several_commas.json
assert(DecodeJson(' [1,,] '))
assert(EncodeLua(DecodeJson(' [1,,] ')) == '{1}')
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_number_and_comma.json
assert(DecodeJson(' [1,] '))
assert(EncodeLua(DecodeJson(' [1,] ')) == '{1}')
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_missing_value.json
assert(DecodeJson(' [ , ""] '))
assert(EncodeLua(DecodeJson(' [ , ""] ')) == '{""}')
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_just_comma.json
assert(DecodeJson(' [,] '))
assert(EncodeLua(DecodeJson(' [,] ')) == EncodeLua(DecodeJson(' [] ')))
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_inner_array_no_comma.json
-- (added spaces between [[ and ]] so lua doesn't get confused)
assert(DecodeJson([[
@ -115,22 +74,6 @@ assert(DecodeJson([[
assert(EncodeLua(DecodeJson([[
[ 3[ 4] ] ]])) == '{3, {4}}')
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_extra_comma.json
assert(DecodeJson(' ["",] '))
assert(EncodeLua(DecodeJson(' ["",] ')) == '{""}')
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_double_extra_comma.json
assert(DecodeJson(' ["x",,] '))
assert(EncodeLua(DecodeJson(' ["x",,] ')) == '{"x"}')
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_double_comma.json
assert(DecodeJson(' [1,,2] '))
assert(EncodeLua(DecodeJson(' [1,,2] ')) == '{1, 2}')
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_comma_and_number.json
assert(DecodeJson(' [,1] '))
assert(EncodeLua(DecodeJson(' [,1] ')) == '{1}')
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_1_true_without_comma.json
assert(DecodeJson(' [1 true] '))
assert(EncodeLua(DecodeJson(' [1 true] ')) == '{1, true}')

View file

@ -31,10 +31,14 @@
#include "third_party/lua/lua.h"
#include "tool/net/ljson.h"
#define TOP_LEVEL 0
#define ARRAY_VAL 1
#define OBJECT_KEY 2
#define OBJECT_VAL 3
#define AFTER_VALUE 0x01u
#define ARRAY_SINGLE 0x02u
#define ARRAY_END 0x04u
#define OBJECT_KEY 0x10u
#define OBJECT_VAL 0x20u
#define OBJECT_END 0x40u
#define TOP_LEVEL 64
static struct DecodeJson Parse(struct lua_State *L, const char *p,
const char *e, int context, int depth) {
@ -58,7 +62,8 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
break;
case ',': // present in list and object
if (context == ARRAY_VAL || context == OBJECT_KEY) {
if (0 != (context & AFTER_VALUE)) {
context = 0;
a = p;
break;
} else {
@ -66,7 +71,8 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
}
case ':': // present only in object after key
if (context == OBJECT_VAL) {
if (0 != (context & OBJECT_VAL)) {
context = 0;
a = p;
break;
} else {
@ -74,7 +80,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
}
case 'n': // null
if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey;
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (p + 3 <= e && READ32LE(p - 1) == READ32LE("null")) {
lua_pushnil(L);
return (struct DecodeJson){1, p + 3};
@ -83,7 +89,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
}
case 'f': // false
if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey;
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (p + 4 <= e && READ32LE(p) == READ32LE("alse")) {
lua_pushboolean(L, false);
return (struct DecodeJson){1, p + 4};
@ -92,7 +98,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
}
case 't': // true
if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey;
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (p + 3 <= e && READ32LE(p - 1) == READ32LE("true")) {
lua_pushboolean(L, true);
return (struct DecodeJson){1, p + 3};
@ -104,7 +110,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
return (struct DecodeJson){-1, "object key must be string"};
case '-': // negative
if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey;
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (p < e && isdigit(*p)) {
d = -1;
break;
@ -113,7 +119,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
}
case '0': // zero or number
if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey;
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
if (p < e) {
if ((*p == '.' || *p == 'e' || *p == 'E')) {
goto UseDubble;
@ -125,7 +131,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
return (struct DecodeJson){1, p};
case '1' ... '9': // integer
if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey;
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
for (x = (c - '0') * d; p < e; ++p) {
c = *p & 255;
if (isdigit(c)) {
@ -148,11 +154,11 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
return (struct DecodeJson){1, a + c};
case '[': // Array
if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey;
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
lua_newtable(L);
i = 0;
r = Parse(L, p, e, ARRAY_SINGLE | ARRAY_END, depth - 1);
for (;;) {
r = Parse(L, p, e, ARRAY_VAL, depth - 1);
if (UNLIKELY(r.rc == -1)) {
lua_pop(L, 1);
return r;
@ -162,6 +168,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
break;
}
lua_rawseti(L, -2, i++ + 1);
r = Parse(L, p, e, AFTER_VALUE | ARRAY_END, depth - 1);
}
if (!i) {
// we need this kludge so `[]` won't round-trip as `{}`
@ -171,24 +178,24 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
return (struct DecodeJson){1, p};
case ']':
if (context == ARRAY_VAL) {
if (0 != (context & ARRAY_END)) {
return (struct DecodeJson){0, p};
} else {
return (struct DecodeJson){-1, "unexpected ']'"};
}
case '}':
if (context == OBJECT_KEY) {
if (0 != (context & OBJECT_END)) {
return (struct DecodeJson){0, p};
} else {
return (struct DecodeJson){-1, "unexpected '}'"};
}
case '{': // Object
if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey;
if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
lua_newtable(L);
r = Parse(L, p, e, OBJECT_KEY | OBJECT_END, depth - 1);
for (;;) {
r = Parse(L, p, e, OBJECT_KEY, depth - 1);
if (r.rc == -1) {
lua_pop(L, 1);
return r;
@ -208,6 +215,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
}
p = r.p;
lua_settable(L, -3);
r = Parse(L, p, e, OBJECT_KEY | AFTER_VALUE | OBJECT_END, depth - 1);
}
return (struct DecodeJson){1, p};
@ -355,7 +363,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
return (struct DecodeJson){-1, "illegal character"};
}
}
if (UNLIKELY(context == TOP_LEVEL)) {
if (UNLIKELY(depth == TOP_LEVEL)) {
return (struct DecodeJson){0, 0};
} else {
return (struct DecodeJson){-1, "unexpected eof"};
@ -386,10 +394,10 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
* @return r.p is string describing error if `rc < 0`
*/
struct DecodeJson DecodeJson(struct lua_State *L, const char *p, size_t n) {
int depth = 64;
int depth = TOP_LEVEL;
if (n == -1) n = p ? strlen(p) : 0;
if (lua_checkstack(L, depth * 4)) {
return Parse(L, p, p + n, TOP_LEVEL, depth);
return Parse(L, p, p + n, 0, depth);
} else {
return (struct DecodeJson){-1, "can't set stack depth"};
}