diff --git a/net/http/visualizecontrolcodes.c b/net/http/visualizecontrolcodes.c index 3ba14fdac..084abca2b 100644 --- a/net/http/visualizecontrolcodes.c +++ b/net/http/visualizecontrolcodes.c @@ -23,7 +23,7 @@ #include "net/http/escape.h" /** - * Filters out control codes from string. + * Makes control codes and trojan source plainly viewable. * * This is useful for logging data like HTTP messages, where we don't * want full blown C string literal escaping, but we don't want things @@ -67,6 +67,32 @@ char *VisualizeControlCodes(const char *data, size_t size, size_t *out_size) { } } } + + // remap trojan source characters + if (x == 0x2028) { + x = L'↵'; // line separator + } else if (x == 0x2029) { + x = L'¶'; // paragraph separator + } else if (x == 0x202A) { + x = L'⟫'; // left-to-right embedding + } else if (x == 0x202B) { + x = L'⟪'; // right-to-left embedding + } else if (x == 0x202D) { + x = L'❯'; // left-to-right override + } else if (x == 0x202E) { + x = L'❮'; // right-to-left override + } else if (x == 0x2066) { + x = L'⟩'; // left-to-right isolate + } else if (x == 0x2067) { + x = L'⟨'; // right-to-left isolate + } else if (x == 0x2068) { + x = L'⧽'; // first strong isolate + } else if (x == 0x202C) { + x = L'⇮'; // pop directional formatting + } else if (x == 0x2069) { + x = L'⇯'; // pop directional isolate + } + if (0x80 <= x && x < 0xA0) { q[0] = '\\'; q[1] = 'u'; diff --git a/test/tool/net/lfuncs_test.lua b/test/tool/net/lfuncs_test.lua index d4c4154b0..e0a642cbf 100644 --- a/test/tool/net/lfuncs_test.lua +++ b/test/tool/net/lfuncs_test.lua @@ -85,7 +85,6 @@ assert(err == "won't serialize cyclic lua table") -- pass the parser to itself lool json, err = EncodeJson(EncodeJson) assert(not json) -print(err) assert(err == "unsupported lua type") -- EncodeJson() sorts table entries @@ -145,6 +144,9 @@ assert(ParseHttpDateTime("Fri, 08 Jul 2022 16:17:43 GMT") == 1657297063) assert(FormatHttpDateTime(1657297063) == "Fri, 08 Jul 2022 16:17:43 GMT") 
assert(VisualizeControlCodes("hello\x00") == "hello␀") +assert(VisualizeControlCodes("\xe2\x80\xa8") == "↵") -- line separator +assert(VisualizeControlCodes("\xe2\x80\xaa") == "⟫") -- left-to-right embedding +assert(VisualizeControlCodes("\xe2\x80\xab") == "⟪") -- right-to-left embedding assert(math.floor(10 * MeasureEntropy(" ") + .5) == 0) assert(math.floor(10 * MeasureEntropy("abcabcabcabc") + .5) == 16) diff --git a/test/tool/net/ljson_test.lua b/test/tool/net/ljson_test.lua index 4641c1cf9..f270c95dc 100644 --- a/test/tool/net/ljson_test.lua +++ b/test/tool/net/ljson_test.lua @@ -17,7 +17,6 @@ assert(EncodeLua(assert(DecodeJson[[ 0 ]])) == '0' ) assert(EncodeLua(assert(DecodeJson[[ [1] ]])) == '{1}') assert(EncodeLua(assert(DecodeJson[[ 2.3 ]])) == '2.3') assert(EncodeLua(assert(DecodeJson[[ [1,3,2] ]])) == '{1, 3, 2}') --- assert(EncodeLua(assert(DecodeJson[[ {1: 2, 3: 4} ]])) == '{[1]=2, [3]=4}') assert(EncodeLua(assert(DecodeJson[[ {"foo": 2, "bar": 4} ]])) == '{bar=4, foo=2}') assert(EncodeLua(assert(DecodeJson[[ -123 ]])) == '-123') assert(EncodeLua(assert(DecodeJson[[ 1e6 ]])) == '1000000.') @@ -26,7 +25,6 @@ assert(EncodeLua(assert(DecodeJson[[ 1e-06 ]])) == '0.000001') assert(EncodeLua(assert(DecodeJson[[ 9.123e6 ]])) == '9123000.') assert(EncodeLua(assert(DecodeJson[[ [{"heh": [1,3,2]}] ]])) == '{{heh={1, 3, 2}}}') assert(EncodeLua(assert(DecodeJson[[ 3.14159 ]])) == '3.14159') --- assert(EncodeLua(assert(DecodeJson[[ {3=4} ]])) == '{[3]=4}') assert(EncodeLua(assert(DecodeJson[[ 1e-12 ]])) == '1e-12') assert(EncodeJson(assert(DecodeJson[[ 1e-12 ]])) == '1e-12') @@ -52,6 +50,14 @@ assert(EncodeJson(assert(DecodeJson[[ 9223372036854775807.0 ]])) == '9223372036 assert(EncodeJson(assert(DecodeJson[[ 2.7182818284590452354 ]])) == '2.718281828459045') -- euler constant w/ 17 digit precision assert( EncodeLua(assert(DecodeJson[[ 2.7182818284590452354 ]])) == '2.718281828459045') -- euler constant w/ 17 digit precision +res, err = DecodeJson[[ null ]] 
+assert(res == nil) +assert(err == nil) + +res, err = DecodeJson[[ false ]] +assert(res == false) +assert(err == nil) + res, err = DecodeJson[[ ]] assert(not res) assert(err == 'unexpected eof') @@ -60,14 +66,6 @@ res, err = DecodeJson[[ {} {} ]] assert(not res) assert(err == "junk after expression") -res, err = DecodeJson[[ null ]] -assert(not res) -assert(err == "toplevel json can't be null") - -res, err = DecodeJson[[ false ]] -assert(not res) -assert(err == "toplevel json can't be false") - res, err = DecodeJson[[ {3:4} ]] assert(not res) assert(err == "object key must be string") @@ -96,6 +94,26 @@ res, err = DecodeJson[[ {true:3} ]] assert(not res) assert(err == "object key must be string") +res, err = DecodeJson('"\x00"') +assert(res == nil) +assert(err == 'non-del c0 in string') + +res, err = DecodeJson('"e') +assert(res == nil) +assert(err == 'unexpected eof in string') + +res, err = DecodeJson('"\\xcc\\xa9"') +assert(res == nil) +assert(err == 'hex escape not printable') + +res, err = DecodeJson('"\\xcj"') +assert(res == nil) +assert(err == 'invalid hex escape') + +res, err = DecodeJson('"\\ucjcc"') +assert(res == nil) +assert(err == 'invalid unicode escape') + res, err = DecodeJson('[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[') assert(not res) assert(err == "maximum depth exceeded") diff --git a/test/tool/net/samples/JSONTestSuite-fail1.lua b/test/tool/net/samples/JSONTestSuite-fail1.lua index 4bd4ae868..9a4dc9395 100644 --- a/test/tool/net/samples/JSONTestSuite-fail1.lua +++ b/test/tool/net/samples/JSONTestSuite-fail1.lua @@ -236,8 +236,7 @@ assert(not DecodeJson(' ["x" ')) assert(not DecodeJson(' ["": 1] ')) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_a_invalid_utf8.json - -- (converted to binary for safety) -assert(not DecodeJson(' \x5b\x61\xe5\x5d ')) +assert(not DecodeJson(" [a\xe5] ")) -- 
https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_comma_instead_of_colon.json assert(not DecodeJson(' {"x", null} ')) @@ -252,3 +251,21 @@ assert(not DecodeJson(' 123\x00 ')) -- (added spaces between [[ and ]] so lua doesn't get confused) assert(not DecodeJson([[ [ "x"] ] ]])) + +-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_with_leading_zero.json +assert(not DecodeJson(' [012] ')) + +-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_neg_int_starting_with_zero.json +assert(not DecodeJson(' [-012] ')) + +-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_1_000.json +assert(not DecodeJson(' [1 000.0] ')) + +-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_-01.json +assert(not DecodeJson(' [-01] ')) + +-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_minus_space_1.json +assert(not DecodeJson(' [- 1] ')) + +-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_just_minus.json +assert(not DecodeJson(' [-] ')) diff --git a/test/tool/net/samples/JSONTestSuite-fail2.lua b/test/tool/net/samples/JSONTestSuite-fail2.lua index 65fd22459..f2d76d765 100644 --- a/test/tool/net/samples/JSONTestSuite-fail2.lua +++ b/test/tool/net/samples/JSONTestSuite-fail2.lua @@ -26,7 +26,6 @@ -- SOFTWARE. 
-- - -- these test cases are prefixed with n_ -- ljson should reject all of them as invalid diff --git a/test/tool/net/samples/JSONTestSuite-fail3.lua b/test/tool/net/samples/JSONTestSuite-fail3.lua index 48c29768b..4e964ede7 100644 --- a/test/tool/net/samples/JSONTestSuite-fail3.lua +++ b/test/tool/net/samples/JSONTestSuite-fail3.lua @@ -1,4 +1,3 @@ - -- -- Nicolas Seriot's JSONTestSuite -- https://github.com/nst/JSONTestSuite @@ -27,7 +26,6 @@ -- SOFTWARE. -- - -- these test cases are prefixed with n_ -- ljson should reject all of them as invalid diff --git a/test/tool/net/samples/JSONTestSuite-fail4.lua b/test/tool/net/samples/JSONTestSuite-fail4.lua index a8b57a64e..b2c4a8206 100644 --- a/test/tool/net/samples/JSONTestSuite-fail4.lua +++ b/test/tool/net/samples/JSONTestSuite-fail4.lua @@ -1,4 +1,3 @@ - -- -- Nicolas Seriot's JSONTestSuite -- https://github.com/nst/JSONTestSuite @@ -31,48 +30,40 @@ -- ljson should reject all of them as invalid -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_string_start_escape_unclosed.json --- (converted to binary for safety) -assert(not DecodeJson(' \x5b\x22\x5c ')) +assert(not DecodeJson(" [\"\\ ")) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_string_single_string_no_double_quotes.json --- (converted to binary for safety) -assert(not DecodeJson(' \x61\x62\x63 ')) +assert(not DecodeJson(" abc ")) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_string_single_quote.json --- (converted to binary for safety) -assert(not DecodeJson(' \x5b\x27\x73\x69\x6e\x67\x6c\x65\x20\x71\x75\x6f\x74\x65\x27\x5d ')) +assert(not DecodeJson(" [\'single quote\'] ")) +-- disallow escape code outside string -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_string_no_quotes_with_bad_escape.json --- (converted to binary for safety) -assert(not 
DecodeJson(' \x5b\x5c\x6e\x5d ')) +assert(not DecodeJson(" [\\n] ")) +-- disallow unicode escape outside string -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_string_leading_uescaped_thinspace.json --- (converted to binary for safety) -assert(not DecodeJson(' \x5b\x5c\x75\x30\x30\x32\x30\x22\x61\x73\x64\x22\x5d ')) +assert(not DecodeJson(" [\\u0020\"asd\"] ")) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_string_incomplete_surrogate_escape_invalid.json --- (converted to binary for safety) -assert(not DecodeJson(' \x5b\x22\x5c\x75\x44\x38\x30\x30\x5c\x75\x44\x38\x30\x30\x5c\x78\x22\x5d ')) +assert(not DecodeJson(" [\"\\uD800\\uD800\\x\"] ")) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_string_incomplete_escape.json --- (converted to binary for safety) -assert(not DecodeJson(' \x5b\x22\x5c\x22\x5d ')) +assert(not DecodeJson(" [\"\\\"] ")) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_string_escaped_backslash_bad.json --- (converted to binary for safety) -assert(not DecodeJson(' \x5b\x22\x5c\x5c\x5c\x22\x5d ')) +assert(not DecodeJson(" [\"\\\\\\\"] ")) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_string_accentuated_char_no_quotes.json --- (converted to binary for safety) -assert(not DecodeJson(' \x5b\xc3\xa9\x5d ')) +assert(not DecodeJson(" [é] ")) +assert(DecodeJson(" [\"é\"] ")) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_string_1_surrogate_then_escape.json --- (converted to binary for safety) -assert(not DecodeJson(' \x5b\x22\x5c\x75\x44\x38\x30\x30\x5c\x22\x5d ')) +assert(not DecodeJson(" [\"\\uD800\\\"] ")) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_with_single_string.json 
--- (converted to binary for safety) -assert(not DecodeJson(' \x7b\x20\x22\x66\x6f\x6f\x22\x20\x3a\x20\x22\x62\x61\x72\x22\x2c\x20\x22\x61\x22\x20\x7d ')) +assert(not DecodeJson(" { \"foo\" : \"bar\", \"a\" } ")) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_unterminated-value.json assert(not DecodeJson(' {"a":"a ')) @@ -174,3 +165,15 @@ assert(not DecodeJson(' {"a":"b"}/**// ')) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_trailing_comment.json assert(not DecodeJson(' {"a":"b"}/**/ ')) + +-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_string_unescaped_ctrl_char.json +assert(not DecodeJson(" [\"a\x00a\"] ")) + +-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_string_escape_x.json +assert(not DecodeJson(" [\"a\\x00a\"] ")) + +-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_string_unescaped_tab.json +assert(not DecodeJson(" [\"\t\"] ")) + +-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_string_unescaped_newline.json +assert(not DecodeJson(" [\"new\nline\"] ")) diff --git a/test/tool/net/samples/JSONTestSuite-lenient.lua b/test/tool/net/samples/JSONTestSuite-lenient.lua index 64053053b..97c3f8ce4 100644 --- a/test/tool/net/samples/JSONTestSuite-lenient.lua +++ b/test/tool/net/samples/JSONTestSuite-lenient.lua @@ -26,111 +26,111 @@ -- SOFTWARE. -- +-- [jart] these tests deviate from the expectations of the upstream test +-- suite. most of these failures are because we permit syntax +-- like this since it saves bandwidth and makes the impl smaller. +-- we're also more permissive about things like the encoding of +-- double exponents and empty double fraction. 
+assert(EncodeLua(DecodeJson('[0 1 2 3 4]')) == '{0, 1, 2, 3, 4}') + -- from fail4.lua - --- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_string_escape_x.json --- (converted to binary for safety) -assert(DecodeJson(" [\"\\x00\"] ")) - --- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_string_unescaped_tab.json --- (converted to binary for safety) -assert(DecodeJson([[ [" "] ]])) - --- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_string_unescaped_newline.json --- (converted to binary for safety) -assert(DecodeJson(" [\"new\nline\"] ")) - --- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_string_unescaped_ctrl_char.json --- (converted to binary for safety) -assert(assert(EncodeLua(assert(DecodeJson(" [\"a\x00a\"] ")))) == assert(EncodeLua({"a\x00a"}))) +-------------------------------------------------------------------------------- -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_two_commas_in_a_row.json -assert(assert(EncodeLua(assert(DecodeJson(' {"a":"b",,"c":"d"} ')))) == assert(EncodeLua({a="b", c="d"}))) assert(DecodeJson(' {"a":"b",,"c":"d"} ')) +assert(EncodeLua(DecodeJson(' {"a":"b",,"c":"d"} ')) == '{a="b", c="d"}') -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_trailing_comma.json assert(DecodeJson(' {"id":0,} ')) +assert(EncodeLua(DecodeJson(' {"id":0,} ')) == '{id=0}') -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_several_trailing_commas.json assert(DecodeJson(' {"id":0,,,,,} ')) +assert(EncodeLua(DecodeJson(' {"id":0,,,,,} ')) == '{id=0}') -- from fail1.lua +-------------------------------------------------------------------------------- +-- [jart] v8 permits the \xb9 
but doesn't permit the trailing comma +-- therefore this succeeds because we don't care about comma -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_lone_continuation_byte_in_key_and_trailing_comma.json - -- (converted to binary for safety) assert(DecodeJson(" {\"\xb9\":\"0\",} ")) +assert(EncodeLua(DecodeJson(" {\"\xb9\":\"0\",} ")) == '{["\\xb9"]="0"}') -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_missing_semicolon.json assert(DecodeJson(' {"a" "b"} ')) +assert(EncodeLua(DecodeJson(' {"a" "b"} ')) == '{a="b"}') -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_double_colon.json assert(DecodeJson(' {"x"::"b"} ')) - --- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_with_leading_zero.json -assert(DecodeJson(' [012] ')) +assert(EncodeLua(DecodeJson(' {"x"::"b"} ')) == '{x="b"}') -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_real_without_fractional_part.json assert(DecodeJson(' [1.] ')) - --- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_neg_int_starting_with_zero.json -assert(DecodeJson(' [-012] ')) - --- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_minus_space_1.json -assert(DecodeJson(' [- 1] ')) +assert(EncodeLua(DecodeJson(' [1.] 
')) == EncodeLua({1.0})) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_2.e3.json assert(DecodeJson(' [2.e3] ')) +assert(EncodeLua(DecodeJson(' [2.e3] ')) == '{2000.}') -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_2.e-3.json assert(DecodeJson(' [2.e-3] ')) +assert(EncodeLua(DecodeJson(' [2.e-3] ')) == '{0.002}') -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_2.e+3.json assert(DecodeJson(' [2.e+3] ')) - --- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_1_000.json -assert(DecodeJson(' [1 000.0] ')) +assert(EncodeLua(DecodeJson(' [2.e+3] ')) == '{2000.}') -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_0.e1.json assert(DecodeJson(' [0.e1] ')) +assert(EncodeLua(DecodeJson(' [0.e1] ')) == '{0.}') -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_-2..json assert(DecodeJson(' [-2.] ')) +assert(EncodeLua(DecodeJson(' [-2.] ')) == '{-2.}') --- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_-01.json -assert(DecodeJson(' [-01] ')) +-- lool +assert(not DecodeJson(' [--2.] 
')) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_number_and_several_commas.json assert(DecodeJson(' [1,,] ')) +assert(EncodeLua(DecodeJson(' [1,,] ')) == '{1}') -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_number_and_comma.json assert(DecodeJson(' [1,] ')) +assert(EncodeLua(DecodeJson(' [1,] ')) == '{1}') -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_missing_value.json assert(DecodeJson(' [ , ""] ')) - --- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_just_minus.json -assert(DecodeJson(' [-] ')) +assert(EncodeLua(DecodeJson(' [ , ""] ')) == '{""}') -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_just_comma.json assert(DecodeJson(' [,] ')) +assert(EncodeLua(DecodeJson(' [,] ')) == EncodeLua(DecodeJson(' [] '))) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_inner_array_no_comma.json -- (added spaces between [[ and ]] so lua doesn't get confused) assert(DecodeJson([[ [ 3[ 4] ] ]])) +assert(EncodeLua(DecodeJson([[ +[ 3[ 4] ] ]])) == '{3, {4}}') -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_extra_comma.json assert(DecodeJson(' ["",] ')) +assert(EncodeLua(DecodeJson(' ["",] ')) == '{""}') -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_double_extra_comma.json assert(DecodeJson(' ["x",,] ')) - +assert(EncodeLua(DecodeJson(' ["x",,] ')) == '{"x"}') -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_double_comma.json assert(DecodeJson(' [1,,2] ')) +assert(EncodeLua(DecodeJson(' [1,,2] ')) == '{1, 2}') -- 
https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_comma_and_number.json assert(DecodeJson(' [,1] ')) +assert(EncodeLua(DecodeJson(' [,1] ')) == '{1}') -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_1_true_without_comma.json assert(DecodeJson(' [1 true] ')) +assert(EncodeLua(DecodeJson(' [1 true] ')) == '{1, true}') diff --git a/test/tool/net/samples/JSONTestSuite-okay.lua b/test/tool/net/samples/JSONTestSuite-okay.lua index e52b92345..6446a0a09 100644 --- a/test/tool/net/samples/JSONTestSuite-okay.lua +++ b/test/tool/net/samples/JSONTestSuite-okay.lua @@ -1,4 +1,3 @@ - -- -- Nicolas Seriot's JSONTestSuite -- https://github.com/nst/JSONTestSuite @@ -27,18 +26,17 @@ -- SOFTWARE. -- - -- these test cases are prefixed with i_ -- ljson is free to accept or reject, -- but we run them anyway to check for segfaults -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_structure_UTF-8_BOM_empty_object.json -- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \xef\xbb\xbf{} ')) +assert(not DecodeJson(' \xef\xbb\xbf{} ')) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_structure_500_nested_arrays.json -- (added spaces between [[ and ]] so lua doesn't get confused) -assert(nil ~= pcall(DecodeJson, [[ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ +assert(not DecodeJson([[ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ @@ -66,124 +64,154 @@ assert(nil ~= pcall(DecodeJson, [[ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ]])) -- 
https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_utf16LE_no_BOM.json --- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \x5b\x00\x22\x00\xe9\x00\x22\x00\x5d\x00 ')) +assert(not DecodeJson(" [\x00\"\x00\xe9\x00\"\x00]\x00 ")) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_utf16BE_no_BOM.json --- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \x00\x5b\x00\x22\x00\xe9\x00\x22\x00\x5d ')) +assert(not DecodeJson(" \x00[\x00\"\x00\xe9\x00\"\x00] ")) +-- [jart] ljson currently doesn't validate utf-8 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_truncated-utf-8.json --- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \x5b\x22\xe0\xff\x22\x5d ')) +assert(DecodeJson(" [\"\xe0\xff\"] ")) +-- [jart] ljson currently doesn't validate utf-8 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_overlong_sequence_6_bytes_null.json --- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \x5b\x22\xfc\x80\x80\x80\x80\x80\x22\x5d ')) +assert(DecodeJson(" [\"\xfc\x80\x80\x80\x80\x80\"] ")) +-- [jart] ljson currently doesn't validate utf-8 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_overlong_sequence_6_bytes.json --- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \x5b\x22\xfc\x83\xbf\xbf\xbf\xbf\x22\x5d ')) +assert(DecodeJson(" [\"\xfc\x83\xbf\xbf\xbf\xbf\"] ")) +-- [jart] ljson currently doesn't validate utf-8 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_overlong_sequence_2_bytes.json -- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \x5b\x22\xc0\xaf\x22\x5d ')) +assert(DecodeJson(' \x5b\x22\xc0\xaf\x22\x5d ')) +-- 
[jart] ljson currently doesn't validate utf-8 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_not_in_unicode_range.json -- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \x5b\x22\xf4\xbf\xbf\xbf\x22\x5d ')) +assert(DecodeJson(" [\"\xf4\xbf\xbf\xbf\"] ")) +-- [jart] ljson currently doesn't validate utf-8 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_lone_utf8_continuation_byte.json -- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \x5b\x22\x81\x22\x5d ')) +assert(DecodeJson(' \x5b\x22\x81\x22\x5d ')) +-- [jart] our behavior here is consistent with v8 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_lone_second_surrogate.json --- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \x5b\x22\x5c\x75\x44\x46\x41\x41\x22\x5d ')) +assert(DecodeJson(" [\"\\uDFAA\"] ")) +assert(EncodeJson(DecodeJson(" [\"\\uDFAA\"] ")) == "[\"\\\\uDFAA\"]") +-- [jart] ljson currently doesn't validate utf-8 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_iso_latin_1.json -- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \x5b\x22\xe9\x22\x5d ')) +assert(DecodeJson(' \x5b\x22\xe9\x22\x5d ')) +-- [jart] ljson currently doesn't validate utf-8 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_inverted_surrogates_U+1D11E.json -- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \x5b\x22\x5c\x75\x44\x64\x31\x65\x5c\x75\x44\x38\x33\x34\x22\x5d ')) +assert(DecodeJson(' \x5b\x22\x5c\x75\x44\x64\x31\x65\x5c\x75\x44\x38\x33\x34\x22\x5d ')) +-- [jart] ljson currently doesn't validate utf-8 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_invalid_utf-8.json 
-- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \x5b\x22\xff\x22\x5d ')) +assert(DecodeJson(' \x5b\x22\xff\x22\x5d ')) +-- [jart] ljson currently doesn't validate utf-8 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_invalid_surrogate.json -- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \x5b\x22\x5c\x75\x64\x38\x30\x30\x61\x62\x63\x22\x5d ')) +assert(DecodeJson(' \x5b\x22\x5c\x75\x64\x38\x30\x30\x61\x62\x63\x22\x5d ')) +-- [jart] ljson currently doesn't validate utf-8 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_invalid_lonely_surrogate.json -- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \x5b\x22\x5c\x75\x64\x38\x30\x30\x22\x5d ')) +assert(DecodeJson(' \x5b\x22\x5c\x75\x64\x38\x30\x30\x22\x5d ')) +-- [jart] ljson currently doesn't validate utf-8 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_incomplete_surrogates_escape_valid.json -- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \x5b\x22\x5c\x75\x44\x38\x30\x30\x5c\x75\x44\x38\x30\x30\x5c\x6e\x22\x5d ')) +assert(DecodeJson(' \x5b\x22\x5c\x75\x44\x38\x30\x30\x5c\x75\x44\x38\x30\x30\x5c\x6e\x22\x5d ')) +-- [jart] ljson currently doesn't validate utf-8 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_incomplete_surrogate_pair.json -- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \x5b\x22\x5c\x75\x44\x64\x31\x65\x61\x22\x5d ')) +assert(DecodeJson(' \x5b\x22\x5c\x75\x44\x64\x31\x65\x61\x22\x5d ')) +-- [jart] ljson currently doesn't validate utf-8 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_incomplete_surrogate_and_escape_valid.json -- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' 
\x5b\x22\x5c\x75\x44\x38\x30\x30\x5c\x6e\x22\x5d ')) +assert(DecodeJson(' \x5b\x22\x5c\x75\x44\x38\x30\x30\x5c\x6e\x22\x5d ')) +-- [jart] ljson currently doesn't validate utf-8 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_UTF8_surrogate_U+D800.json -- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \x5b\x22\xed\xa0\x80\x22\x5d ')) +assert(DecodeJson(' \x5b\x22\xed\xa0\x80\x22\x5d ')) +-- [jart] ljson currently doesn't validate utf-8 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_UTF-8_invalid_sequence.json -- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \x5b\x22\xe6\x97\xa5\xd1\x88\xfa\x22\x5d ')) +assert(DecodeJson(' \x5b\x22\xe6\x97\xa5\xd1\x88\xfa\x22\x5d ')) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_UTF-16LE_with_BOM.json --- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \xff\xfe\x5b\x00\x22\x00\xe9\x00\x22\x00\x5d\x00 ')) +assert(not DecodeJson(" \xff\xfe[\x00\"\x00\xe9\x00\"\x00]\x00 ")) +-- [jart] ljson currently doesn't validate utf-8 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_1st_valid_surrogate_2nd_invalid.json -- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \x5b\x22\x5c\x75\x44\x38\x38\x38\x5c\x75\x31\x32\x33\x34\x22\x5d ')) +assert(DecodeJson(' \x5b\x22\x5c\x75\x44\x38\x38\x38\x5c\x75\x31\x32\x33\x34\x22\x5d ')) +-- [jart] ljson currently doesn't validate utf-8 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_1st_surrogate_but_2nd_missing.json -- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \x5b\x22\x5c\x75\x44\x41\x44\x41\x22\x5d ')) +assert(DecodeJson(' \x5b\x22\x5c\x75\x44\x41\x44\x41\x22\x5d ')) +-- [jart] ljson currently doesn't 
validate utf-8 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_object_key_lone_2nd_surrogate.json -- (converted to binary for safety) -assert(nil ~= pcall(DecodeJson, ' \x7b\x22\x5c\x75\x44\x46\x41\x41\x22\x3a\x30\x7d ')) - +assert(DecodeJson(' \x7b\x22\x5c\x75\x44\x46\x41\x41\x22\x3a\x30\x7d ')) +-- [jart] ljson is precisely the same as v8 with integers larger than int64_t -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_number_very_big_negative_int.json -assert(nil ~= pcall(DecodeJson, ' [-237462374673276894279832749832423479823246327846] ')) +assert(DecodeJson(' [-237462374673276894279832749832423479823246327846] ')) +assert(EncodeJson(DecodeJson(' [-237462374673276894279832749832423479823246327846] ')) == '[-2.374623746732769e+47]') -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_number_too_big_pos_int.json -assert(nil ~= pcall(DecodeJson, ' [100000000000000000000] ')) +assert(DecodeJson(' [100000000000000000000] ')) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_number_too_big_neg_int.json -assert(nil ~= pcall(DecodeJson, ' [-123123123123123123123123123123] ')) +assert(DecodeJson(' [-123123123123123123123123123123] ')) +-- [jart] once again consistent with v8 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_number_real_underflow.json -assert(nil ~= pcall(DecodeJson, ' [123e-10000000] ')) +assert(DecodeJson(' [123e-10000000] ')) +assert(EncodeJson(DecodeJson(' [123e-10000000] ')) == '[0]') +assert(EncodeLua(DecodeJson(' [123e-10000000] ')) == '{0.}') +-- [jart] consistent with v8 we encode Infinity as null (wut?) 
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_number_real_pos_overflow.json -assert(nil ~= pcall(DecodeJson, ' [123123e100000] ')) +assert(DecodeJson(' [123123e100000] ')) +assert(EncodeJson(DecodeJson(' [123123e100000] ')) == '[null]') +-- [jart] consistent with v8 we encode -Infinity as null (wut?) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_number_real_neg_overflow.json -assert(nil ~= pcall(DecodeJson, ' [-123123e100000] ')) +assert(DecodeJson(' [-123123e100000] ')) +assert(EncodeJson(DecodeJson(' [-123123e100000] ')) == '[null]') +-- [jart] consistent with v8 we encode Infinity as null (wut?) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_number_pos_double_huge_exp.json -assert(nil ~= pcall(DecodeJson, ' [1.5e+9999] ')) +assert(DecodeJson(' [1.5e+9999] ')) +assert(EncodeJson(DecodeJson(' [1.5e+9999] ')) == '[null]') +-- [jart] consistent with v8 we encode -Infinity as null (wut?) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_number_neg_int_huge_exp.json -assert(nil ~= pcall(DecodeJson, ' [-1e+9999] ')) +assert(DecodeJson(' [-1e+9999] ')) +assert(EncodeJson(DecodeJson(' [-1e+9999] ')) == '[null]') +-- [jart] consistent with v8 we encode Infinity as null (wut?) 
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_number_huge_exp.json -assert(nil ~= pcall(DecodeJson, ' [0.4e00669999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999969999999006] ')) +assert(DecodeJson(' [0.4e00669999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999969999999006] ')) +assert(EncodeJson(DecodeJson(' [0.4e00669999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999969999999006] ')) == '[null]') +-- [jart] consistent with v8 we encode underflow as 0 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_number_double_huge_neg_exp.json -assert(nil ~= pcall(DecodeJson, ' [123.456e-789] ')) +assert(DecodeJson(' [123.456e-789] ')) +assert(EncodeJson(DecodeJson(' [123.456e-789] ')) == '[0]') diff --git a/test/tool/net/samples/JSONTestSuite-pass.lua b/test/tool/net/samples/JSONTestSuite-pass.lua index cb981cac0..33197aea7 100644 --- a/test/tool/net/samples/JSONTestSuite-pass.lua +++ b/test/tool/net/samples/JSONTestSuite-pass.lua @@ -1,4 +1,3 @@ - -- -- Nicolas Seriot's JSONTestSuite -- https://github.com/nst/JSONTestSuite @@ -27,10 +26,21 @@ -- SOFTWARE. 
-- - -- these test cases are prefixed with y_ -- ljson should accept all of them as valid +-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/y_structure_lonely_null.json +val, err = DecodeJson([[ null ]]) +assert(not val) +assert(val == nil) +assert(err == nil) + +-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/y_structure_lonely_false.json +val, err = DecodeJson([[ false ]]) +assert(not val) +assert(val == false) +assert(err == nil) + -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/y_structure_whitespace_array.json assert(DecodeJson([[ [] ]])) @@ -49,27 +59,29 @@ assert(DecodeJson([[ true ]])) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/y_structure_lonely_string.json assert(DecodeJson([[ "asd" ]])) --- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/y_structure_lonely_null.json -assert(not DecodeJson([[ null ]])) - -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/y_structure_lonely_negative_real.json assert(DecodeJson([[ -0.1 ]])) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/y_structure_lonely_int.json assert(DecodeJson([[ 42 ]])) --- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/y_structure_lonely_false.json -assert(not DecodeJson([[ false ]])) - +-- Raw ASCII DEL allowed in string literals -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/y_string_with_del_character.json -assert(DecodeJson([[ ["aa"] ]])) +assert(DecodeJson(" [\"a\x7fa\"] ")) +assert(EncodeJson(DecodeJson(" [\"a\x7fa\"] ")) == '["a\\u007fa"]') +-- EURO SIGN (20AC) and MUSICAL SYMBOL G CLEF (1D11E) in string literal -- 
https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/y_string_utf8.json assert(DecodeJson([[ ["€𝄞"] ]])) +assert(EncodeJson(DecodeJson([[ ["€𝄞"] ]])) == "[\"\\u20ac\\ud834\\udd1e\"]") +assert(EncodeJson(DecodeJson([[ ["€𝄞"] ]])) == EncodeJson(DecodeJson(" [\"\xe2\x82\xac\xf0\x9d\x84\x9e\"] "))) +-- unicode escape for double quote -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/y_string_unicode_escaped_double_quote.json assert(DecodeJson([[ ["\u0022"] ]])) +assert(DecodeJson([[ ["\u0022"] ]])[1] == '"') +-- replacement character -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/y_string_unicode_U+FFFE_nonchar.json assert(DecodeJson([[ ["\uFFFE"] ]])) @@ -106,9 +118,11 @@ assert(DecodeJson([[ ["new\u000Aline"] ]])) -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/y_string_uEscape.json assert(DecodeJson([[ ["\u0061\u30af\u30EA\u30b9"] ]])) +-- paragraph separator trojan source -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/y_string_u+2029_par_sep.json assert(DecodeJson([[ ["
"] ]])) +-- line separator trojan source -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/y_string_u+2028_line_sep.json assert(DecodeJson([[ ["
"] ]])) @@ -322,4 +336,3 @@ assert(DecodeJson([[ [""] ]])) -- (added spaces between [[ and ]] so lua doesn't get confused) assert(DecodeJson([[ [ [ ] ] ]])) - diff --git a/test/tool/net/samples/jsonorg-lenient.lua b/test/tool/net/samples/jsonorg-lenient.lua index 9e393d12a..aef7b8eab 100644 --- a/test/tool/net/samples/jsonorg-lenient.lua +++ b/test/tool/net/samples/jsonorg-lenient.lua @@ -9,30 +9,35 @@ assert(DecodeJson([[ [ "extra comma",] ]])) +-- [jart] we deviate from json.org because we don't care about commas -- https://www.json.org/JSON_checker/test.zip -- JSON parsing sample test case: fail5.json assert(DecodeJson([[ [ "double extra comma",,] ]])) +-- [jart] we deviate from json.org because we don't care about commas -- https://www.json.org/JSON_checker/test.zip -- JSON parsing sample test case: fail6.json assert(DecodeJson([[ [ , "<-- missing value"] ]])) +-- [jart] we deviate from json.org because we don't care about commas -- https://www.json.org/JSON_checker/test.zip -- JSON parsing sample test case: fail9.json assert(DecodeJson([[ {"Extra comma": true,} ]])) +-- [jart] we deviate from json.org because we don't care about colons -- https://www.json.org/JSON_checker/test.zip -- JSON parsing sample test case: fail19.json assert(DecodeJson([[ {"Missing colon" null} ]])) +-- [jart] we deviate from json.org because we don't care about colons -- https://www.json.org/JSON_checker/test.zip -- JSON parsing sample test case: fail20.json assert(DecodeJson([[ @@ -41,19 +46,19 @@ assert(DecodeJson([[ -- https://www.json.org/JSON_checker/test.zip -- JSON parsing sample test case: fail25.json -assert(DecodeJson([[ +assert(not DecodeJson([[ [ " tab character in string "] ]])) -- https://www.json.org/JSON_checker/test.zip -- JSON parsing sample test case: fail27.json -assert(DecodeJson([[ +assert(not DecodeJson([[ [ "line break"] ]])) -- https://www.json.org/JSON_checker/test.zip -- JSON parsing sample test case: fail15.json -assert(DecodeJson([[ +assert(not 
DecodeJson([[ [ "Illegal backslash escape: \x15"] ]])) diff --git a/test/tool/net/samples/jsonorg-pass.lua b/test/tool/net/samples/jsonorg-pass.lua index 8cab843a7..f59e6e601 100644 --- a/test/tool/net/samples/jsonorg-pass.lua +++ b/test/tool/net/samples/jsonorg-pass.lua @@ -1,6 +1,6 @@ -- https://www.json.org/JSON_checker/test.zip -- JSON parsing sample test case: pass1.json -assert(pcall(DecodeJson, [[ +assert(DecodeJson([[ [ "JSON Test Pattern pass1", {"object with 1 member":[ "array with 1 element"] }, @@ -63,14 +63,13 @@ assert(pcall(DecodeJson, [[ -- https://www.json.org/JSON_checker/test.zip -- JSON parsing sample test case: pass2.json -assert(pcall(DecodeJson, [[ +assert(DecodeJson([[ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ "Not too deep"] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ]])) - -- https://www.json.org/JSON_checker/test.zip -- JSON parsing sample test case: pass3.json -assert(pcall(DecodeJson, [[ +assert(DecodeJson([[ { "JSON Test Pattern pass3": { "The outermost value": "must be an object or array.", @@ -80,18 +79,20 @@ assert(pcall(DecodeJson, [[ ]])) - -- json.org says these should fail, but many parsers, -- including python's json.load allow the following +-- [jart] our behavior is consistent with v8 -- https://www.json.org/JSON_checker/test.zip -- JSON parsing sample test case: fail1.json (actually passes) -assert(pcall(DecodeJson, [[ +assert(DecodeJson([[ "A JSON payload should be an object or array, not a string." 
]])) +-- [jart] this deviates from json.org +-- we permit depth up to 128 -- https://www.json.org/JSON_checker/test.zip -- JSON parsing sample test case: fail18.json (actually passes) -assert(pcall(DecodeJson, [[ +assert(DecodeJson([[ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ "Too deep"] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ]])) diff --git a/third_party/double-conversion/wrapper.cc b/third_party/double-conversion/wrapper.cc index d602ada44..068408051 100644 --- a/third_party/double-conversion/wrapper.cc +++ b/third_party/double-conversion/wrapper.cc @@ -16,6 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/bits/bits.h" #include "libc/str/str.h" #include "third_party/double-conversion/double-conversion.h" #include "third_party/double-conversion/double-to-string.h" @@ -32,6 +33,21 @@ char* DoubleToEcmascript(char buf[128], double x) { return b.Finalize(); } +char* DoubleToJson(char buf[128], double x) { + StringBuilder b(buf, 128); + static const DoubleToStringConverter kDoubleToJson( + DoubleToStringConverter::UNIQUE_ZERO | + DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, + "null", "null", 'e', -6, 21, 6, 0); + kDoubleToJson.ToShortest(x, &b); + b.Finalize(); + if (READ32LE(buf) != READ32LE("-nul")) { + return buf; + } else { + return strcpy(buf, "null"); + } +} + char* DoubleToLua(char buf[128], double x) { static const DoubleToStringConverter kDoubleToLua( DoubleToStringConverter::EMIT_TRAILING_DECIMAL_POINT | diff --git a/third_party/double-conversion/wrapper.h b/third_party/double-conversion/wrapper.h index 0414345d4..84a47b689 100644 --- a/third_party/double-conversion/wrapper.h +++ b/third_party/double-conversion/wrapper.h @@ -4,6 +4,7 @@ COSMOPOLITAN_C_START_ char *DoubleToLua(char[128], double); +char *DoubleToJson(char[128], double); char *DoubleToEcmascript(char[128], double); double 
StringToDouble(const char *, size_t, int *); diff --git a/third_party/lua/luaencodejsondata.c b/third_party/lua/luaencodejsondata.c index 2dd9bce80..a1e168855 100644 --- a/third_party/lua/luaencodejsondata.c +++ b/third_party/lua/luaencodejsondata.c @@ -74,7 +74,7 @@ static int LuaEncodeJsonDataImpl(lua_State *L, char **buf, int level, buf, ibuf, FormatInt64(ibuf, luaL_checkinteger(L, idx)) - ibuf)); } else { RETURN_ON_ERROR( - appends(buf, DoubleToEcmascript(ibuf, lua_tonumber(L, idx)))); + appends(buf, DoubleToJson(ibuf, lua_tonumber(L, idx)))); } return 0; diff --git a/tool/net/help.txt b/tool/net/help.txt index 0eff7bbaf..218d1b9b3 100644 --- a/tool/net/help.txt +++ b/tool/net/help.txt @@ -679,21 +679,61 @@ FUNCTIONS ├─→ double ├─→ array ├─→ object + ├─→ false ├─→ true + ├─→ nil └─→ nil, error:str Turns JSON string into a Lua data structure. - This is a very permissive parser. That means it should always - parse correctly formatted JSON correctly. However it will not - complain if the `input` string is weirdly formatted. There is - currently no validation performed, other than what we need to - ensure security. For example `{3=4}` will decode as `{[3]=4}` - even though that structure won't round-trip with `EncodeJson` - since redbean won't generate invalid JSON (see Postel's Law). + This is a generally permissive parser, in the sense that like + v8, it permits scalars as top-level values. Therefore we must + note that this API can be thought of as special, in the sense - This parser permits top-level values regardless of type, with - the exception of `false`, `null`, and absent. + val = assert(DecodeJson(str)) + + will usually do the right thing, except in cases where false + or null are the top-level value. 
In those cases, it's needed + to check the second value too in order to discern from error + + val, err = DecodeJson(str) + if not val then + if err then + print('bad json', err) + elseif val == nil then + print('val is null') + elseif val == false then + print('val is false') + end + end + + This parser supports 64-bit signed integers. If an overflow + happens, then the integer is silently coerced to double, as + consistent with v8. If a double overflows into Infinity, we + coerce it to `null` since that's what v8 does, and the same + goes for underflows which, like v8, are coerced to 0.0. + + This parser does not validate UTF-8 which is copied how the + JSON specifies. It may therefore contain underlong overlong + characters, trojan source and even numbers banned by the IETF. + You can use VisualizeControlCodes() and Underlong(), to see + if a string round-trips, to detect these weirdo codepoints. + + This parser does some validation of UTF-16. Consistent with + v8, bad surrogate characters will be silently preserved, as + their original escape sequence text. Thereby ensuring utf-8 + output is valid. Please note that invalid utf-8 could still + happen if it's encoded as utf-8. + + This parser is lenient about commas and colons. For example + it's permissible to say `DecodeJson('[1 2 3 4]')`. Trailing + commas are allowed. Even prefix commas are allowed. However + it's not recommended that you rely on this behavior, and it + won't round-trip with EncodeJson() currently. + + When objects are parsed, your Lua object can't preserve the + original ordering of fields. As such, they'll be sorted + by EncodeJson() and may not round-trip with original intent EncodeJson(value[,options:table]) ├─→ json:str @@ -726,6 +766,8 @@ FUNCTIONS When arrays and objects are serialized, entries will be sorted in a deterministic order. 
+ This parser does not support UTF-8 + EncodeLua(value[,options:table]) ├─→ luacode:str ├─→ true [if useoutput] @@ -1385,10 +1427,10 @@ FUNCTIONS access log and message logging. VisualizeControlCodes(str) → str - Replaces C0 control codes with their UNICODE pictures - representation. This function also canonicalizes overlong - encodings. C1 control codes are replaced with a JavaScript-like - escape sequence. + Replaces C0 control codes and trojan source characters with + descriptive UNICODE pictorial representation. This function + also canonicalizes overlong encodings. C1 control codes are + replaced with a JavaScript-like escape sequence. Underlong(str) → str Canonicalizes overlong encodings. diff --git a/tool/net/ljson.c b/tool/net/ljson.c index 4ebadf3f9..7023b8429 100644 --- a/tool/net/ljson.c +++ b/tool/net/ljson.c @@ -19,7 +19,9 @@ #include "libc/bits/bits.h" #include "libc/bits/likely.h" #include "libc/intrin/kprintf.h" +#include "libc/log/check.h" #include "libc/log/log.h" +#include "libc/str/str.h" #include "libc/str/tpenc.h" #include "libc/str/utf16.h" #include "third_party/double-conversion/wrapper.h" @@ -42,6 +44,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p, char w[4]; const char *a; luaL_Buffer b; + const char *reason; struct DecodeJson r; int A, B, C, D, c, d, i, u; if (UNLIKELY(!--depth)) { @@ -74,9 +77,6 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p, case 'n': // null if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey; - if (UNLIKELY(context == TOP_LEVEL)) { - return (struct DecodeJson){-1, "toplevel json can't be null"}; - } if (p + 3 <= e && READ32LE(p - 1) == READ32LE("null")) { lua_pushnil(L); return (struct DecodeJson){1, p + 3}; @@ -86,9 +86,6 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p, case 'f': // false if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey; - if (UNLIKELY(context == TOP_LEVEL)) { - return (struct DecodeJson){-1, "toplevel json can't be false"}; 
- } if (p + 4 <= e && READ32LE(p) == READ32LE("alse")) { lua_pushboolean(L, false); return (struct DecodeJson){1, p + 4}; @@ -105,15 +102,26 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p, goto IllegalCharacter; } + BadObjectKey: + return (struct DecodeJson){-1, "object key must be string"}; + case '-': // negative if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey; - d = -1; - break; + if (p < e && isdigit(*p)) { + d = -1; + break; + } else { + return (struct DecodeJson){-1, "bad negative"}; + } case '0': // zero or number if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey; - if (p < e && (*p == '.' || *p == 'e' || *p == 'E')) { - goto UseDubble; + if (p < e) { + if ((*p == '.' || *p == 'e' || *p == 'E')) { + goto UseDubble; + } else if (isdigit(*p)) { + return (struct DecodeJson){-1, "unexpected octal"}; + } } lua_pushinteger(L, 0); return (struct DecodeJson){1, p}; @@ -138,6 +146,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p, UseDubble: // number lua_pushnumber(L, StringToDouble(a, e - a, &c)); + DCHECK(c > 0, "paranoid avoiding infinite loop"); return (struct DecodeJson){1, a + c}; case '[': // Array @@ -206,134 +215,146 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p, case '"': // string luaL_buffinit(L, &b); - while (p < e) { + for (;;) { + if (UNLIKELY(p >= e)) { + UnexpectedEofString: + reason = "unexpected eof in string"; + goto StringFailureWithReason; + } + c = *p++ & 255; + if (c == '"') { + luaL_pushresult(&b); + return (struct DecodeJson){1, p}; + } else if (c == '\\') { + goto HandleEscape; + } else if (UNLIKELY(c <= 0x1F)) { + reason = "non-del c0 in string"; + goto StringFailureWithReason; + } else { + luaL_addchar(&b, c); + } + continue; + HandleEscape: + if (UNLIKELY(p >= e)) { + goto UnexpectedEofString; + } switch ((c = *p++ & 255)) { - default: - AddByte: + case '"': + case '/': + case '\\': luaL_addchar(&b, c); break; - case '\\': - if (p < e) { - switch ((c = *p++ & 
255)) { - default: - goto InvalidEscapeCharacter; - case '"': - case '/': - case '\\': - goto AddByte; - case 'b': - c = '\b'; - goto AddByte; - case 'f': - c = '\f'; - goto AddByte; - case 'n': - c = '\n'; - goto AddByte; - case 'r': - c = '\r'; - goto AddByte; - case 't': - c = '\t'; - goto AddByte; - case 'x': - if (p + 2 <= e && // - (A = kHexToInt[p[0] & 255]) != -1 && // HEX - (B = kHexToInt[p[1] & 255]) != -1) { // - c = A << 4 | B; - p += 2; - goto AddByte; - } else { - goto InvalidEscapeCharacter; - } - case 'u': - if (p + 4 <= e && // - (A = kHexToInt[p[0] & 255]) != -1 && // - (B = kHexToInt[p[1] & 255]) != -1 && // UCS-2 - (C = kHexToInt[p[2] & 255]) != -1 && // - (D = kHexToInt[p[3] & 255]) != -1) { // - c = A << 12 | B << 8 | C << 4 | D; - if (!IsSurrogate(c)) { - p += 4; - } else if (IsHighSurrogate(c)) { - if (p + 4 + 6 <= e && // - p[4] == '\\' && // - p[5] == 'u' && // - (A = kHexToInt[p[6] & 255]) != -1 && // UTF-16 - (B = kHexToInt[p[7] & 255]) != -1 && // - (C = kHexToInt[p[8] & 255]) != -1 && // - (D = kHexToInt[p[9] & 255]) != -1) { // - u = A << 12 | B << 8 | C << 4 | D; - if (IsLowSurrogate(u)) { - p += 4 + 6; - c = MergeUtf16(c, u); - } else { - goto BadUnicode; - } - } else { - goto BadUnicode; - } - } else { - goto BadUnicode; - } - // UTF-8 - if (c < 0x7f) { - w[0] = c; - i = 1; - } else if (c <= 0x7ff) { - w[0] = 0300 | (c >> 6); - w[1] = 0200 | (c & 077); - i = 2; - } else if (c <= 0xffff) { - if (UNLIKELY(IsSurrogate(c))) { - ReplacementCharacter: - c = 0xfffd; - } - w[0] = 0340 | (c >> 12); - w[1] = 0200 | ((c >> 6) & 077); - w[2] = 0200 | (c & 077); - i = 3; - } else if (~(c >> 18) & 007) { - w[0] = 0360 | (c >> 18); - w[1] = 0200 | ((c >> 12) & 077); - w[2] = 0200 | ((c >> 6) & 077); - w[3] = 0200 | (c & 077); - i = 4; - } else { - goto ReplacementCharacter; - } - luaL_addlstring(&b, w, i); - } else { - goto InvalidEscapeCharacter; - BadUnicode: - // Echo invalid \uXXXX sequences - // Rather than corrupting UTF-8! 
- luaL_addstring(&b, "\\u"); - } - break; + case 'b': + luaL_addchar(&b, '\b'); + break; + case 'f': + luaL_addchar(&b, '\f'); + break; + case 'n': + luaL_addchar(&b, '\n'); + break; + case 'r': + luaL_addchar(&b, '\r'); + break; + case 't': + luaL_addchar(&b, '\t'); + break; + case 'x': + if (p + 2 <= e && // + (A = kHexToInt[p[0] & 255]) != -1 && // HEX + (B = kHexToInt[p[1] & 255]) != -1) { // + c = A << 4 | B; + if (!(0x20 <= c && c <= 0x7E)) { + reason = "hex escape not printable"; + goto StringFailureWithReason; } + p += 2; + luaL_addchar(&b, c); + break; } else { - goto InvalidEscapeCharacter; + reason = "invalid hex escape"; + goto StringFailureWithReason; + } + case 'u': + if (p + 4 <= e && // + (A = kHexToInt[p[0] & 255]) != -1 && // + (B = kHexToInt[p[1] & 255]) != -1 && // UCS-2 + (C = kHexToInt[p[2] & 255]) != -1 && // + (D = kHexToInt[p[3] & 255]) != -1) { // + c = A << 12 | B << 8 | C << 4 | D; + if (!IsSurrogate(c)) { + p += 4; + } else if (IsHighSurrogate(c)) { + if (p + 4 + 6 <= e && // + p[4] == '\\' && // + p[5] == 'u' && // + (A = kHexToInt[p[6] & 255]) != -1 && // UTF-16 + (B = kHexToInt[p[7] & 255]) != -1 && // + (C = kHexToInt[p[8] & 255]) != -1 && // + (D = kHexToInt[p[9] & 255]) != -1) { // + u = A << 12 | B << 8 | C << 4 | D; + if (IsLowSurrogate(u)) { + p += 4 + 6; + c = MergeUtf16(c, u); + } else { + goto BadUnicode; + } + } else { + goto BadUnicode; + } + } else { + goto BadUnicode; + } + // UTF-8 + if (c < 0x7f) { + w[0] = c; + i = 1; + } else if (c <= 0x7ff) { + w[0] = 0300 | (c >> 6); + w[1] = 0200 | (c & 077); + i = 2; + } else if (c <= 0xffff) { + if (UNLIKELY(IsSurrogate(c))) { + ReplacementCharacter: + c = 0xfffd; + } + w[0] = 0340 | (c >> 12); + w[1] = 0200 | ((c >> 6) & 077); + w[2] = 0200 | (c & 077); + i = 3; + } else if (~(c >> 18) & 007) { + w[0] = 0360 | (c >> 18); + w[1] = 0200 | ((c >> 12) & 077); + w[2] = 0200 | ((c >> 6) & 077); + w[3] = 0200 | (c & 077); + i = 4; + } else { + goto ReplacementCharacter; + } + 
luaL_addlstring(&b, w, i); + } else { + reason = "invalid unicode escape"; + goto StringFailureWithReason; + BadUnicode: + // Echo invalid \uXXXX sequences + // Rather than corrupting UTF-8! + luaL_addstring(&b, "\\u"); } break; - case '"': - luaL_pushresult(&b); - return (struct DecodeJson){1, p}; + default: + reason = "invalid escape character"; + goto StringFailureWithReason; } } + break; + StringFailureWithReason: luaL_pushresultsize(&b, 0); lua_pop(L, 1); - return (struct DecodeJson){-1, "unexpected eof in string"}; + return (struct DecodeJson){-1, reason}; default: IllegalCharacter: return (struct DecodeJson){-1, "illegal character"}; - BadObjectKey: - return (struct DecodeJson){-1, "object key must be string"}; - InvalidEscapeCharacter: - luaL_pushresultsize(&b, 0); - lua_pop(L, 1); - return (struct DecodeJson){-1, "invalid escape character"}; } } if (UNLIKELY(context == TOP_LEVEL)) { @@ -357,16 +378,14 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p, * converted to a floating-point number instead. Invalid surrogate * escape sequences in strings won't be decoded. * - * A weird case exists when parsing empty objects. In order to let Lua - * tell them apart from empty arrays, we insert a special key that's - * ignored by our JSON serializer, called `[__json_object__]=true`. - * * @param L is Lua interpreter state * @param p is input string * @param n is byte length of `p` or -1 for automatic strlen() - * @return res.rc is 1 if value pushed, 0 on eof, otherwise -1 - * @return res.p is is advanced `p` pointer if `rc` isn't -1 - * @return res.p is string describing error if `rc` is -1 + * @return r.rc is 1 if value is pushed on lua stack + * @return r.rc is 0 on eof + * @return r.rc is -1 on error + * @return r.p is the advanced `p` pointer if `rc ≥ 0` + * @return r.p is string describing error if `rc < 0` */ struct DecodeJson DecodeJson(struct lua_State *L, const char *p, size_t n) { if (n == -1) n = p ? strlen(p) : 0;