Add utf-8 validation to ljson

This commit is contained in:
Justine Tunney 2022-07-15 06:18:32 -07:00
parent ccd057a85d
commit baf51a4a23
4 changed files with 342 additions and 177 deletions

View file

@ -64,107 +64,95 @@ assert(not DecodeJson([[ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [
] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ]]))
-- UTF-16 encoded payloads without a byte-order mark (NUL bytes interleaved
-- with the JSON punctuation). Both the little-endian and the big-endian form
-- are asserted to be rejected by DecodeJson.
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_utf16LE_no_BOM.json
assert(not DecodeJson(" [\x00\"\x00\xe9\x00\"\x00]\x00 "))
-- NOTE(review): same input as the assert directly above, now also pinning the
-- error string. The two forms look like the before/after sides of a diff whose
-- +/- markers were lost -- confirm whether both are meant to remain.
val, err = DecodeJson(" [\x00\"\x00\xe9\x00\"\x00]\x00 ")
assert(val == nil)
assert(err == 'illegal character')
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_utf16BE_no_BOM.json
assert(not DecodeJson(" \x00[\x00\"\x00\xe9\x00\"\x00] "))
-- Raw (unescaped) byte sequences inside JSON strings that are not well-formed
-- UTF-8: truncated, overlong, out-of-range and lone-continuation inputs.
--
-- NOTE(review): every case in this run carries two expectations for the same
-- bytes: a bare assert(DecodeJson(...)) that requires a truthy result, then
-- val/err checks that require nil plus a specific error string. Both cannot
-- hold at once. This looks like the old and new side of a diff with its +/-
-- markers stripped: the "[jart] ljson currently doesn't validate utf-8"
-- remarks match the bare asserts, while the val/err checks match the change
-- title "Add utf-8 validation to ljson" -- confirm which lines are live.
-- [jart] ljson currently doesn't validate utf-8
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_truncated-utf-8.json
assert(DecodeJson(" [\"\xe0\xff\"] "))
val, err = DecodeJson(" [\"\xe0\xff\"] ")
assert(val == nil)
assert(err == 'malformed utf-8')
-- [jart] ljson currently doesn't validate utf-8
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_overlong_sequence_6_bytes_null.json
assert(DecodeJson(" [\"\xfc\x80\x80\x80\x80\x80\"] "))
val, err = DecodeJson(" [\"\xfc\x80\x80\x80\x80\x80\"] ")
assert(val == nil)
assert(err == 'illegal utf-8 character')
-- [jart] ljson currently doesn't validate utf-8
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_overlong_sequence_6_bytes.json
assert(DecodeJson(" [\"\xfc\x83\xbf\xbf\xbf\xbf\"] "))
val, err = DecodeJson(" [\"\xfc\x83\xbf\xbf\xbf\xbf\"] ")
assert(val == nil)
assert(err == "illegal utf-8 character")
-- [jart] ljson currently doesn't validate utf-8
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_overlong_sequence_2_bytes.json
-- (converted to binary for safety)
-- The hex-escaped literal below spells the same bytes as the readable
-- literal that follows it (\x5b = '[', \x22 = '"', \x5d = ']').
assert(DecodeJson(' \x5b\x22\xc0\xaf\x22\x5d '))
val, err = DecodeJson(" [\"\xc0\xaf\"] ")
assert(val == nil)
assert(err == "overlong ascii")
-- [jart] ljson currently doesn't validate utf-8
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_not_in_unicode_range.json
-- (converted to binary for safety)
assert(DecodeJson(" [\"\xf4\xbf\xbf\xbf\"] "))
val, err = DecodeJson(" [\"\xf4\xbf\xbf\xbf\"] ")
assert(val == nil)
assert(err == "utf-8 exceeds utf-16 range")
-- [jart] ljson currently doesn't validate utf-8
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_lone_utf8_continuation_byte.json
-- (converted to binary for safety)
assert(DecodeJson(' \x5b\x22\x81\x22\x5d '))
val, err = DecodeJson(" [\"\x81\"] ")
assert(val == nil)
assert(err == "c1 control code in string")
-- Lone / inverted surrogate escapes are accepted, while raw non-UTF-8 bytes
-- (Latin-1 0xE9, 0xFF) are expected to fail in the val/err checks.
-- [jart] our behavior here is consistent with v8
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_lone_second_surrogate.json
assert(DecodeJson(" [\"\\uDFAA\"] "))
-- Round trip: re-encoding yields an escaped backslash followed by uDFAA, i.e.
-- the decoded string holds the literal text of the escape, not a code point.
assert(EncodeJson(DecodeJson(" [\"\\uDFAA\"] ")) == "[\"\\\\uDFAA\"]")
-- [jart] ljson currently doesn't validate utf-8
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_iso_latin_1.json
-- (converted to binary for safety)
-- NOTE(review): the bare assert requires success and the val/err checks
-- require nil for the same bytes; these look like the before/after sides of a
-- diff with lost +/- markers -- confirm which one is live.
assert(DecodeJson(' \x5b\x22\xe9\x22\x5d '))
val, err = DecodeJson(" [\"\xe9\"] ")
assert(val == nil)
assert(err == "malformed utf-8")
-- [jart] ljson currently doesn't validate utf-8
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_inverted_surrogates_U+1D11E.json
-- (converted to binary for safety)
-- Both literals below spell the same bytes; one is hex-escaped, one readable.
assert(DecodeJson(' \x5b\x22\x5c\x75\x44\x64\x31\x65\x5c\x75\x44\x38\x33\x34\x22\x5d '))
assert(DecodeJson(" [\"\\uDd1e\\uD834\"] "))
-- [jart] ljson currently doesn't validate utf-8
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_invalid_utf-8.json
-- (converted to binary for safety)
-- NOTE(review): same contradiction as the Latin-1 case above (truthy assert
-- vs. nil + error for identical input) -- confirm which one is live.
assert(DecodeJson(' \x5b\x22\xff\x22\x5d '))
val, err = DecodeJson(" [\"\xff\"] ")
assert(val == nil)
assert(err == "illegal utf-8 character")
-- Unpaired or incomplete \uXXXX surrogate escapes. Every check in this run
-- expects DecodeJson to succeed; where the result is inspected, the escape is
-- preserved as literal text (backslash, 'u', four hex digits).
-- In each pair the hex-escaped literal and the readable literal spell the
-- same input bytes, except for the last case (see the note there).
-- [jart] ljson currently doesn't validate utf-8
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_invalid_surrogate.json
-- (converted to binary for safety)
assert(DecodeJson(' \x5b\x22\x5c\x75\x64\x38\x30\x30\x61\x62\x63\x22\x5d '))
assert(EncodeJson(DecodeJson(" [\"\\ud800abc\"] ")) == "[\"\\\\ud800abc\"]")
-- [jart] ljson currently doesn't validate utf-8
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_invalid_lonely_surrogate.json
-- (converted to binary for safety)
assert(DecodeJson(' \x5b\x22\x5c\x75\x64\x38\x30\x30\x22\x5d '))
assert(DecodeJson(" [\"\\ud800\"] ")[1] == "\\ud800")
-- [jart] ljson currently doesn't validate utf-8
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_incomplete_surrogates_escape_valid.json
-- (converted to binary for safety)
assert(DecodeJson(' \x5b\x22\x5c\x75\x44\x38\x30\x30\x5c\x75\x44\x38\x30\x30\x5c\x6e\x22\x5d '))
assert(DecodeJson(" [\"\\uD800\\uD800\\n\"] "))
-- [jart] ljson currently doesn't validate utf-8
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_incomplete_surrogate_pair.json
-- (converted to binary for safety)
assert(DecodeJson(' \x5b\x22\x5c\x75\x44\x64\x31\x65\x61\x22\x5d '))
assert(DecodeJson(" [\"\\uDd1ea\"] "))
-- [jart] ljson currently doesn't validate utf-8
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_incomplete_surrogate_and_escape_valid.json
-- (converted to binary for safety)
assert(DecodeJson(' \x5b\x22\x5c\x75\x44\x38\x30\x30\x5c\x6e\x22\x5d '))
assert(DecodeJson(" [\"\\uD800\\n\"] "))
-- [jart] ljson currently doesn't validate utf-8
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_UTF8_surrogate_U+D800.json
-- (converted to binary for safety)
-- NOTE(review): the two asserts below do NOT test the same input. The first
-- feeds the raw bytes ED A0 80 (a surrogate encoded directly as UTF-8); the
-- second decodes the escaped text \ud800abc, which is the input already used
-- by the i_string_invalid_surrogate case. Possibly a copy/paste slip --
-- confirm that the raw ED A0 80 input is still covered as intended.
assert(DecodeJson(' \x5b\x22\xed\xa0\x80\x22\x5d '))
assert(DecodeJson(" [\"\\ud800abc\"] ")[1] == "\\ud800abc")
-- Mixed cases: an invalid trailing UTF-8 byte, a UTF-16LE payload with BOM,
-- and two unpaired high-surrogate escapes.
-- [jart] ljson currently doesn't validate utf-8
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_UTF-8_invalid_sequence.json
-- (converted to binary for safety)
-- NOTE(review): the bare assert requires a truthy result while the val/err
-- checks require nil + "illegal utf-8 character" for the same bytes; these
-- look like the before/after sides of a diff whose +/- markers were lost --
-- confirm which one is live.
assert(DecodeJson(' \x5b\x22\xe6\x97\xa5\xd1\x88\xfa\x22\x5d '))
val, err = DecodeJson(" [\"\xe6\x97\xa5\xd1\x88\xfa\"] ")
assert(val == nil)
assert(err == "illegal utf-8 character")
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_UTF-16LE_with_BOM.json
-- Input starts with the bytes FF FE and is asserted to be rejected.
assert(not DecodeJson(" \xff\xfe[\x00\"\x00\xe9\x00\"\x00]\x00 "))
-- [jart] ljson currently doesn't validate utf-8
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_1st_valid_surrogate_2nd_invalid.json
-- (converted to binary for safety)
-- Hex-escaped and readable literals spell the same bytes; both must decode.
assert(DecodeJson(' \x5b\x22\x5c\x75\x44\x38\x38\x38\x5c\x75\x31\x32\x33\x34\x22\x5d '))
assert(DecodeJson(" [\"\\uD888\\u1234\"] "))
-- [jart] ljson currently doesn't validate utf-8
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_string_1st_surrogate_but_2nd_missing.json
-- (converted to binary for safety)
assert(DecodeJson(' \x5b\x22\x5c\x75\x44\x41\x44\x41\x22\x5d '))
assert(DecodeJson(" [\"\\uDADA\"] "))
-- [jart] ljson currently doesn't validate utf-8
-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/i_object_key_lone_2nd_surrogate.json
-- (converted to binary for safety)
assert(DecodeJson(' \x7b\x22\x5c\x75\x44\x46\x41\x41\x22\x3a\x30\x7d '))