From 0cea6c560f72cdce71e5402f9315cbab568820e3 Mon Sep 17 00:00:00 2001
From: Gautham <41098605+ahgamut@users.noreply.github.com>
Date: Wed, 13 Jul 2022 20:08:23 +0530
Subject: [PATCH] Make JSON parser nearly perfectly compliant (#483)

---
 test/tool/net/jsonorg_fail_test.lua          |  6 ++++++
 test/tool/net/jsonorg_lenient_test.lua       | 11 ----------
 test/tool/net/jsontestsuite_fail1_test.lua   | 11 ++++++++++
 test/tool/net/jsontestsuite_lenient_test.lua | 22 ++------------------
 tool/net/ljson.c                             | 11 ++++++++++
 5 files changed, 30 insertions(+), 31 deletions(-)
 delete mode 100644 test/tool/net/jsonorg_lenient_test.lua

diff --git a/test/tool/net/jsonorg_fail_test.lua b/test/tool/net/jsonorg_fail_test.lua
index 54f70708a..11527e683 100644
--- a/test/tool/net/jsonorg_fail_test.lua
+++ b/test/tool/net/jsonorg_fail_test.lua
@@ -177,3 +177,9 @@ break"]
 -- https://www.json.org/JSON_checker/test.zip
 -- JSON parsing sample test case: fail15.json
 assert(not DecodeJson(' ["Illegal backslash escape: \x15"] '))
+
+-- https://www.json.org/JSON_checker/test.zip
+-- JSON parsing sample test case: fail19.json
+assert(not DecodeJson([[
+{"Missing colon" null}
+]]))
diff --git a/test/tool/net/jsonorg_lenient_test.lua b/test/tool/net/jsonorg_lenient_test.lua
deleted file mode 100644
index 68513d2a9..000000000
--- a/test/tool/net/jsonorg_lenient_test.lua
+++ /dev/null
@@ -1,11 +0,0 @@
--- json.org says the following test cases should be
--- considered as invalid JSON, but ljson.c is lenient.
--- we run these tests anyway just to ensure that
--- no segfaults occurs while parsing these cases
-
--- [jart] we deviate from json.org because we don't care about colons
--- https://www.json.org/JSON_checker/test.zip
--- JSON parsing sample test case: fail19.json
-assert(DecodeJson([[
-{"Missing colon" null}
-]]))
diff --git a/test/tool/net/jsontestsuite_fail1_test.lua b/test/tool/net/jsontestsuite_fail1_test.lua
index 15e5dd653..11ec6f968 100644
--- a/test/tool/net/jsontestsuite_fail1_test.lua
+++ b/test/tool/net/jsontestsuite_fail1_test.lua
@@ -300,3 +300,14 @@ assert(not DecodeJson(' [1,,2] '))
 
 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_comma_and_number.json
 assert(not DecodeJson(' [,1] '))
+
+-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_inner_array_no_comma.json
+-- (added spaces inside the brackets so the JSON's ]] doesn't terminate the Lua long string early)
+assert(not DecodeJson([[
+[ 3[ 4] ]   ]]))
+
+-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_1_true_without_comma.json
+assert(not DecodeJson(' [1 true] '))
+
+-- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_missing_semicolon.json
+assert(not DecodeJson(' {"a" "b"} '))
diff --git a/test/tool/net/jsontestsuite_lenient_test.lua b/test/tool/net/jsontestsuite_lenient_test.lua
index b85bfe426..9a9d90bc3 100644
--- a/test/tool/net/jsontestsuite_lenient_test.lua
+++ b/test/tool/net/jsontestsuite_lenient_test.lua
@@ -27,19 +27,12 @@
 -- 
 
 -- [jart] these tests deviate from the expectations of the upstream test
---        suite. most of these failures are because we permit syntax
---        like this since it saves bandwidth and makes the impl smaller.
---        we're also more permissive about things like the encoding of
---        double exponents and empty double fraction.
-assert(EncodeLua(DecodeJson('[0 1 2 3 4]')) == '{0, 1, 2, 3, 4}')
+--        suite. most of these failures are because we're more permissive
+--        about the encoding of double exponents and empty double fraction.
 
 -- from fail1.lua
 --------------------------------------------------------------------------------
 
--- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_object_missing_semicolon.json
-assert(DecodeJson(' {"a" "b"} '))
-assert(EncodeLua(DecodeJson(' {"a" "b"} ')) == '{a="b"}')
-
 -- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_number_real_without_fractional_part.json
 assert(DecodeJson(' [1.] '))
 assert(EncodeLua(DecodeJson(' [1.] ')) == EncodeLua({1.0}))
@@ -66,14 +59,3 @@ assert(EncodeLua(DecodeJson(' [-2.] ')) == '{-2.}')
 
 -- lool
 assert(not DecodeJson(' [--2.] '))
-
--- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_inner_array_no_comma.json
--- (added spaces between [[ and ]] so lua doesn't get confused)
-assert(DecodeJson([[
-[ 3[ 4] ]   ]]))
-assert(EncodeLua(DecodeJson([[
-[ 3[ 4] ]   ]])) == '{3, {4}}')
-
--- https://github.com/nst/JSONTestSuite/tree/d64aefb55228d9584d3e5b2433f720ea8fd00c82/test_parsing/n_array_1_true_without_comma.json
-assert(DecodeJson(' [1 true] '))
-assert(EncodeLua(DecodeJson(' [1 true] ')) == '{1, true}')
diff --git a/tool/net/ljson.c b/tool/net/ljson.c
index 46260312f..8a0f3992f 100644
--- a/tool/net/ljson.c
+++ b/tool/net/ljson.c
@@ -81,6 +81,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
 
       case 'n':  // null
         if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
+        if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
         if (p + 3 <= e && READ32LE(p - 1) == READ32LE("null")) {
           lua_pushnil(L);
           return (struct DecodeJson){1, p + 3};
@@ -90,6 +91,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
 
       case 'f':  // false
         if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
+        if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
         if (p + 4 <= e && READ32LE(p) == READ32LE("alse")) {
           lua_pushboolean(L, false);
           return (struct DecodeJson){1, p + 4};
@@ -99,6 +101,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
 
       case 't':  // true
         if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
+        if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
         if (p + 3 <= e && READ32LE(p - 1) == READ32LE("true")) {
           lua_pushboolean(L, true);
           return (struct DecodeJson){1, p + 3};
@@ -108,9 +111,12 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
 
       BadObjectKey:
         return (struct DecodeJson){-1, "object key must be string"};
+      MissingPunctuation:
+        return (struct DecodeJson){-1, "missing ',' or ':'"};
 
       case '-':  // negative
         if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
+        if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
         if (p < e && isdigit(*p)) {
           d = -1;
           break;
@@ -120,6 +126,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
 
       case '0':  // zero or number
         if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
+        if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
         if (p < e) {
           if ((*p == '.' || *p == 'e' || *p == 'E')) {
             goto UseDubble;
@@ -132,6 +139,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
 
       case '1' ... '9':  // integer
         if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
+        if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
         for (x = (c - '0') * d; p < e; ++p) {
           c = *p & 255;
           if (isdigit(c)) {
@@ -155,6 +163,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
 
       case '[':  // Array
         if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
+        if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
         lua_newtable(L);
         i = 0;
         r = Parse(L, p, e, ARRAY_SINGLE | ARRAY_END, depth - 1);
@@ -193,6 +202,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
 
       case '{':  // Object
         if (UNLIKELY(0 != (context & OBJECT_KEY))) goto BadObjectKey;
+        if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
         lua_newtable(L);
         r = Parse(L, p, e, OBJECT_KEY | OBJECT_END, depth - 1);
         for (;;) {
@@ -220,6 +230,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
         return (struct DecodeJson){1, p};
 
       case '"':  // string
+        if (UNLIKELY(0 != (context & (OBJECT_VAL | AFTER_VALUE)))) goto MissingPunctuation;
         luaL_buffinit(L, &b);
         for (;;) {
           if (UNLIKELY(p >= e)) {