Audit every single JSON test

This commit is contained in:
Justine Tunney 2022-07-12 12:30:42 -07:00
parent 7965ed0232
commit 3f3e7e92d7
17 changed files with 473 additions and 285 deletions

View file

@ -19,7 +19,9 @@
#include "libc/bits/bits.h"
#include "libc/bits/likely.h"
#include "libc/intrin/kprintf.h"
#include "libc/log/check.h"
#include "libc/log/log.h"
#include "libc/str/str.h"
#include "libc/str/tpenc.h"
#include "libc/str/utf16.h"
#include "third_party/double-conversion/wrapper.h"
@ -42,6 +44,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
char w[4];
const char *a;
luaL_Buffer b;
const char *reason;
struct DecodeJson r;
int A, B, C, D, c, d, i, u;
if (UNLIKELY(!--depth)) {
@ -74,9 +77,6 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case 'n': // null
if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey;
if (UNLIKELY(context == TOP_LEVEL)) {
return (struct DecodeJson){-1, "toplevel json can't be null"};
}
if (p + 3 <= e && READ32LE(p - 1) == READ32LE("null")) {
lua_pushnil(L);
return (struct DecodeJson){1, p + 3};
@ -86,9 +86,6 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case 'f': // false
if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey;
if (UNLIKELY(context == TOP_LEVEL)) {
return (struct DecodeJson){-1, "toplevel json can't be false"};
}
if (p + 4 <= e && READ32LE(p) == READ32LE("alse")) {
lua_pushboolean(L, false);
return (struct DecodeJson){1, p + 4};
@ -105,15 +102,26 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
goto IllegalCharacter;
}
BadObjectKey:
return (struct DecodeJson){-1, "object key must be string"};
case '-': // negative
if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey;
d = -1;
break;
if (p < e && isdigit(*p)) {
d = -1;
break;
} else {
return (struct DecodeJson){-1, "bad negative"};
}
case '0': // zero or number
if (UNLIKELY(context == OBJECT_KEY)) goto BadObjectKey;
if (p < e && (*p == '.' || *p == 'e' || *p == 'E')) {
goto UseDubble;
if (p < e) {
if ((*p == '.' || *p == 'e' || *p == 'E')) {
goto UseDubble;
} else if (isdigit(*p)) {
return (struct DecodeJson){-1, "unexpected octal"};
}
}
lua_pushinteger(L, 0);
return (struct DecodeJson){1, p};
@ -138,6 +146,7 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
UseDubble: // number
lua_pushnumber(L, StringToDouble(a, e - a, &c));
DCHECK(c > 0, "paranoid avoiding infinite loop");
return (struct DecodeJson){1, a + c};
case '[': // Array
@ -206,134 +215,146 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
case '"': // string
luaL_buffinit(L, &b);
while (p < e) {
for (;;) {
if (UNLIKELY(p >= e)) {
UnexpectedEofString:
reason = "unexpected eof in string";
goto StringFailureWithReason;
}
c = *p++ & 255;
if (c == '"') {
luaL_pushresult(&b);
return (struct DecodeJson){1, p};
} else if (c == '\\') {
goto HandleEscape;
} else if (UNLIKELY(c <= 0x1F)) {
reason = "non-del c0 in string";
goto StringFailureWithReason;
} else {
luaL_addchar(&b, c);
}
continue;
HandleEscape:
if (UNLIKELY(p >= e)) {
goto UnexpectedEofString;
}
switch ((c = *p++ & 255)) {
default:
AddByte:
case '"':
case '/':
case '\\':
luaL_addchar(&b, c);
break;
case '\\':
if (p < e) {
switch ((c = *p++ & 255)) {
default:
goto InvalidEscapeCharacter;
case '"':
case '/':
case '\\':
goto AddByte;
case 'b':
c = '\b';
goto AddByte;
case 'f':
c = '\f';
goto AddByte;
case 'n':
c = '\n';
goto AddByte;
case 'r':
c = '\r';
goto AddByte;
case 't':
c = '\t';
goto AddByte;
case 'x':
if (p + 2 <= e && //
(A = kHexToInt[p[0] & 255]) != -1 && // HEX
(B = kHexToInt[p[1] & 255]) != -1) { //
c = A << 4 | B;
p += 2;
goto AddByte;
} else {
goto InvalidEscapeCharacter;
}
case 'u':
if (p + 4 <= e && //
(A = kHexToInt[p[0] & 255]) != -1 && //
(B = kHexToInt[p[1] & 255]) != -1 && // UCS-2
(C = kHexToInt[p[2] & 255]) != -1 && //
(D = kHexToInt[p[3] & 255]) != -1) { //
c = A << 12 | B << 8 | C << 4 | D;
if (!IsSurrogate(c)) {
p += 4;
} else if (IsHighSurrogate(c)) {
if (p + 4 + 6 <= e && //
p[4] == '\\' && //
p[5] == 'u' && //
(A = kHexToInt[p[6] & 255]) != -1 && // UTF-16
(B = kHexToInt[p[7] & 255]) != -1 && //
(C = kHexToInt[p[8] & 255]) != -1 && //
(D = kHexToInt[p[9] & 255]) != -1) { //
u = A << 12 | B << 8 | C << 4 | D;
if (IsLowSurrogate(u)) {
p += 4 + 6;
c = MergeUtf16(c, u);
} else {
goto BadUnicode;
}
} else {
goto BadUnicode;
}
} else {
goto BadUnicode;
}
// UTF-8
if (c < 0x7f) {
w[0] = c;
i = 1;
} else if (c <= 0x7ff) {
w[0] = 0300 | (c >> 6);
w[1] = 0200 | (c & 077);
i = 2;
} else if (c <= 0xffff) {
if (UNLIKELY(IsSurrogate(c))) {
ReplacementCharacter:
c = 0xfffd;
}
w[0] = 0340 | (c >> 12);
w[1] = 0200 | ((c >> 6) & 077);
w[2] = 0200 | (c & 077);
i = 3;
} else if (~(c >> 18) & 007) {
w[0] = 0360 | (c >> 18);
w[1] = 0200 | ((c >> 12) & 077);
w[2] = 0200 | ((c >> 6) & 077);
w[3] = 0200 | (c & 077);
i = 4;
} else {
goto ReplacementCharacter;
}
luaL_addlstring(&b, w, i);
} else {
goto InvalidEscapeCharacter;
BadUnicode:
// Echo invalid \uXXXX sequences
// Rather than corrupting UTF-8!
luaL_addstring(&b, "\\u");
}
break;
case 'b':
luaL_addchar(&b, '\b');
break;
case 'f':
luaL_addchar(&b, '\f');
break;
case 'n':
luaL_addchar(&b, '\n');
break;
case 'r':
luaL_addchar(&b, '\r');
break;
case 't':
luaL_addchar(&b, '\t');
break;
case 'x':
if (p + 2 <= e && //
(A = kHexToInt[p[0] & 255]) != -1 && // HEX
(B = kHexToInt[p[1] & 255]) != -1) { //
c = A << 4 | B;
if (!(0x20 <= c && c <= 0x7E)) {
reason = "hex escape not printable";
goto StringFailureWithReason;
}
p += 2;
luaL_addchar(&b, c);
break;
} else {
goto InvalidEscapeCharacter;
reason = "invalid hex escape";
goto StringFailureWithReason;
}
case 'u':
if (p + 4 <= e && //
(A = kHexToInt[p[0] & 255]) != -1 && //
(B = kHexToInt[p[1] & 255]) != -1 && // UCS-2
(C = kHexToInt[p[2] & 255]) != -1 && //
(D = kHexToInt[p[3] & 255]) != -1) { //
c = A << 12 | B << 8 | C << 4 | D;
if (!IsSurrogate(c)) {
p += 4;
} else if (IsHighSurrogate(c)) {
if (p + 4 + 6 <= e && //
p[4] == '\\' && //
p[5] == 'u' && //
(A = kHexToInt[p[6] & 255]) != -1 && // UTF-16
(B = kHexToInt[p[7] & 255]) != -1 && //
(C = kHexToInt[p[8] & 255]) != -1 && //
(D = kHexToInt[p[9] & 255]) != -1) { //
u = A << 12 | B << 8 | C << 4 | D;
if (IsLowSurrogate(u)) {
p += 4 + 6;
c = MergeUtf16(c, u);
} else {
goto BadUnicode;
}
} else {
goto BadUnicode;
}
} else {
goto BadUnicode;
}
// UTF-8
if (c < 0x7f) {
w[0] = c;
i = 1;
} else if (c <= 0x7ff) {
w[0] = 0300 | (c >> 6);
w[1] = 0200 | (c & 077);
i = 2;
} else if (c <= 0xffff) {
if (UNLIKELY(IsSurrogate(c))) {
ReplacementCharacter:
c = 0xfffd;
}
w[0] = 0340 | (c >> 12);
w[1] = 0200 | ((c >> 6) & 077);
w[2] = 0200 | (c & 077);
i = 3;
} else if (~(c >> 18) & 007) {
w[0] = 0360 | (c >> 18);
w[1] = 0200 | ((c >> 12) & 077);
w[2] = 0200 | ((c >> 6) & 077);
w[3] = 0200 | (c & 077);
i = 4;
} else {
goto ReplacementCharacter;
}
luaL_addlstring(&b, w, i);
} else {
reason = "invalid unicode escape";
goto StringFailureWithReason;
BadUnicode:
// Echo invalid \uXXXX sequences
// Rather than corrupting UTF-8!
luaL_addstring(&b, "\\u");
}
break;
case '"':
luaL_pushresult(&b);
return (struct DecodeJson){1, p};
default:
reason = "invalid escape character";
goto StringFailureWithReason;
}
}
break;
StringFailureWithReason:
luaL_pushresultsize(&b, 0);
lua_pop(L, 1);
return (struct DecodeJson){-1, "unexpected eof in string"};
return (struct DecodeJson){-1, reason};
default:
IllegalCharacter:
return (struct DecodeJson){-1, "illegal character"};
BadObjectKey:
return (struct DecodeJson){-1, "object key must be string"};
InvalidEscapeCharacter:
luaL_pushresultsize(&b, 0);
lua_pop(L, 1);
return (struct DecodeJson){-1, "invalid escape character"};
}
}
if (UNLIKELY(context == TOP_LEVEL)) {
@ -357,16 +378,14 @@ static struct DecodeJson Parse(struct lua_State *L, const char *p,
* converted to a floating-point number instead. Invalid surrogate
* escape sequences in strings won't be decoded.
*
* A weird case exists when parsing empty objects. In order to let Lua
* tell them apart from empty arrays, we insert a special key that's
* ignored by our JSON serializer, called `[__json_object__]=true`.
*
* @param L is Lua interpreter state
* @param p is input string
* @param n is byte length of `p` or -1 for automatic strlen()
* @return res.rc is 1 if value pushed, 0 on eof, otherwise -1
* @return res.p is the advanced `p` pointer if `rc` isn't -1
* @return res.p is string describing error if `rc` is -1
* @return r.rc is 1 if value is pushed on lua stack
* @return r.rc is 0 on eof
* @return r.rc is -1 on error
* @return r.p is the advanced `p` pointer if `rc >= 0`
* @return r.p is string describing error if `rc < 0`
*/
struct DecodeJson DecodeJson(struct lua_State *L, const char *p, size_t n) {
if (n == -1) n = p ? strlen(p) : 0;