mirror of
				https://github.com/jart/cosmopolitan.git
				synced 2025-10-26 19:16:41 +00:00 
			
		
		
		
	- Fix preadv() and pwritev() for old distros - Introduce _npassert() and _unassert() macros - Prove that file locks work properly on Windows - Support fcntl(F_DUPFD_CLOEXEC) on more systems
		
			
				
	
	
		
			593 lines
		
	
	
	
		
			21 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			593 lines
		
	
	
	
		
			21 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
 | |
| │vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8                                :vi│
 | |
| ╞══════════════════════════════════════════════════════════════════════════════╡
 | |
| │ Copyright 2022 Justine Alexandra Roberts Tunney                              │
 | |
| │                                                                              │
 | |
| │ Permission to use, copy, modify, and/or distribute this software for         │
 | |
| │ any purpose with or without fee is hereby granted, provided that the         │
 | |
| │ above copyright notice and this permission notice appear in all copies.      │
 | |
| │                                                                              │
 | |
| │ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
 | |
| │ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
 | |
| │ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
 | |
| │ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
 | |
| │ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
 | |
| │ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
 | |
| │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
 | |
| │ PERFORMANCE OF THIS SOFTWARE.                                                │
 | |
| ╚─────────────────────────────────────────────────────────────────────────────*/
 | |
| #include "libc/intrin/bits.h"
 | |
| #include "libc/intrin/likely.h"
 | |
| #include "libc/log/check.h"
 | |
| #include "libc/log/log.h"
 | |
| #include "libc/runtime/stack.h"
 | |
| #include "libc/str/str.h"
 | |
| #include "libc/str/tab.internal.h"
 | |
| #include "libc/str/utf16.h"
 | |
| #include "third_party/double-conversion/wrapper.h"
 | |
| #include "third_party/lua/cosmo.h"
 | |
| #include "third_party/lua/lauxlib.h"
 | |
| #include "third_party/lua/ltests.h"
 | |
| #include "third_party/lua/lua.h"
 | |
| #include "tool/net/ljson.h"
 | |
| 
 | |
// Bit flags carried in Parse()'s `context` argument. They describe what
// the parser is allowed (or required) to see next at the current position.
#define KEY    (1 << 0)  // next value is an object key, so it must be a string
#define COMMA  (1 << 1)  // a ',' separator must come before the next value
#define COLON  (1 << 2)  // a ':' separator must come before the next value
#define ARRAY  (1 << 3)  // a closing ']' is acceptable here
#define OBJECT (1 << 4)  // a closing '}' is acceptable here

// Nesting budget: DecodeJson() starts Parse() with this depth, and every
// nested array/object/member recursion spends one level of it.
#define DEPTH 64
 | |
| 
 | |
// Byte classes for the contents of a JSON string literal. Parse() looks
// up every byte it reads inside a string in kJsonStr[] and switches on
// the resulting class.
#define ASCII     0   // copied to the output unchanged
#define C0        1   // 0x00..0x1F, rejected inside strings
#define DQUOTE    2   // '"', terminates the string
#define BACKSLASH 3   // '\\', introduces an escape sequence
#define UTF8_2    4   // lead byte of a two-byte UTF-8 sequence
#define UTF8_3    5   // lead byte of a three-byte UTF-8 sequence
#define UTF8_4    6   // lead byte of a four-byte UTF-8 sequence
#define C1        7   // 0x80..0x9F where a lead byte was expected, rejected
#define UTF8_3_E0 8   // 0xE0, three-byte lead that needs an overlong check
#define UTF8_3_ED 9   // 0xED, three-byte lead that needs a surrogate check
#define UTF8_4_F0 10  // 0xF0, four-byte lead that needs an overlong check
#define BADUTF8   11  // byte that can never begin a UTF-8 sequence
#define EVILUTF8  12  // 0xC0/0xC1, could only encode overlong ASCII

// Maps each possible byte value to one of the classes above. Note that
// DEL (0x7F) is classified as ASCII rather than as a control code.
static const char kJsonStr[256] = {
    [0x00 ... 0x1F] = C0,         //
    [0x20 ... 0x21] = ASCII,      //
    [0x22] = DQUOTE,              // '"'
    [0x23 ... 0x5B] = ASCII,      //
    [0x5C] = BACKSLASH,           // '\\'
    [0x5D ... 0x7F] = ASCII,      //
    [0x80 ... 0x9F] = C1,         //
    [0xA0 ... 0xBF] = BADUTF8,    //
    [0xC0 ... 0xC1] = EVILUTF8,   //
    [0xC2 ... 0xDF] = UTF8_2,     //
    [0xE0] = UTF8_3_E0,           //
    [0xE1 ... 0xEC] = UTF8_3,     //
    [0xED] = UTF8_3_ED,           //
    [0xEE ... 0xEF] = UTF8_3,     //
    [0xF0] = UTF8_4_F0,           //
    [0xF1 ... 0xF4] = UTF8_4,     //
    [0xF5 ... 0xFF] = BADUTF8,    //
};
 | |
| 
 | |
| static struct DecodeJson Parse(struct lua_State *L, const char *p,
 | |
|                                const char *e, int context, int depth) {
 | |
|   long x;
 | |
|   char w[4];
 | |
|   luaL_Buffer b;
 | |
|   struct DecodeJson r;
 | |
|   const char *a, *reason;
 | |
|   int A, B, C, D, c, d, i, u;
 | |
|   if (UNLIKELY(!depth)) {
 | |
|     return (struct DecodeJson){-1, "maximum depth exceeded"};
 | |
|   }
 | |
|   if (UNLIKELY(!HaveStackMemory(PAGESIZE))) {
 | |
|     return (struct DecodeJson){-1, "out of stack"};
 | |
|   }
 | |
|   for (a = p, d = +1; p < e;) {
 | |
|     switch ((c = *p++ & 255)) {
 | |
|       case ' ':  // spaces
 | |
|       case '\n':
 | |
|       case '\r':
 | |
|       case '\t':
 | |
|         a = p;
 | |
|         break;
 | |
| 
 | |
|       case ',':  // present in list and object
 | |
|         if (context & COMMA) {
 | |
|           context = 0;
 | |
|           a = p;
 | |
|           break;
 | |
|         } else {
 | |
|           return (struct DecodeJson){-1, "unexpected ','"};
 | |
|         }
 | |
| 
 | |
|       case ':':  // present only in object after key
 | |
|         if (context & COLON) {
 | |
|           context = 0;
 | |
|           a = p;
 | |
|           break;
 | |
|         } else {
 | |
|           return (struct DecodeJson){-1, "unexpected ':'"};
 | |
|         }
 | |
| 
 | |
|       case 'n':  // null
 | |
|         if (context & (KEY | COLON | COMMA)) goto OnColonCommaKey;
 | |
|         if (p + 3 <= e && READ32LE(p - 1) == READ32LE("null")) {
 | |
|           lua_pushnil(L);
 | |
|           return (struct DecodeJson){1, p + 3};
 | |
|         } else {
 | |
|           goto IllegalCharacter;
 | |
|         }
 | |
| 
 | |
|       case 'f':  // false
 | |
|         if (context & (KEY | COLON | COMMA)) goto OnColonCommaKey;
 | |
|         if (p + 4 <= e && READ32LE(p) == READ32LE("alse")) {
 | |
|           lua_pushboolean(L, false);
 | |
|           return (struct DecodeJson){1, p + 4};
 | |
|         } else {
 | |
|           goto IllegalCharacter;
 | |
|         }
 | |
| 
 | |
|       case 't':  // true
 | |
|         if (context & (KEY | COLON | COMMA)) goto OnColonCommaKey;
 | |
|         if (p + 3 <= e && READ32LE(p - 1) == READ32LE("true")) {
 | |
|           lua_pushboolean(L, true);
 | |
|           return (struct DecodeJson){1, p + 3};
 | |
|         } else {
 | |
|           goto IllegalCharacter;
 | |
|         }
 | |
| 
 | |
|       default:
 | |
|       IllegalCharacter:
 | |
|         return (struct DecodeJson){-1, "illegal character"};
 | |
|       OnColonCommaKey:
 | |
|         if (context & KEY) goto BadObjectKey;
 | |
|       OnColonComma:
 | |
|         if (context & COLON) goto MissingColon;
 | |
|         return (struct DecodeJson){-1, "missing ','"};
 | |
|       MissingColon:
 | |
|         return (struct DecodeJson){-1, "missing ':'"};
 | |
|       BadObjectKey:
 | |
|         return (struct DecodeJson){-1, "object key must be string"};
 | |
| 
 | |
|       case '-':  // negative
 | |
|         if (context & (COLON | COMMA | KEY)) goto OnColonCommaKey;
 | |
|         if (p < e && isdigit(*p)) {
 | |
|           d = -1;
 | |
|           break;
 | |
|         } else {
 | |
|           return (struct DecodeJson){-1, "bad negative"};
 | |
|         }
 | |
| 
 | |
|       case '0':  // zero or number
 | |
|         if (context & (COLON | COMMA | KEY)) goto OnColonCommaKey;
 | |
|         if (p < e) {
 | |
|           if (*p == '.') {
 | |
|             if (p + 1 == e || !isdigit(p[1])) {
 | |
|               return (struct DecodeJson){-1, "bad double"};
 | |
|             }
 | |
|             goto UseDubble;
 | |
|           } else if (*p == 'e' || *p == 'E') {
 | |
|             goto UseDubble;
 | |
|           } else if (isdigit(*p)) {
 | |
|             return (struct DecodeJson){-1, "unexpected octal"};
 | |
|           }
 | |
|         }
 | |
|         lua_pushinteger(L, 0);
 | |
|         return (struct DecodeJson){1, p};
 | |
| 
 | |
|       case '1' ... '9':  // integer
 | |
|         if (context & (COLON | COMMA | KEY)) goto OnColonCommaKey;
 | |
|         for (x = (c - '0') * d; p < e; ++p) {
 | |
|           c = *p & 255;
 | |
|           if (isdigit(c)) {
 | |
|             if (__builtin_mul_overflow(x, 10, &x) ||
 | |
|                 __builtin_add_overflow(x, (c - '0') * d, &x)) {
 | |
|               goto UseDubble;
 | |
|             }
 | |
|           } else if (c == '.') {
 | |
|             if (p + 1 == e || !isdigit(p[1])) {
 | |
|               return (struct DecodeJson){-1, "bad double"};
 | |
|             }
 | |
|             goto UseDubble;
 | |
|           } else if (c == 'e' || c == 'E') {
 | |
|             goto UseDubble;
 | |
|           } else {
 | |
|             break;
 | |
|           }
 | |
|         }
 | |
|         lua_pushinteger(L, x);
 | |
|         return (struct DecodeJson){1, p};
 | |
| 
 | |
|       UseDubble:  // number
 | |
|         lua_pushnumber(L, StringToDouble(a, e - a, &c));
 | |
|         DCHECK(c > 0, "paranoid avoiding infinite loop");
 | |
|         if (a + c < e && (a[c] == 'e' || a[c] == 'E')) {
 | |
|           return (struct DecodeJson){-1, "bad exponent"};
 | |
|         }
 | |
|         return (struct DecodeJson){1, a + c};
 | |
| 
 | |
|       case '[':  // Array
 | |
|         if (context & (COLON | COMMA | KEY)) goto OnColonCommaKey;
 | |
|         lua_newtable(L);  // +1
 | |
|         for (context = ARRAY, i = 0;;) {
 | |
|           r = Parse(L, p, e, context, depth - 1);  // +2
 | |
|           if (UNLIKELY(r.rc == -1)) {
 | |
|             lua_pop(L, 1);
 | |
|             return r;
 | |
|           }
 | |
|           p = r.p;
 | |
|           if (!r.rc) {
 | |
|             break;
 | |
|           }
 | |
|           lua_rawseti(L, -2, i++ + 1);
 | |
|           context = ARRAY | COMMA;
 | |
|         }
 | |
|         if (!i) {
 | |
|           // we need this kludge so `[]` won't round-trip as `{}`
 | |
|           lua_pushboolean(L, false);  // +2
 | |
|           lua_rawseti(L, -2, 0);
 | |
|         }
 | |
|         return (struct DecodeJson){1, p};
 | |
| 
 | |
|       case ']':
 | |
|         if (context & ARRAY) {
 | |
|           return (struct DecodeJson){0, p};
 | |
|         } else {
 | |
|           return (struct DecodeJson){-1, "unexpected ']'"};
 | |
|         }
 | |
| 
 | |
|       case '}':
 | |
|         if (context & OBJECT) {
 | |
|           return (struct DecodeJson){0, p};
 | |
|         } else {
 | |
|           return (struct DecodeJson){-1, "unexpected '}'"};
 | |
|         }
 | |
| 
 | |
|       case '{':  // Object
 | |
|         if (context & (COLON | COMMA | KEY)) goto OnColonCommaKey;
 | |
|         lua_newtable(L);  // +1
 | |
|         context = KEY | OBJECT;
 | |
|         for (;;) {
 | |
|           r = Parse(L, p, e, context, depth - 1);  // +2
 | |
|           if (r.rc == -1) {
 | |
|             lua_pop(L, 1);
 | |
|             return r;
 | |
|           }
 | |
|           p = r.p;
 | |
|           if (!r.rc) {
 | |
|             return (struct DecodeJson){1, p};
 | |
|           }
 | |
|           r = Parse(L, p, e, COLON, depth - 1);  // +3
 | |
|           if (r.rc == -1) {
 | |
|             lua_pop(L, 2);
 | |
|             return r;
 | |
|           }
 | |
|           if (!r.rc) {
 | |
|             lua_pop(L, 2);
 | |
|             return (struct DecodeJson){-1, "unexpected eof in object"};
 | |
|           }
 | |
|           p = r.p;
 | |
|           lua_settable(L, -3);
 | |
|           context = KEY | COMMA | OBJECT;
 | |
|         }
 | |
| 
 | |
|       case '"':  // string
 | |
|         if (context & (COLON | COMMA)) goto OnColonComma;
 | |
|         luaL_buffinit(L, &b);
 | |
|         for (;;) {
 | |
|           if (UNLIKELY(p >= e)) {
 | |
|           UnexpectedEofString:
 | |
|             reason = "unexpected eof in string";
 | |
|             goto StringFailureWithReason;
 | |
|           }
 | |
|           switch (kJsonStr[(c = *p++ & 255)]) {
 | |
| 
 | |
|             case ASCII:
 | |
|               luaL_addchar(&b, c);
 | |
|               break;
 | |
| 
 | |
|             case DQUOTE:
 | |
|               luaL_pushresult(&b);
 | |
|               return (struct DecodeJson){1, p};
 | |
| 
 | |
|             case BACKSLASH:
 | |
|               if (p >= e) {
 | |
|                 goto UnexpectedEofString;
 | |
|               }
 | |
|               switch ((c = *p++ & 255)) {
 | |
|                 case '"':
 | |
|                 case '/':
 | |
|                 case '\\':
 | |
|                   luaL_addchar(&b, c);
 | |
|                   break;
 | |
|                 case 'b':
 | |
|                   luaL_addchar(&b, '\b');
 | |
|                   break;
 | |
|                 case 'f':
 | |
|                   luaL_addchar(&b, '\f');
 | |
|                   break;
 | |
|                 case 'n':
 | |
|                   luaL_addchar(&b, '\n');
 | |
|                   break;
 | |
|                 case 'r':
 | |
|                   luaL_addchar(&b, '\r');
 | |
|                   break;
 | |
|                 case 't':
 | |
|                   luaL_addchar(&b, '\t');
 | |
|                   break;
 | |
|                 case 'x':
 | |
|                   if (p + 2 <= e &&                         //
 | |
|                       (A = kHexToInt[p[0] & 255]) != -1 &&  // HEX
 | |
|                       (B = kHexToInt[p[1] & 255]) != -1) {  //
 | |
|                     c = A << 4 | B;
 | |
|                     if (!(0x20 <= c && c <= 0x7E)) {
 | |
|                       reason = "hex escape not printable";
 | |
|                       goto StringFailureWithReason;
 | |
|                     }
 | |
|                     p += 2;
 | |
|                     luaL_addchar(&b, c);
 | |
|                     break;
 | |
|                   } else {
 | |
|                     reason = "invalid hex escape";
 | |
|                     goto StringFailureWithReason;
 | |
|                   }
 | |
|                 case 'u':
 | |
|                   if (p + 4 <= e &&                         //
 | |
|                       (A = kHexToInt[p[0] & 255]) != -1 &&  //
 | |
|                       (B = kHexToInt[p[1] & 255]) != -1 &&  // UCS-2
 | |
|                       (C = kHexToInt[p[2] & 255]) != -1 &&  //
 | |
|                       (D = kHexToInt[p[3] & 255]) != -1) {  //
 | |
|                     c = A << 12 | B << 8 | C << 4 | D;
 | |
|                     if (!IsSurrogate(c)) {
 | |
|                       p += 4;
 | |
|                     } else if (IsHighSurrogate(c)) {
 | |
|                       if (p + 4 + 6 <= e &&                     //
 | |
|                           p[4] == '\\' &&                       //
 | |
|                           p[5] == 'u' &&                        //
 | |
|                           (A = kHexToInt[p[6] & 255]) != -1 &&  // UTF-16
 | |
|                           (B = kHexToInt[p[7] & 255]) != -1 &&  //
 | |
|                           (C = kHexToInt[p[8] & 255]) != -1 &&  //
 | |
|                           (D = kHexToInt[p[9] & 255]) != -1) {  //
 | |
|                         u = A << 12 | B << 8 | C << 4 | D;
 | |
|                         if (IsLowSurrogate(u)) {
 | |
|                           p += 4 + 6;
 | |
|                           c = MergeUtf16(c, u);
 | |
|                         } else {
 | |
|                           goto BadUnicode;
 | |
|                         }
 | |
|                       } else {
 | |
|                         goto BadUnicode;
 | |
|                       }
 | |
|                     } else {
 | |
|                       goto BadUnicode;
 | |
|                     }
 | |
|                     // UTF-8
 | |
|                   EncodeUtf8:
 | |
|                     if (c <= 0x7f) {
 | |
|                       w[0] = c;
 | |
|                       i = 1;
 | |
|                     } else if (c <= 0x7ff) {
 | |
|                       w[0] = 0300 | (c >> 6);
 | |
|                       w[1] = 0200 | (c & 077);
 | |
|                       i = 2;
 | |
|                     } else if (c <= 0xffff) {
 | |
|                       if (IsSurrogate(c)) {
 | |
|                       ReplacementCharacter:
 | |
|                         c = 0xfffd;
 | |
|                       }
 | |
|                       w[0] = 0340 | (c >> 12);
 | |
|                       w[1] = 0200 | ((c >> 6) & 077);
 | |
|                       w[2] = 0200 | (c & 077);
 | |
|                       i = 3;
 | |
|                     } else if (~(c >> 18) & 007) {
 | |
|                       w[0] = 0360 | (c >> 18);
 | |
|                       w[1] = 0200 | ((c >> 12) & 077);
 | |
|                       w[2] = 0200 | ((c >> 6) & 077);
 | |
|                       w[3] = 0200 | (c & 077);
 | |
|                       i = 4;
 | |
|                     } else {
 | |
|                       goto ReplacementCharacter;
 | |
|                     }
 | |
|                     luaL_addlstring(&b, w, i);
 | |
|                   } else {
 | |
|                     reason = "invalid unicode escape";
 | |
|                     goto StringFailureWithReason;
 | |
|                   BadUnicode:
 | |
|                     // Echo invalid \uXXXX sequences
 | |
|                     // Rather than corrupting UTF-8!
 | |
|                     luaL_addstring(&b, "\\u");
 | |
|                   }
 | |
|                   break;
 | |
|                 default:
 | |
|                   reason = "invalid escape character";
 | |
|                   goto StringFailureWithReason;
 | |
|               }
 | |
|               break;
 | |
| 
 | |
|             case UTF8_2:
 | |
|               if (p < e &&                  //
 | |
|                   (p[0] & 0300) == 0200) {  //
 | |
|                 c = (c & 037) << 6 |        //
 | |
|                     (p[0] & 077);           //
 | |
|                 p += 1;
 | |
|                 goto EncodeUtf8;
 | |
|               } else {
 | |
|                 reason = "malformed utf-8";
 | |
|                 goto StringFailureWithReason;
 | |
|               }
 | |
| 
 | |
|             case UTF8_3_E0:
 | |
|               if (p + 2 <= e &&             //
 | |
|                   (p[0] & 0377) < 0240 &&   //
 | |
|                   (p[0] & 0300) == 0200 &&  //
 | |
|                   (p[1] & 0300) == 0200) {
 | |
|                 reason = "overlong utf-8 0..0x7ff";
 | |
|                 goto StringFailureWithReason;
 | |
|               }
 | |
|               // fallthrough
 | |
|             case UTF8_3:
 | |
|             ThreeUtf8:
 | |
|               if (p + 2 <= e &&             //
 | |
|                   (p[0] & 0300) == 0200 &&  //
 | |
|                   (p[1] & 0300) == 0200) {  //
 | |
|                 c = (c & 017) << 12 |       //
 | |
|                     (p[0] & 077) << 6 |     //
 | |
|                     (p[1] & 077);           //
 | |
|                 p += 2;
 | |
|                 goto EncodeUtf8;
 | |
|               } else {
 | |
|                 reason = "malformed utf-8";
 | |
|                 goto StringFailureWithReason;
 | |
|               }
 | |
| 
 | |
|             case UTF8_3_ED:
 | |
|               if (p + 2 <= e &&                  //
 | |
|                   (p[0] & 0377) >= 0240) {       //
 | |
|                 if (p + 5 <= e &&                //
 | |
|                     (p[0] & 0377) >= 0256 &&     //
 | |
|                     (p[1] & 0300) == 0200 &&     //
 | |
|                     (p[2] & 0377) == 0355 &&     //
 | |
|                     (p[3] & 0377) >= 0260 &&     //
 | |
|                     (p[4] & 0300) == 0200) {     //
 | |
|                   A = (0355 & 017) << 12 |       // CESU-8
 | |
|                       (p[0] & 077) << 6 |        //
 | |
|                       (p[1] & 077);              //
 | |
|                   B = (0355 & 017) << 12 |       //
 | |
|                       (p[3] & 077) << 6 |        //
 | |
|                       (p[4] & 077);              //
 | |
|                   c = ((A - 0xDB80) << 10) +     //
 | |
|                       ((B - 0xDC00) + 0x10000);  //
 | |
|                   goto EncodeUtf8;
 | |
|                 } else if ((p[0] & 0300) == 0200 &&  //
 | |
|                            (p[1] & 0300) == 0200) {  //
 | |
|                   reason = "utf-16 surrogate in utf-8";
 | |
|                   goto StringFailureWithReason;
 | |
|                 } else {
 | |
|                   reason = "malformed utf-8";
 | |
|                   goto StringFailureWithReason;
 | |
|                 }
 | |
|               }
 | |
|               goto ThreeUtf8;
 | |
| 
 | |
|             case UTF8_4_F0:
 | |
|               if (p + 3 <= e && (p[0] & 0377) < 0220 &&
 | |
|                   (((uint32_t)(p[+2] & 0377) << 030 |
 | |
|                     (uint32_t)(p[+1] & 0377) << 020 |
 | |
|                     (uint32_t)(p[+0] & 0377) << 010 |
 | |
|                     (uint32_t)(p[-1] & 0377) << 000) &
 | |
|                    0xC0C0C000) == 0x80808000) {
 | |
|                 reason = "overlong utf-8 0..0xffff";
 | |
|                 goto StringFailureWithReason;
 | |
|               }
 | |
|               // fallthrough
 | |
|             case UTF8_4:
 | |
|               if (p + 3 <= e &&                               //
 | |
|                   ((A = ((uint32_t)(p[+2] & 0377) << 030 |    //
 | |
|                          (uint32_t)(p[+1] & 0377) << 020 |    //
 | |
|                          (uint32_t)(p[+0] & 0377) << 010 |    //
 | |
|                          (uint32_t)(p[-1] & 0377) << 000)) &  //
 | |
|                    0xC0C0C000) == 0x80808000) {               //
 | |
|                 A = (A & 7) << 18 |                           //
 | |
|                     (A & (077 << 010)) << (12 - 010) |        //
 | |
|                     (A & (077 << 020)) >> -(6 - 020) |        //
 | |
|                     (A & (077 << 030)) >> 030;                //
 | |
|                 if (A <= 0x10FFFF) {
 | |
|                   c = A;
 | |
|                   p += 3;
 | |
|                   goto EncodeUtf8;
 | |
|                 } else {
 | |
|                   reason = "utf-8 exceeds utf-16 range";
 | |
|                   goto StringFailureWithReason;
 | |
|                 }
 | |
|               } else {
 | |
|                 reason = "malformed utf-8";
 | |
|                 goto StringFailureWithReason;
 | |
|               }
 | |
| 
 | |
|             case EVILUTF8:
 | |
|               if (p < e &&                  //
 | |
|                   (p[0] & 0300) == 0200) {  //
 | |
|                 reason = "overlong ascii";
 | |
|                 goto StringFailureWithReason;
 | |
|               }
 | |
|               // fallthrough
 | |
|             case BADUTF8:
 | |
|               reason = "illegal utf-8 character";
 | |
|               goto StringFailureWithReason;
 | |
| 
 | |
|             case C0:
 | |
|               reason = "non-del c0 control code in string";
 | |
|               goto StringFailureWithReason;
 | |
| 
 | |
|             case C1:
 | |
|               reason = "c1 control code in string";
 | |
|               goto StringFailureWithReason;
 | |
| 
 | |
|             default:
 | |
|               unreachable;
 | |
|           }
 | |
|         }
 | |
|         unreachable;
 | |
|       StringFailureWithReason:
 | |
|         luaL_pushresultsize(&b, 0);
 | |
|         lua_pop(L, 1);
 | |
|         return (struct DecodeJson){-1, reason};
 | |
|     }
 | |
|   }
 | |
|   if (depth == DEPTH) {
 | |
|     return (struct DecodeJson){0, 0};
 | |
|   } else {
 | |
|     return (struct DecodeJson){-1, "unexpected eof"};
 | |
|   }
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Parses JSON data structure string into Lua data structure.
 | |
|  *
 | |
|  * This function returns the number of items pushed to the Lua stack,
 | |
|  * which should be 1, unless no parseable JSON content was found, in
 | |
|  * which case this will return 0. On error -1 is returned. There's
 | |
|  * currently no error return condition. This function doesn't do JSON
 | |
|  * validity enforcement.
 | |
|  *
 | |
|  * JSON UTF-16 strings are re-encoded as valid UTF-8. 64-bit integers
 | |
|  * are supported. If an integer overflows during parsing, it'll be
 | |
|  * converted to a floating-point number instead. Invalid surrogate
 | |
|  * escape sequences in strings won't be decoded.
 | |
|  *
 | |
|  * @param L is Lua interpreter state
 | |
|  * @param p is input string
 | |
|  * @param n is byte length of `p` or -1 for automatic strlen()
 | |
|  * @return r.rc is 1 if value is pushed on lua stack
 | |
|  * @return r.rc is 0 on eof
 | |
|  * @return r.rc is -1 on error
 | |
|  * @return r.p is is advanced `p` pointer if `rc ≥ 0`
 | |
|  * @return r.p is string describing error if `rc < 0`
 | |
|  */
 | |
| struct DecodeJson DecodeJson(struct lua_State *L, const char *p, size_t n) {
 | |
|   if (n == -1) n = p ? strlen(p) : 0;
 | |
|   if (lua_checkstack(L, DEPTH * 3 + LUA_MINSTACK)) {
 | |
|     return Parse(L, p, p + n, 0, DEPTH);
 | |
|   } else {
 | |
|     return (struct DecodeJson){-1, "can't set stack depth"};
 | |
|   }
 | |
| }
 |