mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-28 08:12:28 +00:00
Make major improvements to stdio
Buffering now has optimal performance, bugs have been fixed, and some missing apis have been introduced. This implementation is also now more production worthy since it's less brittle now in terms of system errors. That's going to help redbean since lua i/o is all based on stdio. See #97
This commit is contained in:
parent
09bcfa23d5
commit
da36e7e256
69 changed files with 1595 additions and 735 deletions
|
@ -27,44 +27,130 @@
|
|||
* HTML entities and forward slash are escaped too for added safety.
|
||||
*
|
||||
* We assume the UTF-8 is well-formed and can be represented as UTF-16.
|
||||
* Things that can't be decoded or encoded will be replaced with invalid
|
||||
* code-point markers. This function is agnostic to numbers that have
|
||||
* been used with malicious intent in the past under buggy software.
|
||||
* Noncanonical encodings such as overlong NUL are canonicalized as NUL.
|
||||
* Things that can't be decoded will fall back to binary. Things that
|
||||
* can't be encoded will use invalid codepoint markers. This function is
|
||||
* agnostic to numbers that have been used with malicious intent in the
|
||||
* past under buggy software. Noncanonical encodings such as overlong
|
||||
* NUL are canonicalized as NUL.
|
||||
*/
|
||||
struct EscapeResult EscapeJsStringLiteral(const char *data, size_t size) {
|
||||
char *p;
|
||||
size_t i;
|
||||
unsigned n;
|
||||
uint64_t w;
|
||||
wint_t x, y;
|
||||
unsigned i, n;
|
||||
wint_t x, a, b;
|
||||
const char *d, *e;
|
||||
struct EscapeResult r;
|
||||
d = data;
|
||||
e = data + size;
|
||||
p = r.data = xmalloc(size * 6 + 6 + 1);
|
||||
for (i = 0; i < size;) {
|
||||
x = data[i++] & 0xff;
|
||||
if (x >= 0200) {
|
||||
if (x >= 0300) {
|
||||
n = ThomPikeLen(x);
|
||||
x = ThomPikeByte(x);
|
||||
while (--n) {
|
||||
if (i < size) {
|
||||
y = data[i++] & 0xff;
|
||||
if (ThomPikeCont(y)) {
|
||||
x = ThomPikeMerge(x, y);
|
||||
} else {
|
||||
x = 0xFFFD;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
x = 0xFFFD;
|
||||
while (d < e) {
|
||||
x = *d++ & 0xff;
|
||||
if (x >= 0300) {
|
||||
a = ThomPikeByte(x);
|
||||
n = ThomPikeLen(x) - 1;
|
||||
if (d + n <= e) {
|
||||
for (i = 0;;) {
|
||||
b = d[i] & 0xff;
|
||||
if (!ThomPikeCont(b)) break;
|
||||
a = ThomPikeMerge(a, b);
|
||||
if (++i == n) {
|
||||
x = a;
|
||||
d += i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
x = 0xFFFD;
|
||||
}
|
||||
}
|
||||
switch (x) {
|
||||
case ' ':
|
||||
case '!':
|
||||
case '#':
|
||||
case '$':
|
||||
case '%':
|
||||
case '(':
|
||||
case ')':
|
||||
case '*':
|
||||
case '+':
|
||||
case ',':
|
||||
case '-':
|
||||
case '.':
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9':
|
||||
case ':':
|
||||
case ';':
|
||||
case '?':
|
||||
case '@':
|
||||
case 'A':
|
||||
case 'B':
|
||||
case 'C':
|
||||
case 'D':
|
||||
case 'E':
|
||||
case 'F':
|
||||
case 'G':
|
||||
case 'H':
|
||||
case 'I':
|
||||
case 'J':
|
||||
case 'K':
|
||||
case 'L':
|
||||
case 'M':
|
||||
case 'N':
|
||||
case 'O':
|
||||
case 'P':
|
||||
case 'Q':
|
||||
case 'R':
|
||||
case 'S':
|
||||
case 'T':
|
||||
case 'U':
|
||||
case 'V':
|
||||
case 'W':
|
||||
case 'X':
|
||||
case 'Y':
|
||||
case 'Z':
|
||||
case '[':
|
||||
case ']':
|
||||
case '^':
|
||||
case '_':
|
||||
case '`':
|
||||
case 'a':
|
||||
case 'b':
|
||||
case 'c':
|
||||
case 'd':
|
||||
case 'e':
|
||||
case 'f':
|
||||
case 'g':
|
||||
case 'h':
|
||||
case 'i':
|
||||
case 'j':
|
||||
case 'k':
|
||||
case 'l':
|
||||
case 'm':
|
||||
case 'n':
|
||||
case 'o':
|
||||
case 'p':
|
||||
case 'q':
|
||||
case 'r':
|
||||
case 's':
|
||||
case 't':
|
||||
case 'u':
|
||||
case 'v':
|
||||
case 'w':
|
||||
case 'x':
|
||||
case 'y':
|
||||
case 'z':
|
||||
case '{':
|
||||
case '|':
|
||||
case '}':
|
||||
case '~':
|
||||
*p++ = x;
|
||||
break;
|
||||
case '\t':
|
||||
p[0] = '\\';
|
||||
p[1] = 't';
|
||||
|
@ -105,24 +191,19 @@ struct EscapeResult EscapeJsStringLiteral(const char *data, size_t size) {
|
|||
p[1] = '\'';
|
||||
p += 2;
|
||||
break;
|
||||
default:
|
||||
if (0x20 <= x && x < 0x7F) {
|
||||
*p++ = x;
|
||||
break;
|
||||
}
|
||||
/* fallthrough */
|
||||
case '<':
|
||||
case '>':
|
||||
case '&':
|
||||
case '=':
|
||||
default:
|
||||
w = EncodeUtf16(x);
|
||||
do {
|
||||
p[0] = '\\';
|
||||
p[1] = 'u';
|
||||
p[2] = "0123456789ABCDEF"[(w & 0xF000) >> 014];
|
||||
p[3] = "0123456789ABCDEF"[(w & 0x0F00) >> 010];
|
||||
p[4] = "0123456789ABCDEF"[(w & 0x00F0) >> 004];
|
||||
p[5] = "0123456789ABCDEF"[(w & 0x000F) >> 000];
|
||||
p[2] = "0123456789abcdef"[(w & 0xF000) >> 014];
|
||||
p[3] = "0123456789abcdef"[(w & 0x0F00) >> 010];
|
||||
p[4] = "0123456789abcdef"[(w & 0x00F0) >> 004];
|
||||
p[5] = "0123456789abcdef"[(w & 0x000F) >> 000];
|
||||
p += 6;
|
||||
} while ((w >>= 16));
|
||||
break;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue