mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-24 06:12:27 +00:00
Transcode ISO-8859-1 in HTTP headers
If we keep making changes like this, redbean might not be a toy anymore. Additional steps are also being taken now to prevent ANSI control codes sent by the client from slipping into logs.
This commit is contained in:
parent
dcbd2b8668
commit
a1677d605a
14 changed files with 675 additions and 161 deletions
54
net/http/decodelatin1.c
Normal file
54
net/http/decodelatin1.c
Normal file
|
@ -0,0 +1,54 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "net/http/http.h"
|
||||
|
||||
/**
 * Converts an ISO-8859-1 (Latin-1) byte string into UTF-8.
 *
 * Bytes below 0x80 are copied through unchanged. Every other byte is
 * expanded into a two-byte sequence, which is why the scratch buffer is
 * sized at twice the input plus room for the terminator.
 *
 * @param data is input value
 * @param size if -1 implies strlen
 * @param out_size if non-NULL receives output length on success
 * @return allocated NUL-terminated buffer, or NULL w/ errno
 */
char *DecodeLatin1(const char *data, size_t size, size_t *out_size) {
  size_t i, n;
  unsigned char b;
  char *buf, *fit;
  if (size == (size_t)-1) size = strlen(data);
  buf = malloc(size * 2 + 1);
  if (!buf) return NULL;
  n = 0;
  for (i = 0; i < size; ++i) {
    b = (unsigned char)data[i];
    if (b & 0x80) {
      /* high half of latin1 maps onto U+0080..U+00FF: two utf-8 bytes */
      buf[n++] = (char)(0xC0 | (b >> 6));
      buf[n++] = (char)(0x80 | (b & 0x3F));
    } else {
      buf[n++] = (char)b;
    }
  }
  if (out_size) *out_size = n;
  buf[n] = '\0';
  /* give back the unused tail; keep the big buffer if shrinking fails */
  fit = realloc(buf, n + 1);
  return fit ? fit : buf;
}
|
85
net/http/encodehttpheadervalue.c
Normal file
85
net/http/encodehttpheadervalue.c
Normal file
|
@ -0,0 +1,85 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/errno.h"
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/str/thompike.h"
|
||||
#include "net/http/http.h"
|
||||
|
||||
/**
 * Encodes HTTP header value.
 *
 * The following steps are applied to the input:
 *
 * 1. Leading and trailing spaces and tabs are removed.
 * 2. Two-byte UTF-8 sequences are folded into a single ISO-8859-1 byte.
 * 3. Anything outside 0x20..0x7E, 0xA0..0xFF, and tab is refused, which
 *    keeps C0 and C1 control codes out of the result.
 *
 * A lead byte that isn't followed by a continuation byte is kept as-is,
 * so input that isn't thompson-pike encoded falls back to latin1 in
 * most cases. A longer multi-byte sequence is turned into zero, which
 * then fails validation.
 *
 * @param data is input value
 * @param size if -1 implies strlen
 * @param out_size if non-NULL receives output length on success
 * @return allocated NUL-terminated string, or NULL w/ errno; EILSEQ is
 *     raised when a forbidden character is encountered
 */
char *EncodeHttpHeaderValue(const char *data, size_t size, size_t *out_size) {
  bool started, ok;
  wint_t ch;
  char *buf, *dst, *fit;
  const char *src, *end;
  if (size == (size_t)-1) size = strlen(data);
  buf = malloc(size + 1);
  if (!buf) return NULL;
  started = false;
  dst = buf;
  src = data;
  end = data + size;
  while (src < end) {
    ch = *src++ & 0xff;
    if (ch >= 0300 && src < end && ThomPikeCont(*src)) {
      if (ThomPikeLen(ch) == 2) {
        ch = ThomPikeMerge(ThomPikeByte(ch), *src++);
      } else {
        ch = 0; /* can't fit in latin1; rejected by the check below */
      }
    }
    if (ch == ' ' || ch == '\t') {
      if (!started) continue; /* still inside the leading whitespace */
    } else {
      started = true;
    }
    ok = ch == '\t' || (0x20 <= ch && ch <= 0x7E) || (0xA0 <= ch && ch <= 0xFF);
    if (!ok) {
      free(buf);
      errno = EILSEQ;
      return NULL;
    }
    *dst++ = ch;
  }
  /* drop trailing whitespace that was copied before we knew it was trailing */
  while (dst > buf && (dst[-1] == ' ' || dst[-1] == '\t')) {
    --dst;
  }
  if (out_size) *out_size = dst - buf;
  *dst++ = '\0';
  /* give back the unused tail; keep the original buffer if shrinking fails */
  fit = realloc(buf, dst - buf);
  if (fit) buf = fit;
  return buf;
}
|
|
@ -121,6 +121,10 @@ unsigned ParseHttpVersion(const char *, size_t);
|
|||
int64_t ParseHttpDateTime(const char *, size_t);
|
||||
const char *GetHttpReason(int);
|
||||
const char *GetHttpHeaderName(int);
|
||||
char *DecodeLatin1(const char *, size_t, size_t *);
|
||||
bool IsValidHttpToken(const char *, size_t);
|
||||
char *EncodeHttpHeaderValue(const char *, size_t, size_t *);
|
||||
char *VisualizeControlCodes(const char *, size_t, size_t *);
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
|
|
54
net/http/isvalidhttptoken.c
Normal file
54
net/http/isvalidhttptoken.c
Normal file
|
@ -0,0 +1,54 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/str/str.h"
|
||||
#include "net/http/http.h"
|
||||
|
||||
// http/1.1 token dispatch
// 0 is CTLs, SP, ()<>@,;:\"/[]?={}, DEL, and every byte >= 0x80
// 1 is legal ascii
static const char kHttpToken[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x00
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x10
    0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0,  // 0x20
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  // 0x30
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x40
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,  // 0x50
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x60
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0,  // 0x70
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x80
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x90
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xa0
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xb0
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xc0
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xd0
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xe0
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xf0
};

/**
 * Returns true if string is a non-empty HTTP/1.1 token.
 *
 * Every byte must be marked legal in the dispatch table above, which
 * excludes control codes, space, the separators ()<>@,;:\"/[]?={}, and
 * all non-ASCII bytes. An empty string is never a token, whether its
 * length was passed explicitly or computed from the -1 sentinel.
 *
 * @param s is input value
 * @param n if -1 implies strlen
 * @return true if s consists solely of one or more token characters
 */
bool IsValidHttpToken(const char *s, size_t n) {
  size_t i;
  /* resolve the sentinel first so "" is rejected on both code paths */
  if (n == (size_t)-1) n = strlen(s);
  if (!n) return false;
  for (i = 0; i < n; ++i) {
    /* mask keeps the index in 0..255 even where char is signed */
    if (!kHttpToken[s[i] & 0xff]) {
      return false;
    }
  }
  return true;
}
|
|
@ -47,9 +47,19 @@ void DestroyHttpRequest(struct HttpRequest *r) {
|
|||
|
||||
/**
|
||||
* Parses HTTP request.
|
||||
*
|
||||
* This parser is responsible for determining the length of a message
|
||||
* and slicing the strings inside it. Performance is attained using
|
||||
* perfect hash tables. No memory allocation is performed for normal
|
||||
* messages. Line folding is forbidden. State persists across calls so
|
||||
* that fragmented messages can be handled efficiently. A limitation on
|
||||
* message size is imposed to make the header data structures smaller.
|
||||
* All other things are permissive to the greatest extent possible.
|
||||
* Further functions are provided for the interpretation, validation,
|
||||
* and sanitization of various fields.
|
||||
*/
|
||||
int ParseHttpRequest(struct HttpRequest *r, const char *p, size_t n) {
|
||||
int c, h;
|
||||
int c, h, i;
|
||||
struct HttpRequestHeader *x;
|
||||
for (n = MIN(n, LIMIT); r->i < n; ++r->i) {
|
||||
c = p[r->i] & 0xff;
|
||||
|
@ -122,14 +132,16 @@ int ParseHttpRequest(struct HttpRequest *r, const char *p, size_t n) {
|
|||
/* fallthrough */
|
||||
case HVAL:
|
||||
if (c == '\r' || c == '\n') {
|
||||
i = r->i;
|
||||
while (i > r->a && (p[i - 1] == ' ' || p[i - 1] == '\t')) --i;
|
||||
if ((h = GetHttpHeader(p + r->k.a, r->k.b - r->k.a)) != -1) {
|
||||
r->headers[h].a = r->a;
|
||||
r->headers[h].b = r->i;
|
||||
r->headers[h].b = i;
|
||||
} else if ((x = realloc(r->xheaders.p, (r->xheaders.n + 1) *
|
||||
sizeof(*r->xheaders.p)))) {
|
||||
x[r->xheaders.n].k = r->k;
|
||||
x[r->xheaders.n].v.a = r->a;
|
||||
x[r->xheaders.n].v.b = r->i;
|
||||
x[r->xheaders.n].v.b = i;
|
||||
r->xheaders.p = x;
|
||||
++r->xheaders.n;
|
||||
}
|
||||
|
|
93
net/http/visualizecontrolcodes.c
Normal file
93
net/http/visualizecontrolcodes.c
Normal file
|
@ -0,0 +1,93 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/str/thompike.h"
|
||||
#include "libc/str/tpenc.h"
|
||||
#include "net/http/http.h"
|
||||
|
||||
/**
 * Filters out control codes from string.
 *
 * This is useful for logging data like HTTP messages, where we don't
 * want full blown C string literal escaping, but we don't want things
 * like raw ANSI control codes from untrusted devices in our terminals.
 *
 * The translation performed on each decoded character is:
 *
 * - 0x80..0x9F becomes the six characters `\u00XX` in lowercase hex
 * - 0x00..0x1F other than tab, CR, and LF is shifted up by 0x2400
 *   (Control Pictures)
 * - 0x7F becomes 0x2421
 * - everything else is re-encoded with tpenc() unchanged
 *
 * A lead byte whose continuation bytes are missing or malformed is
 * treated as a lone character rather than being merged.
 *
 * @param data is input value
 * @param size if -1 implies strlen
 * @param out_size if non-NULL receives output length on success
 * @return allocated NUL-terminated buffer, or NULL w/ errno
 */
char *VisualizeControlCodes(const char *data, size_t size, size_t *out_size) {
  uint64_t w;
  char *r, *q;
  unsigned i, n;
  wint_t x, a, b;
  const char *p, *e;
  if (size == -1) size = strlen(data);
  /* the \u00XX escape below emits six bytes for a single input byte */
  if ((r = malloc(size * 6 + 1))) {
    q = r;
    p = data;
    e = p + size;
    while (p < e) {
      x = *p++ & 0xff;
      if (x >= 0300) {
        a = ThomPikeByte(x);
        n = ThomPikeLen(x) - 1;
        /* compare lengths rather than forming p + n, since a pointer
           beyond one-past-the-end of the buffer is undefined */
        if (n <= (size_t)(e - p)) {
          for (i = 0;;) {
            b = p[i] & 0xff;
            if (!ThomPikeCont(b)) break; /* malformed: keep lone lead byte */
            a = ThomPikeMerge(a, b);
            if (++i == n) {
              /* whole sequence was well-formed, so consume it */
              x = a;
              p += i;
              break;
            }
          }
        }
      }
      if (0x80 <= x && x < 0xA0) {
        q[0] = '\\';
        q[1] = 'u';
        q[2] = '0';
        q[3] = '0';
        q[4] = "0123456789abcdef"[(x & 0xF0) >> 4];
        q[5] = "0123456789abcdef"[(x & 0x0F) >> 0];
        q += 6;
      } else {
        if (x < 0x20) {
          if (x != '\t' && x != '\r' && x != '\n') {
            x += 0x2400; /* Control Pictures */
          }
        } else if (x == 0x7F) {
          x = 0x2421;
        }
        /* store the bytes of tpenc's result, lowest byte first, until
           the remaining word is zero */
        w = tpenc(x);
        do {
          *q++ = w;
        } while ((w >>= 8));
      }
    }
    if (out_size) *out_size = q - r;
    *q++ = '\0';
    /* give back the unused tail; keep the big buffer if shrinking fails */
    if ((q = realloc(r, q - r))) r = q;
  }
  return r;
}
|
Loading…
Add table
Add a link
Reference in a new issue