mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 11:37:35 +00:00
322 lines
11 KiB
C
322 lines
11 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
|
│ │
|
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
|
│ any purpose with or without fee is hereby granted, provided that the │
|
|
│ above copyright notice and this permission notice appear in all copies. │
|
|
│ │
|
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
#include "libc/assert.h"
|
|
#include "libc/ctype.h"
|
|
#include "libc/limits.h"
|
|
#include "libc/mem/alg.h"
|
|
#include "libc/mem/arraylist.internal.h"
|
|
#include "libc/mem/mem.h"
|
|
#include "libc/serialize.h"
|
|
#include "libc/stdio/stdio.h"
|
|
#include "libc/str/str.h"
|
|
#include "libc/str/tab.internal.h"
|
|
#include "libc/sysv/errfuns.h"
|
|
#include "libc/x/x.h"
|
|
#include "net/http/http.h"
|
|
|
|
/**
|
|
* Initializes HTTP message parser.
|
|
*/
|
|
void InitHttpMessage(struct HttpMessage *r, int type) {
|
|
unassert(type == kHttpRequest || type == kHttpResponse);
|
|
bzero(r, sizeof(*r));
|
|
r->type = type;
|
|
}
|
|
|
|
/**
|
|
* Destroys HTTP message parser.
|
|
*/
|
|
void DestroyHttpMessage(struct HttpMessage *r) {
|
|
if (r->xheaders.p) {
|
|
free(r->xheaders.p);
|
|
r->xheaders.p = NULL;
|
|
r->xheaders.n = 0;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Resets http message parser state, so it can be re-used.
|
|
*
|
|
* This function amortizes the cost of malloc() in threads that process
|
|
* multiple messages in a loop.
|
|
*
|
|
* @param r is assumed to have been passed to `InitHttpMessage` earlier
|
|
*/
|
|
void ResetHttpMessage(struct HttpMessage *r, int type) {
|
|
unassert(type == kHttpRequest || type == kHttpResponse);
|
|
unsigned c = r->xheaders.c;
|
|
struct HttpHeader *p = r->xheaders.p;
|
|
bzero(r, sizeof(*r));
|
|
r->xheaders.c = c;
|
|
r->xheaders.p = p;
|
|
r->type = type;
|
|
}
|
|
|
|
/**
|
|
* Parses HTTP request or response.
|
|
*
|
|
* This parser is responsible for determining the length of a message
|
|
* and slicing the strings inside it. Performance is attained using
|
|
* perfect hash tables. No memory allocation is performed for normal
|
|
* messagesy. Line folding is forbidden. State persists across calls so
|
|
* that fragmented messages can be handled efficiently. A limitation on
|
|
* message size is imposed to make the header data structures smaller.
|
|
*
|
|
* This parser assumes ISO-8859-1 and guarantees no C0 or C1 control
|
|
* codes are present in message fields, with the exception of tab.
|
|
* Please note that fields like kHttpStateUri may use UTF-8 percent encoding.
|
|
* This parser doesn't care if you choose ASA X3.4-1963 or MULTICS newlines.
|
|
*
|
|
* kHttpRepeatable defines which standard header fields are O(1) and
|
|
* which ones may have comma entries spilled over into xheaders. For
|
|
* most headers it's sufficient to simply check the static slice. If
|
|
* r->headers[kHttpFoo].a is zero then the header is totally absent.
|
|
*
|
|
* This parser has linear complexity. Each character only needs to be
|
|
* considered a single time. That's the case even if messages are
|
|
* fragmented. If a message is valid but incomplete, this function will
|
|
* return zero so that it can be resumed as soon as more data arrives.
|
|
*
|
|
* This parser takes about 400 nanoseconds to parse a 403 byte Chrome
|
|
* HTTP request under MODE=rel on a Core i9 which is about three cycles
|
|
* per byte or a gigabyte per second of throughput per core.
|
|
*
|
|
* @param p needs to have at least `c` bytes available
|
|
* @param n is how many bytes have been received off the network so far
|
|
* @param c is the capacity of `p` buffer; beyond `SHRT_MAX` is ignored
|
|
* @return bytes on success, -1 on failure, 0 if more data must be read
|
|
* @note messages can't exceed 2**15 bytes
|
|
* @see HTTP/1.1 RFC2616 RFC2068
|
|
* @see HTTP/1.0 RFC1945
|
|
*/
|
|
int ParseHttpMessage(struct HttpMessage *r, const char *p, size_t n, size_t c) {
|
|
int h, i, ch;
|
|
if (n > c)
|
|
return einval();
|
|
n = n > SHRT_MAX ? SHRT_MAX : n;
|
|
c = c > SHRT_MAX ? SHRT_MAX : c;
|
|
for (; r->i < n; ++r->i) {
|
|
ch = p[r->i] & 255;
|
|
switch (r->t) {
|
|
case kHttpStateStart:
|
|
if (ch == '\r' || ch == '\n')
|
|
break; // RFC7230 § 3.5
|
|
if (!kHttpToken[ch])
|
|
return ebadmsg();
|
|
if (r->type == kHttpRequest) {
|
|
r->t = kHttpStateMethod;
|
|
r->method = kToUpper[ch];
|
|
r->a = 8;
|
|
} else {
|
|
r->t = kHttpStateVersion;
|
|
r->a = r->i;
|
|
}
|
|
break;
|
|
case kHttpStateMethod:
|
|
for (;;) {
|
|
if (ch == ' ') {
|
|
r->a = r->i + 1;
|
|
r->t = kHttpStateUri;
|
|
break;
|
|
} else if (r->a == 64 || !kHttpToken[ch]) {
|
|
return ebadmsg();
|
|
}
|
|
ch = kToUpper[ch];
|
|
r->method |= (uint64_t)ch << r->a;
|
|
r->a += 8;
|
|
if (++r->i == n)
|
|
break;
|
|
ch = p[r->i] & 255;
|
|
}
|
|
break;
|
|
case kHttpStateUri:
|
|
for (;;) {
|
|
if (ch == ' ' || ch == '\r' || ch == '\n') {
|
|
if (r->i == r->a)
|
|
return ebadmsg();
|
|
r->uri.a = r->a;
|
|
r->uri.b = r->i;
|
|
if (ch == ' ') {
|
|
r->a = r->i + 1;
|
|
r->t = kHttpStateVersion;
|
|
} else {
|
|
r->version = 9;
|
|
r->t = ch == '\r' ? kHttpStateCr : kHttpStateLf1;
|
|
}
|
|
break;
|
|
} else if (ch < 0x20 || (0x7F <= ch && ch < 0xA0)) {
|
|
return ebadmsg();
|
|
}
|
|
if (++r->i == n)
|
|
break;
|
|
ch = p[r->i] & 255;
|
|
}
|
|
break;
|
|
case kHttpStateVersion:
|
|
if (ch == ' ' || ch == '\r' || ch == '\n') {
|
|
if (r->i - r->a == 8 &&
|
|
(READ64BE(p + r->a) & 0xFFFFFFFFFF00FF00) == 0x485454502F002E00 &&
|
|
isdigit(p[r->a + 5]) && isdigit(p[r->a + 7])) {
|
|
r->version = (p[r->a + 5] - '0') * 10 + (p[r->a + 7] - '0');
|
|
if (r->type == kHttpRequest) {
|
|
r->t = ch == '\r' ? kHttpStateCr : kHttpStateLf1;
|
|
} else {
|
|
r->t = kHttpStateStatus;
|
|
}
|
|
} else {
|
|
return ebadmsg();
|
|
}
|
|
}
|
|
break;
|
|
case kHttpStateStatus:
|
|
for (;;) {
|
|
if (ch == ' ' || ch == '\r' || ch == '\n') {
|
|
if (r->status < 100)
|
|
return ebadmsg();
|
|
if (ch == ' ') {
|
|
r->a = r->i + 1;
|
|
r->t = kHttpStateMessage;
|
|
} else {
|
|
r->t = ch == '\r' ? kHttpStateCr : kHttpStateLf1;
|
|
}
|
|
break;
|
|
} else if ('0' <= ch && ch <= '9') {
|
|
r->status *= 10;
|
|
r->status += ch - '0';
|
|
if (r->status > 999)
|
|
return ebadmsg();
|
|
} else {
|
|
return ebadmsg();
|
|
}
|
|
if (++r->i == n)
|
|
break;
|
|
ch = p[r->i] & 255;
|
|
}
|
|
break;
|
|
case kHttpStateMessage:
|
|
for (;;) {
|
|
if (ch == '\r' || ch == '\n') {
|
|
r->message.a = r->a;
|
|
r->message.b = r->i;
|
|
r->t = ch == '\r' ? kHttpStateCr : kHttpStateLf1;
|
|
break;
|
|
} else if (ch < 0x20 || (0x7F <= ch && ch < 0xA0)) {
|
|
return ebadmsg();
|
|
}
|
|
if (++r->i == n)
|
|
break;
|
|
ch = p[r->i] & 255;
|
|
}
|
|
break;
|
|
case kHttpStateCr:
|
|
if (ch != '\n')
|
|
return ebadmsg();
|
|
r->t = kHttpStateLf1;
|
|
break;
|
|
case kHttpStateLf1:
|
|
if (ch == '\r') {
|
|
r->t = kHttpStateLf2;
|
|
break;
|
|
} else if (ch == '\n') {
|
|
return ++r->i;
|
|
} else if (!kHttpToken[ch]) {
|
|
// 1. Forbid empty header name (RFC2616 §2.2)
|
|
// 2. Forbid line folding (RFC7230 §3.2.4)
|
|
return ebadmsg();
|
|
}
|
|
r->k.a = r->i;
|
|
r->t = kHttpStateName;
|
|
break;
|
|
case kHttpStateName:
|
|
for (;;) {
|
|
if (ch == ':') {
|
|
r->k.b = r->i;
|
|
r->t = kHttpStateColon;
|
|
break;
|
|
} else if (!kHttpToken[ch]) {
|
|
return ebadmsg();
|
|
}
|
|
if (++r->i == n)
|
|
break;
|
|
ch = p[r->i] & 255;
|
|
}
|
|
break;
|
|
case kHttpStateColon:
|
|
if (ch == ' ' || ch == '\t')
|
|
break;
|
|
r->a = r->i;
|
|
r->t = kHttpStateValue;
|
|
// fallthrough
|
|
case kHttpStateValue:
|
|
for (;;) {
|
|
if (ch == '\r' || ch == '\n') {
|
|
i = r->i;
|
|
while (i > r->a && (p[i - 1] == ' ' || p[i - 1] == '\t'))
|
|
--i;
|
|
if ((h = GetHttpHeader(p + r->k.a, r->k.b - r->k.a)) != -1 &&
|
|
(!r->headers[h].a || !kHttpRepeatable[h])) {
|
|
r->headers[h].a = r->a;
|
|
r->headers[h].b = i;
|
|
} else {
|
|
if (r->xheaders.n == r->xheaders.c) {
|
|
unsigned c2;
|
|
struct HttpHeader *p1, *p2;
|
|
p1 = r->xheaders.p;
|
|
c2 = r->xheaders.c;
|
|
if (c2 == 0) {
|
|
c2 = 1;
|
|
} else {
|
|
c2 = c2 * 2;
|
|
}
|
|
if ((p2 = realloc(p1, c2 * sizeof(*p1)))) {
|
|
r->xheaders.p = p2;
|
|
r->xheaders.c = c2;
|
|
}
|
|
}
|
|
if (r->xheaders.n < r->xheaders.c) {
|
|
r->xheaders.p[r->xheaders.n].k = r->k;
|
|
r->xheaders.p[r->xheaders.n].v.a = r->a;
|
|
r->xheaders.p[r->xheaders.n].v.b = i;
|
|
r->xheaders.p = r->xheaders.p;
|
|
++r->xheaders.n;
|
|
}
|
|
}
|
|
r->t = ch == '\r' ? kHttpStateCr : kHttpStateLf1;
|
|
break;
|
|
} else if ((ch < 0x20 && ch != '\t') || (0x7F <= ch && ch < 0xA0)) {
|
|
return ebadmsg();
|
|
}
|
|
if (++r->i == n)
|
|
break;
|
|
ch = p[r->i] & 255;
|
|
}
|
|
break;
|
|
case kHttpStateLf2:
|
|
if (ch == '\n') {
|
|
return ++r->i;
|
|
}
|
|
return ebadmsg();
|
|
default:
|
|
__builtin_unreachable();
|
|
}
|
|
}
|
|
if (r->i < c)
|
|
return 0;
|
|
return ebadmsg();
|
|
}
|