2020-06-15 14:18:57 +00:00
|
|
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
|
|
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
|
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
|
|
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
|
|
|
│ │
|
2020-12-28 01:18:44 +00:00
|
|
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
|
|
|
│ any purpose with or without fee is hereby granted, provided that the │
|
|
|
|
│ above copyright notice and this permission notice appear in all copies. │
|
2020-06-15 14:18:57 +00:00
|
|
|
│ │
|
2020-12-28 01:18:44 +00:00
|
|
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
|
|
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
|
|
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
|
|
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
|
|
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
|
|
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
|
|
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
|
|
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
2020-06-15 14:18:57 +00:00
|
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
|
|
#include "libc/alg/alg.h"
|
2020-11-25 16:19:00 +00:00
|
|
|
#include "libc/alg/arraylist.internal.h"
|
2020-06-15 14:18:57 +00:00
|
|
|
#include "libc/limits.h"
|
2021-03-25 09:21:13 +00:00
|
|
|
#include "libc/macros.internal.h"
|
2021-03-27 14:29:55 +00:00
|
|
|
#include "libc/mem/mem.h"
|
2020-06-15 14:18:57 +00:00
|
|
|
#include "libc/stdio/stdio.h"
|
|
|
|
#include "libc/str/str.h"
|
|
|
|
#include "libc/sysv/errfuns.h"
|
|
|
|
#include "libc/x/x.h"
|
|
|
|
#include "net/http/http.h"
|
|
|
|
|
2021-03-28 01:17:54 +00:00
|
|
|
/* Upper bound on how many bytes ParseHttpRequest() will scan; input is
   clamped to this and a message that reaches it is rejected.
   NOTE(review): presumably chosen so offsets fit in a short - confirm
   against the slice types in struct HttpRequest. */
#define LIMIT (SHRT_MAX - 2)
|
2021-03-25 09:21:13 +00:00
|
|
|
|
|
|
|
/* Parser states; ParseHttpRequest() keeps the current one in r->t so
   that scanning can resume on a later call. */
enum {
  START,   /* skipping CR/LF that precedes the request line */
  METHOD,  /* scanning the method token up to the first space */
  URI,     /* scanning the request target */
  VERSION, /* scanning the protocol version up to end of line */
  HKEY,    /* scanning a header name up to ':' */
  HSEP,    /* skipping blanks between ':' and the header value */
  HVAL,    /* scanning a header value up to end of line */
  CR1,     /* got CR at end of a line; LF must follow */
  LF1,     /* at start of a line: header name or the blank line */
  LF2,     /* got CR on the blank line; the final LF must follow */
};
|
2020-06-15 14:18:57 +00:00
|
|
|
|
|
|
|
/**
|
2021-03-25 09:21:13 +00:00
|
|
|
* Initializes HTTP request parser.
|
2020-06-15 14:18:57 +00:00
|
|
|
*/
|
2021-03-25 09:21:13 +00:00
|
|
|
void InitHttpRequest(struct HttpRequest *r) {
|
|
|
|
memset(r, 0, sizeof(*r));
|
|
|
|
}
|
|
|
|
|
2021-03-27 14:29:55 +00:00
|
|
|
/**
|
|
|
|
* Destroys HTTP request parser.
|
|
|
|
*/
|
|
|
|
void DestroyHttpRequest(struct HttpRequest *r) {
|
2021-03-29 01:00:29 +00:00
|
|
|
if (r->xheaders.p) {
|
|
|
|
free(r->xheaders.p);
|
|
|
|
r->xheaders.p = NULL;
|
|
|
|
r->xheaders.n = 0;
|
|
|
|
}
|
2021-03-27 14:29:55 +00:00
|
|
|
}
|
|
|
|
|
2021-03-25 09:21:13 +00:00
|
|
|
/**
|
|
|
|
* Parses HTTP request.
|
2021-03-28 07:10:17 +00:00
|
|
|
*
|
|
|
|
* This parser is responsible for determining the length of a message
|
|
|
|
* and slicing the strings inside it. Performance is attained using
|
|
|
|
* perfect hash tables. No memory allocation is performed for normal
|
|
|
|
* messages. Line folding is forbidden. State persists across calls so
|
|
|
|
* that fragmented messages can be handled efficiently. A limitation on
|
|
|
|
* message size is imposed to make the header data structures smaller.
|
|
|
|
* All other things are permissive to the greatest extent possible.
|
|
|
|
* Further functions are provided for the interpretation, validation,
|
|
|
|
* and sanitization of various fields.
|
2021-03-29 01:00:29 +00:00
|
|
|
*
|
|
|
|
* @note we assume p points to a buffer that has >=SHRT_MAX bytes
|
2021-03-25 09:21:13 +00:00
|
|
|
*/
|
|
|
|
int ParseHttpRequest(struct HttpRequest *r, const char *p, size_t n) {
|
2021-03-28 07:10:17 +00:00
|
|
|
int c, h, i;
|
2021-03-27 14:29:55 +00:00
|
|
|
struct HttpRequestHeader *x;
|
2021-03-25 09:21:13 +00:00
|
|
|
for (n = MIN(n, LIMIT); r->i < n; ++r->i) {
|
|
|
|
c = p[r->i] & 0xff;
|
|
|
|
switch (r->t) {
|
|
|
|
case START:
|
|
|
|
if (c == '\r' || c == '\n') {
|
|
|
|
++r->a; /* RFC7230 § 3.5 */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
r->t = METHOD;
|
|
|
|
/* fallthrough */
|
2020-06-15 14:18:57 +00:00
|
|
|
case METHOD:
|
2021-03-29 01:00:29 +00:00
|
|
|
for (;;) {
|
|
|
|
if (c == ' ') {
|
|
|
|
if ((r->method = GetHttpMethod(p + r->a, r->i - r->a)) != -1) {
|
|
|
|
r->uri.a = r->i + 1;
|
|
|
|
r->t = URI;
|
|
|
|
} else {
|
|
|
|
return ebadmsg();
|
|
|
|
}
|
|
|
|
break;
|
2021-04-02 01:51:12 +00:00
|
|
|
} else if (!('A' <= c && c <= 'Z')) {
|
|
|
|
return ebadmsg();
|
2021-03-25 09:21:13 +00:00
|
|
|
}
|
2021-03-29 01:00:29 +00:00
|
|
|
if (++r->i == n) break;
|
|
|
|
c = p[r->i] & 0xff;
|
2020-06-15 14:18:57 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case URI:
|
2021-03-29 01:00:29 +00:00
|
|
|
for (;;) {
|
|
|
|
if (c == ' ' || c == '\r' || c == '\n') {
|
|
|
|
if (r->i == r->uri.a) return ebadmsg();
|
|
|
|
r->uri.b = r->i;
|
|
|
|
if (c == ' ') {
|
|
|
|
r->version.a = r->i + 1;
|
|
|
|
r->t = VERSION;
|
|
|
|
} else if (c == '\r') {
|
|
|
|
r->t = CR1;
|
|
|
|
} else {
|
|
|
|
r->t = LF1;
|
|
|
|
}
|
|
|
|
break;
|
2021-03-25 09:21:13 +00:00
|
|
|
}
|
2021-03-29 01:00:29 +00:00
|
|
|
if (++r->i == n) break;
|
|
|
|
c = p[r->i] & 0xff;
|
2020-06-15 14:18:57 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case VERSION:
|
2020-09-07 04:39:00 +00:00
|
|
|
if (c == '\r' || c == '\n') {
|
2021-03-25 09:21:13 +00:00
|
|
|
r->version.b = r->i;
|
|
|
|
r->t = c == '\r' ? CR1 : LF1;
|
2020-06-15 14:18:57 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case CR1:
|
2020-09-07 04:39:00 +00:00
|
|
|
if (c != '\n') return ebadmsg();
|
2021-03-25 09:21:13 +00:00
|
|
|
r->t = LF1;
|
2020-06-15 14:18:57 +00:00
|
|
|
break;
|
|
|
|
case LF1:
|
2020-09-07 04:39:00 +00:00
|
|
|
if (c == '\r') {
|
2021-03-25 09:21:13 +00:00
|
|
|
r->t = LF2;
|
2020-06-15 14:18:57 +00:00
|
|
|
break;
|
2020-09-07 04:39:00 +00:00
|
|
|
} else if (c == '\n') {
|
2021-03-25 09:21:13 +00:00
|
|
|
return ++r->i;
|
|
|
|
} else if (c == ':') {
|
|
|
|
return ebadmsg();
|
|
|
|
} else if (c == ' ' || c == '\t') {
|
|
|
|
return ebadmsg(); /* RFC7230 § 3.2.4 */
|
2020-06-15 14:18:57 +00:00
|
|
|
}
|
2021-03-27 14:29:55 +00:00
|
|
|
r->k.a = r->i;
|
2021-03-25 09:21:13 +00:00
|
|
|
r->t = HKEY;
|
|
|
|
break;
|
2020-06-15 14:18:57 +00:00
|
|
|
case HKEY:
|
2021-03-29 01:00:29 +00:00
|
|
|
for (;;) {
|
|
|
|
if (c == ':') {
|
|
|
|
r->k.b = r->i;
|
|
|
|
r->t = HSEP;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (++r->i == n) break;
|
|
|
|
c = p[r->i] & 0xff;
|
2020-09-07 04:39:00 +00:00
|
|
|
}
|
2020-06-15 14:18:57 +00:00
|
|
|
break;
|
|
|
|
case HSEP:
|
2020-09-07 04:39:00 +00:00
|
|
|
if (c == ' ' || c == '\t') break;
|
2021-03-25 09:21:13 +00:00
|
|
|
r->a = r->i;
|
|
|
|
r->t = HVAL;
|
|
|
|
/* fallthrough */
|
2020-06-15 14:18:57 +00:00
|
|
|
case HVAL:
|
2021-03-29 01:00:29 +00:00
|
|
|
for (;;) {
|
|
|
|
if (c == '\r' || c == '\n') {
|
|
|
|
i = r->i;
|
|
|
|
while (i > r->a && (p[i - 1] == ' ' || p[i - 1] == '\t')) --i;
|
|
|
|
if ((h = GetHttpHeader(p + r->k.a, r->k.b - r->k.a)) != -1) {
|
|
|
|
r->headers[h].a = r->a;
|
|
|
|
r->headers[h].b = i;
|
|
|
|
} else if ((x = realloc(
|
|
|
|
r->xheaders.p,
|
|
|
|
(r->xheaders.n + 1) * sizeof(*r->xheaders.p)))) {
|
|
|
|
x[r->xheaders.n].k = r->k;
|
|
|
|
x[r->xheaders.n].v.a = r->a;
|
|
|
|
x[r->xheaders.n].v.b = i;
|
|
|
|
r->xheaders.p = x;
|
|
|
|
++r->xheaders.n;
|
|
|
|
}
|
|
|
|
r->t = c == '\r' ? CR1 : LF1;
|
|
|
|
break;
|
2020-09-07 04:39:00 +00:00
|
|
|
}
|
2021-03-29 01:00:29 +00:00
|
|
|
if (++r->i == n) break;
|
|
|
|
c = p[r->i] & 0xff;
|
2020-06-15 14:18:57 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case LF2:
|
2020-10-06 06:11:49 +00:00
|
|
|
if (c == '\n') {
|
2021-03-25 09:21:13 +00:00
|
|
|
return ++r->i;
|
2020-10-06 06:11:49 +00:00
|
|
|
}
|
2020-06-15 14:18:57 +00:00
|
|
|
return ebadmsg();
|
|
|
|
default:
|
|
|
|
unreachable;
|
|
|
|
}
|
|
|
|
}
|
2021-03-25 09:21:13 +00:00
|
|
|
if (r->i < LIMIT) {
|
|
|
|
return 0;
|
|
|
|
} else {
|
|
|
|
return ebadmsg();
|
|
|
|
}
|
2020-06-15 14:18:57 +00:00
|
|
|
}
|