mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-02-07 06:53:33 +00:00
The most exciting improvement is dynamic pages will soon be able to use the executable itself as an object store. It required a heroic technique for overcoming ETXTBSY restrictions which lets us open the executable in read/write mode, which means (1) we can restore the APE header, and (2) we can potentially containerize redbean extension code so that modules you download for your redbean online will only impact your redbean. Here's a list of breaking changes to redbean: - Remove /tool/net/ prefix from magic ZIP paths - GetHeader() now returns NIL if header is absent Here's a list of fixes and enhancements to redbean: - Support 64-bit ZIP archives - Record User-Agent header in logs - Add twelve error handlers to accept() - Display octal st_mode on listing page - Show ZIP file comments on listing page - Restore APE MZ header on redbean startup - Track request count on redbean index page - Report server uptime on redbean index page - Don't bind server socket using SO_REUSEPORT - Fix #151 where Lua LoadAsset() could free twice - Report rusage accounting when workers exit w/ -vv - Use ZIP iattr field as text/plain vs. binary hint - Add ParseUrl() API for parsing things like a.href - Add ParseParams() API for parsing HTTP POST bodies - Add IsAcceptablePath() API for checking dots, etc. - Add IsValidHttpToken() API for validating sane ASCII - Add IsAcceptableHostPort() for validating HOST[:PORT] - Send 400 response to HTTP/1.1 requests without a Host - Send 403 response if ZIP or file isn't other readable - Add virtual hosting that tries prepending Host to path - Route requests based on Host in Request-URI if present - Host routing will attempt to remove or add the www. 
prefix - Sign-extend UNIX timestamps and don't adjust FileTime zone Here's some of the improvements made to Cosmopolitan Libc: - Fix ape.S indentation - Improve consts.sh magnums - Write pretty good URL parser - Improve rusage accounting apis - Bring mremap() closer to working - Added ZIP APIs which will change - Check for overflow in reallocarray() - Remove overly fancy linkage in strerror() - Fix GDB attach on crash w/ OpenBSD msyscall() - Make sigqueue() portable to most UNIX distros - Make integer serialization macros more elegant - Bring back 34x tprecode8to16() performance boost - Make malloc() more resilient to absurdly large sizes
346 lines
11 KiB
C
346 lines
11 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
|
│ │
|
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
|
│ any purpose with or without fee is hereby granted, provided that the │
|
|
│ above copyright notice and this permission notice appear in all copies. │
|
|
│ │
|
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
#include "libc/bits/likely.h"
|
|
#include "libc/limits.h"
|
|
#include "libc/str/str.h"
|
|
#include "libc/x/x.h"
|
|
#include "net/http/url.h"
|
|
|
|
/* Cursor state shared by the parsing routines in this file. */
struct UrlParser {
  int i;            /* index of the next unread byte in `data` */
  int c;            /* byte most recently taken from `data` */
  const char *data; /* input being parsed */
  int size;         /* number of bytes in `data` */
  bool isform;      /* when set, `+` in key/values is emitted as space */
  bool islatin1;    /* when set, bytes >= 0200 are emitted as two bytes */
  char *p;          /* write position in the output buffer */
  char *q;          /* output position where the current token began */
};
|
|
|
|
/* Sixteen consecutive "not a hex digit" entries (one table row). */
#define HEX_NONE_ROW \
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1

/* Maps a byte to its hexadecimal digit value, or -1 if it isn't one. */
static const signed char kHexToInt[256] = {
    HEX_NONE_ROW,                                               /* 0x00 */
    HEX_NONE_ROW,                                               /* 0x10 */
    HEX_NONE_ROW,                                               /* 0x20 */
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9,                      /* '0'..'9' */
    -1, -1, -1, -1, -1, -1,                                     /* 0x3a..0x3f */
    -1, 10, 11, 12, 13, 14, 15,                                 /* '@', 'A'..'F' */
    -1, -1, -1, -1, -1, -1, -1, -1, -1,                         /* 0x47..0x4f */
    HEX_NONE_ROW,                                               /* 0x50 */
    -1, 10, 11, 12, 13, 14, 15,                                 /* '`', 'a'..'f' */
    -1, -1, -1, -1, -1, -1, -1, -1, -1,                         /* 0x67..0x6f */
    HEX_NONE_ROW,                                               /* 0x70 */
    HEX_NONE_ROW,                                               /* 0x80 */
    HEX_NONE_ROW,                                               /* 0x90 */
    HEX_NONE_ROW,                                               /* 0xa0 */
    HEX_NONE_ROW,                                               /* 0xb0 */
    HEX_NONE_ROW,                                               /* 0xc0 */
    HEX_NONE_ROW,                                               /* 0xd0 */
    HEX_NONE_ROW,                                               /* 0xe0 */
    HEX_NONE_ROW,                                               /* 0xf0 */
};

#undef HEX_NONE_ROW
|
|
|
|
static void EmitLatin1(struct UrlParser *u, int c) {
|
|
u->p[0] = 0300 | c >> 6;
|
|
u->p[1] = 0200 | c & 077;
|
|
u->p += 2;
|
|
}
|
|
|
|
static void EmitKey(struct UrlParser *u, struct UrlParams *h) {
|
|
h->p = xrealloc(h->p, ++h->n * sizeof(*h->p));
|
|
h->p[h->n - 1].key.p = u->q;
|
|
h->p[h->n - 1].key.n = u->p - u->q;
|
|
u->q = u->p;
|
|
}
|
|
|
|
static void EmitVal(struct UrlParser *u, struct UrlParams *h, bool t) {
|
|
if (!t) {
|
|
if (u->p > u->q) {
|
|
EmitKey(u, h);
|
|
h->p[h->n - 1].val.p = NULL;
|
|
h->p[h->n - 1].val.n = SIZE_MAX;
|
|
}
|
|
} else {
|
|
h->p[h->n - 1].val.p = u->q;
|
|
h->p[h->n - 1].val.n = u->p - u->q;
|
|
u->q = u->p;
|
|
}
|
|
}
|
|
|
|
static void ParseEscape(struct UrlParser *u) {
|
|
int a, b;
|
|
if (u->i + 2 <= u->size &&
|
|
((a = kHexToInt[u->data[u->i + 0] & 0xff]) != -1 &&
|
|
(b = kHexToInt[u->data[u->i + 1] & 0xff]) != -1)) {
|
|
u->c = a << 4 | b;
|
|
u->i += 2;
|
|
}
|
|
*u->p++ = u->c;
|
|
}
|
|
|
|
static bool ParseScheme(struct UrlParser *u, struct Url *h) {
|
|
while (u->i < u->size) {
|
|
u->c = u->data[u->i++] & 0xff;
|
|
if (u->c == '/') {
|
|
if (u->i == 1 && u->i < u->size && u->data[u->i] == '/') {
|
|
++u->i;
|
|
return true;
|
|
} else {
|
|
*u->p++ = u->c;
|
|
return false;
|
|
}
|
|
} else if (u->c == ':') {
|
|
h->scheme.p = u->q;
|
|
h->scheme.n = u->p - u->q;
|
|
u->q = u->p;
|
|
if (u->i + 2 <= u->size &&
|
|
(u->data[u->i + 1] == '/' && u->data[u->i + 1] == '/')) {
|
|
u->i += 2;
|
|
return true;
|
|
} else {
|
|
return false;
|
|
}
|
|
} else if (u->c == '#' || u->c == '?') {
|
|
h->path.p = u->q;
|
|
h->path.n = u->p - u->q;
|
|
u->q = u->p;
|
|
return false;
|
|
} else if (u->c == '%') {
|
|
ParseEscape(u);
|
|
} else if (u->c >= 0200 && u->islatin1) {
|
|
EmitLatin1(u, u->c);
|
|
} else {
|
|
*u->p++ = u->c;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static void ParseAuthority(struct UrlParser *u, struct Url *h) {
|
|
bool b = false;
|
|
const char *c = NULL;
|
|
while (u->i < u->size) {
|
|
u->c = u->data[u->i++] & 0xff;
|
|
if (u->c == '/' || u->c == '#' || u->c == '?') {
|
|
break;
|
|
} else if (u->c == '[') {
|
|
b = true;
|
|
} else if (u->c == ']') {
|
|
b = false;
|
|
} else if (u->c == ':' && !b) {
|
|
c = u->p;
|
|
} else if (u->c == '@') {
|
|
if (c) {
|
|
h->user.p = u->q;
|
|
h->user.n = c - u->q;
|
|
h->pass.p = c;
|
|
h->pass.n = u->p - c;
|
|
c = NULL;
|
|
} else {
|
|
h->user.p = u->q;
|
|
h->user.n = u->p - u->q;
|
|
}
|
|
u->q = u->p;
|
|
} else if (u->c == '%') {
|
|
ParseEscape(u);
|
|
} else if (u->c >= 0200 && u->islatin1) {
|
|
EmitLatin1(u, u->c);
|
|
} else {
|
|
*u->p++ = u->c;
|
|
}
|
|
}
|
|
if (c) {
|
|
h->host.p = u->q;
|
|
h->host.n = c - u->q;
|
|
h->port.p = c;
|
|
h->port.n = u->p - c;
|
|
c = NULL;
|
|
} else {
|
|
h->host.p = u->q;
|
|
h->host.n = u->p - u->q;
|
|
}
|
|
u->q = u->p;
|
|
if (u->c == '/') {
|
|
*u->p++ = u->c;
|
|
}
|
|
}
|
|
|
|
static void ParsePath(struct UrlParser *u, struct UrlView *h) {
|
|
while (u->i < u->size) {
|
|
u->c = u->data[u->i++] & 0xff;
|
|
if (u->c == '#' || u->c == '?') {
|
|
break;
|
|
} else if (u->c == '%') {
|
|
ParseEscape(u);
|
|
} else if (u->c >= 0200 && u->islatin1) {
|
|
EmitLatin1(u, u->c);
|
|
} else {
|
|
*u->p++ = u->c;
|
|
}
|
|
}
|
|
h->p = u->q;
|
|
h->n = u->p - u->q;
|
|
u->q = u->p;
|
|
}
|
|
|
|
static void ParseKeyValues(struct UrlParser *u, struct UrlParams *h) {
|
|
bool t = false;
|
|
while (u->i < u->size) {
|
|
u->c = u->data[u->i++] & 0xff;
|
|
if (u->c == '#') {
|
|
break;
|
|
} else if (u->c == '%') {
|
|
ParseEscape(u);
|
|
} else if (u->c == '+') {
|
|
*u->p++ = u->isform ? ' ' : '+';
|
|
} else if (u->c == '&') {
|
|
EmitVal(u, h, t);
|
|
t = false;
|
|
} else if (u->c == '=') {
|
|
if (!t) {
|
|
if (u->p > u->q) {
|
|
EmitKey(u, h);
|
|
t = true;
|
|
}
|
|
} else {
|
|
*u->p++ = '=';
|
|
}
|
|
} else if (u->c >= 0200 && u->islatin1) {
|
|
EmitLatin1(u, u->c);
|
|
} else {
|
|
*u->p++ = u->c;
|
|
}
|
|
}
|
|
EmitVal(u, h, t);
|
|
}
|
|
|
|
static void ParseFragment(struct UrlParser *u, struct UrlView *h) {
|
|
while (u->i < u->size) {
|
|
u->c = u->data[u->i++] & 0xff;
|
|
if (u->c == '%') {
|
|
ParseEscape(u);
|
|
} else if (u->c >= 0200 && u->islatin1) {
|
|
EmitLatin1(u, u->c);
|
|
} else {
|
|
*u->p++ = u->c;
|
|
}
|
|
}
|
|
h->p = u->q;
|
|
h->n = u->p - u->q;
|
|
u->q = u->p;
|
|
}
|
|
|
|
static char *ParseUrlImpl(const char *data, size_t size, struct Url *h,
|
|
bool latin1) {
|
|
char *m;
|
|
struct UrlParser u;
|
|
if (size == -1) size = data ? strlen(data) : 0;
|
|
u.i = 0;
|
|
u.c = 0;
|
|
u.isform = false;
|
|
u.islatin1 = latin1;
|
|
u.data = data;
|
|
u.size = size;
|
|
memset(h, 0, sizeof(*h));
|
|
u.q = u.p = m = xmalloc(u.size * 2);
|
|
if (ParseScheme(&u, h)) ParseAuthority(&u, h);
|
|
if (u.c != '#' && u.c != '?') ParsePath(&u, &h->path);
|
|
if (u.c == '?') ParseKeyValues(&u, &h->params);
|
|
if (u.c == '#') ParseFragment(&u, &h->fragment);
|
|
return xrealloc(m, u.p - m);
|
|
}
|
|
|
|
/**
 * Parses URL.
 *
 * This routine never fails. It is a permissive parser: the only
 * character rules it enforces are the ones needed to split the input
 * into components. Path segments such as `.` and `..` are left as-is;
 * IsAcceptablePath() can be used to check for those.
 *
 * No assumptions are made about charset. Returned values might contain
 * NUL characters, control codes, and non-canonical encodings.
 *
 * Path segments containing a percent-encoded slash can't be parsed
 * accurately, and semicolon parameters aren't currently supported.
 *
 * @param data is value like `/hi?x=y&z` or `http://a.example/hi#x`
 * @param size is byte length and -1 implies strlen
 * @param h is assumed to be uninitialized
 * @return memory backing UrlView needing free (and h.params.p too)
 */
char *ParseUrl(const char *data, size_t size, struct Url *h) {
  const bool latin1 = false;
  return ParseUrlImpl(data, size, h, latin1);
}
|
|
|
|
/**
 * Parses HTTP Request-URI.
 *
 * Input bytes are treated as ISO-8859-1 and transcoded to UTF-8, which
 * means percent-encoded bytes are assumed to already be UTF-8. Returned
 * values might contain NUL characters as well as C0 and C1 control
 * codes. The UTF-8 isn't validated and may contain overlong values.
 *
 * This routine never fails. It is a permissive parser: the only
 * character rules it enforces are the ones needed to split the input
 * into components. Path segments such as `.` and `..` are left as-is;
 * IsAcceptablePath() can be used to check for those.
 *
 * Path segments containing a percent-encoded slash can't be parsed
 * accurately.
 *
 * @param data is value like `/hi?x=y&z` or `http://a.example/hi#x`
 * @param size is byte length and -1 implies strlen
 * @param h is assumed to be uninitialized
 * @return memory backing UrlView needing free (and h.params.p too)
 */
char *ParseRequestUri(const char *data, size_t size, struct Url *h) {
  const bool latin1 = true;
  return ParseUrlImpl(data, size, h, latin1);
}
|
|
|
|
/**
|
|
* Parses HTTP POST key-value params.
|
|
*
|
|
* These are similar to the parameters found in a Request-URI. The main
|
|
* difference is that `+` is translated into space here. The mime type
|
|
* for this is application/x-www-form-urlencoded.
|
|
*
|
|
* This parser is charset agnostic. Returned values might contain things
|
|
* like NUL characters, control codes, and non-canonical encodings.
|
|
*
|
|
* There's no failure condition for this routine. This is a permissive
|
|
* parser that doesn't impose character restrictions beyond what is
|
|
* necessary for parsing.
|
|
*
|
|
* @param data is value like `foo=bar&x=y&z`
|
|
* @param size is byte length and -1 implies strlen
|
|
* @param h must be zeroed by caller and this appends if reused
|
|
* @return UrlView memory with same size needing free (h.p needs free too)
|
|
*/
|
|
char *ParseParams(const char *data, size_t size, struct UrlParams *h) {
|
|
char *m;
|
|
struct UrlParser u;
|
|
if (size == -1) size = data ? strlen(data) : 0;
|
|
u.i = 0;
|
|
u.c = 0;
|
|
u.isform = true;
|
|
u.islatin1 = false;
|
|
u.data = data;
|
|
u.size = size;
|
|
u.q = u.p = m = xmalloc(u.size);
|
|
ParseKeyValues(&u, h);
|
|
return m;
|
|
}
|