Make major improvements to redbean and libraries

The most exciting improvement is that dynamic pages will soon be able to
use the executable itself as an object store. It required a heroic technique
for overcoming ETXTBSY restrictions which lets us open the executable in
read/write mode, which means (1) we can restore the APE header, and (2)
we can potentially containerize redbean extension code so that modules
you download for your redbean online will only impact your redbean.

Here's a list of breaking changes to redbean:

- Remove /tool/net/ prefix from magic ZIP paths
- GetHeader() now returns NIL if header is absent

Here's a list of fixes and enhancements to redbean:

- Support 64-bit ZIP archives
- Record User-Agent header in logs
- Add twelve error handlers to accept()
- Display octal st_mode on listing page
- Show ZIP file comments on listing page
- Restore APE MZ header on redbean startup
- Track request count on redbean index page
- Report server uptime on redbean index page
- Don't bind server socket using SO_REUSEPORT
- Fix #151 where Lua LoadAsset() could free twice
- Report rusage accounting when workers exit w/ -vv
- Use ZIP iattr field as text/plain vs. binary hint
- Add ParseUrl() API for parsing things like a.href
- Add ParseParams() API for parsing HTTP POST bodies
- Add IsAcceptablePath() API for checking dots, etc.
- Add IsValidHttpToken() API for validating sane ASCII
- Add IsAcceptableHostPort() for validating HOST[:PORT]
- Send 400 response to HTTP/1.1 requests without a Host
- Send 403 response if ZIP or file isn't other readable
- Add virtual hosting that tries prepending Host to path
- Route requests based on Host in Request-URI if present
- Host routing will attempt to remove or add the www. prefix
- Sign-extend UNIX timestamps and don't adjust FileTime zone

Here are some of the improvements made to Cosmopolitan Libc:

- Fix ape.S indentation
- Improve consts.sh magnums
- Write pretty good URL parser
- Improve rusage accounting apis
- Bring mremap() closer to working
- Added ZIP APIs which will change
- Check for overflow in reallocarray()
- Remove overly fancy linkage in strerror()
- Fix GDB attach on crash w/ OpenBSD msyscall()
- Make sigqueue() portable to most UNIX distros
- Make integer serialization macros more elegant
- Bring back 34x tprecode8to16() performance boost
- Make malloc() more resilient to absurdly large sizes
This commit is contained in:
Justine Tunney 2021-04-18 11:34:59 -07:00
parent 69c508729e
commit bf03b2e64c
307 changed files with 4557 additions and 2581 deletions

View file

@ -41,13 +41,18 @@ static const signed char kBase64[256] = {
/**
* Decodes base64 ascii representation to binary.
*
* @param data is input value
* @param size if -1 implies strlen
* @param out_size if non-NULL receives output length
* @return allocated NUL-terminated buffer, or NULL w/ errno
*/
void *DecodeBase64(const char *data, size_t size, size_t *out_size) {
unsigned w;
size_t n;
char *r, *q;
int a, b, c, d;
int a, b, c, d, w;
const char *p, *pe;
if (size == -1) size = strlen(data);
if (size == -1) size = data ? strlen(data) : 0;
if ((r = malloc(size / 4 * 3 + 1))) {
q = r;
p = data;
@ -77,9 +82,14 @@ void *DecodeBase64(const char *data, size_t size, size_t *out_size) {
if (d != -2) *q++ = (w & 0x0000FF) >> 000;
}
Done:
if (out_size) *out_size = q - r;
n = q - r;
*q++ = '\0';
if ((q = realloc(r, q - r))) r = q;
} else {
n = 0;
}
if (out_size) {
*out_size = n;
}
return r;
}

View file

@ -16,6 +16,8 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/pcmpgtb.h"
#include "libc/intrin/pmovmskb.h"
#include "libc/mem/mem.h"
#include "libc/str/str.h"
#include "net/http/http.h"
@ -25,14 +27,15 @@
*
* @param data is input value
* @param size if -1 implies strlen
* @param out_size if non-NULL receives output length on success
* @param out_size if non-NULL receives output length
* @return allocated NUL-terminated buffer, or NULL w/ errno
*/
char *DecodeLatin1(const char *data, size_t size, size_t *out_size) {
int c;
size_t n;
char *r, *q;
const char *p, *e;
if (size == -1) size = strlen(data);
if (size == -1) size = data ? strlen(data) : 0;
if ((r = malloc(size * 2 + 1))) {
q = r;
p = data;
@ -46,9 +49,14 @@ char *DecodeLatin1(const char *data, size_t size, size_t *out_size) {
*q++ = 0200 | c & 077;
}
}
if (out_size) *out_size = q - r;
n = q - r;
*q++ = '\0';
if ((q = realloc(r, q - r))) r = q;
if ((q = realloc(r, n + 1))) r = q;
} else {
n = 0;
}
if (out_size) {
*out_size = n;
}
return r;
}

View file

@ -17,22 +17,28 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/mem/mem.h"
#include "libc/str/str.h"
#include "net/http/base64.h"
#define CHARS "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
/**
* Encodes binary to base64 ascii representation.
*
* @param data is input value
* @param size if -1 implies strlen
* @param out_size if non-NULL receives output length
* @return allocated NUL-terminated buffer, or NULL w/ errno
*/
char *EncodeBase64(const void *data, size_t size, size_t *out_size) {
size_t n;
unsigned w;
char *r, *q;
const unsigned char *p, *pe;
if (size == -1) size = data ? strlen(data) : 0;
if ((n = size) % 3) n += 3 - size % 3;
n /= 3, n *= 4;
if ((r = malloc(n + 1))) {
if (out_size) *out_size = n;
for (q = r, p = data, pe = p + size; p < pe; p += 3) {
w = p[0] << 020;
if (p + 1 < pe) w |= p[1] << 010;
@ -43,6 +49,11 @@ char *EncodeBase64(const void *data, size_t size, size_t *out_size) {
*q++ = p + 2 < pe ? CHARS[w & 077] : '=';
}
*q++ = '\0';
} else {
n = 0;
}
if (out_size) {
*out_size = n;
}
return r;
}

View file

@ -42,9 +42,10 @@
char *EncodeHttpHeaderValue(const char *data, size_t size, size_t *out_size) {
bool t;
wint_t x;
size_t n;
char *r, *q;
const char *p, *e;
if (size == -1) size = strlen(data);
if (size == -1) size = data ? strlen(data) : 0;
if ((r = malloc(size + 1))) {
t = 0;
q = r;
@ -77,9 +78,14 @@ char *EncodeHttpHeaderValue(const char *data, size_t size, size_t *out_size) {
}
}
while (q > r && (q[-1] == ' ' || q[-1] == '\t')) --q;
if (out_size) *out_size = q - r;
n = q - r;
*q++ = '\0';
if ((q = realloc(r, q - r))) r = q;
} else {
n = 0;
}
if (out_size) {
*out_size = n;
}
return r;
}

View file

@ -25,6 +25,7 @@
* This function is agnostic to the underlying charset.
* Always using UTF-8 is a good idea.
*
* @param size if -1 implies strlen
* @see EscapeUrlParam
* @see EscapeUrlFragment
* @see EscapeUrlPathSegment
@ -35,6 +36,7 @@ struct EscapeResult EscapeUrl(const char *data, size_t size,
char *p;
size_t i;
struct EscapeResult r;
if (size == -1) size = data ? strlen(data) : 0;
p = r.data = xmalloc(size * 6 + 1);
for (i = 0; i < size; ++i) {
if (!xlat[(c = data[i] & 0xff)]) {

View file

@ -46,6 +46,8 @@ static const char kEscapeUrlFragment[256] = {
/**
* Escapes URL fragment.
*
* @param size if -1 implies strlen
*/
struct EscapeResult EscapeUrlFragment(const char *data, size_t size) {
return EscapeUrl(data, size, kEscapeUrlFragment);

View file

@ -44,6 +44,8 @@ static const char kEscapeUrlParam[256] = {
/**
* Escapes query/form name/parameter.
*
* @param size if -1 implies strlen
*/
struct EscapeResult EscapeUrlParam(const char *data, size_t size) {
return EscapeUrl(data, size, kEscapeUrlParam);

View file

@ -48,6 +48,8 @@ static const char kEscapeUrlPath[256] = {
* Escapes URL path.
*
* This is the same as EscapeUrlPathSegment() except slash is allowed.
*
* @param size if -1 implies strlen
*/
struct EscapeResult EscapeUrlPath(const char *data, size_t size) {
return EscapeUrl(data, size, kEscapeUrlPath);

View file

@ -49,6 +49,8 @@ static const char kEscapeUrlPathSegment[256] = {
*
* Please note this will URI encode the slash character. That's because
* segments are the labels between the slashes in a path.
*
* @param size if -1 implies strlen
*/
struct EscapeResult EscapeUrlPathSegment(const char *data, size_t size) {
return EscapeUrl(data, size, kEscapeUrlPathSegment);

View file

@ -125,7 +125,8 @@ bool IsValidHttpToken(const char *, size_t);
char *EncodeHttpHeaderValue(const char *, size_t, size_t *);
char *VisualizeControlCodes(const char *, size_t, size_t *);
char *IndentLines(const char *, size_t, size_t *, size_t);
bool IsAcceptableHttpRequestPath(const char *, size_t);
bool IsAcceptablePath(const char *, size_t);
bool IsAcceptableHostPort(const char *, size_t);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */

View file

@ -25,7 +25,7 @@
*
* @param data is input value
* @param size if -1 implies strlen
* @param out_size if non-NULL receives output length on success
* @param out_size if non-NULL receives output length
* @param amt is number of spaces to use
* @return allocated NUL-terminated buffer, or NULL w/ errno
*/
@ -33,7 +33,7 @@ char *IndentLines(const char *data, size_t size, size_t *out_size, size_t amt) {
char *r;
const char *p;
size_t i, n, m, a;
if (size == -1) size = strlen(data);
if (size == -1) size = data ? strlen(data) : 0;
r = 0;
n = 0;
do {
@ -51,7 +51,9 @@ char *IndentLines(const char *data, size_t size, size_t *out_size, size_t amt) {
data += m;
size -= m;
} while (p);
if (out_size) *out_size = n;
if (out_size) {
*out_size = n;
}
r[n] = '\0';
return r;
}

View file

@ -0,0 +1,106 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "net/http/http.h"
/**
 * Returns true if HOST[:PORT] seems legit.
 *
 * This parser is permissive and imposes the subset of restrictions
 * that'll make things easier for the caller. For example, only one
 * colon is allowed to appear, which makes memchr() so much easier.
 *
 * Here's examples of permitted inputs:
 *
 * - 1.2.3.4
 * - 1.2.3.4.arpa
 * - 1.2.3.4:8080
 * - localservice
 * - hello.example
 * - _hello.example
 * - -hello.example
 * - hi-there.example
 * - hello.example:443
 *
 * Here's some examples of forbidden inputs:
 *
 * - :443
 * - 1.2.3
 * - 1.2.3.4.5
 * - 1.2.3:8080
 * - [::1]:8080
 * - .hi.example
 * - hi..example
 * - hi.example::80
 * - hi.example:-80
 * - hi.example:65536
 *
 * A host made only of digits and dots is treated as an IPv4 address
 * and must have exactly four components, each at most 255. That rule
 * applies whether or not a port follows. A host may not end with a
 * dot. An empty port after the colon (e.g. `host:`) is let through.
 *
 * @param s is the candidate string, which needn't be NUL-terminated
 *     when n is given explicitly
 * @param n is byte length of s, or -1 which implies strlen
 * @return true if s passed every check above
 */
bool IsAcceptableHostPort(const char *s, size_t n) {
  size_t i;
  bool isip;
  int c, t, p, b, j;
  if (n == (size_t)-1) n = s ? strlen(s) : 0;
  if (!n) return false;
  /* t: seen the colon; p: port value; b: current octet; j: dot count */
  for (isip = true, b = j = p = t = i = 0; i < n; ++i) {
    c = s[i] & 255;
    if (!t) {
      if (c == ':') {
        if (!i || s[i - 1] == '.') {
          return false;
        }
        t = 1;
        /* The colon only separates host from port. It must not reach
           the octet tracker below, otherwise it would clear `isip` and
           let malformed addresses such as 1.2.3:80 slip through. */
        continue;
      }
      if (c == '.' && (!i || s[i - 1] == '.')) {
        return false;
      }
      if (!(isalnum(c) || c == '-' || c == '_' || c == '.')) {
        return false;
      }
      if (isip) {
        if (isdigit(c)) {
          b *= 10;
          b += c - '0';
          if (b > 255) {
            return false;
          }
        } else if (c == '.') {
          b = 0;
          ++j;
        } else {
          isip = false; /* saw a letter, dash, or underscore: a hostname */
        }
      }
    } else {
      if (c == ':') {
        return false;
      } else if ('0' <= c && c <= '9') {
        p *= 10;
        p += c - '0';
        if (p > 65535) {
          return false;
        }
      } else {
        return false;
      }
    }
  }
  if (isip && j != 3) return false;
  /* with a port, the trailing-dot case was already rejected at the colon */
  if (!t && s[i - 1] == '.') return false;
  return true;
}

View file

@ -16,29 +16,29 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/str/thompike.h"
#include "net/http/http.h"
/**
* Returns true if request path seems legit.
*
* 1. Request path must start with '/'.
* 2. The substring "//" is disallowed.
* 3. We won't serve hidden files (segment starts with '.').
* 4. We won't serve paths with segments equal to "." or "..".
* 1. The substring "//" is disallowed.
* 2. We won't serve hidden files (segment starts with '.').
* 3. We won't serve paths with segments equal to "." or "..".
*
* It is assumed that the URI parser already took care of percent
* escape decoding as well as ISO-8859-1 decoding. The input needs
* to be a UTF-8 string.
*
* @param size if -1 implies strlen
*/
bool IsAcceptableHttpRequestPath(const char *data, size_t size) {
bool t;
size_t i;
unsigned n;
wint_t x, y, a, b;
bool IsAcceptablePath(const char *data, size_t size) {
const char *p, *e;
if (!size || *data != '/') return false;
int x, y, a, b, t, i, n;
if (size == -1) size = data ? strlen(data) : 0;
t = 0;
y = '/';
p = data;
e = p + size;
while (p < e) {
@ -62,14 +62,12 @@ bool IsAcceptableHttpRequestPath(const char *data, size_t size) {
if (x == '\\') {
x = '/';
}
if (!t) {
t = true;
} else {
if ((x == '/' || x == '.') && y == '/') {
return false;
}
if (y == '/') {
if (x == '.') return false;
if (x == '/' && t) return false;
}
y = x;
t = 1;
}
return true;
}

View file

@ -21,7 +21,7 @@
// http/1.1 token dispatch
// 0 is CTLs, SP, ()<>@,;:\"/[]?={}
// 1 is legal ascii
// 1 is what remains of ascii
static const char kHttpToken[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10
@ -41,10 +41,15 @@ static const char kHttpToken[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xf0
};
/**
* Returns true if string is ASCII without delimiters.
*
* @param n if -1 implies strlen
*/
bool IsValidHttpToken(const char *s, size_t n) {
size_t i;
if (!n) return false;
if (n == -1) n = strlen(s);
if (n == -1) n = s ? strlen(s) : 0;
for (i = 0; i < n; ++i) {
if (!kHttpToken[s[i] & 0xff]) {
return false;

346
net/http/parseurl.c Normal file
View file

@ -0,0 +1,346 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/likely.h"
#include "libc/limits.h"
#include "libc/str/str.h"
#include "libc/x/x.h"
#include "net/http/url.h"
/**
 * Cursor state shared by the URL parsing routines in this file.
 *
 * The offsets are size_t so that the caller's size_t length is stored
 * without truncation and so that sizing the output buffer from `size`
 * is done in unsigned arithmetic.
 */
struct UrlParser {
  size_t i;         /* offset of next unread byte in data */
  int c;            /* most recently read input byte (0..255) */
  const char *data; /* input being parsed */
  size_t size;      /* byte length of data */
  bool isform;      /* if true then `+` in key/values decodes to space */
  bool islatin1;    /* if true then raw bytes >= 0x80 are emitted as UTF-8 */
  char *p;          /* output write cursor */
  char *q;          /* start of the output span not yet handed to a view */
};
/* Maps a byte to its value as a hexadecimal digit. The characters
   0-9, A-F, and a-f map to 0..15; every other byte maps to -1. It is
   indexed with `byte & 0xff` by the percent-escape decoder. */
static const signed char kHexToInt[256] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x00
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x10
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x20
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 0x30
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x40
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x50
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x60
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x70
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x80
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x90
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xa0
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xb0
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xc0
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xd0
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xe0
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xf0
};
/* Writes c to the output as a two-byte sequence: a lead byte carrying
   the bits above the low six, then a continuation byte carrying the
   low six bits. Callers in this file only pass values 0x80..0xFF. */
static void EmitLatin1(struct UrlParser *u, int c) {
  char *out = u->p;
  out[0] = 0300 | (c >> 6);
  out[1] = 0200 | (c & 077);
  u->p = out + 2;
}
/* Grows the parameter array by one entry and records the pending
   output span [q,p) as that entry's key. The entry's value is not
   touched here. The pending span is then reset to empty. */
static void EmitKey(struct UrlParser *u, struct UrlParams *h) {
  size_t last = h->n++;
  h->p = xrealloc(h->p, h->n * sizeof(*h->p));
  h->p[last].key.p = u->q;
  h->p[last].key.n = u->p - u->q;
  u->q = u->p;
}
/* Finishes the current parameter.
   When t is true a key was already emitted, so the pending span [q,p)
   becomes the value of the last entry and the span is reset.
   When t is false no `=` was seen: a non-empty pending span becomes a
   key whose value is marked absent (NULL pointer, SIZE_MAX length),
   and an empty pending span is ignored. */
static void EmitVal(struct UrlParser *u, struct UrlParams *h, bool t) {
  size_t last;
  if (t) {
    last = h->n - 1;
    h->p[last].val.p = u->q;
    h->p[last].val.n = u->p - u->q;
    u->q = u->p;
    return;
  }
  if (u->p <= u->q) {
    return;
  }
  EmitKey(u, h);
  last = h->n - 1;
  h->p[last].val.p = NULL;
  h->p[last].val.n = SIZE_MAX;
}
/* Handles a `%` that has just been read from the input.
   If the next two input bytes are hex digits they are consumed and the
   byte they encode is appended to the output. Otherwise a literal `%`
   is appended and nothing more is consumed.

   The decoded byte is deliberately kept in a local rather than stored
   in u->c. The callers inspect u->c after their loops to learn which
   literal delimiter (`/`, `?`, `#`) ended a component, and an escaped
   delimiter such as %23 must not be mistaken for a real one. */
static void ParseEscape(struct UrlParser *u) {
  int a, b, c = '%';
  if (u->i + 2 <= u->size &&
      ((a = kHexToInt[u->data[u->i + 0] & 0xff]) != -1 &&
       (b = kHexToInt[u->data[u->i + 1] & 0xff]) != -1)) {
    c = a << 4 | b;
    u->i += 2;
  }
  *u->p++ = c;
}
/* Scans the start of the input for a scheme.
   Bytes are copied to the output (with percent-decoding and optional
   Latin-1 transcoding) until one of these is read:

   - `/`: if it is the very first byte and another `/` follows, both
     are consumed and true is returned (authority follows). Otherwise
     the slash is copied to the output and false is returned.
   - `:`: what was copied so far becomes h->scheme. If `//` follows
     immediately, it is consumed and true is returned; else false.
   - `#` or `?`: what was copied so far becomes h->path; returns false.

   Returns false if the input runs out first. On return u->c holds the
   last raw byte that was read. */
static bool ParseScheme(struct UrlParser *u, struct Url *h) {
  while (u->i < u->size) {
    u->c = u->data[u->i++] & 0xff;
    if (u->c == '/') {
      if (u->i == 1 && u->i < u->size && u->data[u->i] == '/') {
        ++u->i;
        return true;
      } else {
        *u->p++ = u->c;
        return false;
      }
    } else if (u->c == ':') {
      h->scheme.p = u->q;
      h->scheme.n = u->p - u->q;
      u->q = u->p;
      /* both of the next two bytes must be slashes */
      if (u->i + 2 <= u->size &&
          (u->data[u->i + 0] == '/' && u->data[u->i + 1] == '/')) {
        u->i += 2;
        return true;
      } else {
        return false;
      }
    } else if (u->c == '#' || u->c == '?') {
      h->path.p = u->q;
      h->path.n = u->p - u->q;
      u->q = u->p;
      return false;
    } else if (u->c == '%') {
      ParseEscape(u);
    } else if (u->c >= 0200 && u->islatin1) {
      EmitLatin1(u, u->c);
    } else {
      *u->p++ = u->c;
    }
  }
  return false;
}
/* Parses `[user[:pass]@]host[:port]` up to the next `/`, `#`, `?`, or
   end of input, filling in h->user, h->pass, h->host, and h->port.

   - `[` and `]` are not copied to the output; they only switch off
     colon handling so a bracketed literal can contain colons.
   - A `:` outside brackets is not copied either. Its output position
     is remembered as the split point; if several appear before the
     next `@` or the end, only the last position is kept.
   - `@` closes the userinfo and starts the host afresh.

   If u->c is `/` once the loop is done, a `/` is written to the output
   so that it becomes the first byte of the path parsed next. */
static void ParseAuthority(struct UrlParser *u, struct Url *h) {
  bool bracketed = false;
  /* Points into the writable output buffer and is stored into
     UrlView.p (a plain `char *`), so it must not be const-qualified. */
  char *colon = NULL;
  while (u->i < u->size) {
    u->c = u->data[u->i++] & 0xff;
    if (u->c == '/' || u->c == '#' || u->c == '?') {
      break;
    } else if (u->c == '[') {
      bracketed = true;
    } else if (u->c == ']') {
      bracketed = false;
    } else if (u->c == ':' && !bracketed) {
      colon = u->p;
    } else if (u->c == '@') {
      if (colon) {
        h->user.p = u->q;
        h->user.n = colon - u->q;
        h->pass.p = colon;
        h->pass.n = u->p - colon;
        colon = NULL;
      } else {
        h->user.p = u->q;
        h->user.n = u->p - u->q;
      }
      u->q = u->p;
    } else if (u->c == '%') {
      ParseEscape(u);
    } else if (u->c >= 0200 && u->islatin1) {
      EmitLatin1(u, u->c);
    } else {
      *u->p++ = u->c;
    }
  }
  if (colon) {
    h->host.p = u->q;
    h->host.n = colon - u->q;
    h->port.p = colon;
    h->port.n = u->p - colon;
  } else {
    h->host.p = u->q;
    h->host.n = u->p - u->q;
  }
  u->q = u->p;
  /* NOTE(review): if the loop above never ran, u->c still holds
     whatever the caller last read; confirm that is intended. */
  if (u->c == '/') {
    *u->p++ = u->c;
  }
}
/* Copies input to the output until `#`, `?`, or end of input, decoding
   percent escapes and (in Latin-1 mode) transcoding high bytes. The
   pending output span, including anything written before this call,
   is then stored in *h and the span is reset. */
static void ParsePath(struct UrlParser *u, struct UrlView *h) {
  for (; u->i < u->size;) {
    u->c = u->data[u->i++] & 0xff;
    if (u->c == '#' || u->c == '?') {
      break;
    }
    if (u->c == '%') {
      ParseEscape(u);
      continue;
    }
    if (u->islatin1 && u->c >= 0200) {
      EmitLatin1(u, u->c);
      continue;
    }
    *u->p++ = u->c;
  }
  h->n = u->p - u->q;
  h->p = u->q;
  u->q = u->p;
}
/* Parses `key=val&key2&...` pairs into h until `#` or end of input.

   - `&` ends the current parameter.
   - The first `=` after a non-empty key ends the key; later `=` in
     the same parameter are copied as data. An `=` with no key text
     before it is dropped.
   - `+` becomes a space only in form mode.
   - Percent escapes are decoded; in Latin-1 mode high bytes are
     transcoded.

   Whatever is pending when the loop stops is emitted as well. */
static void ParseKeyValues(struct UrlParser *u, struct UrlParams *h) {
  bool inval = false;
  while (u->i < u->size) {
    u->c = u->data[u->i++] & 0xff;
    switch (u->c) {
      case '#':
        goto Finished;
      case '%':
        ParseEscape(u);
        break;
      case '+':
        *u->p++ = u->isform ? ' ' : '+';
        break;
      case '&':
        EmitVal(u, h, inval);
        inval = false;
        break;
      case '=':
        if (inval) {
          *u->p++ = '=';
        } else if (u->p > u->q) {
          EmitKey(u, h);
          inval = true;
        }
        break;
      default:
        if (u->c >= 0200 && u->islatin1) {
          EmitLatin1(u, u->c);
        } else {
          *u->p++ = u->c;
        }
        break;
    }
  }
Finished:
  EmitVal(u, h, inval);
}
/* Copies all remaining input to the output, decoding percent escapes
   and (in Latin-1 mode) transcoding high bytes, then stores the
   pending output span in *h and resets the span. */
static void ParseFragment(struct UrlParser *u, struct UrlView *h) {
  for (; u->i < u->size;) {
    u->c = u->data[u->i++] & 0xff;
    if (u->c == '%') {
      ParseEscape(u);
      continue;
    }
    if (u->islatin1 && u->c >= 0200) {
      EmitLatin1(u, u->c);
      continue;
    }
    *u->p++ = u->c;
  }
  h->n = u->p - u->q;
  h->p = u->q;
  u->q = u->p;
}
/* Shared implementation behind ParseUrl() and ParseRequestUri().

   Zeroes *h, allocates one output buffer of twice the input length
   (room for every byte to expand to two bytes in Latin-1 mode), and
   runs the component parsers in order: scheme, authority, path,
   key/values, fragment. Which ones run depends on the delimiter that
   ended the previous component.

   @param data is input, which needn't be NUL-terminated if size given
   @param size is byte length, or -1 which implies strlen
   @param h receives views that point into the returned buffer
   @param latin1 if true transcodes raw bytes >= 0x80 to two-byte UTF-8
   @return buffer backing the views; caller frees it and h->params.p */
static char *ParseUrlImpl(const char *data, size_t size, struct Url *h,
                          bool latin1) {
  char *m;
  struct UrlParser u;
  if (size == (size_t)-1) size = data ? strlen(data) : 0;
  u.i = 0;
  u.c = 0;
  u.isform = false;
  u.islatin1 = latin1;
  u.data = data;
  u.size = size;
  memset(h, 0, sizeof(*h));
  u.q = u.p = m = xmalloc(u.size * 2);
  if (ParseScheme(&u, h)) ParseAuthority(&u, h);
  if (u.c != '#' && u.c != '?') ParsePath(&u, &h->path);
  if (u.c == '?') ParseKeyValues(&u, &h->params);
  if (u.c == '#') ParseFragment(&u, &h->fragment);
  /* The buffer is returned as allocated rather than shrunk. Every view
     stored in *h points into it, so a realloc() that relocated the
     block would leave them dangling. A zero-length result would also
     turn into a zero-size realloc() request. */
  return m;
}
/**
 * Parses URL.
 *
 * There's no failure condition for this routine. This is a permissive
 * parser that doesn't impose character restrictions beyond what is
 * necessary for parsing. This doesn't normalize path segments like `.`
 * or `..`. Use IsAcceptablePath() to check for those.
 *
 * This parser is charset agnostic. Returned values might contain things
 * like NUL characters, control codes, and non-canonical encodings.
 * Percent escapes are decoded in every component; a `%` that isn't
 * followed by two hex digits is kept as a literal `%`.
 *
 * This parser doesn't support the ability to accurately parse path
 * segments which contain percent-encoded slash. There's also no support
 * for semicolon parameters at the moment.
 *
 * Components that don't appear in the input are left zeroed in `h`.
 * The square brackets around a bracketed host are not included in the
 * host view.
 *
 * @param data is value like `/hi?x=y&z` or `http://a.example/hi#x`
 * @param size is byte length and -1 implies strlen
 * @param h is assumed to be uninitialized
 * @return memory backing UrlView needing free (and h.params.p too)
 */
char *ParseUrl(const char *data, size_t size, struct Url *h) {
return ParseUrlImpl(data, size, h, false);
}
/**
 * Parses HTTP Request-URI.
 *
 * The input is ISO-8859-1 which is transcoded to UTF-8. Therefore we
 * assume percent-encoded bytes are expressed as UTF-8. Returned values
 * might contain things like NUL characters, C0, and C1 control codes.
 * UTF-8 isn't checked for validity and may contain overlong values.
 *
 * Concretely: a raw input byte of 0x80 or above is written out as two
 * bytes, whereas a byte produced by decoding a percent escape is
 * written out unchanged.
 *
 * There's no failure condition for this routine. This is a permissive
 * parser that doesn't impose character restrictions beyond what is
 * necessary for parsing. This doesn't normalize path segments like `.`
 * or `..`. Use IsAcceptablePath() to check for those.
 *
 * This parser doesn't support the ability to accurately parse path
 * segments which contain percent-encoded slash.
 *
 * @param data is value like `/hi?x=y&z` or `http://a.example/hi#x`
 * @param size is byte length and -1 implies strlen
 * @param h is assumed to be uninitialized
 * @return memory backing UrlView needing free (and h.params.p too)
 */
char *ParseRequestUri(const char *data, size_t size, struct Url *h) {
return ParseUrlImpl(data, size, h, true);
}
/**
 * Parses HTTP POST key-value params.
 *
 * The syntax matches the parameters of a Request-URI, except that `+`
 * decodes to a space here. This is the format carried by the mime
 * type application/x-www-form-urlencoded.
 *
 * No charset is assumed. Decoded keys and values might hold NUL
 * characters, control codes, and non-canonical encodings.
 *
 * This routine cannot fail. It is a permissive parser that only
 * imposes the character restrictions that parsing itself requires.
 *
 * @param data is value like `foo=bar&x=y&z`
 * @param size is byte length and -1 implies strlen
 * @param h must be zeroed by caller and this appends if reused
 * @return buffer of `size` bytes backing the views, which needs free
 *     (and h.p needs free too)
 */
char *ParseParams(const char *data, size_t size, struct UrlParams *h) {
  struct UrlParser u;
  char *buf;
  if (size == (size_t)-1) {
    size = data ? strlen(data) : 0;
  }
  u.data = data;
  u.size = size;
  u.i = 0;
  u.c = 0;
  u.islatin1 = false;
  u.isform = true;
  buf = xmalloc(u.size);
  u.p = buf;
  u.q = buf;
  ParseKeyValues(&u, h);
  return buf;
}

36
net/http/url.h Normal file
View file

@ -0,0 +1,36 @@
#ifndef COSMOPOLITAN_NET_HTTP_URL_H_
#define COSMOPOLITAN_NET_HTTP_URL_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/* A span of bytes inside a buffer that the view doesn't own. The URL
   parsers return the backing buffer separately for the caller to free. */
struct UrlView {
size_t n; /* length of the span in bytes */
char *p; /* not allocated; not nul terminated */
};
/* Ordered list of key/value parameters. The array `p` is grown with
   realloc by the parser, so the caller must free it. */
struct UrlParams {
size_t n; /* number of entries in p */
struct Param {
struct UrlView key;
struct UrlView val; /* val.n may be SIZE_MAX (with val.p NULL) when the key had no `=` */
} * p;
};
/* Decoded components of a URL. ParseUrl() and ParseRequestUri() zero
   this struct first, so components missing from the input stay zeroed. */
struct Url {
struct UrlView scheme; /* text before the first `:` */
struct UrlView user; /* authority text before `@` (up to `:` if a password is given) */
struct UrlView pass; /* authority text between `:` and `@` */
struct UrlView host; /* authority text after any `@`, up to the port colon */
struct UrlView port; /* authority text after the host's `:` */
struct UrlView path; /* text up to `?` or `#` */
struct UrlParams params; /* key/values between `?` and `#` */
struct UrlView fragment; /* everything after `#` */
};
/* Each parser takes (data, size, out) where a size of -1 implies
   strlen, and returns the buffer that the output views point into.
   The caller frees that buffer as well as the params array. */
char *ParseUrl(const char *, size_t, struct Url *);
char *ParseParams(const char *, size_t, struct UrlParams *);
char *ParseRequestUri(const char *, size_t, struct Url *);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_NET_HTTP_URL_H_ */

View file

@ -31,7 +31,7 @@
*
* @param data is input value
* @param size if -1 implies strlen
* @param out_size if non-NULL receives output length on success
* @param out_size if non-NULL receives output length
* @return allocated NUL-terminated buffer, or NULL w/ errno
*/
char *VisualizeControlCodes(const char *data, size_t size, size_t *out_size) {
@ -40,7 +40,7 @@ char *VisualizeControlCodes(const char *data, size_t size, size_t *out_size) {
unsigned i, n;
wint_t x, a, b;
const char *p, *e;
if (size == -1) size = strlen(data);
if (size == -1) size = data ? strlen(data) : 0;
if ((r = malloc(size * 6 + 1))) {
q = r;
p = data;
@ -85,9 +85,14 @@ char *VisualizeControlCodes(const char *data, size_t size, size_t *out_size) {
} while ((w >>= 8));
}
}
if (out_size) *out_size = q - r;
n = q - r;
*q++ = '\0';
if ((q = realloc(r, q - r))) r = q;
} else {
n = 0;
}
if (out_size) {
*out_size = n;
}
return r;
}