mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 03:27:39 +00:00
Improve redbean method=get parameter handling
This commit is contained in:
parent
6e582d245b
commit
2cc1d5ac4c
9 changed files with 175 additions and 146 deletions
|
@ -183,7 +183,7 @@ int main(int argc, char *argv[]) {
|
|||
struct Url url;
|
||||
char *host, *port;
|
||||
bool usessl = false;
|
||||
_gc(ParseUrl(urlarg, -1, &url));
|
||||
_gc(ParseUrl(urlarg, -1, &url, kUrlPlus));
|
||||
_gc(url.params.p);
|
||||
if (url.scheme.n) {
|
||||
if (url.scheme.n == 5 && !memcasecmp(url.scheme.p, "https", 5)) {
|
||||
|
|
|
@ -28,8 +28,7 @@
|
|||
struct UrlParser {
|
||||
char *p, *q;
|
||||
const char *s;
|
||||
unsigned c, i, n;
|
||||
char isform, islatin1, isopaque;
|
||||
unsigned c, i, n, f;
|
||||
};
|
||||
|
||||
static void EmitLatin1(char **p, int c) {
|
||||
|
@ -99,7 +98,7 @@ static bool ParseScheme(struct UrlParser *u, struct Url *h) {
|
|||
return false;
|
||||
}
|
||||
} else {
|
||||
u->isopaque = true;
|
||||
u->f |= kUrlOpaque;
|
||||
return false;
|
||||
}
|
||||
} else if (u->c == '#' || u->c == '?') {
|
||||
|
@ -110,7 +109,7 @@ static bool ParseScheme(struct UrlParser *u, struct Url *h) {
|
|||
} else if (u->c == '%') {
|
||||
ParseEscape(u);
|
||||
return false;
|
||||
} else if (u->c >= 0200 && u->islatin1) {
|
||||
} else if (u->c >= 0200 && (u->f & kUrlLatin1)) {
|
||||
EmitLatin1(&u->p, u->c);
|
||||
return false;
|
||||
} else {
|
||||
|
@ -161,7 +160,7 @@ static void ParseAuthority(struct UrlParser *u, struct Url *h) {
|
|||
u->q = u->p;
|
||||
} else if (u->c == '%') {
|
||||
ParseEscape(u);
|
||||
} else if (u->c >= 0200 && u->islatin1) {
|
||||
} else if (u->c >= 0200 && (u->f & kUrlLatin1)) {
|
||||
EmitLatin1(&u->p, u->c);
|
||||
} else {
|
||||
*u->p++ = u->c;
|
||||
|
@ -188,11 +187,11 @@ static void ParsePath(struct UrlParser *u, struct UrlView *h) {
|
|||
u->c = u->s[u->i++] & 255;
|
||||
if (u->c == '#') {
|
||||
break;
|
||||
} else if (u->c == '?' && !u->isopaque) {
|
||||
} else if (u->c == '?' && !(u->f & kUrlOpaque)) {
|
||||
break;
|
||||
} else if (u->c == '%') {
|
||||
ParseEscape(u);
|
||||
} else if (u->c >= 0200 && u->islatin1) {
|
||||
} else if (u->c >= 0200 && (u->f & kUrlLatin1)) {
|
||||
EmitLatin1(&u->p, u->c);
|
||||
} else {
|
||||
*u->p++ = u->c;
|
||||
|
@ -213,7 +212,7 @@ static void ParseQuery(struct UrlParser *u, struct UrlParams *h) {
|
|||
} else if (u->c == '%') {
|
||||
ParseEscape(u);
|
||||
} else if (u->c == '+') {
|
||||
*u->p++ = u->isform ? ' ' : '+';
|
||||
*u->p++ = (u->f & kUrlPlus) ? ' ' : '+';
|
||||
} else if (u->c == '&') {
|
||||
EmitVal(u, h, t);
|
||||
t = false;
|
||||
|
@ -223,7 +222,7 @@ static void ParseQuery(struct UrlParser *u, struct UrlParams *h) {
|
|||
} else {
|
||||
*u->p++ = '=';
|
||||
}
|
||||
} else if (u->c >= 0200 && u->islatin1) {
|
||||
} else if (u->c >= 0200 && (u->f & kUrlLatin1)) {
|
||||
EmitLatin1(&u->p, u->c);
|
||||
} else {
|
||||
*u->p++ = u->c;
|
||||
|
@ -237,7 +236,7 @@ static void ParseFragment(struct UrlParser *u, struct UrlView *h) {
|
|||
u->c = u->s[u->i++] & 255;
|
||||
if (u->c == '%') {
|
||||
ParseEscape(u);
|
||||
} else if (u->c >= 0200 && u->islatin1) {
|
||||
} else if (u->c >= 0200 && (u->f & kUrlLatin1)) {
|
||||
EmitLatin1(&u->p, u->c);
|
||||
} else {
|
||||
*u->p++ = u->c;
|
||||
|
@ -248,28 +247,6 @@ static void ParseFragment(struct UrlParser *u, struct UrlView *h) {
|
|||
u->q = u->p;
|
||||
}
|
||||
|
||||
static char *ParseUrlImpl(const char *s, size_t n, struct Url *h, bool latin1) {
|
||||
char *m;
|
||||
struct UrlParser u;
|
||||
if (n == -1) n = s ? strlen(s) : 0;
|
||||
u.i = 0;
|
||||
u.c = 0;
|
||||
u.s = s;
|
||||
u.n = n;
|
||||
u.isform = false;
|
||||
u.isopaque = false;
|
||||
u.islatin1 = latin1;
|
||||
bzero(h, sizeof(*h));
|
||||
if ((m = malloc(latin1 ? u.n * 2 : u.n))) {
|
||||
u.q = u.p = m;
|
||||
if (ParseScheme(&u, h)) ParseAuthority(&u, h);
|
||||
if (u.c != '#' && u.c != '?') ParsePath(&u, &h->path);
|
||||
if (u.c == '?') ParseQuery(&u, &h->params);
|
||||
if (u.c == '#') ParseFragment(&u, &h->fragment);
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses URL.
|
||||
*
|
||||
|
@ -298,43 +275,39 @@ static char *ParseUrlImpl(const char *s, size_t n, struct Url *h, bool latin1) {
|
|||
* @param s is value like `/hi?x=y&z` or `http://a.example/hi#x`
|
||||
* @param n is byte length and -1 implies strlen
|
||||
* @param h is assumed to be uninitialized
|
||||
* @param f is flags which may have:
|
||||
* - `kUrlPlus` to turn `+` into space in query params
|
||||
* - `kUrlLatin1` to transcode ISO-8859-1 input into UTF-8
|
||||
* @return memory backing UrlView needing free (and h.params.p too)
|
||||
* @see URI Generic Syntax RFC3986 RFC2396
|
||||
* @see EncodeUrl()
|
||||
*/
|
||||
char *ParseUrl(const char *s, size_t n, struct Url *h) {
|
||||
return ParseUrlImpl(s, n, h, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses HTTP Request-URI.
|
||||
*
|
||||
* The input is ISO-8859-1 which is transcoded to UTF-8. Therefore we
|
||||
* assume percent-encoded bytes are expressed as UTF-8. Returned values
|
||||
* might contain things like NUL characters, C0, and C1 control codes.
|
||||
* UTF-8 isn't checked for validity and may contain overlong values.
|
||||
* Absent can be discerned from empty by checking if the pointer is set.
|
||||
*
|
||||
* There's no failure condition for this routine. This is a permissive
|
||||
* parser that doesn't impose character restrictions beyond what is
|
||||
* necessary for parsing. This doesn't normalize path segments like `.`
|
||||
* or `..`. Use IsAcceptablePath() to check for those.
|
||||
*
|
||||
* @param s is value like `/hi?x=y&z` or `http://a.example/hi#x`
|
||||
* @param n is byte length and -1 implies strlen
|
||||
* @param h is assumed to be uninitialized
|
||||
* @return memory backing UrlView needing free (and h.params.p too)
|
||||
*/
|
||||
char *ParseRequestUri(const char *s, size_t n, struct Url *h) {
|
||||
return ParseUrlImpl(s, n, h, true);
|
||||
char *ParseUrl(const char *s, size_t n, struct Url *h, int f) {
|
||||
char *m;
|
||||
struct UrlParser u;
|
||||
if (n == -1) n = s ? strlen(s) : 0;
|
||||
u.i = 0;
|
||||
u.c = 0;
|
||||
u.s = s;
|
||||
u.n = n;
|
||||
u.f = f;
|
||||
bzero(h, sizeof(*h));
|
||||
if ((m = malloc((f & kUrlLatin1) ? u.n * 2 : u.n))) {
|
||||
u.q = u.p = m;
|
||||
if (ParseScheme(&u, h)) ParseAuthority(&u, h);
|
||||
if (u.c != '#' && u.c != '?') ParsePath(&u, &h->path);
|
||||
if (u.c == '?') ParseQuery(&u, &h->params);
|
||||
if (u.c == '#') ParseFragment(&u, &h->fragment);
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses HTTP POST key-value params.
|
||||
*
|
||||
* These are similar to the parameters found in a Request-URI. The main
|
||||
* difference is that `+` is translated into space here. The mime type
|
||||
* for this is application/x-www-form-urlencoded.
|
||||
* These are similar to the parameters found in a Request-URI, except
|
||||
* usually submitted via an HTTP POST request. We translate `+` into
|
||||
* space. The mime type is application/x-www-form-urlencoded.
|
||||
*
|
||||
* This parser is charset agnostic. Returned values might contain things
|
||||
* like NUL characters, control codes, and non-canonical encodings.
|
||||
|
@ -357,9 +330,7 @@ char *ParseParams(const char *s, size_t n, struct UrlParams *h) {
|
|||
u.s = s;
|
||||
u.n = n;
|
||||
u.c = '?';
|
||||
u.isform = true;
|
||||
u.islatin1 = false;
|
||||
u.isopaque = false;
|
||||
u.f = kUrlPlus;
|
||||
if ((m = malloc(u.n))) {
|
||||
u.q = u.p = m;
|
||||
ParseQuery(&u, h);
|
||||
|
@ -399,9 +370,7 @@ char *ParseHost(const char *s, size_t n, struct Url *h) {
|
|||
u.c = 0;
|
||||
u.s = s;
|
||||
u.n = n;
|
||||
u.isform = false;
|
||||
u.islatin1 = true;
|
||||
u.isopaque = false;
|
||||
u.f = kUrlLatin1;
|
||||
if ((m = malloc(u.n * 2))) {
|
||||
u.q = u.p = m;
|
||||
ParseAuthority(&u, h);
|
||||
|
|
|
@ -1,5 +1,10 @@
|
|||
#ifndef COSMOPOLITAN_NET_HTTP_URL_H_
|
||||
#define COSMOPOLITAN_NET_HTTP_URL_H_
|
||||
|
||||
#define kUrlPlus 1
|
||||
#define kUrlLatin1 2
|
||||
#define kUrlOpaque 4
|
||||
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
|
@ -28,9 +33,8 @@ struct Url {
|
|||
};
|
||||
|
||||
char *EncodeUrl(struct Url *, size_t *);
|
||||
char *ParseUrl(const char *, size_t, struct Url *);
|
||||
char *ParseUrl(const char *, size_t, struct Url *, int);
|
||||
char *ParseParams(const char *, size_t, struct UrlParams *);
|
||||
char *ParseRequestUri(const char *, size_t, struct Url *);
|
||||
char *ParseHost(const char *, size_t, struct Url *);
|
||||
char *EscapeUrlView(char *, struct UrlView *, const char[256]);
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/fmt/internal.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/mem/gc.internal.h"
|
||||
#include "libc/mem/mem.h"
|
||||
|
@ -29,7 +30,7 @@
|
|||
|
||||
TEST(ParseUrl, testEmpty) {
|
||||
struct Url h;
|
||||
gc(ParseUrl(0, 0, &h));
|
||||
gc(ParseUrl(0, 0, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(0, h.params.n);
|
||||
ASSERT_STREQ("", gc(EncodeUrl(&h, 0)));
|
||||
|
@ -37,7 +38,7 @@ TEST(ParseUrl, testEmpty) {
|
|||
|
||||
TEST(ParseUrl, testFragment) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("#x", -1, &h));
|
||||
gc(ParseUrl("#x", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(0, h.path.n);
|
||||
ASSERT_EQ(1, h.fragment.n);
|
||||
|
@ -47,7 +48,7 @@ TEST(ParseUrl, testFragment) {
|
|||
|
||||
TEST(ParseUrl, testFragmentAbsent_isNull) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("", -1, &h));
|
||||
gc(ParseUrl("", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(0, h.fragment.p);
|
||||
ASSERT_EQ(0, h.fragment.n);
|
||||
|
@ -56,7 +57,7 @@ TEST(ParseUrl, testFragmentAbsent_isNull) {
|
|||
|
||||
TEST(ParseUrl, testFragmentEmpty_isNonNull) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("#", -1, &h)); /* python's uri parser is wrong here */
|
||||
gc(ParseUrl("#", -1, &h, 0)); /* python's uri parser is wrong here */
|
||||
gc(h.params.p);
|
||||
ASSERT_NE(0, h.fragment.p);
|
||||
ASSERT_EQ(0, h.fragment.n);
|
||||
|
@ -65,7 +66,7 @@ TEST(ParseUrl, testFragmentEmpty_isNonNull) {
|
|||
|
||||
TEST(ParseUrl, testPathFragment) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("x#y", -1, &h));
|
||||
gc(ParseUrl("x#y", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(1, h.path.n);
|
||||
ASSERT_EQ('x', h.path.p[0]);
|
||||
|
@ -76,7 +77,7 @@ TEST(ParseUrl, testPathFragment) {
|
|||
|
||||
TEST(ParseUrl, testAbsolutePath) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("/x/y", -1, &h));
|
||||
gc(ParseUrl("/x/y", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(4, h.path.n);
|
||||
ASSERT_BINEQ(u"/x/y", h.path.p);
|
||||
|
@ -85,7 +86,7 @@ TEST(ParseUrl, testAbsolutePath) {
|
|||
|
||||
TEST(ParseUrl, testRelativePath1) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("x", -1, &h));
|
||||
gc(ParseUrl("x", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(1, h.path.n);
|
||||
ASSERT_EQ('x', h.path.p[0]);
|
||||
|
@ -94,7 +95,7 @@ TEST(ParseUrl, testRelativePath1) {
|
|||
|
||||
TEST(ParseUrl, testOptions) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("*", -1, &h));
|
||||
gc(ParseUrl("*", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(1, h.path.n);
|
||||
ASSERT_EQ('*', h.path.p[0]);
|
||||
|
@ -103,7 +104,7 @@ TEST(ParseUrl, testOptions) {
|
|||
|
||||
TEST(ParseUrl, testRelativePath2) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("x/y", -1, &h));
|
||||
gc(ParseUrl("x/y", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(3, h.path.n);
|
||||
ASSERT_BINEQ(u"x/y", h.path.p);
|
||||
|
@ -112,7 +113,7 @@ TEST(ParseUrl, testRelativePath2) {
|
|||
|
||||
TEST(ParseUrl, testRoot) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("/", -1, &h));
|
||||
gc(ParseUrl("/", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(1, h.path.n);
|
||||
ASSERT_EQ('/', h.path.p[0]);
|
||||
|
@ -121,7 +122,7 @@ TEST(ParseUrl, testRoot) {
|
|||
|
||||
TEST(ParseUrl, testSchemePath) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("x:y", -1, &h));
|
||||
gc(ParseUrl("x:y", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(1, h.scheme.n);
|
||||
ASSERT_BINEQ(u"x", h.scheme.p);
|
||||
|
@ -132,7 +133,7 @@ TEST(ParseUrl, testSchemePath) {
|
|||
|
||||
TEST(ParseUrl, testSchemeAuthority) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("x://y", -1, &h));
|
||||
gc(ParseUrl("x://y", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(1, h.scheme.n);
|
||||
ASSERT_EQ('x', h.scheme.p[0]);
|
||||
|
@ -141,9 +142,37 @@ TEST(ParseUrl, testSchemeAuthority) {
|
|||
ASSERT_STREQ("x://y", gc(EncodeUrl(&h, 0)));
|
||||
}
|
||||
|
||||
TEST(ParseUrl, testParamsPlus_maybeYes) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("x?q=hi+there", -1, &h, kUrlPlus));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(1, h.path.n);
|
||||
ASSERT_BINEQ(u"x", h.path.p);
|
||||
ASSERT_EQ(1, h.params.n);
|
||||
ASSERT_EQ(1, h.params.p[0].key.n);
|
||||
ASSERT_EQ(8, h.params.p[0].val.n);
|
||||
ASSERT_BINEQ(u"q", h.params.p[0].key.p);
|
||||
ASSERT_BINEQ(u"hi there", h.params.p[0].val.p);
|
||||
ASSERT_STREQ("x?q=hi%20there", gc(EncodeUrl(&h, 0)));
|
||||
}
|
||||
|
||||
TEST(ParseUrl, testParamsPlus_maybeNot) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("x?q=hi+there", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(1, h.path.n);
|
||||
ASSERT_BINEQ(u"x", h.path.p);
|
||||
ASSERT_EQ(1, h.params.n);
|
||||
ASSERT_EQ(1, h.params.p[0].key.n);
|
||||
ASSERT_EQ(8, h.params.p[0].val.n);
|
||||
ASSERT_BINEQ(u"q", h.params.p[0].key.p);
|
||||
ASSERT_BINEQ(u"hi+there", h.params.p[0].val.p);
|
||||
ASSERT_STREQ("x?q=hi%2Bthere", gc(EncodeUrl(&h, 0)));
|
||||
}
|
||||
|
||||
TEST(ParseUrl, testParamsQuestion_doesntTurnIntoSpace) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("x?+", -1, &h));
|
||||
gc(ParseUrl("x?+", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(1, h.path.n);
|
||||
ASSERT_BINEQ(u"x", h.path.p);
|
||||
|
@ -155,7 +184,7 @@ TEST(ParseUrl, testParamsQuestion_doesntTurnIntoSpace) {
|
|||
|
||||
TEST(ParseUrl, testUrl) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("a://b:B@c:C/d?e#f", -1, &h));
|
||||
gc(ParseUrl("a://b:B@c:C/d?e#f", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(1, h.scheme.n);
|
||||
ASSERT_EQ('a', h.scheme.p[0]);
|
||||
|
@ -180,7 +209,7 @@ TEST(ParseUrl, testUrl) {
|
|||
|
||||
TEST(ParseUrl, testEmptyQueryKeyVal_decodesToEmptyStrings) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("?=", -1, &h));
|
||||
gc(ParseUrl("?=", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(1, h.params.n);
|
||||
ASSERT_EQ(0, h.params.p[0].key.n);
|
||||
|
@ -192,7 +221,7 @@ TEST(ParseUrl, testEmptyQueryKeyVal_decodesToEmptyStrings) {
|
|||
|
||||
TEST(ParseUrl, testMultipleEquals_goesIntoValue) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("?==", -1, &h));
|
||||
gc(ParseUrl("?==", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(1, h.params.n);
|
||||
ASSERT_EQ(0, h.params.p[0].key.n);
|
||||
|
@ -204,7 +233,7 @@ TEST(ParseUrl, testMultipleEquals_goesIntoValue) {
|
|||
|
||||
TEST(ParseUrl, testUrlWithoutScheme) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("//b@c/d?e#f", -1, &h));
|
||||
gc(ParseUrl("//b@c/d?e#f", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(0, h.scheme.n);
|
||||
ASSERT_EQ(1, h.user.n);
|
||||
|
@ -225,7 +254,7 @@ TEST(ParseUrl, testUrlWithoutScheme) {
|
|||
|
||||
TEST(ParseUrl, testUrlWithoutUser) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("a://c/d?e#f", -1, &h));
|
||||
gc(ParseUrl("a://c/d?e#f", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(1, h.scheme.n);
|
||||
ASSERT_EQ('a', h.scheme.p[0]);
|
||||
|
@ -248,11 +277,11 @@ TEST(ParseUrl, testUrlWithoutUser) {
|
|||
|
||||
TEST(ParseUrl, testEmptyParams_absentCanBeDiscerned) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("", -1, &h));
|
||||
gc(ParseUrl("", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(0, h.params.n);
|
||||
ASSERT_EQ(NULL, h.params.p);
|
||||
gc(ParseUrl("?", -1, &h)); /* python's uri parser is wrong here */
|
||||
gc(ParseUrl("?", -1, &h, 0)); /* python's uri parser is wrong here */
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(0, h.params.n);
|
||||
ASSERT_NE(NULL, h.params.p);
|
||||
|
@ -260,7 +289,7 @@ TEST(ParseUrl, testEmptyParams_absentCanBeDiscerned) {
|
|||
|
||||
TEST(ParseUrl, testWeirdAmps_areReproducible) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("?&&", -1, &h));
|
||||
gc(ParseUrl("?&&", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(3, h.params.n);
|
||||
ASSERT_EQ(0, h.params.p[0].key.n);
|
||||
|
@ -280,7 +309,7 @@ TEST(ParseUrl, testWeirdAmps_areReproducible) {
|
|||
|
||||
TEST(ParseUrl, testOpaquePart_canLetQuestionMarkGoInPath) {
|
||||
struct Url h; /* python's uri parser is wrong here */
|
||||
gc(ParseUrl("s:o!$%&'()*+,-./09:;=?@AZ_az#fragged", -1, &h));
|
||||
gc(ParseUrl("s:o!$%&'()*+,-./09:;=?@AZ_az#fragged", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(26, h.path.n);
|
||||
ASSERT_EQ(0, memcmp(h.path.p, "o!$%&'()*+,-./09:;=?@AZ_az", 26));
|
||||
|
@ -292,7 +321,7 @@ TEST(ParseUrl, testOpaquePart_canLetQuestionMarkGoInPath) {
|
|||
|
||||
TEST(ParseUrl, testSchemePathWithoutAuthority_paramsAreAllowed) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("s:/o!$%&'()*+,-./09:;=?@AZ_az#fragged", -1, &h));
|
||||
gc(ParseUrl("s:/o!$%&'()*+,-./09:;=?@AZ_az#fragged", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(20, h.path.n);
|
||||
ASSERT_EQ(0, memcmp(h.path.p, "/o!$%&'()*+,-./09:;=", 20));
|
||||
|
@ -303,7 +332,7 @@ TEST(ParseUrl, testSchemePathWithoutAuthority_paramsAreAllowed) {
|
|||
|
||||
TEST(ParseUrl, testOpaquePart_permitsPercentEncoding) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("s:%2Fo!$%&'()*+,-./09:;=?@AZ_az#fragged", -1, &h));
|
||||
gc(ParseUrl("s:%2Fo!$%&'()*+,-./09:;=?@AZ_az#fragged", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(27, h.path.n);
|
||||
ASSERT_EQ(0, memcmp(h.path.p, "/o!$%&'()*+,-./09:;=?@AZ_az", 27));
|
||||
|
@ -314,7 +343,7 @@ TEST(ParseUrl, testOpaquePart_permitsPercentEncoding) {
|
|||
|
||||
TEST(ParseUrl, testTelephone) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("tel:+1-212-867-5309", -1, &h));
|
||||
gc(ParseUrl("tel:+1-212-867-5309", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(15, h.path.n);
|
||||
ASSERT_BINEQ(u"+1-212-867-5309", h.path.p);
|
||||
|
@ -323,7 +352,7 @@ TEST(ParseUrl, testTelephone) {
|
|||
|
||||
TEST(ParseUrl, testLolv6) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("//[::1]:31337", -1, &h));
|
||||
gc(ParseUrl("//[::1]:31337", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(3, h.host.n);
|
||||
ASSERT_BINEQ(u"::1", h.host.p);
|
||||
|
@ -334,14 +363,14 @@ TEST(ParseUrl, testLolv6) {
|
|||
|
||||
TEST(ParseUrl, testLolV6_withoutPort) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("//[::1]", -1, &h));
|
||||
gc(ParseUrl("//[::1]", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_STREQ("//[::1]", gc(EncodeUrl(&h, 0)));
|
||||
}
|
||||
|
||||
TEST(ParseUrl, testLolv7) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("//[vf.::1]", -1, &h));
|
||||
gc(ParseUrl("//[vf.::1]", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(6, h.host.n);
|
||||
ASSERT_BINEQ(u"vf.::1", h.host.p);
|
||||
|
@ -352,14 +381,14 @@ TEST(ParseUrl, testLolv7) {
|
|||
|
||||
TEST(ParseUrl, testLolv7WithoutColon_weCantProduceLegalEncodingSadly) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("//[v7.7.7.7]", -1, &h));
|
||||
gc(ParseUrl("//[v7.7.7.7]", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_STREQ("//v7.7.7.7", gc(EncodeUrl(&h, 0)));
|
||||
}
|
||||
|
||||
TEST(ParseUrl, testObviouslyIllegalIpLiteral_getsTreatedAsRegName) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("//[vf.::1%00]", -1, &h));
|
||||
gc(ParseUrl("//[vf.::1%00]", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_STREQ("//vf.%3A%3A1%00", gc(EncodeUrl(&h, 0)));
|
||||
}
|
||||
|
@ -411,7 +440,7 @@ TEST(EncodeUrl, testHostPortPlacedInHostField_ungoodIdea) {
|
|||
|
||||
TEST(ParseUrl, testUrlWithoutParams) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("a://b@c/d#f", -1, &h));
|
||||
gc(ParseUrl("a://b@c/d#f", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(1, h.scheme.n);
|
||||
ASSERT_EQ('a', h.scheme.p[0]);
|
||||
|
@ -430,7 +459,7 @@ TEST(ParseUrl, testUrlWithoutParams) {
|
|||
TEST(ParseUrl, testLatin1_doesNothing) {
|
||||
struct Url h;
|
||||
const char b[1] = {0377};
|
||||
gc(ParseUrl(b, 1, &h));
|
||||
gc(ParseUrl(b, 1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(1, h.path.n);
|
||||
ASSERT_EQ(0, memcmp("\377", h.path.p, 1));
|
||||
|
@ -440,7 +469,7 @@ TEST(ParseUrl, testLatin1_doesNothing) {
|
|||
TEST(ParseRequestUri, testLatin1_expandsMemoryToUtf8) {
|
||||
struct Url h;
|
||||
const char b[1] = {0377};
|
||||
gc(ParseRequestUri(b, 1, &h));
|
||||
gc(ParseUrl(b, 1, &h, kUrlPlus | kUrlLatin1));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(2, h.path.n);
|
||||
ASSERT_EQ(0, memcmp("\303\277", h.path.p, 2));
|
||||
|
@ -448,7 +477,7 @@ TEST(ParseRequestUri, testLatin1_expandsMemoryToUtf8) {
|
|||
|
||||
TEST(ParseUrl, testPercentShrinkingMemory) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("%Ff", 3, &h));
|
||||
gc(ParseUrl("%Ff", 3, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(1, h.path.n);
|
||||
ASSERT_EQ(0, memcmp("\377", h.path.p, 1));
|
||||
|
@ -458,7 +487,7 @@ TEST(ParseUrl, testPercentShrinkingMemory) {
|
|||
TEST(ParseUrl, testEscapingWontOverrun) {
|
||||
struct Url h;
|
||||
char b[1] = {'%'};
|
||||
gc(ParseUrl(b, 1, &h));
|
||||
gc(ParseUrl(b, 1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(1, h.path.n);
|
||||
ASSERT_EQ(0, memcmp("%", h.path.p, 1));
|
||||
|
@ -467,7 +496,7 @@ TEST(ParseUrl, testEscapingWontOverrun) {
|
|||
|
||||
TEST(ParseUrl, testBadPercent_getsIgnored) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("%FZ", 3, &h));
|
||||
gc(ParseUrl("%FZ", 3, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(3, h.path.n);
|
||||
ASSERT_EQ(0, memcmp("%FZ", h.path.p, 3));
|
||||
|
@ -475,7 +504,7 @@ TEST(ParseUrl, testBadPercent_getsIgnored) {
|
|||
|
||||
TEST(ParseUrl, testFileUrl) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("file:///etc/passwd", -1, &h));
|
||||
gc(ParseUrl("file:///etc/passwd", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(4, h.scheme.n);
|
||||
ASSERT_BINEQ(u"file", h.scheme.p);
|
||||
|
@ -491,7 +520,7 @@ TEST(ParseUrl, testFileUrl) {
|
|||
TEST(EncodeUrl, testModifyingParseResultAndReencoding_addsStructure) {
|
||||
size_t n;
|
||||
struct Url h;
|
||||
gc(ParseUrl("rel", -1, &h));
|
||||
gc(ParseUrl("rel", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
h.host.n = 7;
|
||||
h.host.p = "justine";
|
||||
|
@ -580,14 +609,14 @@ TEST(EncodeUrl, testEmptyRegName_isLegal) {
|
|||
|
||||
TEST(ParseUrl, testEmptyScheme_isNotPossible) {
|
||||
struct Url h;
|
||||
gc(ParseUrl(":", -1, &h));
|
||||
gc(ParseUrl(":", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(0, h.scheme.n);
|
||||
ASSERT_EQ(0, h.scheme.p);
|
||||
ASSERT_EQ(1, h.path.n);
|
||||
ASSERT_EQ(':', h.path.p[0]);
|
||||
ASSERT_STREQ(":", gc(EncodeUrl(&h, 0)));
|
||||
gc(ParseUrl("://hi", -1, &h));
|
||||
gc(ParseUrl("://hi", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(0, h.scheme.n);
|
||||
ASSERT_EQ(0, h.scheme.p);
|
||||
|
@ -598,7 +627,7 @@ TEST(ParseUrl, testEmptyScheme_isNotPossible) {
|
|||
|
||||
TEST(ParseUrl, testDataUri) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("data:image/png;base64,09AZaz+/==", -1, &h));
|
||||
gc(ParseUrl("data:image/png;base64,09AZaz+/==", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(0, h.host.n);
|
||||
ASSERT_EQ(0, h.host.p);
|
||||
|
@ -611,7 +640,7 @@ TEST(ParseUrl, testDataUri) {
|
|||
|
||||
TEST(ParseUrl, testBadSchemeCharacter_parserAssumesItsPath) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("fil\e://hi", -1, &h));
|
||||
gc(ParseUrl("fil\e://hi", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
ASSERT_EQ(0, h.scheme.n);
|
||||
ASSERT_EQ(0, h.scheme.p);
|
||||
|
@ -673,7 +702,7 @@ TEST(ParseRequestUri, fuzz) {
|
|||
for (j = 0; j < sizeof(B); ++j) {
|
||||
B[j] = C[rand() % sizeof(C)];
|
||||
}
|
||||
free(ParseRequestUri(B, 8, &h));
|
||||
free(ParseUrl(B, 8, &h, kUrlPlus | kUrlLatin1));
|
||||
free(h.params.p);
|
||||
}
|
||||
}
|
||||
|
@ -687,11 +716,11 @@ void A(void) {
|
|||
BENCH(ParseUrl, bench) {
|
||||
struct Url h;
|
||||
EZBENCH2("ParseParams hyperion", donothing, A());
|
||||
EZBENCH2("ParseUrl a", donothing, free(ParseUrl("a", -1, &h)));
|
||||
EZBENCH2("ParseUrl a", donothing, free(ParseUrl("a", -1, &h, false)));
|
||||
EZBENCH2("ParseUrl a://b@c/d#f", donothing,
|
||||
free(ParseUrl("a://b@c/d#f", -1, &h)));
|
||||
free(ParseUrl("a://b@c/d#f", -1, &h, false)));
|
||||
EZBENCH2("ParseUrl a://b@c/d?z#f", donothing, ({
|
||||
free(ParseUrl("a://b@c/?zd#f", -1, &h));
|
||||
free(ParseUrl("a://b@c/?zd#f", -1, &h, 0));
|
||||
free(h.params.p);
|
||||
}));
|
||||
EZBENCH2("ParseHost", donothing, free(ParseHost("127.0.0.1:34832", 15, &h)));
|
||||
|
@ -700,14 +729,14 @@ BENCH(ParseUrl, bench) {
|
|||
|
||||
BENCH(EncodeUrl, bench) {
|
||||
struct Url h;
|
||||
gc(ParseUrl("a", -1, &h));
|
||||
gc(ParseUrl("a", -1, &h, 0));
|
||||
EZBENCH2("EncodeUrl a", donothing, free(EncodeUrl(&h, 0)));
|
||||
gc(ParseUrl("a://b@c/d#f", -1, &h));
|
||||
gc(ParseUrl("a://b@c/d#f", -1, &h, 0));
|
||||
EZBENCH2("EncodeUrl a://b@c/d#f", donothing, free(EncodeUrl(&h, 0)));
|
||||
gc(ParseUrl("a://b@c/?zd#f", -1, &h));
|
||||
gc(ParseUrl("a://b@c/?zd#f", -1, &h, 0));
|
||||
gc(h.params.p);
|
||||
EZBENCH2("EncodeUrl a://b@c/d?z#f", donothing, free(EncodeUrl(&h, 0)));
|
||||
gc(ParseUrl(kHyperion, kHyperionSize, &h));
|
||||
gc(ParseUrl(kHyperion, kHyperionSize, &h, 0));
|
||||
gc(h.params.p);
|
||||
EZBENCH2("EncodeUrl hyperion", donothing, free(EncodeUrl(&h, 0)));
|
||||
}
|
||||
|
|
4
third_party/lua/luaparseurl.c
vendored
4
third_party/lua/luaparseurl.c
vendored
|
@ -36,12 +36,14 @@ static void LuaSetUrlView(lua_State *L, struct UrlView *v, const char *k) {
|
|||
}
|
||||
|
||||
int LuaParseUrl(lua_State *L) {
|
||||
int f;
|
||||
void *m;
|
||||
size_t n;
|
||||
struct Url h;
|
||||
const char *p;
|
||||
p = luaL_checklstring(L, 1, &n);
|
||||
m = ParseUrl(p, n, &h);
|
||||
f = luaL_optinteger(L, 2, 0);
|
||||
m = ParseUrl(p, n, &h, f);
|
||||
lua_newtable(L);
|
||||
LuaSetUrlView(L, &h.scheme, "scheme");
|
||||
LuaSetUrlView(L, &h.user, "user");
|
||||
|
|
|
@ -107,7 +107,7 @@ static int LuaFetch(lua_State *L) {
|
|||
/*
|
||||
* Parse URL.
|
||||
*/
|
||||
_gc(ParseUrl(urlarg, urlarglen, &url));
|
||||
_gc(ParseUrl(urlarg, urlarglen, &url, true));
|
||||
_gc(url.params.p);
|
||||
usingssl = false;
|
||||
if (url.scheme.n) {
|
||||
|
|
|
@ -1335,28 +1335,50 @@ FUNCTIONS
|
|||
Converts RFC1123 string that looks like this: Mon, 29 Mar 2021
|
||||
15:37:13 GMT to a UNIX timestamp. See parsehttpdatetime.c.
|
||||
|
||||
ParseUrl(str) → URL
|
||||
Parses URL, returning object having the following fields: scheme,
|
||||
user, pass, host, port, path, params, fragment. This parser is
|
||||
charset agnostic. Percent encoded bytes are decoded for all
|
||||
fields. Returned values might contain things like NUL characters,
|
||||
spaces, control codes, and non-canonical encodings. Absent can be
|
||||
discerned from empty by checking if the pointer is set. There's no
|
||||
failure condition for this routine. This is a permissive parser.
|
||||
This doesn't normalize path segments like `.` or `..` so use
|
||||
IsAcceptablePath() to check for those. No restrictions are imposed
|
||||
beyond that which is strictly necessary for parsing. All the data
|
||||
that is provided will be consumed to the one of the fields. Strict
|
||||
conformance is enforced on some fields more than others, like
|
||||
scheme, since it's the most non-deterministically defined field of
|
||||
them all. Please note this is a URL parser, not a URI parser.
|
||||
Which means we support everything everything the URI spec says we
|
||||
should do except for the things we won't do, like tokenizing path
|
||||
segments into an array and then nesting another array beneath each
|
||||
of those for storing semicolon parameters. So this parser won't
|
||||
make SIP easy. What it can do is parse HTTP URLs and most URIs
|
||||
like data:opaque, better in fact than most things which claim to
|
||||
be URI parsers.
|
||||
ParseUrl(url:str[, flags:int]) → URL
|
||||
|
||||
Parses URL.
|
||||
|
||||
An object containing the following fields is returned:
|
||||
|
||||
- `scheme` is a string, e.g. `"http"`
|
||||
- `user` is the username string, or nil if absent
|
||||
- `pass` is the password string, or nil if absent
|
||||
- `host` is the hostname string, or nil if `url` was a path
|
||||
- `port` is the port string, or nil if absent
|
||||
- `path` is the path string, or nil if absent
|
||||
- `params` is the URL parameters, e.g. `/?a=b&c` would be
|
||||
represented as the data structure `{{"a", "b"}, {"c"}, ...}`
|
||||
- `fragment` is the stuff after the `#` character
|
||||
|
||||
`flags` may have:
|
||||
|
||||
- `kUrlPlus` to turn `+` into space
|
||||
- `kUrlLatin1` to transcode ISO-8859-1 input into UTF-8
|
||||
|
||||
This parser is charset agnostic. Percent encoded bytes are
|
||||
decoded for all fields. Returned values might contain things
|
||||
like NUL characters, spaces, control codes, and non-canonical
|
||||
encodings. Absent can be discerned from empty by checking if
|
||||
the pointer is set.
|
||||
|
||||
There's no failure condition for this routine. This is a
|
||||
permissive parser. This doesn't normalize path segments like
|
||||
`.` or `..` so use IsAcceptablePath() to check for those. No
|
||||
restrictions are imposed beyond that which is strictly
|
||||
necessary for parsing. All the data that is provided will be
|
||||
consumed into one of the fields. Strict conformance is
|
||||
enforced on some fields more than others, like scheme, since
|
||||
it's the most non-deterministically defined field of them all.
|
||||
|
||||
Please note this is a URL parser, not a URI parser. Which
|
||||
means we support everything the URI spec says we
|
||||
should do except for the things we won't do, like tokenizing
|
||||
path segments into an array and then nesting another array
|
||||
beneath each of those for storing semicolon parameters. So
|
||||
this parser won't make SIP easy. What it can do is parse HTTP
|
||||
URLs and most URIs like data:opaque, better in fact than most
|
||||
things which claim to be URI parsers.
|
||||
|
||||
IsAcceptablePath(str) → bool
|
||||
Returns true if path doesn't contain ".", ".." or "//" segments
|
||||
|
|
|
@ -99,6 +99,7 @@
|
|||
#include "net/http/escape.h"
|
||||
#include "net/http/http.h"
|
||||
#include "net/http/ip.h"
|
||||
#include "net/http/url.h"
|
||||
#include "net/https/https.h"
|
||||
#include "third_party/getopt/getopt.h"
|
||||
#include "third_party/lua/cosmo.h"
|
||||
|
@ -5123,6 +5124,8 @@ static void LuaStart(void) {
|
|||
LuaSetConstant(L, "kLogWarn", kLogWarn);
|
||||
LuaSetConstant(L, "kLogError", kLogError);
|
||||
LuaSetConstant(L, "kLogFatal", kLogFatal);
|
||||
LuaSetConstant(L, "kUrlPlus", kUrlPlus);
|
||||
LuaSetConstant(L, "kUrlLatin1", kUrlLatin1);
|
||||
// create a list of custom content types
|
||||
lua_pushlightuserdata(L, (void *)&ctIdx); // push address as unique key
|
||||
lua_newtable(L);
|
||||
|
@ -5673,8 +5676,8 @@ static char *SynchronizeStream(void) {
|
|||
|
||||
static void ParseRequestParameters(void) {
|
||||
uint32_t ip;
|
||||
FreeLater(ParseRequestUri(inbuf.p + cpm.msg.uri.a,
|
||||
cpm.msg.uri.b - cpm.msg.uri.a, &url));
|
||||
FreeLater(ParseUrl(inbuf.p + cpm.msg.uri.a, cpm.msg.uri.b - cpm.msg.uri.a,
|
||||
&url, kUrlPlus | kUrlLatin1));
|
||||
if (!url.host.p) {
|
||||
if (HasHeader(kHttpXForwardedHost) && //
|
||||
!GetRemoteAddr(&ip, 0) && IsTrustedProxy(ip)) {
|
||||
|
|
|
@ -401,7 +401,7 @@ int main(int argc, char *argv[]) {
|
|||
/*
|
||||
* Parse URL.
|
||||
*/
|
||||
_gc(ParseUrl(urlarg, -1, &url));
|
||||
_gc(ParseUrl(urlarg, -1, &url, kUrlPlus));
|
||||
_gc(url.params.p);
|
||||
usessl = false;
|
||||
if (url.scheme.n) {
|
||||
|
|
Loading…
Reference in a new issue