mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 19:43:32 +00:00
229 lines
10 KiB
C
229 lines
10 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8                                :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2020 Justine Alexandra Roberts Tunney                              │
│                                                                              │
│ Permission to use, copy, modify, and/or distribute this software for         │
│ any purpose with or without fee is hereby granted, provided that the         │
│ above copyright notice and this permission notice appear in all copies.      │
│                                                                              │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
│ PERFORMANCE OF THIS SOFTWARE.                                                │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "third_party/regex/regex.h"
|
||
#include "libc/mem/gc.internal.h"
|
||
#include "libc/mem/mem.h"
|
||
#include "libc/str/str.h"
|
||
#include "libc/testlib/ezbench.h"
|
||
#include "libc/testlib/testlib.h"
|
||
|
||
TEST(regex, test) {
|
||
regex_t rx;
|
||
EXPECT_EQ(REG_OK, regcomp(&rx, "^[A-Za-z\x7f-\uffff]{2}$", REG_EXTENDED));
|
||
EXPECT_EQ(REG_OK, regexec(&rx, "AZ", 0, NULL, 0));
|
||
EXPECT_EQ(REG_OK, regexec(&rx, "→→", 0, NULL, 0));
|
||
EXPECT_EQ(REG_NOMATCH, regexec(&rx, "A", 0, NULL, 0));
|
||
EXPECT_EQ(REG_NOMATCH, regexec(&rx, "→", 0, NULL, 0));
|
||
EXPECT_EQ(REG_NOMATCH, regexec(&rx, "0", 0, NULL, 0));
|
||
regfree(&rx);
|
||
}
|
||
|
||
TEST(regex, testDns) {
|
||
regex_t rx;
|
||
EXPECT_EQ(REG_OK, regcomp(&rx, "^[-._0-9A-Za-z]*$", REG_EXTENDED));
|
||
EXPECT_EQ(REG_OK, regexec(&rx, "", 0, NULL, 0));
|
||
EXPECT_EQ(REG_OK, regexec(&rx, "foo.com", 0, NULL, 0));
|
||
EXPECT_EQ(REG_NOMATCH, regexec(&rx, "bar@example", 0, NULL, 0));
|
||
regfree(&rx);
|
||
}
|
||
|
||
TEST(regex, testIpBasic) {
|
||
regex_t rx;
|
||
EXPECT_EQ(REG_OK, regcomp(&rx,
|
||
"^"
|
||
"\\([0-9][0-9]*\\)\\."
|
||
"\\([0-9][0-9]*\\)\\."
|
||
"\\([0-9][0-9]*\\)\\."
|
||
"\\([0-9][0-9]*\\)"
|
||
"$",
|
||
0));
|
||
const char *s = "127.0.0.1";
|
||
regmatch_t *m = gc(calloc(rx.re_nsub + 1, sizeof(regmatch_t)));
|
||
ASSERT_EQ(4, rx.re_nsub);
|
||
EXPECT_EQ(REG_OK, regexec(&rx, s, rx.re_nsub + 1, m, 0));
|
||
EXPECT_STREQ("127", gc(strndup(s + m[1].rm_so, m[1].rm_eo - m[1].rm_so)));
|
||
EXPECT_STREQ("0", gc(strndup(s + m[2].rm_so, m[2].rm_eo - m[2].rm_so)));
|
||
EXPECT_STREQ("0", gc(strndup(s + m[3].rm_so, m[3].rm_eo - m[3].rm_so)));
|
||
EXPECT_STREQ("1", gc(strndup(s + m[4].rm_so, m[4].rm_eo - m[4].rm_so)));
|
||
regfree(&rx);
|
||
}
|
||
|
||
TEST(regex, testIpExtended) {
|
||
regex_t rx;
|
||
EXPECT_EQ(REG_OK, regcomp(&rx,
|
||
"^"
|
||
"([0-9]{1,3})\\."
|
||
"([0-9]{1,3})\\."
|
||
"([0-9]{1,3})\\."
|
||
"([0-9]{1,3})"
|
||
"$",
|
||
REG_EXTENDED));
|
||
const char *s = "127.0.0.1";
|
||
regmatch_t *m = gc(calloc(rx.re_nsub + 1, sizeof(regmatch_t)));
|
||
ASSERT_EQ(4, rx.re_nsub);
|
||
EXPECT_EQ(REG_OK, regexec(&rx, s, rx.re_nsub + 1, m, 0));
|
||
EXPECT_STREQ("127", gc(strndup(s + m[1].rm_so, m[1].rm_eo - m[1].rm_so)));
|
||
EXPECT_STREQ("0", gc(strndup(s + m[2].rm_so, m[2].rm_eo - m[2].rm_so)));
|
||
EXPECT_STREQ("0", gc(strndup(s + m[3].rm_so, m[3].rm_eo - m[3].rm_so)));
|
||
EXPECT_STREQ("1", gc(strndup(s + m[4].rm_so, m[4].rm_eo - m[4].rm_so)));
|
||
regfree(&rx);
|
||
}
|
||
|
||
TEST(regex, testUnicodeCharacterClass) {
|
||
regex_t rx;
|
||
EXPECT_EQ(REG_OK, regcomp(&rx, "^[[:alpha:]][[:alpha:]]$", 0));
|
||
EXPECT_EQ(REG_OK, regexec(&rx, "𝐵𝑏", 0, 0, 0));
|
||
EXPECT_NE(REG_OK, regexec(&rx, "₀₁", 0, 0, 0));
|
||
regfree(&rx);
|
||
}
|
||
|
||
/**
 * Compiles an extended regex, runs it once against "foo.com", and frees
 * it again. Timed by the "easy api extended" benchmark.
 */
void A(void) {
  regex_t rx;
  // regcomp() returns nonzero on failure, in which case rx must not be
  // handed to regexec() or regfree().
  if (regcomp(&rx, "^[-._0-9A-Za-z]*$", REG_EXTENDED)) {
    return;
  }
  regexec(&rx, "foo.com", 0, NULL, 0);
  regfree(&rx);
}
|
||
/**
 * Runs an already compiled regex once against "foo.com" without asking
 * for match offsets; the result is discarded.
 *
 * @param rx is a regex previously set up by regcomp()
 */
void B(regex_t *rx) {
  static const char kSubject[] = "foo.com";
  (void)regexec(rx, kSubject, 0, NULL, 0);
}
|
||
/**
 * Compiles a basic (flags = 0) regex, runs it once against "foo.com",
 * and frees it again. Timed by the "easy api basic" benchmark.
 */
void C(void) {
  regex_t rx;
  // regcomp() returns nonzero on failure, in which case rx must not be
  // handed to regexec() or regfree().
  if (regcomp(&rx, "^[-._0-9A-Za-z]*$", 0)) {
    return;
  }
  regexec(&rx, "foo.com", 0, NULL, 0);
  regfree(&rx);
}
|
||
/**
 * Runs an already compiled regex once against "127.0.0.1", requesting
 * offsets for the whole match and every subexpression.
 *
 * @param rx is a regex previously set up by regcomp()
 * @param m receives the offsets; it is passed to regexec() with a count
 *     of rx->re_nsub + 1
 */
void D(regex_t *rx, regmatch_t *m) {
  const size_t slots = rx->re_nsub + 1;
  (void)regexec(rx, "127.0.0.1", slots, m, 0);
}
|
||
|
||
TEST(ape, testPeMachoDd) {
|
||
regex_t rx;
|
||
ASSERT_EQ(REG_OK, regcomp(&rx,
|
||
"bs=" // dd block size arg
|
||
"(['\"] *)?" // #1 optional quote w/ space
|
||
"(\\$\\(\\( *)?" // #2 optional math w/ space
|
||
"([[:digit:]]+)" // #3
|
||
"( *\\)\\))?" // #4 optional math w/ space
|
||
"( *['\"])?" // #5 optional quote w/ space
|
||
" +" //
|
||
"skip=",
|
||
REG_EXTENDED));
|
||
EXPECT_EQ(REG_OK, regexec(&rx, "bs=123 skip=", 0, NULL, 0));
|
||
EXPECT_EQ(REG_OK, regexec(&rx, "bs=\"123\" skip=", 0, NULL, 0));
|
||
EXPECT_EQ(REG_OK, regexec(&rx, "bs=$((123 skip=", 0, NULL, 0));
|
||
EXPECT_EQ(REG_OK, regexec(&rx, "bs=\"$((123 skip=", 0, NULL, 0));
|
||
EXPECT_EQ(REG_NOMATCH, regexec(&rx, "bs= skip=", 0, NULL, 0));
|
||
EXPECT_EQ(REG_NOMATCH, regexec(&rx, "bs= 123 skip=", 0, NULL, 0));
|
||
EXPECT_EQ(REG_NOMATCH, regexec(&rx, "bs= 123skip=", 0, NULL, 0));
|
||
EXPECT_EQ(REG_OK, regexec(&rx, "bs=' 123' skip=", 0, NULL, 0));
|
||
EXPECT_EQ(REG_OK, regexec(&rx, "bs=$(( 123)) skip=", 0, NULL, 0));
|
||
EXPECT_EQ(REG_OK, regexec(&rx, "bs=\"$(( 123 ))\" skip=", 0, NULL, 0));
|
||
regfree(&rx);
|
||
}
|
||
|
||
TEST(ape, testPeMachoDd2) {
|
||
regex_t rx;
|
||
ASSERT_EQ(REG_OK, regcomp(&rx,
|
||
"bs=" // dd block size arg
|
||
"(['\"] *)?" // #1 optional quote w/ space
|
||
"(\\$\\(\\( *)?" // #2 optional math w/ space
|
||
"([[:digit:]]+)" // #3
|
||
"( *\\)\\))?" // #4 optional math w/ space
|
||
"( *['\"])?" // #5 optional quote w/ space
|
||
" +" //
|
||
"skip=" // dd skip arg
|
||
"(['\"] *)?" // #6 optional quote w/ space
|
||
"(\\$\\(\\( *)?" // #7 optional math w/ space
|
||
"([[:digit:]]+)" // #8
|
||
"( *\\)\\))?" // #9 optional math w/ space
|
||
"( *['\"])?" // #10 optional quote w/ space
|
||
" +" //
|
||
"count=" // dd count arg
|
||
"(['\"] *)?" // #11 optional quote w/ space
|
||
"(\\$\\(\\( *)?" // #12 optional math w/ space
|
||
"([[:digit:]]+)", // #13
|
||
REG_EXTENDED));
|
||
ASSERT_EQ(13, rx.re_nsub);
|
||
regmatch_t *m = gc(calloc(rx.re_nsub + 1, sizeof(regmatch_t)));
|
||
const char *s = "dd bs=123 skip=$(( 456)) count='7'";
|
||
EXPECT_EQ(REG_OK, regexec(&rx, s, rx.re_nsub + 1, m, 0));
|
||
EXPECT_STREQ("123", gc(strndup(s + m[3].rm_so, m[3].rm_eo - m[3].rm_so)));
|
||
EXPECT_STREQ("456", gc(strndup(s + m[8].rm_so, m[8].rm_eo - m[8].rm_so)));
|
||
EXPECT_STREQ("7", gc(strndup(s + m[13].rm_so, m[13].rm_eo - m[13].rm_so)));
|
||
regfree(&rx);
|
||
}
|
||
|
||
BENCH(regex, bench) {
|
||
regex_t rx;
|
||
regmatch_t *m;
|
||
regcomp(&rx, "^[-._0-9A-Za-z]*$", REG_EXTENDED);
|
||
EZBENCH2("precompiled extended", donothing, B(&rx));
|
||
regfree(&rx);
|
||
EZBENCH2("easy api extended", donothing, A());
|
||
EZBENCH2("easy api basic", donothing, C());
|
||
EXPECT_EQ(REG_OK, regcomp(&rx,
|
||
"^"
|
||
"\\([0-9][0-9]*\\)\\."
|
||
"\\([0-9][0-9]*\\)\\."
|
||
"\\([0-9][0-9]*\\)\\."
|
||
"\\([0-9][0-9]*\\)"
|
||
"$",
|
||
0));
|
||
m = calloc(rx.re_nsub + 1, sizeof(regmatch_t));
|
||
EZBENCH2("precompiled basic match", donothing, D(&rx, m));
|
||
free(m);
|
||
regfree(&rx);
|
||
EXPECT_EQ(REG_OK, regcomp(&rx,
|
||
"^"
|
||
"([0-9]{1,3})\\."
|
||
"([0-9]{1,3})\\."
|
||
"([0-9]{1,3})\\."
|
||
"([0-9]{1,3})"
|
||
"$",
|
||
REG_EXTENDED));
|
||
m = calloc(rx.re_nsub + 1, sizeof(regmatch_t));
|
||
EZBENCH2("precompiled extended match", donothing, D(&rx, m));
|
||
free(m);
|
||
regfree(&rx);
|
||
EXPECT_EQ(REG_OK, regcomp(&rx,
|
||
"^"
|
||
"([0-9]{1,3})\\."
|
||
"([0-9]{1,3})\\."
|
||
"([0-9]{1,3})\\."
|
||
"([0-9]{1,3})"
|
||
"$",
|
||
REG_EXTENDED | REG_NOSUB));
|
||
m = calloc(rx.re_nsub + 1, sizeof(regmatch_t));
|
||
EZBENCH2("precompiled nosub match", donothing, D(&rx, m));
|
||
free(m);
|
||
regfree(&rx);
|
||
EXPECT_EQ(REG_OK, regcomp(&rx, "^[a-z]*$", REG_EXTENDED | REG_NOSUB));
|
||
m = calloc(rx.re_nsub + 1, sizeof(regmatch_t));
|
||
EZBENCH2("precompiled alpha", donothing,
|
||
regexec(&rx, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 0, 0));
|
||
free(m);
|
||
regfree(&rx);
|
||
EXPECT_EQ(REG_OK,
|
||
regcomp(&rx, "^[a-z]*$", REG_EXTENDED | REG_NOSUB | REG_ICASE));
|
||
m = calloc(rx.re_nsub + 1, sizeof(regmatch_t));
|
||
EZBENCH2("precompiled alpha icase", donothing,
|
||
regexec(&rx, "aaaaaaaaaaaaaaaAAAAAAAAAAAAAA", 0, 0, 0));
|
||
free(m);
|
||
regfree(&rx);
|
||
}
|