2020-06-15 14:18:57 +00:00
|
|
|
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
|
|
|
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
|
|
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
|
|
|
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
|
|
|
|
│ │
|
2020-12-28 01:18:44 +00:00
|
|
|
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
|
|
|
|
│ any purpose with or without fee is hereby granted, provided that the │
|
|
|
|
|
│ above copyright notice and this permission notice appear in all copies. │
|
2020-06-15 14:18:57 +00:00
|
|
|
|
│ │
|
2020-12-28 01:18:44 +00:00
|
|
|
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
|
|
|
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
|
|
|
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
|
|
|
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
|
|
|
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
|
|
|
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
|
|
|
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
|
|
|
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
2020-06-15 14:18:57 +00:00
|
|
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
2022-06-09 03:01:28 +00:00
|
|
|
|
#include "libc/mem/mem.h"
|
2022-09-13 06:10:38 +00:00
|
|
|
|
#include "libc/mem/gc.internal.h"
|
2020-08-25 11:23:25 +00:00
|
|
|
|
#include "libc/str/str.h"
|
2021-04-21 02:14:21 +00:00
|
|
|
|
#include "libc/testlib/ezbench.h"
|
2020-06-15 14:18:57 +00:00
|
|
|
|
#include "libc/testlib/testlib.h"
|
2020-08-25 11:23:25 +00:00
|
|
|
|
#include "third_party/regex/regex.h"
|
2020-06-15 14:18:57 +00:00
|
|
|
|
|
2020-08-25 11:23:25 +00:00
|
|
|
|
TEST(regex, test) {
|
|
|
|
|
regex_t rx;
|
|
|
|
|
EXPECT_EQ(REG_OK, regcomp(&rx, "^[A-Za-z\x7f-\uffff]{2}$", REG_EXTENDED));
|
|
|
|
|
EXPECT_EQ(REG_OK, regexec(&rx, "AZ", 0, NULL, 0));
|
|
|
|
|
EXPECT_EQ(REG_OK, regexec(&rx, "→→", 0, NULL, 0));
|
|
|
|
|
EXPECT_EQ(REG_NOMATCH, regexec(&rx, "A", 0, NULL, 0));
|
|
|
|
|
EXPECT_EQ(REG_NOMATCH, regexec(&rx, "→", 0, NULL, 0));
|
|
|
|
|
EXPECT_EQ(REG_NOMATCH, regexec(&rx, "0", 0, NULL, 0));
|
|
|
|
|
regfree(&rx);
|
2020-06-15 14:18:57 +00:00
|
|
|
|
}
|
2021-04-21 02:14:21 +00:00
|
|
|
|
|
|
|
|
|
TEST(regex, testDns) {
|
|
|
|
|
regex_t rx;
|
|
|
|
|
EXPECT_EQ(REG_OK, regcomp(&rx, "^[-._0-9A-Za-z]*$", REG_EXTENDED));
|
|
|
|
|
EXPECT_EQ(REG_OK, regexec(&rx, "", 0, NULL, 0));
|
|
|
|
|
EXPECT_EQ(REG_OK, regexec(&rx, "foo.com", 0, NULL, 0));
|
|
|
|
|
EXPECT_EQ(REG_NOMATCH, regexec(&rx, "bar@example", 0, NULL, 0));
|
|
|
|
|
regfree(&rx);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TEST(regex, testIpBasic) {
|
|
|
|
|
regex_t rx;
|
|
|
|
|
EXPECT_EQ(REG_OK, regcomp(&rx,
|
|
|
|
|
"^"
|
|
|
|
|
"\\([0-9][0-9]*\\)\\."
|
|
|
|
|
"\\([0-9][0-9]*\\)\\."
|
|
|
|
|
"\\([0-9][0-9]*\\)\\."
|
|
|
|
|
"\\([0-9][0-9]*\\)"
|
|
|
|
|
"$",
|
|
|
|
|
0));
|
|
|
|
|
const char *s = "127.0.0.1";
|
|
|
|
|
regmatch_t *m = gc(calloc(rx.re_nsub + 1, sizeof(regmatch_t)));
|
|
|
|
|
ASSERT_EQ(4, rx.re_nsub);
|
|
|
|
|
EXPECT_EQ(REG_OK, regexec(&rx, s, rx.re_nsub + 1, m, 0));
|
|
|
|
|
EXPECT_STREQ("127", gc(strndup(s + m[1].rm_so, m[1].rm_eo - m[1].rm_so)));
|
|
|
|
|
EXPECT_STREQ("0", gc(strndup(s + m[2].rm_so, m[2].rm_eo - m[2].rm_so)));
|
|
|
|
|
EXPECT_STREQ("0", gc(strndup(s + m[3].rm_so, m[3].rm_eo - m[3].rm_so)));
|
|
|
|
|
EXPECT_STREQ("1", gc(strndup(s + m[4].rm_so, m[4].rm_eo - m[4].rm_so)));
|
|
|
|
|
regfree(&rx);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TEST(regex, testIpExtended) {
|
|
|
|
|
regex_t rx;
|
|
|
|
|
EXPECT_EQ(REG_OK, regcomp(&rx,
|
|
|
|
|
"^"
|
|
|
|
|
"([0-9]{1,3})\\."
|
|
|
|
|
"([0-9]{1,3})\\."
|
|
|
|
|
"([0-9]{1,3})\\."
|
|
|
|
|
"([0-9]{1,3})"
|
|
|
|
|
"$",
|
|
|
|
|
REG_EXTENDED));
|
|
|
|
|
const char *s = "127.0.0.1";
|
|
|
|
|
regmatch_t *m = gc(calloc(rx.re_nsub + 1, sizeof(regmatch_t)));
|
|
|
|
|
ASSERT_EQ(4, rx.re_nsub);
|
|
|
|
|
EXPECT_EQ(REG_OK, regexec(&rx, s, rx.re_nsub + 1, m, 0));
|
|
|
|
|
EXPECT_STREQ("127", gc(strndup(s + m[1].rm_so, m[1].rm_eo - m[1].rm_so)));
|
|
|
|
|
EXPECT_STREQ("0", gc(strndup(s + m[2].rm_so, m[2].rm_eo - m[2].rm_so)));
|
|
|
|
|
EXPECT_STREQ("0", gc(strndup(s + m[3].rm_so, m[3].rm_eo - m[3].rm_so)));
|
|
|
|
|
EXPECT_STREQ("1", gc(strndup(s + m[4].rm_so, m[4].rm_eo - m[4].rm_so)));
|
|
|
|
|
regfree(&rx);
|
|
|
|
|
}
|
|
|
|
|
|
2021-05-05 14:25:39 +00:00
|
|
|
|
TEST(regex, testUnicodeCharacterClass) {
|
|
|
|
|
regex_t rx;
|
|
|
|
|
EXPECT_EQ(REG_OK, regcomp(&rx, "^[[:alpha:]][[:alpha:]]$", 0));
|
|
|
|
|
EXPECT_EQ(REG_OK, regexec(&rx, "𝐵𝑏", 0, 0, 0));
|
|
|
|
|
EXPECT_NE(REG_OK, regexec(&rx, "₀₁", 0, 0, 0));
|
|
|
|
|
regfree(&rx);
|
|
|
|
|
}
|
|
|
|
|
|
2021-04-21 02:14:21 +00:00
|
|
|
|
// Benchmark body: compiles the pattern as an extended regex, runs it once
// against "foo.com", then releases it. Results are intentionally discarded.
void A(void) {
  const char *pattern = "^[-._0-9A-Za-z]*$";
  const char *subject = "foo.com";
  regex_t compiled;
  (void)regcomp(&compiled, pattern, REG_EXTENDED);
  (void)regexec(&compiled, subject, 0, NULL, 0);
  regfree(&compiled);
}
|
|
|
|
|
// Benchmark body: runs an already-compiled regex once against "foo.com"
// without requesting match offsets. The result is intentionally discarded.
void B(regex_t *rx) {
  const char *subject = "foo.com";
  (void)regexec(rx, subject, 0, NULL, 0);
}
|
|
|
|
|
// Benchmark body: same as the extended variant but compiles the pattern with
// cflags = 0 (basic syntax), matches "foo.com" once, then releases it.
void C(void) {
  const char *pattern = "^[-._0-9A-Za-z]*$";
  const char *subject = "foo.com";
  regex_t compiled;
  (void)regcomp(&compiled, pattern, 0);
  (void)regexec(&compiled, subject, 0, NULL, 0);
  regfree(&compiled);
}
|
|
|
|
|
// Benchmark body: matches "127.0.0.1" with a precompiled regex and asks for
// the whole-match slot plus one slot per sub-expression, written into m.
void D(regex_t *rx, regmatch_t *m) {
  const char *subject = "127.0.0.1";
  size_t slots = rx->re_nsub + 1;
  (void)regexec(rx, subject, slots, m, 0);
}
|
|
|
|
|
|
|
|
|
|
BENCH(regex, bench) {
|
|
|
|
|
regex_t rx;
|
|
|
|
|
regmatch_t *m;
|
|
|
|
|
regcomp(&rx, "^[-._0-9A-Za-z]*$", REG_EXTENDED);
|
|
|
|
|
EZBENCH2("precompiled extended", donothing, B(&rx));
|
|
|
|
|
regfree(&rx);
|
|
|
|
|
EZBENCH2("easy api extended", donothing, A());
|
|
|
|
|
EZBENCH2("easy api basic", donothing, C());
|
|
|
|
|
EXPECT_EQ(REG_OK, regcomp(&rx,
|
|
|
|
|
"^"
|
|
|
|
|
"\\([0-9][0-9]*\\)\\."
|
|
|
|
|
"\\([0-9][0-9]*\\)\\."
|
|
|
|
|
"\\([0-9][0-9]*\\)\\."
|
|
|
|
|
"\\([0-9][0-9]*\\)"
|
|
|
|
|
"$",
|
|
|
|
|
0));
|
|
|
|
|
m = calloc(rx.re_nsub + 1, sizeof(regmatch_t));
|
|
|
|
|
EZBENCH2("precompiled basic match", donothing, D(&rx, m));
|
|
|
|
|
free(m);
|
|
|
|
|
regfree(&rx);
|
|
|
|
|
EXPECT_EQ(REG_OK, regcomp(&rx,
|
|
|
|
|
"^"
|
|
|
|
|
"([0-9]{1,3})\\."
|
|
|
|
|
"([0-9]{1,3})\\."
|
|
|
|
|
"([0-9]{1,3})\\."
|
|
|
|
|
"([0-9]{1,3})"
|
|
|
|
|
"$",
|
|
|
|
|
REG_EXTENDED));
|
|
|
|
|
m = calloc(rx.re_nsub + 1, sizeof(regmatch_t));
|
|
|
|
|
EZBENCH2("precompiled extended match", donothing, D(&rx, m));
|
|
|
|
|
free(m);
|
|
|
|
|
regfree(&rx);
|
|
|
|
|
EXPECT_EQ(REG_OK, regcomp(&rx,
|
|
|
|
|
"^"
|
|
|
|
|
"([0-9]{1,3})\\."
|
|
|
|
|
"([0-9]{1,3})\\."
|
|
|
|
|
"([0-9]{1,3})\\."
|
|
|
|
|
"([0-9]{1,3})"
|
|
|
|
|
"$",
|
|
|
|
|
REG_EXTENDED | REG_NOSUB));
|
|
|
|
|
m = calloc(rx.re_nsub + 1, sizeof(regmatch_t));
|
|
|
|
|
EZBENCH2("precompiled nosub match", donothing, D(&rx, m));
|
|
|
|
|
free(m);
|
|
|
|
|
regfree(&rx);
|
2021-05-14 12:36:58 +00:00
|
|
|
|
EXPECT_EQ(REG_OK, regcomp(&rx, "^[a-z]*$", REG_EXTENDED | REG_NOSUB));
|
|
|
|
|
m = calloc(rx.re_nsub + 1, sizeof(regmatch_t));
|
|
|
|
|
EZBENCH2("precompiled alpha", donothing,
|
|
|
|
|
regexec(&rx, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 0, 0));
|
|
|
|
|
free(m);
|
|
|
|
|
regfree(&rx);
|
|
|
|
|
EXPECT_EQ(REG_OK,
|
|
|
|
|
regcomp(&rx, "^[a-z]*$", REG_EXTENDED | REG_NOSUB | REG_ICASE));
|
|
|
|
|
m = calloc(rx.re_nsub + 1, sizeof(regmatch_t));
|
|
|
|
|
EZBENCH2("precompiled alpha icase", donothing,
|
|
|
|
|
regexec(&rx, "aaaaaaaaaaaaaaaAAAAAAAAAAAAAA", 0, 0, 0));
|
|
|
|
|
free(m);
|
|
|
|
|
regfree(&rx);
|
2021-04-21 02:14:21 +00:00
|
|
|
|
}
|