/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8                                :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2020 Justine Alexandra Roberts Tunney                              │
│                                                                              │
│ Permission to use, copy, modify, and/or distribute this software for         │
│ any purpose with or without fee is hereby granted, provided that the         │
│ above copyright notice and this permission notice appear in all copies.      │
│                                                                              │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
│ PERFORMANCE OF THIS SOFTWARE.                                                │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "third_party/regex/regex.h"
#include "libc/mem/gc.internal.h"
#include "libc/mem/mem.h"
#include "libc/str/str.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/testlib.h"

/**
 * Checks a bracket expression mixing ASCII letters with a range that
 * runs from DEL up to U+FFFF, bounded to exactly two characters.
 *
 * The expectations show the bound counts characters rather than bytes:
 * two multi-byte arrows match, a single one does not.
 */
TEST(regex, test) {
  regex_t rx;
  // ASSERT rather than EXPECT, matching the ape tests in this file: if
  // the pattern fails to compile, regexec() and regfree() must not be
  // handed a regex_t that regcomp() never initialized.
  ASSERT_EQ(REG_OK, regcomp(&rx, "^[A-Za-z\x7f-\uffff]{2}$", REG_EXTENDED));
  EXPECT_EQ(REG_OK, regexec(&rx, "AZ", 0, NULL, 0));
  EXPECT_EQ(REG_OK, regexec(&rx, "→→", 0, NULL, 0));
  EXPECT_EQ(REG_NOMATCH, regexec(&rx, "A", 0, NULL, 0));
  EXPECT_EQ(REG_NOMATCH, regexec(&rx, "→", 0, NULL, 0));
  // Digits are outside both the letter ranges and the high range.
  EXPECT_EQ(REG_NOMATCH, regexec(&rx, "0", 0, NULL, 0));
  regfree(&rx);
}

/**
 * Checks an anchored, possibly-empty run of hostname-style characters
 * (dash, dot, underscore, digits, ASCII letters).
 */
TEST(regex, testDns) {
  regex_t rx;
  // ASSERT rather than EXPECT, matching the ape tests in this file: a
  // failed compile leaves rx indeterminate, so nothing below may run.
  ASSERT_EQ(REG_OK, regcomp(&rx, "^[-._0-9A-Za-z]*$", REG_EXTENDED));
  // The star quantifier lets the empty string match.
  EXPECT_EQ(REG_OK, regexec(&rx, "", 0, NULL, 0));
  EXPECT_EQ(REG_OK, regexec(&rx, "foo.com", 0, NULL, 0));
  // '@' is not in the bracket expression.
  EXPECT_EQ(REG_NOMATCH, regexec(&rx, "bar@example", 0, NULL, 0));
  regfree(&rx);
}

/**
 * Extracts the four octets of a dotted-quad address using basic
 * (flags == 0) regex syntax, where groups are written with escaped
 * parentheses.
 */
TEST(regex, testIpBasic) {
  regex_t rx;
  // ASSERT rather than EXPECT, matching the ape tests in this file: a
  // failed compile leaves rx (including re_nsub) indeterminate.
  ASSERT_EQ(REG_OK, regcomp(&rx,
                            "^"
                            "\\([0-9][0-9]*\\)\\."
                            "\\([0-9][0-9]*\\)\\."
                            "\\([0-9][0-9]*\\)\\."
                            "\\([0-9][0-9]*\\)"
                            "$",
                            0));
  // Validate the group count before it is used to size the match array
  // and before m[1]..m[4] are indexed.
  ASSERT_EQ(4, rx.re_nsub);
  const char *s = "127.0.0.1";
  // Slot 0 is the whole match, so re_nsub + 1 entries are needed.
  regmatch_t *m = gc(calloc(rx.re_nsub + 1, sizeof(*m)));
  EXPECT_EQ(REG_OK, regexec(&rx, s, rx.re_nsub + 1, m, 0));
  EXPECT_STREQ("127", gc(strndup(s + m[1].rm_so, m[1].rm_eo - m[1].rm_so)));
  EXPECT_STREQ("0", gc(strndup(s + m[2].rm_so, m[2].rm_eo - m[2].rm_so)));
  EXPECT_STREQ("0", gc(strndup(s + m[3].rm_so, m[3].rm_eo - m[3].rm_so)));
  EXPECT_STREQ("1", gc(strndup(s + m[4].rm_so, m[4].rm_eo - m[4].rm_so)));
  regfree(&rx);
}

/**
 * Extracts the four octets of a dotted-quad address using extended
 * regex syntax with bounded repetition for each octet.
 */
TEST(regex, testIpExtended) {
  regex_t rx;
  // ASSERT rather than EXPECT, matching the ape tests in this file: a
  // failed compile leaves rx (including re_nsub) indeterminate.
  ASSERT_EQ(REG_OK, regcomp(&rx,
                            "^"
                            "([0-9]{1,3})\\."
                            "([0-9]{1,3})\\."
                            "([0-9]{1,3})\\."
                            "([0-9]{1,3})"
                            "$",
                            REG_EXTENDED));
  // Validate the group count before it is used to size the match array
  // and before m[1]..m[4] are indexed.
  ASSERT_EQ(4, rx.re_nsub);
  const char *s = "127.0.0.1";
  // Slot 0 is the whole match, so re_nsub + 1 entries are needed.
  regmatch_t *m = gc(calloc(rx.re_nsub + 1, sizeof(*m)));
  EXPECT_EQ(REG_OK, regexec(&rx, s, rx.re_nsub + 1, m, 0));
  EXPECT_STREQ("127", gc(strndup(s + m[1].rm_so, m[1].rm_eo - m[1].rm_so)));
  EXPECT_STREQ("0", gc(strndup(s + m[2].rm_so, m[2].rm_eo - m[2].rm_so)));
  EXPECT_STREQ("0", gc(strndup(s + m[3].rm_so, m[3].rm_eo - m[3].rm_so)));
  EXPECT_STREQ("1", gc(strndup(s + m[4].rm_so, m[4].rm_eo - m[4].rm_so)));
  regfree(&rx);
}

/**
 * Checks that the [:alpha:] class is applied to non-ASCII text: a pair
 * of mathematical italic letters is expected to match, a pair of
 * subscript digits is not.
 */
TEST(regex, testUnicodeCharacterClass) {
  regex_t rx;
  // ASSERT rather than EXPECT, matching the ape tests in this file: a
  // failed compile leaves rx indeterminate, so nothing below may run.
  ASSERT_EQ(REG_OK, regcomp(&rx, "^[[:alpha:]][[:alpha:]]$", 0));
  EXPECT_EQ(REG_OK, regexec(&rx, "𝐵𝑏", 0, 0, 0));
  EXPECT_NE(REG_OK, regexec(&rx, "₀₁", 0, 0, 0));
  regfree(&rx);
}

/**
 * Benchmark body: one full compile / execute / free cycle of the
 * hostname pattern using extended regex syntax.
 *
 * Return codes are deliberately not inspected here.
 */
void A(void) {
  static const char kHostChars[] = "^[-._0-9A-Za-z]*$";
  static const char kSubject[] = "foo.com";
  regex_t re;
  regcomp(&re, kHostChars, REG_EXTENDED);
  regexec(&re, kSubject, 0, NULL, 0);
  regfree(&re);
}
/**
 * Benchmark body: executes an already-compiled regex against
 * "foo.com" without requesting submatches.
 *
 * @param rx compiled regex; not modified or freed here
 */
void B(regex_t *rx) {
  const char *subject = "foo.com";
  regexec(rx, subject, 0, NULL, 0);
}
/**
 * Benchmark body: one full compile / execute / free cycle of the
 * hostname pattern with no compile flags (basic regex syntax).
 *
 * Return codes are deliberately not inspected here.
 */
void C(void) {
  static const char kHostChars[] = "^[-._0-9A-Za-z]*$";
  static const char kSubject[] = "foo.com";
  regex_t re;
  regcomp(&re, kHostChars, 0);
  regexec(&re, kSubject, 0, NULL, 0);
  regfree(&re);
}
/**
 * Benchmark body: executes an already-compiled regex against
 * "127.0.0.1", asking for the whole match plus every subexpression.
 *
 * @param rx compiled regex; its re_nsub determines how many slots are
 *     requested
 * @param m match array passed to regexec() with re_nsub + 1 as its
 *     length
 */
void D(regex_t *rx, regmatch_t *m) {
  size_t nmatch = rx->re_nsub + 1;
  regexec(rx, "127.0.0.1", nmatch, m, 0);
}

// Runs a compiled regex over s without requesting any submatches.
static int ExecPeMachoDd(const regex_t *re, const char *s) {
  return regexec(re, s, 0, NULL, 0);
}

/**
 * Checks recognition of a dd-style "bs=N skip=" fragment where N may
 * be wrapped in optional quotes and/or $(( )) shell arithmetic.
 */
TEST(ape, testPeMachoDd) {
  static const char kBsThenSkip[] =  //
      "bs="                          // dd block size arg
      "(['\"] *)?"                   // #1 optional quote w/ space
      "(\\$\\(\\( *)?"               // #2 optional math w/ space
      "([[:digit:]]+)"               // #3
      "( *\\)\\))?"                  // #4 optional math w/ space
      "( *['\"])?"                   // #5 optional quote w/ space
      " +"                           //
      "skip=";
  regex_t re;
  ASSERT_EQ(REG_OK, regcomp(&re, kBsThenSkip, REG_EXTENDED));
  EXPECT_EQ(REG_OK, ExecPeMachoDd(&re, "bs=123 skip="));
  EXPECT_EQ(REG_OK, ExecPeMachoDd(&re, "bs=\"123\" skip="));
  EXPECT_EQ(REG_OK, ExecPeMachoDd(&re, "bs=$((123 skip="));
  EXPECT_EQ(REG_OK, ExecPeMachoDd(&re, "bs=\"$((123 skip="));
  EXPECT_EQ(REG_NOMATCH, ExecPeMachoDd(&re, "bs= skip="));
  EXPECT_EQ(REG_NOMATCH, ExecPeMachoDd(&re, "bs= 123 skip="));
  EXPECT_EQ(REG_NOMATCH, ExecPeMachoDd(&re, "bs= 123skip="));
  EXPECT_EQ(REG_OK, ExecPeMachoDd(&re, "bs=' 123'  skip="));
  EXPECT_EQ(REG_OK, ExecPeMachoDd(&re, "bs=$(( 123)) skip="));
  EXPECT_EQ(REG_OK, ExecPeMachoDd(&re, "bs=\"$(( 123 ))\" skip="));
  regfree(&re);
}

// Duplicates the text spanned by one submatch of s; the result is a
// fresh allocation from strndup().
static char *DupPeMachoDdGroup(const char *s, const regmatch_t *g) {
  return strndup(s + g->rm_so, g->rm_eo - g->rm_so);
}

/**
 * Checks that the numeric operands of a dd-style "bs= skip= count="
 * command line can be captured, each optionally wrapped in quotes
 * and/or $(( )) shell arithmetic.
 */
TEST(ape, testPeMachoDd2) {
  static const char kDdArgs[] =  //
      "bs="                      // dd block size arg
      "(['\"] *)?"               //   #1 optional quote w/ space
      "(\\$\\(\\( *)?"           //   #2 optional math w/ space
      "([[:digit:]]+)"           //   #3
      "( *\\)\\))?"              //   #4 optional math w/ space
      "( *['\"])?"               //   #5 optional quote w/ space
      " +"                       //
      "skip="                    // dd skip arg
      "(['\"] *)?"               //   #6 optional quote w/ space
      "(\\$\\(\\( *)?"           //   #7 optional math w/ space
      "([[:digit:]]+)"           //   #8
      "( *\\)\\))?"              //   #9 optional math w/ space
      "( *['\"])?"               //  #10 optional quote w/ space
      " +"                       //
      "count="                   // dd count arg
      "(['\"] *)?"               //  #11 optional quote w/ space
      "(\\$\\(\\( *)?"           //  #12 optional math w/ space
      "([[:digit:]]+)";          //  #13
  const char *s = "dd bs=123 skip=$(( 456)) count='7'";
  regex_t re;
  ASSERT_EQ(REG_OK, regcomp(&re, kDdArgs, REG_EXTENDED));
  ASSERT_EQ(13, re.re_nsub);
  // One slot per group plus slot 0 for the overall match.
  regmatch_t *groups = gc(calloc(re.re_nsub + 1, sizeof(regmatch_t)));
  EXPECT_EQ(REG_OK, regexec(&re, s, re.re_nsub + 1, groups, 0));
  EXPECT_STREQ("123", gc(DupPeMachoDdGroup(s, &groups[3])));
  EXPECT_STREQ("456", gc(DupPeMachoDdGroup(s, &groups[8])));
  EXPECT_STREQ("7", gc(DupPeMachoDdGroup(s, &groups[13])));
  regfree(&re);
}

/**
 * Benchmarks regex compile and execute paths: precompiled versus
 * compile-per-call, basic versus extended syntax, with and without
 * submatch reporting, and case-insensitive matching.
 *
 * Every regcomp() is checked with ASSERT_EQ, matching the ape tests in
 * this file, so a pattern that fails to compile is never benchmarked
 * or freed in an indeterminate state.
 */
BENCH(regex, bench) {
  regex_t rx;
  regmatch_t *m;

  ASSERT_EQ(REG_OK, regcomp(&rx, "^[-._0-9A-Za-z]*$", REG_EXTENDED));
  EZBENCH2("precompiled extended", donothing, B(&rx));
  regfree(&rx);

  // These two compile, execute and free on every iteration.
  EZBENCH2("easy api extended", donothing, A());
  EZBENCH2("easy api basic", donothing, C());

  ASSERT_EQ(REG_OK, regcomp(&rx,
                            "^"
                            "\\([0-9][0-9]*\\)\\."
                            "\\([0-9][0-9]*\\)\\."
                            "\\([0-9][0-9]*\\)\\."
                            "\\([0-9][0-9]*\\)"
                            "$",
                            0));
  m = calloc(rx.re_nsub + 1, sizeof(*m));
  EZBENCH2("precompiled basic match", donothing, D(&rx, m));
  free(m);
  regfree(&rx);

  ASSERT_EQ(REG_OK, regcomp(&rx,
                            "^"
                            "([0-9]{1,3})\\."
                            "([0-9]{1,3})\\."
                            "([0-9]{1,3})\\."
                            "([0-9]{1,3})"
                            "$",
                            REG_EXTENDED));
  m = calloc(rx.re_nsub + 1, sizeof(*m));
  EZBENCH2("precompiled extended match", donothing, D(&rx, m));
  free(m);
  regfree(&rx);

  // Same pattern compiled with REG_NOSUB; D() still passes a match
  // array, so one is still provided.
  ASSERT_EQ(REG_OK, regcomp(&rx,
                            "^"
                            "([0-9]{1,3})\\."
                            "([0-9]{1,3})\\."
                            "([0-9]{1,3})\\."
                            "([0-9]{1,3})"
                            "$",
                            REG_EXTENDED | REG_NOSUB));
  m = calloc(rx.re_nsub + 1, sizeof(*m));
  EZBENCH2("precompiled nosub match", donothing, D(&rx, m));
  free(m);
  regfree(&rx);

  // The remaining benchmarks call regexec() with nmatch == 0, so no
  // match array is allocated for them.
  ASSERT_EQ(REG_OK, regcomp(&rx, "^[a-z]*$", REG_EXTENDED | REG_NOSUB));
  EZBENCH2("precompiled alpha", donothing,
           regexec(&rx, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 0, 0));
  regfree(&rx);

  ASSERT_EQ(REG_OK,
            regcomp(&rx, "^[a-z]*$", REG_EXTENDED | REG_NOSUB | REG_ICASE));
  EZBENCH2("precompiled alpha icase", donothing,
           regexec(&rx, "aaaaaaaaaaaaaaaAAAAAAAAAAAAAA", 0, 0, 0));
  regfree(&rx);
}