mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-28 08:12:28 +00:00
Initial import
This commit is contained in:
commit
c91b3c5006
14915 changed files with 590219 additions and 0 deletions
294
third_party/regex/glob.c.todo
vendored
Normal file
294
third_party/regex/glob.c.todo
vendored
Normal file
|
@ -0,0 +1,294 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
/* clang-format off */
|
||||
|
||||
/*
|
||||
Musl Libc
|
||||
Copyright © 2005-2014 Rich Felker, et al.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#define _BSD_SOURCE
|
||||
#include <dirent.h>
|
||||
#include <errno.h>
|
||||
#include <fnmatch.h>
|
||||
#include <glob.h>
|
||||
#include <limits.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
asm(".ident\t\"\\n\\n\
|
||||
Musl Libc » glob (MIT License)\\n\
|
||||
Copyright 2005-2014 Rich Felker\"");
|
||||
|
||||
/* One glob result, kept on a singly linked list while matches are
 * being collected.  The path text is stored inline after the link
 * pointer, so a pointer to `name' can be converted back to the node
 * with offsetof() (globfree relies on this). */
struct match {
  struct match *next; /* following result, or NULL at the end of the list */
  char name[];        /* NUL-terminated path, allocated together with the node */
};
|
||||
|
||||
static int append(struct match **tail, const char *name, size_t len, int mark)
|
||||
{
|
||||
struct match *new = malloc(sizeof(struct match) + len + 2);
|
||||
if (!new) return -1;
|
||||
(*tail)->next = new;
|
||||
new->next = NULL;
|
||||
memcpy(new->name, name, len+1);
|
||||
if (mark && len && name[len-1]!='/') {
|
||||
new->name[len] = '/';
|
||||
new->name[len+1] = 0;
|
||||
}
|
||||
*tail = new;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int do_glob(char *buf, size_t pos, int type, char *pat, int flags, int (*errfunc)(const char *path, int err), struct match **tail)
|
||||
{
|
||||
/* If GLOB_MARK is unused, we don't care about type. */
|
||||
if (!type && !(flags & GLOB_MARK)) type = DT_REG;
|
||||
|
||||
/* Special-case the remaining pattern being all slashes, in
|
||||
* which case we can use caller-passed type if it's a dir. */
|
||||
if (*pat && type!=DT_DIR) type = 0;
|
||||
while (pos+1 < PATH_MAX && *pat=='/') buf[pos++] = *pat++;
|
||||
|
||||
/* Consume maximal [escaped-]literal prefix of pattern, copying
|
||||
* and un-escaping it to the running buffer as we go. */
|
||||
ptrdiff_t i=0, j=0;
|
||||
int in_bracket = 0, overflow = 0;
|
||||
for (; pat[i]!='*' && pat[i]!='?' && (!in_bracket || pat[i]!=']'); i++) {
|
||||
if (!pat[i]) {
|
||||
if (overflow) return 0;
|
||||
pat += i;
|
||||
pos += j;
|
||||
i = j = 0;
|
||||
break;
|
||||
} else if (pat[i] == '[') {
|
||||
in_bracket = 1;
|
||||
} else if (pat[i] == '\\' && !(flags & GLOB_NOESCAPE)) {
|
||||
/* Backslashes inside a bracket are (at least by
|
||||
* our interpretation) non-special, so if next
|
||||
* char is ']' we have a complete expression. */
|
||||
if (in_bracket && pat[i+1]==']') break;
|
||||
/* Unpaired final backslash never matches. */
|
||||
if (!pat[i+1]) return 0;
|
||||
i++;
|
||||
}
|
||||
if (pat[i] == '/') {
|
||||
if (overflow) return 0;
|
||||
in_bracket = 0;
|
||||
pat += i+1;
|
||||
i = -1;
|
||||
pos += j+1;
|
||||
j = -1;
|
||||
}
|
||||
/* Only store a character if it fits in the buffer, but if
|
||||
* a potential bracket expression is open, the overflow
|
||||
* must be remembered and handled later only if the bracket
|
||||
* is unterminated (and thereby a literal), so as not to
|
||||
* disallow long bracket expressions with short matches. */
|
||||
if (pos+(j+1) < PATH_MAX) {
|
||||
buf[pos+j++] = pat[i];
|
||||
} else if (in_bracket) {
|
||||
overflow = 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
/* If we consume any new components, the caller-passed type
|
||||
* or dummy type from above is no longer valid. */
|
||||
type = 0;
|
||||
}
|
||||
buf[pos] = 0;
|
||||
if (!*pat) {
|
||||
/* If we consumed any components above, or if GLOB_MARK is
|
||||
* requested and we don't yet know if the match is a dir,
|
||||
* we must call stat to confirm the file exists and/or
|
||||
* determine its type. */
|
||||
struct stat st;
|
||||
if ((flags & GLOB_MARK) && type==DT_LNK) type = 0;
|
||||
if (!type && stat(buf, &st)) {
|
||||
if (errno!=ENOENT && (errfunc(buf, errno) || (flags & GLOB_ERR)))
|
||||
return GLOB_ABORTED;
|
||||
return 0;
|
||||
}
|
||||
if (!type && S_ISDIR(st.st_mode)) type = DT_DIR;
|
||||
if (append(tail, buf, pos, (flags & GLOB_MARK) && type==DT_DIR))
|
||||
return GLOB_NOSPACE;
|
||||
return 0;
|
||||
}
|
||||
char *p2 = strchr(pat, '/'), saved_sep = '/';
|
||||
/* Check if the '/' was escaped and, if so, remove the escape char
|
||||
* so that it will not be unpaired when passed to fnmatch. */
|
||||
if (p2 && !(flags & GLOB_NOESCAPE)) {
|
||||
char *p;
|
||||
for (p=p2; p>pat && p[-1]=='\\'; p--);
|
||||
if ((p2-p)%2) {
|
||||
p2--;
|
||||
saved_sep = '\\';
|
||||
}
|
||||
}
|
||||
DIR *dir = opendir(pos ? buf : ".");
|
||||
if (!dir) {
|
||||
if (errfunc(buf, errno) || (flags & GLOB_ERR))
|
||||
return GLOB_ABORTED;
|
||||
return 0;
|
||||
}
|
||||
int old_errno = errno;
|
||||
struct dirent *de;
|
||||
while (errno=0, de=readdir(dir)) {
|
||||
/* Quickly skip non-directories when there's pattern left. */
|
||||
if (p2 && de->d_type && de->d_type!=DT_DIR && de->d_type!=DT_LNK)
|
||||
continue;
|
||||
|
||||
size_t l = strlen(de->d_name);
|
||||
if (l >= PATH_MAX-pos) continue;
|
||||
|
||||
if (p2) *p2 = 0;
|
||||
|
||||
int fnm_flags= ((flags & GLOB_NOESCAPE) ? FNM_NOESCAPE : 0)
|
||||
| ((!(flags & GLOB_PERIOD)) ? FNM_PERIOD : 0);
|
||||
|
||||
if (fnmatch(pat, de->d_name, fnm_flags))
|
||||
continue;
|
||||
|
||||
/* With GLOB_PERIOD, don't allow matching . or .. unless
|
||||
* fnmatch would match them with FNM_PERIOD rules in effect. */
|
||||
if (p2 && (flags & GLOB_PERIOD) && de->d_name[0]=='.'
|
||||
&& (!de->d_name[1] || de->d_name[1]=='.' && !de->d_name[2])
|
||||
&& fnmatch(pat, de->d_name, fnm_flags | FNM_PERIOD))
|
||||
continue;
|
||||
|
||||
memcpy(buf+pos, de->d_name, l+1);
|
||||
if (p2) *p2 = saved_sep;
|
||||
int r = do_glob(buf, pos+l, de->d_type, p2 ? p2 : "", flags, errfunc, tail);
|
||||
if (r) {
|
||||
closedir(dir);
|
||||
return r;
|
||||
}
|
||||
}
|
||||
int readerr = errno;
|
||||
if (p2) *p2 = saved_sep;
|
||||
closedir(dir);
|
||||
if (readerr && (errfunc(buf, errno) || (flags & GLOB_ERR)))
|
||||
return GLOB_ABORTED;
|
||||
errno = old_errno;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Default error callback used when the caller of glob() passes none:
 * accepts any path and error code and always returns 0, i.e. never
 * asks for the scan to be aborted. */
static int ignore_err(const char *path, int err)
{
  (void)path;
  (void)err;
  return 0;
}
|
||||
|
||||
static void freelist(struct match *head)
|
||||
{
|
||||
struct match *match, *next;
|
||||
for (match=head->next; match; match=next) {
|
||||
next = match->next;
|
||||
free(match);
|
||||
}
|
||||
}
|
||||
|
||||
/* qsort() comparator for an array of C-string pointers: `a' and `b'
 * each point at a `char *' element; ordering is that of strcmp(). */
static int sort(const void *a, const void *b)
{
  const char *lhs = *(const char **)a;
  const char *rhs = *(const char **)b;
  return strcmp(lhs, rhs);
}
|
||||
|
||||
int glob(const char *restrict pat, int flags, int (*errfunc)(const char *path, int err), glob_t *restrict g)
|
||||
{
|
||||
struct match head = { .next = NULL }, *tail = &head;
|
||||
size_t cnt, i;
|
||||
size_t offs = (flags & GLOB_DOOFFS) ? g->gl_offs : 0;
|
||||
int error = 0;
|
||||
char buf[PATH_MAX];
|
||||
|
||||
if (!errfunc) errfunc = ignore_err;
|
||||
|
||||
if (!(flags & GLOB_APPEND)) {
|
||||
g->gl_offs = offs;
|
||||
g->gl_pathc = 0;
|
||||
g->gl_pathv = NULL;
|
||||
}
|
||||
|
||||
if (*pat) {
|
||||
char *p = strdup(pat);
|
||||
if (!p) return GLOB_NOSPACE;
|
||||
buf[0] = 0;
|
||||
error = do_glob(buf, 0, 0, p, flags, errfunc, &tail);
|
||||
free(p);
|
||||
}
|
||||
|
||||
if (error == GLOB_NOSPACE) {
|
||||
freelist(&head);
|
||||
return error;
|
||||
}
|
||||
|
||||
for (cnt=0, tail=head.next; tail; tail=tail->next, cnt++);
|
||||
if (!cnt) {
|
||||
if (flags & GLOB_NOCHECK) {
|
||||
tail = &head;
|
||||
if (append(&tail, pat, strlen(pat), 0))
|
||||
return GLOB_NOSPACE;
|
||||
cnt++;
|
||||
} else
|
||||
return GLOB_NOMATCH;
|
||||
}
|
||||
|
||||
if (flags & GLOB_APPEND) {
|
||||
char **pathv = realloc(g->gl_pathv, (offs + g->gl_pathc + cnt + 1) * sizeof(char *));
|
||||
if (!pathv) {
|
||||
freelist(&head);
|
||||
return GLOB_NOSPACE;
|
||||
}
|
||||
g->gl_pathv = pathv;
|
||||
offs += g->gl_pathc;
|
||||
} else {
|
||||
g->gl_pathv = malloc((offs + cnt + 1) * sizeof(char *));
|
||||
if (!g->gl_pathv) {
|
||||
freelist(&head);
|
||||
return GLOB_NOSPACE;
|
||||
}
|
||||
for (i=0; i<offs; i++)
|
||||
g->gl_pathv[i] = NULL;
|
||||
}
|
||||
for (i=0, tail=head.next; i<cnt; tail=tail->next, i++)
|
||||
g->gl_pathv[offs + i] = tail->name;
|
||||
g->gl_pathv[offs + i] = NULL;
|
||||
g->gl_pathc += cnt;
|
||||
|
||||
if (!(flags & GLOB_NOSORT))
|
||||
qsort(g->gl_pathv+offs, cnt, sizeof(char *), sort);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
void globfree(glob_t *g)
|
||||
{
|
||||
size_t i;
|
||||
for (i=0; i<g->gl_pathc; i++)
|
||||
free(g->gl_pathv[g->gl_offs + i] - offsetof(struct match, name));
|
||||
free(g->gl_pathv);
|
||||
g->gl_pathc = 0;
|
||||
g->gl_pathv = NULL;
|
||||
}
|
10
third_party/regex/notice.inc
vendored
Normal file
10
third_party/regex/notice.inc
vendored
Normal file
|
@ -0,0 +1,10 @@
|
|||
asm(".ident\t\"\\n\\n\
|
||||
Musl Libc (MIT License)\\n\
|
||||
Copyright 2005-2014 Rich Felker\"");
|
||||
asm(".include \"libc/disclaimer.inc\"");
|
||||
|
||||
asm(".ident\t\"\\n\\n\
|
||||
TRE regex (BSD-2 License)\\n\
|
||||
Copyright 2001-2009 Ville Laurikari <vl@iki.fi>\\n\
|
||||
Copyright 2016 Szabolcs Nagy\"");
|
||||
asm(".include \"libc/disclaimer.inc\"");
|
2595
third_party/regex/regcomp.c
vendored
Normal file
2595
third_party/regex/regcomp.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
61
third_party/regex/regerror.c
vendored
Normal file
61
third_party/regex/regerror.c
vendored
Normal file
|
@ -0,0 +1,61 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||
│ │
|
||||
│ Musl Libc │
|
||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
||||
│ │
|
||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||
│ a copy of this software and associated documentation files (the │
|
||||
│ "Software"), to deal in the Software without restriction, including │
|
||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
||||
│ the following conditions: │
|
||||
│ │
|
||||
│ The above copyright notice and this permission notice shall be │
|
||||
│ included in all copies or substantial portions of the Software. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||
│ │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/fmt/fmt.h"
|
||||
#include "third_party/regex/tre.inc"
|
||||
|
||||
/* Error message strings for error codes listed in `regex.h'. This list
|
||||
needs to be in sync with the codes listed there, naturally. */
|
||||
|
||||
/* Converted to single string by Rich Felker to remove the need for
|
||||
* data relocations at runtime, 27 Feb 2006. */
|
||||
|
||||
/* All messages packed into one array, each terminated by NUL and
 * stored in error-code order.  The final entry begins with an extra
 * NUL: that empty string marks the end of the table and is followed
 * directly by the fallback text. */
static const char messages[] =
    "No error\0"
    "No match\0"
    "Invalid regexp\0"
    "Unknown collating element\0"
    "Unknown character class name\0"
    "Trailing backslash\0"
    "Invalid back reference\0"
    "Missing ']'\0"
    "Missing ')'\0"
    "Missing '}'\0"
    "Invalid contents of {}\0"
    "Invalid character range\0"
    "Out of memory\0"
    "Repetition not preceded by valid expression\0"
    "\0Unknown error";

/* Writes the message for error code `e' into `buf' (at most `size'
 * bytes including the terminator, as done by snprintf) and returns
 * the buffer size needed for the whole message, terminator included.
 * Codes outside the table, negative ones included, yield
 * "Unknown error".  `preg' is not used. */
size_t regerror(int e, const regex_t *restrict preg, char *restrict buf,
                size_t size) {
  const char *msg = messages;

  /* Skip `e' entries, stopping early at the empty end-of-table mark. */
  while (e != 0 && *msg != '\0') {
    msg += strlen(msg) + 1;
    e--;
  }

  /* Landed on the mark: step over it to the fallback text. */
  if (*msg == '\0') {
    msg++;
  }

  return 1 + snprintf(buf, size, "%s", msg);
}
|
60
third_party/regex/regex.h
vendored
Normal file
60
third_party/regex/regex.h
vendored
Normal file
|
@ -0,0 +1,60 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_REGEX_REGEX_H_
|
||||
#define COSMOPOLITAN_LIBC_REGEX_REGEX_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
#if 0
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § regular expressions ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
#endif
|
||||
|
||||
#define REG_EXTENDED 1
|
||||
#define REG_ICASE 2
|
||||
#define REG_NEWLINE 4
|
||||
#define REG_NOSUB 8
|
||||
|
||||
#define REG_NOTBOL 1
|
||||
#define REG_NOTEOL 2
|
||||
|
||||
#define REG_OK 0
|
||||
#define REG_NOMATCH 1
|
||||
#define REG_BADPAT 2
|
||||
#define REG_ECOLLATE 3
|
||||
#define REG_ECTYPE 4
|
||||
#define REG_EESCAPE 5
|
||||
#define REG_ESUBREG 6
|
||||
#define REG_EBRACK 7
|
||||
#define REG_EPAREN 8
|
||||
#define REG_EBRACE 9
|
||||
#define REG_BADBR 10
|
||||
#define REG_ERANGE 11
|
||||
#define REG_ESPACE 12
|
||||
#define REG_BADRPT 13
|
||||
|
||||
#define REG_ENOSYS -1
|
||||
|
||||
/* Offset type used in regmatch_t. */
typedef long regoff_t;

/* Compiled pattern object filled in by regcomp().  Only re_nsub is a
 * public member; the double-underscore members are private to the
 * implementation (NOTE(review): their exact use lives in regcomp.c,
 * which is not visible here). */
typedef struct re_pattern_buffer {
  size_t re_nsub; /* per POSIX: number of parenthesized subexpressions */
  void *__opaque;
  void *__padding[4];
  size_t __nsub2;
  char __padding2;
} regex_t;

/* One match position as reported through regexec()'s pmatch[]
 * (per POSIX: rm_so is the start offset, rm_eo the end offset). */
typedef struct {
  regoff_t rm_so;
  regoff_t rm_eo;
} regmatch_t;
|
||||
|
||||
int regcomp(regex_t *preg, const char *regex, int flags);
|
||||
int regexec(const regex_t *preg, const char *string, size_t nmatch,
|
||||
regmatch_t pmatch[], int eflags);
|
||||
void regfree(regex_t *preg);
|
||||
size_t regerror(int errcode, const regex_t *preg, char *errbuf,
|
||||
size_t errbufsize);
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_REGEX_REGEX_H_ */
|
56
third_party/regex/regex.mk
vendored
Normal file
56
third_party/regex/regex.mk
vendored
Normal file
|
@ -0,0 +1,56 @@
|
|||
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
|
||||
#───vi: set noet ft=make ts=8 sw=8 fenc=utf-8 :vi─────────────────────┘
|
||||
|
||||
PKGS += THIRD_PARTY_REGEX
|
||||
|
||||
THIRD_PARTY_REGEX_ARTIFACTS += THIRD_PARTY_REGEX_A
|
||||
THIRD_PARTY_REGEX = $(THIRD_PARTY_REGEX_A_DEPS) $(THIRD_PARTY_REGEX_A)
|
||||
THIRD_PARTY_REGEX_A = o/$(MODE)/third_party/regex/regex.a
|
||||
THIRD_PARTY_REGEX_A_FILES := $(wildcard third_party/regex/*)
|
||||
THIRD_PARTY_REGEX_A_HDRS = $(filter %.h,$(THIRD_PARTY_REGEX_A_FILES))
|
||||
THIRD_PARTY_REGEX_A_SRCS = $(filter %.c,$(THIRD_PARTY_REGEX_A_FILES))
|
||||
|
||||
THIRD_PARTY_REGEX_A_OBJS = \
|
||||
$(THIRD_PARTY_REGEX_A_SRCS:%=o/$(MODE)/%.zip.o) \
|
||||
$(THIRD_PARTY_REGEX_A_SRCS:%.c=o/$(MODE)/%.o)
|
||||
|
||||
THIRD_PARTY_REGEX_A_DIRECTDEPS = \
|
||||
LIBC_ALG \
|
||||
LIBC_FMT \
|
||||
LIBC_MEM \
|
||||
LIBC_NEXGEN32E \
|
||||
LIBC_STR \
|
||||
LIBC_STUBS
|
||||
|
||||
THIRD_PARTY_REGEX_A_DEPS := \
|
||||
$(call uniq,$(foreach x,$(THIRD_PARTY_REGEX_A_DIRECTDEPS),$($(x))))
|
||||
|
||||
THIRD_PARTY_REGEX_A_CHECKS = \
|
||||
$(THIRD_PARTY_REGEX_A).pkg \
|
||||
$(THIRD_PARTY_REGEX_A_HDRS:%=o/$(MODE)/%.ok)
|
||||
|
||||
$(THIRD_PARTY_REGEX_A): \
|
||||
third_party/regex/ \
|
||||
$(THIRD_PARTY_REGEX_A).pkg \
|
||||
$(THIRD_PARTY_REGEX_A_OBJS)
|
||||
|
||||
$(THIRD_PARTY_REGEX_A).pkg: \
|
||||
$(THIRD_PARTY_REGEX_A_OBJS) \
|
||||
$(foreach x,$(THIRD_PARTY_REGEX_A_DIRECTDEPS),$($(x)_A).pkg)
|
||||
|
||||
THIRD_PARTY_REGEX_LIBS = $(foreach x,$(THIRD_PARTY_REGEX_ARTIFACTS),$($(x)))
|
||||
THIRD_PARTY_REGEX_SRCS = $(foreach x,$(THIRD_PARTY_REGEX_ARTIFACTS),$($(x)_SRCS))
|
||||
THIRD_PARTY_REGEX_CHECKS = $(foreach x,$(THIRD_PARTY_REGEX_ARTIFACTS),$($(x)_CHECKS))
|
||||
THIRD_PARTY_REGEX_OBJS = $(foreach x,$(THIRD_PARTY_REGEX_ARTIFACTS),$($(x)_OBJS))
|
||||
|
||||
$(THIRD_PARTY_REGEX_OBJS): $(BUILD_FILES) third_party/regex/regex.mk
|
||||
|
||||
o/$(MODE)/third_party/regex/regcomp.o \
|
||||
o/$(MODE)/third_party/regex/regerror.o \
|
||||
o/$(MODE)/third_party/regex/regexec.o \
|
||||
o/$(MODE)/third_party/regex/tre-mem.o: \
|
||||
OVERRIDE_COPTS += \
|
||||
$(OLD_CODE)
|
||||
|
||||
.PHONY: o/$(MODE)/third_party/regex
|
||||
o/$(MODE)/third_party/regex: $(THIRD_PARTY_REGEX_CHECKS)
|
898
third_party/regex/regexec.c
vendored
Normal file
898
third_party/regex/regexec.c
vendored
Normal file
|
@ -0,0 +1,898 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||
│ │
|
||||
│ regexec.c - TRE POSIX compatible matching functions (and more). │
|
||||
│ │
|
||||
│ Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> │
|
||||
│ All rights reserved. │
|
||||
│ │
|
||||
│ Redistribution and use in source and binary forms, with or without │
|
||||
│ modification, are permitted provided that the following conditions │
|
||||
│ are met: │
|
||||
│ │
|
||||
│ 1. Redistributions of source code must retain the above copyright │
|
||||
│ notice, this list of conditions and the following disclaimer. │
|
||||
│ │
|
||||
│ 2. Redistributions in binary form must reproduce the above copyright │
|
||||
│ notice, this list of conditions and the following disclaimer in │
|
||||
│ the documentation and/or other materials provided with the │
|
||||
│ distribution. │
|
||||
│ │
|
||||
│ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS │
|
||||
│ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT │
|
||||
│ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR │
|
||||
│ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT │
|
||||
│ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, │
|
||||
│ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT │
|
||||
│ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, │
|
||||
│ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY │
|
||||
│ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT │
|
||||
│ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE │
|
||||
│ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. │
|
||||
│ │
|
||||
│──────────────────────────────────────────────────────────────────────────────│
|
||||
│ │
|
||||
│ Musl Libc │
|
||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
||||
│ │
|
||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||
│ a copy of this software and associated documentation files (the │
|
||||
│ "Software"), to deal in the Software without restriction, including │
|
||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
||||
│ the following conditions: │
|
||||
│ │
|
||||
│ The above copyright notice and this permission notice shall be │
|
||||
│ included in all copies or substantial portions of the Software. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||
│ │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/limits.h"
|
||||
#include "third_party/regex/tre.inc"
|
||||
|
||||
static void tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
|
||||
const tre_tnfa_t *tnfa, regoff_t *tags,
|
||||
regoff_t match_eo);
|
||||
|
||||
/***********************************************************************
|
||||
from tre-match-utils.h
|
||||
***********************************************************************/
|
||||
|
||||
/* Advances the matcher by one character.  Works on the caller's local
 * variables prev_c, next_c, pos, pos_add_next, str_byte and ret, and
 * needs a label `error_exit' in the enclosing function.
 *
 * The previous character is remembered in prev_c, pos moves on by the
 * byte length of the character consumed last time, and the next
 * character is decoded from str_byte into next_c.  A decoding error
 * sets ret to REG_NOMATCH and jumps to error_exit.  At the terminating
 * NUL mbtowc() yields 0; the step is then counted as one byte. */
#define GET_NEXT_WCHAR()                                    \
  do {                                                      \
    prev_c = next_c;                                        \
    pos += pos_add_next;                                    \
    pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX);   \
    if (pos_add_next < 0) {                                 \
      ret = REG_NOMATCH;                                    \
      goto error_exit;                                      \
    }                                                       \
    if (pos_add_next == 0) {                                \
      pos_add_next = 1;                                     \
    }                                                       \
    str_byte += pos_add_next;                               \
  } while (0)
|
||||
|
||||
/* Non-zero if `c' counts as a word character: '_' or alphanumeric. */
#define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum(c))

/* Evaluates to non-zero when at least one assertion requested in the
 * bit mask `assertions' does NOT hold at the current position, i.e.
 * when the transition carrying it must be skipped.  Reads the
 * caller's locals pos, prev_c, next_c, reg_notbol, reg_noteol and
 * reg_newline.  The argument is parenthesized at every use so that
 * any expression can be passed safely. */
#define CHECK_ASSERTIONS(assertions)                                       \
  ((((assertions) & ASSERT_AT_BOL) && (pos > 0 || reg_notbol) &&           \
    (prev_c != L'\n' || !reg_newline)) ||                                  \
   (((assertions) & ASSERT_AT_EOL) && (next_c != L'\0' || reg_noteol) &&   \
    (next_c != L'\n' || !reg_newline)) ||                                  \
   (((assertions) & ASSERT_AT_BOW) &&                                      \
    (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) ||                    \
   (((assertions) & ASSERT_AT_EOW) &&                                      \
    (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) ||                    \
   (((assertions) & ASSERT_AT_WB) &&                                       \
    (pos != 0 && next_c != L'\0' &&                                        \
     IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) ||                     \
   (((assertions) & ASSERT_AT_WB_NEG) &&                                   \
    (pos == 0 || next_c == L'\0' ||                                        \
     IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
|
||||
|
||||
/* Evaluates to non-zero when the character-class assertions attached
 * to transition `trans_i' reject the character in the caller's local
 * prev_c:
 *   - ASSERT_CHAR_CLASS: prev_c is not in trans_i->u.class (with
 *     REG_ICASE, neither its lower- nor its upper-case form is);
 *   - ASSERT_CHAR_CLASS_NEG: prev_c is in one of the excluded classes.
 * `eflags' is accepted but not used.  The pointer arguments are
 * parenthesized at every use so that any expression can be passed. */
#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)                             \
  ((((trans_i)->assertions & ASSERT_CHAR_CLASS) &&                            \
    !((tnfa)->cflags & REG_ICASE) &&                                          \
    !tre_isctype((tre_cint_t)prev_c, (trans_i)->u.class)) ||                  \
   (((trans_i)->assertions & ASSERT_CHAR_CLASS) &&                            \
    ((tnfa)->cflags & REG_ICASE) &&                                           \
    !tre_isctype(tre_tolower((tre_cint_t)prev_c), (trans_i)->u.class) &&      \
    !tre_isctype(tre_toupper((tre_cint_t)prev_c), (trans_i)->u.class)) ||     \
   (((trans_i)->assertions & ASSERT_CHAR_CLASS_NEG) &&                        \
    tre_neg_char_classes_match((trans_i)->neg_classes, (tre_cint_t)prev_c,    \
                               (tnfa)->cflags & REG_ICASE)))
|
||||
|
||||
/* Returns 1 if `t1' wins `t2', 0 otherwise. */
|
||||
/* Decides which of two tag vectors is preferred.  The vectors are
 * compared element by element; the first position where they differ
 * settles the result: for a TRE_TAG_MINIMIZE tag the smaller value
 * wins, for any other direction the larger one does.
 * Returns 1 if `t1' wins over `t2', 0 otherwise (also when all
 * `num_tags' entries are equal). */
static int tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
                         regoff_t *t1, regoff_t *t2) {
  int i;
  for (i = 0; i < num_tags; i++) {
    if (t1[i] != t2[i]) {
      int prefer_small = (tag_directions[i] == TRE_TAG_MINIMIZE);
      return prefer_small ? (t1[i] < t2[i]) : (t1[i] > t2[i]);
    }
  }
  return 0;
}
|
||||
|
||||
/* Tests `wc' against a zero-terminated array of character classes.
 * With `icase' non-zero, the upper- and lower-case forms of `wc' are
 * tested instead of `wc' itself.
 * Returns 1 if any class in the array matches, 0 if none does. */
static int tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc,
                                      int icase) {
  tre_ctype_t *cls;
  for (cls = classes; *cls != (tre_ctype_t)0; cls++) {
    int hit;
    if (icase) {
      hit = tre_isctype(tre_toupper(wc), *cls) ||
            tre_isctype(tre_tolower(wc), *cls);
    } else {
      hit = tre_isctype(wc, *cls);
    }
    if (hit) {
      return 1; /* Match. */
    }
  }
  return 0; /* No match. */
}
|
||||
|
||||
/***********************************************************************
|
||||
from tre-match-parallel.c
|
||||
***********************************************************************/
|
||||
|
||||
/*
|
||||
This algorithm searches for matches basically by reading characters
|
||||
in the searched string one by one, starting at the beginning. All
|
||||
matching paths in the TNFA are traversed in parallel. When two or
|
||||
more paths reach the same state, exactly one is chosen according to
|
||||
tag ordering rules; if returning submatches is not required it does
|
||||
not matter which path is chosen.
|
||||
|
||||
The worst case time required for finding the leftmost and longest
|
||||
match, or determining that there is no match, is always linearly
|
||||
dependent on the length of the text being searched.
|
||||
|
||||
This algorithm cannot handle TNFAs with back referencing nodes.
|
||||
See `tre-match-backtrack.c'.
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
tre_tnfa_transition_t *state;
|
||||
regoff_t *tags;
|
||||
} tre_tnfa_reach_t;
|
||||
|
||||
/* Per-state bookkeeping for the parallel matcher, indexed by state id:
 * records at which input position the state was last added to
 * `reach_next', so that a state is entered at most once per position. */
typedef struct {
  regoff_t pos;    /* position of the last visit; initialised to -1 */
  regoff_t **tags; /* address of the `tags' pointer of that reach entry */
} tre_reach_pos_t;
|
||||
|
||||
static reg_errcode_t tre_tnfa_run_parallel(const tre_tnfa_t *tnfa,
|
||||
const void *string,
|
||||
regoff_t *match_tags, int eflags,
|
||||
regoff_t *match_end_ofs) {
|
||||
/* State variables required by GET_NEXT_WCHAR. */
|
||||
tre_char_t prev_c = 0, next_c = 0;
|
||||
const char *str_byte = string;
|
||||
regoff_t pos = -1;
|
||||
regoff_t pos_add_next = 1;
|
||||
#ifdef TRE_MBSTATE
|
||||
mbstate_t mbstate;
|
||||
#endif /* TRE_MBSTATE */
|
||||
int reg_notbol = eflags & REG_NOTBOL;
|
||||
int reg_noteol = eflags & REG_NOTEOL;
|
||||
int reg_newline = tnfa->cflags & REG_NEWLINE;
|
||||
reg_errcode_t ret;
|
||||
|
||||
char *buf;
|
||||
tre_tnfa_transition_t *trans_i;
|
||||
tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i;
|
||||
tre_reach_pos_t *reach_pos;
|
||||
int *tag_i;
|
||||
int num_tags, i;
|
||||
|
||||
regoff_t match_eo = -1; /* end offset of match (-1 if no match found yet) */
|
||||
int new_match = 0;
|
||||
regoff_t *tmp_tags = NULL;
|
||||
regoff_t *tmp_iptr;
|
||||
|
||||
#ifdef TRE_MBSTATE
|
||||
memset(&mbstate, '\0', sizeof(mbstate));
|
||||
#endif /* TRE_MBSTATE */
|
||||
|
||||
if (!match_tags)
|
||||
num_tags = 0;
|
||||
else
|
||||
num_tags = tnfa->num_tags;
|
||||
|
||||
/* Allocate memory for temporary data required for matching. This needs to
|
||||
be done for every matching operation to be thread safe. This allocates
|
||||
everything in a single large block with calloc(). */
|
||||
{
|
||||
size_t tbytes, rbytes, pbytes, xbytes, total_bytes;
|
||||
char *tmp_buf;
|
||||
|
||||
/* Ensure that tbytes and xbytes*num_states cannot overflow, and that
|
||||
* they don't contribute more than 1/8 of SIZE_MAX to total_bytes. */
|
||||
if (num_tags > SIZE_MAX / (8 * sizeof(regoff_t) * tnfa->num_states))
|
||||
return REG_ESPACE;
|
||||
|
||||
/* Likewise check rbytes. */
|
||||
if (tnfa->num_states + 1 > SIZE_MAX / (8 * sizeof(*reach_next)))
|
||||
return REG_ESPACE;
|
||||
|
||||
/* Likewise check pbytes. */
|
||||
if (tnfa->num_states > SIZE_MAX / (8 * sizeof(*reach_pos)))
|
||||
return REG_ESPACE;
|
||||
|
||||
/* Compute the length of the block we need. */
|
||||
tbytes = sizeof(*tmp_tags) * num_tags;
|
||||
rbytes = sizeof(*reach_next) * (tnfa->num_states + 1);
|
||||
pbytes = sizeof(*reach_pos) * tnfa->num_states;
|
||||
xbytes = sizeof(regoff_t) * num_tags;
|
||||
total_bytes = (sizeof(long) - 1) * 4 /* for alignment paddings */
|
||||
+ (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes;
|
||||
|
||||
/* Allocate the memory. */
|
||||
buf = calloc(total_bytes, 1);
|
||||
if (buf == NULL) return REG_ESPACE;
|
||||
|
||||
/* Get the various pointers within tmp_buf (properly aligned). */
|
||||
tmp_tags = (void *)buf;
|
||||
tmp_buf = buf + tbytes;
|
||||
tmp_buf += ALIGN(tmp_buf, long);
|
||||
reach_next = (void *)tmp_buf;
|
||||
tmp_buf += rbytes;
|
||||
tmp_buf += ALIGN(tmp_buf, long);
|
||||
reach = (void *)tmp_buf;
|
||||
tmp_buf += rbytes;
|
||||
tmp_buf += ALIGN(tmp_buf, long);
|
||||
reach_pos = (void *)tmp_buf;
|
||||
tmp_buf += pbytes;
|
||||
tmp_buf += ALIGN(tmp_buf, long);
|
||||
for (i = 0; i < tnfa->num_states; i++) {
|
||||
reach[i].tags = (void *)tmp_buf;
|
||||
tmp_buf += xbytes;
|
||||
reach_next[i].tags = (void *)tmp_buf;
|
||||
tmp_buf += xbytes;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < tnfa->num_states; i++) reach_pos[i].pos = -1;
|
||||
|
||||
GET_NEXT_WCHAR();
|
||||
pos = 0;
|
||||
|
||||
reach_next_i = reach_next;
|
||||
while (1) {
|
||||
/* If no match found yet, add the initial states to `reach_next'. */
|
||||
if (match_eo < 0) {
|
||||
trans_i = tnfa->initial;
|
||||
while (trans_i->state != NULL) {
|
||||
if (reach_pos[trans_i->state_id].pos < pos) {
|
||||
if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions)) {
|
||||
trans_i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
reach_next_i->state = trans_i->state;
|
||||
for (i = 0; i < num_tags; i++) reach_next_i->tags[i] = -1;
|
||||
tag_i = trans_i->tags;
|
||||
if (tag_i)
|
||||
while (*tag_i >= 0) {
|
||||
if (*tag_i < num_tags) reach_next_i->tags[*tag_i] = pos;
|
||||
tag_i++;
|
||||
}
|
||||
if (reach_next_i->state == tnfa->final) {
|
||||
match_eo = pos;
|
||||
new_match = 1;
|
||||
for (i = 0; i < num_tags; i++)
|
||||
match_tags[i] = reach_next_i->tags[i];
|
||||
}
|
||||
reach_pos[trans_i->state_id].pos = pos;
|
||||
reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
|
||||
reach_next_i++;
|
||||
}
|
||||
trans_i++;
|
||||
}
|
||||
reach_next_i->state = NULL;
|
||||
} else {
|
||||
if (num_tags == 0 || reach_next_i == reach_next)
|
||||
/* We have found a match. */
|
||||
break;
|
||||
}
|
||||
|
||||
/* Check for end of string. */
|
||||
if (!next_c) break;
|
||||
|
||||
GET_NEXT_WCHAR();
|
||||
|
||||
/* Swap `reach' and `reach_next'. */
|
||||
reach_i = reach;
|
||||
reach = reach_next;
|
||||
reach_next = reach_i;
|
||||
|
||||
/* For each state in `reach', weed out states that don't fulfill the
|
||||
minimal matching conditions. */
|
||||
if (tnfa->num_minimals && new_match) {
|
||||
new_match = 0;
|
||||
reach_next_i = reach_next;
|
||||
for (reach_i = reach; reach_i->state; reach_i++) {
|
||||
int skip = 0;
|
||||
for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2) {
|
||||
int end = tnfa->minimal_tags[i];
|
||||
int start = tnfa->minimal_tags[i + 1];
|
||||
if (end >= num_tags) {
|
||||
skip = 1;
|
||||
break;
|
||||
} else if (reach_i->tags[start] == match_tags[start] &&
|
||||
reach_i->tags[end] < match_tags[end]) {
|
||||
skip = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!skip) {
|
||||
reach_next_i->state = reach_i->state;
|
||||
tmp_iptr = reach_next_i->tags;
|
||||
reach_next_i->tags = reach_i->tags;
|
||||
reach_i->tags = tmp_iptr;
|
||||
reach_next_i++;
|
||||
}
|
||||
}
|
||||
reach_next_i->state = NULL;
|
||||
|
||||
/* Swap `reach' and `reach_next'. */
|
||||
reach_i = reach;
|
||||
reach = reach_next;
|
||||
reach_next = reach_i;
|
||||
}
|
||||
|
||||
/* For each state in `reach' see if there is a transition leaving with
|
||||
the current input symbol to a state not yet in `reach_next', and
|
||||
add the destination states to `reach_next'. */
|
||||
reach_next_i = reach_next;
|
||||
for (reach_i = reach; reach_i->state; reach_i++) {
|
||||
for (trans_i = reach_i->state; trans_i->state; trans_i++) {
|
||||
/* Does this transition match the input symbol? */
|
||||
if (trans_i->code_min <= (tre_cint_t)prev_c &&
|
||||
trans_i->code_max >= (tre_cint_t)prev_c) {
|
||||
if (trans_i->assertions &&
|
||||
(CHECK_ASSERTIONS(trans_i->assertions) ||
|
||||
CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Compute the tags after this transition. */
|
||||
for (i = 0; i < num_tags; i++) tmp_tags[i] = reach_i->tags[i];
|
||||
tag_i = trans_i->tags;
|
||||
if (tag_i != NULL)
|
||||
while (*tag_i >= 0) {
|
||||
if (*tag_i < num_tags) tmp_tags[*tag_i] = pos;
|
||||
tag_i++;
|
||||
}
|
||||
|
||||
if (reach_pos[trans_i->state_id].pos < pos) {
|
||||
/* Found an unvisited node. */
|
||||
reach_next_i->state = trans_i->state;
|
||||
tmp_iptr = reach_next_i->tags;
|
||||
reach_next_i->tags = tmp_tags;
|
||||
tmp_tags = tmp_iptr;
|
||||
reach_pos[trans_i->state_id].pos = pos;
|
||||
reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
|
||||
|
||||
if (reach_next_i->state == tnfa->final &&
|
||||
(match_eo == -1 ||
|
||||
(num_tags > 0 && reach_next_i->tags[0] <= match_tags[0]))) {
|
||||
match_eo = pos;
|
||||
new_match = 1;
|
||||
for (i = 0; i < num_tags; i++)
|
||||
match_tags[i] = reach_next_i->tags[i];
|
||||
}
|
||||
reach_next_i++;
|
||||
|
||||
} else {
|
||||
assert(reach_pos[trans_i->state_id].pos == pos);
|
||||
/* Another path has also reached this state. We choose
|
||||
the winner by examining the tag values for both
|
||||
paths. */
|
||||
if (tre_tag_order(num_tags, tnfa->tag_directions, tmp_tags,
|
||||
*reach_pos[trans_i->state_id].tags)) {
|
||||
/* The new path wins. */
|
||||
tmp_iptr = *reach_pos[trans_i->state_id].tags;
|
||||
*reach_pos[trans_i->state_id].tags = tmp_tags;
|
||||
if (trans_i->state == tnfa->final) {
|
||||
match_eo = pos;
|
||||
new_match = 1;
|
||||
for (i = 0; i < num_tags; i++) match_tags[i] = tmp_tags[i];
|
||||
}
|
||||
tmp_tags = tmp_iptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
reach_next_i->state = NULL;
|
||||
}
|
||||
|
||||
*match_end_ofs = match_eo;
|
||||
ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
|
||||
error_exit:
|
||||
free(buf), buf = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
from tre-match-backtrack.c
|
||||
***********************************************************************/
|
||||
|
||||
/*
|
||||
This matcher is for regexps that use back referencing. Regexp matching
|
||||
with back referencing is an NP-complete problem on the number of back
|
||||
references. The easiest way to match them is to use a backtracking
|
||||
routine which basically goes through all possible paths in the TNFA
|
||||
and chooses the one which results in the best (leftmost and longest)
|
||||
match. This can be spectacularly expensive and may run out of stack
|
||||
space, but there really is no better known generic algorithm. Quoting
|
||||
Henry Spencer from comp.compilers:
|
||||
<URL: http://compilers.iecc.com/comparch/article/93-03-102>
|
||||
|
||||
POSIX.2 REs require longest match, which is really exciting to
|
||||
implement since the obsolete ("basic") variant also includes
|
||||
\<digit>. I haven't found a better way of tackling this than doing
|
||||
a preliminary match using a DFA (or simulation) on a modified RE
|
||||
that just replicates subREs for \<digit>, and then doing a
|
||||
backtracking match to determine whether the subRE matches were
|
||||
right. This can be rather slow, but I console myself with the
|
||||
thought that people who use \<digit> deserve very slow execution.
|
||||
(Pun unintentional but very appropriate.)
|
||||
|
||||
*/
|
||||
|
||||
/* Snapshot of the matcher position.  BT_STACK_PUSH fills one of these in
   and BT_STACK_POP copies it back into the matcher's local variables when
   the backtracking matcher abandons the current path. */
typedef struct {
|
||||
/* Value of `pos' when the item was pushed. */
regoff_t pos;
|
||||
/* Value of `str_byte' (input cursor) when the item was pushed. */
const char *str_byte;
|
||||
/* TNFA state to resume from. */
tre_tnfa_transition_t *state;
|
||||
/* ID of `state'; the matcher uses it to clear the matching
   `states_seen' entry before popping. */
int state_id;
|
||||
/* Value of `next_c' when the item was pushed. */
int next_c;
|
||||
/* Private copy of the tag values (tnfa->num_tags entries). */
regoff_t *tags;
|
||||
#ifdef TRE_MBSTATE
|
||||
mbstate_t mbstate;
|
||||
#endif /* TRE_MBSTATE */
|
||||
} tre_backtrack_item_t;
|
||||
|
||||
/* Node of the doubly linked backtracking stack.  BT_STACK_PUSH moves to
   (or allocates) `next'; BT_STACK_POP steps back through `prev'.  The
   bottom node has prev == NULL.  Note that tre_backtrack_t is a POINTER
   to this struct. */
typedef struct tre_backtrack_struct {
|
||||
tre_backtrack_item_t item;
|
||||
struct tre_backtrack_struct *prev;
|
||||
struct tre_backtrack_struct *next;
|
||||
} * tre_backtrack_t;
|
||||
|
||||
/* Save / restore the caller's `mbstate' in the top stack item.  Both
   expand to nothing when TRE_MBSTATE is not defined. */
#ifdef TRE_MBSTATE
|
||||
#define BT_STACK_MBSTATE_IN stack->item.mbstate = (mbstate)
|
||||
#define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate
|
||||
#else /* !TRE_MBSTATE */
|
||||
#define BT_STACK_MBSTATE_IN
|
||||
#define BT_STACK_MBSTATE_OUT
|
||||
#endif /* !TRE_MBSTATE */
|
||||
|
||||
/* The backtracking stack is allocated through the generic TRE block
   allocator; these are plain aliases. */
#define tre_bt_mem_new tre_mem_new
|
||||
#define tre_bt_mem_alloc tre_mem_alloc
|
||||
#define tre_bt_mem_destroy tre_mem_destroy
|
||||
|
||||
/* Pushes a resume point onto the backtracking stack.

   The macro advances `stack' to its `next' node, allocating a new node
   (and its tag array) from `mem' when no node exists yet; nodes are never
   released individually, so later pushes reuse them.  It then stores
   _pos, _str_byte, _state, _state_id and _next_c in the node and copies
   tnfa->num_tags entries from _tags.

   It is not hygienic: it reads and writes the caller's `stack', `mem',
   `tags', `pmatch', `states_seen' and `tnfa' by name.  On allocation
   failure it destroys `mem', frees `tags', `pmatch' and `states_seen' and
   executes `return REG_ESPACE' in the ENCLOSING function.

   The _str_wide and _mbstate arguments are not referenced by the
   expansion (the multibyte state is taken from the caller's `mbstate' via
   BT_STACK_MBSTATE_IN). */
#define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, \
|
||||
_tags, _mbstate) \
|
||||
do { \
|
||||
int i; \
|
||||
if (!stack->next) { \
|
||||
tre_backtrack_t s; \
|
||||
s = tre_bt_mem_alloc(mem, sizeof(*s)); \
|
||||
if (!s) { \
|
||||
tre_bt_mem_destroy(mem); \
|
||||
if (tags) free(tags), tags = NULL; \
|
||||
if (pmatch) free(pmatch), pmatch = NULL; \
|
||||
if (states_seen) free(states_seen), states_seen = NULL; \
|
||||
return REG_ESPACE; \
|
||||
} \
|
||||
s->prev = stack; \
|
||||
s->next = NULL; \
|
||||
s->item.tags = tre_bt_mem_alloc(mem, sizeof(*tags) * tnfa->num_tags); \
|
||||
if (!s->item.tags) { \
|
||||
tre_bt_mem_destroy(mem); \
|
||||
if (tags) free(tags), tags = NULL; \
|
||||
if (pmatch) free(pmatch), pmatch = NULL; \
|
||||
if (states_seen) free(states_seen), states_seen = NULL; \
|
||||
return REG_ESPACE; \
|
||||
} \
|
||||
stack->next = s; \
|
||||
stack = s; \
|
||||
} else \
|
||||
stack = stack->next; \
|
||||
stack->item.pos = (_pos); \
|
||||
stack->item.str_byte = (_str_byte); \
|
||||
stack->item.state = (_state); \
|
||||
stack->item.state_id = (_state_id); \
|
||||
stack->item.next_c = (_next_c); \
|
||||
for (i = 0; i < tnfa->num_tags; i++) stack->item.tags[i] = (_tags)[i]; \
|
||||
BT_STACK_MBSTATE_IN; \
|
||||
} while (0)
|
||||
|
||||
/* Pops the top resume point: copies the saved pos, str_byte, state,
   next_c and tag values back into the caller's variables of the same
   names, then moves `stack' to the previous node.  The popped node stays
   linked so a later BT_STACK_PUSH can reuse it.  Asserts that the stack is
   not at its bottom node.  Like BT_STACK_PUSH it accesses the caller's
   `stack', `tags' and `tnfa' by name. */
#define BT_STACK_POP() \
|
||||
do { \
|
||||
int i; \
|
||||
assert(stack->prev); \
|
||||
pos = stack->item.pos; \
|
||||
str_byte = stack->item.str_byte; \
|
||||
state = stack->item.state; \
|
||||
next_c = stack->item.next_c; \
|
||||
for (i = 0; i < tnfa->num_tags; i++) tags[i] = stack->item.tags[i]; \
|
||||
BT_STACK_MBSTATE_OUT; \
|
||||
stack = stack->prev; \
|
||||
} while (0)
|
||||
|
||||
/* Smaller of `a' and `b'.  Any earlier definition is dropped first.  Each
   argument may be evaluated twice, so avoid side effects. */
#undef MIN
|
||||
#define MIN(a, b) ((a) <= (b) ? (a) : (b))
|
||||
|
||||
/* Backtracking TNFA matcher (regexec() selects it when the compiled regex
   has back references).

   tnfa           compiled automaton.
   string         input; read through a `const char *' cursor.
   match_tags     receives the tag values of the best match; may be NULL,
                  in which case no tag values are reported.
   eflags         execution flags; REG_NOTBOL and REG_NOTEOL are extracted
                  here and the whole value is passed to CHECK_CHAR_CLASSES.
   match_end_ofs  receives the end offset of the best match, or -1 when
                  nothing matched.  It is NOT written when the function
                  leaves through an allocation failure.

   Returns REG_OK when a match was found, REG_NOMATCH when not, and
   REG_ESPACE when memory ran out (either here or inside BT_STACK_PUSH,
   which returns from this function directly).

   NOTE(review): many locals below are referenced BY NAME from the macros
   GET_NEXT_WCHAR, CHECK_ASSERTIONS, CHECK_CHAR_CLASSES (defined outside
   this view) and BT_STACK_PUSH / BT_STACK_POP.  Do not rename them. */
static reg_errcode_t tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa,
|
||||
const void *string,
|
||||
regoff_t *match_tags, int eflags,
|
||||
regoff_t *match_end_ofs) {
|
||||
/* State variables required by GET_NEXT_WCHAR. */
|
||||
tre_char_t prev_c = 0, next_c = 0;
|
||||
const char *str_byte = string;
|
||||
regoff_t pos = 0;
|
||||
regoff_t pos_add_next = 1;
|
||||
#ifdef TRE_MBSTATE
|
||||
mbstate_t mbstate;
|
||||
#endif /* TRE_MBSTATE */
|
||||
int reg_notbol = eflags & REG_NOTBOL;
|
||||
int reg_noteol = eflags & REG_NOTEOL;
|
||||
int reg_newline = tnfa->cflags & REG_NEWLINE;
|
||||
|
||||
/* These are used to remember the necessary values of the above
|
||||
variables to return to the position where the current search
|
||||
started from. */
|
||||
int next_c_start;
|
||||
const char *str_byte_start;
|
||||
regoff_t pos_start = -1;
|
||||
#ifdef TRE_MBSTATE
|
||||
mbstate_t mbstate_start;
|
||||
#endif /* TRE_MBSTATE */
|
||||
|
||||
/* End offset of best match so far, or -1 if no match found yet. */
|
||||
regoff_t match_eo = -1;
|
||||
/* Tag arrays. */
|
||||
int *next_tags;
|
||||
regoff_t *tags = NULL;
|
||||
/* Current TNFA state. */
|
||||
tre_tnfa_transition_t *state;
|
||||
/* Per-state flag, indexed by state_id: set while a back reference state
   has been entered through an empty match (infinite loop guard). */
int *states_seen = NULL;
|
||||
|
||||
/* Memory allocator for allocating the backtracking stack. */
|
||||
tre_mem_t mem = tre_bt_mem_new();
|
||||
|
||||
/* The backtracking stack. */
|
||||
tre_backtrack_t stack;
|
||||
|
||||
tre_tnfa_transition_t *trans_i;
|
||||
/* Scratch submatch array used only to resolve back references. */
regmatch_t *pmatch = NULL;
|
||||
int ret;
|
||||
|
||||
#ifdef TRE_MBSTATE
|
||||
memset(&mbstate, '\0', sizeof(mbstate));
|
||||
#endif /* TRE_MBSTATE */
|
||||
|
||||
if (!mem) return REG_ESPACE;
|
||||
/* Bottom node of the stack; its item is never popped (prev == NULL). */
stack = tre_bt_mem_alloc(mem, sizeof(*stack));
|
||||
if (!stack) {
|
||||
ret = REG_ESPACE;
|
||||
goto error_exit;
|
||||
}
|
||||
stack->prev = NULL;
|
||||
stack->next = NULL;
|
||||
|
||||
/* Each work array is only allocated when its element count is nonzero. */
if (tnfa->num_tags) {
|
||||
tags = malloc(sizeof(*tags) * tnfa->num_tags);
|
||||
if (!tags) {
|
||||
ret = REG_ESPACE;
|
||||
goto error_exit;
|
||||
}
|
||||
}
|
||||
if (tnfa->num_submatches) {
|
||||
pmatch = malloc(sizeof(*pmatch) * tnfa->num_submatches);
|
||||
if (!pmatch) {
|
||||
ret = REG_ESPACE;
|
||||
goto error_exit;
|
||||
}
|
||||
}
|
||||
if (tnfa->num_states) {
|
||||
states_seen = malloc(sizeof(*states_seen) * tnfa->num_states);
|
||||
if (!states_seen) {
|
||||
ret = REG_ESPACE;
|
||||
goto error_exit;
|
||||
}
|
||||
}
|
||||
|
||||
/* (Re)start of a match attempt: clear all tag values and loop guards. */
retry : {
|
||||
int i;
|
||||
for (i = 0; i < tnfa->num_tags; i++) {
|
||||
tags[i] = -1;
|
||||
if (match_tags) match_tags[i] = -1;
|
||||
}
|
||||
for (i = 0; i < tnfa->num_states; i++) states_seen[i] = 0;
|
||||
}
|
||||
|
||||
state = NULL;
|
||||
pos = pos_start;
|
||||
/* NOTE(review): GET_NEXT_WCHAR is defined outside this view; presumably
   it advances pos/str_byte and shifts next_c into prev_c -- confirm. */
GET_NEXT_WCHAR();
|
||||
pos_start = pos;
|
||||
next_c_start = next_c;
|
||||
str_byte_start = str_byte;
|
||||
#ifdef TRE_MBSTATE
|
||||
mbstate_start = mbstate;
|
||||
#endif /* TRE_MBSTATE */
|
||||
|
||||
/* Handle initial states. */
|
||||
next_tags = NULL;
|
||||
for (trans_i = tnfa->initial; trans_i->state; trans_i++) {
|
||||
if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions)) {
|
||||
continue;
|
||||
}
|
||||
if (state == NULL) {
|
||||
/* Start from this state. */
|
||||
state = trans_i->state;
|
||||
next_tags = trans_i->tags;
|
||||
} else {
|
||||
/* Backtrack to this state. */
|
||||
BT_STACK_PUSH(pos, str_byte, 0, trans_i->state, trans_i->state_id, next_c,
|
||||
tags, mbstate);
|
||||
{
|
||||
/* Apply this transition's tags to the pushed copy only. */
int *tmp = trans_i->tags;
|
||||
if (tmp)
|
||||
while (*tmp >= 0) stack->item.tags[*tmp++] = pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (next_tags)
|
||||
for (; *next_tags >= 0; next_tags++) tags[*next_tags] = pos;
|
||||
|
||||
if (state == NULL) goto backtrack;
|
||||
|
||||
while (1) {
|
||||
tre_tnfa_transition_t *next_state;
|
||||
int empty_br_match;
|
||||
|
||||
if (state == tnfa->final) {
|
||||
/* Prefer the longer match; on equal length let tre_tag_order decide
   (only possible when the caller asked for tags). */
if (match_eo < pos || (match_eo == pos && match_tags &&
|
||||
tre_tag_order(tnfa->num_tags, tnfa->tag_directions,
|
||||
tags, match_tags))) {
|
||||
int i;
|
||||
/* This match wins the previous match. */
|
||||
match_eo = pos;
|
||||
if (match_tags)
|
||||
for (i = 0; i < tnfa->num_tags; i++) match_tags[i] = tags[i];
|
||||
}
|
||||
/* Our TNFAs never have transitions leaving from the final state,
|
||||
so we jump right to backtracking. */
|
||||
goto backtrack;
|
||||
}
|
||||
|
||||
/* Go to the next character in the input string. */
|
||||
empty_br_match = 0;
|
||||
trans_i = state;
|
||||
if (trans_i->state && trans_i->assertions & ASSERT_BACKREF) {
|
||||
/* This is a back reference state. All transitions leaving from
|
||||
this state have the same back reference "assertion". Instead
|
||||
of reading the next character, we match the back reference. */
|
||||
regoff_t so, eo;
|
||||
int bt = trans_i->u.backref;
|
||||
regoff_t bt_len;
|
||||
int result;
|
||||
|
||||
/* Get the substring we need to match against. Remember to
|
||||
turn off REG_NOSUB temporarily. */
|
||||
tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB, tnfa, tags,
|
||||
pos);
|
||||
so = pmatch[bt].rm_so;
|
||||
eo = pmatch[bt].rm_eo;
|
||||
bt_len = eo - so;
|
||||
|
||||
/* Byte-wise comparison of the captured text against the input at the
   current position.  NOTE(review): `str_byte - 1' suggests str_byte is
   one byte ahead of `pos' here -- confirm against GET_NEXT_WCHAR. */
result = strncmp((const char *)string + so, str_byte - 1, (size_t)bt_len);
|
||||
|
||||
if (result == 0) {
|
||||
/* Back reference matched. Check for infinite loop. */
|
||||
if (bt_len == 0) empty_br_match = 1;
|
||||
if (empty_br_match && states_seen[trans_i->state_id]) {
|
||||
goto backtrack;
|
||||
}
|
||||
|
||||
states_seen[trans_i->state_id] = empty_br_match;
|
||||
|
||||
/* Advance in input string and resync `prev_c', `next_c'
|
||||
and pos. */
|
||||
str_byte += bt_len - 1;
|
||||
pos += bt_len - 1;
|
||||
GET_NEXT_WCHAR();
|
||||
} else {
|
||||
goto backtrack;
|
||||
}
|
||||
} else {
|
||||
/* Check for end of string. */
|
||||
if (next_c == L'\0') goto backtrack;
|
||||
|
||||
/* Read the next character. */
|
||||
GET_NEXT_WCHAR();
|
||||
}
|
||||
|
||||
next_state = NULL;
|
||||
for (trans_i = state; trans_i->state; trans_i++) {
|
||||
if (trans_i->code_min <= (tre_cint_t)prev_c &&
|
||||
trans_i->code_max >= (tre_cint_t)prev_c) {
|
||||
if (trans_i->assertions &&
|
||||
(CHECK_ASSERTIONS(trans_i->assertions) ||
|
||||
CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (next_state == NULL) {
|
||||
/* First matching transition. */
|
||||
next_state = trans_i->state;
|
||||
next_tags = trans_i->tags;
|
||||
} else {
|
||||
/* Second matching transition. We may need to backtrack here
|
||||
to take this transition instead of the first one, so we
|
||||
push this transition in the backtracking stack so we can
|
||||
jump back here if needed. */
|
||||
BT_STACK_PUSH(pos, str_byte, 0, trans_i->state, trans_i->state_id,
|
||||
next_c, tags, mbstate);
|
||||
{
|
||||
int *tmp;
|
||||
for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++)
|
||||
stack->item.tags[*tmp] = pos;
|
||||
}
|
||||
#if 0 /* XXX - it's important not to look at all transitions here to keep \
|
||||
the stack small! */
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (next_state != NULL) {
|
||||
/* Matching transitions were found. Take the first one. */
|
||||
state = next_state;
|
||||
|
||||
/* Update the tag values. */
|
||||
if (next_tags)
|
||||
while (*next_tags >= 0) tags[*next_tags++] = pos;
|
||||
} else {
|
||||
/* Also the target of the `goto backtrack' statements above. */
backtrack:
|
||||
/* A matching transition was not found. Try to backtrack. */
|
||||
if (stack->prev) {
|
||||
if (stack->item.state->assertions & ASSERT_BACKREF) {
|
||||
states_seen[stack->item.state_id] = 0;
|
||||
}
|
||||
|
||||
BT_STACK_POP();
|
||||
} else if (match_eo < 0) {
|
||||
/* Try starting from a later position in the input string. */
|
||||
/* Check for end of string. */
|
||||
if (next_c == L'\0') {
|
||||
break;
|
||||
}
|
||||
next_c = next_c_start;
|
||||
#ifdef TRE_MBSTATE
|
||||
mbstate = mbstate_start;
|
||||
#endif /* TRE_MBSTATE */
|
||||
str_byte = str_byte_start;
|
||||
goto retry;
|
||||
} else {
|
||||
/* Stack exhausted and a match is already recorded: done. */
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
|
||||
*match_end_ofs = match_eo;
|
||||
|
||||
/* Common cleanup for both the normal path and allocation failures. */
error_exit:
|
||||
tre_bt_mem_destroy(mem);
|
||||
#ifndef TRE_USE_ALLOCA
|
||||
if (tags) free(tags), tags = NULL;
|
||||
if (pmatch) free(pmatch), pmatch = NULL;
|
||||
if (states_seen) free(states_seen), states_seen = NULL;
|
||||
#endif /* !TRE_USE_ALLOCA */
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
from regexec.c
|
||||
***********************************************************************/
|
||||
|
||||
/* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
|
||||
endpoint values. */
|
||||
static void tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
|
||||
const tre_tnfa_t *tnfa, regoff_t *tags,
|
||||
regoff_t match_eo) {
|
||||
tre_submatch_data_t *submatch_data;
|
||||
unsigned int i, j;
|
||||
int *parents;
|
||||
|
||||
i = 0;
|
||||
if (match_eo >= 0 && !(cflags & REG_NOSUB)) {
|
||||
/* Construct submatch offsets from the tags. */
|
||||
submatch_data = tnfa->submatch_data;
|
||||
while (i < tnfa->num_submatches && i < nmatch) {
|
||||
if (submatch_data[i].so_tag == tnfa->end_tag)
|
||||
pmatch[i].rm_so = match_eo;
|
||||
else
|
||||
pmatch[i].rm_so = tags[submatch_data[i].so_tag];
|
||||
|
||||
if (submatch_data[i].eo_tag == tnfa->end_tag)
|
||||
pmatch[i].rm_eo = match_eo;
|
||||
else
|
||||
pmatch[i].rm_eo = tags[submatch_data[i].eo_tag];
|
||||
|
||||
/* If either of the endpoints were not used, this submatch
|
||||
was not part of the match. */
|
||||
if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1)
|
||||
pmatch[i].rm_so = pmatch[i].rm_eo = -1;
|
||||
|
||||
i++;
|
||||
}
|
||||
/* Reset all submatches that are not within all of their parent
|
||||
submatches. */
|
||||
i = 0;
|
||||
while (i < tnfa->num_submatches && i < nmatch) {
|
||||
if (pmatch[i].rm_eo == -1) assert(pmatch[i].rm_so == -1);
|
||||
assert(pmatch[i].rm_so <= pmatch[i].rm_eo);
|
||||
|
||||
parents = submatch_data[i].parents;
|
||||
if (parents != NULL)
|
||||
for (j = 0; parents[j] >= 0; j++) {
|
||||
if (pmatch[i].rm_so < pmatch[parents[j]].rm_so ||
|
||||
pmatch[i].rm_eo > pmatch[parents[j]].rm_eo)
|
||||
pmatch[i].rm_so = pmatch[i].rm_eo = -1;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
while (i < nmatch) {
|
||||
pmatch[i].rm_so = -1;
|
||||
pmatch[i].rm_eo = -1;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Wrapper functions for POSIX compatible regexp matching.
|
||||
*/
|
||||
|
||||
int regexec(const regex_t *restrict preg, const char *restrict string,
|
||||
size_t nmatch, regmatch_t pmatch[restrict], int eflags) {
|
||||
tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
|
||||
reg_errcode_t status;
|
||||
regoff_t *tags = NULL, eo;
|
||||
if (tnfa->cflags & REG_NOSUB) nmatch = 0;
|
||||
if (tnfa->num_tags > 0 && nmatch > 0) {
|
||||
tags = malloc(sizeof(*tags) * tnfa->num_tags);
|
||||
if (tags == NULL) return REG_ESPACE;
|
||||
}
|
||||
|
||||
/* Dispatch to the appropriate matcher. */
|
||||
if (tnfa->have_backrefs) {
|
||||
/* The regex has back references, use the backtracking matcher. */
|
||||
status = tre_tnfa_run_backtrack(tnfa, string, tags, eflags, &eo);
|
||||
} else {
|
||||
/* Exact matching, no back references, use the parallel matcher. */
|
||||
status = tre_tnfa_run_parallel(tnfa, string, tags, eflags, &eo);
|
||||
}
|
||||
|
||||
if (status == REG_OK) /* A match was found, so fill the submatch registers. */
|
||||
tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo);
|
||||
if (tags) free(tags), tags = NULL;
|
||||
return status;
|
||||
}
|
158
third_party/regex/tre-mem.c
vendored
Normal file
158
third_party/regex/tre-mem.c
vendored
Normal file
|
@ -0,0 +1,158 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||
│ │
|
||||
│ tre-mem.c - TRE memory allocator │
|
||||
│ │
|
||||
│ Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> │
|
||||
│ All rights reserved. │
|
||||
│ │
|
||||
│ Redistribution and use in source and binary forms, with or without │
|
||||
│ modification, are permitted provided that the following conditions │
|
||||
│ are met: │
|
||||
│ │
|
||||
│ 1. Redistributions of source code must retain the above copyright │
|
||||
│ notice, this list of conditions and the following disclaimer. │
|
||||
│ │
|
||||
│ 2. Redistributions in binary form must reproduce the above copyright │
|
||||
│ notice, this list of conditions and the following disclaimer in │
|
||||
│ the documentation and/or other materials provided with the │
|
||||
│ distribution. │
|
||||
│ │
|
||||
│ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS │
|
||||
│ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT │
|
||||
│ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR │
|
||||
│ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT │
|
||||
│ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, │
|
||||
│ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT │
|
||||
│ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, │
|
||||
│ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY │
|
||||
│ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT │
|
||||
│ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE │
|
||||
│ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. │
|
||||
│ │
|
||||
│──────────────────────────────────────────────────────────────────────────────│
|
||||
│ │
|
||||
│ Musl Libc │
|
||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
||||
│ │
|
||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||
│ a copy of this software and associated documentation files (the │
|
||||
│ "Software"), to deal in the Software without restriction, including │
|
||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
||||
│ the following conditions: │
|
||||
│ │
|
||||
│ The above copyright notice and this permission notice shall be │
|
||||
│ included in all copies or substantial portions of the Software. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||
│ │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "third_party/regex/tre.inc"
|
||||
|
||||
/*
|
||||
This memory allocator is for allocating small memory blocks efficiently
|
||||
in terms of memory overhead and execution speed. The allocated blocks
|
||||
cannot be freed individually, only all at once. There can be multiple
|
||||
allocators, though.
|
||||
*/
|
||||
|
||||
/*
|
||||
This memory allocator is for allocating small memory blocks efficiently
|
||||
in terms of memory overhead and execution speed. The allocated blocks
|
||||
cannot be freed individually, only all at once. There can be multiple
|
||||
allocators, though.
|
||||
*/
|
||||
|
||||
/* Returns a new memory allocator or NULL if out of memory. */
|
||||
tre_mem_t tre_mem_new_impl(int provided, void *provided_block) {
|
||||
tre_mem_t mem;
|
||||
if (provided) {
|
||||
mem = provided_block;
|
||||
memset(mem, 0, sizeof(*mem));
|
||||
} else
|
||||
mem = calloc(1, sizeof(*mem));
|
||||
if (mem == NULL) return NULL;
|
||||
return mem;
|
||||
}
|
||||
|
||||
/* Frees the memory allocator and all memory allocated with it. */
|
||||
void tre_mem_destroy(tre_mem_t mem) {
|
||||
tre_list_t *tmp, *l = mem->blocks;
|
||||
|
||||
while (l != NULL) {
|
||||
free(l->data), l->data = NULL;
|
||||
tmp = l->next;
|
||||
free(l), l = tmp;
|
||||
}
|
||||
free(mem), mem = NULL;
|
||||
}
|
||||
|
||||
/* Allocates a block of `size' bytes from `mem'. Returns a pointer to the
|
||||
allocated block or NULL if an underlying malloc() failed. */
|
||||
void *tre_mem_alloc_impl(tre_mem_t mem, int provided, void *provided_block,
|
||||
int zero, size_t size) {
|
||||
void *ptr;
|
||||
|
||||
if (mem->failed) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (mem->n < size) {
|
||||
/* We need more memory than is available in the current block.
|
||||
Allocate a new block. */
|
||||
tre_list_t *l;
|
||||
if (provided) {
|
||||
if (provided_block == NULL) {
|
||||
mem->failed = 1;
|
||||
return NULL;
|
||||
}
|
||||
mem->ptr = provided_block;
|
||||
mem->n = TRE_MEM_BLOCK_SIZE;
|
||||
} else {
|
||||
int block_size;
|
||||
if (size * 8 > TRE_MEM_BLOCK_SIZE)
|
||||
block_size = size * 8;
|
||||
else
|
||||
block_size = TRE_MEM_BLOCK_SIZE;
|
||||
l = malloc(sizeof(*l));
|
||||
if (l == NULL) {
|
||||
mem->failed = 1;
|
||||
return NULL;
|
||||
}
|
||||
l->data = malloc(block_size);
|
||||
if (l->data == NULL) {
|
||||
free(l), l = NULL;
|
||||
mem->failed = 1;
|
||||
return NULL;
|
||||
}
|
||||
l->next = NULL;
|
||||
if (mem->current != NULL) mem->current->next = l;
|
||||
if (mem->blocks == NULL) mem->blocks = l;
|
||||
mem->current = l;
|
||||
mem->ptr = l->data;
|
||||
mem->n = block_size;
|
||||
}
|
||||
}
|
||||
|
||||
/* Make sure the next pointer will be aligned. */
|
||||
size += ALIGN(mem->ptr + size, long);
|
||||
|
||||
/* Allocate from current block. */
|
||||
ptr = mem->ptr;
|
||||
mem->ptr += size;
|
||||
mem->n -= size;
|
||||
|
||||
/* Set to zero if needed. */
|
||||
if (zero) memset(ptr, 0, size);
|
||||
|
||||
return ptr;
|
||||
}
|
248
third_party/regex/tre.inc
vendored
Normal file
248
third_party/regex/tre.inc
vendored
Normal file
|
@ -0,0 +1,248 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||
│ │
|
||||
│ Musl Libc │
|
||||
│ Copyright © 2005-2014 Rich Felker, et al. │
|
||||
│ │
|
||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||
│ a copy of this software and associated documentation files (the │
|
||||
│ "Software"), to deal in the Software without restriction, including │
|
||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
||||
│ the following conditions: │
|
||||
│ │
|
||||
│ The above copyright notice and this permission notice shall be │
|
||||
│ included in all copies or substantial portions of the Software. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||
│ │
|
||||
│──────────────────────────────────────────────────────────────────────────────│
|
||||
│ │
|
||||
│ tre-internal.h - TRE internal definitions │
|
||||
│ │
|
||||
│ Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> │
|
||||
│ All rights reserved. │
|
||||
│ │
|
||||
│ Redistribution and use in source and binary forms, with or without │
|
||||
│ modification, are permitted provided that the following conditions │
|
||||
│ are met: │
|
||||
│ │
|
||||
│ 1. Redistributions of source code must retain the above copyright │
|
||||
│ notice, this list of conditions and the following disclaimer. │
|
||||
│ │
|
||||
│ 2. Redistributions in binary form must reproduce the above copyright │
|
||||
│ notice, this list of conditions and the following disclaimer in │
|
||||
│ the documentation and/or other materials provided with the │
|
||||
│ distribution. │
|
||||
│ │
|
||||
│ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS │
|
||||
│ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT │
|
||||
│ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR │
|
||||
│ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT │
|
||||
│ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, │
|
||||
│ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT │
|
||||
│ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, │
|
||||
│ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY │
|
||||
│ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT │
|
||||
│ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE │
|
||||
│ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. │
|
||||
│ │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/alg/alg.h"
|
||||
#include "libc/assert.h"
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "third_party/regex/notice.inc"
|
||||
#include "third_party/regex/regex.h"
|
||||
|
||||
/* TRE_MBSTATE is forced off here, so sections guarded by
   `#ifdef TRE_MBSTATE' in code that includes this header compile out. */
#undef TRE_MBSTATE
|
||||
|
||||
/* Name of the regex_t member through which regexec() reaches the compiled
   TNFA (it reads preg->TRE_REGEX_T_FIELD). */
#define TRE_REGEX_T_FIELD __opaque
|
||||
typedef int reg_errcode_t;
|
||||
typedef wchar_t tre_char_t;
|
||||
|
||||
/* Debug print hook; expands to an empty statement and ignores `msg'. */
#define DPRINT(msg) \
|
||||
do { \
|
||||
} while (0)
|
||||
|
||||
/* Number of elements of a true array `x' (not valid for pointers). */
#define elementsof(x) (sizeof(x) / sizeof(x[0]))
|
||||
|
||||
/* Multibyte decoding is mapped onto mbtowc(); the `ps' conversion state
   argument is ignored. */
#define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n)))
|
||||
|
||||
/* Wide characters. */
|
||||
typedef wint_t tre_cint_t;
|
||||
#define TRE_CHAR_MAX 0x10ffff
|
||||
|
||||
#define tre_isalnum iswalnum
|
||||
#define tre_isalpha iswalpha
|
||||
#define tre_isblank iswblank
|
||||
#define tre_iscntrl iswcntrl
|
||||
#define tre_isdigit iswdigit
|
||||
#define tre_isgraph iswgraph
|
||||
#define tre_islower iswlower
|
||||
#define tre_isprint iswprint
|
||||
#define tre_ispunct iswpunct
|
||||
#define tre_isspace iswspace
|
||||
#define tre_isupper iswupper
|
||||
#define tre_isxdigit iswxdigit
|
||||
|
||||
#define tre_tolower towlower
|
||||
#define tre_toupper towupper
|
||||
#define tre_strlen wcslen
|
||||
|
||||
/* Use system provided iswctype() and wctype(). */
|
||||
typedef wctype_t tre_ctype_t;
|
||||
#define tre_isctype iswctype
|
||||
#define tre_ctype wctype
|
||||
|
||||
/* Returns number of bytes to add to (char *)ptr to make it
   properly aligned for the type.

   `ptr' is parenthesized before the cast so that an expression argument
   such as `p + 1' is converted as a whole; without the parentheses the
   cast would bind to `p' alone and the addition would then be done on
   the integer value, which gives a wrong answer unless `p' points to
   char-sized objects.  `ptr' is evaluated up to twice, so it must not
   have side effects.  The result is 0 when `ptr' is already aligned. */
#define ALIGN(ptr, type)                                 \
  ((((long)(ptr)) % sizeof(type))                        \
       ? (sizeof(type) - (((long)(ptr)) % sizeof(type))) \
       : 0)
|
||||
|
||||
/* Larger / smaller of two values.  Earlier definitions are dropped first.
   Each argument may be evaluated twice, so avoid side effects. */
#undef MAX
|
||||
#undef MIN
|
||||
#define MAX(a, b) (((a) >= (b)) ? (a) : (b))
|
||||
#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
|
||||
|
||||
/* TNFA transition type. A TNFA state is an array of transitions,
|
||||
the terminator is a transition with NULL `state'. */
|
||||
typedef struct tnfa_transition tre_tnfa_transition_t;
|
||||
|
||||
struct tnfa_transition {
|
||||
/* Range of accepted characters.  Both ends are inclusive: the matchers
   accept a character c when code_min <= c && code_max >= c. */
|
||||
tre_cint_t code_min;
|
||||
tre_cint_t code_max;
|
||||
/* Pointer to the destination state. */
|
||||
tre_tnfa_transition_t *state;
|
||||
/* ID number of the destination state. */
|
||||
int state_id;
|
||||
/* Array of tags terminated by a negative value (or NULL).  The matchers
   set each listed tag to the current position when the transition is
   taken. */
|
||||
int *tags;
|
||||
/* Assertion bitmap (the ASSERT_* flags OR-ed together; 0 = none). */
|
||||
int assertions;
|
||||
/* Assertion parameters. */
|
||||
union {
|
||||
/* Character class assertion. */
|
||||
tre_ctype_t class;
|
||||
/* Back reference assertion: index of the referenced submatch. */
|
||||
int backref;
|
||||
} u;
|
||||
/* Negative character class assertions. */
|
||||
tre_ctype_t *neg_classes;
|
||||
};
|
||||
|
||||
/* Assertions.  Each value is a distinct bit; they are OR-ed together in
   the `assertions' member of a transition and tested with `&'. */
|
||||
#define ASSERT_AT_BOL 1 /* Beginning of line. */
|
||||
#define ASSERT_AT_EOL 2 /* End of line. */
|
||||
#define ASSERT_CHAR_CLASS 4 /* Character class in `class'. */
|
||||
#define ASSERT_CHAR_CLASS_NEG 8 /* Character classes in `neg_classes'. */
|
||||
#define ASSERT_AT_BOW 16 /* Beginning of word. */
|
||||
#define ASSERT_AT_EOW 32 /* End of word. */
|
||||
#define ASSERT_AT_WB 64 /* Word boundary. */
|
||||
#define ASSERT_AT_WB_NEG 128 /* Not a word boundary. */
|
||||
#define ASSERT_BACKREF 256 /* A back reference in `backref'. */
|
||||
/* Same value as the highest flag above (ASSERT_BACKREF). */
#define ASSERT_LAST 256
|
||||
|
||||
/* Tag directions. */
typedef enum {
  TRE_TAG_MINIMIZE = 0,
  TRE_TAG_MAXIMIZE = 1
} tre_tag_direction_t;
|
||||
|
||||
/* Instructions to compute submatch register values from tag values
   after a successful match. */
struct tre_submatch_data {
  /* Tag that gives the value for rm_so (submatch start offset). */
  int so_tag;
  /* Tag that gives the value for rm_eo (submatch end offset). */
  int eo_tag;
  /* List of submatches this submatch is contained in.
     NOTE(review): the list's terminator convention is not visible here. */
  int *parents;
};

typedef struct tre_submatch_data tre_submatch_data_t;
|
||||
|
||||
/* TNFA definition. */
|
||||
typedef struct tnfa tre_tnfa_t;
|
||||
|
||||
struct tnfa {
|
||||
tre_tnfa_transition_t *transitions;
|
||||
unsigned int num_transitions;
|
||||
tre_tnfa_transition_t *initial;
|
||||
tre_tnfa_transition_t *final;
|
||||
tre_submatch_data_t *submatch_data;
|
||||
char *firstpos_chars;
|
||||
int first_char;
|
||||
unsigned int num_submatches;
|
||||
tre_tag_direction_t *tag_directions;
|
||||
int *minimal_tags;
|
||||
int num_tags;
|
||||
int num_minimals;
|
||||
int end_tag;
|
||||
int num_states;
|
||||
int cflags;
|
||||
int have_backrefs;
|
||||
int have_approx;
|
||||
};
|
||||
|
||||
/* from tre-mem.h: */

/* Size passed to alloca() for each new block by tre_mem_alloca(). */
#define TRE_MEM_BLOCK_SIZE 1024

/* Singly linked list node carrying an opaque data pointer. */
typedef struct tre_list {
  void *data;
  struct tre_list *next;
} tre_list_t;

/* Allocator state.  Note that tre_mem_t is a POINTER to this struct.
   NOTE(review): the per-field roles below are inferred from names; the
   authoritative behaviour is in tre_mem_alloc_impl(). */
typedef struct tre_mem_struct {
  tre_list_t *blocks;  /* presumably head of the block list */
  tre_list_t *current; /* presumably the block being carved up */
  char *ptr;           /* presumably next free byte in `current' */
  size_t n;            /* compared against the request size by tre_mem_alloca() */
  int failed;
  void **provided;
} *tre_mem_t;
|
||||
|
||||
#define tre_mem_new_impl __tre_mem_new_impl
|
||||
#define tre_mem_alloc_impl __tre_mem_alloc_impl
|
||||
#define tre_mem_destroy __tre_mem_destroy
|
||||
|
||||
tre_mem_t tre_mem_new_impl(int provided, void *provided_block) hidden;
|
||||
void *tre_mem_alloc_impl(tre_mem_t mem, int provided, void *provided_block,
|
||||
int zero, size_t size) hidden;
|
||||
|
||||
/* Returns a new memory allocator or NULL if out of memory. */
#define tre_mem_new() tre_mem_new_impl(0, NULL)

/* Allocates a block of `size' bytes from `mem'.  Returns a pointer to the
   allocated block or NULL if an underlying malloc() failed. */
#define tre_mem_alloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 0, size)

/* Allocates a block of `size' bytes from `mem'.  Returns a pointer to the
   allocated block or NULL if an underlying malloc() failed.  The memory
   is set to zero. */
#define tre_mem_calloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 1, size)
|
||||
|
||||
#ifdef TRE_USE_ALLOCA
/* alloca() versions.  Like above, but memory is allocated with alloca()
   instead of malloc().  These have to be macros: alloca() is invoked at
   the expansion site. */

#define tre_mem_newa() \
  tre_mem_new_impl(1, alloca(sizeof(struct tre_mem_struct)))

/* When (mem)->n already covers `size' no new block is handed in (NULL);
   otherwise a fresh TRE_MEM_BLOCK_SIZE-byte alloca() block is supplied.
   `mem' and `size' are each expanded more than once, so they must be free
   of side effects.
   NOTE(review): presumably n is the space left in the current block --
   confirm in tre_mem_alloc_impl(). */
#define tre_mem_alloca(mem, size)                       \
  ((mem)->n >= (size)                                   \
       ? tre_mem_alloc_impl((mem), 1, NULL, 0, (size))  \
       : tre_mem_alloc_impl((mem), 1, alloca(TRE_MEM_BLOCK_SIZE), 0, (size)))
#endif /* TRE_USE_ALLOCA */
|
||||
|
||||
/* Frees the memory allocator and all memory allocated with it. */
|
||||
hidden void tre_mem_destroy(tre_mem_t mem);
|
Loading…
Add table
Add a link
Reference in a new issue