Import new gnulib.

This commit is contained in:
Vladimir 'phcoder' Serbinenko 2013-04-11 21:12:46 +02:00
parent 93cd84df63
commit 053cfcddf1
255 changed files with 12578 additions and 4948 deletions

View file

@ -1,22 +1,21 @@
/* Extended regular expression matching and search library.
Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
Software Foundation, Inc.
Copyright (C) 2002-2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
You should have received a copy of the GNU General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
size_t length, reg_syntax_t syntax);
@ -95,20 +94,20 @@ static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
bitset_t sbcset,
re_charset_t *mbcset,
Idx *char_class_alloc,
const unsigned char *class_name,
const char *class_name,
reg_syntax_t syntax);
#else /* not RE_ENABLE_I18N */
static reg_errcode_t build_equiv_class (bitset_t sbcset,
const unsigned char *name);
static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
bitset_t sbcset,
const unsigned char *class_name,
const char *class_name,
reg_syntax_t syntax);
#endif /* not RE_ENABLE_I18N */
static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
RE_TRANSLATE_TYPE trans,
const unsigned char *class_name,
const unsigned char *extra,
const char *class_name,
const char *extra,
bool non_match, reg_errcode_t *err);
static bin_tree_t *create_tree (re_dfa_t *dfa,
bin_tree_t *left, bin_tree_t *right,
@ -207,7 +206,7 @@ static const size_t __re_error_msgid_idx[] =
compiles PATTERN (of length LENGTH) and puts the result in BUFP.
Returns 0 if the pattern was valid, otherwise an error string.
Assumes the `allocated' (and perhaps `buffer') and `translate' fields
Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields
are set in BUFP on entry. */
#ifdef _LIBC
@ -242,7 +241,7 @@ re_compile_pattern (const char *pattern, size_t length,
weak_alias (__re_compile_pattern, re_compile_pattern)
#endif
/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
/* Set by 're_set_syntax' to the current regexp syntax to recognize. Can
also be assigned to arbitrarily: each pattern buffer stores its own
syntax, so it can be changed between regex compilations. */
/* This has no initializer because initialized variables in Emacs
@ -274,7 +273,7 @@ int
re_compile_fastmap (bufp)
struct re_pattern_buffer *bufp;
{
re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
re_dfa_t *dfa = bufp->buffer;
char *fastmap = bufp->fastmap;
memset (fastmap, '\0', sizeof (char) * SBC_MAX);
@ -293,7 +292,7 @@ weak_alias (__re_compile_fastmap, re_compile_fastmap)
#endif
static inline void
__attribute ((always_inline))
__attribute__ ((always_inline))
re_set_fastmap (char *fastmap, bool icase, int ch)
{
fastmap[ch] = 1;
@ -308,7 +307,7 @@ static void
re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
char *fastmap)
{
re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
re_dfa_t *dfa = bufp->buffer;
Idx node_cnt;
bool icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
@ -440,15 +439,15 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
PREG is a regex_t *. We do not expect any fields to be initialized,
since POSIX says we shouldn't. Thus, we set
`buffer' to the compiled pattern;
`used' to the length of the compiled pattern;
`syntax' to RE_SYNTAX_POSIX_EXTENDED if the
'buffer' to the compiled pattern;
'used' to the length of the compiled pattern;
'syntax' to RE_SYNTAX_POSIX_EXTENDED if the
REG_EXTENDED bit in CFLAGS is set; otherwise, to
RE_SYNTAX_POSIX_BASIC;
`newline_anchor' to REG_NEWLINE being set in CFLAGS;
`fastmap' to an allocated space for the fastmap;
`fastmap_accurate' to zero;
`re_nsub' to the number of subexpressions in PATTERN.
'newline_anchor' to REG_NEWLINE being set in CFLAGS;
'fastmap' to an allocated space for the fastmap;
'fastmap_accurate' to zero;
're_nsub' to the number of subexpressions in PATTERN.
PATTERN is the address of the pattern string.
@ -551,10 +550,6 @@ regerror (int errcode, const regex_t *_Restrict_ preg,
if (BE (errcode < 0
|| errcode >= (int) (sizeof (__re_error_msgid_idx)
/ sizeof (__re_error_msgid_idx[0])), 0))
/* Only error codes returned by the rest of the code should be passed
to this routine. If we are given anything else, or if other regex
code generates an invalid error code, then the program has a bug.
Dump core so we can fix it. */
msg = gettext ("unknown regexp error");
else
msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
@ -587,19 +582,23 @@ weak_alias (__regerror, regerror)
static const bitset_t utf8_sb_map =
{
/* Set the first 128 bits. */
# if 4 * BITSET_WORD_BITS < ASCII_CHARS
# error "bitset_word_t is narrower than 32 bits"
# elif 3 * BITSET_WORD_BITS < ASCII_CHARS
# ifdef __GNUC__
[0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
# else
# if 4 * BITSET_WORD_BITS < ASCII_CHARS
# error "bitset_word_t is narrower than 32 bits"
# elif 3 * BITSET_WORD_BITS < ASCII_CHARS
BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX,
# elif 2 * BITSET_WORD_BITS < ASCII_CHARS
# elif 2 * BITSET_WORD_BITS < ASCII_CHARS
BITSET_WORD_MAX, BITSET_WORD_MAX,
# elif 1 * BITSET_WORD_BITS < ASCII_CHARS
# elif 1 * BITSET_WORD_BITS < ASCII_CHARS
BITSET_WORD_MAX,
# endif
# endif
(BITSET_WORD_MAX
>> (SBC_MAX % BITSET_WORD_BITS == 0
? 0
: BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS))
# endif
};
#endif
@ -658,7 +657,7 @@ void
regfree (preg)
regex_t *preg;
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
re_dfa_t *dfa = preg->buffer;
if (BE (dfa != NULL, 1))
free_dfa_content (dfa);
preg->buffer = NULL;
@ -719,7 +718,7 @@ re_comp (s)
+ __re_error_msgid_idx[(int) REG_ESPACE]);
}
/* Since `re_exec' always passes NULL for the `regs' argument, we
/* Since 're_exec' always passes NULL for the 'regs' argument, we
don't need to initialize the pattern buffer fields which affect it. */
/* Match anchors at newlines. */
@ -730,7 +729,7 @@ re_comp (s)
if (!ret)
return NULL;
/* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
/* Yes, we're discarding 'const' here if !HAVE_LIBINTL. */
return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
}
@ -765,7 +764,7 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length,
preg->regs_allocated = REGS_UNALLOCATED;
/* Initialize the dfa. */
dfa = (re_dfa_t *) preg->buffer;
dfa = preg->buffer;
if (BE (preg->allocated < sizeof (re_dfa_t), 0))
{
/* If zero allocated, but buffer is non-null, try to realloc
@ -874,7 +873,7 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
calculation below, and for similar doubling calculations
elsewhere. And it's <= rather than <, because some of the
doubling calculations add 1 afterwards. */
if (BE (SIZE_MAX / max_object_size / 2 <= pat_len, 0))
if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) / 2 <= pat_len, 0))
return REG_ESPACE;
dfa->nodes_alloc = pat_len + 1;
@ -897,8 +896,10 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
!= 0);
#else
codeset_name = nl_langinfo (CODESET);
if (strcasecmp (codeset_name, "UTF-8") == 0
|| strcasecmp (codeset_name, "UTF8") == 0)
if ((codeset_name[0] == 'U' || codeset_name[0] == 'u')
&& (codeset_name[1] == 'T' || codeset_name[1] == 't')
&& (codeset_name[2] == 'F' || codeset_name[2] == 'f')
&& strcmp (codeset_name + 3 + (codeset_name[3] == '-'), "8") == 0)
dfa->is_utf8 = 1;
/* We check exhaustively in the loop below if this charset is a
@ -948,9 +949,43 @@ static void
internal_function
init_word_char (re_dfa_t *dfa)
{
int i, j, ch;
int i = 0;
int j;
int ch = 0;
dfa->word_ops_used = 1;
for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
if (BE (dfa->map_notascii == 0, 1))
{
bitset_word_t bits0 = 0x00000000;
bitset_word_t bits1 = 0x03ff0000;
bitset_word_t bits2 = 0x87fffffe;
bitset_word_t bits3 = 0x07fffffe;
if (BITSET_WORD_BITS == 64)
{
dfa->word_char[0] = bits1 << 31 << 1 | bits0;
dfa->word_char[1] = bits3 << 31 << 1 | bits2;
i = 2;
}
else if (BITSET_WORD_BITS == 32)
{
dfa->word_char[0] = bits0;
dfa->word_char[1] = bits1;
dfa->word_char[2] = bits2;
dfa->word_char[3] = bits3;
i = 4;
}
else
goto general_case;
ch = 128;
if (BE (dfa->is_utf8, 1))
{
memset (&dfa->word_char[i], '\0', (SBC_MAX - ch) / 8);
return;
}
}
general_case:
for (; i < BITSET_WORDS; ++i)
for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
if (isalnum (ch) || ch == '_')
dfa->word_char[i] |= (bitset_word_t) 1 << j;
@ -961,7 +996,7 @@ init_word_char (re_dfa_t *dfa)
static void
free_workarea_compile (regex_t *preg)
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
re_dfa_t *dfa = preg->buffer;
bin_tree_storage_t *storage, *next;
for (storage = dfa->str_tree_storage; storage; storage = next)
{
@ -1145,7 +1180,7 @@ optimize_utf8 (re_dfa_t *dfa)
static reg_errcode_t
analyze (regex_t *preg)
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
re_dfa_t *dfa = preg->buffer;
reg_errcode_t ret;
/* Allocate arrays. */
@ -1326,7 +1361,7 @@ lower_subexps (void *extra, bin_tree_t *node)
static bin_tree_t *
lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
re_dfa_t *dfa = preg->buffer;
bin_tree_t *body = node->left;
bin_tree_t *op, *cls, *tree1, *tree;
@ -1660,7 +1695,7 @@ calc_eclosure (re_dfa_t *dfa)
/* If we have already calculated, skip it. */
if (dfa->eclosures[node_idx].nelem != 0)
continue;
/* Calculate epsilon closure of `node_idx'. */
/* Calculate epsilon closure of 'node_idx'. */
err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true);
if (BE (err != REG_NOERROR, 0))
return err;
@ -1710,14 +1745,14 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
{
re_node_set eclosure_elem;
Idx edest = dfa->edests[node].elems[i];
/* If calculating the epsilon closure of `edest' is in progress,
/* If calculating the epsilon closure of 'edest' is in progress,
return intermediate result. */
if (dfa->eclosures[edest].nelem == REG_MISSING)
{
incomplete = true;
continue;
}
/* If we haven't calculated the epsilon closure of `edest' yet,
/* If we haven't calculated the epsilon closure of 'edest' yet,
calculate now. Otherwise use calculated epsilon closure. */
if (dfa->eclosures[edest].nelem == 0)
{
@ -1727,11 +1762,11 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
}
else
eclosure_elem = dfa->eclosures[edest];
/* Merge the epsilon closure of `edest'. */
/* Merge the epsilon closure of 'edest'. */
err = re_node_set_merge (&eclosure, &eclosure_elem);
if (BE (err != REG_NOERROR, 0))
return err;
/* If the epsilon closure of `edest' is incomplete,
/* If the epsilon closure of 'edest' is incomplete,
the epsilon closure of this node is also incomplete. */
if (dfa->eclosures[edest].nelem == 0)
{
@ -2093,7 +2128,7 @@ peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
/* Entry point of the parser.
Parse the regular expression REGEXP and return the structure tree.
If an error is occured, ERR is set by error code, and return NULL.
If an error occurs, ERR is set by error code, and return NULL.
This function build the following tree, from regular expression <reg_exp>:
CAT
/ \
@ -2107,7 +2142,7 @@ static bin_tree_t *
parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
reg_errcode_t *err)
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
re_dfa_t *dfa = preg->buffer;
bin_tree_t *tree, *eor, *root;
re_token_t current_token;
dfa->syntax = syntax;
@ -2135,13 +2170,13 @@ parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
/ \
<branch1> <branch2>
ALT means alternative, which represents the operator `|'. */
ALT means alternative, which represents the operator '|'. */
static bin_tree_t *
parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
re_dfa_t *dfa = preg->buffer;
bin_tree_t *tree, *branch = NULL;
tree = parse_branch (regexp, preg, token, syntax, nest, err);
if (BE (*err != REG_NOERROR && tree == NULL, 0))
@ -2183,7 +2218,7 @@ parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
{
bin_tree_t *tree, *expr;
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
re_dfa_t *dfa = preg->buffer;
tree = parse_expression (regexp, preg, token, syntax, nest, err);
if (BE (*err != REG_NOERROR && tree == NULL, 0))
return NULL;
@ -2194,16 +2229,21 @@ parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
expr = parse_expression (regexp, preg, token, syntax, nest, err);
if (BE (*err != REG_NOERROR && expr == NULL, 0))
{
if (tree != NULL)
postorder (tree, free_tree, NULL);
return NULL;
}
if (tree != NULL && expr != NULL)
{
tree = create_tree (dfa, tree, expr, CONCAT);
if (tree == NULL)
bin_tree_t *newtree = create_tree (dfa, tree, expr, CONCAT);
if (newtree == NULL)
{
postorder (expr, free_tree, NULL);
postorder (tree, free_tree, NULL);
*err = REG_ESPACE;
return NULL;
}
tree = newtree;
}
else if (tree == NULL)
tree = expr;
@ -2222,7 +2262,7 @@ static bin_tree_t *
parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
re_dfa_t *dfa = preg->buffer;
bin_tree_t *tree;
switch (token->type)
{
@ -2378,8 +2418,8 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
case OP_WORD:
case OP_NOTWORD:
tree = build_charclass_op (dfa, regexp->trans,
(const unsigned char *) "alnum",
(const unsigned char *) "_",
"alnum",
"_",
token->type == OP_NOTWORD, err);
if (BE (*err != REG_NOERROR && tree == NULL, 0))
return NULL;
@ -2387,8 +2427,8 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
case OP_SPACE:
case OP_NOTSPACE:
tree = build_charclass_op (dfa, regexp->trans,
(const unsigned char *) "space",
(const unsigned char *) "",
"space",
"",
token->type == OP_NOTSPACE, err);
if (BE (*err != REG_NOERROR && tree == NULL, 0))
return NULL;
@ -2438,7 +2478,7 @@ static bin_tree_t *
parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
re_dfa_t *dfa = preg->buffer;
bin_tree_t *tree;
size_t cur_nsub;
cur_nsub = preg->re_nsub++;
@ -2452,7 +2492,11 @@ parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
{
tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
*err = REG_EPAREN;
{
if (tree != NULL)
postorder (tree, free_tree, NULL);
*err = REG_EPAREN;
}
if (BE (*err != REG_NOERROR, 0))
return NULL;
}
@ -2530,6 +2574,12 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
*err = REG_BADBR;
return NULL;
}
if (BE (RE_DUP_MAX < (end == REG_MISSING ? start : end), 0))
{
*err = REG_ESIZE;
return NULL;
}
}
else
{
@ -2570,7 +2620,10 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
old_tree = NULL;
if (elem->token.type == SUBEXP)
postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx);
{
uintptr_t subidx = elem->token.opr.idx;
postorder (elem, mark_opt_subexp, (void *) subidx);
}
tree = create_tree (dfa, elem, NULL,
(end == REG_MISSING ? OP_DUP_ASTERISK : OP_ALT));
@ -2616,7 +2669,7 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
Build the range expression which starts from START_ELEM, and ends
at END_ELEM. The result are written to MBCSET and SBCSET.
RANGE_ALLOC is the allocated size of mbcset->range_starts, and
mbcset->range_ends, is a pointer argument sinse we may
mbcset->range_ends, is a pointer argument since we may
update it. */
static reg_errcode_t
@ -2655,7 +2708,6 @@ build_range_exp (const reg_syntax_t syntax,
wchar_t wc;
wint_t start_wc;
wint_t end_wc;
wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
@ -2669,11 +2721,7 @@ build_range_exp (const reg_syntax_t syntax,
? __btowc (end_ch) : end_elem->opr.wch);
if (start_wc == WEOF || end_wc == WEOF)
return REG_ECOLLATE;
cmp_buf[0] = start_wc;
cmp_buf[4] = end_wc;
if (BE ((syntax & RE_NO_EMPTY_RANGES)
&& wcscoll (cmp_buf, cmp_buf + 4) > 0, 0))
else if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_wc > end_wc, 0))
return REG_ERANGE;
/* Got valid collation sequence values, add them as a new entry.
@ -2714,9 +2762,7 @@ build_range_exp (const reg_syntax_t syntax,
/* Build the table for single byte characters. */
for (wc = 0; wc < SBC_MAX; ++wc)
{
cmp_buf[2] = wc;
if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
&& wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
if (start_wc <= wc && wc <= end_wc)
bitset_set (sbcset, wc);
}
}
@ -2750,11 +2796,12 @@ build_range_exp (const reg_syntax_t syntax,
static reg_errcode_t
internal_function
build_collating_symbol (bitset_t sbcset,
# ifdef RE_ENABLE_I18N
re_charset_t *mbcset, Idx *coll_sym_alloc,
# endif
const unsigned char *name)
build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
Idx *coll_sym_alloc, const unsigned char *name)
# else /* not RE_ENABLE_I18N */
build_collating_symbol (bitset_t sbcset, const unsigned char *name)
# endif /* not RE_ENABLE_I18N */
{
size_t name_len = strlen ((const char *) name);
if (BE (name_len != 1, 0))
@ -2782,42 +2829,31 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
const int32_t *symb_table;
const unsigned char *extra;
/* Local function for parse_bracket_exp used in _LIBC environement.
Seek the collating symbol entry correspondings to NAME.
Return the index of the symbol in the SYMB_TABLE. */
/* Local function for parse_bracket_exp used in _LIBC environment.
Seek the collating symbol entry corresponding to NAME.
Return the index of the symbol in the SYMB_TABLE,
or -1 if not found. */
auto inline int32_t
__attribute ((always_inline))
seek_collating_symbol_entry (name, name_len)
const unsigned char *name;
size_t name_len;
__attribute__ ((always_inline))
seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
{
int32_t hash = elem_hash ((const char *) name, name_len);
int32_t elem = hash % table_size;
if (symb_table[2 * elem] != 0)
{
int32_t second = hash % (table_size - 2) + 1;
int32_t elem;
do
{
/* First compare the hashing value. */
if (symb_table[2 * elem] == hash
/* Compare the length of the name. */
&& name_len == extra[symb_table[2 * elem + 1]]
/* Compare the name. */
&& memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
name_len) == 0)
{
/* Yep, this is the entry. */
break;
}
/* Next entry. */
elem += second;
}
while (symb_table[2 * elem] != 0);
}
return elem;
for (elem = 0; elem < table_size; elem++)
if (symb_table[2 * elem] != 0)
{
int32_t idx = symb_table[2 * elem + 1];
/* Skip the name of collating element name. */
idx += 1 + extra[idx];
if (/* Compare the length of the name. */
name_len == extra[idx]
/* Compare the name. */
&& memcmp (name, &extra[idx + 1], name_len) == 0)
/* Yep, this is the entry. */
return elem;
}
return -1;
}
/* Local function for parse_bracket_exp used in _LIBC environment.
@ -2825,9 +2861,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
Return the value if succeeded, UINT_MAX otherwise. */
auto inline unsigned int
__attribute ((always_inline))
lookup_collation_sequence_value (br_elem)
bracket_elem_t *br_elem;
__attribute__ ((always_inline))
lookup_collation_sequence_value (bracket_elem_t *br_elem)
{
if (br_elem->type == SB_CHAR)
{
@ -2855,7 +2890,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
int32_t elem, idx;
elem = seek_collating_symbol_entry (br_elem->opr.name,
sym_name_len);
if (symb_table[2 * elem] != 0)
if (elem != -1)
{
/* We found the entry. */
idx = symb_table[2 * elem + 1];
@ -2873,7 +2908,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
/* Return the collation sequence value. */
return *(unsigned int *) (extra + idx);
}
else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
else if (sym_name_len == 1)
{
/* No valid character. Match it as a single byte
character. */
@ -2886,20 +2921,17 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
return UINT_MAX;
}
/* Local function for parse_bracket_exp used in _LIBC environement.
/* Local function for parse_bracket_exp used in _LIBC environment.
Build the range expression which starts from START_ELEM, and ends
at END_ELEM. The result are written to MBCSET and SBCSET.
RANGE_ALLOC is the allocated size of mbcset->range_starts, and
mbcset->range_ends, is a pointer argument sinse we may
mbcset->range_ends, is a pointer argument since we may
update it. */
auto inline reg_errcode_t
__attribute ((always_inline))
build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
re_charset_t *mbcset;
Idx *range_alloc;
bitset_t sbcset;
bracket_elem_t *start_elem, *end_elem;
__attribute__ ((always_inline))
build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
bracket_elem_t *start_elem, bracket_elem_t *end_elem)
{
unsigned int ch;
uint32_t start_collseq;
@ -2912,6 +2944,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
0))
return REG_ERANGE;
/* FIXME: Implement rational ranges here, too. */
start_collseq = lookup_collation_sequence_value (start_elem);
end_collseq = lookup_collation_sequence_value (end_elem);
/* Check start/end collation sequence values. */
@ -2970,33 +3003,30 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
return REG_NOERROR;
}
/* Local function for parse_bracket_exp used in _LIBC environement.
/* Local function for parse_bracket_exp used in _LIBC environment.
Build the collating element which is represented by NAME.
The result are written to MBCSET and SBCSET.
COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
pointer argument sinse we may update it. */
pointer argument since we may update it. */
auto inline reg_errcode_t
__attribute ((always_inline))
build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
re_charset_t *mbcset;
Idx *coll_sym_alloc;
bitset_t sbcset;
const unsigned char *name;
__attribute__ ((always_inline))
build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
Idx *coll_sym_alloc, const unsigned char *name)
{
int32_t elem, idx;
size_t name_len = strlen ((const char *) name);
if (nrules != 0)
{
elem = seek_collating_symbol_entry (name, name_len);
if (symb_table[2 * elem] != 0)
if (elem != -1)
{
/* We found the entry. */
idx = symb_table[2 * elem + 1];
/* Skip the name of collating element name. */
idx += 1 + extra[idx];
}
else if (symb_table[2 * elem] == 0 && name_len == 1)
else if (name_len == 1)
{
/* No valid character, treat it as a normal
character. */
@ -3076,6 +3106,10 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
if (BE (sbcset == NULL, 0))
#endif /* RE_ENABLE_I18N */
{
re_free (sbcset);
#ifdef RE_ENABLE_I18N
re_free (mbcset);
#endif
*err = REG_ESPACE;
return NULL;
}
@ -3235,7 +3269,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
#ifdef RE_ENABLE_I18N
mbcset, &char_class_alloc,
#endif /* RE_ENABLE_I18N */
start_elem.opr.name, syntax);
(const char *) start_elem.opr.name,
syntax);
if (BE (*err != REG_NOERROR, 0))
goto parse_bracket_exp_free_return;
break;
@ -3414,7 +3449,7 @@ parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
Build the equivalence class which is represented by NAME.
The result are written to MBCSET and SBCSET.
EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
is a pointer argument sinse we may update it. */
is a pointer argument since we may update it. */
static reg_errcode_t
#ifdef RE_ENABLE_I18N
@ -3445,19 +3480,18 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
_NL_COLLATE_EXTRAMB);
indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
_NL_COLLATE_INDIRECTMB);
idx1 = findidx (&cp);
if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
idx1 = findidx (&cp, -1);
if (BE (idx1 == 0 || *cp != '\0', 0))
/* This isn't a valid character. */
return REG_ECOLLATE;
/* Build single byte matcing table for this equivalence class. */
char_buf[1] = (unsigned char) '\0';
/* Build single byte matching table for this equivalence class. */
len = weights[idx1 & 0xffffff];
for (ch = 0; ch < SBC_MAX; ++ch)
{
char_buf[0] = ch;
cp = char_buf;
idx2 = findidx (&cp);
idx2 = findidx (&cp, 1);
/*
idx2 = table[ch];
*/
@ -3510,20 +3544,20 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
Build the character class which is represented by NAME.
The result are written to MBCSET and SBCSET.
CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
is a pointer argument sinse we may update it. */
is a pointer argument since we may update it. */
static reg_errcode_t
#ifdef RE_ENABLE_I18N
build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
re_charset_t *mbcset, Idx *char_class_alloc,
const unsigned char *class_name, reg_syntax_t syntax)
const char *class_name, reg_syntax_t syntax)
#else /* not RE_ENABLE_I18N */
build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
const unsigned char *class_name, reg_syntax_t syntax)
const char *class_name, reg_syntax_t syntax)
#endif /* not RE_ENABLE_I18N */
{
int i;
const char *name = (const char *) class_name;
const char *name = class_name;
/* In case of REG_ICASE "upper" and "lower" match the both of
upper and lower cases. */
@ -3597,8 +3631,8 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
static bin_tree_t *
build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
const unsigned char *class_name,
const unsigned char *extra, bool non_match,
const char *class_name,
const char *extra, bool non_match,
reg_errcode_t *err)
{
re_bitset_ptr_t sbcset;
@ -3704,8 +3738,9 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
}
/* This is intended for the expressions like "a{1,3}".
Fetch a number from `input', and return the number.
Fetch a number from 'input', and return the number.
Return REG_MISSING if the number field is empty like "{,1}".
Return RE_DUP_MAX + 1 if the number field is too large.
Return REG_ERROR if an error occurred. */
static Idx
@ -3724,8 +3759,9 @@ fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
num = ((token->type != CHARACTER || c < '0' || '9' < c
|| num == REG_ERROR)
? REG_ERROR
: ((num == REG_MISSING) ? c - '0' : num * 10 + c - '0'));
num = (num > RE_DUP_MAX) ? REG_ERROR : num;
: num == REG_MISSING
? c - '0'
: MIN (RE_DUP_MAX + 1, num * 10 + c - '0'));
}
return num;
}
@ -3799,7 +3835,7 @@ create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
static reg_errcode_t
mark_opt_subexp (void *extra, bin_tree_t *node)
{
Idx idx = (Idx) (long) extra;
Idx idx = (uintptr_t) extra;
if (node->token.type == SUBEXP && node->token.opr.idx == idx)
node->token.opt_subexp = 1;