resynced with gnulib. Cleaned up wrapping layer

This commit is contained in:
Vladimir 'phcoder' Serbinenko 2010-03-28 21:43:42 +02:00
parent bbdb72a1f4
commit a64b15feed
20 changed files with 620 additions and 305 deletions

View file

@ -1,6 +1,6 @@
/* Extended regular expression matching and search library.
Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009
Free Software Foundation, Inc.
Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
@ -383,7 +383,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
applies to multibyte character sets; for single byte character
sets, the SIMPLE_BRACKET again suffices. */
if (dfa->mb_cur_max > 1
&& (cset->nchar_classes || cset->non_match
&& (cset->nchar_classes || cset->non_match || cset->nranges
# ifdef _LIBC
|| cset->nequiv_classes
# endif /* _LIBC */
@ -636,7 +636,7 @@ free_dfa_content (re_dfa_t *dfa)
re_dfastate_t *state = entry->array[j];
free_state (state);
}
re_free (entry->array);
re_free (entry->array);
}
re_free (dfa->state_table);
#ifdef RE_ENABLE_I18N
@ -850,6 +850,9 @@ static reg_errcode_t
init_dfa (re_dfa_t *dfa, size_t pat_len)
{
__re_size_t table_size;
#ifndef _LIBC
char *codeset_name;
#endif
#ifdef RE_ENABLE_I18N
size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t));
#else
@ -893,7 +896,9 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
!= 0);
#else
if (strcmp (locale_charset (), "UTF-8") == 0)
codeset_name = nl_langinfo (CODESET);
if (strcasecmp (codeset_name, "UTF-8") == 0
|| strcasecmp (codeset_name, "UTF8") == 0)
dfa->is_utf8 = 1;
/* We check exhaustively in the loop below if this charset is a
@ -1016,7 +1021,10 @@ create_initial_state (re_dfa_t *dfa)
Idx dest_idx = dfa->edests[node_idx].elems[0];
if (!re_node_set_contains (&init_nodes, dest_idx))
{
re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
reg_errcode_t merge_err
= re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
if (merge_err != REG_NOERROR)
return merge_err;
i = 0;
}
}
@ -1085,8 +1093,8 @@ optimize_utf8 (re_dfa_t *dfa)
}
break;
case OP_PERIOD:
has_period = true;
break;
has_period = true;
break;
case OP_BACK_REF:
case OP_ALT:
case END_OF_RE:
@ -1187,7 +1195,7 @@ analyze (regex_t *preg)
{
dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
if (BE (dfa->inveclosures == NULL, 0))
return REG_ESPACE;
return REG_ESPACE;
ret = calc_inveclosure (dfa);
}
@ -1209,16 +1217,16 @@ postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
if that's the only child). */
while (node->left || node->right)
if (node->left)
node = node->left;
else
node = node->right;
node = node->left;
else
node = node->right;
do
{
reg_errcode_t err = fn (extra, node);
if (BE (err != REG_NOERROR, 0))
return err;
if (node->parent == NULL)
if (node->parent == NULL)
return REG_NOERROR;
prev = node;
node = node->parent;
@ -1252,7 +1260,7 @@ preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
prev = node;
node = node->parent;
if (!node)
return REG_NOERROR;
return REG_NOERROR;
}
node = node->right;
}
@ -1275,13 +1283,13 @@ optimize_subexps (void *extra, bin_tree_t *node)
}
else if (node->token.type == SUBEXP
&& node->left && node->left->token.type == SUBEXP)
&& node->left && node->left->token.type == SUBEXP)
{
Idx other_idx = node->left->token.opr.idx;
node->left = node->left->left;
if (node->left)
node->left->parent = node;
node->left->parent = node;
dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
if (other_idx < BITSET_WORD_BITS)
@ -1366,9 +1374,9 @@ calc_first (void *extra, bin_tree_t *node)
node->first = node;
node->node_idx = re_dfa_add_node (dfa, node->token);
if (BE (node->node_idx == REG_MISSING, 0))
return REG_ESPACE;
return REG_ESPACE;
if (node->token.type == ANCHOR)
dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
}
return REG_NOERROR;
}
@ -1390,7 +1398,7 @@ calc_next (void *extra, bin_tree_t *node)
if (node->left)
node->left->next = node->next;
if (node->right)
node->right->next = node->next;
node->right->next = node->next;
break;
}
return REG_NOERROR;
@ -1441,7 +1449,7 @@ link_nfa_nodes (void *extra, bin_tree_t *node)
case OP_BACK_REF:
dfa->nexts[idx] = node->next->node_idx;
if (node->token.type == OP_BACK_REF)
re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
break;
default:
@ -1498,7 +1506,6 @@ duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
destination. */
org_dest = dfa->edests[org_node].elems[0];
re_node_set_empty (dfa->edests + clone_node);
clone_dest = search_duplicated_node (dfa, org_dest, constraint);
/* If the node is root_node itself, it means the epsilon closure
has a loop. Then tie it to the destination of the root_node. */
if (org_node == root_node && clone_node != org_node)
@ -1542,7 +1549,7 @@ duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
}
else
{
/* There is a duplicated node which satisfy the constraint,
/* There is a duplicated node which satisfies the constraint,
use it to avoid infinite loop. */
ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
if (BE (! ok, 0))
@ -1674,10 +1681,9 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
{
reg_errcode_t err;
Idx i;
bool incomplete;
bool ok;
re_node_set eclosure;
incomplete = false;
bool ok;
bool incomplete = false;
err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
if (BE (err != REG_NOERROR, 0))
return err;
@ -1722,7 +1728,9 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
else
eclosure_elem = dfa->eclosures[edest];
/* Merge the epsilon closure of `edest'. */
re_node_set_merge (&eclosure, &eclosure_elem);
err = re_node_set_merge (&eclosure, &eclosure_elem);
if (BE (err != REG_NOERROR, 0))
return err;
/* If the epsilon closure of `edest' is incomplete,
the epsilon closure of this node is also incomplete. */
if (dfa->eclosures[edest].nelem == 0)
@ -1732,7 +1740,7 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
}
}
/* Epsilon closures include itself. */
/* An epsilon closure includes itself. */
ok = re_node_set_insert (&eclosure, node);
if (BE (! ok, 0))
return REG_ESPACE;
@ -2319,7 +2327,7 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
&& dfa->word_ops_used == 0)
init_word_char (dfa);
if (token->opr.ctx_type == WORD_DELIM
|| token->opr.ctx_type == NOT_WORD_DELIM)
|| token->opr.ctx_type == NOT_WORD_DELIM)
{
bin_tree_t *tree_first, *tree_last;
if (token->opr.ctx_type == WORD_DELIM)
@ -2327,13 +2335,13 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
token->opr.ctx_type = WORD_FIRST;
tree_first = create_token_tree (dfa, NULL, NULL, token);
token->opr.ctx_type = WORD_LAST;
}
else
{
}
else
{
token->opr.ctx_type = INSIDE_WORD;
tree_first = create_token_tree (dfa, NULL, NULL, token);
token->opr.ctx_type = INSIDE_NOTWORD;
}
}
tree_last = create_token_tree (dfa, NULL, NULL, token);
tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
@ -2444,7 +2452,7 @@ parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
{
tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
*err = REG_EPAREN;
*err = REG_EPAREN;
if (BE (*err != REG_NOERROR, 0))
return NULL;
}
@ -2515,7 +2523,8 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
return elem;
}
if (BE (end != REG_MISSING && start > end, 0))
if (BE ((end != REG_MISSING && start > end)
|| token->type != OP_CLOSE_DUP_NUM, 0))
{
/* First number greater than second. */
*err = REG_BADBR;
@ -2568,10 +2577,14 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
if (BE (tree == NULL, 0))
goto parse_dup_op_espace;
/* From gnulib's "intprops.h":
True if the arithmetic type T is signed. */
#define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))
/* This loop is actually executed only when end != REG_MISSING,
to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have
already created the start+1-th copy. */
if ((Idx) -1 < 0 || end != REG_MISSING)
if (TYPE_SIGNED (Idx) || end != REG_MISSING)
for (i = start + 2; i <= end; ++i)
{
elem = duplicate_tree (elem, dfa);
@ -2609,11 +2622,17 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
static reg_errcode_t
internal_function
# ifdef RE_ENABLE_I18N
build_range_exp (bitset_t sbcset, re_charset_t *mbcset, Idx *range_alloc,
bracket_elem_t *start_elem, bracket_elem_t *end_elem)
build_range_exp (const reg_syntax_t syntax,
bitset_t sbcset,
re_charset_t *mbcset,
Idx *range_alloc,
const bracket_elem_t *start_elem,
const bracket_elem_t *end_elem)
# else /* not RE_ENABLE_I18N */
build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
bracket_elem_t *end_elem)
build_range_exp (const reg_syntax_t syntax,
bitset_t sbcset,
const bracket_elem_t *start_elem,
const bracket_elem_t *end_elem)
# endif /* not RE_ENABLE_I18N */
{
unsigned int start_ch, end_ch;
@ -2652,7 +2671,9 @@ build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
return REG_ECOLLATE;
cmp_buf[0] = start_wc;
cmp_buf[4] = end_wc;
if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
if (BE ((syntax & RE_NO_EMPTY_RANGES)
&& wcscoll (cmp_buf, cmp_buf + 4) > 0, 0))
return REG_ERANGE;
/* Got valid collation sequence values, add them as a new entry.
@ -2662,9 +2683,9 @@ build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
no MBCSET if dfa->mb_cur_max == 1. */
if (mbcset)
{
/* Check the space of the arrays. */
if (BE (*range_alloc == mbcset->nranges, 0))
{
/* Check the space of the arrays. */
if (BE (*range_alloc == mbcset->nranges, 0))
{
/* There is not enough space, need realloc. */
wchar_t *new_array_start, *new_array_end;
Idx new_nranges;
@ -2674,9 +2695,9 @@ build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
/* Use realloc since mbcset->range_starts and mbcset->range_ends
are NULL if *range_alloc == 0. */
new_array_start = re_realloc (mbcset->range_starts, wchar_t,
new_nranges);
new_nranges);
new_array_end = re_realloc (mbcset->range_ends, wchar_t,
new_nranges);
new_nranges);
if (BE (new_array_start == NULL || new_array_end == NULL, 0))
return REG_ESPACE;
@ -2684,10 +2705,10 @@ build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
mbcset->range_starts = new_array_start;
mbcset->range_ends = new_array_end;
*range_alloc = new_nranges;
}
}
mbcset->range_starts[mbcset->nranges] = start_wc;
mbcset->range_ends[mbcset->nranges++] = end_wc;
mbcset->range_starts[mbcset->nranges] = start_wc;
mbcset->range_ends[mbcset->nranges++] = end_wc;
}
/* Build the table for single byte characters. */
@ -2799,7 +2820,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
return elem;
}
/* Local function for parse_bracket_exp used in _LIBC environement.
/* Local function for parse_bracket_exp used in _LIBC environment.
Look up the collation sequence value of BR_ELEM.
Return the value if succeeded, UINT_MAX otherwise. */
@ -2823,7 +2844,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
}
else if (br_elem->type == MB_CHAR)
{
return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
if (nrules != 0)
return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
}
else if (br_elem->type == COLL_SYM)
{
@ -2904,8 +2926,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
build below suffices. */
if (nrules > 0 || dfa->mb_cur_max > 1)
{
/* Check the space of the arrays. */
if (BE (*range_alloc == mbcset->nranges, 0))
/* Check the space of the arrays. */
if (BE (*range_alloc == mbcset->nranges, 0))
{
/* There is not enough space, need realloc. */
uint32_t *new_array_start;
@ -2917,18 +2939,18 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
new_array_start = re_realloc (mbcset->range_starts, uint32_t,
new_nranges);
new_array_end = re_realloc (mbcset->range_ends, uint32_t,
new_nranges);
new_nranges);
if (BE (new_array_start == NULL || new_array_end == NULL, 0))
return REG_ESPACE;
return REG_ESPACE;
mbcset->range_starts = new_array_start;
mbcset->range_ends = new_array_end;
*range_alloc = new_nranges;
}
mbcset->range_starts[mbcset->nranges] = start_collseq;
mbcset->range_ends[mbcset->nranges++] = end_collseq;
mbcset->range_starts[mbcset->nranges] = start_collseq;
mbcset->range_ends[mbcset->nranges++] = end_collseq;
}
/* Build the table for single byte characters. */
@ -3154,11 +3176,11 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
&start_elem, &end_elem);
#else
# ifdef RE_ENABLE_I18N
*err = build_range_exp (sbcset,
*err = build_range_exp (syntax, sbcset,
dfa->mb_cur_max > 1 ? mbcset : NULL,
&range_alloc, &start_elem, &end_elem);
# else
*err = build_range_exp (sbcset, &start_elem, &end_elem);
*err = build_range_exp (syntax, sbcset, &start_elem, &end_elem);
# endif
#endif /* RE_ENABLE_I18N */
if (BE (*err != REG_NOERROR, 0))
@ -3262,17 +3284,17 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
if (sbc_idx < BITSET_WORDS)
{
/* Build a tree for simple bracket. */
br_token.type = SIMPLE_BRACKET;
br_token.opr.sbcset = sbcset;
work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
if (BE (work_tree == NULL, 0))
goto parse_bracket_exp_espace;
/* Build a tree for simple bracket. */
br_token.type = SIMPLE_BRACKET;
br_token.opr.sbcset = sbcset;
work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
if (BE (work_tree == NULL, 0))
goto parse_bracket_exp_espace;
/* Then join them by ALT node. */
work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
if (BE (work_tree == NULL, 0))
goto parse_bracket_exp_espace;
/* Then join them by ALT node. */
work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
if (BE (work_tree == NULL, 0))
goto parse_bracket_exp_espace;
}
else
{
@ -3291,7 +3313,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
br_token.opr.sbcset = sbcset;
work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
if (BE (work_tree == NULL, 0))
goto parse_bracket_exp_espace;
goto parse_bracket_exp_espace;
}
return work_tree;
@ -3430,7 +3452,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
/* Build single byte matcing table for this equivalence class. */
char_buf[1] = (unsigned char) '\0';
len = weights[idx1];
len = weights[idx1 & 0xffffff];
for (ch = 0; ch < SBC_MAX; ++ch)
{
char_buf[0] = ch;
@ -3442,11 +3464,15 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
if (idx2 == 0)
/* This isn't a valid character. */
continue;
if (len == weights[idx2])
/* Compare only if the length matches and the collation rule
index is the same. */
if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24))
{
int cnt = 0;
while (cnt <= len &&
weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
weights[(idx1 & 0xffffff) + 1 + cnt]
== weights[(idx2 & 0xffffff) + 1 + cnt])
++cnt;
if (cnt > len)
@ -3842,7 +3868,7 @@ duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
node = node->parent;
dup_node = dup_node->parent;
if (!node)
return dup_root;
return dup_root;
}
node = node->right;
p_new = &dup_node->right;