Resynced with gnulib. Cleaned up wrapping layer.
This commit is contained in:
parent
bbdb72a1f4
commit
a64b15feed
20 changed files with 620 additions and 305 deletions
161
gnulib/regexec.c
161
gnulib/regexec.c
|
@ -1,6 +1,6 @@
|
|||
/* Extended regular expression matching and search library.
|
||||
Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
|
||||
Free Software Foundation, Inc.
|
||||
Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
|
||||
Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
|
||||
|
||||
|
@ -637,7 +637,7 @@ re_exec (s)
|
|||
(0 <= LAST_START && LAST_START <= LENGTH) */
|
||||
|
||||
static reg_errcode_t
|
||||
internal_function
|
||||
internal_function __attribute_warn_unused_result__
|
||||
re_search_internal (const regex_t *preg,
|
||||
const char *string, Idx length,
|
||||
Idx start, Idx last_start, Idx stop,
|
||||
|
@ -833,10 +833,10 @@ re_search_internal (const regex_t *preg,
|
|||
break;
|
||||
match_first += incr;
|
||||
if (match_first < left_lim || match_first > right_lim)
|
||||
{
|
||||
err = REG_NOMATCH;
|
||||
goto free_return;
|
||||
}
|
||||
{
|
||||
err = REG_NOMATCH;
|
||||
goto free_return;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -953,14 +953,14 @@ re_search_internal (const regex_t *preg,
|
|||
}
|
||||
|
||||
if (dfa->subexp_map)
|
||||
for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
|
||||
if (dfa->subexp_map[reg_idx] != reg_idx)
|
||||
{
|
||||
pmatch[reg_idx + 1].rm_so
|
||||
= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
|
||||
pmatch[reg_idx + 1].rm_eo
|
||||
= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
|
||||
}
|
||||
for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
|
||||
if (dfa->subexp_map[reg_idx] != reg_idx)
|
||||
{
|
||||
pmatch[reg_idx + 1].rm_so
|
||||
= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
|
||||
pmatch[reg_idx + 1].rm_eo
|
||||
= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
|
||||
}
|
||||
}
|
||||
|
||||
free_return:
|
||||
|
@ -972,7 +972,7 @@ re_search_internal (const regex_t *preg,
|
|||
}
|
||||
|
||||
static reg_errcode_t
|
||||
internal_function
|
||||
internal_function __attribute_warn_unused_result__
|
||||
prune_impossible_nodes (re_match_context_t *mctx)
|
||||
{
|
||||
const re_dfa_t *const dfa = mctx->dfa;
|
||||
|
@ -1110,7 +1110,7 @@ acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
|
|||
index of the buffer. */
|
||||
|
||||
static Idx
|
||||
internal_function
|
||||
internal_function __attribute_warn_unused_result__
|
||||
check_matching (re_match_context_t *mctx, bool fl_longest_match,
|
||||
Idx *p_match_first)
|
||||
{
|
||||
|
@ -1149,7 +1149,7 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match,
|
|||
{
|
||||
err = transit_state_bkref (mctx, &cur_state->nodes);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
return err;
|
||||
return err;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1176,16 +1176,16 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match,
|
|||
Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
|
||||
|
||||
if (BE (next_char_idx >= mctx->input.bufs_len, 0)
|
||||
|| (BE (next_char_idx >= mctx->input.valid_len, 0)
|
||||
&& mctx->input.valid_len < mctx->input.len))
|
||||
{
|
||||
err = extend_buffers (mctx);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
|| (BE (next_char_idx >= mctx->input.valid_len, 0)
|
||||
&& mctx->input.valid_len < mctx->input.len))
|
||||
{
|
||||
err = extend_buffers (mctx);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
{
|
||||
assert (err == REG_ESPACE);
|
||||
return REG_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cur_state = transit_state (&err, mctx, cur_state);
|
||||
if (mctx->state_log != NULL)
|
||||
|
@ -1309,17 +1309,17 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
|
|||
if (dest_node == REG_MISSING)
|
||||
dest_node = candidate;
|
||||
|
||||
else
|
||||
else
|
||||
{
|
||||
/* In order to avoid infinite loop like "(a*)*", return the second
|
||||
epsilon-transition if the first was already considered. */
|
||||
epsilon-transition if the first was already considered. */
|
||||
if (re_node_set_contains (eps_via_nodes, dest_node))
|
||||
return candidate;
|
||||
return candidate;
|
||||
|
||||
/* Otherwise, push the second epsilon-transition on the fail stack. */
|
||||
else if (fs != NULL
|
||||
&& push_fail_stack (fs, *pidx, candidate, nregs, regs,
|
||||
eps_via_nodes))
|
||||
eps_via_nodes))
|
||||
return REG_ERROR;
|
||||
|
||||
/* We know we are going to exit. */
|
||||
|
@ -1385,7 +1385,7 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
|
|||
}
|
||||
|
||||
static reg_errcode_t
|
||||
internal_function
|
||||
internal_function __attribute_warn_unused_result__
|
||||
push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
|
||||
Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
|
||||
{
|
||||
|
@ -1432,7 +1432,7 @@ pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
|
|||
pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
|
||||
|
||||
static reg_errcode_t
|
||||
internal_function
|
||||
internal_function __attribute_warn_unused_result__
|
||||
set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
|
||||
regmatch_t *pmatch, bool fl_backtrack)
|
||||
{
|
||||
|
@ -1667,7 +1667,7 @@ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
|
|||
if (mctx->state_log[str_idx])
|
||||
{
|
||||
err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
goto free_return;
|
||||
}
|
||||
|
||||
|
@ -1686,7 +1686,7 @@ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
|
|||
}
|
||||
|
||||
static reg_errcode_t
|
||||
internal_function
|
||||
internal_function __attribute_warn_unused_result__
|
||||
build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
|
||||
Idx str_idx, re_node_set *cur_dest)
|
||||
{
|
||||
|
@ -1848,7 +1848,7 @@ update_cur_sifted_state (const re_match_context_t *mctx,
|
|||
}
|
||||
|
||||
static reg_errcode_t
|
||||
internal_function
|
||||
internal_function __attribute_warn_unused_result__
|
||||
add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
|
||||
const re_node_set *candidates)
|
||||
{
|
||||
|
@ -1863,10 +1863,14 @@ add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
|
|||
{
|
||||
err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
return REG_ESPACE;
|
||||
return REG_ESPACE;
|
||||
for (i = 0; i < dest_nodes->nelem; i++)
|
||||
re_node_set_merge (&state->inveclosure,
|
||||
dfa->inveclosures + dest_nodes->elems[i]);
|
||||
{
|
||||
err = re_node_set_merge (&state->inveclosure,
|
||||
dfa->inveclosures + dest_nodes->elems[i]);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
return REG_ESPACE;
|
||||
}
|
||||
}
|
||||
return re_node_set_add_intersect (dest_nodes, candidates,
|
||||
&state->inveclosure);
|
||||
|
@ -1978,7 +1982,7 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
|
|||
{
|
||||
struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
|
||||
do
|
||||
{
|
||||
{
|
||||
Idx dst;
|
||||
int cpos;
|
||||
|
||||
|
@ -2000,9 +2004,9 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
|
|||
if (dst == from_node)
|
||||
{
|
||||
if (boundaries & 1)
|
||||
return -1;
|
||||
return -1;
|
||||
else /* if (boundaries & 2) */
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
cpos =
|
||||
|
@ -2016,7 +2020,7 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
|
|||
if (subexp_idx < BITSET_WORD_BITS)
|
||||
ent->eps_reachable_subexps_map
|
||||
&= ~((bitset_word_t) 1 << subexp_idx);
|
||||
}
|
||||
}
|
||||
while (ent++->more);
|
||||
}
|
||||
break;
|
||||
|
@ -2158,7 +2162,7 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
|
|||
}
|
||||
|
||||
static reg_errcode_t
|
||||
internal_function
|
||||
internal_function __attribute_warn_unused_result__
|
||||
sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
|
||||
Idx str_idx, const re_node_set *candidates)
|
||||
{
|
||||
|
@ -2241,7 +2245,7 @@ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
|
|||
re_node_set_remove (&local_sctx.limits, enabled_idx);
|
||||
|
||||
/* mctx->bkref_ents may have changed, reload the pointer. */
|
||||
entry = mctx->bkref_ents + enabled_idx;
|
||||
entry = mctx->bkref_ents + enabled_idx;
|
||||
}
|
||||
while (enabled_idx++, entry++->more);
|
||||
}
|
||||
|
@ -2288,7 +2292,7 @@ sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
|
|||
update the destination of STATE_LOG. */
|
||||
|
||||
static re_dfastate_t *
|
||||
internal_function
|
||||
internal_function __attribute_warn_unused_result__
|
||||
transit_state (reg_errcode_t *err, re_match_context_t *mctx,
|
||||
re_dfastate_t *state)
|
||||
{
|
||||
|
@ -2322,7 +2326,7 @@ transit_state (reg_errcode_t *err, re_match_context_t *mctx,
|
|||
|
||||
trtable = state->word_trtable;
|
||||
if (BE (trtable != NULL, 1))
|
||||
{
|
||||
{
|
||||
unsigned int context;
|
||||
context
|
||||
= re_string_context_at (&mctx->input,
|
||||
|
@ -2368,21 +2372,21 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
|
|||
unsigned int context;
|
||||
re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
|
||||
/* If (state_log[cur_idx] != 0), it implies that cur_idx is
|
||||
the destination of a multibyte char/collating element/
|
||||
back reference. Then the next state is the union set of
|
||||
these destinations and the results of the transition table. */
|
||||
the destination of a multibyte char/collating element/
|
||||
back reference. Then the next state is the union set of
|
||||
these destinations and the results of the transition table. */
|
||||
pstate = mctx->state_log[cur_idx];
|
||||
log_nodes = pstate->entrance_nodes;
|
||||
if (next_state != NULL)
|
||||
{
|
||||
table_nodes = next_state->entrance_nodes;
|
||||
*err = re_node_set_init_union (&next_nodes, table_nodes,
|
||||
{
|
||||
table_nodes = next_state->entrance_nodes;
|
||||
*err = re_node_set_init_union (&next_nodes, table_nodes,
|
||||
log_nodes);
|
||||
if (BE (*err != REG_NOERROR, 0))
|
||||
if (BE (*err != REG_NOERROR, 0))
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
next_nodes = *log_nodes;
|
||||
next_nodes = *log_nodes;
|
||||
/* Note: We already add the nodes of the initial state,
|
||||
then we don't need to add them here. */
|
||||
|
||||
|
@ -2390,12 +2394,12 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
|
|||
re_string_cur_idx (&mctx->input) - 1,
|
||||
mctx->eflags);
|
||||
next_state = mctx->state_log[cur_idx]
|
||||
= re_acquire_state_context (err, dfa, &next_nodes, context);
|
||||
= re_acquire_state_context (err, dfa, &next_nodes, context);
|
||||
/* We don't need to check errors here, since the return value of
|
||||
this function is next_state and ERR is already set. */
|
||||
this function is next_state and ERR is already set. */
|
||||
|
||||
if (table_nodes != NULL)
|
||||
re_node_set_free (&next_nodes);
|
||||
re_node_set_free (&next_nodes);
|
||||
}
|
||||
|
||||
if (BE (dfa->nbackref, 0) && next_state != NULL)
|
||||
|
@ -2436,9 +2440,9 @@ find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
|
|||
|
||||
do
|
||||
{
|
||||
if (++cur_str_idx > max)
|
||||
return NULL;
|
||||
re_string_skip_bytes (&mctx->input, 1);
|
||||
if (++cur_str_idx > max)
|
||||
return NULL;
|
||||
re_string_skip_bytes (&mctx->input, 1);
|
||||
}
|
||||
while (mctx->state_log[cur_str_idx] == NULL);
|
||||
|
||||
|
@ -2546,7 +2550,7 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
|
|||
re_dfastate_t *dest_state;
|
||||
|
||||
if (!dfa->nodes[cur_node_idx].accept_mb)
|
||||
continue;
|
||||
continue;
|
||||
|
||||
if (dfa->nodes[cur_node_idx].constraint)
|
||||
{
|
||||
|
@ -2714,7 +2718,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
|
|||
delay these checking for prune_impossible_nodes(). */
|
||||
|
||||
static reg_errcode_t
|
||||
internal_function
|
||||
internal_function __attribute_warn_unused_result__
|
||||
get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
|
||||
{
|
||||
const re_dfa_t *const dfa = mctx->dfa;
|
||||
|
@ -2727,7 +2731,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
|
|||
const struct re_backref_cache_entry *entry
|
||||
= mctx->bkref_ents + cache_idx;
|
||||
do
|
||||
if (entry->node == bkref_node)
|
||||
if (entry->node == bkref_node)
|
||||
return REG_NOERROR; /* We already checked it. */
|
||||
while (entry++->more);
|
||||
}
|
||||
|
@ -2915,7 +2919,7 @@ find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
|
|||
Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
|
||||
|
||||
static reg_errcode_t
|
||||
internal_function
|
||||
internal_function __attribute_warn_unused_result__
|
||||
check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
|
||||
Idx top_str, Idx last_node, Idx last_str, int type)
|
||||
{
|
||||
|
@ -3077,7 +3081,7 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
|
|||
Can't we unify them? */
|
||||
|
||||
static reg_errcode_t
|
||||
internal_function
|
||||
internal_function __attribute_warn_unused_result__
|
||||
check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
|
||||
re_node_set *cur_nodes, re_node_set *next_nodes)
|
||||
{
|
||||
|
@ -3211,7 +3215,7 @@ check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
|
|||
problematic append it to DST_NODES. */
|
||||
|
||||
static reg_errcode_t
|
||||
internal_function
|
||||
internal_function __attribute_warn_unused_result__
|
||||
check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
|
||||
Idx target, Idx ex_subexp, int type)
|
||||
{
|
||||
|
@ -3256,7 +3260,7 @@ check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
|
|||
in MCTX->BKREF_ENTS. */
|
||||
|
||||
static reg_errcode_t
|
||||
internal_function
|
||||
internal_function __attribute_warn_unused_result__
|
||||
expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
|
||||
Idx cur_str, Idx subexp_num, int type)
|
||||
{
|
||||
|
@ -3622,7 +3626,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
|
|||
}
|
||||
#ifdef RE_ENABLE_I18N
|
||||
else if (type == OP_UTF8_PERIOD)
|
||||
{
|
||||
{
|
||||
if (ASCII_CHARS % BITSET_WORD_BITS == 0)
|
||||
memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
|
||||
else
|
||||
|
@ -3631,7 +3635,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
|
|||
bitset_clear (accepts, '\n');
|
||||
if (dfa->syntax & RE_DOT_NOT_NULL)
|
||||
bitset_clear (accepts, '\0');
|
||||
}
|
||||
}
|
||||
#endif
|
||||
else
|
||||
continue;
|
||||
|
@ -3836,7 +3840,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
|
|||
if (node->type == OP_PERIOD)
|
||||
{
|
||||
if (char_len <= 1)
|
||||
return 0;
|
||||
return 0;
|
||||
/* FIXME: I don't think this if is needed, as both '\n'
|
||||
and '\0' are char_len == 1. */
|
||||
/* '.' accepts any one character except the following two cases. */
|
||||
|
@ -3949,15 +3953,20 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
|
|||
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
|
||||
indirect = (const int32_t *)
|
||||
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
|
||||
idx = findidx (&cp);
|
||||
int32_t idx = findidx (&cp);
|
||||
if (idx > 0)
|
||||
for (i = 0; i < cset->nequiv_classes; ++i)
|
||||
{
|
||||
int32_t equiv_class_idx = cset->equiv_classes[i];
|
||||
size_t weight_len = weights[idx];
|
||||
if (weight_len == weights[equiv_class_idx])
|
||||
size_t weight_len = weights[idx & 0xffffff];
|
||||
if (weight_len == weights[equiv_class_idx & 0xffffff]
|
||||
&& (idx >> 24) == (equiv_class_idx >> 24))
|
||||
{
|
||||
Idx cnt = 0;
|
||||
|
||||
idx &= 0xffffff;
|
||||
equiv_class_idx &= 0xffffff;
|
||||
|
||||
while (cnt <= weight_len
|
||||
&& (weights[equiv_class_idx + 1 + cnt]
|
||||
== weights[idx + 1 + cnt]))
|
||||
|
@ -4123,7 +4132,7 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
|
|||
/* Extend the buffers, if the buffers have run out. */
|
||||
|
||||
static reg_errcode_t
|
||||
internal_function
|
||||
internal_function __attribute_warn_unused_result__
|
||||
extend_buffers (re_match_context_t *mctx)
|
||||
{
|
||||
reg_errcode_t ret;
|
||||
|
@ -4186,7 +4195,7 @@ extend_buffers (re_match_context_t *mctx)
|
|||
/* Initialize MCTX. */
|
||||
|
||||
static reg_errcode_t
|
||||
internal_function
|
||||
internal_function __attribute_warn_unused_result__
|
||||
match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
|
||||
{
|
||||
mctx->eflags = eflags;
|
||||
|
@ -4266,7 +4275,7 @@ match_ctx_free (re_match_context_t *mctx)
|
|||
*/
|
||||
|
||||
static reg_errcode_t
|
||||
internal_function
|
||||
internal_function __attribute_warn_unused_result__
|
||||
match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
|
||||
Idx to)
|
||||
{
|
||||
|
@ -4338,7 +4347,7 @@ search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
|
|||
at STR_IDX. */
|
||||
|
||||
static reg_errcode_t
|
||||
internal_function
|
||||
internal_function __attribute_warn_unused_result__
|
||||
match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
|
||||
{
|
||||
#ifdef DEBUG
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue