Import new gnulib.
This commit is contained in:
parent
93cd84df63
commit
053cfcddf1
255 changed files with 12578 additions and 4948 deletions
|
@ -1,22 +1,21 @@
|
|||
/* Extended regular expression matching and search library.
|
||||
Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
|
||||
Software Foundation, Inc.
|
||||
Copyright (C) 2002-2013 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 3 of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
||||
You should have received a copy of the GNU General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
|
||||
Idx n) internal_function;
|
||||
|
@ -52,9 +51,8 @@ static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
|
|||
regoff_t range, Idx stop,
|
||||
struct re_registers *regs,
|
||||
bool ret_len) internal_function;
|
||||
static unsigned int re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
|
||||
Idx nregs, int regs_allocated)
|
||||
internal_function;
|
||||
static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
|
||||
Idx nregs, int regs_allocated) internal_function;
|
||||
static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
|
||||
internal_function;
|
||||
static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
|
||||
|
@ -201,7 +199,7 @@ static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
|
|||
static bool check_node_accept (const re_match_context_t *mctx,
|
||||
const re_token_t *node, Idx idx)
|
||||
internal_function;
|
||||
static reg_errcode_t extend_buffers (re_match_context_t *mctx)
|
||||
static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len)
|
||||
internal_function;
|
||||
|
||||
/* Entry point for POSIX code. */
|
||||
|
@ -210,11 +208,11 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx)
|
|||
string STRING.
|
||||
|
||||
If NMATCH is zero or REG_NOSUB was set in the cflags argument to
|
||||
`regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
|
||||
'regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
|
||||
least NMATCH elements, and we set them to the offsets of the
|
||||
corresponding matched substrings.
|
||||
|
||||
EFLAGS specifies `execution flags' which affect matching: if
|
||||
EFLAGS specifies "execution flags" which affect matching: if
|
||||
REG_NOTBOL is set, then ^ does not match at the beginning of the
|
||||
string; if REG_NOTEOL is set, then $ does not match at the end.
|
||||
|
||||
|
@ -231,7 +229,7 @@ regexec (preg, string, nmatch, pmatch, eflags)
|
|||
reg_errcode_t err;
|
||||
Idx start, length;
|
||||
#ifdef _LIBC
|
||||
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
|
||||
re_dfa_t *dfa = preg->buffer;
|
||||
#endif
|
||||
|
||||
if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
|
||||
|
@ -366,7 +364,6 @@ weak_alias (__re_search_2, re_search_2)
|
|||
#endif
|
||||
|
||||
static regoff_t
|
||||
internal_function
|
||||
re_search_2_stub (struct re_pattern_buffer *bufp,
|
||||
const char *string1, Idx length1,
|
||||
const char *string2, Idx length2,
|
||||
|
@ -414,7 +411,6 @@ re_search_2_stub (struct re_pattern_buffer *bufp,
|
|||
otherwise the position of the match is returned. */
|
||||
|
||||
static regoff_t
|
||||
internal_function
|
||||
re_search_stub (struct re_pattern_buffer *bufp,
|
||||
const char *string, Idx length,
|
||||
Idx start, regoff_t range, Idx stop, struct re_registers *regs,
|
||||
|
@ -426,7 +422,7 @@ re_search_stub (struct re_pattern_buffer *bufp,
|
|||
regoff_t rval;
|
||||
int eflags = 0;
|
||||
#ifdef _LIBC
|
||||
re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
|
||||
re_dfa_t *dfa = bufp->buffer;
|
||||
#endif
|
||||
Idx last_start = start + range;
|
||||
|
||||
|
@ -478,9 +474,9 @@ re_search_stub (struct re_pattern_buffer *bufp,
|
|||
|
||||
rval = 0;
|
||||
|
||||
/* I hope we needn't fill ther regs with -1's when no match was found. */
|
||||
/* I hope we needn't fill their regs with -1's when no match was found. */
|
||||
if (result != REG_NOERROR)
|
||||
rval = -1;
|
||||
rval = result == REG_NOMATCH ? -1 : -2;
|
||||
else if (regs != NULL)
|
||||
{
|
||||
/* If caller wants register contents data back, copy them. */
|
||||
|
@ -506,15 +502,14 @@ re_search_stub (struct re_pattern_buffer *bufp,
|
|||
return rval;
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
internal_function
|
||||
static unsigned
|
||||
re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
|
||||
int regs_allocated)
|
||||
{
|
||||
int rval = REGS_REALLOCATE;
|
||||
Idx i;
|
||||
Idx need_regs = nregs + 1;
|
||||
/* We need one extra element beyond `num_regs' for the `-1' marker GNU code
|
||||
/* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
|
||||
uses. */
|
||||
|
||||
/* Have the register data arrays been allocated? */
|
||||
|
@ -637,7 +632,7 @@ re_exec (s)
|
|||
(0 <= LAST_START && LAST_START <= LENGTH) */
|
||||
|
||||
static reg_errcode_t
|
||||
internal_function __attribute_warn_unused_result__
|
||||
__attribute_warn_unused_result__
|
||||
re_search_internal (const regex_t *preg,
|
||||
const char *string, Idx length,
|
||||
Idx start, Idx last_start, Idx stop,
|
||||
|
@ -645,7 +640,7 @@ re_search_internal (const regex_t *preg,
|
|||
int eflags)
|
||||
{
|
||||
reg_errcode_t err;
|
||||
const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
|
||||
const re_dfa_t *dfa = preg->buffer;
|
||||
Idx left_lim, right_lim;
|
||||
int incr;
|
||||
bool fl_longest_match;
|
||||
|
@ -720,7 +715,8 @@ re_search_internal (const regex_t *preg,
|
|||
if (nmatch > 1 || dfa->has_mb_node)
|
||||
{
|
||||
/* Avoid overflow. */
|
||||
if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
|
||||
if (BE ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
|
||||
<= mctx.input.bufs_len), 0))
|
||||
{
|
||||
err = REG_ESPACE;
|
||||
goto free_return;
|
||||
|
@ -740,7 +736,7 @@ re_search_internal (const regex_t *preg,
|
|||
mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
|
||||
: CONTEXT_NEWLINE | CONTEXT_BEGBUF;
|
||||
|
||||
/* Check incrementally whether of not the input string match. */
|
||||
/* Check incrementally whether the input string matches. */
|
||||
incr = (last_start < start) ? -1 : 1;
|
||||
left_lim = (last_start < start) ? last_start : start;
|
||||
right_lim = (last_start < start) ? start : last_start;
|
||||
|
@ -922,7 +918,7 @@ re_search_internal (const regex_t *preg,
|
|||
goto free_return;
|
||||
}
|
||||
|
||||
/* At last, add the offset to the each registers, since we slided
|
||||
/* At last, add the offset to each register, since we slid
|
||||
the buffers so that we could assume that the matching starts
|
||||
from 0. */
|
||||
for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
|
||||
|
@ -972,7 +968,7 @@ re_search_internal (const regex_t *preg,
|
|||
}
|
||||
|
||||
static reg_errcode_t
|
||||
internal_function __attribute_warn_unused_result__
|
||||
__attribute_warn_unused_result__
|
||||
prune_impossible_nodes (re_match_context_t *mctx)
|
||||
{
|
||||
const re_dfa_t *const dfa = mctx->dfa;
|
||||
|
@ -988,7 +984,7 @@ prune_impossible_nodes (re_match_context_t *mctx)
|
|||
halt_node = mctx->last_node;
|
||||
|
||||
/* Avoid overflow. */
|
||||
if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
|
||||
if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) <= match_last, 0))
|
||||
return REG_ESPACE;
|
||||
|
||||
sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
|
||||
|
@ -1068,7 +1064,7 @@ prune_impossible_nodes (re_match_context_t *mctx)
|
|||
since initial states may have constraints like "\<", "^", etc.. */
|
||||
|
||||
static inline re_dfastate_t *
|
||||
__attribute ((always_inline)) internal_function
|
||||
__attribute__ ((always_inline)) internal_function
|
||||
acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
|
||||
Idx idx)
|
||||
{
|
||||
|
@ -1106,7 +1102,7 @@ acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
|
|||
FL_LONGEST_MATCH means we want the POSIX longest matching.
|
||||
If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
|
||||
next place where we may want to try matching.
|
||||
Note that the matcher assume that the maching starts from the current
|
||||
Note that the matcher assumes that the matching starts from the current
|
||||
index of the buffer. */
|
||||
|
||||
static Idx
|
||||
|
@ -1175,11 +1171,12 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match,
|
|||
re_dfastate_t *old_state = cur_state;
|
||||
Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
|
||||
|
||||
if (BE (next_char_idx >= mctx->input.bufs_len, 0)
|
||||
if ((BE (next_char_idx >= mctx->input.bufs_len, 0)
|
||||
&& mctx->input.bufs_len < mctx->input.len)
|
||||
|| (BE (next_char_idx >= mctx->input.valid_len, 0)
|
||||
&& mctx->input.valid_len < mctx->input.len))
|
||||
{
|
||||
err = extend_buffers (mctx);
|
||||
err = extend_buffers (mctx, next_char_idx + 1);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
{
|
||||
assert (err == REG_ESPACE);
|
||||
|
@ -1436,7 +1433,7 @@ internal_function __attribute_warn_unused_result__
|
|||
set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
|
||||
regmatch_t *pmatch, bool fl_backtrack)
|
||||
{
|
||||
const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
|
||||
const re_dfa_t *dfa = preg->buffer;
|
||||
Idx idx, cur_node;
|
||||
re_node_set eps_via_nodes;
|
||||
struct re_fail_stack_t *fs;
|
||||
|
@ -1608,21 +1605,21 @@ update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
|
|||
and sift the nodes in each states according to the following rules.
|
||||
Updated state_log will be wrote to STATE_LOG.
|
||||
|
||||
Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
|
||||
Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
|
||||
1. When STR_IDX == MATCH_LAST(the last index in the state_log):
|
||||
If `a' isn't the LAST_NODE and `a' can't epsilon transit to
|
||||
the LAST_NODE, we throw away the node `a'.
|
||||
2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
|
||||
string `s' and transit to `b':
|
||||
If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
|
||||
the LAST_NODE, we throw away the node 'a'.
|
||||
2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
|
||||
string 's' and transit to 'b':
|
||||
i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
|
||||
away the node `a'.
|
||||
away the node 'a'.
|
||||
ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
|
||||
thrown away, we throw away the node `a'.
|
||||
thrown away, we throw away the node 'a'.
|
||||
3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
|
||||
i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
|
||||
node `a'.
|
||||
node 'a'.
|
||||
ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
|
||||
we throw away the node `a'. */
|
||||
we throw away the node 'a'. */
|
||||
|
||||
#define STATE_NODE_CONTAINS(state,node) \
|
||||
((state) != NULL && re_node_set_contains (&(state)->nodes, node))
|
||||
|
@ -1695,11 +1692,11 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
|
|||
Idx i;
|
||||
|
||||
/* Then build the next sifted state.
|
||||
We build the next sifted state on `cur_dest', and update
|
||||
`sifted_states[str_idx]' with `cur_dest'.
|
||||
We build the next sifted state on 'cur_dest', and update
|
||||
'sifted_states[str_idx]' with 'cur_dest'.
|
||||
Note:
|
||||
`cur_dest' is the sifted state from `state_log[str_idx + 1]'.
|
||||
`cur_src' points the node_set of the old `state_log[str_idx]'
|
||||
'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
|
||||
'cur_src' points the node_set of the old 'state_log[str_idx]'
|
||||
(with the epsilon nodes pre-filtered out). */
|
||||
for (i = 0; i < cur_src->nelem; i++)
|
||||
{
|
||||
|
@ -1712,7 +1709,7 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
|
|||
assert (!IS_EPSILON_NODE (type));
|
||||
#endif
|
||||
#ifdef RE_ENABLE_I18N
|
||||
/* If the node may accept `multi byte'. */
|
||||
/* If the node may accept "multi byte". */
|
||||
if (dfa->nodes[prev_node].accept_mb)
|
||||
naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
|
||||
str_idx, sctx->last_str_idx);
|
||||
|
@ -1753,12 +1750,13 @@ clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
|
|||
{
|
||||
Idx top = mctx->state_log_top;
|
||||
|
||||
if (next_state_log_idx >= mctx->input.bufs_len
|
||||
if ((next_state_log_idx >= mctx->input.bufs_len
|
||||
&& mctx->input.bufs_len < mctx->input.len)
|
||||
|| (next_state_log_idx >= mctx->input.valid_len
|
||||
&& mctx->input.valid_len < mctx->input.len))
|
||||
{
|
||||
reg_errcode_t err;
|
||||
err = extend_buffers (mctx);
|
||||
err = extend_buffers (mctx, next_state_log_idx + 1);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
return err;
|
||||
}
|
||||
|
@ -2268,17 +2266,17 @@ sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
|
|||
{
|
||||
const re_dfa_t *const dfa = mctx->dfa;
|
||||
int naccepted;
|
||||
/* Check the node can accept `multi byte'. */
|
||||
/* Check the node can accept "multi byte". */
|
||||
naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
|
||||
if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
|
||||
!STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
|
||||
dfa->nexts[node_idx]))
|
||||
/* The node can't accept the `multi byte', or the
|
||||
/* The node can't accept the "multi byte", or the
|
||||
destination was already thrown away, then the node
|
||||
could't accept the current input `multi byte'. */
|
||||
could't accept the current input "multi byte". */
|
||||
naccepted = 0;
|
||||
/* Otherwise, it is sure that the node could accept
|
||||
`naccepted' bytes input. */
|
||||
'naccepted' bytes input. */
|
||||
return naccepted;
|
||||
}
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
|
@ -2457,7 +2455,7 @@ find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
|
|||
/* From the node set CUR_NODES, pick up the nodes whose types are
|
||||
OP_OPEN_SUBEXP and which have corresponding back references in the regular
|
||||
expression. And register them to use them later for evaluating the
|
||||
correspoding back references. */
|
||||
corresponding back references. */
|
||||
|
||||
static reg_errcode_t
|
||||
internal_function
|
||||
|
@ -2568,7 +2566,7 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
|
|||
if (naccepted == 0)
|
||||
continue;
|
||||
|
||||
/* The node can accepts `naccepted' bytes. */
|
||||
/* The node can accepts 'naccepted' bytes. */
|
||||
dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
|
||||
mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
|
||||
: mctx->max_mb_elem_len);
|
||||
|
@ -2620,7 +2618,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
|
|||
const re_token_t *node = dfa->nodes + node_idx;
|
||||
re_node_set *new_dest_nodes;
|
||||
|
||||
/* Check whether `node' is a backreference or not. */
|
||||
/* Check whether 'node' is a backreference or not. */
|
||||
if (node->type != OP_BACK_REF)
|
||||
continue;
|
||||
|
||||
|
@ -2632,14 +2630,14 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
|
|||
continue;
|
||||
}
|
||||
|
||||
/* `node' is a backreference.
|
||||
/* 'node' is a backreference.
|
||||
Check the substring which the substring matched. */
|
||||
bkc_idx = mctx->nbkref_ents;
|
||||
err = get_subexp (mctx, node_idx, cur_str_idx);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
goto free_return;
|
||||
|
||||
/* And add the epsilon closures (which is `new_dest_nodes') of
|
||||
/* And add the epsilon closures (which is 'new_dest_nodes') of
|
||||
the backreference to appropriate state_log. */
|
||||
#ifdef DEBUG
|
||||
assert (dfa->nexts[node_idx] != REG_MISSING);
|
||||
|
@ -2663,7 +2661,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
|
|||
dest_state = mctx->state_log[dest_str_idx];
|
||||
prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
|
||||
: mctx->state_log[cur_str_idx]->nodes.nelem);
|
||||
/* Add `new_dest_node' to state_log. */
|
||||
/* Add 'new_dest_node' to state_log. */
|
||||
if (dest_state == NULL)
|
||||
{
|
||||
mctx->state_log[dest_str_idx]
|
||||
|
@ -2815,7 +2813,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
|
|||
if (bkref_str_off >= mctx->input.len)
|
||||
break;
|
||||
|
||||
err = extend_buffers (mctx);
|
||||
err = extend_buffers (mctx, bkref_str_off + 1);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
return err;
|
||||
|
||||
|
@ -2937,9 +2935,12 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
|
|||
{
|
||||
re_dfastate_t **new_array;
|
||||
Idx old_alloc = path->alloc;
|
||||
Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1;
|
||||
if (BE (new_alloc < old_alloc, 0)
|
||||
|| BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
|
||||
Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
|
||||
Idx new_alloc;
|
||||
if (BE (IDX_MAX - old_alloc < incr_alloc, 0))
|
||||
return REG_ESPACE;
|
||||
new_alloc = old_alloc + incr_alloc;
|
||||
if (BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
|
||||
return REG_ESPACE;
|
||||
new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
|
||||
if (BE (new_array == NULL, 0))
|
||||
|
@ -3102,7 +3103,7 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
|
|||
assert (!IS_EPSILON_NODE (type));
|
||||
#endif
|
||||
#ifdef RE_ENABLE_I18N
|
||||
/* If the node may accept `multi byte'. */
|
||||
/* If the node may accept "multi byte". */
|
||||
if (dfa->nodes[cur_node].accept_mb)
|
||||
{
|
||||
naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
|
||||
|
@ -3359,7 +3360,7 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
|
|||
bitset_word_t elem, mask;
|
||||
bool dests_node_malloced = false;
|
||||
bool dest_states_malloced = false;
|
||||
Idx ndests; /* Number of the destination states from `state'. */
|
||||
Idx ndests; /* Number of the destination states from 'state'. */
|
||||
re_dfastate_t **trtable;
|
||||
re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
|
||||
re_node_set follows, *dests_node;
|
||||
|
@ -3373,8 +3374,8 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
|
|||
} *dests_alloc;
|
||||
|
||||
/* We build DFA states which corresponds to the destination nodes
|
||||
from `state'. `dests_node[i]' represents the nodes which i-th
|
||||
destination state contains, and `dests_ch[i]' represents the
|
||||
from 'state'. 'dests_node[i]' represents the nodes which i-th
|
||||
destination state contains, and 'dests_ch[i]' represents the
|
||||
characters which i-th destination state accepts. */
|
||||
if (__libc_use_alloca (sizeof (struct dests_alloc)))
|
||||
dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
|
||||
|
@ -3388,20 +3389,23 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
|
|||
dests_node = dests_alloc->dests_node;
|
||||
dests_ch = dests_alloc->dests_ch;
|
||||
|
||||
/* Initialize transiton table. */
|
||||
/* Initialize transition table. */
|
||||
state->word_trtable = state->trtable = NULL;
|
||||
|
||||
/* At first, group all nodes belonging to `state' into several
|
||||
/* At first, group all nodes belonging to 'state' into several
|
||||
destinations. */
|
||||
ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
|
||||
if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
|
||||
{
|
||||
if (dests_node_malloced)
|
||||
free (dests_alloc);
|
||||
/* Return false in case of an error, true otherwise. */
|
||||
if (ndests == 0)
|
||||
{
|
||||
state->trtable = (re_dfastate_t **)
|
||||
calloc (sizeof (re_dfastate_t *), SBC_MAX);
|
||||
if (BE (state->trtable == NULL, 0))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -3591,13 +3595,13 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
|
|||
reg_errcode_t err;
|
||||
bool ok;
|
||||
Idx i, j, k;
|
||||
Idx ndests; /* Number of the destinations from `state'. */
|
||||
Idx ndests; /* Number of the destinations from 'state'. */
|
||||
bitset_t accepts; /* Characters a node can accept. */
|
||||
const re_node_set *cur_nodes = &state->nodes;
|
||||
bitset_empty (accepts);
|
||||
ndests = 0;
|
||||
|
||||
/* For all the nodes belonging to `state', */
|
||||
/* For all the nodes belonging to 'state', */
|
||||
for (i = 0; i < cur_nodes->nelem; ++i)
|
||||
{
|
||||
re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
|
||||
|
@ -3640,7 +3644,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
|
|||
else
|
||||
continue;
|
||||
|
||||
/* Check the `accepts' and sift the characters which are not
|
||||
/* Check the 'accepts' and sift the characters which are not
|
||||
match it the context. */
|
||||
if (constraint)
|
||||
{
|
||||
|
@ -3699,7 +3703,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
|
|||
}
|
||||
}
|
||||
|
||||
/* Then divide `accepts' into DFA states, or create a new
|
||||
/* Then divide 'accepts' into DFA states, or create a new
|
||||
state. Above, we make sure that accepts is not empty. */
|
||||
for (j = 0; j < ndests; ++j)
|
||||
{
|
||||
|
@ -3712,7 +3716,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
|
|||
if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
|
||||
continue;
|
||||
|
||||
/* Enumerate the intersection set of this state and `accepts'. */
|
||||
/* Enumerate the intersection set of this state and 'accepts'. */
|
||||
has_intersec = 0;
|
||||
for (k = 0; k < BITSET_WORDS; ++k)
|
||||
has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
|
||||
|
@ -3720,7 +3724,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
|
|||
if (!has_intersec)
|
||||
continue;
|
||||
|
||||
/* Then check if this state is a subset of `accepts'. */
|
||||
/* Then check if this state is a subset of 'accepts'. */
|
||||
not_subset = not_consumed = 0;
|
||||
for (k = 0; k < BITSET_WORDS; ++k)
|
||||
{
|
||||
|
@ -3728,8 +3732,8 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
|
|||
not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
|
||||
}
|
||||
|
||||
/* If this state isn't a subset of `accepts', create a
|
||||
new group state, which has the `remains'. */
|
||||
/* If this state isn't a subset of 'accepts', create a
|
||||
new group state, which has the 'remains'. */
|
||||
if (not_subset)
|
||||
{
|
||||
bitset_copy (dests_ch[ndests], remains);
|
||||
|
@ -3768,7 +3772,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
|
|||
}
|
||||
|
||||
#ifdef RE_ENABLE_I18N
|
||||
/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
|
||||
/* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
|
||||
Return the number of the bytes the node accepts.
|
||||
STR_IDX is the current index of the input string.
|
||||
|
||||
|
@ -3895,7 +3899,6 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
|
|||
const int32_t *table, *indirect;
|
||||
const unsigned char *weights, *extra;
|
||||
const char *collseqwc;
|
||||
int32_t idx;
|
||||
/* This #include defines a local function! */
|
||||
# include <locale/weight.h>
|
||||
|
||||
|
@ -3933,6 +3936,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
|
|||
in_collseq = find_collation_sequence_value (pin, elem_len);
|
||||
}
|
||||
/* match with range expression? */
|
||||
/* FIXME: Implement rational ranges here, too. */
|
||||
for (i = 0; i < cset->nranges; ++i)
|
||||
if (cset->range_starts[i] <= in_collseq
|
||||
&& in_collseq <= cset->range_ends[i])
|
||||
|
@ -3953,7 +3957,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
|
|||
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
|
||||
indirect = (const int32_t *)
|
||||
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
|
||||
int32_t idx = findidx (&cp);
|
||||
int32_t idx = findidx (&cp, elem_len);
|
||||
if (idx > 0)
|
||||
for (i = 0; i < cset->nequiv_classes; ++i)
|
||||
{
|
||||
|
@ -3984,18 +3988,9 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
|
|||
# endif /* _LIBC */
|
||||
{
|
||||
/* match with range expression? */
|
||||
#if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && __STRICT_ANSI__)
|
||||
wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
|
||||
#else
|
||||
wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
|
||||
cmp_buf[2] = wc;
|
||||
#endif
|
||||
for (i = 0; i < cset->nranges; ++i)
|
||||
{
|
||||
cmp_buf[0] = cset->range_starts[i];
|
||||
cmp_buf[4] = cset->range_ends[i];
|
||||
if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
|
||||
&& wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
|
||||
if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
|
||||
{
|
||||
match_len = char_len;
|
||||
goto check_node_accept_bytes_match;
|
||||
|
@ -4065,7 +4060,7 @@ find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
|
|||
/* Skip the collation sequence value. */
|
||||
idx += sizeof (uint32_t);
|
||||
/* Skip the wide char sequence of the collating element. */
|
||||
idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
|
||||
idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
|
||||
/* If we found the entry, return the sequence value. */
|
||||
if (found)
|
||||
return *(uint32_t *) (extra + idx);
|
||||
|
@ -4133,17 +4128,20 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
|
|||
|
||||
static reg_errcode_t
|
||||
internal_function __attribute_warn_unused_result__
|
||||
extend_buffers (re_match_context_t *mctx)
|
||||
extend_buffers (re_match_context_t *mctx, int min_len)
|
||||
{
|
||||
reg_errcode_t ret;
|
||||
re_string_t *pstr = &mctx->input;
|
||||
|
||||
/* Avoid overflow. */
|
||||
if (BE (SIZE_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
|
||||
if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
|
||||
<= pstr->bufs_len, 0))
|
||||
return REG_ESPACE;
|
||||
|
||||
/* Double the lengthes of the buffers. */
|
||||
ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
|
||||
/* Double the lengths of the buffers, but allocate at least MIN_LEN. */
|
||||
ret = re_string_realloc_buffers (pstr,
|
||||
MAX (min_len,
|
||||
MIN (pstr->len, pstr->bufs_len * 2)));
|
||||
if (BE (ret != REG_NOERROR, 0))
|
||||
return ret;
|
||||
|
||||
|
@ -4206,7 +4204,7 @@ match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
|
|||
size_t max_object_size =
|
||||
MAX (sizeof (struct re_backref_cache_entry),
|
||||
sizeof (re_sub_match_top_t *));
|
||||
if (BE (SIZE_MAX / max_object_size < n, 0))
|
||||
if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n, 0))
|
||||
return REG_ESPACE;
|
||||
|
||||
mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue