1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 #if defined HAVE_WCHAR_H || defined _LIBC
29 #endif /* HAVE_WCHAR_H || _LIBC */
30 #if defined HAVE_WCTYPE_H || defined _LIBC
32 #endif /* HAVE_WCTYPE_H || _LIBC */
35 # ifndef _RE_DEFINE_LOCALE_FUNCTIONS
36 # define _RE_DEFINE_LOCALE_FUNCTIONS 1
37 # include <locale/localeinfo.h>
38 # include <locale/elem-hash.h>
39 # include <locale/coll-lookup.h>
44 #include "regex_internal.h"
46 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
47 re_string_t *input, int n);
48 static void match_ctx_free (re_match_context_t *cache);
49 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
50 int str_idx, int from, int to);
51 static void match_ctx_clear_flag (re_match_context_t *mctx);
52 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
53 re_dfastate_t **limited_sts, int last_node,
54 int last_str_idx, int check_subexp);
55 static reg_errcode_t re_search_internal (const regex_t *preg,
56 const char *string, int length,
57 int start, int range, int stop,
58 size_t nmatch, regmatch_t pmatch[],
60 static int re_search_2_stub (struct re_pattern_buffer *bufp,
61 const char *string1, int length1,
62 const char *string2, int length2,
63 int start, int range, struct re_registers *regs,
64 int stop, int ret_len);
65 static int re_search_stub (struct re_pattern_buffer *bufp,
66 const char *string, int length, int start,
67 int range, int stop, struct re_registers *regs,
69 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
70 int nregs, int regs_allocated);
71 static inline re_dfastate_t *acquire_init_state_context (reg_errcode_t *err,
73 const re_match_context_t *mctx,
75 static int check_matching (const regex_t *preg, re_match_context_t *mctx,
76 int fl_search, int fl_longest_match);
77 static int check_halt_node_context (const re_dfa_t *dfa, int node,
78 unsigned int context);
79 static int check_halt_state_context (const regex_t *preg,
80 const re_dfastate_t *state,
81 const re_match_context_t *mctx, int idx);
82 static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node,
83 int cur_idx, int nmatch);
84 static int proceed_next_node (const regex_t *preg, int nregs, regmatch_t *regs,
85 const re_match_context_t *mctx,
86 int *pidx, int node, re_node_set *eps_via_nodes,
87 struct re_fail_stack_t *fs);
88 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
89 int str_idx, int *dests, int nregs,
91 re_node_set *eps_via_nodes);
92 static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
93 regmatch_t *regs, re_node_set *eps_via_nodes);
94 static reg_errcode_t set_regs (const regex_t *preg,
95 const re_match_context_t *mctx,
96 size_t nmatch, regmatch_t *pmatch,
98 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
100 #ifdef RE_ENABLE_I18N
101 static int sift_states_iter_mb (const regex_t *preg,
102 const re_match_context_t *mctx,
103 re_sift_context_t *sctx,
104 int node_idx, int str_idx, int max_str_idx);
105 #endif /* RE_ENABLE_I18N */
106 static reg_errcode_t sift_states_backward (const regex_t *preg,
107 re_match_context_t *mctx,
108 re_sift_context_t *sctx);
109 static reg_errcode_t update_cur_sifted_state (const regex_t *preg,
110 re_match_context_t *mctx,
111 re_sift_context_t *sctx,
113 re_node_set *dest_nodes);
114 static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
115 re_node_set *dest_nodes,
116 const re_node_set *candidates);
117 static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
118 re_node_set *dest_nodes,
119 const re_node_set *and_nodes);
120 static int check_dst_limits (re_dfa_t *dfa, re_node_set *limits,
121 re_match_context_t *mctx, int dst_node,
122 int dst_idx, int src_node, int src_idx);
123 static int check_dst_limits_calc_pos (re_dfa_t *dfa, re_match_context_t *mctx,
124 int limit, re_node_set *eclosures,
125 int subexp_idx, int node, int str_idx);
126 static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
127 re_node_set *dest_nodes,
128 const re_node_set *candidates,
130 struct re_backref_cache_entry *bkref_ents,
132 static reg_errcode_t search_subexp (const regex_t *preg,
133 re_match_context_t *mctx,
134 re_sift_context_t *sctx, int str_idx,
135 re_node_set *dest_nodes);
136 static reg_errcode_t sift_states_bkref (const regex_t *preg,
137 re_match_context_t *mctx,
138 re_sift_context_t *sctx,
139 int str_idx, re_node_set *dest_nodes);
140 static reg_errcode_t clean_state_log_if_need (re_match_context_t *mctx,
141 int next_state_log_idx);
142 static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
143 re_dfastate_t **src, int num);
144 static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
145 re_match_context_t *mctx,
146 re_dfastate_t *state, int fl_search);
147 static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
148 re_dfastate_t *pstate,
150 re_match_context_t *mctx);
151 #ifdef RE_ENABLE_I18N
152 static reg_errcode_t transit_state_mb (const regex_t *preg,
153 re_dfastate_t *pstate,
154 re_match_context_t *mctx);
155 #endif /* RE_ENABLE_I18N */
156 static reg_errcode_t transit_state_bkref (const regex_t *preg,
157 re_dfastate_t *pstate,
158 re_match_context_t *mctx);
159 static reg_errcode_t transit_state_bkref_loop (const regex_t *preg,
161 re_dfastate_t **work_state_log,
162 re_match_context_t *mctx);
163 static re_dfastate_t **build_trtable (const regex_t *dfa,
164 const re_dfastate_t *state,
166 #ifdef RE_ENABLE_I18N
167 static int check_node_accept_bytes (const regex_t *preg, int node_idx,
168 const re_string_t *input, int idx);
170 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
173 #endif /* RE_ENABLE_I18N */
174 static int group_nodes_into_DFAstates (const regex_t *dfa,
175 const re_dfastate_t *state,
176 re_node_set *states_node,
178 static int check_node_accept (const regex_t *preg, const re_token_t *node,
179 const re_match_context_t *mctx, int idx);
180 static reg_errcode_t extend_buffers (re_match_context_t *mctx);
182 /* Entry point for POSIX code. */
184 /* regexec searches for a given pattern, specified by PREG, in the
187 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
188 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
189 least NMATCH elements, and we set them to the offsets of the
190 corresponding matched substrings.
192 EFLAGS specifies `execution flags' which affect matching: if
193 REG_NOTBOL is set, then ^ does not match at the beginning of the
194 string; if REG_NOTEOL is set, then $ does not match at the end.
196 We return 0 if we find a match and REG_NOMATCH if not. */
199 regexec (preg, string, nmatch, pmatch, eflags)
200 const regex_t *__restrict preg;
201 const char *__restrict string;
207 int length = strlen (string);
209 err = re_search_internal (preg, string, length, 0, length, length, 0,
212 err = re_search_internal (preg, string, length, 0, length, length, nmatch,
214 return err != REG_NOERROR;
217 weak_alias (__regexec, regexec)
220 /* Entry points for GNU code. */
222 /* re_match, re_search, re_match_2, re_search_2
224 The former two functions operate on STRING with length LENGTH,
225 while the later two operate on concatenation of STRING1 and STRING2
226 with lengths LENGTH1 and LENGTH2, respectively.
228 re_match() matches the compiled pattern in BUFP against the string,
229 starting at index START.
231 re_search() first tries matching at index START, then it tries to match
232 starting from index START + 1, and so on. The last start position tried
233 is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
236 The parameter STOP of re_{match,search}_2 specifies that no match exceeding
237 the first STOP characters of the concatenation of the strings should be
240 If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
241 and all groups is stroed in REGS. (For the "_2" variants, the offsets are
242 computed relative to the concatenation, not relative to the individual
245 On success, re_match* functions return the length of the match, re_search*
246 return the position of the start of the match. Return value -1 means no
247 match was found and -2 indicates an internal error. */
250 re_match (bufp, string, length, start, regs)
251 struct re_pattern_buffer *bufp;
254 struct re_registers *regs;
256 return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
259 weak_alias (__re_match, re_match)
263 re_search (bufp, string, length, start, range, regs)
264 struct re_pattern_buffer *bufp;
266 int length, start, range;
267 struct re_registers *regs;
269 return re_search_stub (bufp, string, length, start, range, length, regs, 0);
272 weak_alias (__re_search, re_search)
276 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
277 struct re_pattern_buffer *bufp;
278 const char *string1, *string2;
279 int length1, length2, start, stop;
280 struct re_registers *regs;
282 return re_search_2_stub (bufp, string1, length1, string2, length2,
283 start, 0, regs, stop, 1);
286 weak_alias (__re_match_2, re_match_2)
290 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
291 struct re_pattern_buffer *bufp;
292 const char *string1, *string2;
293 int length1, length2, start, range, stop;
294 struct re_registers *regs;
296 return re_search_2_stub (bufp, string1, length1, string2, length2,
297 start, range, regs, stop, 0);
300 weak_alias (__re_search_2, re_search_2)
304 re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
306 struct re_pattern_buffer *bufp;
307 const char *string1, *string2;
308 int length1, length2, start, range, stop, ret_len;
309 struct re_registers *regs;
313 int len = length1 + length2;
316 if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
319 /* Concatenate the strings. */
323 char *s = re_malloc (char, len);
325 if (BE (s == NULL, 0))
327 memcpy (s, string1, length1);
328 memcpy (s + length1, string2, length2);
337 rval = re_search_stub (bufp, str, len, start, range, stop, regs,
340 re_free ((char *) str);
344 /* The parameters have the same meaning as those of re_search.
345 Additional parameters:
346 If RET_LEN is nonzero the length of the match is returned (re_match style);
347 otherwise the position of the match is returned. */
350 re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
351 struct re_pattern_buffer *bufp;
353 int length, start, range, stop, ret_len;
354 struct re_registers *regs;
356 reg_errcode_t result;
361 /* Check for out-of-range. */
362 if (BE (start < 0 || start > length, 0))
364 if (BE (start + range > length, 0))
365 range = length - start;
366 else if (BE (start + range < 0, 0))
369 eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
370 eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
372 /* Compile fastmap if we haven't yet. */
373 if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
374 re_compile_fastmap (bufp);
376 if (BE (bufp->no_sub, 0))
379 /* We need at least 1 register. */
382 else if (BE (bufp->regs_allocated == REGS_FIXED &&
383 regs->num_regs < bufp->re_nsub + 1, 0))
385 nregs = regs->num_regs;
386 if (BE (nregs < 1, 0))
388 /* Nothing can be copied to regs. */
394 nregs = bufp->re_nsub + 1;
395 pmatch = re_malloc (regmatch_t, nregs);
396 if (BE (pmatch == NULL, 0))
399 result = re_search_internal (bufp, string, length, start, range, stop,
400 nregs, pmatch, eflags);
404 /* I hope we needn't fill ther regs with -1's when no match was found. */
405 if (result != REG_NOERROR)
407 else if (regs != NULL)
409 /* If caller wants register contents data back, copy them. */
410 bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
411 bufp->regs_allocated);
412 if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
416 if (BE (rval == 0, 1))
420 assert (pmatch[0].rm_so == start);
421 rval = pmatch[0].rm_eo - start;
424 rval = pmatch[0].rm_so;
431 re_copy_regs (regs, pmatch, nregs, regs_allocated)
432 struct re_registers *regs;
434 int nregs, regs_allocated;
436 int rval = REGS_REALLOCATE;
438 int need_regs = nregs + 1;
439 /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
442 /* Have the register data arrays been allocated? */
443 if (regs_allocated == REGS_UNALLOCATED)
444 { /* No. So allocate them with malloc. */
445 regs->start = re_malloc (regoff_t, need_regs);
446 if (BE (regs->start == NULL, 0))
447 return REGS_UNALLOCATED;
448 regs->end = re_malloc (regoff_t, need_regs);
449 if (BE (regs->end == NULL, 0))
451 re_free (regs->start);
452 return REGS_UNALLOCATED;
454 regs->num_regs = need_regs;
456 else if (regs_allocated == REGS_REALLOCATE)
457 { /* Yes. If we need more elements than were already
458 allocated, reallocate them. If we need fewer, just
460 if (need_regs > regs->num_regs)
462 regs->start = re_realloc (regs->start, regoff_t, need_regs);
463 if (BE (regs->start == NULL, 0))
465 if (regs->end != NULL)
467 return REGS_UNALLOCATED;
469 regs->end = re_realloc (regs->end, regoff_t, need_regs);
470 if (BE (regs->end == NULL, 0))
472 re_free (regs->start);
473 return REGS_UNALLOCATED;
475 regs->num_regs = need_regs;
480 assert (regs_allocated == REGS_FIXED);
481 /* This function may not be called with REGS_FIXED and nregs too big. */
482 assert (regs->num_regs >= nregs);
487 for (i = 0; i < nregs; ++i)
489 regs->start[i] = pmatch[i].rm_so;
490 regs->end[i] = pmatch[i].rm_eo;
492 for ( ; i < regs->num_regs; ++i)
493 regs->start[i] = regs->end[i] = -1;
498 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
499 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
500 this memory for recording register information. STARTS and ENDS
501 must be allocated using the malloc library routine, and must each
502 be at least NUM_REGS * sizeof (regoff_t) bytes long.
504 If NUM_REGS == 0, then subsequent matches should allocate their own
507 Unless this function is called, the first search or match using
508 PATTERN_BUFFER will allocate its own register data, without
509 freeing the old data. */
512 re_set_registers (bufp, regs, num_regs, starts, ends)
513 struct re_pattern_buffer *bufp;
514 struct re_registers *regs;
516 regoff_t *starts, *ends;
520 bufp->regs_allocated = REGS_REALLOCATE;
521 regs->num_regs = num_regs;
522 regs->start = starts;
527 bufp->regs_allocated = REGS_UNALLOCATED;
529 regs->start = regs->end = (regoff_t *) 0;
533 weak_alias (__re_set_registers, re_set_registers)
536 /* Entry points compatible with 4.2 BSD regex library. We don't define
537 them unless specifically requested. */
539 #if defined _REGEX_RE_COMP || defined _LIBC
547 return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
549 #endif /* _REGEX_RE_COMP */
551 static re_node_set empty_set;
553 /* Internal entry point. */
555 /* Searches for a compiled pattern PREG in the string STRING, whose
556 length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
557 mingings with regexec. START, and RANGE have the same meanings
559 Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
560 otherwise return the error code.
561 Note: We assume front end functions already check ranges.
562 (START + RANGE >= 0 && START + RANGE <= LENGTH) */
565 re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
569 int length, start, range, stop, eflags;
574 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
576 int left_lim, right_lim, incr;
577 int fl_longest_match, match_first, match_last = -1;
578 re_match_context_t mctx;
579 char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate)
580 ? preg->fastmap : NULL);
582 /* Check if the DFA haven't been compiled. */
583 if (BE (preg->used == 0 || dfa->init_state == NULL
584 || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
585 || dfa->init_state_begbuf == NULL, 0))
588 re_node_set_init_empty (&empty_set);
590 /* We must check the longest matching, if nmatch > 0. */
591 fl_longest_match = (nmatch != 0);
593 err = re_string_allocate (&input, string, length, dfa->nodes_len + 1,
594 preg->translate, preg->syntax & RE_ICASE);
595 if (BE (err != REG_NOERROR, 0))
599 err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2);
600 if (BE (err != REG_NOERROR, 0))
603 /* We will log all the DFA states through which the dfa pass,
604 if nmatch > 1, or this dfa has "multibyte node", which is a
605 back-reference or a node which can accept multibyte character or
606 multi character collating element. */
607 if (nmatch > 1 || dfa->has_mb_node)
609 mctx.state_log = re_malloc (re_dfastate_t *, dfa->nodes_len + 1);
610 if (BE (mctx.state_log == NULL, 0))
614 mctx.state_log = NULL;
617 /* We assume front-end functions already check them. */
618 assert (start + range >= 0 && start + range <= length);
622 input.tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
623 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
625 /* Check incrementally whether of not the input string match. */
626 incr = (range < 0) ? -1 : 1;
627 left_lim = (range < 0) ? start + range : start;
628 right_lim = (range < 0) ? start : start + range;
632 /* At first get the current byte from input string. */
634 if (MB_CUR_MAX > 1 && (preg->syntax & RE_ICASE || preg->translate))
636 /* In this case, we can't determin easily the current byte,
637 since it might be a component byte of a multibyte character.
638 Then we use the constructed buffer instead. */
639 /* If MATCH_FIRST is out of the valid range, reconstruct the
641 if (input.raw_mbs_idx + input.valid_len <= match_first)
642 re_string_reconstruct (&input, match_first, eflags,
643 preg->newline_anchor);
644 /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
645 Note that MATCH_FIRST must not be smaller than 0. */
646 ch = ((match_first >= length) ? 0
647 : re_string_byte_at (&input, match_first - input.raw_mbs_idx));
651 /* We apply translate/conversion manually, since it is trivial
653 /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
654 Note that MATCH_FIRST must not be smaller than 0. */
655 ch = (match_first < length) ? (unsigned char)string[match_first] : 0;
656 /* Apply translation if we need. */
657 ch = preg->translate ? preg->translate[ch] : ch;
658 /* In case of case insensitive mode, convert to upper case. */
659 ch = ((preg->syntax & RE_ICASE) && islower (ch)) ? toupper (ch) : ch;
662 /* Eliminate inappropriate one by fastmap. */
663 if (preg->can_be_null || fastmap == NULL || fastmap[ch])
665 /* Reconstruct the buffers so that the matcher can assume that
666 the matching starts from the begining of the buffer. */
667 re_string_reconstruct (&input, match_first, eflags,
668 preg->newline_anchor);
669 #ifdef RE_ENABLE_I18N
670 /* Eliminate it when it is a component of a multibyte character
671 and isn't the head of a multibyte character. */
672 if (MB_CUR_MAX == 1 || re_string_first_byte (&input, 0))
675 /* It seems to be appropriate one, then use the matcher. */
676 /* We assume that the matching starts from 0. */
677 mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
678 match_last = check_matching (preg, &mctx, 0, fl_longest_match);
679 if (match_last != -1)
681 if (BE (match_last == -2, 0))
684 break; /* We found a matching. */
688 /* Update counter. */
690 if (match_first < left_lim || right_lim < match_first)
694 /* Set pmatch[] if we need. */
695 if (match_last != -1 && nmatch > 0)
699 /* Initialize registers. */
700 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
701 pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
703 /* Set the points where matching start/end. */
705 mctx.match_last = pmatch[0].rm_eo = match_last;
707 if (!preg->no_sub && nmatch > 1)
709 /* We need the ranges of all the subexpressions. */
711 re_dfastate_t **sifted_states;
712 re_dfastate_t **lim_states = NULL;
713 re_dfastate_t *pstate = mctx.state_log[match_last];
714 re_sift_context_t sctx;
716 assert (mctx.state_log != NULL);
718 halt_node = check_halt_state_context (preg, pstate, &mctx,
720 if (dfa->has_plural_match)
722 match_ctx_clear_flag (&mctx);
723 sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
724 if (BE (sifted_states == NULL, 0))
728 lim_states = calloc (sizeof (re_dfastate_t *),
730 if (BE (lim_states == NULL, 0))
733 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
735 err = sift_states_backward (preg, &mctx, &sctx);
736 if (BE (err != REG_NOERROR, 0))
738 if (lim_states != NULL)
740 err = merge_state_array (dfa, sifted_states, lim_states,
742 if (BE (err != REG_NOERROR, 0))
744 re_free (lim_states);
746 re_node_set_free (&sctx.limits);
747 re_free (mctx.state_log);
748 mctx.state_log = sifted_states;
750 mctx.last_node = halt_node;
751 err = set_regs (preg, &mctx, nmatch, pmatch,
752 dfa->has_plural_match && dfa->nbackref > 0);
753 if (BE (err != REG_NOERROR, 0))
757 /* At last, add the offset to the each registers, since we slided
758 the buffers so that We can assume that the matching starts from 0. */
759 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
760 if (pmatch[reg_idx].rm_so != -1)
762 pmatch[reg_idx].rm_so += match_first;
763 pmatch[reg_idx].rm_eo += match_first;
767 re_free (mctx.state_log);
769 match_ctx_free (&mctx);
770 re_string_destruct (&input);
772 return (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
775 /* Acquire an initial state and return it.
776 We must select appropriate initial state depending on the context,
777 since initial states may have constraints like "\<", "^", etc.. */
779 static inline re_dfastate_t *
780 acquire_init_state_context (err, preg, mctx, idx)
783 const re_match_context_t *mctx;
786 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
789 if (dfa->init_state->has_constraint)
791 unsigned int context;
792 context = re_string_context_at (mctx->input, idx - 1, mctx->eflags,
793 preg->newline_anchor);
794 if (IS_WORD_CONTEXT (context))
795 return dfa->init_state_word;
796 else if (IS_ORDINARY_CONTEXT (context))
797 return dfa->init_state;
798 else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
799 return dfa->init_state_begbuf;
800 else if (IS_NEWLINE_CONTEXT (context))
801 return dfa->init_state_nl;
802 else if (IS_BEGBUF_CONTEXT (context))
804 /* It is relatively rare case, then calculate on demand. */
805 return re_acquire_state_context (err, dfa,
806 dfa->init_state->entrance_nodes,
810 /* Must not happen? */
811 return dfa->init_state;
814 return dfa->init_state;
817 /* Check whether the regular expression match input string INPUT or not,
818 and return the index where the matching end, return -1 if not match,
819 or return -2 in case of an error.
820 FL_SEARCH means we must search where the matching starts,
821 FL_LONGEST_MATCH means we want the POSIX longest matching.
822 Note that the matcher assume that the maching starts from the current
823 index of the buffer. */
826 check_matching (preg, mctx, fl_search, fl_longest_match)
828 re_match_context_t *mctx;
829 int fl_search, fl_longest_match;
834 int cur_str_idx = re_string_cur_idx (mctx->input);
835 re_dfastate_t *cur_state;
837 cur_state = acquire_init_state_context (&err, preg, mctx, cur_str_idx);
838 /* An initial state must not be NULL(invalid state). */
839 if (BE (cur_state == NULL, 0))
841 if (mctx->state_log != NULL)
842 mctx->state_log[cur_str_idx] = cur_state;
844 if (cur_state->has_backref)
847 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
848 for (i = 0; i < cur_state->nodes.nelem; ++i)
850 int node = cur_state->nodes.elems[i];
851 re_token_type_t type = dfa->nodes[node].type;
852 if (type == OP_BACK_REF)
855 for (clexp_idx = 0; clexp_idx < cur_state->nodes.nelem;
858 re_token_t *clexp_node;
859 clexp_node = dfa->nodes + cur_state->nodes.elems[clexp_idx];
860 if (clexp_node->type == OP_CLOSE_SUBEXP
861 && clexp_node->opr.idx + 1== dfa->nodes[node].opr.idx)
863 err = match_ctx_add_entry (mctx, node, 0, 0, 0);
864 if (BE (err != REG_NOERROR, 0))
873 /* If the RE accepts NULL string. */
876 if (!cur_state->has_constraint
877 || check_halt_state_context (preg, cur_state, mctx, cur_str_idx))
879 if (!fl_longest_match)
883 match_last = cur_str_idx;
889 while (!re_string_eoi (mctx->input))
891 cur_state = transit_state (&err, preg, mctx, cur_state,
892 fl_search && !match);
893 if (cur_state == NULL) /* Reached at the invalid state or an error. */
895 cur_str_idx = re_string_cur_idx (mctx->input);
896 if (BE (err != REG_NOERROR, 0))
898 if (fl_search && !match)
900 /* Restart from initial state, since we are searching
901 the point from where matching start. */
902 #ifdef RE_ENABLE_I18N
904 || re_string_first_byte (mctx->input, cur_str_idx))
905 #endif /* RE_ENABLE_I18N */
906 cur_state = acquire_init_state_context (&err, preg, mctx,
908 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
910 if (mctx->state_log != NULL)
911 mctx->state_log[cur_str_idx] = cur_state;
913 else if (!fl_longest_match && match)
915 else /* (fl_longest_match && match) || (!fl_search && !match) */
917 if (mctx->state_log == NULL)
921 int max = mctx->state_log_top;
922 for (; cur_str_idx <= max; ++cur_str_idx)
923 if (mctx->state_log[cur_str_idx] != NULL)
925 if (cur_str_idx > max)
931 if (cur_state != NULL && cur_state->halt)
933 /* Reached at a halt state.
934 Check the halt state can satisfy the current context. */
935 if (!cur_state->has_constraint
936 || check_halt_state_context (preg, cur_state, mctx,
937 re_string_cur_idx (mctx->input)))
939 /* We found an appropriate halt state. */
940 match_last = re_string_cur_idx (mctx->input);
942 if (!fl_longest_match)
950 /* Check NODE match the current context. */
952 static int check_halt_node_context (dfa, node, context)
955 unsigned int context;
957 re_token_type_t type = dfa->nodes[node].type;
958 unsigned int constraint = dfa->nodes[node].constraint;
959 if (type != END_OF_RE)
963 if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
968 /* Check the halt state STATE match the current context.
969 Return 0 if not match, if the node, STATE has, is a halt node and
970 match the context, return the node. */
973 check_halt_state_context (preg, state, mctx, idx)
975 const re_dfastate_t *state;
976 const re_match_context_t *mctx;
979 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
981 unsigned int context;
983 assert (state->halt);
985 context = re_string_context_at (mctx->input, idx, mctx->eflags,
986 preg->newline_anchor);
987 for (i = 0; i < state->nodes.nelem; ++i)
988 if (check_halt_node_context (dfa, state->nodes.elems[i], context))
989 return state->nodes.elems[i];
993 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
994 corresponding to the DFA).
995 Return the destination node, and update EPS_VIA_NODES, return -1 in case
999 proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs)
1000 const regex_t *preg;
1002 const re_match_context_t *mctx;
1003 int nregs, *pidx, node;
1004 re_node_set *eps_via_nodes;
1005 struct re_fail_stack_t *fs;
1007 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1008 int i, err, dest_node;
1010 if (IS_EPSILON_NODE (dfa->nodes[node].type))
1012 re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1013 int ndest, dest_nodes[2];
1014 err = re_node_set_insert (eps_via_nodes, node);
1015 if (BE (err < 0, 0))
1017 /* Pick up valid destinations. */
1018 for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i)
1020 int candidate = dfa->edests[node].elems[i];
1021 if (!re_node_set_contains (cur_nodes, candidate))
1023 dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0];
1024 dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1];
1028 return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0);
1029 /* In order to avoid infinite loop like "(a*)*". */
1030 if (re_node_set_contains (eps_via_nodes, dest_nodes[0]))
1031 return dest_nodes[1];
1033 push_fail_stack (fs, *pidx, dest_nodes, nregs, regs, eps_via_nodes);
1034 return dest_nodes[0];
1039 re_token_type_t type = dfa->nodes[node].type;
1041 #ifdef RE_ENABLE_I18N
1042 if (ACCEPT_MB_NODE (type))
1043 naccepted = check_node_accept_bytes (preg, node, mctx->input, *pidx);
1045 #endif /* RE_ENABLE_I18N */
1046 if (type == OP_BACK_REF)
1048 int subexp_idx = dfa->nodes[node].opr.idx;
1049 naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1052 if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1056 char *buf = re_string_get_buffer (mctx->input);
1057 if (strncmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1065 err = re_node_set_insert (eps_via_nodes, node);
1066 if (BE (err < 0, 0))
1068 dest_node = dfa->edests[node].elems[0];
1069 if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1076 || check_node_accept (preg, dfa->nodes + node, mctx, *pidx))
1078 dest_node = dfa->nexts[node];
1079 *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1080 if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1081 || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1084 re_node_set_empty (eps_via_nodes);
1091 static reg_errcode_t
1092 push_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes)
1093 struct re_fail_stack_t *fs;
1094 int str_idx, *dests, nregs;
1096 re_node_set *eps_via_nodes;
1099 int num = fs->num++;
1100 if (fs->num == fs->alloc)
1103 fs->stack = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1105 if (fs->stack == NULL)
1108 fs->stack[num].idx = str_idx;
1109 fs->stack[num].node = dests[1];
1110 fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1111 memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1112 err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1117 pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
1118 struct re_fail_stack_t *fs;
1121 re_node_set *eps_via_nodes;
1123 int num = --fs->num;
1125 *pidx = fs->stack[num].idx;
1126 memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1127 re_node_set_free (eps_via_nodes);
1128 re_free (fs->stack[num].regs);
1129 *eps_via_nodes = fs->stack[num].eps_via_nodes;
1130 return fs->stack[num].node;
1133 /* Set the positions where the subexpressions are starts/ends to registers
1135 Note: We assume that pmatch[0] is already set, and
1136 pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1). */
1138 static reg_errcode_t
1139 set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
1140 const regex_t *preg;
1141 const re_match_context_t *mctx;
1146 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1147 int idx, cur_node, real_nmatch;
1148 re_node_set eps_via_nodes;
1149 struct re_fail_stack_t *fs;
1150 struct re_fail_stack_t fs_body = {0, 2, NULL};
1152 assert (nmatch > 1);
1153 assert (mctx->state_log != NULL);
1158 fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1162 cur_node = dfa->init_node;
1163 real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
1164 re_node_set_init_empty (&eps_via_nodes);
1165 for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1167 update_regs (dfa, pmatch, cur_node, idx, real_nmatch);
1168 if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1173 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1174 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1176 if (reg_idx == nmatch)
1178 re_node_set_free (&eps_via_nodes);
1179 return free_fail_stack_return (fs);
1181 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1186 re_node_set_free (&eps_via_nodes);
1191 /* Proceed to next node. */
1192 cur_node = proceed_next_node (preg, nmatch, pmatch, mctx, &idx, cur_node,
1193 &eps_via_nodes, fs);
1195 if (BE (cur_node < 0, 0))
1200 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1204 re_node_set_free (&eps_via_nodes);
1209 re_node_set_free (&eps_via_nodes);
1210 return free_fail_stack_return (fs);
1213 static reg_errcode_t
1214 free_fail_stack_return (fs)
1215 struct re_fail_stack_t *fs;
1220 for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1222 re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1223 re_free (fs->stack[fs_idx].regs);
1225 re_free (fs->stack);
1231 update_regs (dfa, pmatch, cur_node, cur_idx, nmatch)
1234 int cur_node, cur_idx, nmatch;
1236 int type = dfa->nodes[cur_node].type;
1238 if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP)
1240 reg_num = dfa->nodes[cur_node].opr.idx + 1;
1241 if (reg_num >= nmatch)
1243 if (type == OP_OPEN_SUBEXP)
1245 /* We are at the first node of this sub expression. */
1246 pmatch[reg_num].rm_so = cur_idx;
1247 pmatch[reg_num].rm_eo = -1;
1249 else if (type == OP_CLOSE_SUBEXP)
1250 /* We are at the first node of this sub expression. */
1251 pmatch[reg_num].rm_eo = cur_idx;
1254 #define NUMBER_OF_STATE 1
1256 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1257 and sift the nodes in each states according to the following rules.
1258 Updated state_log will be wrote to STATE_LOG.
1260 Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1261 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1262 If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1263 the LAST_NODE, we throw away the node `a'.
1264 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1265 string `s' and transit to `b':
1266 i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1268 ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1269 throwed away, we throw away the node `a'.
1270 3. When 0 <= STR_IDX < n and 'a' epsilon transit to 'b':
1271 i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1273 ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is throwed away,
1274 we throw away the node `a'. */
1276 #define STATE_NODE_CONTAINS(state,node) \
1277 ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1279 static reg_errcode_t
1280 sift_states_backward (preg, mctx, sctx)
1281 const regex_t *preg;
1282 re_match_context_t *mctx;
1283 re_sift_context_t *sctx;
1286 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1288 int str_idx = sctx->last_str_idx;
1289 re_node_set cur_dest;
1290 re_node_set *cur_src; /* Points the state_log[str_idx]->nodes */
1293 assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1295 cur_src = &mctx->state_log[str_idx]->nodes;
1297 /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
1298 transit to the last_node and the last_node itself. */
1299 err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1300 if (BE (err != REG_NOERROR, 0))
1302 err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
1303 if (BE (err != REG_NOERROR, 0))
1306 /* Then check each states in the state_log. */
1310 /* Update counters. */
1311 null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1312 if (null_cnt > mctx->max_mb_elem_len)
1314 memset (sctx->sifted_states, '\0',
1315 sizeof (re_dfastate_t *) * str_idx);
1316 re_node_set_free (&cur_dest);
1319 re_node_set_empty (&cur_dest);
1321 cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1322 : &mctx->state_log[str_idx]->nodes);
1324 /* Then build the next sifted state.
1325 We build the next sifted state on `cur_dest', and update
1326 `sifted_states[str_idx]' with `cur_dest'.
1328 `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1329 `cur_src' points the node_set of the old `state_log[str_idx]'. */
1330 for (i = 0; i < cur_src->nelem; i++)
1332 int prev_node = cur_src->elems[i];
1334 re_token_type_t type = dfa->nodes[prev_node].type;
1336 if (IS_EPSILON_NODE(type))
1338 #ifdef RE_ENABLE_I18N
1339 /* If the node may accept `multi byte'. */
1340 if (ACCEPT_MB_NODE (type))
1341 naccepted = sift_states_iter_mb (preg, mctx, sctx, prev_node,
1342 str_idx, sctx->last_str_idx);
1344 #endif /* RE_ENABLE_I18N */
1345 /* We don't check backreferences here.
1346 See update_cur_sifted_state(). */
1349 && check_node_accept (preg, dfa->nodes + prev_node, mctx,
1351 && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1352 dfa->nexts[prev_node]))
1358 if (sctx->limits.nelem)
1360 int to_idx = str_idx + naccepted;
1361 if (check_dst_limits (dfa, &sctx->limits, mctx,
1362 dfa->nexts[prev_node], to_idx,
1363 prev_node, str_idx))
1366 ret = re_node_set_insert (&cur_dest, prev_node);
1367 if (BE (ret == -1, 0))
1371 /* Add all the nodes which satisfy the following conditions:
1372 - It can epsilon transit to a node in CUR_DEST.
1374 And update state_log. */
1375 err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
1376 if (BE (err != REG_NOERROR, 0))
1380 re_node_set_free (&cur_dest);
1384 /* Helper functions. */
1386 static inline reg_errcode_t
1387 clean_state_log_if_need (mctx, next_state_log_idx)
1388 re_match_context_t *mctx;
1389 int next_state_log_idx;
1391 int top = mctx->state_log_top;
1393 if (next_state_log_idx >= mctx->input->bufs_len
1394 || (next_state_log_idx >= mctx->input->valid_len
1395 && mctx->input->valid_len < mctx->input->len))
1398 err = extend_buffers (mctx);
1399 if (BE (err != REG_NOERROR, 0))
1403 if (top < next_state_log_idx)
1405 memset (mctx->state_log + top + 1, '\0',
1406 sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1407 mctx->state_log_top = next_state_log_idx;
1412 static reg_errcode_t merge_state_array (dfa, dst, src, num)
1414 re_dfastate_t **dst;
1415 re_dfastate_t **src;
1420 for (st_idx = 0; st_idx < num; ++st_idx)
1422 if (dst[st_idx] == NULL)
1423 dst[st_idx] = src[st_idx];
1424 else if (src[st_idx] != NULL)
1426 re_node_set merged_set;
1427 err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1428 &src[st_idx]->nodes);
1429 if (BE (err != REG_NOERROR, 0))
1431 dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1432 if (BE (err != REG_NOERROR, 0))
1434 re_node_set_free (&merged_set);
1440 static reg_errcode_t
1441 update_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes)
1442 const regex_t *preg;
1443 re_match_context_t *mctx;
1444 re_sift_context_t *sctx;
1446 re_node_set *dest_nodes;
1449 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1450 const re_node_set *candidates;
1451 candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1452 : &mctx->state_log[str_idx]->nodes);
1454 /* At first, add the nodes which can epsilon transit to a node in
1456 if (dest_nodes->nelem)
1458 err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1459 if (BE (err != REG_NOERROR, 0))
1463 /* Then, check the limitations in the current sift_context. */
1464 if (dest_nodes->nelem && sctx->limits.nelem)
1466 err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1467 mctx->bkref_ents, str_idx);
1468 if (BE (err != REG_NOERROR, 0))
1472 /* Update state_log. */
1473 sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1474 if (BE (sctx->sifted_states[str_idx] == NULL && err != REG_NOERROR, 0))
1477 /* If we are searching for the subexpression candidates.
1478 Note that we were from transit_state_bkref_loop() in this case. */
1479 if (dest_nodes->nelem && sctx->check_subexp)
1481 err = search_subexp (preg, mctx, sctx, str_idx, dest_nodes);
1482 if (BE (err != REG_NOERROR, 0))
1486 if ((mctx->state_log[str_idx] != NULL
1487 && mctx->state_log[str_idx]->has_backref))
1489 err = sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes);
1490 if (BE (err != REG_NOERROR, 0))
1496 static reg_errcode_t
1497 add_epsilon_src_nodes (dfa, dest_nodes, candidates)
1499 re_node_set *dest_nodes;
1500 const re_node_set *candidates;
1504 re_node_set src_copy;
1506 err = re_node_set_init_copy (&src_copy, dest_nodes);
1507 if (BE (err != REG_NOERROR, 0))
1509 for (src_idx = 0; src_idx < src_copy.nelem; ++src_idx)
1511 err = re_node_set_add_intersect (dest_nodes, candidates,
1513 + src_copy.elems[src_idx]);
1514 if (BE (err != REG_NOERROR, 0))
1517 re_node_set_free (&src_copy);
1521 static reg_errcode_t
1522 sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
1525 re_node_set *dest_nodes;
1526 const re_node_set *candidates;
1530 re_node_set *inv_eclosure = dfa->inveclosures + node;
1531 re_node_set except_nodes;
1532 re_node_set_init_empty (&except_nodes);
1533 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1535 int cur_node = inv_eclosure->elems[ecl_idx];
1536 if (cur_node == node)
1538 if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1540 int edst1 = dfa->edests[cur_node].elems[0];
1541 int edst2 = ((dfa->edests[cur_node].nelem > 1)
1542 ? dfa->edests[cur_node].elems[1] : -1);
1543 if ((!re_node_set_contains (inv_eclosure, edst1)
1544 && re_node_set_contains (dest_nodes, edst1))
1546 && !re_node_set_contains (inv_eclosure, edst2)
1547 && re_node_set_contains (dest_nodes, edst2)))
1549 err = re_node_set_add_intersect (&except_nodes, candidates,
1550 dfa->inveclosures + cur_node);
1551 if (BE (err != REG_NOERROR, 0))
1556 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1558 int cur_node = inv_eclosure->elems[ecl_idx];
1559 if (!re_node_set_contains (&except_nodes, cur_node))
1561 int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1562 re_node_set_remove_at (dest_nodes, idx);
1565 re_node_set_free (&except_nodes);
1570 check_dst_limits (dfa, limits, mctx, dst_node, dst_idx, src_node, src_idx)
1572 re_node_set *limits;
1573 re_match_context_t *mctx;
1574 int dst_node, dst_idx, src_node, src_idx;
1576 int lim_idx, src_pos, dst_pos;
1578 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1581 struct re_backref_cache_entry *ent;
1582 ent = mctx->bkref_ents + limits->elems[lim_idx];
1583 subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
1585 dst_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
1586 dfa->eclosures + dst_node,
1587 subexp_idx, dst_node, dst_idx);
1588 src_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
1589 dfa->eclosures + src_node,
1590 subexp_idx, src_node, src_idx);
1593 <src> <dst> ( <subexp> )
1594 ( <subexp> ) <src> <dst>
1595 ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
1596 if (src_pos == dst_pos)
1597 continue; /* This is unrelated limitation. */
1605 check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node,
1608 re_match_context_t *mctx;
1609 re_node_set *eclosures;
1610 int limit, subexp_idx, node, str_idx;
1612 struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1613 int pos = (str_idx < lim->subexp_from ? -1
1614 : (lim->subexp_to < str_idx ? 1 : 0));
1616 && (str_idx == lim->subexp_from || str_idx == lim->subexp_to))
1619 for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1621 int node = eclosures->elems[node_idx];
1622 re_token_type_t type= dfa->nodes[node].type;
1623 if (type == OP_BACK_REF)
1626 for (bi = 0; bi < mctx->nbkref_ents; ++bi)
1628 struct re_backref_cache_entry *ent = mctx->bkref_ents + bi;
1629 if (ent->node == node && ent->subexp_from == ent->subexp_to
1630 && ent->str_idx == str_idx)
1633 dst = dfa->edests[node].elems[0];
1634 cpos = check_dst_limits_calc_pos (dfa, mctx, limit,
1635 dfa->eclosures + dst,
1638 if ((str_idx == lim->subexp_from && cpos == -1)
1639 || (str_idx == lim->subexp_to && cpos == 0))
1644 if (type == OP_OPEN_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
1645 && str_idx == lim->subexp_from)
1650 if (type == OP_CLOSE_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
1651 && str_idx == lim->subexp_to)
1654 if (node_idx == eclosures->nelem && str_idx == lim->subexp_to)
1660 /* Check the limitations of sub expressions LIMITS, and remove the nodes
1661 which are against limitations from DEST_NODES. */
1663 static reg_errcode_t
1664 check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
1666 re_node_set *dest_nodes;
1667 const re_node_set *candidates;
1668 re_node_set *limits;
1669 struct re_backref_cache_entry *bkref_ents;
1673 int node_idx, lim_idx;
1675 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1678 struct re_backref_cache_entry *ent;
1679 ent = bkref_ents + limits->elems[lim_idx];
1681 if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
1682 continue; /* This is unrelated limitation. */
1684 subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
1685 if (ent->subexp_to == str_idx)
1689 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1691 int node = dest_nodes->elems[node_idx];
1692 re_token_type_t type= dfa->nodes[node].type;
1693 if (type == OP_OPEN_SUBEXP
1694 && subexp_idx == dfa->nodes[node].opr.idx)
1696 else if (type == OP_CLOSE_SUBEXP
1697 && subexp_idx == dfa->nodes[node].opr.idx)
1701 /* Check the limitation of the open subexpression. */
1702 /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
1705 err = sub_epsilon_src_nodes(dfa, ops_node, dest_nodes,
1707 if (BE (err != REG_NOERROR, 0))
1710 /* Check the limitation of the close subexpression. */
1711 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1713 int node = dest_nodes->elems[node_idx];
1714 if (!re_node_set_contains (dfa->inveclosures + node, cls_node)
1715 && !re_node_set_contains (dfa->eclosures + node, cls_node))
1717 /* It is against this limitation.
1718 Remove it form the current sifted state. */
1719 err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
1721 if (BE (err != REG_NOERROR, 0))
1727 else /* (ent->subexp_to != str_idx) */
1729 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1731 int node = dest_nodes->elems[node_idx];
1732 re_token_type_t type= dfa->nodes[node].type;
1733 if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
1735 if (subexp_idx != dfa->nodes[node].opr.idx)
1737 if ((type == OP_CLOSE_SUBEXP && ent->subexp_to != str_idx)
1738 || (type == OP_OPEN_SUBEXP))
1740 /* It is against this limitation.
1741 Remove it form the current sifted state. */
1742 err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
1744 if (BE (err != REG_NOERROR, 0))
1754 /* Search for the top (in case of sctx->check_subexp < 0) or the
1755 bottom (in case of sctx->check_subexp > 0) of the subexpressions
1756 which the backreference sctx->cur_bkref can match. */
1758 static reg_errcode_t
1759 search_subexp (preg, mctx, sctx, str_idx, dest_nodes)
1760 const regex_t *preg;
1761 re_match_context_t *mctx;
1762 re_sift_context_t *sctx;
1764 re_node_set *dest_nodes;
1767 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1768 re_sift_context_t local_sctx;
1770 const re_node_set *candidates;
1771 re_dfastate_t **lim_states = NULL;
1772 candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1773 : &mctx->state_log[str_idx]->nodes);
1774 local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
1776 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1778 re_token_type_t type;
1779 node = dest_nodes->elems[node_idx];
1780 type = dfa->nodes[node].type;
1782 if (type == OP_CLOSE_SUBEXP
1783 && sctx->check_subexp == dfa->nodes[node].opr.idx + 1)
1785 re_dfastate_t *cur_state;
1786 /* Found the bottom of the subexpression, then search for the
1788 if (local_sctx.sifted_states == NULL)
1790 /* It hasn't been initialized yet, initialize it now. */
1792 err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
1793 if (BE (err != REG_NOERROR, 0))
1796 local_sctx.check_subexp = -sctx->check_subexp;
1797 local_sctx.limited_states = sctx->limited_states;
1798 local_sctx.last_node = node;
1799 local_sctx.last_str_idx = local_sctx.cls_subexp_idx = str_idx;
1800 cur_state = local_sctx.sifted_states[str_idx];
1801 err = sift_states_backward (preg, mctx, &local_sctx);
1802 local_sctx.sifted_states[str_idx] = cur_state;
1803 if (BE (err != REG_NOERROR, 0))
1805 /* There must not 2 same node in a node set. */
1808 else if (type == OP_OPEN_SUBEXP
1809 && -sctx->check_subexp == dfa->nodes[node].opr.idx + 1)
1811 /* Found the top of the subexpression, check that the
1812 backreference can match the input string. */
1815 int bkref_str_idx = re_string_cur_idx (mctx->input);
1816 int subexp_len = sctx->cls_subexp_idx - str_idx;
1817 if (subexp_len < 0 || bkref_str_idx + subexp_len > mctx->input->len)
1820 if (bkref_str_idx + subexp_len > mctx->input->valid_len
1821 && mctx->input->valid_len < mctx->input->len)
1824 err = extend_buffers (mctx);
1825 if (BE (err != REG_NOERROR, 0))
1828 buf = (char *) re_string_get_buffer (mctx->input);
1829 if (strncmp (buf + str_idx, buf + bkref_str_idx, subexp_len) != 0)
1832 if (sctx->limits.nelem && str_idx > 0)
1834 re_dfastate_t *cur_state = sctx->sifted_states[str_idx];
1835 if (lim_states == NULL)
1837 lim_states = re_malloc (re_dfastate_t *, str_idx + 1);
1839 if (local_sctx.sifted_states == NULL)
1841 /* It hasn't been initialized yet, initialize it now. */
1843 if (BE (lim_states == NULL, 0))
1845 err = re_node_set_init_copy (&local_sctx.limits,
1847 if (BE (err != REG_NOERROR, 0))
1850 local_sctx.check_subexp = 0;
1851 local_sctx.last_node = node;
1852 local_sctx.last_str_idx = str_idx;
1853 local_sctx.limited_states = lim_states;
1854 memset (lim_states, '\0',
1855 sizeof (re_dfastate_t*) * (str_idx + 1));
1856 err = sift_states_backward (preg, mctx, &local_sctx);
1857 if (BE (err != REG_NOERROR, 0))
1859 if (local_sctx.sifted_states[0] == NULL
1860 && local_sctx.limited_states[0] == NULL)
1862 sctx->sifted_states[str_idx] = cur_state;
1865 sctx->sifted_states[str_idx] = cur_state;
1867 /* Successfully matched, add a new cache entry. */
1868 dest_str_idx = bkref_str_idx + subexp_len;
1869 err = match_ctx_add_entry (mctx, sctx->cur_bkref, bkref_str_idx,
1870 str_idx, sctx->cls_subexp_idx);
1871 if (BE (err != REG_NOERROR, 0))
1873 err = clean_state_log_if_need (mctx, dest_str_idx);
1874 if (BE (err != REG_NOERROR, 0))
1880 /* Remove the top/bottom of the sub expression we processed. */
1881 if (node_idx < dest_nodes->nelem)
1883 err = sub_epsilon_src_nodes(dfa, node, dest_nodes, candidates);
1884 if (BE (err != REG_NOERROR, 0))
1886 /* Update state_log. */
1887 sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1888 if (BE (err != REG_NOERROR, 0))
1892 if (local_sctx.sifted_states != NULL)
1893 re_node_set_free (&local_sctx.limits);
1894 if (lim_states != NULL)
1895 re_free (lim_states);
1899 static reg_errcode_t
1900 sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes)
1901 const regex_t *preg;
1902 re_match_context_t *mctx;
1903 re_sift_context_t *sctx;
1905 re_node_set *dest_nodes;
1908 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1910 re_sift_context_t local_sctx;
1911 const re_node_set *candidates;
1912 candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1913 : &mctx->state_log[str_idx]->nodes);
1914 local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
1916 for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
1918 int cur_bkref_idx = re_string_cur_idx (mctx->input);
1919 re_token_type_t type;
1920 node = candidates->elems[node_idx];
1921 type = dfa->nodes[node].type;
1922 if (node == sctx->cur_bkref && str_idx == cur_bkref_idx)
1924 /* Avoid infinite loop for the REs like "()\1+". */
1925 if (node == sctx->last_node && str_idx == sctx->last_str_idx)
1927 if (type == OP_BACK_REF)
1930 for (enabled_idx = 0; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
1932 int disabled_idx, subexp_len, to_idx, dst_node;
1933 struct re_backref_cache_entry *entry;
1934 entry = mctx->bkref_ents + enabled_idx;
1935 subexp_len = entry->subexp_to - entry->subexp_from;
1936 to_idx = str_idx + subexp_len;
1937 dst_node = (subexp_len ? dfa->nexts[node]
1938 : dfa->edests[node].elems[0]);
1940 if (entry->node != node || entry->str_idx != str_idx
1941 || to_idx > sctx->last_str_idx
1942 || sctx->sifted_states[to_idx] == NULL)
1944 if (!STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node))
1947 if (check_dst_limits (dfa, &sctx->limits, mctx, node,
1948 str_idx, dst_node, to_idx))
1950 if (sctx->check_subexp == dfa->nodes[node].opr.idx)
1953 buf = (char *) re_string_get_buffer (mctx->input);
1954 if (strncmp (buf + entry->subexp_from,
1955 buf + cur_bkref_idx, subexp_len) != 0)
1957 err = match_ctx_add_entry (mctx, sctx->cur_bkref,
1958 cur_bkref_idx, entry->subexp_from,
1960 if (BE (err != REG_NOERROR, 0))
1962 err = clean_state_log_if_need (mctx, cur_bkref_idx
1964 if (BE (err != REG_NOERROR, 0))
1969 re_dfastate_t *cur_state;
1971 for (disabled_idx = enabled_idx + 1;
1972 disabled_idx < mctx->nbkref_ents; ++disabled_idx)
1974 struct re_backref_cache_entry *entry2;
1975 entry2 = mctx->bkref_ents + disabled_idx;
1976 if (entry2->node != node || entry2->str_idx != str_idx)
1981 if (local_sctx.sifted_states == NULL)
1984 err = re_node_set_init_copy (&local_sctx.limits,
1986 if (BE (err != REG_NOERROR, 0))
1989 local_sctx.last_node = node;
1990 local_sctx.last_str_idx = str_idx;
1991 err = re_node_set_insert (&local_sctx.limits, enabled_idx);
1992 if (BE (err < 0, 0))
1994 cur_state = local_sctx.sifted_states[str_idx];
1995 err = sift_states_backward (preg, mctx, &local_sctx);
1996 if (BE (err != REG_NOERROR, 0))
1998 if (sctx->limited_states != NULL)
2000 err = merge_state_array (dfa, sctx->limited_states,
2001 local_sctx.sifted_states,
2003 if (BE (err != REG_NOERROR, 0))
2006 local_sctx.sifted_states[str_idx] = cur_state;
2007 re_node_set_remove (&local_sctx.limits, enabled_idx);
2008 /* We must not use the variable entry here, since
2009 mctx->bkref_ents might be realloced. */
2010 mctx->bkref_ents[enabled_idx].flag = 1;
2013 for (enabled_idx = 0; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
2015 struct re_backref_cache_entry *entry;
2016 entry = mctx->bkref_ents + enabled_idx;
2017 if (entry->node == node && entry->str_idx == str_idx)
2022 if (local_sctx.sifted_states != NULL)
2024 re_node_set_free (&local_sctx.limits);
2031 #ifdef RE_ENABLE_I18N
2033 sift_states_iter_mb (preg, mctx, sctx, node_idx, str_idx, max_str_idx)
2034 const regex_t *preg;
2035 const re_match_context_t *mctx;
2036 re_sift_context_t *sctx;
2037 int node_idx, str_idx, max_str_idx;
2039 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2041 /* Check the node can accept `multi byte'. */
2042 naccepted = check_node_accept_bytes (preg, node_idx, mctx->input, str_idx);
2043 if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2044 !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2045 dfa->nexts[node_idx]))
2046 /* The node can't accept the `multi byte', or the
2047 destination was already throwed away, then the node
2048 could't accept the current input `multi byte'. */
2050 /* Otherwise, it is sure that the node could accept
2051 `naccepted' bytes input. */
2054 #endif /* RE_ENABLE_I18N */
2057 /* Functions for state transition. */
2059 /* Return the next state to which the current state STATE will transit by
2060 accepting the current input byte, and update STATE_LOG if necessary.
2061 If STATE can accept a multibyte char/collating element/back reference
2062 update the destination of STATE_LOG. */
2064 static re_dfastate_t *
2065 transit_state (err, preg, mctx, state, fl_search)
2067 const regex_t *preg;
2068 re_match_context_t *mctx;
2069 re_dfastate_t *state;
2072 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2073 re_dfastate_t **trtable, *next_state;
2076 if (re_string_cur_idx (mctx->input) + 1 >= mctx->input->bufs_len
2077 || (re_string_cur_idx (mctx->input) + 1 >= mctx->input->valid_len
2078 && mctx->input->valid_len < mctx->input->len))
2080 *err = extend_buffers (mctx);
2081 if (BE (*err != REG_NOERROR, 0))
2089 re_string_skip_bytes (mctx->input, 1);
2093 #ifdef RE_ENABLE_I18N
2094 /* If the current state can accept multibyte. */
2095 if (state->accept_mb)
2097 *err = transit_state_mb (preg, state, mctx);
2098 if (BE (*err != REG_NOERROR, 0))
2101 #endif /* RE_ENABLE_I18N */
2103 /* Then decide the next state with the single byte. */
2106 /* Use transition table */
2107 ch = re_string_fetch_byte (mctx->input);
2108 trtable = fl_search ? state->trtable_search : state->trtable;
2109 if (trtable == NULL)
2111 trtable = build_trtable (preg, state, fl_search);
2113 state->trtable_search = trtable;
2115 state->trtable = trtable;
2117 next_state = trtable[ch];
2121 /* don't use transition table */
2122 next_state = transit_state_sb (err, preg, state, fl_search, mctx);
2123 if (BE (next_state == NULL && err != REG_NOERROR, 0))
2128 /* Update the state_log if we need. */
2129 if (mctx->state_log != NULL)
2131 int cur_idx = re_string_cur_idx (mctx->input);
2132 if (cur_idx > mctx->state_log_top)
2134 mctx->state_log[cur_idx] = next_state;
2135 mctx->state_log_top = cur_idx;
2137 else if (mctx->state_log[cur_idx] == 0)
2139 mctx->state_log[cur_idx] = next_state;
2143 re_dfastate_t *pstate;
2144 unsigned int context;
2145 re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2146 /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2147 the destination of a multibyte char/collating element/
2148 back reference. Then the next state is the union set of
2149 these destinations and the results of the transition table. */
2150 pstate = mctx->state_log[cur_idx];
2151 log_nodes = pstate->entrance_nodes;
2152 if (next_state != NULL)
2154 table_nodes = next_state->entrance_nodes;
2155 *err = re_node_set_init_union (&next_nodes, table_nodes,
2157 if (BE (*err != REG_NOERROR, 0))
2161 next_nodes = *log_nodes;
2162 /* Note: We already add the nodes of the initial state,
2163 then we don't need to add them here. */
2165 context = re_string_context_at (mctx->input,
2166 re_string_cur_idx (mctx->input) - 1,
2167 mctx->eflags, preg->newline_anchor);
2168 next_state = mctx->state_log[cur_idx]
2169 = re_acquire_state_context (err, dfa, &next_nodes, context);
2170 /* We don't need to check errors here, since the return value of
2171 this function is next_state and ERR is already set. */
2173 if (table_nodes != NULL)
2174 re_node_set_free (&next_nodes);
2176 /* If the next state has back references. */
2177 if (next_state != NULL && next_state->has_backref)
2179 *err = transit_state_bkref (preg, next_state, mctx);
2180 if (BE (*err != REG_NOERROR, 0))
2182 next_state = mctx->state_log[cur_idx];
2188 /* Helper functions for transit_state. */
2190 /* Return the next state to which the current state STATE will transit by
2191 accepting the current input byte. */
2193 static re_dfastate_t *
2194 transit_state_sb (err, preg, state, fl_search, mctx)
2196 const regex_t *preg;
2197 re_dfastate_t *state;
2199 re_match_context_t *mctx;
2201 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2202 re_node_set next_nodes;
2203 re_dfastate_t *next_state;
2204 int node_cnt, cur_str_idx = re_string_cur_idx (mctx->input);
2205 unsigned int context;
2207 *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2208 if (BE (*err != REG_NOERROR, 0))
2210 for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2212 int cur_node = state->nodes.elems[node_cnt];
2213 if (check_node_accept (preg, dfa->nodes + cur_node, mctx, cur_str_idx))
2215 *err = re_node_set_merge (&next_nodes,
2216 dfa->eclosures + dfa->nexts[cur_node]);
2217 if (BE (*err != REG_NOERROR, 0))
2223 #ifdef RE_ENABLE_I18N
2224 int not_initial = 0;
2226 for (node_cnt = 0; node_cnt < next_nodes.nelem; ++node_cnt)
2227 if (dfa->nodes[next_nodes.elems[node_cnt]].type == CHARACTER)
2229 not_initial = dfa->nodes[next_nodes.elems[node_cnt]].mb_partial;
2235 *err = re_node_set_merge (&next_nodes,
2236 dfa->init_state->entrance_nodes);
2237 if (BE (*err != REG_NOERROR, 0))
2241 context = re_string_context_at (mctx->input, cur_str_idx, mctx->eflags,
2242 preg->newline_anchor);
2243 next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2244 /* We don't need to check errors here, since the return value of
2245 this function is next_state and ERR is already set. */
2247 re_node_set_free (&next_nodes);
2248 re_string_skip_bytes (mctx->input, 1);
2252 #ifdef RE_ENABLE_I18N
2253 static reg_errcode_t
2254 transit_state_mb (preg, pstate, mctx)
2255 const regex_t *preg;
2256 re_dfastate_t *pstate;
2257 re_match_context_t *mctx;
2260 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2263 for (i = 0; i < pstate->nodes.nelem; ++i)
2265 re_node_set dest_nodes, *new_nodes;
2266 int cur_node_idx = pstate->nodes.elems[i];
2267 int naccepted = 0, dest_idx;
2268 unsigned int context;
2269 re_dfastate_t *dest_state;
2271 if (dfa->nodes[cur_node_idx].constraint)
2273 context = re_string_context_at (mctx->input,
2274 re_string_cur_idx (mctx->input),
2275 mctx->eflags, preg->newline_anchor);
2276 if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2281 /* How many bytes the node can accepts? */
2282 if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
2283 naccepted = check_node_accept_bytes (preg, cur_node_idx, mctx->input,
2284 re_string_cur_idx (mctx->input));
2288 /* The node can accepts `naccepted' bytes. */
2289 dest_idx = re_string_cur_idx (mctx->input) + naccepted;
2290 mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2291 : mctx->max_mb_elem_len);
2292 err = clean_state_log_if_need (mctx, dest_idx);
2293 if (BE (err != REG_NOERROR, 0))
2296 assert (dfa->nexts[cur_node_idx] != -1);
2298 /* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE,
2299 then we use pstate->nodes.elems[i] instead. */
2300 new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]];
2302 dest_state = mctx->state_log[dest_idx];
2303 if (dest_state == NULL)
2304 dest_nodes = *new_nodes;
2307 err = re_node_set_init_union (&dest_nodes,
2308 dest_state->entrance_nodes, new_nodes);
2309 if (BE (err != REG_NOERROR, 0))
2312 context = re_string_context_at (mctx->input, dest_idx - 1, mctx->eflags,
2313 preg->newline_anchor);
2314 mctx->state_log[dest_idx]
2315 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2316 if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2318 if (dest_state != NULL)
2319 re_node_set_free (&dest_nodes);
2323 #endif /* RE_ENABLE_I18N */
2325 static reg_errcode_t
2326 transit_state_bkref (preg, pstate, mctx)
2327 const regex_t *preg;
2328 re_dfastate_t *pstate;
2329 re_match_context_t *mctx;
2332 re_dfastate_t **work_state_log;
2334 work_state_log = re_malloc (re_dfastate_t *,
2335 re_string_cur_idx (mctx->input) + 1);
2336 if (BE (work_state_log == NULL, 0))
2339 err = transit_state_bkref_loop (preg, &pstate->nodes, work_state_log, mctx);
2340 re_free (work_state_log);
2344 /* Caller must allocate `work_state_log'. */
2346 static reg_errcode_t
2347 transit_state_bkref_loop (preg, nodes, work_state_log, mctx)
2348 const regex_t *preg;
2350 re_dfastate_t **work_state_log;
2351 re_match_context_t *mctx;
2354 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2356 regmatch_t *cur_regs = re_malloc (regmatch_t, preg->re_nsub + 1);
2357 int cur_str_idx = re_string_cur_idx (mctx->input);
2358 if (BE (cur_regs == NULL, 0))
2361 for (i = 0; i < nodes->nelem; ++i)
2363 int dest_str_idx, subexp_idx, prev_nelem, bkc_idx;
2364 int node_idx = nodes->elems[i];
2365 unsigned int context;
2366 re_token_t *node = dfa->nodes + node_idx;
2367 re_node_set *new_dest_nodes;
2368 re_sift_context_t sctx;
2370 /* Check whether `node' is a backreference or not. */
2371 if (node->type == OP_BACK_REF)
2372 subexp_idx = node->opr.idx;
2376 if (node->constraint)
2378 context = re_string_context_at (mctx->input, cur_str_idx,
2379 mctx->eflags, preg->newline_anchor);
2380 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2384 /* `node' is a backreference.
2385 Check the substring which the substring matched. */
2386 sift_ctx_init (&sctx, work_state_log, NULL, node_idx, cur_str_idx,
2388 sctx.cur_bkref = node_idx;
2389 match_ctx_clear_flag (mctx);
2390 err = sift_states_backward (preg, mctx, &sctx);
2391 if (BE (err != REG_NOERROR, 0))
2394 /* And add the epsilon closures (which is `new_dest_nodes') of
2395 the backreference to appropriate state_log. */
2397 assert (dfa->nexts[node_idx] != -1);
2399 for (bkc_idx = 0; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2402 re_dfastate_t *dest_state;
2403 struct re_backref_cache_entry *bkref_ent;
2404 bkref_ent = mctx->bkref_ents + bkc_idx;
2405 if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2407 subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2408 new_dest_nodes = (subexp_len == 0
2409 ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2410 : dfa->eclosures + dfa->nexts[node_idx]);
2411 dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2412 - bkref_ent->subexp_from);
2413 context = (IS_WORD_CHAR (re_string_byte_at (mctx->input,
2415 ? CONTEXT_WORD : 0);
2416 dest_state = mctx->state_log[dest_str_idx];
2417 prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2418 : mctx->state_log[cur_str_idx]->nodes.nelem);
2419 /* Add `new_dest_node' to state_log. */
2420 if (dest_state == NULL)
2422 mctx->state_log[dest_str_idx]
2423 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2425 if (BE (mctx->state_log[dest_str_idx] == NULL
2426 && err != REG_NOERROR, 0))
2431 re_node_set dest_nodes;
2432 err = re_node_set_init_union (&dest_nodes,
2433 dest_state->entrance_nodes,
2435 if (BE (err != REG_NOERROR, 0))
2437 mctx->state_log[dest_str_idx]
2438 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2439 if (BE (mctx->state_log[dest_str_idx] == NULL
2440 && err != REG_NOERROR, 0))
2442 re_node_set_free (&dest_nodes);
2444 /* We need to check recursively if the backreference can epsilon
2447 && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2449 err = transit_state_bkref_loop (preg, new_dest_nodes,
2450 work_state_log, mctx);
2451 if (BE (err != REG_NOERROR, 0))
2460 /* Build transition table for the state.
2461 Return the new table if succeeded, otherwise return NULL. */
2463 static re_dfastate_t **
2464 build_trtable (preg, state, fl_search)
2465 const regex_t *preg;
2466 const re_dfastate_t *state;
2470 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2472 int dests_node_malloced = 0, dest_states_malloced = 0;
2473 int ndests; /* Number of the destination states from `state'. */
2474 re_dfastate_t **trtable;
2475 re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
2476 re_node_set follows, *dests_node;
2480 /* We build DFA states which corresponds to the destination nodes
2481 from `state'. `dests_node[i]' represents the nodes which i-th
2482 destination state contains, and `dests_ch[i]' represents the
2483 characters which i-th destination state accepts. */
2485 if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
2486 dests_node = (re_node_set *)
2487 alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
2491 dests_node = (re_node_set *)
2492 malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
2493 if (BE (dests_node == NULL, 0))
2495 dests_node_malloced = 1;
2497 dests_ch = (bitset *) (dests_node + SBC_MAX);
2499 /* Initialize transiton table. */
2500 trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
2501 if (BE (trtable == NULL, 0))
2503 if (dests_node_malloced)
2508 /* At first, group all nodes belonging to `state' into several
2510 ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch);
2511 if (BE (ndests <= 0, 0))
2513 if (dests_node_malloced)
2515 /* Return NULL in case of an error, trtable otherwise. */
2522 err = re_node_set_alloc (&follows, ndests + 1);
2523 if (BE (err != REG_NOERROR, 0))
2527 if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
2528 + ndests * 3 * sizeof (re_dfastate_t *)))
2529 dest_states = (re_dfastate_t **)
2530 alloca (ndests * 3 * sizeof (re_dfastate_t *));
2534 dest_states = (re_dfastate_t **)
2535 malloc (ndests * 3 * sizeof (re_dfastate_t *));
2536 if (BE (dest_states == NULL, 0))
2539 if (dest_states_malloced)
2541 re_node_set_free (&follows);
2542 for (i = 0; i < ndests; ++i)
2543 re_node_set_free (dests_node + i);
2545 if (dests_node_malloced)
2549 dest_states_malloced = 1;
2551 dest_states_word = dest_states + ndests;
2552 dest_states_nl = dest_states_word + ndests;
2553 bitset_empty (acceptable);
2555 /* Then build the states for all destinations. */
2556 for (i = 0; i < ndests; ++i)
2559 re_node_set_empty (&follows);
2560 /* Merge the follows of this destination states. */
2561 for (j = 0; j < dests_node[i].nelem; ++j)
2563 next_node = dfa->nexts[dests_node[i].elems[j]];
2564 if (next_node != -1)
2566 err = re_node_set_merge (&follows, dfa->eclosures + next_node);
2567 if (BE (err != REG_NOERROR, 0))
2571 /* If search flag is set, merge the initial state. */
2574 #ifdef RE_ENABLE_I18N
2575 int not_initial = 0;
2576 for (j = 0; j < follows.nelem; ++j)
2577 if (dfa->nodes[follows.elems[j]].type == CHARACTER)
2579 not_initial = dfa->nodes[follows.elems[j]].mb_partial;
2585 err = re_node_set_merge (&follows,
2586 dfa->init_state->entrance_nodes);
2587 if (BE (err != REG_NOERROR, 0))
2591 dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
2592 if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
2594 /* If the new state has context constraint,
2595 build appropriate states for these contexts. */
2596 if (dest_states[i]->has_constraint)
2598 dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
2600 if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
2602 dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
2604 if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
2609 dest_states_word[i] = dest_states[i];
2610 dest_states_nl[i] = dest_states[i];
2612 bitset_merge (acceptable, dests_ch[i]);
2615 /* Update the transition table. */
2616 /* For all characters ch...: */
2617 for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
2618 for (j = 0; j < UINT_BITS; ++j, ++ch)
2619 if ((acceptable[i] >> j) & 1)
2621 /* The current state accepts the character ch. */
2622 if (IS_WORD_CHAR (ch))
2624 for (k = 0; k < ndests; ++k)
2625 if ((dests_ch[k][i] >> j) & 1)
2627 /* k-th destination accepts the word character ch. */
2628 trtable[ch] = dest_states_word[k];
2629 /* There must be only one destination which accepts
2630 character ch. See group_nodes_into_DFAstates. */
2634 else /* not WORD_CHAR */
2636 for (k = 0; k < ndests; ++k)
2637 if ((dests_ch[k][i] >> j) & 1)
2639 /* k-th destination accepts the non-word character ch. */
2640 trtable[ch] = dest_states[k];
2641 /* There must be only one destination which accepts
2642 character ch. See group_nodes_into_DFAstates. */
2648 if (bitset_contain (acceptable, NEWLINE_CHAR))
2650 /* The current state accepts newline character. */
2651 for (k = 0; k < ndests; ++k)
2652 if (bitset_contain (dests_ch[k], NEWLINE_CHAR))
2654 /* k-th destination accepts newline character. */
2655 trtable[NEWLINE_CHAR] = dest_states_nl[k];
2656 /* There must be only one destination which accepts
2657 newline. See group_nodes_into_DFAstates. */
2662 if (dest_states_malloced)
2665 re_node_set_free (&follows);
2666 for (i = 0; i < ndests; ++i)
2667 re_node_set_free (dests_node + i);
2669 if (dests_node_malloced)
2675 /* Group all nodes belonging to STATE into several destinations.
2676 Then for all destinations, set the nodes belonging to the destination
2677 to DESTS_NODE[i] and set the characters accepted by the destination
2678 to DEST_CH[i]. This function return the number of destinations. */
2681 group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
2682 const regex_t *preg;
2683 const re_dfastate_t *state;
2684 re_node_set *dests_node;
2688 const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2690 int ndests; /* Number of the destinations from `state'. */
2691 bitset accepts; /* Characters a node can accept. */
2692 const re_node_set *cur_nodes = &state->nodes;
2693 bitset_empty (accepts);
2696 /* For all the nodes belonging to `state', */
2697 for (i = 0; i < cur_nodes->nelem; ++i)
2699 re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
2700 re_token_type_t type = node->type;
2701 unsigned int constraint = node->constraint;
2703 /* Enumerate all single byte character this node can accept. */
2704 if (type == CHARACTER)
2705 bitset_set (accepts, node->opr.c);
2706 else if (type == SIMPLE_BRACKET)
2708 bitset_merge (accepts, node->opr.sbcset);
2710 else if (type == OP_PERIOD)
2712 bitset_set_all (accepts);
2713 if (!(preg->syntax & RE_DOT_NEWLINE))
2714 bitset_clear (accepts, '\n');
2715 if (preg->syntax & RE_DOT_NOT_NULL)
2716 bitset_clear (accepts, '\0');
2721 /* Check the `accepts' and sift the characters which are not
2722 match it the context. */
2725 if (constraint & NEXT_WORD_CONSTRAINT)
2726 for (j = 0; j < BITSET_UINTS; ++j)
2727 accepts[j] &= dfa->word_char[j];
2728 if (constraint & NEXT_NOTWORD_CONSTRAINT)
2729 for (j = 0; j < BITSET_UINTS; ++j)
2730 accepts[j] &= ~dfa->word_char[j];
2731 if (constraint & NEXT_NEWLINE_CONSTRAINT)
2733 int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
2734 bitset_empty (accepts);
2735 if (accepts_newline)
2736 bitset_set (accepts, NEWLINE_CHAR);
2742 /* Then divide `accepts' into DFA states, or create a new
2744 for (j = 0; j < ndests; ++j)
2746 bitset intersec; /* Intersection sets, see below. */
2748 /* Flags, see below. */
2749 int has_intersec, not_subset, not_consumed;
2751 /* Optimization, skip if this state doesn't accept the character. */
2752 if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
2755 /* Enumerate the intersection set of this state and `accepts'. */
2757 for (k = 0; k < BITSET_UINTS; ++k)
2758 has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
2759 /* And skip if the intersection set is empty. */
2763 /* Then check if this state is a subset of `accepts'. */
2764 not_subset = not_consumed = 0;
2765 for (k = 0; k < BITSET_UINTS; ++k)
2767 not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
2768 not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
2771 /* If this state isn't a subset of `accepts', create a
2772 new group state, which has the `remains'. */
2775 bitset_copy (dests_ch[ndests], remains);
2776 bitset_copy (dests_ch[j], intersec);
2777 err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
2778 if (BE (err != REG_NOERROR, 0))
2783 /* Put the position in the current group. */
2784 err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
2785 if (BE (err < 0, 0))
2788 /* If all characters are consumed, go to next node. */
2792 /* Some characters remain, create a new group. */
2795 bitset_copy (dests_ch[ndests], accepts);
2796 err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
2797 if (BE (err != REG_NOERROR, 0))
2800 bitset_empty (accepts);
2806 #ifdef RE_ENABLE_I18N
2807 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
2808 Return the number of the bytes the node accepts.
2809 STR_IDX is the current index of the input string.
2811 This function handles the nodes which can accept one character, or
2812 one collating element like '.', '[a-z]', opposite to the other nodes
2813 can only accept one byte. */
2816 check_node_accept_bytes (preg, node_idx, input, str_idx)
2817 const regex_t *preg;
2818 int node_idx, str_idx;
2819 const re_string_t *input;
2821 const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2822 const re_token_t *node = dfa->nodes + node_idx;
2823 int elem_len = re_string_elem_size_at (input, str_idx);
2824 int char_len = re_string_char_size_at (input, str_idx);
2828 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2830 if (elem_len <= 1 && char_len <= 1)
2832 if (node->type == OP_PERIOD)
2834 /* '.' accepts any one character except the following two cases. */
2835 if ((!(preg->syntax & RE_DOT_NEWLINE) &&
2836 re_string_byte_at (input, str_idx) == '\n') ||
2837 ((preg->syntax & RE_DOT_NOT_NULL) &&
2838 re_string_byte_at (input, str_idx) == '\0'))
2842 else if (node->type == COMPLEX_BRACKET)
2844 const re_charset_t *cset = node->opr.mbcset;
2846 const unsigned char *pin = re_string_get_buffer (input) + str_idx;
2849 wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
2850 ? re_string_wchar_at (input, str_idx) : 0);
2852 /* match with multibyte character? */
2853 for (i = 0; i < cset->nmbchars; ++i)
2854 if (wc == cset->mbchars[i])
2856 match_len = char_len;
2857 goto check_node_accept_bytes_match;
2859 /* match with character_class? */
2860 for (i = 0; i < cset->nchar_classes; ++i)
2862 wctype_t wt = cset->char_classes[i];
2863 if (__iswctype (wc, wt))
2865 match_len = char_len;
2866 goto check_node_accept_bytes_match;
2873 unsigned int in_collseq = 0;
2874 const int32_t *table, *indirect;
2875 const unsigned char *weights, *extra;
2876 const char *collseqwc;
2878 /* This #include defines a local function! */
2879 # include <locale/weight.h>
2881 /* match with collating_symbol? */
2882 if (cset->ncoll_syms)
2883 extra = (const unsigned char *)
2884 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
2885 for (i = 0; i < cset->ncoll_syms; ++i)
2887 const unsigned char *coll_sym = extra + cset->coll_syms[i];
2888 /* Compare the length of input collating element and
2889 the length of current collating element. */
2890 if (*coll_sym != elem_len)
2892 /* Compare each bytes. */
2893 for (j = 0; j < *coll_sym; j++)
2894 if (pin[j] != coll_sym[1 + j])
2898 /* Match if every bytes is equal. */
2900 goto check_node_accept_bytes_match;
2906 if (elem_len <= char_len)
2908 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
2909 in_collseq = collseq_table_lookup (collseqwc, wc);
2912 in_collseq = find_collation_sequence_value (pin, elem_len);
2914 /* match with range expression? */
2915 for (i = 0; i < cset->nranges; ++i)
2916 if (cset->range_starts[i] <= in_collseq
2917 && in_collseq <= cset->range_ends[i])
2919 match_len = elem_len;
2920 goto check_node_accept_bytes_match;
2923 /* match with equivalence_class? */
2924 if (cset->nequiv_classes)
2926 const unsigned char *cp = pin;
2927 table = (const int32_t *)
2928 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
2929 weights = (const unsigned char *)
2930 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
2931 extra = (const unsigned char *)
2932 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
2933 indirect = (const int32_t *)
2934 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
2935 idx = findidx (&cp);
2937 for (i = 0; i < cset->nequiv_classes; ++i)
2939 int32_t equiv_class_idx = cset->equiv_classes[i];
2940 size_t weight_len = weights[idx];
2941 if (weight_len == weights[equiv_class_idx])
2944 while (cnt <= weight_len
2945 && (weights[equiv_class_idx + 1 + cnt]
2946 == weights[idx + 1 + cnt]))
2948 if (cnt > weight_len)
2950 match_len = elem_len;
2951 goto check_node_accept_bytes_match;
2960 /* match with range expression? */
2962 wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
2964 wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
2967 for (i = 0; i < cset->nranges; ++i)
2969 cmp_buf[0] = cset->range_starts[i];
2970 cmp_buf[4] = cset->range_ends[i];
2971 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
2972 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
2974 match_len = char_len;
2975 goto check_node_accept_bytes_match;
2979 check_node_accept_bytes_match:
2980 if (!cset->non_match)
2987 return (elem_len > char_len) ? elem_len : char_len;
2995 find_collation_sequence_value (mbs, mbs_len)
2996 const unsigned char *mbs;
2999 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3004 /* No valid character. Match it as a single byte character. */
3005 const unsigned char *collseq = (const unsigned char *)
3006 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3007 return collseq[mbs[0]];
3014 const unsigned char *extra = (const unsigned char *)
3015 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3019 int mbs_cnt, found = 0;
3020 int32_t elem_mbs_len;
3021 /* Skip the name of collating element name. */
3022 idx = idx + extra[idx] + 1;
3023 elem_mbs_len = extra[idx++];
3024 if (mbs_len == elem_mbs_len)
3026 for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3027 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3029 if (mbs_cnt == elem_mbs_len)
3030 /* Found the entry. */
3033 /* Skip the byte sequence of the collating element. */
3034 idx += elem_mbs_len;
3035 /* Adjust for the alignment. */
3036 idx = (idx + 3) & ~3;
3037 /* Skip the collation sequence value. */
3038 idx += sizeof (uint32_t);
3039 /* Skip the wide char sequence of the collating element. */
3040 idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
3041 /* If we found the entry, return the sequence value. */
3043 return *(uint32_t *) (extra + idx);
3044 /* Skip the collation sequence value. */
3045 idx += sizeof (uint32_t);
3050 #endif /* RE_ENABLE_I18N */
3052 /* Check whether the node accepts the byte which is IDX-th
3053 byte of the INPUT. */
3056 check_node_accept (preg, node, mctx, idx)
3057 const regex_t *preg;
3058 const re_token_t *node;
3059 const re_match_context_t *mctx;
3063 if (node->constraint)
3065 /* The node has constraints. Check whether the current context
3066 satisfies the constraints. */
3067 unsigned int context = re_string_context_at (mctx->input, idx,
3069 preg->newline_anchor);
3070 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
3073 ch = re_string_byte_at (mctx->input, idx);
3074 if (node->type == CHARACTER)
3075 return node->opr.c == ch;
3076 else if (node->type == SIMPLE_BRACKET)
3077 return bitset_contain (node->opr.sbcset, ch);
3078 else if (node->type == OP_PERIOD)
3079 return !((ch == '\n' && !(preg->syntax & RE_DOT_NEWLINE))
3080 || (ch == '\0' && (preg->syntax & RE_DOT_NOT_NULL)));
3085 /* Extend the buffers, if the buffers have run out. */
3087 static reg_errcode_t
3088 extend_buffers (mctx)
3089 re_match_context_t *mctx;
3092 re_string_t *pstr = mctx->input;
3094 /* Double the lengthes of the buffers. */
3095 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
3096 if (BE (ret != REG_NOERROR, 0))
3099 if (mctx->state_log != NULL)
3101 /* And double the length of state_log. */
3102 mctx->state_log = re_realloc (mctx->state_log, re_dfastate_t *,
3103 pstr->bufs_len * 2);
3104 if (BE (mctx->state_log == NULL, 0))
3108 /* Then reconstruct the buffers. */
3111 #ifdef RE_ENABLE_I18N
3113 build_wcs_upper_buffer (pstr);
3115 #endif /* RE_ENABLE_I18N */
3116 build_upper_buffer (pstr);
3120 #ifdef RE_ENABLE_I18N
3122 build_wcs_buffer (pstr);
3124 #endif /* RE_ENABLE_I18N */
3126 if (pstr->trans != NULL)
3127 re_string_translate_buffer (pstr);
3129 pstr->valid_len = pstr->bufs_len;
3136 /* Functions for matching context. */
3138 static reg_errcode_t
3139 match_ctx_init (mctx, eflags, input, n)
3140 re_match_context_t *mctx;
3144 mctx->eflags = eflags;
3145 mctx->input = input;
3146 mctx->match_last = -1;
3149 mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
3150 if (BE (mctx->bkref_ents == NULL, 0))
3154 mctx->bkref_ents = NULL;
3155 mctx->nbkref_ents = 0;
3156 mctx->abkref_ents = n;
3157 mctx->max_mb_elem_len = 0;
3162 match_ctx_free (mctx)
3163 re_match_context_t *mctx;
3165 re_free (mctx->bkref_ents);
3168 /* Add a new backreference entry to the cache. */
3170 static reg_errcode_t
3171 match_ctx_add_entry (mctx, node, str_idx, from, to)
3172 re_match_context_t *mctx;
3173 int node, str_idx, from, to;
3175 if (mctx->nbkref_ents >= mctx->abkref_ents)
3177 mctx->bkref_ents = re_realloc (mctx->bkref_ents,
3178 struct re_backref_cache_entry,
3179 mctx->abkref_ents * 2);
3180 if (BE (mctx->bkref_ents == NULL, 0))
3182 memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
3183 sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
3184 mctx->abkref_ents *= 2;
3186 mctx->bkref_ents[mctx->nbkref_ents].node = node;
3187 mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
3188 mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
3189 mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
3190 mctx->bkref_ents[mctx->nbkref_ents++].flag = 0;
3191 if (mctx->max_mb_elem_len < to - from)
3192 mctx->max_mb_elem_len = to - from;
3197 match_ctx_clear_flag (mctx)
3198 re_match_context_t *mctx;
3201 for (i = 0; i < mctx->nbkref_ents; ++i)
3203 mctx->bkref_ents[i].flag = 0;
3208 sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx,
3210 re_sift_context_t *sctx;
3211 re_dfastate_t **sifted_sts, **limited_sts;
3212 int last_node, last_str_idx, check_subexp;
3214 sctx->sifted_states = sifted_sts;
3215 sctx->limited_states = limited_sts;
3216 sctx->last_node = last_node;
3217 sctx->last_str_idx = last_str_idx;
3218 sctx->check_subexp = check_subexp;
3219 sctx->cur_bkref = -1;
3220 sctx->cls_subexp_idx = -1;
3221 re_node_set_init_empty (&sctx->limits);