1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 #if defined HAVE_WCHAR_H || defined _LIBC
29 #endif /* HAVE_WCHAR_H || _LIBC */
30 #if defined HAVE_WCTYPE_H || defined _LIBC
32 #endif /* HAVE_WCTYPE_H || _LIBC */
35 # ifndef _RE_DEFINE_LOCALE_FUNCTIONS
36 # define _RE_DEFINE_LOCALE_FUNCTIONS 1
37 # include <locale/localeinfo.h>
38 # include <locale/elem-hash.h>
39 # include <locale/coll-lookup.h>
44 #include "regex_internal.h"
46 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
47 re_string_t *input, int n);
48 static void match_ctx_free (re_match_context_t *cache);
49 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
50 int str_idx, int from, int to);
51 static void match_ctx_clear_flag (re_match_context_t *mctx);
52 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
53 re_dfastate_t **limited_sts, int last_node,
54 int last_str_idx, int check_subexp);
55 static reg_errcode_t re_search_internal (const regex_t *preg,
56 const char *string, int length,
57 int start, int range, int stop,
58 size_t nmatch, regmatch_t pmatch[],
60 static int re_search_2_stub (struct re_pattern_buffer *bufp,
61 const char *string1, int length1,
62 const char *string2, int length2,
63 int start, int range, struct re_registers *regs,
64 int stop, int ret_len);
65 static int re_search_stub (struct re_pattern_buffer *bufp,
66 const char *string, int length, int start,
67 int range, int stop, struct re_registers *regs,
69 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
70 int nregs, int regs_allocated);
71 static inline re_dfastate_t *acquire_init_state_context (reg_errcode_t *err,
73 const re_match_context_t *mctx,
75 static int check_matching (const regex_t *preg, re_match_context_t *mctx,
76 int fl_search, int fl_longest_match);
77 static int check_halt_node_context (const re_dfa_t *dfa, int node,
78 unsigned int context);
79 static int check_halt_state_context (const regex_t *preg,
80 const re_dfastate_t *state,
81 const re_match_context_t *mctx, int idx);
82 static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node,
83 int cur_idx, int nmatch);
84 static int proceed_next_node (const regex_t *preg, int nregs, regmatch_t *regs,
85 const re_match_context_t *mctx,
86 int *pidx, int node, re_node_set *eps_via_nodes,
87 struct re_fail_stack_t *fs);
88 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
89 int str_idx, int *dests, int nregs,
91 re_node_set *eps_via_nodes);
92 static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
93 regmatch_t *regs, re_node_set *eps_via_nodes);
94 static reg_errcode_t set_regs (const regex_t *preg,
95 const re_match_context_t *mctx,
96 size_t nmatch, regmatch_t *pmatch,
98 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
100 #ifdef RE_ENABLE_I18N
101 static int sift_states_iter_mb (const regex_t *preg,
102 const re_match_context_t *mctx,
103 re_sift_context_t *sctx,
104 int node_idx, int str_idx, int max_str_idx);
105 #endif /* RE_ENABLE_I18N */
106 static reg_errcode_t sift_states_backward (const regex_t *preg,
107 re_match_context_t *mctx,
108 re_sift_context_t *sctx);
109 static reg_errcode_t update_cur_sifted_state (const regex_t *preg,
110 re_match_context_t *mctx,
111 re_sift_context_t *sctx,
113 re_node_set *dest_nodes);
114 static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
115 re_node_set *dest_nodes,
116 const re_node_set *candidates);
117 static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
118 re_node_set *dest_nodes,
119 const re_node_set *and_nodes);
120 static int check_dst_limits (re_dfa_t *dfa, re_node_set *limits,
121 re_match_context_t *mctx, int dst_node,
122 int dst_idx, int src_node, int src_idx);
123 static int check_dst_limits_calc_pos (re_dfa_t *dfa, re_match_context_t *mctx,
124 int limit, re_node_set *eclosures,
125 int subexp_idx, int node, int str_idx);
126 static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
127 re_node_set *dest_nodes,
128 const re_node_set *candidates,
130 struct re_backref_cache_entry *bkref_ents,
132 static reg_errcode_t search_subexp (const regex_t *preg,
133 re_match_context_t *mctx,
134 re_sift_context_t *sctx, int str_idx,
135 re_node_set *dest_nodes);
136 static reg_errcode_t sift_states_bkref (const regex_t *preg,
137 re_match_context_t *mctx,
138 re_sift_context_t *sctx,
139 int str_idx, re_node_set *dest_nodes);
140 static reg_errcode_t clean_state_log_if_need (re_match_context_t *mctx,
141 int next_state_log_idx);
142 static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
143 re_dfastate_t **src, int num);
144 static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
145 re_match_context_t *mctx,
146 re_dfastate_t *state, int fl_search);
147 static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
148 re_dfastate_t *pstate,
150 re_match_context_t *mctx);
151 #ifdef RE_ENABLE_I18N
152 static reg_errcode_t transit_state_mb (const regex_t *preg,
153 re_dfastate_t *pstate,
154 re_match_context_t *mctx);
155 #endif /* RE_ENABLE_I18N */
156 static reg_errcode_t transit_state_bkref (const regex_t *preg,
157 re_dfastate_t *pstate,
158 re_match_context_t *mctx);
159 static reg_errcode_t transit_state_bkref_loop (const regex_t *preg,
161 re_dfastate_t **work_state_log,
162 re_match_context_t *mctx);
163 static re_dfastate_t **build_trtable (const regex_t *dfa,
164 const re_dfastate_t *state,
166 #ifdef RE_ENABLE_I18N
167 static int check_node_accept_bytes (const regex_t *preg, int node_idx,
168 const re_string_t *input, int idx);
170 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
173 #endif /* RE_ENABLE_I18N */
174 static int group_nodes_into_DFAstates (const regex_t *dfa,
175 const re_dfastate_t *state,
176 re_node_set *states_node,
178 static int check_node_accept (const regex_t *preg, const re_token_t *node,
179 const re_match_context_t *mctx, int idx);
180 static reg_errcode_t extend_buffers (re_match_context_t *mctx);
182 /* Entry point for POSIX code. */
184 /* regexec searches for a given pattern, specified by PREG, in the
187 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
188 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
189 least NMATCH elements, and we set them to the offsets of the
190 corresponding matched substrings.
192 EFLAGS specifies `execution flags' which affect matching: if
193 REG_NOTBOL is set, then ^ does not match at the beginning of the
194 string; if REG_NOTEOL is set, then $ does not match at the end.
196 We return 0 if we find a match and REG_NOMATCH if not. */
199 regexec (preg, string, nmatch, pmatch, eflags)
200 const regex_t *__restrict preg;
201 const char *__restrict string;
207 int length = strlen (string);
209 err = re_search_internal (preg, string, length, 0, length, length, 0,
212 err = re_search_internal (preg, string, length, 0, length, length, nmatch,
214 return err != REG_NOERROR;
217 weak_alias (__regexec, regexec)
220 /* Entry points for GNU code. */
222 /* re_match, re_search, re_match_2, re_search_2
224 The former two functions operate on STRING with length LENGTH,
225 while the later two operate on concatenation of STRING1 and STRING2
226 with lengths LENGTH1 and LENGTH2, respectively.
228 re_match() matches the compiled pattern in BUFP against the string,
229 starting at index START.
231 re_search() first tries matching at index START, then it tries to match
232 starting from index START + 1, and so on. The last start position tried
233 is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
236 The parameter STOP of re_{match,search}_2 specifies that no match exceeding
237 the first STOP characters of the concatenation of the strings should be
240 If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
241 and all groups is stroed in REGS. (For the "_2" variants, the offsets are
242 computed relative to the concatenation, not relative to the individual
245 On success, re_match* functions return the length of the match, re_search*
246 return the position of the start of the match. Return value -1 means no
247 match was found and -2 indicates an internal error. */
250 re_match (bufp, string, length, start, regs)
251 struct re_pattern_buffer *bufp;
254 struct re_registers *regs;
256 return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
259 weak_alias (__re_match, re_match)
263 re_search (bufp, string, length, start, range, regs)
264 struct re_pattern_buffer *bufp;
266 int length, start, range;
267 struct re_registers *regs;
269 return re_search_stub (bufp, string, length, start, range, length, regs, 0);
272 weak_alias (__re_search, re_search)
276 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
277 struct re_pattern_buffer *bufp;
278 const char *string1, *string2;
279 int length1, length2, start, stop;
280 struct re_registers *regs;
282 return re_search_2_stub (bufp, string1, length1, string2, length2,
283 start, 0, regs, stop, 1);
286 weak_alias (__re_match_2, re_match_2)
290 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
291 struct re_pattern_buffer *bufp;
292 const char *string1, *string2;
293 int length1, length2, start, range, stop;
294 struct re_registers *regs;
296 return re_search_2_stub (bufp, string1, length1, string2, length2,
297 start, range, regs, stop, 0);
300 weak_alias (__re_search_2, re_search_2)
304 re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
306 struct re_pattern_buffer *bufp;
307 const char *string1, *string2;
308 int length1, length2, start, range, stop, ret_len;
309 struct re_registers *regs;
313 int len = length1 + length2;
316 if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
319 /* Concatenate the strings. */
323 char *s = re_malloc (char, len);
325 if (BE (s == NULL, 0))
327 memcpy (s, string1, length1);
328 memcpy (s + length1, string2, length2);
337 rval = re_search_stub (bufp, str, len, start, range, stop, regs,
340 re_free ((char *) str);
344 /* The parameters have the same meaning as those of re_search.
345 Additional parameters:
346 If RET_LEN is nonzero the length of the match is returned (re_match style);
347 otherwise the position of the match is returned. */
350 re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
351 struct re_pattern_buffer *bufp;
353 int length, start, range, stop, ret_len;
354 struct re_registers *regs;
356 reg_errcode_t result;
361 /* Check for out-of-range. */
362 if (BE (start < 0 || start > length, 0))
364 if (BE (start + range > length, 0))
365 range = length - start;
366 else if (BE (start + range < 0, 0))
369 eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
370 eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
372 /* Compile fastmap if we haven't yet. */
373 if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
374 re_compile_fastmap (bufp);
376 if (BE (bufp->no_sub, 0))
379 /* We need at least 1 register. */
382 else if (BE (bufp->regs_allocated == REGS_FIXED &&
383 regs->num_regs < bufp->re_nsub + 1, 0))
385 nregs = regs->num_regs;
386 if (BE (nregs < 1, 0))
388 /* Nothing can be copied to regs. */
394 nregs = bufp->re_nsub + 1;
395 pmatch = re_malloc (regmatch_t, nregs);
396 if (BE (pmatch == NULL, 0))
399 result = re_search_internal (bufp, string, length, start, range, stop,
400 nregs, pmatch, eflags);
404 /* I hope we needn't fill ther regs with -1's when no match was found. */
405 if (result != REG_NOERROR)
407 else if (regs != NULL)
409 /* If caller wants register contents data back, copy them. */
410 bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
411 bufp->regs_allocated);
412 if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
416 if (BE (rval == 0, 1))
420 assert (pmatch[0].rm_so == start);
421 rval = pmatch[0].rm_eo - start;
424 rval = pmatch[0].rm_so;
431 re_copy_regs (regs, pmatch, nregs, regs_allocated)
432 struct re_registers *regs;
434 int nregs, regs_allocated;
436 int rval = REGS_REALLOCATE;
438 int need_regs = nregs + 1;
439 /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
442 /* Have the register data arrays been allocated? */
443 if (regs_allocated == REGS_UNALLOCATED)
444 { /* No. So allocate them with malloc. */
445 regs->start = re_malloc (regoff_t, need_regs);
446 if (BE (regs->start == NULL, 0))
447 return REGS_UNALLOCATED;
448 regs->end = re_malloc (regoff_t, need_regs);
449 if (BE (regs->end == NULL, 0))
451 re_free (regs->start);
452 return REGS_UNALLOCATED;
454 regs->num_regs = need_regs;
456 else if (regs_allocated == REGS_REALLOCATE)
457 { /* Yes. If we need more elements than were already
458 allocated, reallocate them. If we need fewer, just
460 if (need_regs > regs->num_regs)
462 regs->start = re_realloc (regs->start, regoff_t, need_regs);
463 if (BE (regs->start == NULL, 0))
465 if (regs->end != NULL)
467 return REGS_UNALLOCATED;
469 regs->end = re_realloc (regs->end, regoff_t, need_regs);
470 if (BE (regs->end == NULL, 0))
472 re_free (regs->start);
473 return REGS_UNALLOCATED;
475 regs->num_regs = need_regs;
480 assert (regs_allocated == REGS_FIXED);
481 /* This function may not be called with REGS_FIXED and nregs too big. */
482 assert (regs->num_regs >= nregs);
487 for (i = 0; i < nregs; ++i)
489 regs->start[i] = pmatch[i].rm_so;
490 regs->end[i] = pmatch[i].rm_eo;
492 for ( ; i < regs->num_regs; ++i)
493 regs->start[i] = regs->end[i] = -1;
498 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
499 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
500 this memory for recording register information. STARTS and ENDS
501 must be allocated using the malloc library routine, and must each
502 be at least NUM_REGS * sizeof (regoff_t) bytes long.
504 If NUM_REGS == 0, then subsequent matches should allocate their own
507 Unless this function is called, the first search or match using
508 PATTERN_BUFFER will allocate its own register data, without
509 freeing the old data. */
512 re_set_registers (bufp, regs, num_regs, starts, ends)
513 struct re_pattern_buffer *bufp;
514 struct re_registers *regs;
516 regoff_t *starts, *ends;
520 bufp->regs_allocated = REGS_REALLOCATE;
521 regs->num_regs = num_regs;
522 regs->start = starts;
527 bufp->regs_allocated = REGS_UNALLOCATED;
529 regs->start = regs->end = (regoff_t *) 0;
533 weak_alias (__re_set_registers, re_set_registers)
536 /* Entry points compatible with 4.2 BSD regex library. We don't define
537 them unless specifically requested. */
539 #if defined _REGEX_RE_COMP || defined _LIBC
547 return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
549 #endif /* _REGEX_RE_COMP */
551 static re_node_set empty_set;
553 /* Internal entry point. */
555 /* Searches for a compiled pattern PREG in the string STRING, whose
556 length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
557 mingings with regexec. START, and RANGE have the same meanings
559 Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
560 otherwise return the error code.
561 Note: We assume front end functions already check ranges.
562 (START + RANGE >= 0 && START + RANGE <= LENGTH) */
565 re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
569 int length, start, range, stop, eflags;
574 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
576 int left_lim, right_lim, incr;
577 int fl_longest_match, match_first, match_last = -1;
578 re_match_context_t mctx;
579 char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate)
580 ? preg->fastmap : NULL);
582 /* Check if the DFA haven't been compiled. */
583 if (BE (preg->used == 0 || dfa->init_state == NULL
584 || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
585 || dfa->init_state_begbuf == NULL, 0))
588 re_node_set_init_empty (&empty_set);
589 memset (&mctx, '\0', sizeof (re_match_context_t));
591 /* We must check the longest matching, if nmatch > 0. */
592 fl_longest_match = (nmatch != 0);
594 err = re_string_allocate (&input, string, length, dfa->nodes_len + 1,
595 preg->translate, preg->syntax & RE_ICASE);
596 if (BE (err != REG_NOERROR, 0))
600 err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2);
601 if (BE (err != REG_NOERROR, 0))
604 /* We will log all the DFA states through which the dfa pass,
605 if nmatch > 1, or this dfa has "multibyte node", which is a
606 back-reference or a node which can accept multibyte character or
607 multi character collating element. */
608 if (nmatch > 1 || dfa->has_mb_node)
610 mctx.state_log = re_malloc (re_dfastate_t *, dfa->nodes_len + 1);
611 if (BE (mctx.state_log == NULL, 0))
618 mctx.state_log = NULL;
621 /* We assume front-end functions already check them. */
622 assert (start + range >= 0 && start + range <= length);
626 input.tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
627 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
629 /* Check incrementally whether of not the input string match. */
630 incr = (range < 0) ? -1 : 1;
631 left_lim = (range < 0) ? start + range : start;
632 right_lim = (range < 0) ? start : start + range;
636 /* At first get the current byte from input string. */
638 if (MB_CUR_MAX > 1 && (preg->syntax & RE_ICASE || preg->translate))
640 /* In this case, we can't determin easily the current byte,
641 since it might be a component byte of a multibyte character.
642 Then we use the constructed buffer instead. */
643 /* If MATCH_FIRST is out of the valid range, reconstruct the
645 if (input.raw_mbs_idx + input.valid_len <= match_first)
647 err = re_string_reconstruct (&input, match_first, eflags,
648 preg->newline_anchor);
649 if (BE (err != REG_NOERROR, 0))
652 /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
653 Note that MATCH_FIRST must not be smaller than 0. */
654 ch = ((match_first >= length) ? 0
655 : re_string_byte_at (&input, match_first - input.raw_mbs_idx));
659 /* We apply translate/conversion manually, since it is trivial
661 /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
662 Note that MATCH_FIRST must not be smaller than 0. */
663 ch = (match_first < length) ? (unsigned char)string[match_first] : 0;
664 /* Apply translation if we need. */
665 ch = preg->translate ? preg->translate[ch] : ch;
666 /* In case of case insensitive mode, convert to upper case. */
667 ch = ((preg->syntax & RE_ICASE) && islower (ch)) ? toupper (ch) : ch;
670 /* Eliminate inappropriate one by fastmap. */
671 if (preg->can_be_null || fastmap == NULL || fastmap[ch])
673 /* Reconstruct the buffers so that the matcher can assume that
674 the matching starts from the begining of the buffer. */
675 err = re_string_reconstruct (&input, match_first, eflags,
676 preg->newline_anchor);
677 if (BE (err != REG_NOERROR, 0))
679 #ifdef RE_ENABLE_I18N
680 /* Eliminate it when it is a component of a multibyte character
681 and isn't the head of a multibyte character. */
682 if (MB_CUR_MAX == 1 || re_string_first_byte (&input, 0))
685 /* It seems to be appropriate one, then use the matcher. */
686 /* We assume that the matching starts from 0. */
687 mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
688 match_last = check_matching (preg, &mctx, 0, fl_longest_match);
689 if (match_last != -1)
691 if (BE (match_last == -2, 0))
697 break; /* We found a matching. */
701 /* Update counter. */
703 if (match_first < left_lim || right_lim < match_first)
707 /* Set pmatch[] if we need. */
708 if (match_last != -1 && nmatch > 0)
712 /* Initialize registers. */
713 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
714 pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
716 /* Set the points where matching start/end. */
718 mctx.match_last = pmatch[0].rm_eo = match_last;
720 if (!preg->no_sub && nmatch > 1)
722 /* We need the ranges of all the subexpressions. */
724 re_dfastate_t **sifted_states;
725 re_dfastate_t **lim_states = NULL;
726 re_dfastate_t *pstate = mctx.state_log[match_last];
727 re_sift_context_t sctx;
729 assert (mctx.state_log != NULL);
731 halt_node = check_halt_state_context (preg, pstate, &mctx,
733 if (dfa->has_plural_match)
735 match_ctx_clear_flag (&mctx);
736 sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
737 if (BE (sifted_states == NULL, 0))
744 lim_states = calloc (sizeof (re_dfastate_t *),
746 if (BE (lim_states == NULL, 0))
748 re_free (sifted_states);
753 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
755 err = sift_states_backward (preg, &mctx, &sctx);
756 re_node_set_free (&sctx.limits);
757 if (BE (err != REG_NOERROR, 0))
759 re_free (sifted_states);
760 re_free (lim_states);
763 if (lim_states != NULL)
765 err = merge_state_array (dfa, sifted_states, lim_states,
767 re_free (lim_states);
768 if (BE (err != REG_NOERROR, 0))
770 re_free (sifted_states);
774 re_free (mctx.state_log);
775 mctx.state_log = sifted_states;
777 mctx.last_node = halt_node;
778 err = set_regs (preg, &mctx, nmatch, pmatch,
779 dfa->has_plural_match && dfa->nbackref > 0);
780 if (BE (err != REG_NOERROR, 0))
784 /* At last, add the offset to the each registers, since we slided
785 the buffers so that We can assume that the matching starts from 0. */
786 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
787 if (pmatch[reg_idx].rm_so != -1)
789 pmatch[reg_idx].rm_so += match_first;
790 pmatch[reg_idx].rm_eo += match_first;
793 err = (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
795 re_free (mctx.state_log);
797 match_ctx_free (&mctx);
798 re_string_destruct (&input);
802 /* Acquire an initial state and return it.
803 We must select appropriate initial state depending on the context,
804 since initial states may have constraints like "\<", "^", etc.. */
806 static inline re_dfastate_t *
807 acquire_init_state_context (err, preg, mctx, idx)
810 const re_match_context_t *mctx;
813 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
816 if (dfa->init_state->has_constraint)
818 unsigned int context;
819 context = re_string_context_at (mctx->input, idx - 1, mctx->eflags,
820 preg->newline_anchor);
821 if (IS_WORD_CONTEXT (context))
822 return dfa->init_state_word;
823 else if (IS_ORDINARY_CONTEXT (context))
824 return dfa->init_state;
825 else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
826 return dfa->init_state_begbuf;
827 else if (IS_NEWLINE_CONTEXT (context))
828 return dfa->init_state_nl;
829 else if (IS_BEGBUF_CONTEXT (context))
831 /* It is relatively rare case, then calculate on demand. */
832 return re_acquire_state_context (err, dfa,
833 dfa->init_state->entrance_nodes,
837 /* Must not happen? */
838 return dfa->init_state;
841 return dfa->init_state;
844 /* Check whether the regular expression match input string INPUT or not,
845 and return the index where the matching end, return -1 if not match,
846 or return -2 in case of an error.
847 FL_SEARCH means we must search where the matching starts,
848 FL_LONGEST_MATCH means we want the POSIX longest matching.
849 Note that the matcher assume that the maching starts from the current
850 index of the buffer. */
853 check_matching (preg, mctx, fl_search, fl_longest_match)
855 re_match_context_t *mctx;
856 int fl_search, fl_longest_match;
861 int cur_str_idx = re_string_cur_idx (mctx->input);
862 re_dfastate_t *cur_state;
864 cur_state = acquire_init_state_context (&err, preg, mctx, cur_str_idx);
865 /* An initial state must not be NULL(invalid state). */
866 if (BE (cur_state == NULL, 0))
868 if (mctx->state_log != NULL)
869 mctx->state_log[cur_str_idx] = cur_state;
871 if (cur_state->has_backref)
874 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
875 for (i = 0; i < cur_state->nodes.nelem; ++i)
877 int node = cur_state->nodes.elems[i];
878 re_token_type_t type = dfa->nodes[node].type;
879 if (type == OP_BACK_REF)
882 for (clexp_idx = 0; clexp_idx < cur_state->nodes.nelem;
885 re_token_t *clexp_node;
886 clexp_node = dfa->nodes + cur_state->nodes.elems[clexp_idx];
887 if (clexp_node->type == OP_CLOSE_SUBEXP
888 && clexp_node->opr.idx + 1== dfa->nodes[node].opr.idx)
890 err = match_ctx_add_entry (mctx, node, 0, 0, 0);
891 if (BE (err != REG_NOERROR, 0))
900 /* If the RE accepts NULL string. */
903 if (!cur_state->has_constraint
904 || check_halt_state_context (preg, cur_state, mctx, cur_str_idx))
906 if (!fl_longest_match)
910 match_last = cur_str_idx;
916 while (!re_string_eoi (mctx->input))
918 cur_state = transit_state (&err, preg, mctx, cur_state,
919 fl_search && !match);
920 if (cur_state == NULL) /* Reached at the invalid state or an error. */
922 cur_str_idx = re_string_cur_idx (mctx->input);
923 if (BE (err != REG_NOERROR, 0))
925 if (fl_search && !match)
927 /* Restart from initial state, since we are searching
928 the point from where matching start. */
929 #ifdef RE_ENABLE_I18N
931 || re_string_first_byte (mctx->input, cur_str_idx))
932 #endif /* RE_ENABLE_I18N */
933 cur_state = acquire_init_state_context (&err, preg, mctx,
935 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
937 if (mctx->state_log != NULL)
938 mctx->state_log[cur_str_idx] = cur_state;
940 else if (!fl_longest_match && match)
942 else /* (fl_longest_match && match) || (!fl_search && !match) */
944 if (mctx->state_log == NULL)
948 int max = mctx->state_log_top;
949 for (; cur_str_idx <= max; ++cur_str_idx)
950 if (mctx->state_log[cur_str_idx] != NULL)
952 if (cur_str_idx > max)
958 if (cur_state != NULL && cur_state->halt)
960 /* Reached at a halt state.
961 Check the halt state can satisfy the current context. */
962 if (!cur_state->has_constraint
963 || check_halt_state_context (preg, cur_state, mctx,
964 re_string_cur_idx (mctx->input)))
966 /* We found an appropriate halt state. */
967 match_last = re_string_cur_idx (mctx->input);
969 if (!fl_longest_match)
977 /* Check NODE match the current context. */
979 static int check_halt_node_context (dfa, node, context)
982 unsigned int context;
984 re_token_type_t type = dfa->nodes[node].type;
985 unsigned int constraint = dfa->nodes[node].constraint;
986 if (type != END_OF_RE)
990 if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
995 /* Check the halt state STATE match the current context.
996 Return 0 if not match, if the node, STATE has, is a halt node and
997 match the context, return the node. */
1000 check_halt_state_context (preg, state, mctx, idx)
1001 const regex_t *preg;
1002 const re_dfastate_t *state;
1003 const re_match_context_t *mctx;
1006 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1008 unsigned int context;
1010 assert (state->halt);
1012 context = re_string_context_at (mctx->input, idx, mctx->eflags,
1013 preg->newline_anchor);
1014 for (i = 0; i < state->nodes.nelem; ++i)
1015 if (check_halt_node_context (dfa, state->nodes.elems[i], context))
1016 return state->nodes.elems[i];
1020 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1021 corresponding to the DFA).
1022 Return the destination node, and update EPS_VIA_NODES, return -1 in case
1026 proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs)
1027 const regex_t *preg;
1029 const re_match_context_t *mctx;
1030 int nregs, *pidx, node;
1031 re_node_set *eps_via_nodes;
1032 struct re_fail_stack_t *fs;
1034 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1035 int i, err, dest_node;
1037 if (IS_EPSILON_NODE (dfa->nodes[node].type))
1039 re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1040 int ndest, dest_nodes[2];
1041 err = re_node_set_insert (eps_via_nodes, node);
1042 if (BE (err < 0, 0))
1044 /* Pick up valid destinations. */
1045 for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i)
1047 int candidate = dfa->edests[node].elems[i];
1048 if (!re_node_set_contains (cur_nodes, candidate))
1050 dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0];
1051 dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1];
1055 return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0);
1056 /* In order to avoid infinite loop like "(a*)*". */
1057 if (re_node_set_contains (eps_via_nodes, dest_nodes[0]))
1058 return dest_nodes[1];
1060 push_fail_stack (fs, *pidx, dest_nodes, nregs, regs, eps_via_nodes);
1061 return dest_nodes[0];
1066 re_token_type_t type = dfa->nodes[node].type;
1068 #ifdef RE_ENABLE_I18N
1069 if (ACCEPT_MB_NODE (type))
1070 naccepted = check_node_accept_bytes (preg, node, mctx->input, *pidx);
1072 #endif /* RE_ENABLE_I18N */
1073 if (type == OP_BACK_REF)
1075 int subexp_idx = dfa->nodes[node].opr.idx;
1076 naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1079 if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1083 char *buf = re_string_get_buffer (mctx->input);
1084 if (strncmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1092 err = re_node_set_insert (eps_via_nodes, node);
1093 if (BE (err < 0, 0))
1095 dest_node = dfa->edests[node].elems[0];
1096 if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1103 || check_node_accept (preg, dfa->nodes + node, mctx, *pidx))
1105 dest_node = dfa->nexts[node];
1106 *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1107 if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1108 || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1111 re_node_set_empty (eps_via_nodes);
1118 static reg_errcode_t
1119 push_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes)
1120 struct re_fail_stack_t *fs;
1121 int str_idx, *dests, nregs;
1123 re_node_set *eps_via_nodes;
1126 int num = fs->num++;
1127 if (fs->num == fs->alloc)
1129 struct re_fail_stack_ent_t *new_array;
1131 new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1133 if (new_array == NULL)
1135 fs->stack = new_array;
1137 fs->stack[num].idx = str_idx;
1138 fs->stack[num].node = dests[1];
1139 fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1140 memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1141 err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1146 pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
1147 struct re_fail_stack_t *fs;
1150 re_node_set *eps_via_nodes;
1152 int num = --fs->num;
1154 *pidx = fs->stack[num].idx;
1155 memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1156 re_node_set_free (eps_via_nodes);
1157 re_free (fs->stack[num].regs);
1158 *eps_via_nodes = fs->stack[num].eps_via_nodes;
1159 return fs->stack[num].node;
1162 /* Set the positions where the subexpressions are starts/ends to registers
1164 Note: We assume that pmatch[0] is already set, and
1165 pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1). */
1167 static reg_errcode_t
1168 set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
1169 const regex_t *preg;
1170 const re_match_context_t *mctx;
1175 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1176 int idx, cur_node, real_nmatch;
1177 re_node_set eps_via_nodes;
1178 struct re_fail_stack_t *fs;
1179 struct re_fail_stack_t fs_body = {0, 2, NULL};
1181 assert (nmatch > 1);
1182 assert (mctx->state_log != NULL);
1187 fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1191 cur_node = dfa->init_node;
1192 real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
1193 re_node_set_init_empty (&eps_via_nodes);
1194 for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1196 update_regs (dfa, pmatch, cur_node, idx, real_nmatch);
1197 if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1202 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1203 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1205 if (reg_idx == nmatch)
1207 re_node_set_free (&eps_via_nodes);
1208 return free_fail_stack_return (fs);
1210 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1215 re_node_set_free (&eps_via_nodes);
1220 /* Proceed to next node. */
1221 cur_node = proceed_next_node (preg, nmatch, pmatch, mctx, &idx, cur_node,
1222 &eps_via_nodes, fs);
1224 if (BE (cur_node < 0, 0))
1229 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1233 re_node_set_free (&eps_via_nodes);
1238 re_node_set_free (&eps_via_nodes);
1239 return free_fail_stack_return (fs);
1242 static reg_errcode_t
1243 free_fail_stack_return (fs)
1244 struct re_fail_stack_t *fs;
1249 for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1251 re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1252 re_free (fs->stack[fs_idx].regs);
1254 re_free (fs->stack);
1260 update_regs (dfa, pmatch, cur_node, cur_idx, nmatch)
1263 int cur_node, cur_idx, nmatch;
1265 int type = dfa->nodes[cur_node].type;
1267 if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP)
1269 reg_num = dfa->nodes[cur_node].opr.idx + 1;
1270 if (reg_num >= nmatch)
1272 if (type == OP_OPEN_SUBEXP)
1274 /* We are at the first node of this sub expression. */
1275 pmatch[reg_num].rm_so = cur_idx;
1276 pmatch[reg_num].rm_eo = -1;
1278 else if (type == OP_CLOSE_SUBEXP)
1279 /* We are at the first node of this sub expression. */
1280 pmatch[reg_num].rm_eo = cur_idx;
1283 #define NUMBER_OF_STATE 1
1285 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1286 and sift the nodes in each states according to the following rules.
1287 Updated state_log will be wrote to STATE_LOG.
1289 Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1290 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1291 If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1292 the LAST_NODE, we throw away the node `a'.
1293 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1294 string `s' and transit to `b':
1295 i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1297 ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1298 throwed away, we throw away the node `a'.
1299 3. When 0 <= STR_IDX < n and 'a' epsilon transit to 'b':
1300 i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1302 ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is throwed away,
1303 we throw away the node `a'. */
1305 #define STATE_NODE_CONTAINS(state,node) \
1306 ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1308 static reg_errcode_t
1309 sift_states_backward (preg, mctx, sctx)
1310 const regex_t *preg;
1311 re_match_context_t *mctx;
1312 re_sift_context_t *sctx;
1315 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1317 int str_idx = sctx->last_str_idx;
1318 re_node_set cur_dest;
1319 re_node_set *cur_src; /* Points the state_log[str_idx]->nodes */
1322 assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1324 cur_src = &mctx->state_log[str_idx]->nodes;
1326 /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
1327 transit to the last_node and the last_node itself. */
1328 err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1329 if (BE (err != REG_NOERROR, 0))
1331 err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
1332 if (BE (err != REG_NOERROR, 0))
1335 /* Then check each states in the state_log. */
1339 /* Update counters. */
1340 null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1341 if (null_cnt > mctx->max_mb_elem_len)
1343 memset (sctx->sifted_states, '\0',
1344 sizeof (re_dfastate_t *) * str_idx);
1345 re_node_set_free (&cur_dest);
1348 re_node_set_empty (&cur_dest);
1350 cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1351 : &mctx->state_log[str_idx]->nodes);
1353 /* Then build the next sifted state.
1354 We build the next sifted state on `cur_dest', and update
1355 `sifted_states[str_idx]' with `cur_dest'.
1357 `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1358 `cur_src' points the node_set of the old `state_log[str_idx]'. */
1359 for (i = 0; i < cur_src->nelem; i++)
1361 int prev_node = cur_src->elems[i];
1363 re_token_type_t type = dfa->nodes[prev_node].type;
1365 if (IS_EPSILON_NODE(type))
1367 #ifdef RE_ENABLE_I18N
1368 /* If the node may accept `multi byte'. */
1369 if (ACCEPT_MB_NODE (type))
1370 naccepted = sift_states_iter_mb (preg, mctx, sctx, prev_node,
1371 str_idx, sctx->last_str_idx);
1373 #endif /* RE_ENABLE_I18N */
1374 /* We don't check backreferences here.
1375 See update_cur_sifted_state(). */
1378 && check_node_accept (preg, dfa->nodes + prev_node, mctx,
1380 && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1381 dfa->nexts[prev_node]))
1387 if (sctx->limits.nelem)
1389 int to_idx = str_idx + naccepted;
1390 if (check_dst_limits (dfa, &sctx->limits, mctx,
1391 dfa->nexts[prev_node], to_idx,
1392 prev_node, str_idx))
1395 ret = re_node_set_insert (&cur_dest, prev_node);
1396 if (BE (ret == -1, 0))
1403 /* Add all the nodes which satisfy the following conditions:
1404 - It can epsilon transit to a node in CUR_DEST.
1406 And update state_log. */
1407 err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
1408 if (BE (err != REG_NOERROR, 0))
1413 re_node_set_free (&cur_dest);
1417 /* Helper functions. */
1419 static inline reg_errcode_t
1420 clean_state_log_if_need (mctx, next_state_log_idx)
1421 re_match_context_t *mctx;
1422 int next_state_log_idx;
1424 int top = mctx->state_log_top;
1426 if (next_state_log_idx >= mctx->input->bufs_len
1427 || (next_state_log_idx >= mctx->input->valid_len
1428 && mctx->input->valid_len < mctx->input->len))
1431 err = extend_buffers (mctx);
1432 if (BE (err != REG_NOERROR, 0))
1436 if (top < next_state_log_idx)
1438 memset (mctx->state_log + top + 1, '\0',
1439 sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1440 mctx->state_log_top = next_state_log_idx;
1445 static reg_errcode_t
1446 merge_state_array (dfa, dst, src, num)
1448 re_dfastate_t **dst;
1449 re_dfastate_t **src;
1454 for (st_idx = 0; st_idx < num; ++st_idx)
1456 if (dst[st_idx] == NULL)
1457 dst[st_idx] = src[st_idx];
1458 else if (src[st_idx] != NULL)
1460 re_node_set merged_set;
1461 err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1462 &src[st_idx]->nodes);
1463 if (BE (err != REG_NOERROR, 0))
1465 dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1466 re_node_set_free (&merged_set);
1467 if (BE (err != REG_NOERROR, 0))
1474 static reg_errcode_t
1475 update_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes)
1476 const regex_t *preg;
1477 re_match_context_t *mctx;
1478 re_sift_context_t *sctx;
1480 re_node_set *dest_nodes;
1483 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1484 const re_node_set *candidates;
1485 candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1486 : &mctx->state_log[str_idx]->nodes);
1488 /* At first, add the nodes which can epsilon transit to a node in
1490 if (dest_nodes->nelem)
1492 err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1493 if (BE (err != REG_NOERROR, 0))
1497 /* Then, check the limitations in the current sift_context. */
1498 if (dest_nodes->nelem && sctx->limits.nelem)
1500 err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1501 mctx->bkref_ents, str_idx);
1502 if (BE (err != REG_NOERROR, 0))
1506 /* Update state_log. */
1507 sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1508 if (BE (sctx->sifted_states[str_idx] == NULL && err != REG_NOERROR, 0))
1511 /* If we are searching for the subexpression candidates.
1512 Note that we were from transit_state_bkref_loop() in this case. */
1513 if (dest_nodes->nelem && sctx->check_subexp)
1515 err = search_subexp (preg, mctx, sctx, str_idx, dest_nodes);
1516 if (BE (err != REG_NOERROR, 0))
1520 if ((mctx->state_log[str_idx] != NULL
1521 && mctx->state_log[str_idx]->has_backref))
1523 err = sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes);
1524 if (BE (err != REG_NOERROR, 0))
1530 static reg_errcode_t
1531 add_epsilon_src_nodes (dfa, dest_nodes, candidates)
1533 re_node_set *dest_nodes;
1534 const re_node_set *candidates;
1538 re_node_set src_copy;
1540 err = re_node_set_init_copy (&src_copy, dest_nodes);
1541 if (BE (err != REG_NOERROR, 0))
1543 for (src_idx = 0; src_idx < src_copy.nelem; ++src_idx)
1545 err = re_node_set_add_intersect (dest_nodes, candidates,
1547 + src_copy.elems[src_idx]);
1548 if (BE (err != REG_NOERROR, 0))
1550 re_node_set_free (&src_copy);
1554 re_node_set_free (&src_copy);
1558 static reg_errcode_t
1559 sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
1562 re_node_set *dest_nodes;
1563 const re_node_set *candidates;
1567 re_node_set *inv_eclosure = dfa->inveclosures + node;
1568 re_node_set except_nodes;
1569 re_node_set_init_empty (&except_nodes);
1570 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1572 int cur_node = inv_eclosure->elems[ecl_idx];
1573 if (cur_node == node)
1575 if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1577 int edst1 = dfa->edests[cur_node].elems[0];
1578 int edst2 = ((dfa->edests[cur_node].nelem > 1)
1579 ? dfa->edests[cur_node].elems[1] : -1);
1580 if ((!re_node_set_contains (inv_eclosure, edst1)
1581 && re_node_set_contains (dest_nodes, edst1))
1583 && !re_node_set_contains (inv_eclosure, edst2)
1584 && re_node_set_contains (dest_nodes, edst2)))
1586 err = re_node_set_add_intersect (&except_nodes, candidates,
1587 dfa->inveclosures + cur_node);
1588 if (BE (err != REG_NOERROR, 0))
1590 re_node_set_free (&except_nodes);
1596 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1598 int cur_node = inv_eclosure->elems[ecl_idx];
1599 if (!re_node_set_contains (&except_nodes, cur_node))
1601 int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1602 re_node_set_remove_at (dest_nodes, idx);
1605 re_node_set_free (&except_nodes);
1610 check_dst_limits (dfa, limits, mctx, dst_node, dst_idx, src_node, src_idx)
1612 re_node_set *limits;
1613 re_match_context_t *mctx;
1614 int dst_node, dst_idx, src_node, src_idx;
1616 int lim_idx, src_pos, dst_pos;
1618 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1621 struct re_backref_cache_entry *ent;
1622 ent = mctx->bkref_ents + limits->elems[lim_idx];
1623 subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
1625 dst_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
1626 dfa->eclosures + dst_node,
1627 subexp_idx, dst_node, dst_idx);
1628 src_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
1629 dfa->eclosures + src_node,
1630 subexp_idx, src_node, src_idx);
1633 <src> <dst> ( <subexp> )
1634 ( <subexp> ) <src> <dst>
1635 ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
1636 if (src_pos == dst_pos)
1637 continue; /* This is unrelated limitation. */
1645 check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node,
1648 re_match_context_t *mctx;
1649 re_node_set *eclosures;
1650 int limit, subexp_idx, node, str_idx;
1652 struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1653 int pos = (str_idx < lim->subexp_from ? -1
1654 : (lim->subexp_to < str_idx ? 1 : 0));
1656 && (str_idx == lim->subexp_from || str_idx == lim->subexp_to))
1659 for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1661 int node = eclosures->elems[node_idx];
1662 re_token_type_t type= dfa->nodes[node].type;
1663 if (type == OP_BACK_REF)
1666 for (bi = 0; bi < mctx->nbkref_ents; ++bi)
1668 struct re_backref_cache_entry *ent = mctx->bkref_ents + bi;
1669 if (ent->node == node && ent->subexp_from == ent->subexp_to
1670 && ent->str_idx == str_idx)
1673 dst = dfa->edests[node].elems[0];
1674 cpos = check_dst_limits_calc_pos (dfa, mctx, limit,
1675 dfa->eclosures + dst,
1678 if ((str_idx == lim->subexp_from && cpos == -1)
1679 || (str_idx == lim->subexp_to && cpos == 0))
1684 if (type == OP_OPEN_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
1685 && str_idx == lim->subexp_from)
1690 if (type == OP_CLOSE_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
1691 && str_idx == lim->subexp_to)
1694 if (node_idx == eclosures->nelem && str_idx == lim->subexp_to)
1700 /* Check the limitations of sub expressions LIMITS, and remove the nodes
1701 which are against limitations from DEST_NODES. */
1703 static reg_errcode_t
1704 check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
1706 re_node_set *dest_nodes;
1707 const re_node_set *candidates;
1708 re_node_set *limits;
1709 struct re_backref_cache_entry *bkref_ents;
1713 int node_idx, lim_idx;
1715 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1718 struct re_backref_cache_entry *ent;
1719 ent = bkref_ents + limits->elems[lim_idx];
1721 if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
1722 continue; /* This is unrelated limitation. */
1724 subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
1725 if (ent->subexp_to == str_idx)
1729 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1731 int node = dest_nodes->elems[node_idx];
1732 re_token_type_t type= dfa->nodes[node].type;
1733 if (type == OP_OPEN_SUBEXP
1734 && subexp_idx == dfa->nodes[node].opr.idx)
1736 else if (type == OP_CLOSE_SUBEXP
1737 && subexp_idx == dfa->nodes[node].opr.idx)
1741 /* Check the limitation of the open subexpression. */
1742 /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
1745 err = sub_epsilon_src_nodes(dfa, ops_node, dest_nodes,
1747 if (BE (err != REG_NOERROR, 0))
1750 /* Check the limitation of the close subexpression. */
1751 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1753 int node = dest_nodes->elems[node_idx];
1754 if (!re_node_set_contains (dfa->inveclosures + node, cls_node)
1755 && !re_node_set_contains (dfa->eclosures + node, cls_node))
1757 /* It is against this limitation.
1758 Remove it form the current sifted state. */
1759 err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
1761 if (BE (err != REG_NOERROR, 0))
1767 else /* (ent->subexp_to != str_idx) */
1769 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1771 int node = dest_nodes->elems[node_idx];
1772 re_token_type_t type= dfa->nodes[node].type;
1773 if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
1775 if (subexp_idx != dfa->nodes[node].opr.idx)
1777 if ((type == OP_CLOSE_SUBEXP && ent->subexp_to != str_idx)
1778 || (type == OP_OPEN_SUBEXP))
1780 /* It is against this limitation.
1781 Remove it form the current sifted state. */
1782 err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
1784 if (BE (err != REG_NOERROR, 0))
1794 /* Search for the top (in case of sctx->check_subexp < 0) or the
1795 bottom (in case of sctx->check_subexp > 0) of the subexpressions
1796 which the backreference sctx->cur_bkref can match. */
1798 static reg_errcode_t
1799 search_subexp (preg, mctx, sctx, str_idx, dest_nodes)
1800 const regex_t *preg;
1801 re_match_context_t *mctx;
1802 re_sift_context_t *sctx;
1804 re_node_set *dest_nodes;
1807 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1808 re_sift_context_t local_sctx;
1810 const re_node_set *candidates;
1811 re_dfastate_t **lim_states = NULL;
1812 candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1813 : &mctx->state_log[str_idx]->nodes);
1814 local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
1816 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1818 re_token_type_t type;
1819 node = dest_nodes->elems[node_idx];
1820 type = dfa->nodes[node].type;
1822 if (type == OP_CLOSE_SUBEXP
1823 && sctx->check_subexp == dfa->nodes[node].opr.idx + 1)
1825 re_dfastate_t *cur_state;
1826 /* Found the bottom of the subexpression, then search for the
1828 if (local_sctx.sifted_states == NULL)
1830 /* It hasn't been initialized yet, initialize it now. */
1832 err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
1833 if (BE (err != REG_NOERROR, 0))
1836 local_sctx.check_subexp = -sctx->check_subexp;
1837 local_sctx.limited_states = sctx->limited_states;
1838 local_sctx.last_node = node;
1839 local_sctx.last_str_idx = local_sctx.cls_subexp_idx = str_idx;
1840 cur_state = local_sctx.sifted_states[str_idx];
1841 err = sift_states_backward (preg, mctx, &local_sctx);
1842 local_sctx.sifted_states[str_idx] = cur_state;
1843 if (BE (err != REG_NOERROR, 0))
1845 /* There must not 2 same node in a node set. */
1848 else if (type == OP_OPEN_SUBEXP
1849 && -sctx->check_subexp == dfa->nodes[node].opr.idx + 1)
1851 /* Found the top of the subexpression, check that the
1852 backreference can match the input string. */
1855 int bkref_str_idx = re_string_cur_idx (mctx->input);
1856 int subexp_len = sctx->cls_subexp_idx - str_idx;
1857 if (subexp_len < 0 || bkref_str_idx + subexp_len > mctx->input->len)
1860 if (bkref_str_idx + subexp_len > mctx->input->valid_len
1861 && mctx->input->valid_len < mctx->input->len)
1864 err = extend_buffers (mctx);
1865 if (BE (err != REG_NOERROR, 0))
1868 buf = (char *) re_string_get_buffer (mctx->input);
1869 if (strncmp (buf + str_idx, buf + bkref_str_idx, subexp_len) != 0)
1872 if (sctx->limits.nelem && str_idx > 0)
1874 re_dfastate_t *cur_state = sctx->sifted_states[str_idx];
1875 if (lim_states == NULL)
1877 lim_states = re_malloc (re_dfastate_t *, str_idx + 1);
1878 if (BE (lim_states == NULL, 0))
1884 if (local_sctx.sifted_states == NULL)
1886 /* It hasn't been initialized yet, initialize it now. */
1888 err = re_node_set_init_copy (&local_sctx.limits,
1890 if (BE (err != REG_NOERROR, 0))
1893 local_sctx.check_subexp = 0;
1894 local_sctx.last_node = node;
1895 local_sctx.last_str_idx = str_idx;
1896 local_sctx.limited_states = lim_states;
1897 memset (lim_states, '\0',
1898 sizeof (re_dfastate_t*) * (str_idx + 1));
1899 err = sift_states_backward (preg, mctx, &local_sctx);
1900 if (BE (err != REG_NOERROR, 0))
1902 if (local_sctx.sifted_states[0] == NULL
1903 && local_sctx.limited_states[0] == NULL)
1905 sctx->sifted_states[str_idx] = cur_state;
1908 sctx->sifted_states[str_idx] = cur_state;
1910 /* Successfully matched, add a new cache entry. */
1911 dest_str_idx = bkref_str_idx + subexp_len;
1912 err = match_ctx_add_entry (mctx, sctx->cur_bkref, bkref_str_idx,
1913 str_idx, sctx->cls_subexp_idx);
1914 if (BE (err != REG_NOERROR, 0))
1916 err = clean_state_log_if_need (mctx, dest_str_idx);
1917 if (BE (err != REG_NOERROR, 0))
1923 /* Remove the top/bottom of the sub expression we processed. */
1924 if (node_idx < dest_nodes->nelem)
1926 err = sub_epsilon_src_nodes(dfa, node, dest_nodes, candidates);
1927 if (BE (err != REG_NOERROR, 0))
1929 /* Update state_log. */
1930 sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1931 if (BE (err != REG_NOERROR, 0))
1936 if (local_sctx.sifted_states != NULL)
1937 re_node_set_free (&local_sctx.limits);
1938 if (lim_states != NULL)
1939 re_free (lim_states);
1943 static reg_errcode_t
1944 sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes)
1945 const regex_t *preg;
1946 re_match_context_t *mctx;
1947 re_sift_context_t *sctx;
1949 re_node_set *dest_nodes;
1952 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1954 re_sift_context_t local_sctx;
1955 const re_node_set *candidates;
1956 candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1957 : &mctx->state_log[str_idx]->nodes);
1958 local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
1960 for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
1962 int cur_bkref_idx = re_string_cur_idx (mctx->input);
1963 re_token_type_t type;
1964 node = candidates->elems[node_idx];
1965 type = dfa->nodes[node].type;
1966 if (node == sctx->cur_bkref && str_idx == cur_bkref_idx)
1968 /* Avoid infinite loop for the REs like "()\1+". */
1969 if (node == sctx->last_node && str_idx == sctx->last_str_idx)
1971 if (type == OP_BACK_REF)
1974 for (enabled_idx = 0; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
1976 int disabled_idx, subexp_len, to_idx, dst_node;
1977 struct re_backref_cache_entry *entry;
1978 entry = mctx->bkref_ents + enabled_idx;
1979 subexp_len = entry->subexp_to - entry->subexp_from;
1980 to_idx = str_idx + subexp_len;
1981 dst_node = (subexp_len ? dfa->nexts[node]
1982 : dfa->edests[node].elems[0]);
1984 if (entry->node != node || entry->str_idx != str_idx
1985 || to_idx > sctx->last_str_idx
1986 || sctx->sifted_states[to_idx] == NULL)
1988 if (!STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node))
1991 if (check_dst_limits (dfa, &sctx->limits, mctx, node,
1992 str_idx, dst_node, to_idx))
1994 if (sctx->check_subexp == dfa->nodes[node].opr.idx)
1997 buf = (char *) re_string_get_buffer (mctx->input);
1998 if (strncmp (buf + entry->subexp_from,
1999 buf + cur_bkref_idx, subexp_len) != 0)
2001 err = match_ctx_add_entry (mctx, sctx->cur_bkref,
2002 cur_bkref_idx, entry->subexp_from,
2004 if (BE (err != REG_NOERROR, 0))
2006 err = clean_state_log_if_need (mctx, cur_bkref_idx
2008 if (BE (err != REG_NOERROR, 0))
2013 re_dfastate_t *cur_state;
2015 for (disabled_idx = enabled_idx + 1;
2016 disabled_idx < mctx->nbkref_ents; ++disabled_idx)
2018 struct re_backref_cache_entry *entry2;
2019 entry2 = mctx->bkref_ents + disabled_idx;
2020 if (entry2->node != node || entry2->str_idx != str_idx)
2025 if (local_sctx.sifted_states == NULL)
2028 err = re_node_set_init_copy (&local_sctx.limits,
2030 if (BE (err != REG_NOERROR, 0))
2033 local_sctx.last_node = node;
2034 local_sctx.last_str_idx = str_idx;
2035 err = re_node_set_insert (&local_sctx.limits, enabled_idx);
2036 if (BE (err < 0, 0))
2041 cur_state = local_sctx.sifted_states[str_idx];
2042 err = sift_states_backward (preg, mctx, &local_sctx);
2043 if (BE (err != REG_NOERROR, 0))
2045 if (sctx->limited_states != NULL)
2047 err = merge_state_array (dfa, sctx->limited_states,
2048 local_sctx.sifted_states,
2050 if (BE (err != REG_NOERROR, 0))
2053 local_sctx.sifted_states[str_idx] = cur_state;
2054 re_node_set_remove (&local_sctx.limits, enabled_idx);
2055 /* We must not use the variable entry here, since
2056 mctx->bkref_ents might be realloced. */
2057 mctx->bkref_ents[enabled_idx].flag = 1;
2060 for (enabled_idx = 0; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
2062 struct re_backref_cache_entry *entry;
2063 entry = mctx->bkref_ents + enabled_idx;
2064 if (entry->node == node && entry->str_idx == str_idx)
2071 if (local_sctx.sifted_states != NULL)
2073 re_node_set_free (&local_sctx.limits);
2080 #ifdef RE_ENABLE_I18N
2082 sift_states_iter_mb (preg, mctx, sctx, node_idx, str_idx, max_str_idx)
2083 const regex_t *preg;
2084 const re_match_context_t *mctx;
2085 re_sift_context_t *sctx;
2086 int node_idx, str_idx, max_str_idx;
2088 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2090 /* Check the node can accept `multi byte'. */
2091 naccepted = check_node_accept_bytes (preg, node_idx, mctx->input, str_idx);
2092 if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2093 !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2094 dfa->nexts[node_idx]))
2095 /* The node can't accept the `multi byte', or the
2096 destination was already throwed away, then the node
2097 could't accept the current input `multi byte'. */
2099 /* Otherwise, it is sure that the node could accept
2100 `naccepted' bytes input. */
2103 #endif /* RE_ENABLE_I18N */
2106 /* Functions for state transition. */
2108 /* Return the next state to which the current state STATE will transit by
2109 accepting the current input byte, and update STATE_LOG if necessary.
2110 If STATE can accept a multibyte char/collating element/back reference
2111 update the destination of STATE_LOG. */
2113 static re_dfastate_t *
2114 transit_state (err, preg, mctx, state, fl_search)
2116 const regex_t *preg;
2117 re_match_context_t *mctx;
2118 re_dfastate_t *state;
2121 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2122 re_dfastate_t **trtable, *next_state;
2125 if (re_string_cur_idx (mctx->input) + 1 >= mctx->input->bufs_len
2126 || (re_string_cur_idx (mctx->input) + 1 >= mctx->input->valid_len
2127 && mctx->input->valid_len < mctx->input->len))
2129 *err = extend_buffers (mctx);
2130 if (BE (*err != REG_NOERROR, 0))
2138 re_string_skip_bytes (mctx->input, 1);
2142 #ifdef RE_ENABLE_I18N
2143 /* If the current state can accept multibyte. */
2144 if (state->accept_mb)
2146 *err = transit_state_mb (preg, state, mctx);
2147 if (BE (*err != REG_NOERROR, 0))
2150 #endif /* RE_ENABLE_I18N */
2152 /* Then decide the next state with the single byte. */
2155 /* Use transition table */
2156 ch = re_string_fetch_byte (mctx->input);
2157 trtable = fl_search ? state->trtable_search : state->trtable;
2158 if (trtable == NULL)
2160 trtable = build_trtable (preg, state, fl_search);
2162 state->trtable_search = trtable;
2164 state->trtable = trtable;
2166 next_state = trtable[ch];
2170 /* don't use transition table */
2171 next_state = transit_state_sb (err, preg, state, fl_search, mctx);
2172 if (BE (next_state == NULL && err != REG_NOERROR, 0))
2177 /* Update the state_log if we need. */
2178 if (mctx->state_log != NULL)
2180 int cur_idx = re_string_cur_idx (mctx->input);
2181 if (cur_idx > mctx->state_log_top)
2183 mctx->state_log[cur_idx] = next_state;
2184 mctx->state_log_top = cur_idx;
2186 else if (mctx->state_log[cur_idx] == 0)
2188 mctx->state_log[cur_idx] = next_state;
2192 re_dfastate_t *pstate;
2193 unsigned int context;
2194 re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2195 /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2196 the destination of a multibyte char/collating element/
2197 back reference. Then the next state is the union set of
2198 these destinations and the results of the transition table. */
2199 pstate = mctx->state_log[cur_idx];
2200 log_nodes = pstate->entrance_nodes;
2201 if (next_state != NULL)
2203 table_nodes = next_state->entrance_nodes;
2204 *err = re_node_set_init_union (&next_nodes, table_nodes,
2206 if (BE (*err != REG_NOERROR, 0))
2210 next_nodes = *log_nodes;
2211 /* Note: We already add the nodes of the initial state,
2212 then we don't need to add them here. */
2214 context = re_string_context_at (mctx->input,
2215 re_string_cur_idx (mctx->input) - 1,
2216 mctx->eflags, preg->newline_anchor);
2217 next_state = mctx->state_log[cur_idx]
2218 = re_acquire_state_context (err, dfa, &next_nodes, context);
2219 /* We don't need to check errors here, since the return value of
2220 this function is next_state and ERR is already set. */
2222 if (table_nodes != NULL)
2223 re_node_set_free (&next_nodes);
2225 /* If the next state has back references. */
2226 if (next_state != NULL && next_state->has_backref)
2228 *err = transit_state_bkref (preg, next_state, mctx);
2229 if (BE (*err != REG_NOERROR, 0))
2231 next_state = mctx->state_log[cur_idx];
2237 /* Helper functions for transit_state. */
2239 /* Return the next state to which the current state STATE will transit by
2240 accepting the current input byte. */
2242 static re_dfastate_t *
2243 transit_state_sb (err, preg, state, fl_search, mctx)
2245 const regex_t *preg;
2246 re_dfastate_t *state;
2248 re_match_context_t *mctx;
2250 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2251 re_node_set next_nodes;
2252 re_dfastate_t *next_state;
2253 int node_cnt, cur_str_idx = re_string_cur_idx (mctx->input);
2254 unsigned int context;
2256 *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2257 if (BE (*err != REG_NOERROR, 0))
2259 for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2261 int cur_node = state->nodes.elems[node_cnt];
2262 if (check_node_accept (preg, dfa->nodes + cur_node, mctx, cur_str_idx))
2264 *err = re_node_set_merge (&next_nodes,
2265 dfa->eclosures + dfa->nexts[cur_node]);
2266 if (BE (*err != REG_NOERROR, 0))
2268 re_node_set_free (&next_nodes);
2275 #ifdef RE_ENABLE_I18N
2276 int not_initial = 0;
2278 for (node_cnt = 0; node_cnt < next_nodes.nelem; ++node_cnt)
2279 if (dfa->nodes[next_nodes.elems[node_cnt]].type == CHARACTER)
2281 not_initial = dfa->nodes[next_nodes.elems[node_cnt]].mb_partial;
2287 *err = re_node_set_merge (&next_nodes,
2288 dfa->init_state->entrance_nodes);
2289 if (BE (*err != REG_NOERROR, 0))
2291 re_node_set_free (&next_nodes);
2296 context = re_string_context_at (mctx->input, cur_str_idx, mctx->eflags,
2297 preg->newline_anchor);
2298 next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2299 /* We don't need to check errors here, since the return value of
2300 this function is next_state and ERR is already set. */
2302 re_node_set_free (&next_nodes);
2303 re_string_skip_bytes (mctx->input, 1);
2307 #ifdef RE_ENABLE_I18N
2308 static reg_errcode_t
2309 transit_state_mb (preg, pstate, mctx)
2310 const regex_t *preg;
2311 re_dfastate_t *pstate;
2312 re_match_context_t *mctx;
2315 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2318 for (i = 0; i < pstate->nodes.nelem; ++i)
2320 re_node_set dest_nodes, *new_nodes;
2321 int cur_node_idx = pstate->nodes.elems[i];
2322 int naccepted = 0, dest_idx;
2323 unsigned int context;
2324 re_dfastate_t *dest_state;
2326 if (dfa->nodes[cur_node_idx].constraint)
2328 context = re_string_context_at (mctx->input,
2329 re_string_cur_idx (mctx->input),
2330 mctx->eflags, preg->newline_anchor);
2331 if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2336 /* How many bytes the node can accepts? */
2337 if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
2338 naccepted = check_node_accept_bytes (preg, cur_node_idx, mctx->input,
2339 re_string_cur_idx (mctx->input));
2343 /* The node can accepts `naccepted' bytes. */
2344 dest_idx = re_string_cur_idx (mctx->input) + naccepted;
2345 mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2346 : mctx->max_mb_elem_len);
2347 err = clean_state_log_if_need (mctx, dest_idx);
2348 if (BE (err != REG_NOERROR, 0))
2351 assert (dfa->nexts[cur_node_idx] != -1);
2353 /* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE,
2354 then we use pstate->nodes.elems[i] instead. */
2355 new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]];
2357 dest_state = mctx->state_log[dest_idx];
2358 if (dest_state == NULL)
2359 dest_nodes = *new_nodes;
2362 err = re_node_set_init_union (&dest_nodes,
2363 dest_state->entrance_nodes, new_nodes);
2364 if (BE (err != REG_NOERROR, 0))
2367 context = re_string_context_at (mctx->input, dest_idx - 1, mctx->eflags,
2368 preg->newline_anchor);
2369 mctx->state_log[dest_idx]
2370 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2371 if (dest_state != NULL)
2372 re_node_set_free (&dest_nodes);
2373 if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2378 #endif /* RE_ENABLE_I18N */
2380 static reg_errcode_t
2381 transit_state_bkref (preg, pstate, mctx)
2382 const regex_t *preg;
2383 re_dfastate_t *pstate;
2384 re_match_context_t *mctx;
2387 re_dfastate_t **work_state_log;
2389 work_state_log = re_malloc (re_dfastate_t *,
2390 re_string_cur_idx (mctx->input) + 1);
2391 if (BE (work_state_log == NULL, 0))
2394 err = transit_state_bkref_loop (preg, &pstate->nodes, work_state_log, mctx);
2395 re_free (work_state_log);
2399 /* Caller must allocate `work_state_log'. */
2401 static reg_errcode_t
2402 transit_state_bkref_loop (preg, nodes, work_state_log, mctx)
2403 const regex_t *preg;
2405 re_dfastate_t **work_state_log;
2406 re_match_context_t *mctx;
2409 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2411 regmatch_t *cur_regs = re_malloc (regmatch_t, preg->re_nsub + 1);
2412 int cur_str_idx = re_string_cur_idx (mctx->input);
2413 if (BE (cur_regs == NULL, 0))
2416 for (i = 0; i < nodes->nelem; ++i)
2418 int dest_str_idx, subexp_idx, prev_nelem, bkc_idx;
2419 int node_idx = nodes->elems[i];
2420 unsigned int context;
2421 re_token_t *node = dfa->nodes + node_idx;
2422 re_node_set *new_dest_nodes;
2423 re_sift_context_t sctx;
2425 /* Check whether `node' is a backreference or not. */
2426 if (node->type == OP_BACK_REF)
2427 subexp_idx = node->opr.idx;
2431 if (node->constraint)
2433 context = re_string_context_at (mctx->input, cur_str_idx,
2434 mctx->eflags, preg->newline_anchor);
2435 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2439 /* `node' is a backreference.
2440 Check the substring which the substring matched. */
2441 sift_ctx_init (&sctx, work_state_log, NULL, node_idx, cur_str_idx,
2443 sctx.cur_bkref = node_idx;
2444 match_ctx_clear_flag (mctx);
2445 err = sift_states_backward (preg, mctx, &sctx);
2446 if (BE (err != REG_NOERROR, 0))
2449 /* And add the epsilon closures (which is `new_dest_nodes') of
2450 the backreference to appropriate state_log. */
2452 assert (dfa->nexts[node_idx] != -1);
2454 for (bkc_idx = 0; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2457 re_dfastate_t *dest_state;
2458 struct re_backref_cache_entry *bkref_ent;
2459 bkref_ent = mctx->bkref_ents + bkc_idx;
2460 if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2462 subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2463 new_dest_nodes = (subexp_len == 0
2464 ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2465 : dfa->eclosures + dfa->nexts[node_idx]);
2466 dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2467 - bkref_ent->subexp_from);
2468 context = (IS_WORD_CHAR (re_string_byte_at (mctx->input,
2470 ? CONTEXT_WORD : 0);
2471 dest_state = mctx->state_log[dest_str_idx];
2472 prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2473 : mctx->state_log[cur_str_idx]->nodes.nelem);
2474 /* Add `new_dest_node' to state_log. */
2475 if (dest_state == NULL)
2477 mctx->state_log[dest_str_idx]
2478 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2480 if (BE (mctx->state_log[dest_str_idx] == NULL
2481 && err != REG_NOERROR, 0))
2486 re_node_set dest_nodes;
2487 err = re_node_set_init_union (&dest_nodes,
2488 dest_state->entrance_nodes,
2490 if (BE (err != REG_NOERROR, 0))
2492 re_node_set_free (&dest_nodes);
2495 mctx->state_log[dest_str_idx]
2496 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2497 re_node_set_free (&dest_nodes);
2498 if (BE (mctx->state_log[dest_str_idx] == NULL
2499 && err != REG_NOERROR, 0))
2502 /* We need to check recursively if the backreference can epsilon
2505 && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2507 err = transit_state_bkref_loop (preg, new_dest_nodes,
2508 work_state_log, mctx);
2509 if (BE (err != REG_NOERROR, 0))
2520 /* Build transition table for the state.
2521 Return the new table if succeeded, otherwise return NULL. */
2523 static re_dfastate_t **
2524 build_trtable (preg, state, fl_search)
2525 const regex_t *preg;
2526 const re_dfastate_t *state;
2530 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2532 int dests_node_malloced = 0, dest_states_malloced = 0;
2533 int ndests; /* Number of the destination states from `state'. */
2534 re_dfastate_t **trtable;
2535 re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
2536 re_node_set follows, *dests_node;
2540 /* We build DFA states which corresponds to the destination nodes
2541 from `state'. `dests_node[i]' represents the nodes which i-th
2542 destination state contains, and `dests_ch[i]' represents the
2543 characters which i-th destination state accepts. */
2545 if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
2546 dests_node = (re_node_set *)
2547 alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
2551 dests_node = (re_node_set *)
2552 malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
2553 if (BE (dests_node == NULL, 0))
2555 dests_node_malloced = 1;
2557 dests_ch = (bitset *) (dests_node + SBC_MAX);
2559 /* Initialize transiton table. */
2560 trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
2561 if (BE (trtable == NULL, 0))
2563 if (dests_node_malloced)
2568 /* At first, group all nodes belonging to `state' into several
2570 ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch);
2571 if (BE (ndests <= 0, 0))
2573 if (dests_node_malloced)
2575 /* Return NULL in case of an error, trtable otherwise. */
2582 err = re_node_set_alloc (&follows, ndests + 1);
2583 if (BE (err != REG_NOERROR, 0))
2587 if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
2588 + ndests * 3 * sizeof (re_dfastate_t *)))
2589 dest_states = (re_dfastate_t **)
2590 alloca (ndests * 3 * sizeof (re_dfastate_t *));
2594 dest_states = (re_dfastate_t **)
2595 malloc (ndests * 3 * sizeof (re_dfastate_t *));
2596 if (BE (dest_states == NULL, 0))
2599 if (dest_states_malloced)
2601 re_node_set_free (&follows);
2602 for (i = 0; i < ndests; ++i)
2603 re_node_set_free (dests_node + i);
2605 if (dests_node_malloced)
2609 dest_states_malloced = 1;
2611 dest_states_word = dest_states + ndests;
2612 dest_states_nl = dest_states_word + ndests;
2613 bitset_empty (acceptable);
2615 /* Then build the states for all destinations. */
2616 for (i = 0; i < ndests; ++i)
2619 re_node_set_empty (&follows);
2620 /* Merge the follows of this destination states. */
2621 for (j = 0; j < dests_node[i].nelem; ++j)
2623 next_node = dfa->nexts[dests_node[i].elems[j]];
2624 if (next_node != -1)
2626 err = re_node_set_merge (&follows, dfa->eclosures + next_node);
2627 if (BE (err != REG_NOERROR, 0))
2631 /* If search flag is set, merge the initial state. */
2634 #ifdef RE_ENABLE_I18N
2635 int not_initial = 0;
2636 for (j = 0; j < follows.nelem; ++j)
2637 if (dfa->nodes[follows.elems[j]].type == CHARACTER)
2639 not_initial = dfa->nodes[follows.elems[j]].mb_partial;
2645 err = re_node_set_merge (&follows,
2646 dfa->init_state->entrance_nodes);
2647 if (BE (err != REG_NOERROR, 0))
2651 dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
2652 if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
2654 /* If the new state has context constraint,
2655 build appropriate states for these contexts. */
2656 if (dest_states[i]->has_constraint)
2658 dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
2660 if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
2662 dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
2664 if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
2669 dest_states_word[i] = dest_states[i];
2670 dest_states_nl[i] = dest_states[i];
2672 bitset_merge (acceptable, dests_ch[i]);
2675 /* Update the transition table. */
2676 /* For all characters ch...: */
2677 for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
2678 for (j = 0; j < UINT_BITS; ++j, ++ch)
2679 if ((acceptable[i] >> j) & 1)
2681 /* The current state accepts the character ch. */
2682 if (IS_WORD_CHAR (ch))
2684 for (k = 0; k < ndests; ++k)
2685 if ((dests_ch[k][i] >> j) & 1)
2687 /* k-th destination accepts the word character ch. */
2688 trtable[ch] = dest_states_word[k];
2689 /* There must be only one destination which accepts
2690 character ch. See group_nodes_into_DFAstates. */
2694 else /* not WORD_CHAR */
2696 for (k = 0; k < ndests; ++k)
2697 if ((dests_ch[k][i] >> j) & 1)
2699 /* k-th destination accepts the non-word character ch. */
2700 trtable[ch] = dest_states[k];
2701 /* There must be only one destination which accepts
2702 character ch. See group_nodes_into_DFAstates. */
2708 if (bitset_contain (acceptable, NEWLINE_CHAR))
2710 /* The current state accepts newline character. */
2711 for (k = 0; k < ndests; ++k)
2712 if (bitset_contain (dests_ch[k], NEWLINE_CHAR))
2714 /* k-th destination accepts newline character. */
2715 trtable[NEWLINE_CHAR] = dest_states_nl[k];
2716 /* There must be only one destination which accepts
2717 newline. See group_nodes_into_DFAstates. */
2722 if (dest_states_malloced)
2725 re_node_set_free (&follows);
2726 for (i = 0; i < ndests; ++i)
2727 re_node_set_free (dests_node + i);
2729 if (dests_node_malloced)
2735 /* Group all nodes belonging to STATE into several destinations.
2736 Then for all destinations, set the nodes belonging to the destination
2737 to DESTS_NODE[i] and set the characters accepted by the destination
2738 to DEST_CH[i]. This function return the number of destinations. */
2741 group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
2742 const regex_t *preg;
2743 const re_dfastate_t *state;
2744 re_node_set *dests_node;
2748 const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2750 int ndests; /* Number of the destinations from `state'. */
2751 bitset accepts; /* Characters a node can accept. */
2752 const re_node_set *cur_nodes = &state->nodes;
2753 bitset_empty (accepts);
2756 /* For all the nodes belonging to `state', */
2757 for (i = 0; i < cur_nodes->nelem; ++i)
2759 re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
2760 re_token_type_t type = node->type;
2761 unsigned int constraint = node->constraint;
2763 /* Enumerate all single byte character this node can accept. */
2764 if (type == CHARACTER)
2765 bitset_set (accepts, node->opr.c);
2766 else if (type == SIMPLE_BRACKET)
2768 bitset_merge (accepts, node->opr.sbcset);
2770 else if (type == OP_PERIOD)
2772 bitset_set_all (accepts);
2773 if (!(preg->syntax & RE_DOT_NEWLINE))
2774 bitset_clear (accepts, '\n');
2775 if (preg->syntax & RE_DOT_NOT_NULL)
2776 bitset_clear (accepts, '\0');
2781 /* Check the `accepts' and sift the characters which are not
2782 match it the context. */
2785 if (constraint & NEXT_WORD_CONSTRAINT)
2786 for (j = 0; j < BITSET_UINTS; ++j)
2787 accepts[j] &= dfa->word_char[j];
2788 if (constraint & NEXT_NOTWORD_CONSTRAINT)
2789 for (j = 0; j < BITSET_UINTS; ++j)
2790 accepts[j] &= ~dfa->word_char[j];
2791 if (constraint & NEXT_NEWLINE_CONSTRAINT)
2793 int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
2794 bitset_empty (accepts);
2795 if (accepts_newline)
2796 bitset_set (accepts, NEWLINE_CHAR);
2802 /* Then divide `accepts' into DFA states, or create a new
2804 for (j = 0; j < ndests; ++j)
2806 bitset intersec; /* Intersection sets, see below. */
2808 /* Flags, see below. */
2809 int has_intersec, not_subset, not_consumed;
2811 /* Optimization, skip if this state doesn't accept the character. */
2812 if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
2815 /* Enumerate the intersection set of this state and `accepts'. */
2817 for (k = 0; k < BITSET_UINTS; ++k)
2818 has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
2819 /* And skip if the intersection set is empty. */
2823 /* Then check if this state is a subset of `accepts'. */
2824 not_subset = not_consumed = 0;
2825 for (k = 0; k < BITSET_UINTS; ++k)
2827 not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
2828 not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
2831 /* If this state isn't a subset of `accepts', create a
2832 new group state, which has the `remains'. */
2835 bitset_copy (dests_ch[ndests], remains);
2836 bitset_copy (dests_ch[j], intersec);
2837 err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
2838 if (BE (err != REG_NOERROR, 0))
2843 /* Put the position in the current group. */
2844 err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
2845 if (BE (err < 0, 0))
2848 /* If all characters are consumed, go to next node. */
2852 /* Some characters remain, create a new group. */
2855 bitset_copy (dests_ch[ndests], accepts);
2856 err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
2857 if (BE (err != REG_NOERROR, 0))
2860 bitset_empty (accepts);
2865 for (j = 0; j < ndests; ++j)
2866 re_node_set_free (dests_node + j);
2870 #ifdef RE_ENABLE_I18N
2871 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
2872 Return the number of the bytes the node accepts.
2873 STR_IDX is the current index of the input string.
2875 This function handles the nodes which can accept one character, or
2876 one collating element like '.', '[a-z]', opposite to the other nodes
2877 can only accept one byte. */
2880 check_node_accept_bytes (preg, node_idx, input, str_idx)
2881 const regex_t *preg;
2882 int node_idx, str_idx;
2883 const re_string_t *input;
2885 const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2886 const re_token_t *node = dfa->nodes + node_idx;
2887 int elem_len = re_string_elem_size_at (input, str_idx);
2888 int char_len = re_string_char_size_at (input, str_idx);
2892 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2894 if (elem_len <= 1 && char_len <= 1)
2896 if (node->type == OP_PERIOD)
2898 /* '.' accepts any one character except the following two cases. */
2899 if ((!(preg->syntax & RE_DOT_NEWLINE) &&
2900 re_string_byte_at (input, str_idx) == '\n') ||
2901 ((preg->syntax & RE_DOT_NOT_NULL) &&
2902 re_string_byte_at (input, str_idx) == '\0'))
2906 else if (node->type == COMPLEX_BRACKET)
2908 const re_charset_t *cset = node->opr.mbcset;
2910 const unsigned char *pin = re_string_get_buffer (input) + str_idx;
2913 wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
2914 ? re_string_wchar_at (input, str_idx) : 0);
2916 /* match with multibyte character? */
2917 for (i = 0; i < cset->nmbchars; ++i)
2918 if (wc == cset->mbchars[i])
2920 match_len = char_len;
2921 goto check_node_accept_bytes_match;
2923 /* match with character_class? */
2924 for (i = 0; i < cset->nchar_classes; ++i)
2926 wctype_t wt = cset->char_classes[i];
2927 if (__iswctype (wc, wt))
2929 match_len = char_len;
2930 goto check_node_accept_bytes_match;
2937 unsigned int in_collseq = 0;
2938 const int32_t *table, *indirect;
2939 const unsigned char *weights, *extra;
2940 const char *collseqwc;
2942 /* This #include defines a local function! */
2943 # include <locale/weight.h>
2945 /* match with collating_symbol? */
2946 if (cset->ncoll_syms)
2947 extra = (const unsigned char *)
2948 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
2949 for (i = 0; i < cset->ncoll_syms; ++i)
2951 const unsigned char *coll_sym = extra + cset->coll_syms[i];
2952 /* Compare the length of input collating element and
2953 the length of current collating element. */
2954 if (*coll_sym != elem_len)
2956 /* Compare each bytes. */
2957 for (j = 0; j < *coll_sym; j++)
2958 if (pin[j] != coll_sym[1 + j])
2962 /* Match if every bytes is equal. */
2964 goto check_node_accept_bytes_match;
2970 if (elem_len <= char_len)
2972 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
2973 in_collseq = collseq_table_lookup (collseqwc, wc);
2976 in_collseq = find_collation_sequence_value (pin, elem_len);
2978 /* match with range expression? */
2979 for (i = 0; i < cset->nranges; ++i)
2980 if (cset->range_starts[i] <= in_collseq
2981 && in_collseq <= cset->range_ends[i])
2983 match_len = elem_len;
2984 goto check_node_accept_bytes_match;
2987 /* match with equivalence_class? */
2988 if (cset->nequiv_classes)
2990 const unsigned char *cp = pin;
2991 table = (const int32_t *)
2992 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
2993 weights = (const unsigned char *)
2994 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
2995 extra = (const unsigned char *)
2996 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
2997 indirect = (const int32_t *)
2998 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
2999 idx = findidx (&cp);
3001 for (i = 0; i < cset->nequiv_classes; ++i)
3003 int32_t equiv_class_idx = cset->equiv_classes[i];
3004 size_t weight_len = weights[idx];
3005 if (weight_len == weights[equiv_class_idx])
3008 while (cnt <= weight_len
3009 && (weights[equiv_class_idx + 1 + cnt]
3010 == weights[idx + 1 + cnt]))
3012 if (cnt > weight_len)
3014 match_len = elem_len;
3015 goto check_node_accept_bytes_match;
3024 /* match with range expression? */
3026 wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3028 wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3031 for (i = 0; i < cset->nranges; ++i)
3033 cmp_buf[0] = cset->range_starts[i];
3034 cmp_buf[4] = cset->range_ends[i];
3035 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3036 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3038 match_len = char_len;
3039 goto check_node_accept_bytes_match;
3043 check_node_accept_bytes_match:
3044 if (!cset->non_match)
3051 return (elem_len > char_len) ? elem_len : char_len;
3059 find_collation_sequence_value (mbs, mbs_len)
3060 const unsigned char *mbs;
3063 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3068 /* No valid character. Match it as a single byte character. */
3069 const unsigned char *collseq = (const unsigned char *)
3070 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3071 return collseq[mbs[0]];
3078 const unsigned char *extra = (const unsigned char *)
3079 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3083 int mbs_cnt, found = 0;
3084 int32_t elem_mbs_len;
3085 /* Skip the name of collating element name. */
3086 idx = idx + extra[idx] + 1;
3087 elem_mbs_len = extra[idx++];
3088 if (mbs_len == elem_mbs_len)
3090 for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3091 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3093 if (mbs_cnt == elem_mbs_len)
3094 /* Found the entry. */
3097 /* Skip the byte sequence of the collating element. */
3098 idx += elem_mbs_len;
3099 /* Adjust for the alignment. */
3100 idx = (idx + 3) & ~3;
3101 /* Skip the collation sequence value. */
3102 idx += sizeof (uint32_t);
3103 /* Skip the wide char sequence of the collating element. */
3104 idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
3105 /* If we found the entry, return the sequence value. */
3107 return *(uint32_t *) (extra + idx);
3108 /* Skip the collation sequence value. */
3109 idx += sizeof (uint32_t);
3114 #endif /* RE_ENABLE_I18N */
3116 /* Check whether the node accepts the byte which is IDX-th
3117 byte of the INPUT. */
3120 check_node_accept (preg, node, mctx, idx)
3121 const regex_t *preg;
3122 const re_token_t *node;
3123 const re_match_context_t *mctx;
3127 if (node->constraint)
3129 /* The node has constraints. Check whether the current context
3130 satisfies the constraints. */
3131 unsigned int context = re_string_context_at (mctx->input, idx,
3133 preg->newline_anchor);
3134 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
3137 ch = re_string_byte_at (mctx->input, idx);
3138 if (node->type == CHARACTER)
3139 return node->opr.c == ch;
3140 else if (node->type == SIMPLE_BRACKET)
3141 return bitset_contain (node->opr.sbcset, ch);
3142 else if (node->type == OP_PERIOD)
3143 return !((ch == '\n' && !(preg->syntax & RE_DOT_NEWLINE))
3144 || (ch == '\0' && (preg->syntax & RE_DOT_NOT_NULL)));
3149 /* Extend the buffers, if the buffers have run out. */
3151 static reg_errcode_t
3152 extend_buffers (mctx)
3153 re_match_context_t *mctx;
3156 re_string_t *pstr = mctx->input;
3158 /* Double the lengthes of the buffers. */
3159 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
3160 if (BE (ret != REG_NOERROR, 0))
3163 if (mctx->state_log != NULL)
3165 /* And double the length of state_log. */
3166 re_dfastate_t **new_array;
3167 new_array = re_realloc (mctx->state_log, re_dfastate_t *,
3168 pstr->bufs_len * 2);
3169 if (BE (new_array == NULL, 0))
3171 mctx->state_log = new_array;
3174 /* Then reconstruct the buffers. */
3177 #ifdef RE_ENABLE_I18N
3179 build_wcs_upper_buffer (pstr);
3181 #endif /* RE_ENABLE_I18N */
3182 build_upper_buffer (pstr);
3186 #ifdef RE_ENABLE_I18N
3188 build_wcs_buffer (pstr);
3190 #endif /* RE_ENABLE_I18N */
3192 if (pstr->trans != NULL)
3193 re_string_translate_buffer (pstr);
3195 pstr->valid_len = pstr->bufs_len;
3202 /* Functions for matching context. */
3204 static reg_errcode_t
3205 match_ctx_init (mctx, eflags, input, n)
3206 re_match_context_t *mctx;
3210 mctx->eflags = eflags;
3211 mctx->input = input;
3212 mctx->match_last = -1;
3215 mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
3216 if (BE (mctx->bkref_ents == NULL, 0))
3220 mctx->bkref_ents = NULL;
3221 mctx->nbkref_ents = 0;
3222 mctx->abkref_ents = n;
3223 mctx->max_mb_elem_len = 0;
3228 match_ctx_free (mctx)
3229 re_match_context_t *mctx;
3231 re_free (mctx->bkref_ents);
3234 /* Add a new backreference entry to the cache. */
3236 static reg_errcode_t
3237 match_ctx_add_entry (mctx, node, str_idx, from, to)
3238 re_match_context_t *mctx;
3239 int node, str_idx, from, to;
3241 if (mctx->nbkref_ents >= mctx->abkref_ents)
3243 struct re_backref_cache_entry* new_entry;
3244 new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
3245 mctx->abkref_ents * 2);
3246 if (BE (new_entry == NULL, 0))
3248 re_free (mctx->bkref_ents);
3251 mctx->bkref_ents = new_entry;
3252 memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
3253 sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
3254 mctx->abkref_ents *= 2;
3256 mctx->bkref_ents[mctx->nbkref_ents].node = node;
3257 mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
3258 mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
3259 mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
3260 mctx->bkref_ents[mctx->nbkref_ents++].flag = 0;
3261 if (mctx->max_mb_elem_len < to - from)
3262 mctx->max_mb_elem_len = to - from;
3267 match_ctx_clear_flag (mctx)
3268 re_match_context_t *mctx;
3271 for (i = 0; i < mctx->nbkref_ents; ++i)
3273 mctx->bkref_ents[i].flag = 0;
3278 sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx,
3280 re_sift_context_t *sctx;
3281 re_dfastate_t **sifted_sts, **limited_sts;
3282 int last_node, last_str_idx, check_subexp;
3284 sctx->sifted_states = sifted_sts;
3285 sctx->limited_states = limited_sts;
3286 sctx->last_node = last_node;
3287 sctx->last_str_idx = last_str_idx;
3288 sctx->check_subexp = check_subexp;
3289 sctx->cur_bkref = -1;
3290 sctx->cls_subexp_idx = -1;
3291 re_node_set_init_empty (&sctx->limits);