diff --git a/src/backtrack.rs b/src/backtrack.rs index 6e71e2c2f3..a064bbde3a 100644 --- a/src/backtrack.rs +++ b/src/backtrack.rs @@ -98,6 +98,7 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> { slots: &'s mut [Slot], input: I, start: usize, + end: usize, ) -> bool { let mut cache = cache.borrow_mut(); let cache = &mut cache.backtrack; @@ -109,7 +110,7 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> { slots: slots, m: cache, }; - b.exec_(start) + b.exec_(start, end) } /// Clears the cache such that the backtracking engine can be executed @@ -147,7 +148,7 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> { /// Start backtracking at the given position in the input, but also look /// for literal prefixes. - fn exec_(&mut self, mut at: InputAt) -> bool { + fn exec_(&mut self, mut at: InputAt, end: usize) -> bool { self.clear(); // If this is an anchored regex at the beginning of the input, then // we're either already done or we only need to try backtracking once. @@ -170,7 +171,7 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> { if matched && self.prog.matches.len() == 1 { return true; } - if at.is_end() { + if at.pos() == end { break; } at = self.input.at(at.next_pos()); diff --git a/src/exec.rs b/src/exec.rs index dd30c81ec5..14f4467504 100644 --- a/src/exec.rs +++ b/src/exec.rs @@ -10,7 +10,6 @@ use std::cell::RefCell; use std::collections::HashMap; -use std::cmp; use std::sync::Arc; use thread_local::CachedThreadLocal; @@ -557,7 +556,7 @@ impl<'c> RegularExpression for ExecNoSync<'c> { match self.ro.match_type { MatchType::Literal(ty) => { self.find_literals(ty, text, start).and_then(|(s, e)| { - self.captures_nfa_with_match(slots, text, s, e) + self.captures_nfa_type(MatchNfaType::Auto, slots, text, s, e) }) } MatchType::Dfa => { @@ -566,7 +565,7 @@ impl<'c> RegularExpression for ExecNoSync<'c> { } else { match self.find_dfa_forward(text, start) { dfa::Result::Match((s, e)) => { - self.captures_nfa_with_match(slots, text, s, e) + self.captures_nfa_type(MatchNfaType::Auto, slots, text, s, e) } dfa::Result::NoMatch(_) => None, dfa::Result::Quit => self.captures_nfa(slots, text, start), @@ -576,7 +575,7 @@ impl<'c> RegularExpression for ExecNoSync<'c> { MatchType::DfaAnchoredReverse => { match self.find_dfa_anchored_reverse(text, start) { dfa::Result::Match((s, e)) => { - self.captures_nfa_with_match(slots, text, s, e) + self.captures_nfa_type(MatchNfaType::Auto, slots, text, s, e) } dfa::Result::NoMatch(_) => None, dfa::Result::Quit => self.captures_nfa(slots, text, start), @@ -585,14 +584,14 @@ impl<'c> RegularExpression for ExecNoSync<'c> { MatchType::DfaSuffix => { match self.find_dfa_reverse_suffix(text, start) { dfa::Result::Match((s, e)) => { - self.captures_nfa_with_match(slots, text, s, e) + self.captures_nfa_type(MatchNfaType::Auto, slots, text, s, e) } dfa::Result::NoMatch(_) => None, dfa::Result::Quit => self.captures_nfa(slots, text, start), } } MatchType::Nfa(ty) => { - self.captures_nfa_type(ty, slots, text, start) + self.captures_nfa_type(ty, slots, text, start, text.len()) } MatchType::Nothing => None, MatchType::DfaMany => { @@ -830,7 +829,7 @@ impl<'c> ExecNoSync<'c> { text: &[u8], start: usize, ) -> bool { - self.exec_nfa(ty, &mut [false], &mut [], true, text, start) + self.exec_nfa(ty, &mut [false], &mut [], true, text, start, text.len()) } /// Finds the shortest match using an NFA. @@ -846,7 +845,7 @@ impl<'c> ExecNoSync<'c> { start: usize, ) -> Option { let mut slots = [None, None]; - if self.exec_nfa(ty, &mut [false], &mut slots, true, text, start) { + if self.exec_nfa(ty, &mut [false], &mut slots, true, text, start, text.len()) { slots[1] } else { None @@ -861,7 +860,7 @@ impl<'c> ExecNoSync<'c> { start: usize, ) -> Option<(usize, usize)> { let mut slots = [None, None]; - if self.exec_nfa(ty, &mut [false], &mut slots, false, text, start) { + if self.exec_nfa(ty, &mut [false], &mut slots, false, text, start, text.len()) { match (slots[0], slots[1]) { (Some(s), Some(e)) => Some((s, e)), _ => None, @@ -871,26 +870,6 @@ impl<'c> ExecNoSync<'c> { } } - /// Like find_nfa, but fills in captures and restricts the search space - /// using previously found match information. - /// - /// `slots` should have length equal to `2 * nfa.captures.len()`. - fn captures_nfa_with_match( - &self, - slots: &mut [Slot], - text: &[u8], - match_start: usize, - match_end: usize, - ) -> Option<(usize, usize)> { - // We can't use match_end directly, because we may need to examine one - // "character" after the end of a match for lookahead operators. We - // need to move two characters beyond the end, since some look-around - // operations may falsely assume a premature end of text otherwise. - let e = cmp::min( - next_utf8(text, next_utf8(text, match_end)), text.len()); - self.captures_nfa(slots, &text[..e], match_start) - } - /// Like find_nfa, but fills in captures. /// /// `slots` should have length equal to `2 * nfa.captures.len()`. @@ -900,7 +879,7 @@ impl<'c> ExecNoSync<'c> { text: &[u8], start: usize, ) -> Option<(usize, usize)> { - self.captures_nfa_type(MatchNfaType::Auto, slots, text, start) + self.captures_nfa_type(MatchNfaType::Auto, slots, text, start, text.len()) } /// Like captures_nfa, but allows specification of type of NFA engine. @@ -910,8 +889,9 @@ impl<'c> ExecNoSync<'c> { slots: &mut [Slot], text: &[u8], start: usize, + end: usize, ) -> Option<(usize, usize)> { - if self.exec_nfa(ty, &mut [false], slots, false, text, start) { + if self.exec_nfa(ty, &mut [false], slots, false, text, start, end) { match (slots[0], slots[1]) { (Some(s), Some(e)) => Some((s, e)), _ => None, @@ -929,6 +909,7 @@ impl<'c> ExecNoSync<'c> { quit_after_match: bool, text: &[u8], start: usize, + end: usize, ) -> bool { use self::MatchNfaType::*; if let Auto = ty { @@ -940,10 +921,10 @@ impl<'c> ExecNoSync<'c> { } match ty { Auto => unreachable!(), - Backtrack => self.exec_backtrack(matches, slots, text, start), + Backtrack => self.exec_backtrack(matches, slots, text, start, end), PikeVM => { self.exec_pikevm( - matches, slots, quit_after_match, text, start) + matches, slots, quit_after_match, text, start, end) } } } @@ -956,6 +937,7 @@ impl<'c> ExecNoSync<'c> { quit_after_match: bool, text: &[u8], start: usize, + end: usize, ) -> bool { if self.ro.nfa.uses_bytes() { pikevm::Fsm::exec( @@ -965,7 +947,8 @@ impl<'c> ExecNoSync<'c> { slots, quit_after_match, ByteInput::new(text, self.ro.nfa.only_utf8), - start) + start, + end) } else { pikevm::Fsm::exec( &self.ro.nfa, @@ -974,7 +957,8 @@ impl<'c> ExecNoSync<'c> { slots, quit_after_match, CharInput::new(text), - start) + start, + end) } } @@ -985,6 +969,7 @@ impl<'c> ExecNoSync<'c> { slots: &mut [Slot], text: &[u8], start: usize, + end: usize ) -> bool { if self.ro.nfa.uses_bytes() { backtrack::Bounded::exec( @@ -993,7 +978,8 @@ impl<'c> ExecNoSync<'c> { matches, slots, ByteInput::new(text, self.ro.nfa.only_utf8), - start) + start, + end) } else { backtrack::Bounded::exec( &self.ro.nfa, @@ -1001,7 +987,8 @@ impl<'c> ExecNoSync<'c> { matches, slots, CharInput::new(text), - start) + start, + end) } } @@ -1045,11 +1032,12 @@ impl<'c> ExecNoSync<'c> { &mut [], false, text, - start) + start, + text.len()) } } } - Nfa(ty) => self.exec_nfa(ty, matches, &mut [], false, text, start), + Nfa(ty) => self.exec_nfa(ty, matches, &mut [], false, text, start, text.len()), Nothing => false, } } diff --git a/src/pikevm.rs b/src/pikevm.rs index 80d44717ae..86ee8c1213 100644 --- a/src/pikevm.rs +++ b/src/pikevm.rs @@ -107,6 +107,7 @@ impl<'r, I: Input> Fsm<'r, I> { quit_after_match: bool, input: I, start: usize, + end: usize, ) -> bool { let mut cache = cache.borrow_mut(); let cache = &mut cache.pikevm; @@ -124,6 +125,7 @@ impl<'r, I: Input> Fsm<'r, I> { slots, quit_after_match, at, + end, ) } @@ -135,6 +137,7 @@ impl<'r, I: Input> Fsm<'r, I> { slots: &mut [Slot], quit_after_match: bool, mut at: InputAt, + end: usize, ) -> bool { let mut matched = false; let mut all_matched = false; @@ -212,7 +215,7 @@ impl<'r, I: Input> Fsm<'r, I> { } } } - if at.is_end() { + if at.pos() == end { break; } at = at_next; diff --git a/tests/regression.rs b/tests/regression.rs index 724e01bfa0..2298afae25 100644 --- a/tests/regression.rs +++ b/tests/regression.rs @@ -88,8 +88,13 @@ ismatch!(reverse_suffix2, r"\d\d\d000", "153.230000\n", true); matiter!(reverse_suffix3, r"\d\d\d000", "153.230000\n", (4, 10)); // See: /~https://github.com/rust-lang/regex/issues/334 -mat!(captures_after_dfa_premature_end, r"a(b*(X|$))?", "abcbX", +// See: /~https://github.com/rust-lang/regex/issues/557 +mat!(captures_after_dfa_premature_end1, r"a(b*(X|$))?", "abcbX", Some((0, 1)), None, None); +mat!(captures_after_dfa_premature_end2, r"a(bc*(X|$))?", "abcbX", + Some((0, 1)), None, None); +mat!(captures_after_dfa_premature_end3, r"(aa$)?", "aaz", + Some((0, 0))); // See: /~https://github.com/rust-lang/regex/issues/437 ismatch!(