From aa4e4c7120b0090ce0624e3c42a2ed06dd8b918a Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sat, 30 Sep 2023 07:24:05 -0400 Subject: [PATCH] automata: fix unintended panic in max_haystack_len This fixes a bug where the bounded backtracker's `max_haystack_len` could panic if its bitset capacity ended up being smaller than the total number of NFA states. Under a default configuration this seems unlikely to happen due to the default limits on the size of a compiled regex. But if the compiled regex size limit is increased to a large number, then the likelihood of this panicking increases. Of course, one can provoke this even easier by just setting the visited capacity to a small number. Indeed, this is how we provoke it in a regression test. --- regex-automata/src/meta/wrappers.rs | 5 +++- regex-automata/src/nfa/thompson/backtrack.rs | 28 ++++++++++++++++++-- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/regex-automata/src/meta/wrappers.rs b/regex-automata/src/meta/wrappers.rs index 08110d9bb..6cb19ba0d 100644 --- a/regex-automata/src/meta/wrappers.rs +++ b/regex-automata/src/meta/wrappers.rs @@ -212,7 +212,10 @@ impl BoundedBacktrackerEngine { .configure(backtrack_config) .build_from_nfa(nfa.clone()) .map_err(BuildError::nfa)?; - debug!("BoundedBacktracker built"); + debug!( + "BoundedBacktracker built (max haystack length: {:?})", + engine.max_haystack_len() + ); Ok(Some(BoundedBacktrackerEngine(engine))) } #[cfg(not(feature = "nfa-backtrack"))] diff --git a/regex-automata/src/nfa/thompson/backtrack.rs b/regex-automata/src/nfa/thompson/backtrack.rs index eba037c1d..df99e456d 100644 --- a/regex-automata/src/nfa/thompson/backtrack.rs +++ b/regex-automata/src/nfa/thompson/backtrack.rs @@ -820,8 +820,11 @@ impl BoundedBacktracker { // bytes to the capacity in bits. let capacity = 8 * self.get_config().get_visited_capacity(); let blocks = div_ceil(capacity, Visited::BLOCK_SIZE); - let real_capacity = blocks * Visited::BLOCK_SIZE; - (real_capacity / self.nfa.states().len()) - 1 + let real_capacity = blocks.saturating_mul(Visited::BLOCK_SIZE); + // It's possible for `real_capacity` to be smaller than the number of + // NFA states for particularly large regexes, so we saturate towards + // zero. + (real_capacity / self.nfa.states().len()).saturating_sub(1) } } @@ -1882,3 +1885,24 @@ fn div_ceil(lhs: usize, rhs: usize) -> usize { (lhs / rhs) + 1 } } + +#[cfg(test)] +mod tests { + use super::*; + + // This is a regression test for the maximum haystack length computation. + // Previously, it assumed that the total capacity of the backtracker's + // bitset would always be greater than the number of NFA states. But there + // is of course no guarantee that this is true. This regression test + // ensures that not only does `max_haystack_len` not panic, but that it + // should return `0`. + #[cfg(feature = "syntax")] + #[test] + fn max_haystack_len_overflow() { + let re = BoundedBacktracker::builder() + .configure(BoundedBacktracker::config().visited_capacity(10)) + .build(r"[0-9A-Za-z]{100}") + .unwrap(); + assert_eq!(0, re.max_haystack_len()); + } +}