From 0486cb79694ccd4f91789f0f2d3eba9adb82d337 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Tue, 28 Feb 2023 17:09:01 -0500 Subject: [PATCH] syntax: rename 'Group' to 'Capture' Now that it *only* represents a capturing group, it makes sense to give it a more specific name. --- regex-syntax/src/hir/literal.rs | 2 +- regex-syntax/src/hir/mod.rs | 36 ++++++------ regex-syntax/src/hir/print.rs | 4 +- regex-syntax/src/hir/translate.rs | 94 ++++++++++++++++--------------- regex-syntax/src/hir/visitor.rs | 10 ++-- src/compile.rs | 2 +- 6 files changed, 75 insertions(+), 73 deletions(-) diff --git a/regex-syntax/src/hir/literal.rs b/regex-syntax/src/hir/literal.rs index 4a892d837..2fbcdcff2 100644 --- a/regex-syntax/src/hir/literal.rs +++ b/regex-syntax/src/hir/literal.rs @@ -186,7 +186,7 @@ impl Extractor { } Class(hir::Class::Bytes(ref cls)) => self.extract_class_bytes(cls), Repetition(ref rep) => self.extract_repetition(rep), - Group(hir::Group { ref hir, .. }) => self.extract(hir), + Capture(hir::Capture { ref hir, .. }) => self.extract(hir), Concat(ref hirs) => match self.kind { ExtractKind::Prefix => self.extract_concat(hirs.iter()), ExtractKind::Suffix => self.extract_concat(hirs.iter().rev()), diff --git a/regex-syntax/src/hir/mod.rs b/regex-syntax/src/hir/mod.rs index d70cad947..7be604a66 100644 --- a/regex-syntax/src/hir/mod.rs +++ b/regex-syntax/src/hir/mod.rs @@ -185,8 +185,8 @@ pub enum HirKind { Look(Look), /// A repetition operation applied to a child expression. Repetition(Repetition), - /// A possibly capturing group, which contains a child expression. - Group(Group), + /// A capturing group, which contains a child expression. + Capture(Capture), /// A concatenation of expressions. A concatenation always has at least two /// child expressions. /// @@ -329,11 +329,11 @@ impl Hir { Hir { kind: HirKind::Repetition(rep), props } } - /// Creates a group HIR expression. + /// Creates a capture HIR expression. 
#[inline] - pub fn group(group: Group) -> Hir { - let props = Properties::group(&group); - Hir { kind: HirKind::Group(group), props } + pub fn capture(capture: Capture) -> Hir { + let props = Properties::capture(&capture); + Hir { kind: HirKind::Capture(capture), props } } /// Returns the concatenation of the given expressions. @@ -529,7 +529,7 @@ impl HirKind { | HirKind::Literal(_) | HirKind::Class(_) | HirKind::Look(_) => false, - HirKind::Group(_) + HirKind::Capture(_) | HirKind::Repetition(_) | HirKind::Concat(_) | HirKind::Alternation(_) => true, @@ -1431,10 +1431,10 @@ impl Look { /// in a `Hir`. Instead, non-capturing grouping is handled automatically by /// the recursive structure of the `Hir` itself. #[derive(Clone, Debug, Eq, PartialEq)] -pub struct Group { - /// The capture index of the group. +pub struct Capture { + /// The capture index of the capture. pub index: u32, - /// The name of the group, if it exists. + /// The name of the capture, if it exists. pub name: Option<Box<str>>, /// The expression inside the capturing group, which may be empty. pub hir: Box<Hir>, @@ -1523,7 +1523,7 @@ impl Drop for Hir { | HirKind::Literal(_) | HirKind::Class(_) | HirKind::Look(_) => return, - HirKind::Group(ref x) if !x.hir.kind.has_subexprs() => return, + HirKind::Capture(ref x) if !x.hir.kind.has_subexprs() => return, HirKind::Repetition(ref x) if !x.hir.kind.has_subexprs() => return, HirKind::Concat(ref x) if x.is_empty() => return, HirKind::Alternation(ref x) if x.is_empty() => return, @@ -1537,7 +1537,7 @@ impl Drop for Hir { | HirKind::Literal(_) | HirKind::Class(_) | HirKind::Look(_) => {} - HirKind::Group(ref mut x) => { + HirKind::Capture(ref mut x) => { stack.push(mem::replace(&mut x.hir, Hir::empty())); } HirKind::Repetition(ref mut x) => { @@ -1955,13 +1955,9 @@ impl Properties { Properties(Box::new(inner)) } - /// Create a new set of HIR properties for a group. 
- fn group(group: &Group) -> Properties { - // FIXME: Groups really should always have the same properties as - // their child expressions. But the literal properties somewhat - // over-constrained in what they represent in order to make downstream - // analyses a bit more straight-forward. - let p = group.hir.properties(); + /// Create a new set of HIR properties for a capture. + fn capture(capture: &Capture) -> Properties { + let p = capture.hir.properties(); Properties(Box::new(PropertiesI { captures_len: p.captures_len().saturating_add(1), literal: false, @@ -3055,7 +3051,7 @@ mod tests { let run = || { let mut expr = Hir::empty(); for _ in 0..100 { - expr = Hir::group(Group { + expr = Hir::capture(Capture { index: 1, name: None, hir: Box::new(expr), diff --git a/regex-syntax/src/hir/print.rs b/regex-syntax/src/hir/print.rs index 63d78ad00..ef654d40c 100644 --- a/regex-syntax/src/hir/print.rs +++ b/regex-syntax/src/hir/print.rs @@ -190,7 +190,7 @@ impl Visitor for Writer { self.wtr.write_str(r"\B")?; } }, - HirKind::Group(hir::Group { ref name, .. }) => { + HirKind::Capture(hir::Capture { ref name, .. }) => { self.wtr.write_str("(")?; if let Some(ref name) = *name { write!(self.wtr, "?P<{}>", name)?; @@ -254,7 +254,7 @@ impl Visitor for Writer { self.wtr.write_str("?")?; } } - HirKind::Group(_) + HirKind::Capture(_) | HirKind::Concat(_) | HirKind::Alternation(_) => { self.wtr.write_str(r")")?; diff --git a/regex-syntax/src/hir/translate.rs b/regex-syntax/src/hir/translate.rs index b5bb41767..fcef47a1e 100644 --- a/regex-syntax/src/hir/translate.rs +++ b/regex-syntax/src/hir/translate.rs @@ -173,7 +173,7 @@ enum HirFrame { /// This sentinel only exists to stop other things (like flattening /// literals) from reaching across repetition operators. 
Repetition, - /// This is pushed on to the stack upon first seeing any kind of group, + /// This is pushed on to the stack upon first seeing any kind of capture, /// indicated by parentheses (including non-capturing groups). It is popped /// upon leaving a group. Group { @@ -414,7 +414,7 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { let expr = self.pop().unwrap().unwrap_expr(); let old_flags = self.pop().unwrap().unwrap_group(); self.trans().flags.set(old_flags); - self.push(HirFrame::Expr(self.hir_group(x, expr))); + self.push(HirFrame::Expr(self.hir_capture(x, expr))); } Ast::Concat(_) => { let mut exprs = vec![]; @@ -902,7 +902,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { }) } - fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir { + fn hir_capture(&self, group: &ast::Group, expr: Hir) -> Hir { let (index, name) = match group.kind { ast::GroupKind::CaptureIndex(index) => (index, None), ast::GroupKind::CaptureName { ref name, .. } => { @@ -912,7 +912,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { // in which the data type is defined handles this automatically. 
ast::GroupKind::NonCapturing(_) => return expr, }; - Hir::group(hir::Group { index, name, hir: Box::new(expr) }) + Hir::capture(hir::Capture { index, name, hir: Box::new(expr) }) } fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir { @@ -1352,12 +1352,12 @@ mod tests { Hir::literal(s) } - fn hir_group(index: u32, expr: Hir) -> Hir { - Hir::group(hir::Group { index, name: None, hir: Box::new(expr) }) + fn hir_capture(index: u32, expr: Hir) -> Hir { + Hir::capture(hir::Capture { index, name: None, hir: Box::new(expr) }) } - fn hir_group_name(index: u32, name: &str, expr: Hir) -> Hir { - Hir::group(hir::Group { + fn hir_capture_name(index: u32, name: &str, expr: Hir) -> Hir { + Hir::capture(hir::Capture { index, name: Some(name.into()), hir: Box::new(expr), @@ -1528,35 +1528,35 @@ mod tests { fn empty() { assert_eq!(t(""), Hir::empty()); assert_eq!(t("(?i)"), Hir::empty()); - assert_eq!(t("()"), hir_group(1, Hir::empty())); + assert_eq!(t("()"), hir_capture(1, Hir::empty())); assert_eq!(t("(?:)"), Hir::empty()); - assert_eq!(t("(?P<wat>)"), hir_group_name(1, "wat", Hir::empty())); + assert_eq!(t("(?P<wat>)"), hir_capture_name(1, "wat", Hir::empty())); assert_eq!(t("|"), hir_alt(vec![Hir::empty(), Hir::empty()])); assert_eq!( t("()|()"), hir_alt(vec![ - hir_group(1, Hir::empty()), - hir_group(2, Hir::empty()), + hir_capture(1, Hir::empty()), + hir_capture(2, Hir::empty()), ]) ); assert_eq!( t("(|b)"), - hir_group(1, hir_alt(vec![Hir::empty(), hir_lit("b"),])) + hir_capture(1, hir_alt(vec![Hir::empty(), hir_lit("b"),])) ); assert_eq!( t("(a|)"), - hir_group(1, hir_alt(vec![hir_lit("a"), Hir::empty(),])) + hir_capture(1, hir_alt(vec![hir_lit("a"), Hir::empty(),])) ); assert_eq!( t("(a||c)"), - hir_group( + hir_capture( 1, hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c"),]) ) ); assert_eq!( t("(||)"), - hir_group( + hir_capture( 1, hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty(),]) ) @@ -1740,56 +1740,59 @@ mod tests { #[test] fn group() { - 
assert_eq!(t("(a)"), hir_group(1, hir_lit("a"))); + assert_eq!(t("(a)"), hir_capture(1, hir_lit("a"))); assert_eq!( t("(a)(b)"), hir_cat(vec![ - hir_group(1, hir_lit("a")), - hir_group(2, hir_lit("b")), + hir_capture(1, hir_lit("a")), + hir_capture(2, hir_lit("b")), ]) ); assert_eq!( t("(a)|(b)"), hir_alt(vec![ - hir_group(1, hir_lit("a")), - hir_group(2, hir_lit("b")), + hir_capture(1, hir_lit("a")), + hir_capture(2, hir_lit("b")), ]) ); - assert_eq!(t("(?P<foo>)"), hir_group_name(1, "foo", Hir::empty())); - assert_eq!(t("(?P<foo>a)"), hir_group_name(1, "foo", hir_lit("a"))); + assert_eq!(t("(?P<foo>)"), hir_capture_name(1, "foo", Hir::empty())); + assert_eq!(t("(?P<foo>a)"), hir_capture_name(1, "foo", hir_lit("a"))); assert_eq!( t("(?P<foo>a)(?P<bar>b)"), hir_cat(vec![ - hir_group_name(1, "foo", hir_lit("a")), - hir_group_name(2, "bar", hir_lit("b")), + hir_capture_name(1, "foo", hir_lit("a")), + hir_capture_name(2, "bar", hir_lit("b")), ]) ); assert_eq!(t("(?:)"), Hir::empty()); assert_eq!(t("(?:a)"), hir_lit("a")); assert_eq!( t("(?:a)(b)"), - hir_cat(vec![hir_lit("a"), hir_group(1, hir_lit("b")),]) + hir_cat(vec![hir_lit("a"), hir_capture(1, hir_lit("b")),]) ); assert_eq!( t("(a)(?:b)(c)"), hir_cat(vec![ - hir_group(1, hir_lit("a")), + hir_capture(1, hir_lit("a")), hir_lit("b"), - hir_group(2, hir_lit("c")), + hir_capture(2, hir_lit("c")), ]) ); assert_eq!( t("(a)(?P<foo>b)(c)"), hir_cat(vec![ - hir_group(1, hir_lit("a")), - hir_group_name(2, "foo", hir_lit("b")), - hir_group(3, hir_lit("c")), + hir_capture(1, hir_lit("a")), + hir_capture_name(2, "foo", hir_lit("b")), + hir_capture(3, hir_lit("c")), ]) ); - assert_eq!(t("()"), hir_group(1, Hir::empty())); - assert_eq!(t("((?i))"), hir_group(1, Hir::empty())); - assert_eq!(t("((?x))"), hir_group(1, Hir::empty())); - assert_eq!(t("(((?x)))"), hir_group(1, hir_group(2, Hir::empty()))); + assert_eq!(t("()"), hir_capture(1, Hir::empty())); + assert_eq!(t("((?i))"), hir_capture(1, Hir::empty())); + assert_eq!(t("((?x))"), hir_capture(1, 
Hir::empty())); + assert_eq!( + t("(((?x)))"), + hir_capture(1, hir_capture(2, Hir::empty())) + ); } #[test] @@ -1818,7 +1821,7 @@ mod tests { assert_eq!( t("((?i-u)a)b"), hir_cat(vec![ - hir_group(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])), + hir_capture(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])), hir_lit("b"), ]) ); @@ -1908,7 +1911,7 @@ mod tests { t("ab?"), hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),]) ); - assert_eq!(t("(ab)?"), hir_quest(true, hir_group(1, hir_lit("ab")))); + assert_eq!(t("(ab)?"), hir_quest(true, hir_capture(1, hir_lit("ab")))); assert_eq!( t("a|b?"), hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),]) @@ -1922,7 +1925,7 @@ mod tests { let c = || hir_look(hir::Look::WordUnicode); let d = || hir_look(hir::Look::WordUnicodeNegate); - assert_eq!(t("(^$)"), hir_group(1, hir_cat(vec![a(), b()]))); + assert_eq!(t("(^$)"), hir_capture(1, hir_cat(vec![a(), b()]))); assert_eq!(t("^|$"), hir_alt(vec![a(), b()])); assert_eq!(t(r"^|$|\b"), hir_alt(vec![a(), b(), c()])); assert_eq!( @@ -1933,11 +1936,14 @@ mod tests { hir_cat(vec![c(), d()]), ]) ); - assert_eq!(t("(^|$)"), hir_group(1, hir_alt(vec![a(), b()]))); - assert_eq!(t(r"(^|$|\b)"), hir_group(1, hir_alt(vec![a(), b(), c()]))); + assert_eq!(t("(^|$)"), hir_capture(1, hir_alt(vec![a(), b()]))); + assert_eq!( + t(r"(^|$|\b)"), + hir_capture(1, hir_alt(vec![a(), b(), c()])) + ); assert_eq!( t(r"(^$|$\b|\b\B)"), - hir_group( + hir_capture( 1, hir_alt(vec![ hir_cat(vec![a(), b()]), @@ -1948,15 +1954,15 @@ mod tests { ); assert_eq!( t(r"(^$|($\b|(\b\B)))"), - hir_group( + hir_capture( 1, hir_alt(vec![ hir_cat(vec![a(), b()]), - hir_group( + hir_capture( 2, hir_alt(vec![ hir_cat(vec![b(), c()]), - hir_group(3, hir_cat(vec![c(), d()])), + hir_capture(3, hir_cat(vec![c(), d()])), ]) ), ]) diff --git a/regex-syntax/src/hir/visitor.rs b/regex-syntax/src/hir/visitor.rs index 0012d5697..ba1db238a 100644 --- a/regex-syntax/src/hir/visitor.rs +++ b/regex-syntax/src/hir/visitor.rs @@ 
-75,9 +75,9 @@ enum Frame<'a> { /// A stack frame allocated just before descending into a repetition /// operator's child node. Repetition(&'a hir::Repetition), - /// A stack frame allocated just before descending into a group's child + /// A stack frame allocated just before descending into a capture's child /// node. - Group(&'a hir::Group), + Capture(&'a hir::Capture), /// The stack frame used while visiting every child node of a concatenation /// of expressions. Concat { @@ -150,7 +150,7 @@ impl<'a> HeapVisitor<'a> { fn induct(&mut self, hir: &'a Hir) -> Option<Frame<'a>> { match *hir.kind() { HirKind::Repetition(ref x) => Some(Frame::Repetition(x)), - HirKind::Group(ref x) => Some(Frame::Group(x)), + HirKind::Capture(ref x) => Some(Frame::Capture(x)), HirKind::Concat(ref x) if x.is_empty() => None, HirKind::Concat(ref x) => { Some(Frame::Concat { head: &x[0], tail: &x[1..] }) @@ -168,7 +168,7 @@ impl<'a> HeapVisitor<'a> { fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> { match induct { Frame::Repetition(_) => None, - Frame::Group(_) => None, + Frame::Capture(_) => None, Frame::Concat { tail, .. } => { if tail.is_empty() { None @@ -196,7 +196,7 @@ impl<'a> Frame<'a> { fn child(&self) -> &'a Hir { match *self { Frame::Repetition(rep) => &rep.hir, - Frame::Group(group) => &group.hir, + Frame::Capture(capture) => &capture.hir, Frame::Concat { head, .. } => head, Frame::Alternation { head, .. } => head, } diff --git a/src/compile.rs b/src/compile.rs index 692533340..50ab78700 100644 --- a/src/compile.rs +++ b/src/compile.rs @@ -368,7 +368,7 @@ impl Compiler { self.c_empty_look(prog::EmptyLook::NotWordBoundary) } }, - Group(hir::Group { index, ref name, ref hir }) => { + Capture(hir::Capture { index, ref name, ref hir }) => { if index as usize >= self.compiled.captures.len() { let name = match *name { None => None,