diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5d5b24c4c..060a2226c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -122,6 +122,8 @@ jobs:
     - name: Run subset of regex-automata tests
       if: matrix.build != 'win-gnu'  # Just horrifically slow.
       run: ${{ env.CARGO }} test --verbose --manifest-path regex-automata/Cargo.toml $TARGET
+    - name: Run regex-lite tests
+      run: ${{ env.CARGO }} test --verbose --manifest-path regex-lite/Cargo.toml $TARGET
     - if: matrix.build == 'nightly'
       name: Run benchmarks as tests
       run: |
diff --git a/Cargo.toml b/Cargo.toml
index 50f6ca6de..c8781f39f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -23,6 +23,7 @@ members = [
   "regex-automata",
   "regex-capi",
   "regex-cli",
+  "regex-lite",
   "regex-syntax",
   "regex-test",
 ]
diff --git a/regex-lite/Cargo.toml b/regex-lite/Cargo.toml
new file mode 100644
index 000000000..642e7dd64
--- /dev/null
+++ b/regex-lite/Cargo.toml
@@ -0,0 +1,27 @@
+[package]
+name = "regex-lite"
+version = "0.1.0"  #:version
+authors = ["The Rust Project Developers", "Andrew Gallant <jamslam@gmail.com>"]
+license = "MIT OR Apache-2.0"
+repository = "https://github.com/rust-lang/regex/tree/master/regex-lite"
+documentation = "https://docs.rs/regex-lite"
+description = """
+A lightweight regex engine that optimizes for binary size and compilation time.
+"""
+workspace = ".."
+edition = "2021"
+rust-version = "1.60.0"
+
+# Features are documented in the "Crate features" section of the crate docs:
+# https://docs.rs/regex-lite/*/#crate-features
+[features]
+default = ["std"]
+std = []
+
+[package.metadata.docs.rs]
+# We want to document all features.
+all-features = true
+# To test this locally, run:
+#
+#     RUSTDOCFLAGS="--cfg docsrs" cargo +nightly doc --all-features
+rustdoc-args = ["--cfg", "docsrs"]
diff --git a/regex-lite/LICENSE-APACHE b/regex-lite/LICENSE-APACHE
new file mode 100644
index 000000000..16fe87b06
--- /dev/null
+++ b/regex-lite/LICENSE-APACHE
@@ -0,0 +1,201 @@
+                              Apache License
+                        Version 2.0, January 2004
+                     http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+   "License" shall mean the terms and conditions for use, reproduction,
+   and distribution as defined by Sections 1 through 9 of this document.
+
+   "Licensor" shall mean the copyright owner or entity authorized by
+   the copyright owner that is granting the License.
+
+   "Legal Entity" shall mean the union of the acting entity and all
+   other entities that control, are controlled by, or are under common
+   control with that entity. For the purposes of this definition,
+   "control" means (i) the power, direct or indirect, to cause the
+   direction or management of such entity, whether by contract or
+   otherwise, or (ii) ownership of fifty percent (50%) or more of the
+   outstanding shares, or (iii) beneficial ownership of such entity.
+
+   "You" (or "Your") shall mean an individual or Legal Entity
+   exercising permissions granted by this License.
+
+   "Source" form shall mean the preferred form for making modifications,
+   including but not limited to software source code, documentation
+   source, and configuration files.
+
+   "Object" form shall mean any form resulting from mechanical
+   transformation or translation of a Source form, including but
+   not limited to compiled object code, generated documentation,
+   and conversions to other media types.
+
+   "Work" shall mean the work of authorship, whether in Source or
+   Object form, made available under the License, as indicated by a
+   copyright notice that is included in or attached to the work
+   (an example is provided in the Appendix below).
+
+   "Derivative Works" shall mean any work, whether in Source or Object
+   form, that is based on (or derived from) the Work and for which the
+   editorial revisions, annotations, elaborations, or other modifications
+   represent, as a whole, an original work of authorship. For the purposes
+   of this License, Derivative Works shall not include works that remain
+   separable from, or merely link (or bind by name) to the interfaces of,
+   the Work and Derivative Works thereof.
+
+   "Contribution" shall mean any work of authorship, including
+   the original version of the Work and any modifications or additions
+   to that Work or Derivative Works thereof, that is intentionally
+   submitted to Licensor for inclusion in the Work by the copyright owner
+   or by an individual or Legal Entity authorized to submit on behalf of
+   the copyright owner. For the purposes of this definition, "submitted"
+   means any form of electronic, verbal, or written communication sent
+   to the Licensor or its representatives, including but not limited to
+   communication on electronic mailing lists, source code control systems,
+   and issue tracking systems that are managed by, or on behalf of, the
+   Licensor for the purpose of discussing and improving the Work, but
+   excluding communication that is conspicuously marked or otherwise
+   designated in writing by the copyright owner as "Not a Contribution."
+
+   "Contributor" shall mean Licensor and any individual or Legal Entity
+   on behalf of whom a Contribution has been received by Licensor and
+   subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   copyright license to reproduce, prepare Derivative Works of,
+   publicly display, publicly perform, sublicense, and distribute the
+   Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   (except as stated in this section) patent license to make, have made,
+   use, offer to sell, sell, import, and otherwise transfer the Work,
+   where such license applies only to those patent claims licensable
+   by such Contributor that are necessarily infringed by their
+   Contribution(s) alone or by combination of their Contribution(s)
+   with the Work to which such Contribution(s) was submitted. If You
+   institute patent litigation against any entity (including a
+   cross-claim or counterclaim in a lawsuit) alleging that the Work
+   or a Contribution incorporated within the Work constitutes direct
+   or contributory patent infringement, then any patent licenses
+   granted to You under this License for that Work shall terminate
+   as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+   Work or Derivative Works thereof in any medium, with or without
+   modifications, and in Source or Object form, provided that You
+   meet the following conditions:
+
+   (a) You must give any other recipients of the Work or
+       Derivative Works a copy of this License; and
+
+   (b) You must cause any modified files to carry prominent notices
+       stating that You changed the files; and
+
+   (c) You must retain, in the Source form of any Derivative Works
+       that You distribute, all copyright, patent, trademark, and
+       attribution notices from the Source form of the Work,
+       excluding those notices that do not pertain to any part of
+       the Derivative Works; and
+
+   (d) If the Work includes a "NOTICE" text file as part of its
+       distribution, then any Derivative Works that You distribute must
+       include a readable copy of the attribution notices contained
+       within such NOTICE file, excluding those notices that do not
+       pertain to any part of the Derivative Works, in at least one
+       of the following places: within a NOTICE text file distributed
+       as part of the Derivative Works; within the Source form or
+       documentation, if provided along with the Derivative Works; or,
+       within a display generated by the Derivative Works, if and
+       wherever such third-party notices normally appear. The contents
+       of the NOTICE file are for informational purposes only and
+       do not modify the License. You may add Your own attribution
+       notices within Derivative Works that You distribute, alongside
+       or as an addendum to the NOTICE text from the Work, provided
+       that such additional attribution notices cannot be construed
+       as modifying the License.
+
+   You may add Your own copyright statement to Your modifications and
+   may provide additional or different license terms and conditions
+   for use, reproduction, or distribution of Your modifications, or
+   for any such Derivative Works as a whole, provided Your use,
+   reproduction, and distribution of the Work otherwise complies with
+   the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+   any Contribution intentionally submitted for inclusion in the Work
+   by You to the Licensor shall be under the terms and conditions of
+   this License, without any additional terms or conditions.
+   Notwithstanding the above, nothing herein shall supersede or modify
+   the terms of any separate license agreement you may have executed
+   with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+   names, trademarks, service marks, or product names of the Licensor,
+   except as required for reasonable and customary use in describing the
+   origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+   agreed to in writing, Licensor provides the Work (and each
+   Contributor provides its Contributions) on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+   implied, including, without limitation, any warranties or conditions
+   of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+   PARTICULAR PURPOSE. You are solely responsible for determining the
+   appropriateness of using or redistributing the Work and assume any
+   risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+   whether in tort (including negligence), contract, or otherwise,
+   unless required by applicable law (such as deliberate and grossly
+   negligent acts) or agreed to in writing, shall any Contributor be
+   liable to You for damages, including any direct, indirect, special,
+   incidental, or consequential damages of any character arising as a
+   result of this License or out of the use or inability to use the
+   Work (including but not limited to damages for loss of goodwill,
+   work stoppage, computer failure or malfunction, or any and all
+   other commercial damages or losses), even if such Contributor
+   has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+   the Work or Derivative Works thereof, You may choose to offer,
+   and charge a fee for, acceptance of support, warranty, indemnity,
+   or other liability obligations and/or rights consistent with this
+   License. However, in accepting such obligations, You may act only
+   on Your own behalf and on Your sole responsibility, not on behalf
+   of any other Contributor, and only if You agree to indemnify,
+   defend, and hold each Contributor harmless for any liability
+   incurred by, or claims asserted against, such Contributor by reason
+   of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+   To apply the Apache License to your work, attach the following
+   boilerplate notice, with the fields enclosed by brackets "[]"
+   replaced with your own identifying information. (Don't include
+   the brackets!)  The text should be enclosed in the appropriate
+   comment syntax for the file format. We also recommend that a
+   file or class name and description of purpose be included on the
+   same "printed page" as the copyright notice for easier
+   identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/regex-lite/LICENSE-MIT b/regex-lite/LICENSE-MIT
new file mode 100644
index 000000000..39d4bdb5a
--- /dev/null
+++ b/regex-lite/LICENSE-MIT
@@ -0,0 +1,25 @@
+Copyright (c) 2014 The Rust Project Developers
+
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/regex-lite/README.md b/regex-lite/README.md
new file mode 100644
index 000000000..00d7bdd40
--- /dev/null
+++ b/regex-lite/README.md
@@ -0,0 +1 @@
+WIP
diff --git a/regex-lite/src/error.rs b/regex-lite/src/error.rs
new file mode 100644
index 000000000..a6313aa8a
--- /dev/null
+++ b/regex-lite/src/error.rs
@@ -0,0 +1,19 @@
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Error {
+    msg: &'static str,
+}
+
+impl Error {
+    pub(crate) fn new(msg: &'static str) -> Error {
+        Error { msg }
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for Error {}
+
+impl core::fmt::Display for Error {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(f, "{}", self.msg)
+    }
+}
diff --git a/regex-lite/src/hir/mod.rs b/regex-lite/src/hir/mod.rs
new file mode 100644
index 000000000..6a9935029
--- /dev/null
+++ b/regex-lite/src/hir/mod.rs
@@ -0,0 +1,553 @@
+use alloc::{boxed::Box, vec, vec::Vec};
+
+use crate::{error::Error, utf8};
+
+mod parse;
+
+/// The configuration for a regex parser.
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct Config {
+    /// The maximum number of times we're allowed to recurse.
+    ///
+    /// Note that unlike the regex-syntax parser, we actually use recursion in
+    /// this parser for simplicity. My hope is that by setting a conservative
+    /// default call limit and providing a way to configure it, that we can
+    /// keep this simplification. But if we must, we can re-work the parser to
+    /// put the call stack on the heap like regex-syntax does.
+    pub(crate) nest_limit: u32,
+    /// Various flags that control how a pattern is interpreted.
+    pub(crate) flags: Flags,
+}
+
+impl Default for Config {
+    fn default() -> Config {
+        Config { nest_limit: 50, flags: Flags::default() }
+    }
+}
+
+/// Various flags that control the interpretation of the pattern.
+///
+/// These can be set via explicit configuration in code, or change dynamically
+/// during parsing via inline flags. For example, `foo(?i:bar)baz` will match
+/// `foo` and `baz` case sensitively and `bar` case insensitively (assuming a
+/// default configuration).
+#[derive(Clone, Copy, Debug, Default)]
+pub(crate) struct Flags {
+    /// Whether to match case insensitively.
+    ///
+    /// This is the `i` flag.
+    pub(crate) case_insensitive: bool,
+    /// Whether `^` and `$` should be treated as line anchors or not.
+    ///
+    /// This is the `m` flag.
+    pub(crate) multi_line: bool,
+    /// Whether `.` should match line terminators or not.
+    ///
+    /// This is the `s` flag.
+    pub(crate) dot_matches_new_line: bool,
+    /// Whether to swap the meaning of greedy and non-greedy operators.
+    ///
+    /// This is the `U` flag.
+    pub(crate) swap_greed: bool,
+    /// Whether to enable CRLF mode.
+    ///
+    /// This is the `R` flag.
+    pub(crate) crlf: bool,
+    /// Whether to ignore whitespace. i.e., verbose mode.
+    ///
+    /// This is the `x` flag.
+    pub(crate) ignore_whitespace: bool,
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub(crate) struct Hir {
+    kind: HirKind,
+    is_start_anchored: bool,
+    is_match_empty: bool,
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub(crate) enum HirKind {
+    Empty,
+    Char(char),
+    Class(Class),
+    Look(Look),
+    Repetition(Repetition),
+    Capture(Capture),
+    Concat(Vec<Hir>),
+    Alternation(Vec<Hir>),
+}
+
+impl Hir {
+    /// Parses the given pattern string with the given configuration into a
+    /// structured representation. If the pattern is invalid, then an error
+    /// is returned.
+    pub(crate) fn parse(config: Config, pattern: &str) -> Result<Hir, Error> {
+        self::parse::Parser::new(config, pattern).parse()
+    }
+
+    /// Returns the underlying kind of this high-level intermediate
+    /// representation.
+    ///
+    /// Note that there is explicitly no way to build an `Hir` directly from
+    /// an `HirKind`. If you need to do that, then you must do case analysis
+    /// on the `HirKind` and call the appropriate smart constructor on `Hir`.
+    pub(crate) fn kind(&self) -> &HirKind {
+        &self.kind
+    }
+
+    /// Returns true if and only if this Hir expression can only match at the
+    /// beginning of a haystack.
+    pub(crate) fn is_start_anchored(&self) -> bool {
+        self.is_start_anchored
+    }
+
+    /// Returns true if and only if this Hir expression can match the empty
+    /// string.
+    pub(crate) fn is_match_empty(&self) -> bool {
+        self.is_match_empty
+    }
+
+    fn fail() -> Hir {
+        let kind = HirKind::Class(Class { ranges: vec![] });
+        Hir { kind, is_start_anchored: false, is_match_empty: false }
+    }
+
+    fn empty() -> Hir {
+        let kind = HirKind::Empty;
+        Hir { kind, is_start_anchored: false, is_match_empty: true }
+    }
+
+    fn char(ch: char) -> Hir {
+        let kind = HirKind::Char(ch);
+        Hir { kind, is_start_anchored: false, is_match_empty: false }
+    }
+
+    fn class(class: Class) -> Hir {
+        let kind = HirKind::Class(class);
+        Hir { kind, is_start_anchored: false, is_match_empty: false }
+    }
+
+    fn look(look: Look) -> Hir {
+        let kind = HirKind::Look(look);
+        Hir {
+            kind,
+            is_start_anchored: matches!(look, Look::Start),
+            is_match_empty: true,
+        }
+    }
+
+    fn repetition(rep: Repetition) -> Hir {
+        if rep.min == 0 && rep.max == Some(0) {
+            return Hir::empty();
+        } else if rep.min == 1 && rep.max == Some(1) {
+            return *rep.sub;
+        }
+        let is_start_anchored = rep.min > 0 && rep.sub.is_start_anchored;
+        let is_match_empty = rep.min == 0 || rep.sub.is_match_empty;
+        let kind = HirKind::Repetition(rep);
+        Hir { kind, is_start_anchored, is_match_empty }
+    }
+
+    fn capture(cap: Capture) -> Hir {
+        let is_start_anchored = cap.sub.is_start_anchored;
+        let is_match_empty = cap.sub.is_match_empty;
+        let kind = HirKind::Capture(cap);
+        Hir { kind, is_start_anchored, is_match_empty }
+    }
+
+    fn concat(mut subs: Vec<Hir>) -> Hir {
+        if subs.is_empty() {
+            Hir::empty()
+        } else if subs.len() == 1 {
+            subs.pop().unwrap()
+        } else {
+            let is_start_anchored = subs[0].is_start_anchored;
+            let is_match_empty = subs.iter().all(|s| s.is_match_empty);
+            let kind = HirKind::Concat(subs);
+            Hir { kind, is_start_anchored, is_match_empty }
+        }
+    }
+
+    fn alternation(mut subs: Vec<Hir>) -> Hir {
+        if subs.is_empty() {
+            Hir::fail()
+        } else if subs.len() == 1 {
+            subs.pop().unwrap()
+        } else {
+            let is_start_anchored = subs.iter().all(|s| s.is_start_anchored);
+            let is_match_empty = subs.iter().any(|s| s.is_match_empty);
+            let kind = HirKind::Alternation(subs);
+            Hir { kind, is_start_anchored, is_match_empty }
+        }
+    }
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub(crate) struct Class {
+    pub(crate) ranges: Vec<ClassRange>,
+}
+
+impl Class {
+    /// Create a new class from the given ranges. The ranges may be provided
+    /// in any order or may even overlap. They will be automatically
+    /// canonicalized.
+    fn new<I: IntoIterator<Item = ClassRange>>(ranges: I) -> Class {
+        let mut class = Class { ranges: ranges.into_iter().collect() };
+        class.canonicalize();
+        class
+    }
+
+    /// Add a new range to this set.
+    fn push(&mut self, range: ClassRange) {
+        self.ranges.push(range);
+        self.canonicalize();
+    }
+
+    /// Expand this class such that it matches the ASCII codepoints in this set
+    /// case insensitively.
+    fn ascii_case_fold(&mut self) {
+        let len = self.ranges.len();
+        for i in 0..len {
+            if let Some(folded) = self.ranges[i].ascii_case_fold() {
+                self.ranges.push(folded);
+            }
+        }
+        self.canonicalize();
+    }
+
+    /// Negate this set.
+    ///
+    /// For all `x` where `x` is any element, if `x` was in this set, then it
+    /// will not be in this set after negation.
+    fn negate(&mut self) {
+        const MIN: char = '\x00';
+        const MAX: char = char::MAX;
+
+        if self.ranges.is_empty() {
+            self.ranges.push(ClassRange { start: MIN, end: MAX });
+            return;
+        }
+
+        // There should be a way to do this in-place with constant memory,
+        // but I couldn't figure out a simple way to do it. So just append
+        // the negation to the end of this range, and then drain it before
+        // we're done.
+        let drain_end = self.ranges.len();
+
+        // If our class doesn't start at the minimum possible char, negation
+        // needs to include all codepoints up to the minimum in this set.
+        if self.ranges[0].start > MIN {
+            self.ranges.push(ClassRange {
+                start: MIN,
+                // OK because we know it's bigger than MIN.
+                end: prev_char(self.ranges[0].start).unwrap(),
+            });
+        }
+        for i in 1..drain_end {
+            // let lower = self.ranges[i - 1].upper().increment();
+            // let upper = self.ranges[i].lower().decrement();
+            // self.ranges.push(I::create(lower, upper));
+            self.ranges.push(ClassRange {
+                // OK because we know i-1 is never the last range and therefore
+                // there must be a range greater than it. It therefore follows
+                // that 'end' can never be char::MAX, and thus there must be
+                // a next char.
+                start: next_char(self.ranges[i - 1].end).unwrap(),
+                // Since 'i' is guaranteed to never be the first range, it
+                // follows that there is always a range before this and thus
+                // 'start' can never be '\x00'. Thus, there must be a previous
+                // char.
+                end: prev_char(self.ranges[i].start).unwrap(),
+            });
+        }
+        if self.ranges[drain_end - 1].end < MAX {
+            // let lower = self.ranges[drain_end - 1].upper().increment();
+            // self.ranges.push(I::create(lower, I::Bound::max_value()));
+            self.ranges.push(ClassRange {
+                // OK because we know 'end' is less than char::MAX, and thus
+                // there is a next char.
+                start: next_char(self.ranges[drain_end - 1].end).unwrap(),
+                end: MAX,
+            });
+        }
+        self.ranges.drain(..drain_end);
+        // We don't need to canonicalize because we processed the ranges above
+        // in canonical order and the new ranges we added based on those are
+        // also necessarily in canonical order.
+    }
+
+    /// Union this set with the given set, in place.
+    fn union(&mut self, other: &Class) {
+        if other.ranges.is_empty() || self.ranges == other.ranges {
+            return;
+        }
+        // This could almost certainly be done more efficiently.
+        self.ranges.extend(&other.ranges);
+        self.canonicalize();
+    }
+
+    /// Converts this set into a canonical ordering.
+    fn canonicalize(&mut self) {
+        if self.is_canonical() {
+            return;
+        }
+        self.ranges.sort();
+        assert!(!self.ranges.is_empty());
+
+        // Is there a way to do this in-place with constant memory? I couldn't
+        // figure out a way to do it. So just append the canonicalization to
+        // the end of this range, and then drain it before we're done.
+        let drain_end = self.ranges.len();
+        for oldi in 0..drain_end {
+            // If we've added at least one new range, then check if we can
+            // merge this range in the previously added range.
+            if self.ranges.len() > drain_end {
+                let (last, rest) = self.ranges.split_last_mut().unwrap();
+                if let Some(union) = last.union(&rest[oldi]) {
+                    *last = union;
+                    continue;
+                }
+            }
+            self.ranges.push(self.ranges[oldi]);
+        }
+        self.ranges.drain(..drain_end);
+    }
+
+    /// Returns true if and only if this class is in a canonical ordering.
+    fn is_canonical(&self) -> bool {
+        for pair in self.ranges.windows(2) {
+            if pair[0] >= pair[1] {
+                return false;
+            }
+            if pair[0].is_contiguous(&pair[1]) {
+                return false;
+            }
+        }
+        true
+    }
+}
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Ord)]
+pub(crate) struct ClassRange {
+    pub(crate) start: char,
+    pub(crate) end: char,
+}
+
+impl ClassRange {
+    /// Apply simple case folding to this char range. Only ASCII case mappings
+    /// (for A-Za-z) are applied.
+    ///
+    /// Returns the folded counterpart of the part of this range overlapping
+    /// `a-z` (checked first) or else `A-Z`, or `None` if it overlaps neither.
+    fn ascii_case_fold(&self) -> Option<ClassRange> {
+        if !(ClassRange { start: 'a', end: 'z' }).is_intersection_empty(self) {
+            let start = core::cmp::max(self.start, 'a');
+            let end = core::cmp::min(self.end, 'z');
+            return Some(ClassRange {
+                start: char::try_from(u32::from(start) - 32).unwrap(),
+                end: char::try_from(u32::from(end) - 32).unwrap(),
+            });
+        }
+        if !(ClassRange { start: 'A', end: 'Z' }).is_intersection_empty(self) {
+            let start = core::cmp::max(self.start, 'A');
+            let end = core::cmp::min(self.end, 'Z');
+            return Some(ClassRange {
+                start: char::try_from(u32::from(start) + 32).unwrap(),
+                end: char::try_from(u32::from(end) + 32).unwrap(),
+            });
+        }
+        None
+    }
+
+    /// Union the given overlapping range into this range.
+    ///
+    /// If the two ranges aren't contiguous, then this returns `None`.
+    fn union(&self, other: &ClassRange) -> Option<ClassRange> {
+        if !self.is_contiguous(other) {
+            return None;
+        }
+        let start = core::cmp::min(self.start, other.start);
+        let end = core::cmp::max(self.end, other.end);
+        Some(ClassRange { start, end })
+    }
+
+    /// Returns true if and only if the two ranges are contiguous. Two ranges
+    /// are contiguous if and only if the ranges are either overlapping or
+    /// adjacent.
+    fn is_contiguous(&self, other: &ClassRange) -> bool {
+        let (s1, e1) = (u32::from(self.start), u32::from(self.end));
+        let (s2, e2) = (u32::from(other.start), u32::from(other.end));
+        core::cmp::max(s1, s2) <= core::cmp::min(e1, e2).saturating_add(1)
+    }
+
+    /// Returns true if and only if the intersection of this range and the
+    /// other range is empty.
+    fn is_intersection_empty(&self, other: &ClassRange) -> bool {
+        let (s1, e1) = (self.start, self.end);
+        let (s2, e2) = (other.start, other.end);
+        core::cmp::max(s1, s2) > core::cmp::min(e1, e2)
+    }
+}
+
+/// The high-level intermediate representation for a look-around assertion.
+///
+/// An assertion match is always zero-length. Also called an "empty match."
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub(crate) enum Look {
+    /// Match the beginning of text. Specifically, this matches at the starting
+    /// position of the input.
+    Start = 1 << 0,
+    /// Match the end of text. Specifically, this matches at the ending
+    /// position of the input.
+    End = 1 << 1,
+    /// Match the beginning of a line or the beginning of text. Specifically,
+    /// this matches at the starting position of the input, or at the position
+    /// immediately following a `\n` character.
+    StartLF = 1 << 2,
+    /// Match the end of a line or the end of text. Specifically, this matches
+    /// at the end position of the input, or at the position immediately
+    /// preceding a `\n` character.
+    EndLF = 1 << 3,
+    /// Match the beginning of a line or the beginning of text. Specifically,
+    /// this matches at the starting position of the input, or at the position
+    /// immediately following either a `\r` or `\n` character, but never after
+    /// a `\r` when a `\n` follows.
+    StartCRLF = 1 << 4,
+    /// Match the end of a line or the end of text. Specifically, this matches
+    /// at the end position of the input, or at the position immediately
+    /// preceding a `\r` or `\n` character, but never before a `\n` when a `\r`
+    /// precedes it.
+    EndCRLF = 1 << 5,
+    /// Match an ASCII-only word boundary. That is, this matches a position
+    /// where the left adjacent character and right adjacent character
+    /// correspond to a word and non-word or a non-word and word character.
+    Word = 1 << 6,
+    /// Match an ASCII-only negation of a word boundary.
+    WordNegate = 1 << 7,
+}
+
+impl Look {
+    /// Reports whether this look-around assertion holds at byte offset `at`
+    /// of `haystack`.
+    ///
+    /// Several arms index `haystack[at]` or `haystack[at - 1]` directly, so
+    /// `at` is expected to be no greater than `haystack.len()`; a larger
+    /// offset can panic.
+    pub(crate) fn is_match(&self, haystack: &[u8], at: usize) -> bool {
+        let len = haystack.len();
+        // `Word` and `WordNegate` share this test and differ only in
+        // polarity: a boundary exists when exactly one neighbor is a word
+        // byte.
+        let at_word_boundary = || {
+            let before = at > 0 && utf8::is_word_byte(haystack[at - 1]);
+            let after = at < len && utf8::is_word_byte(haystack[at]);
+            before != after
+        };
+        match *self {
+            Look::Start => at == 0,
+            Look::End => at == len,
+            Look::StartLF => at == 0 || haystack[at - 1] == b'\n',
+            Look::EndLF => at == len || haystack[at] == b'\n',
+            Look::StartCRLF if at == 0 => true,
+            Look::StartCRLF => match haystack[at - 1] {
+                b'\n' => true,
+                // Never match between the `\r` and `\n` of a CRLF pair.
+                b'\r' => at >= len || haystack[at] != b'\n',
+                _ => false,
+            },
+            Look::EndCRLF if at == len => true,
+            Look::EndCRLF => match haystack[at] {
+                b'\r' => true,
+                // Never match between the `\r` and `\n` of a CRLF pair.
+                b'\n' => at == 0 || haystack[at - 1] != b'\r',
+                _ => false,
+            },
+            Look::Word => at_word_boundary(),
+            Look::WordNegate => !at_word_boundary(),
+        }
+    }
+}
+
+/// The high-level intermediate representation of a repetition operator.
+///
+/// A repetition operator applies a `min`/`max` bound to how many times an
+/// arbitrary sub-expression may be repeated.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub(crate) struct Repetition {
+    /// The minimum number of times the sub-expression must match.
+    ///
+    /// Note that the special cases `?`, `+` and `*` all get translated into
+    /// the ranges `{0,1}`, `{1,}` and `{0,}`, respectively.
+    ///
+    /// When `min` is zero, this expression can match the empty string
+    /// regardless of what its sub-expression is.
+    pub(crate) min: u32,
+    /// The maximum number of times the sub-expression may match.
+    ///
+    /// When `max` is `None`, there is no upper bound and `min` acts purely
+    /// as a lower bound. For something like `x{5}`, where the min and max
+    /// are equivalent, `min` will be set to `5` and `max` will be set to
+    /// `Some(5)`.
+    pub(crate) max: Option<u32>,
+    /// Whether this repetition operator is greedy or not. A greedy operator
+    /// will match as much as it can. A non-greedy operator will match as
+    /// little as it can.
+    ///
+    /// Typically, operators are greedy by default and are only non-greedy when
+    /// a `?` suffix is used, e.g., `(expr)*` is greedy while `(expr)*?` is
+    /// not. However, this can be inverted via the `U` "ungreedy" flag.
+    pub(crate) greedy: bool,
+    /// The expression being repeated.
+    pub(crate) sub: Box<Hir>,
+}
+
+impl Repetition {
+    /// Builds a copy of this repetition around a different sub-expression.
+    ///
+    /// The `min`, `max` and `greedy` settings carry over unchanged; only the
+    /// sub-expression is replaced by a freshly boxed `sub`.
+    pub(crate) fn with(&self, sub: Hir) -> Repetition {
+        // Only the `Copy` fields are pulled out of the borrowed `self`; the
+        // old `sub` is deliberately ignored.
+        let Repetition { min, max, greedy, .. } = *self;
+        Repetition { min, max, greedy, sub: Box::new(sub) }
+    }
+}
+
+/// The high-level intermediate representation for a capturing group.
+///
+/// A capturing group always has an index and a child expression. It may
+/// also have a name associated with it (e.g., `(?P<foo>\w)`), but a name is
+/// optional.
+///
+/// Note that there is no explicit representation of a non-capturing group
+/// in a `Hir`. Instead, non-capturing grouping is handled automatically by
+/// the recursive structure of the `Hir` itself.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub(crate) struct Capture {
+    /// The index of this capturing group.
+    pub(crate) index: u32,
+    /// The name of this capturing group, or `None` if it is unnamed.
+    pub(crate) name: Option<Box<str>>,
+    /// The expression inside the capturing group, which may be empty.
+    pub(crate) sub: Box<Hir>,
+}
+
+/// Returns the scalar value following `ch`, or `None` for `char::MAX`.
+fn next_char(ch: char) -> Option<char> {
+    match ch {
+        // Hop over the surrogate gap, which holds no scalar values.
+        '\u{D7FF}' => Some('\u{E000}'),
+        // Cannot overflow since `char::MAX < u32::MAX`; one past `char::MAX`
+        // is not a scalar value, so `from_u32` reports `None` there.
+        _ => char::from_u32(u32::from(ch) + 1),
+    }
+}
+
+/// Returns the scalar value preceding `ch`, or `None` for `'\0'`.
+fn prev_char(ch: char) -> Option<char> {
+    match u32::from(ch) {
+        0 => None,
+        // Hop back over the surrogate gap, which holds no scalar values.
+        0xE000 => Some('\u{D7FF}'),
+        // Every other scalar value has a valid scalar value just below it.
+        cp => Some(char::from_u32(cp - 1).unwrap()),
+    }
+}
diff --git a/regex-lite/src/hir/parse.rs b/regex-lite/src/hir/parse.rs
new file mode 100644
index 000000000..4ce2895f8
--- /dev/null
+++ b/regex-lite/src/hir/parse.rs
@@ -0,0 +1,2073 @@
+use core::cell::{Cell, RefCell};
+
+use alloc::{
+    boxed::Box,
+    string::{String, ToString},
+    vec,
+    vec::Vec,
+};
+
+use crate::{
+    error::Error,
+    hir::{self, Config, Flags, Hir, HirKind},
+    util,
+};
+
+// These are all of the errors that can occur while parsing a regex. Unlike
+// regex-syntax, our errors are not particularly great. They are just enough
+// to get a general sense of what went wrong. But in exchange, the error
+// reporting mechanism is *much* simpler than what's in regex-syntax.
+//
+// By convention, we use each of these messages in exactly one place. That
+// way, every branch that leads to an error has a unique message. This in turn
+// means that given a message, one can precisely identify which part of the
+// parser reported it.
+//
+// Finally, we give names to each message so that we can reference them in
+// tests.
+const ERR_TOO_MUCH_NESTING: &str = "pattern has too much nesting";
+const ERR_TOO_MANY_CAPTURES: &str = "too many capture groups";
+const ERR_DUPLICATE_CAPTURE_NAME: &str = "duplicate capture group name";
+const ERR_UNCLOSED_GROUP: &str = "found open group without closing ')'";
+const ERR_UNCLOSED_GROUP_QUESTION: &str =
+    "expected closing ')', but got end of pattern";
+const ERR_UNOPENED_GROUP: &str = "found closing ')' without matching '('";
+const ERR_LOOK_UNSUPPORTED: &str = "look-around is not supported";
+const ERR_EMPTY_FLAGS: &str = "empty flag directive '(?)' is not allowed";
+const ERR_MISSING_GROUP_NAME: &str =
+    "expected capture group name, but got end of pattern";
+const ERR_INVALID_GROUP_NAME: &str = "invalid group name";
+const ERR_UNCLOSED_GROUP_NAME: &str =
+    "expected end of capture group name, but got end of pattern";
+const ERR_EMPTY_GROUP_NAME: &str = "empty capture group names are not allowed";
+const ERR_FLAG_UNRECOGNIZED: &str = "unrecognized inline flag";
+const ERR_FLAG_REPEATED_NEGATION: &str =
+    "inline flag negation cannot be repeated";
+const ERR_FLAG_DUPLICATE: &str = "duplicate inline flag is not allowed";
+const ERR_FLAG_UNEXPECTED_EOF: &str =
+    "expected ':' or ')' to end inline flags, but got end of pattern";
+const ERR_FLAG_DANGLING_NEGATION: &str =
+    "inline flags cannot end with negation directive";
+const ERR_DECIMAL_NO_DIGITS: &str =
+    "expected decimal number, but found no digits";
+const ERR_DECIMAL_INVALID: &str = "got invalid decimal number";
+const ERR_HEX_BRACE_INVALID_DIGIT: &str =
+    "expected hexadecimal number in braces, but got non-hex digit";
+const ERR_HEX_BRACE_UNEXPECTED_EOF: &str =
+    "expected hexadecimal number, but saw end of pattern before closing brace";
+const ERR_HEX_BRACE_EMPTY: &str =
+    "expected hexadecimal number in braces, but got no digits";
+const ERR_HEX_BRACE_INVALID: &str = "got invalid hexadecimal number in braces";
+const ERR_HEX_FIXED_UNEXPECTED_EOF: &str =
+    "expected fixed length hexadecimal number, but saw end of pattern first";
+const ERR_HEX_FIXED_INVALID_DIGIT: &str =
+    "expected fixed length hexadecimal number, but got non-hex digit";
+const ERR_HEX_FIXED_INVALID: &str =
+    "got invalid fixed length hexadecimal number";
+const ERR_HEX_UNEXPECTED_EOF: &str =
+    "expected hexadecimal number, but saw end of pattern first";
+const ERR_ESCAPE_UNEXPECTED_EOF: &str =
+    "saw start of escape sequence, but saw end of pattern before it finished";
+const ERR_BACKREF_UNSUPPORTED: &str = "backreferences are not supported";
+const ERR_UNICODE_CLASS_UNSUPPORTED: &str =
+    "Unicode character classes are not supported";
+const ERR_ESCAPE_UNRECOGNIZED: &str = "unrecognized escape sequence";
+const ERR_POSIX_CLASS_UNRECOGNIZED: &str =
+    "unrecognized POSIX character class";
+const ERR_UNCOUNTED_REP_SUB_MISSING: &str =
+    "uncounted repetition operator must be applied to a sub-expression";
+const ERR_COUNTED_REP_SUB_MISSING: &str =
+    "counted repetition operator must be applied to a sub-expression";
+const ERR_COUNTED_REP_UNCLOSED: &str =
+    "found unclosed counted repetition operator";
+const ERR_COUNTED_REP_MIN_UNCLOSED: &str =
+    "found incomplete and unclosed counted repetition operator";
+const ERR_COUNTED_REP_COMMA_UNCLOSED: &str =
+    "found counted repetition operator with a comma that is unclosed";
+const ERR_COUNTED_REP_MIN_MAX_UNCLOSED: &str =
+    "found counted repetition with min and max that is unclosed";
+const ERR_COUNTED_REP_INVALID: &str =
+    "expected closing brace for counted repetition, but got something else";
+const ERR_COUNTED_REP_INVALID_RANGE: &str =
+    "found counted repetition with a min bigger than its max";
+const ERR_CLASS_UNCLOSED_AFTER_ITEM: &str =
+    "non-empty character class has no closing bracket";
+const ERR_CLASS_INVALID_RANGE_ITEM: &str =
+    "character class ranges must start and end with a single character";
+const ERR_CLASS_INVALID_ITEM: &str =
+    "invalid escape sequence in character class";
+const ERR_CLASS_UNCLOSED_AFTER_DASH: &str =
+    "non-empty character class has no closing bracket after dash";
+const ERR_CLASS_UNCLOSED_AFTER_NEGATION: &str =
+    "negated character class has no closing bracket";
+const ERR_CLASS_UNCLOSED_AFTER_CLOSING: &str =
+    "character class begins with literal ']' but has no closing bracket";
+const ERR_CLASS_INVALID_RANGE: &str = "invalid range in character class";
+const ERR_CLASS_UNCLOSED: &str = "found unclosed character class";
+const ERR_CLASS_NEST_UNSUPPORTED: &str =
+    "nested character classes are not supported";
+const ERR_CLASS_INTERSECTION_UNSUPPORTED: &str =
+    "character class intersection is not supported";
+const ERR_CLASS_DIFFERENCE_UNSUPPORTED: &str =
+    "character class difference is not supported";
+const ERR_CLASS_SYMDIFFERENCE_UNSUPPORTED: &str =
+    "character class symmetric difference is not supported";
+
+/// A regular expression parser.
+///
+/// This parses a string representation of a regular expression directly
+/// into an [`Hir`] (see [`Parser::parse`]). All of the parser's mutable
+/// state lives in `Cell`s and `RefCell`s so its methods can take `&self`.
+///
+/// A `Parser` is configured via the [`Config`] given to [`Parser::new`].
+#[derive(Clone, Debug)]
+pub(super) struct Parser<'a> {
+    /// The configuration of the parser as given by the caller.
+    config: Config,
+    /// The pattern we're parsing as given by the caller.
+    pattern: &'a str,
+    /// The call depth of the parser. This is incremented for each
+    /// sub-expression parsed. Its peak value is the maximum nesting of the
+    /// pattern.
+    depth: Cell<u32>,
+    /// The current position of the parser, as a byte offset into `pattern`.
+    pos: Cell<usize>,
+    /// The current codepoint of the parser. The codepoint corresponds to the
+    /// codepoint encoded in `pattern` beginning at `pos`.
+    ///
+    /// This is `None` if and only if `pos == pattern.len()`.
+    char: Cell<Option<char>>,
+    /// The most recently handed out capture index, or `0` if none yet.
+    capture_index: Cell<u32>,
+    /// The flags that are currently set.
+    flags: RefCell<Flags>,
+    /// A sorted sequence of capture names, maintained by `add_capture_name`.
+    /// It is used to detect and reject duplicate capture names.
+    capture_names: RefCell<Vec<String>>,
+    /// A scratch buffer used in various places. Mostly this is used to
+    /// accumulate relevant characters from parts of a pattern.
+    scratch: RefCell<String>,
+}
+
+/// The constructor and a variety of helper routines.
+impl<'a> Parser<'a> {
+    /// Creates a parser for `pattern` that uses the given configuration.
+    ///
+    /// The parser starts at offset 0 with depth 0, no captures, default
+    /// flags and its lookahead primed with the pattern's first codepoint.
+    pub(super) fn new(config: Config, pattern: &'a str) -> Parser<'a> {
+        // `None` here means the pattern is empty and parsing is already done.
+        let first = pattern.chars().next();
+        Parser {
+            config,
+            pattern,
+            depth: Cell::new(0),
+            pos: Cell::new(0),
+            char: Cell::new(first),
+            capture_index: Cell::new(0),
+            flags: RefCell::new(Flags::default()),
+            capture_names: RefCell::new(Vec::new()),
+            scratch: RefCell::new(String::new()),
+        }
+    }
+
+    /// Gives back the complete pattern string handed to `Parser::new`.
+    fn pattern(&self) -> &str {
+        let whole: &str = self.pattern;
+        whole
+    }
+
+    /// Returns the parser's current byte offset into the pattern.
+    ///
+    /// Offset `0` corresponds to the very beginning of the pattern string.
+    fn pos(&self) -> usize {
+        Cell::get(&self.pos)
+    }
+
+    /// Enters one more level of parser nesting.
+    ///
+    /// Fails with `ERR_TOO_MUCH_NESTING` when the new depth would reach the
+    /// configured nest limit; the stored depth is left untouched in that
+    /// case.
+    ///
+    /// On success, the depth from *before* the increment is returned.
+    fn increment_depth(&self) -> Result<u32, Error> {
+        let before = self.depth.get();
+        // Cannot overflow: the depth starts at 0 and is only ever stored
+        // while it is strictly below the nest limit.
+        let after = before.checked_add(1).unwrap();
+        if after < self.config.nest_limit {
+            self.depth.set(after);
+            Ok(before)
+        } else {
+            Err(Error::new(ERR_TOO_MUCH_NESTING))
+        }
+    }
+
+    /// Leaves one level of parser nesting.
+    ///
+    /// # Panics
+    ///
+    /// Panics when the depth is already 0, which means the caller did not
+    /// pair its increments and decrements correctly.
+    fn decrement_depth(&self) {
+        let lowered = self.depth.get().checked_sub(1).unwrap();
+        self.depth.set(lowered);
+    }
+
+    /// Returns the codepoint the parser is currently positioned at.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the parser has already reached the end of the pattern.
+    fn char(&self) -> char {
+        match self.char.get() {
+            Some(ch) => ch,
+            None => panic!("codepoint, but parser is done"),
+        }
+    }
+
+    /// Returns the character that starts at byte offset `i` of the pattern.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no character starts at `i`, i.e., when `i` is the end of
+    /// the pattern, lies beyond it or is not on a character boundary.
+    fn char_at(&self, i: usize) -> char {
+        match self.pattern()[i..].chars().next() {
+            Some(ch) => ch,
+            None => panic!("expected char at offset {}", i),
+        }
+    }
+
+    /// Returns true once the parser has consumed the entire pattern, i.e.,
+    /// when its position equals the pattern's length.
+    fn is_done(&self) -> bool {
+        self.pattern.len() == self.pos()
+    }
+
+    /// Returns a copy of the flags that are currently in effect.
+    fn flags(&self) -> Flags {
+        let current = self.flags.borrow();
+        *current
+    }
+
+    /// Advances the parser by one Unicode scalar value.
+    ///
+    /// Returns `true` only if a codepoint is available at the new position.
+    /// That is, `false` is returned both when the parser was already done
+    /// (in which case nothing changes) and when this call moved it onto the
+    /// end of the pattern.
+    fn bump(&self) -> bool {
+        if self.is_done() {
+            return false;
+        }
+        let next_pos = self.pos() + self.char().len_utf8();
+        let next_char = self.pattern()[next_pos..].chars().next();
+        self.pos.set(next_pos);
+        self.char.set(next_char);
+        next_char.is_some()
+    }
+
+    /// Consumes `prefix` if the unparsed remainder of the pattern starts
+    /// with it.
+    ///
+    /// On a match, the parser ends up on the character immediately after
+    /// the prefix and `true` is returned. Otherwise the parser is left
+    /// where it was and `false` is returned.
+    fn bump_if(&self, prefix: &str) -> bool {
+        let rest = &self.pattern()[self.pos()..];
+        if !rest.starts_with(prefix) {
+            return false;
+        }
+        // One bump per codepoint of the prefix moves us just past it.
+        prefix.chars().for_each(|_| {
+            self.bump();
+        });
+        true
+    }
+
+    /// Advances the parser by one character and then, when the `x` flag is
+    /// enabled, past any whitespace and comments that follow it.
+    ///
+    /// Returns true if and only if the parser is not done afterwards.
+    fn bump_and_bump_space(&self) -> bool {
+        if self.bump() {
+            self.bump_space();
+            !self.is_done()
+        } else {
+            false
+        }
+    }
+
+    /// Skips insignificant input when the `x` flag (whitespace insensitivity
+    /// with comments) is enabled.
+    ///
+    /// The parser is advanced past every run of whitespace and every `#`
+    /// comment (which extends through the next `\n`) until it reaches a
+    /// character that is neither, or the end of the pattern.
+    ///
+    /// When the `x` flag is disabled, this does nothing.
+    ///
+    /// Call this at the points in the grammar where arbitrary whitespace is
+    /// permitted under the `x` flag. For example, `{   5  , 6}` is
+    /// equivalent to `{5,6}`.
+    fn bump_space(&self) {
+        if !self.flags().ignore_whitespace {
+            return;
+        }
+        loop {
+            if self.is_done() {
+                return;
+            }
+            match self.char() {
+                '#' => {
+                    // Drop the comment, including the newline that ends it.
+                    self.bump();
+                    while !self.is_done() {
+                        let ended_line = self.char() == '\n';
+                        self.bump();
+                        if ended_line {
+                            break;
+                        }
+                    }
+                }
+                ch if ch.is_whitespace() => {
+                    self.bump();
+                }
+                _ => return,
+            }
+        }
+    }
+
+    /// Looks at the character after the current one without moving the
+    /// parser.
+    ///
+    /// Returns `None` when the parser is done or when the current character
+    /// is the last one in the pattern.
+    fn peek(&self) -> Option<char> {
+        if self.is_done() {
+            None
+        } else {
+            let after = self.pos() + self.char().len_utf8();
+            self.pattern()[after..].chars().next()
+        }
+    }
+
+    /// Peeks at the next significant character after the current one
+    /// without advancing the parser.
+    ///
+    /// When the `x` flag is disabled, this is exactly `peek`. When it is
+    /// enabled, whitespace and `#` comments (which run through the next
+    /// `\n`) are skipped, mirroring what `bump_space` would consume.
+    ///
+    /// Returns `None` if the parser is done, or if nothing but whitespace
+    /// and comments remains after the current character.
+    fn peek_space(&self) -> Option<char> {
+        if !self.flags().ignore_whitespace {
+            return self.peek();
+        }
+        if self.is_done() {
+            return None;
+        }
+        let start = self.pos() + self.char().len_utf8();
+        let mut in_comment = false;
+        for ch in self.pattern()[start..].chars() {
+            if in_comment {
+                // This must be tested before the whitespace check below:
+                // `\n` is itself whitespace, and it is the only thing that
+                // terminates a comment.
+                if ch == '\n' {
+                    in_comment = false;
+                }
+            } else if ch == '#' {
+                in_comment = true;
+            } else if !ch.is_whitespace() {
+                return Some(ch);
+            }
+        }
+        // Only whitespace and comments were left.
+        None
+    }
+
+    /// Hands out the next capture index, bumping the internal counter each
+    /// time it is called.
+    ///
+    /// Since the way capture indices are computed is a public API guarantee,
+    /// use of this routine depends on the parser being depth first and
+    /// left-to-right.
+    ///
+    /// Fails with `ERR_TOO_MANY_CAPTURES` if the counter cannot be
+    /// incremented without overflowing a `u32`.
+    fn next_capture_index(&self) -> Result<u32, Error> {
+        match self.capture_index.get().checked_add(1) {
+            Some(index) => {
+                self.capture_index.set(index);
+                Ok(index)
+            }
+            None => Err(Error::new(ERR_TOO_MANY_CAPTURES)),
+        }
+    }
+
+    /// Records `name` as a capture group name used by this pattern.
+    ///
+    /// Fails with `ERR_DUPLICATE_CAPTURE_NAME` if the same name was already
+    /// recorded; in that case the set of names is left unchanged.
+    fn add_capture_name(&self, name: &str) -> Result<(), Error> {
+        let mut names = self.capture_names.borrow_mut();
+        // The comparator orders `name` against each element (rather than the
+        // element against `name`), so the list is kept in descending order.
+        // Lookup and insertion both go through this one comparator, so they
+        // always agree on where a name belongs.
+        match names.binary_search_by(|n| name.cmp(n)) {
+            Ok(_) => Err(Error::new(ERR_DUPLICATE_CAPTURE_NAME)),
+            Err(i) => {
+                names.insert(i, name.to_string());
+                Ok(())
+            }
+        }
+    }
+
+    /// Reports whether the parser sits on one of the look-around prefixes
+    /// `?=`, `?!`, `?<=` or `?<!`. Each of these would otherwise be
+    /// considered invalid regex syntax.
+    ///
+    /// The prefixes are tried in that order via `bump_if`, so when this
+    /// returns true the parser has also been advanced past the prefix.
+    ///
+    /// This should only be called immediately after parsing the opening of
+    /// a group or a set of flags.
+    fn is_lookaround_prefix(&self) -> bool {
+        ["?=", "?!", "?<=", "?<!"].iter().any(|prefix| self.bump_if(prefix))
+    }
+}
+
+/// The actual parser. We try to break out each kind of regex syntax into its
+/// own routine.
+impl<'a> Parser<'a> {
+    /// Parses from the current position into an `Hir`.
+    ///
+    /// Parsing runs until the end of the pattern or until a `)` is seen. A
+    /// `)` at depth 0 has no matching `(` and is an error; at deeper levels
+    /// it ends this sub-expression and is left in place, to be consumed by
+    /// the `(` arm that started the group.
+    ///
+    /// Branches separated by `|` become an alternation, and the items
+    /// within each branch become a concatenation.
+    pub(super) fn parse(&self) -> Result<Hir, Error> {
+        let depth = self.increment_depth()?;
+        let mut branches = vec![];
+        let mut seq = vec![];
+        loop {
+            self.bump_space();
+            if self.is_done() {
+                break;
+            }
+            match self.char() {
+                '(' => {
+                    // Remember the flags in effect before the group so they
+                    // can be reinstated once the group has been parsed.
+                    let saved_flags = self.flags();
+                    let group = self.parse_group()?;
+                    if let Some(sub) = group {
+                        seq.push(sub);
+                        // A `None` group is a bare flag directive such as
+                        // '(?ism)', whose flags must stay active until they
+                        // are disabled or the enclosing group or pattern
+                        // ends. Hence the reset happens only for real groups.
+                        *self.flags.borrow_mut() = saved_flags;
+                    }
+                    match self.char.get() {
+                        Some(')') => {
+                            self.bump();
+                        }
+                        _ => return Err(Error::new(ERR_UNCLOSED_GROUP)),
+                    }
+                }
+                ')' if depth == 0 => {
+                    return Err(Error::new(ERR_UNOPENED_GROUP));
+                }
+                ')' => break,
+                '|' => {
+                    branches.push(Hir::concat(core::mem::take(&mut seq)));
+                    self.bump();
+                }
+                '[' => seq.push(self.parse_class()?),
+                '?' | '*' | '+' => {
+                    seq = self.parse_uncounted_repetition(seq)?;
+                }
+                '{' => {
+                    seq = self.parse_counted_repetition(seq)?;
+                }
+                _ => seq.push(self.parse_primitive()?),
+            }
+        }
+        self.decrement_depth();
+        branches.push(Hir::concat(seq));
+        // N.B. A lone branch is not wrapped in an "alternation".
+        Ok(Hir::alternation(branches))
+    }
+
+    /// Parses a "primitive" pattern, i.e., an expression that contains no
+    /// sub-expressions: an escape sequence, `.`, `^`, `$` or a literal
+    /// character.
+    ///
+    /// The parser must be pointing at the first character of the primitive.
+    fn parse_primitive(&self) -> Result<Hir, Error> {
+        let ch = self.char();
+        self.bump();
+        // Escapes are the only primitive that can fail to parse.
+        if ch == '\\' {
+            return self.parse_escape();
+        }
+        Ok(match ch {
+            '.' => self.hir_dot(),
+            '^' => self.hir_anchor_start(),
+            '$' => self.hir_anchor_end(),
+            literal => self.hir_char(literal),
+        })
+    }
+
+    /// Parses an escape sequence into a "primitive" HIR, that is, an HIR
+    /// with no sub-expressions.
+    ///
+    /// The parser must be positioned immediately *after* the `\` that
+    /// starts the sequence. On success it is left on the first position
+    /// following the escape sequence.
+    ///
+    /// Backreferences (`\0` through `\9`) and Unicode classes (`\p`, `\P`)
+    /// are rejected as unsupported.
+    fn parse_escape(&self) -> Result<Hir, Error> {
+        if self.is_done() {
+            return Err(Error::new(ERR_ESCAPE_UNEXPECTED_EOF));
+        }
+        let ch = self.char();
+        // The multi-character forms are rejected or handed to a helper
+        // before the parser is advanced past `ch`.
+        if ch.is_ascii_digit() {
+            return Err(Error::new(ERR_BACKREF_UNSUPPORTED));
+        }
+        if matches!(ch, 'p' | 'P') {
+            return Err(Error::new(ERR_UNICODE_CLASS_UNSUPPORTED));
+        }
+        if matches!(ch, 'x' | 'u' | 'U') {
+            return self.parse_hex();
+        }
+        if matches!(ch, 'd' | 's' | 'w' | 'D' | 'S' | 'W') {
+            return Ok(self.parse_perl_class());
+        }
+
+        // Everything left is a single character escape.
+        self.bump();
+        if util::is_meta_character(ch) || util::is_escapeable_character(ch) {
+            return Ok(self.hir_char(ch));
+        }
+        let hir = match ch {
+            'a' => self.hir_char('\x07'),
+            'f' => self.hir_char('\x0C'),
+            't' => self.hir_char('\t'),
+            'n' => self.hir_char('\n'),
+            'r' => self.hir_char('\r'),
+            'v' => self.hir_char('\x0B'),
+            'A' => Hir::look(hir::Look::Start),
+            'z' => Hir::look(hir::Look::End),
+            'b' => Hir::look(hir::Look::Word),
+            'B' => Hir::look(hir::Look::WordNegate),
+            _ => return Err(Error::new(ERR_ESCAPE_UNRECOGNIZED)),
+        };
+        Ok(hir)
+    }
+
+    /// Parses a hexadecimal codepoint escape in either notation, i.e., the
+    /// fixed-width form (`\xFF`) or the braced form (`\x{FFFF}`).
+    ///
+    /// The parser must be positioned at the `x`, `u` or `U` prefix, which
+    /// selects a fixed width of 2, 4 or 8 digits respectively. On success
+    /// the parser is left on the first character after the literal.
+    fn parse_hex(&self) -> Result<Hir, Error> {
+        // The width must be read off the prefix before it is bumped away.
+        let digit_len = match self.char() {
+            'x' => 2,
+            'u' => 4,
+            'U' => 8,
+            unk => unreachable!(
+                "invalid start of fixed length hexadecimal number {}",
+                unk
+            ),
+        };
+        if !self.bump_and_bump_space() {
+            return Err(Error::new(ERR_HEX_UNEXPECTED_EOF));
+        }
+        match self.char() {
+            '{' => self.parse_hex_brace(),
+            _ => self.parse_hex_digits(digit_len),
+        }
+    }
+
+    /// Parses exactly `digit_len` hexadecimal digits into the codepoint
+    /// they denote.
+    ///
+    /// The parser must be positioned at the first digit. On success it is
+    /// left on the first character following the escape sequence.
+    ///
+    /// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
+    /// or 8 (for `\UNNNNNNNN`).
+    fn parse_hex_digits(&self, digit_len: usize) -> Result<Hir, Error> {
+        let mut digits = String::with_capacity(digit_len);
+        for i in 0..digit_len {
+            // The first digit is already under the cursor; each later one
+            // requires advancing first.
+            let have_char = i == 0 || self.bump_and_bump_space();
+            if !have_char {
+                return Err(Error::new(ERR_HEX_FIXED_UNEXPECTED_EOF));
+            }
+            let ch = self.char();
+            if !is_hex(ch) {
+                return Err(Error::new(ERR_HEX_FIXED_INVALID_DIGIT));
+            }
+            digits.push(ch);
+        }
+        // Step past the last digit. Reaching EOF here is fine.
+        self.bump_and_bump_space();
+        u32::from_str_radix(&digits, 16)
+            .ok()
+            .and_then(char::from_u32)
+            .map(|ch| self.hir_char(ch))
+            .ok_or_else(|| Error::new(ERR_HEX_FIXED_INVALID))
+    }
+
+    /// Parses a braced hexadecimal escape such as `{1F600}` into the
+    /// Unicode scalar value it denotes.
+    ///
+    /// The parser must be positioned at the opening brace `{`. It is
+    /// advanced to the first character following the closing brace `}`.
+    fn parse_hex_brace(&self) -> Result<Hir, Error> {
+        let mut digits = String::new();
+        loop {
+            if !self.bump_and_bump_space() {
+                break;
+            }
+            let ch = self.char();
+            if ch == '}' {
+                break;
+            }
+            if !is_hex(ch) {
+                return Err(Error::new(ERR_HEX_BRACE_INVALID_DIGIT));
+            }
+            digits.push(ch);
+        }
+        if self.is_done() {
+            return Err(Error::new(ERR_HEX_BRACE_UNEXPECTED_EOF));
+        }
+        assert_eq!(self.char(), '}');
+        self.bump_and_bump_space();
+
+        if digits.is_empty() {
+            return Err(Error::new(ERR_HEX_BRACE_EMPTY));
+        }
+        let scalar =
+            u32::from_str_radix(&digits, 16).ok().and_then(char::from_u32);
+        if let Some(ch) = scalar {
+            Ok(self.hir_char(ch))
+        } else {
+            Err(Error::new(ERR_HEX_BRACE_INVALID))
+        }
+    }
+
+    /// Parse a decimal number into a u32 while trimming leading and trailing
+    /// whitespace.
+    ///
+    /// This expects the parser to be positioned at the first position where
+    /// a decimal digit could occur. The parser is advanced past the digits
+    /// and past any whitespace that follows them. Since `bump_and_bump_space`
+    /// is used to advance, whitespace and comments between the digits are
+    /// skipped as well when the `x` flag is enabled.
+    ///
+    /// # Errors
+    ///
+    /// Returns `ERR_DECIMAL_NO_DIGITS` if no decimal digit could be found,
+    /// and `ERR_DECIMAL_INVALID` if the digits do not fit into a u32.
+    fn parse_decimal(&self) -> Result<u32, Error> {
+        let mut scratch = String::new();
+        while !self.is_done() && self.char().is_whitespace() {
+            self.bump();
+        }
+        while !self.is_done() && self.char().is_ascii_digit() {
+            scratch.push(self.char());
+            self.bump_and_bump_space();
+        }
+        while !self.is_done() && self.char().is_whitespace() {
+            self.bump_and_bump_space();
+        }
+        if scratch.is_empty() {
+            return Err(Error::new(ERR_DECIMAL_NO_DIGITS));
+        }
+        // `scratch` holds ASCII digits only, so the sole way this can fail
+        // is by overflowing a u32.
+        scratch.parse::<u32>().map_err(|_| Error::new(ERR_DECIMAL_INVALID))
+    }
+
+    /// Parses one of the uncounted repetition operators `?`, `*` or `+`.
+    /// The counted `{m,n}` syntax is not handled here.
+    ///
+    /// The parser must be positioned at the operator; any other current
+    /// character results in a panic. It is advanced to the first character
+    /// after the operator, including the single optional `?` suffix that
+    /// makes the operator ungreedy.
+    ///
+    /// `concat` is the concatenation built so far. Its last expression is
+    /// popped, wrapped in the repetition and pushed back, and the updated
+    /// concatenation is returned.
+    ///
+    /// If the concatenation is empty, then there is nothing to repeat and
+    /// an error is returned.
+    fn parse_uncounted_repetition(
+        &self,
+        mut concat: Vec<Hir>,
+    ) -> Result<Vec<Hir>, Error> {
+        let sub = concat
+            .pop()
+            .map(Box::new)
+            .ok_or_else(|| Error::new(ERR_UNCOUNTED_REP_SUB_MISSING))?;
+        let (min, max) = match self.char() {
+            '?' => (0, Some(1)),
+            '*' => (0, None),
+            '+' => (1, None),
+            unk => unreachable!("unrecognized repetition operator '{}'", unk),
+        };
+        // A `?` directly after the operator switches it to non-greedy and is
+        // consumed as part of the operator.
+        let greedy = !(self.bump() && self.char() == '?');
+        if !greedy {
+            self.bump();
+        }
+        let rep = hir::Repetition { min, max, greedy, sub };
+        concat.push(Hir::repetition(rep));
+        Ok(concat)
+    }
+
+    /// Parses a counted repetition operator, i.e., the `{m}`, `{m,}` and
+    /// `{m,n}` syntax. The `?`, `*` and `+` operators are not handled here.
+    ///
+    /// The parser must be positioned at the opening `{`. It is advanced to
+    /// the first character after the operator, including the single
+    /// optional `?` suffix that makes the operator ungreedy.
+    ///
+    /// `concat` is the concatenation built so far. Its last expression is
+    /// popped, wrapped in the repetition and pushed back, and the updated
+    /// concatenation is returned.
+    ///
+    /// If the concatenation is empty, then there is nothing to repeat and
+    /// an error is returned. Errors are also returned for an unclosed or
+    /// malformed operator and for a minimum that exceeds the maximum.
+    fn parse_counted_repetition(
+        &self,
+        mut concat: Vec<Hir>,
+    ) -> Result<Vec<Hir>, Error> {
+        assert_eq!(self.char(), '{', "expected opening brace");
+        let sub = concat
+            .pop()
+            .map(Box::new)
+            .ok_or_else(|| Error::new(ERR_COUNTED_REP_SUB_MISSING))?;
+        if !self.bump_and_bump_space() {
+            return Err(Error::new(ERR_COUNTED_REP_UNCLOSED));
+        }
+        let min = self.parse_decimal()?;
+        if self.is_done() {
+            return Err(Error::new(ERR_COUNTED_REP_MIN_UNCLOSED));
+        }
+        // Without a comma this is the exact form `{m}`, where max == min.
+        let max = if self.char() == ',' {
+            if !self.bump_and_bump_space() {
+                return Err(Error::new(ERR_COUNTED_REP_COMMA_UNCLOSED));
+            }
+            // `{m,}` has no upper bound, while `{m,n}` has an explicit one.
+            let upper = match self.char() {
+                '}' => None,
+                _ => Some(self.parse_decimal()?),
+            };
+            if self.is_done() {
+                return Err(Error::new(ERR_COUNTED_REP_MIN_MAX_UNCLOSED));
+            }
+            upper
+        } else {
+            Some(min)
+        };
+        if self.char() != '}' {
+            return Err(Error::new(ERR_COUNTED_REP_INVALID));
+        }
+
+        // A `?` directly after the closing brace makes the operator
+        // non-greedy and is consumed as part of it.
+        let greedy = !(self.bump_and_bump_space() && self.char() == '?');
+        if !greedy {
+            self.bump();
+        }
+
+        if matches!(max, Some(upper) if min > upper) {
+            return Err(Error::new(ERR_COUNTED_REP_INVALID_RANGE));
+        }
+        let rep = hir::Repetition { min, max, greedy, sub };
+        concat.push(Hir::repetition(rep));
+        Ok(concat)
+    }
+
+    /// Parses the part of a pattern that starts with a `(`. This is usually
+    /// a group sub-expression (`Some(..)`), but might just be a directive
+    /// that enables (or disables) certain flags (`None`).
+    ///
+    /// This assumes the parser is pointing at the opening `(`.
+    fn parse_group(&self) -> Result<Option<Hir>, Error> {
+        assert_eq!(self.char(), '(');
+        self.bump_and_bump_space();
+        if self.is_lookaround_prefix() {
+            return Err(Error::new(ERR_LOOK_UNSUPPORTED));
+        }
+        if self.bump_if("?P<") || self.bump_if("?<") {
+            let index = self.next_capture_index()?;
+            let name = Some(Box::from(self.parse_capture_name(index)?));
+            let sub = Box::new(self.parse()?);
+            let cap = hir::Capture { index, name, sub };
+            Ok(Some(Hir::capture(cap)))
+        } else if self.bump_if("?") {
+            if self.is_done() {
+                return Err(Error::new(ERR_UNCLOSED_GROUP_QUESTION));
+            }
+            let start = self.pos();
+            // The flags get reset in the top-level 'parse' routine.
+            *self.flags.borrow_mut() = self.parse_flags()?;
+            let consumed = self.pos() - start;
+            if self.char() == ')' {
+                // We don't allow empty flags, e.g., `(?)`.
+                if consumed == 0 {
+                    return Err(Error::new(ERR_EMPTY_FLAGS));
+                }
+                Ok(None) // flag directive like `(?i)`: no sub-expression
+            } else {
+                assert_eq!(':', self.char()); // parse_flags ends at : or )
+                self.bump();
+                self.parse().map(Some)
+            }
+        } else {
+            let index = self.next_capture_index()?;
+            let sub = Box::new(self.parse()?);
+            let cap = hir::Capture { index, name: None, sub };
+            Ok(Some(Hir::capture(cap)))
+        }
+    }
+
+    /// Parses a capture group name. Assumes that the parser is positioned at
+    /// the first character of the name, just after the opening `<` (possibly
+    /// EOF), and advances it to the first character after the closing `>`.
+    ///
+    /// The caller provides the capture index of the group for this name, but
+    /// it is not consulted by this routine (NOTE(review): confirm intended).
+    fn parse_capture_name(&self, capture_index: u32) -> Result<&str, Error> {
+        if self.is_done() {
+            return Err(Error::new(ERR_MISSING_GROUP_NAME));
+        }
+        let start = self.pos();
+        loop {
+            if self.char() == '>' {
+                break;
+            }
+            if !is_capture_char(self.char(), self.pos() == start) {
+                return Err(Error::new(ERR_INVALID_GROUP_NAME));
+            }
+            if !self.bump() {
+                break; // hit EOF; reported as unclosed below
+            }
+        }
+        let end = self.pos();
+        if self.is_done() {
+            return Err(Error::new(ERR_UNCLOSED_GROUP_NAME));
+        }
+        assert_eq!(self.char(), '>');
+        self.bump();
+        let name = &self.pattern()[start..end];
+        if name.is_empty() {
+            return Err(Error::new(ERR_EMPTY_GROUP_NAME));
+        }
+        self.add_capture_name(name)?;
+        Ok(name)
+    }
+
+    /// Parse a sequence of flags starting at the current character.
+    ///
+    /// This advances the parser to the character immediately following the
+    /// flags, which is guaranteed to be either `:` or `)`. The returned flags
+    /// start from the currently active flags with the parsed changes applied.
+    ///
+    /// # Errors
+    ///
+    /// An error is returned if a flag is unrecognized or duplicated, if the
+    /// negation operator is used more than once or is not followed by any
+    /// flags, or if the pattern ends before a `:` or `)` is seen.
+    ///
+    /// An empty flag list is not an error here; the caller decides whether it
+    /// is acceptable.
+    fn parse_flags(&self) -> Result<Flags, Error> {
+        let mut flags = *self.flags.borrow();
+        let mut negate = false;
+        // Keeps track of whether the previous flag item was a '-'. We use this
+        // to detect whether there is a dangling '-', which is invalid.
+        let mut last_was_negation = false;
+        // A set to keep track of the flags we've seen. Since all flags are
+        // ASCII, we only need 128 bytes.
+        let mut seen = [false; 128];
+        while self.char() != ':' && self.char() != ')' {
+            if self.char() == '-' {
+                last_was_negation = true;
+                if negate {
+                    return Err(Error::new(ERR_FLAG_REPEATED_NEGATION));
+                }
+                negate = true;
+            } else {
+                last_was_negation = false;
+                self.parse_flag(&mut flags, negate)?;
+                // OK because every valid flag is ASCII, and we're only here if
+                // the flag is valid.
+                let flag_byte = u8::try_from(self.char()).unwrap();
+                if seen[usize::from(flag_byte)] {
+                    return Err(Error::new(ERR_FLAG_DUPLICATE));
+                }
+                seen[usize::from(flag_byte)] = true;
+            }
+            if !self.bump() {
+                return Err(Error::new(ERR_FLAG_UNEXPECTED_EOF));
+            }
+        }
+        if last_was_negation {
+            return Err(Error::new(ERR_FLAG_DANGLING_NEGATION));
+        }
+        Ok(flags)
+    }
+
+    /// Interpret the current character as a single flag. The parser is not
+    /// advanced.
+    ///
+    /// The matching field of `flags` is set to `!negate`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the current character is not a known flag.
+    fn parse_flag(
+        &self,
+        flags: &mut Flags,
+        negate: bool,
+    ) -> Result<(), Error> {
+        let slot = match self.char() {
+            'i' => &mut flags.case_insensitive,
+            'm' => &mut flags.multi_line,
+            's' => &mut flags.dot_matches_new_line,
+            'U' => &mut flags.swap_greed,
+            'R' => &mut flags.crlf,
+            'x' => &mut flags.ignore_whitespace,
+            _ => return Err(Error::new(ERR_FLAG_UNRECOGNIZED)),
+        };
+        *slot = !negate;
+        Ok(())
+    }
+
+    /// Parse a bracketed character class made of characters, ranges and POSIX
+    /// classes. Nested classes and the set operators `&&`, `--` and `~~` are
+    /// not supported and result in an error.
+    ///
+    /// This assumes the parser is positioned at the opening `[`. On success,
+    /// the parser is advanced to the position just after the closing `]`.
+    fn parse_class(&self) -> Result<Hir, Error> {
+        assert_eq!(self.char(), '[');
+
+        let mut union = vec![];
+        if !self.bump_and_bump_space() {
+            return Err(Error::new(ERR_CLASS_UNCLOSED));
+        }
+        // Determine whether the class is negated or not.
+        let negate = if self.char() != '^' {
+            false
+        } else {
+            if !self.bump_and_bump_space() {
+                return Err(Error::new(ERR_CLASS_UNCLOSED_AFTER_NEGATION));
+            }
+            true
+        };
+        // Accept any number of `-` as literal `-`.
+        while self.char() == '-' {
+            union.push(hir::ClassRange { start: '-', end: '-' });
+            if !self.bump_and_bump_space() {
+                return Err(Error::new(ERR_CLASS_UNCLOSED_AFTER_DASH));
+            }
+        }
+        // If `]` is the *first* char in a set, then interpret it as a literal
+        // `]`. That is, an empty class is impossible to write.
+        if union.is_empty() && self.char() == ']' {
+            union.push(hir::ClassRange { start: ']', end: ']' });
+            if !self.bump_and_bump_space() {
+                return Err(Error::new(ERR_CLASS_UNCLOSED_AFTER_CLOSING));
+            }
+        }
+        loop {
+            self.bump_space();
+            if self.is_done() {
+                return Err(Error::new(ERR_CLASS_UNCLOSED));
+            }
+            match self.char() {
+                '[' => {
+                    // Attempt to treat this as the beginning of a POSIX class.
+                    // If POSIX class parsing fails, then the parser backs up
+                    // to `[`.
+                    if let Some(ranges) = self.maybe_parse_posix_class() {
+                        union.extend(ranges);
+                        continue;
+                    }
+                    // ... otherwise we don't support nested classes.
+                    return Err(Error::new(ERR_CLASS_NEST_UNSUPPORTED));
+                }
+                ']' => {
+                    self.bump();
+                    let mut class = hir::Class::new(union);
+                    // Note that we must apply case folding before negation!
+                    // Consider `(?i)[^x]`. If we applied negation first, then
+                    // the result would be the character class that matched any
+                    // Unicode scalar value.
+                    if self.flags().case_insensitive {
+                        class.ascii_case_fold();
+                    }
+                    if negate {
+                        class.negate();
+                    }
+                    return Ok(Hir::class(class));
+                }
+                '&' if self.peek() == Some('&') => {
+                    return Err(Error::new(
+                        ERR_CLASS_INTERSECTION_UNSUPPORTED,
+                    ));
+                }
+                '-' if self.peek() == Some('-') => {
+                    return Err(Error::new(ERR_CLASS_DIFFERENCE_UNSUPPORTED));
+                }
+                '~' if self.peek() == Some('~') => {
+                    return Err(Error::new(
+                        ERR_CLASS_SYMDIFFERENCE_UNSUPPORTED,
+                    ));
+                }
+                _ => self.parse_class_range(&mut union)?,
+            }
+        }
+    }
+
+    /// Parse a single primitive item in a character class set. The item to
+    /// be parsed can either be one of a simple literal character, a range
+    /// between two simple literal characters or a "primitive" character
+    /// class like `\w`.
+    ///
+    /// If an invalid escape is found, if a character class is found where a
+    /// simple literal is expected (e.g., in a range), or if a range's start is
+    /// greater than its end, then an error is returned.
+    ///
+    /// Otherwise, the range (or ranges) are appended to the given union of
+    /// ranges.
+    fn parse_class_range(
+        &self,
+        union: &mut Vec<hir::ClassRange>,
+    ) -> Result<(), Error> {
+        let prim1 = self.parse_class_item()?;
+        self.bump_space();
+        if self.is_done() {
+            return Err(Error::new(ERR_CLASS_UNCLOSED_AFTER_ITEM));
+        }
+        // If the next char isn't a `-`, then we don't have a range.
+        // There are two exceptions. If the char after a `-` is a `]`, then
+        // `-` is interpreted as a literal `-`. Alternatively, if the char
+        // after a `-` is a `-`, then `--` corresponds to a "difference"
+        // operation. (Which we don't support in regex-lite, but error about
+        // specifically in an effort to be loud about differences between the
+        // main regex crate where possible.)
+        if self.char() != '-'
+            || self.peek_space() == Some(']')
+            || self.peek_space() == Some('-')
+        {
+            union.extend_from_slice(&into_class_item_ranges(prim1)?);
+            return Ok(());
+        }
+        // OK, now we're parsing a range, so bump past the `-` and parse the
+        // second half of the range.
+        if !self.bump_and_bump_space() {
+            return Err(Error::new(ERR_CLASS_UNCLOSED_AFTER_DASH));
+        }
+        let prim2 = self.parse_class_item()?;
+        let range = hir::ClassRange {
+            start: into_class_item_range(prim1)?,
+            end: into_class_item_range(prim2)?,
+        };
+        if range.start > range.end {
+            return Err(Error::new(ERR_CLASS_INVALID_RANGE));
+        }
+        union.push(range);
+        Ok(())
+    }
+
+    /// Parse a single item in a character class as a primitive, where the
+    /// primitive either consists of a verbatim literal or a single escape
+    /// sequence.
+    ///
+    /// This assumes the parser is positioned at the beginning of a primitive,
+    /// and advances the parser to the first position after the primitive if
+    /// successful. It is the caller's responsibility to report an error if
+    /// an illegal primitive was parsed.
+    fn parse_class_item(&self) -> Result<Hir, Error> {
+        let ch = self.char();
+        self.bump();
+        if ch == '\\' {
+            self.parse_escape()
+        } else {
+            // Always a plain char, even under `(?i)`, so that it can be a
+            // range endpoint. The whole class is case folded when it closes.
+            Ok(Hir::char(ch))
+        }
+    }
+
+    /// Attempt to parse a POSIX character class, e.g., `[:alnum:]`, or its
+    /// negated form, e.g., `[:^alnum:]`.
+    ///
+    /// This assumes the parser is positioned at the opening `[`.
+    ///
+    /// If no valid POSIX character class could be found, then this does not
+    /// advance the parser and `None` is returned. Otherwise, the parser is
+    /// advanced to the first byte following the closing `]` and the ranges
+    /// of the corresponding (possibly negated) POSIX class are returned.
+    fn maybe_parse_posix_class(
+        &self,
+    ) -> Option<impl Iterator<Item = hir::ClassRange>> {
+        // POSIX character classes are interesting from a parsing perspective
+        // because parsing cannot fail with any interesting error. A POSIX
+        // class has the syntax `[:NAME:]` and can only appear within
+        // character brackets, e.g., `[[:alnum:]]` or `[[:lower:]A]`.
+        //
+        // However, if one types an incorrect POSIX character class, e.g.,
+        // `[[:loower:]]`, then we treat that as if it were a normal nested
+        // character class containing the characters `:elorw`. (Which isn't
+        // supported and results in an error in regex-lite.) One might argue
+        // that the repeated colons give away the intent to write a POSIX
+        // class, but `[[:lower]]` shows how hard that intent is to detect.
+        //
+        // Reasonable people can probably disagree over this, but for better
+        // or worse, we implement semantics that never fails at the expense of
+        // better failure modes.
+        assert_eq!(self.char(), '[');
+
+        // If parsing fails, then we back up the parser to this starting point.
+        let start_pos = self.pos();
+        let start_char = self.char.get();
+        let reset = || {
+            self.pos.set(start_pos);
+            self.char.set(start_char);
+        };
+
+        let mut negated = false;
+        if !self.bump() || self.char() != ':' {
+            reset();
+            return None;
+        }
+        if !self.bump() {
+            reset();
+            return None;
+        }
+        if self.char() == '^' {
+            negated = true;
+            if !self.bump() {
+                reset();
+                return None;
+            }
+        }
+        let name_start = self.pos();
+        while self.char() != ':' && self.bump() {}
+        if self.is_done() {
+            reset();
+            return None;
+        }
+        let name = &self.pattern()[name_start..self.pos()];
+        if !self.bump_if(":]") {
+            reset();
+            return None;
+        }
+        if let Ok(ranges) = posix_class(name) {
+            let mut class = hir::Class::new(ranges);
+            if negated {
+                class.negate();
+            }
+            return Some(class.ranges.into_iter());
+        }
+        reset();
+        None
+    }
+
+    /// Parse a Perl character class, e.g., `\d` or `\W`. The parser must be at
+    /// the class letter and is advanced to the character just after it.
+    fn parse_perl_class(&self) -> Hir {
+        let letter = self.char();
+        self.bump();
+        let name = match letter.to_ascii_lowercase() {
+            'd' => "digit",
+            's' => "space",
+            'w' => "word",
+            _ => unreachable!("invalid Perl class \\{}", letter),
+        };
+        let mut class = hir::Class::new(posix_class(name).unwrap());
+        if letter.is_ascii_uppercase() {
+            class.negate();
+        }
+        Hir::class(class)
+    }
+
+    /// Builds the class matched by `.` under the current flags: any
+    /// character when `s` is set, otherwise everything but `\n` (and also
+    /// `\r` when CRLF mode is enabled).
+    fn hir_dot(&self) -> Hir {
+        let flags = self.flags();
+        let spans: &[(char, char)] = if flags.dot_matches_new_line {
+            &[('\x00', '\u{10FFFF}')]
+        } else if flags.crlf {
+            // Excludes both `\n` (0x0A) and `\r` (0x0D).
+            &[('\x00', '\x09'), ('\x0B', '\x0C'), ('\x0E', '\u{10FFFF}')]
+        } else {
+            // Excludes only `\n` (0x0A).
+            &[('\x00', '\x09'), ('\x0B', '\u{10FFFF}')]
+        };
+        let ranges = spans
+            .iter()
+            .map(|&(start, end)| hir::ClassRange { start, end });
+        Hir::class(hir::Class::new(ranges))
+    }
+
+    /// Builds the start-anchor assertion selected by the `m` (multi-line)
+    /// and `R` (CRLF) flags.
+    fn hir_anchor_start(&self) -> Hir {
+        let flags = self.flags();
+        // The CRLF flag only matters once multi-line mode is enabled.
+        let look = match (flags.multi_line, flags.crlf) {
+            (false, _) => hir::Look::Start,
+            (true, false) => hir::Look::StartLF,
+            (true, true) => hir::Look::StartCRLF,
+        };
+        Hir::look(look)
+    }
+
+    /// Builds the end-anchor assertion selected by the `m` (multi-line)
+    /// and `R` (CRLF) flags.
+    fn hir_anchor_end(&self) -> Hir {
+        let flags = self.flags();
+        // The CRLF flag only matters once multi-line mode is enabled.
+        let look = match (flags.multi_line, flags.crlf) {
+            (false, _) => hir::Look::End,
+            (true, false) => hir::Look::EndLF,
+            (true, true) => hir::Look::EndCRLF,
+        };
+        Hir::look(look)
+    }
+
+    /// Literal `ch`, or a class of both ASCII cases of `ch` under `(?i)`.
+    fn hir_char(&self, ch: char) -> Hir {
+        let one = hir::ClassRange { start: ch, end: ch };
+        let insensitive = self.flags().case_insensitive;
+        match insensitive.then(|| one.ascii_case_fold()).flatten() {
+            Some(alt) => Hir::class(hir::Class::new([one, alt])),
+            None => Hir::char(ch),
+        }
+    }
+}
+
+/// Extracts the literal char from an Hir that is exactly one character.
+/// Any other kind of Hir produces an error.
+///
+/// Class items are parsed with the general literal/escape machinery, which
+/// may hand back an entire class (e.g., for `\w`). That is fine for a
+/// standalone item, but the start or end of a class range has to be a
+/// single character, so range endpoints are funneled through this check to
+/// reject anything else.
+fn into_class_item_range(hir: Hir) -> Result<char, Error> {
+    if let HirKind::Char(ch) = hir.kind {
+        return Ok(ch);
+    }
+    Err(Error::new(ERR_CLASS_INVALID_RANGE_ITEM))
+}
+
+fn into_class_item_ranges(hir: Hir) -> Result<Vec<hir::ClassRange>, Error> {
+    Ok(match hir.kind {
+        HirKind::Char(ch) => vec![hir::ClassRange { start: ch, end: ch }],
+        HirKind::Class(class) => class.ranges, // already a list of ranges
+        _ => return Err(Error::new(ERR_CLASS_INVALID_ITEM)),
+    })
+}
+
+/// Looks up a named POSIX character class and returns an iterator over its
+/// (ASCII only) character ranges. Names that are not in the table below
+/// yield an error.
+fn posix_class(
+    kind: &str,
+) -> Result<impl Iterator<Item = hir::ClassRange>, Error> {
+    let table: &'static [(u8, u8)] = match kind {
+        "alnum" => &[(b'0', b'9'), (b'A', b'Z'), (b'a', b'z')],
+        "alpha" => &[(b'A', b'Z'), (b'a', b'z')],
+        "ascii" => &[(b'\x00', b'\x7F')],
+        "blank" => &[(b'\t', b'\t'), (b' ', b' ')],
+        "cntrl" => &[(b'\x00', b'\x1F'), (b'\x7F', b'\x7F')],
+        "digit" => &[(b'0', b'9')],
+        "graph" => &[(b'!', b'~')],
+        "lower" => &[(b'a', b'z')],
+        "print" => &[(b' ', b'~')],
+        "punct" => &[(b'!', b'/'), (b':', b'@'), (b'[', b'`'), (b'{', b'~')],
+        "space" => &[
+            (b'\t', b'\t'),
+            (b'\n', b'\n'),
+            (b'\x0B', b'\x0B'),
+            (b'\x0C', b'\x0C'),
+            (b'\r', b'\r'),
+            (b' ', b' '),
+        ],
+        "upper" => &[(b'A', b'Z')],
+        "word" => &[(b'0', b'9'), (b'A', b'Z'), (b'_', b'_'), (b'a', b'z')],
+        "xdigit" => &[(b'0', b'9'), (b'A', b'F'), (b'a', b'f')],
+        _ => return Err(Error::new(ERR_POSIX_CLASS_UNRECOGNIZED)),
+    };
+    let to_range = |&(lo, hi): &(u8, u8)| {
+        hir::ClassRange { start: char::from(lo), end: char::from(hi) }
+    };
+    Ok(table.iter().map(to_range))
+}
+
+/// Reports whether `c` is an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`).
+fn is_hex(c: char) -> bool {
+    matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F')
+}
+
+/// Returns true if the given character is valid in a capture group name.
+/// If `first` is true, then `c` is treated as the first character in the
+/// group name (which must be alphabetic or underscore).
+fn is_capture_char(c: char, first: bool) -> bool {
+    match c {
+        '_' => true,
+        '.' | '[' | ']' => !first,
+        _ if first => c.is_alphabetic(),
+        _ => c.is_alphanumeric(),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn p(pattern: &str) -> Hir {
+        Parser::new(Config::default(), pattern).parse().unwrap()
+    }
+
+    fn perr(pattern: &str) -> String {
+        Parser::new(Config::default(), pattern)
+            .parse()
+            .unwrap_err()
+            .to_string()
+    }
+
+    fn class<I: IntoIterator<Item = (char, char)>>(it: I) -> Hir {
+        Hir::class(hir::Class::new(
+            it.into_iter().map(|(start, end)| hir::ClassRange { start, end }),
+        ))
+    }
+
+    fn singles<I: IntoIterator<Item = char>>(it: I) -> Hir {
+        Hir::class(hir::Class::new(
+            it.into_iter().map(|ch| hir::ClassRange { start: ch, end: ch }),
+        ))
+    }
+
+    fn posix(name: &str) -> Hir {
+        Hir::class(hir::Class::new(posix_class(name).unwrap()))
+    }
+
+    fn cap(index: u32, sub: Hir) -> Hir {
+        Hir::capture(hir::Capture { index, name: None, sub: Box::new(sub) })
+    }
+
+    fn named_cap(index: u32, name: &str, sub: Hir) -> Hir {
+        Hir::capture(hir::Capture {
+            index,
+            name: Some(Box::from(name)),
+            sub: Box::new(sub),
+        })
+    }
+
+    #[test]
+    fn ok_literal() {
+        assert_eq!(p("a"), Hir::char('a'));
+        assert_eq!(p("ab"), Hir::concat(vec![Hir::char('a'), Hir::char('b')]));
+        assert_eq!(p("💩"), Hir::char('💩'));
+    }
+
+    #[test]
+    fn ok_meta_escapes() {
+        assert_eq!(p(r"\*"), Hir::char('*'));
+        assert_eq!(p(r"\+"), Hir::char('+'));
+        assert_eq!(p(r"\?"), Hir::char('?'));
+        assert_eq!(p(r"\|"), Hir::char('|'));
+        assert_eq!(p(r"\("), Hir::char('('));
+        assert_eq!(p(r"\)"), Hir::char(')'));
+        assert_eq!(p(r"\^"), Hir::char('^'));
+        assert_eq!(p(r"\$"), Hir::char('$'));
+        assert_eq!(p(r"\["), Hir::char('['));
+        assert_eq!(p(r"\]"), Hir::char(']'));
+    }
+
+    #[test]
+    fn ok_special_escapes() {
+        assert_eq!(p(r"\a"), Hir::char('\x07'));
+        assert_eq!(p(r"\f"), Hir::char('\x0C'));
+        assert_eq!(p(r"\t"), Hir::char('\t'));
+        assert_eq!(p(r"\n"), Hir::char('\n'));
+        assert_eq!(p(r"\r"), Hir::char('\r'));
+        assert_eq!(p(r"\v"), Hir::char('\x0B'));
+        assert_eq!(p(r"\A"), Hir::look(hir::Look::Start));
+        assert_eq!(p(r"\z"), Hir::look(hir::Look::End));
+        assert_eq!(p(r"\b"), Hir::look(hir::Look::Word));
+        assert_eq!(p(r"\B"), Hir::look(hir::Look::WordNegate));
+    }
+
+    #[test]
+    fn ok_hex() {
+        // fixed length
+        assert_eq!(p(r"\x41"), Hir::char('A'));
+        assert_eq!(p(r"\u2603"), Hir::char('☃'));
+        assert_eq!(p(r"\U0001F4A9"), Hir::char('💩'));
+        // braces
+        assert_eq!(p(r"\x{1F4A9}"), Hir::char('💩'));
+        assert_eq!(p(r"\u{1F4A9}"), Hir::char('💩'));
+        assert_eq!(p(r"\U{1F4A9}"), Hir::char('💩'));
+    }
+
+    #[test]
+    fn ok_perl() {
+        assert_eq!(p(r"\d"), posix("digit"));
+        assert_eq!(p(r"\s"), posix("space"));
+        assert_eq!(p(r"\w"), posix("word"));
+
+        let negated = |name| {
+            let mut class = hir::Class::new(posix_class(name).unwrap());
+            class.negate();
+            Hir::class(class)
+        };
+        assert_eq!(p(r"\D"), negated("digit"));
+        assert_eq!(p(r"\S"), negated("space"));
+        assert_eq!(p(r"\W"), negated("word"));
+    }
+
+    #[test]
+    fn ok_flags_and_primitives() {
+        assert_eq!(p(r"a"), Hir::char('a'));
+        assert_eq!(p(r"(?i:a)"), singles(['A', 'a']));
+
+        assert_eq!(p(r"^"), Hir::look(hir::Look::Start));
+        assert_eq!(p(r"(?m:^)"), Hir::look(hir::Look::StartLF));
+        assert_eq!(p(r"(?mR:^)"), Hir::look(hir::Look::StartCRLF));
+
+        assert_eq!(p(r"$"), Hir::look(hir::Look::End));
+        assert_eq!(p(r"(?m:$)"), Hir::look(hir::Look::EndLF));
+        assert_eq!(p(r"(?mR:$)"), Hir::look(hir::Look::EndCRLF));
+
+        assert_eq!(p(r"."), class([('\x00', '\x09'), ('\x0B', '\u{10FFFF}')]));
+        assert_eq!(
+            p(r"(?R:.)"),
+            class([
+                ('\x00', '\x09'),
+                ('\x0B', '\x0C'),
+                ('\x0E', '\u{10FFFF}'),
+            ])
+        );
+        assert_eq!(p(r"(?s:.)"), class([('\x00', '\u{10FFFF}')]));
+        assert_eq!(p(r"(?sR:.)"), class([('\x00', '\u{10FFFF}')]));
+    }
+
+    #[test]
+    fn ok_alternate() {
+        assert_eq!(
+            p(r"a|b"),
+            Hir::alternation(vec![Hir::char('a'), Hir::char('b')])
+        );
+        assert_eq!(
+            p(r"(?:a|b)"),
+            Hir::alternation(vec![Hir::char('a'), Hir::char('b')])
+        );
+
+        assert_eq!(
+            p(r"(a|b)"),
+            cap(1, Hir::alternation(vec![Hir::char('a'), Hir::char('b')]))
+        );
+        assert_eq!(
+            p(r"(?<foo>a|b)"),
+            named_cap(
+                1,
+                "foo",
+                Hir::alternation(vec![Hir::char('a'), Hir::char('b')])
+            )
+        );
+
+        assert_eq!(
+            p(r"a|b|c"),
+            Hir::alternation(vec![
+                Hir::char('a'),
+                Hir::char('b'),
+                Hir::char('c')
+            ])
+        );
+
+        assert_eq!(
+            p(r"ax|by|cz"),
+            Hir::alternation(vec![
+                Hir::concat(vec![Hir::char('a'), Hir::char('x')]),
+                Hir::concat(vec![Hir::char('b'), Hir::char('y')]),
+                Hir::concat(vec![Hir::char('c'), Hir::char('z')]),
+            ])
+        );
+        assert_eq!(
+            p(r"(ax|(by|(cz)))"),
+            cap(
+                1,
+                Hir::alternation(vec![
+                    Hir::concat(vec![Hir::char('a'), Hir::char('x')]),
+                    cap(
+                        2,
+                        Hir::alternation(vec![
+                            Hir::concat(vec![Hir::char('b'), Hir::char('y')]),
+                            cap(
+                                3,
+                                Hir::concat(vec![
+                                    Hir::char('c'),
+                                    Hir::char('z')
+                                ])
+                            ),
+                        ])
+                    ),
+                ])
+            )
+        );
+
+        assert_eq!(
+            p(r"|"),
+            Hir::alternation(vec![Hir::empty(), Hir::empty()])
+        );
+        assert_eq!(
+            p(r"||"),
+            Hir::alternation(vec![Hir::empty(), Hir::empty(), Hir::empty()])
+        );
+
+        assert_eq!(
+            p(r"a|"),
+            Hir::alternation(vec![Hir::char('a'), Hir::empty()])
+        );
+        assert_eq!(
+            p(r"|a"),
+            Hir::alternation(vec![Hir::empty(), Hir::char('a')])
+        );
+
+        assert_eq!(
+            p(r"(|)"),
+            cap(1, Hir::alternation(vec![Hir::empty(), Hir::empty()]))
+        );
+        assert_eq!(
+            p(r"(a|)"),
+            cap(1, Hir::alternation(vec![Hir::char('a'), Hir::empty()]))
+        );
+        assert_eq!(
+            p(r"(|a)"),
+            cap(1, Hir::alternation(vec![Hir::empty(), Hir::char('a')]))
+        );
+    }
+
+    #[test]
+    fn ok_flag_group() {
+        assert_eq!(
+            p("a(?i:b)"),
+            Hir::concat(vec![Hir::char('a'), singles(['B', 'b'])])
+        );
+    }
+
+    #[test]
+    fn ok_flag_directive() {
+        assert_eq!(p("(?i)a"), singles(['A', 'a']));
+        assert_eq!(p("a(?i)"), Hir::char('a'));
+        assert_eq!(
+            p("a(?i)b"),
+            Hir::concat(vec![Hir::char('a'), singles(['B', 'b'])])
+        );
+        assert_eq!(
+            p("a(?i)a(?-i)a"),
+            Hir::concat(vec![
+                Hir::char('a'),
+                singles(['A', 'a']),
+                Hir::char('a'),
+            ])
+        );
+        assert_eq!(
+            p("a(?:(?i)a)a"),
+            Hir::concat(vec![
+                Hir::char('a'),
+                singles(['A', 'a']),
+                Hir::char('a'),
+            ])
+        );
+        assert_eq!(
+            p("a((?i)a)a"),
+            Hir::concat(vec![
+                Hir::char('a'),
+                cap(1, singles(['A', 'a'])),
+                Hir::char('a'),
+            ])
+        );
+    }
+
+    #[test]
+    fn ok_uncounted_repetition() {
+        assert_eq!(
+            p(r"a?"),
+            Hir::repetition(hir::Repetition {
+                min: 0,
+                max: Some(1),
+                greedy: true,
+                sub: Box::new(Hir::char('a')),
+            }),
+        );
+        assert_eq!(
+            p(r"a*"),
+            Hir::repetition(hir::Repetition {
+                min: 0,
+                max: None,
+                greedy: true,
+                sub: Box::new(Hir::char('a')),
+            }),
+        );
+        assert_eq!(
+            p(r"a+"),
+            Hir::repetition(hir::Repetition {
+                min: 1,
+                max: None,
+                greedy: true,
+                sub: Box::new(Hir::char('a')),
+            }),
+        );
+
+        assert_eq!(
+            p(r"a??"),
+            Hir::repetition(hir::Repetition {
+                min: 0,
+                max: Some(1),
+                greedy: false,
+                sub: Box::new(Hir::char('a')),
+            }),
+        );
+        assert_eq!(
+            p(r"a*?"),
+            Hir::repetition(hir::Repetition {
+                min: 0,
+                max: None,
+                greedy: false,
+                sub: Box::new(Hir::char('a')),
+            }),
+        );
+        assert_eq!(
+            p(r"a+?"),
+            Hir::repetition(hir::Repetition {
+                min: 1,
+                max: None,
+                greedy: false,
+                sub: Box::new(Hir::char('a')),
+            }),
+        );
+
+        assert_eq!(
+            p(r"a?b"),
+            Hir::concat(vec![
+                Hir::repetition(hir::Repetition {
+                    min: 0,
+                    max: Some(1),
+                    greedy: true,
+                    sub: Box::new(Hir::char('a')),
+                }),
+                Hir::char('b'),
+            ]),
+        );
+
+        assert_eq!(
+            p(r"ab?"),
+            Hir::concat(vec![
+                Hir::char('a'),
+                Hir::repetition(hir::Repetition {
+                    min: 0,
+                    max: Some(1),
+                    greedy: true,
+                    sub: Box::new(Hir::char('b')),
+                }),
+            ]),
+        );
+
+        assert_eq!(
+            p(r"(?:ab)?"),
+            Hir::repetition(hir::Repetition {
+                min: 0,
+                max: Some(1),
+                greedy: true,
+                sub: Box::new(Hir::concat(vec![
+                    Hir::char('a'),
+                    Hir::char('b')
+                ])),
+            }),
+        );
+
+        assert_eq!(
+            p(r"(ab)?"),
+            Hir::repetition(hir::Repetition {
+                min: 0,
+                max: Some(1),
+                greedy: true,
+                sub: Box::new(cap(
+                    1,
+                    Hir::concat(vec![Hir::char('a'), Hir::char('b')])
+                )),
+            }),
+        );
+
+        assert_eq!(
+            p(r"|a?"),
+            Hir::alternation(vec![
+                Hir::empty(),
+                Hir::repetition(hir::Repetition {
+                    min: 0,
+                    max: Some(1),
+                    greedy: true,
+                    sub: Box::new(Hir::char('a')),
+                })
+            ]),
+        );
+    }
+
+    #[test]
+    fn ok_counted_repetition() {
+        assert_eq!(
+            p(r"a{5}"),
+            Hir::repetition(hir::Repetition {
+                min: 5,
+                max: Some(5),
+                greedy: true,
+                sub: Box::new(Hir::char('a')),
+            }),
+        );
+        assert_eq!(
+            p(r"a{5}?"),
+            Hir::repetition(hir::Repetition {
+                min: 5,
+                max: Some(5),
+                greedy: false,
+                sub: Box::new(Hir::char('a')),
+            }),
+        );
+
+        assert_eq!(
+            p(r"a{5,}"),
+            Hir::repetition(hir::Repetition {
+                min: 5,
+                max: None,
+                greedy: true,
+                sub: Box::new(Hir::char('a')),
+            }),
+        );
+
+        assert_eq!(
+            p(r"a{5,9}"),
+            Hir::repetition(hir::Repetition {
+                min: 5,
+                max: Some(9),
+                greedy: true,
+                sub: Box::new(Hir::char('a')),
+            }),
+        );
+
+        assert_eq!(
+            p(r"ab{5}c"),
+            Hir::concat(vec![
+                Hir::char('a'),
+                Hir::repetition(hir::Repetition {
+                    min: 5,
+                    max: Some(5),
+                    greedy: true,
+                    sub: Box::new(Hir::char('b')),
+                }),
+                Hir::char('c'),
+            ]),
+        );
+
+        assert_eq!(
+            p(r"a{ 5 }"),
+            Hir::repetition(hir::Repetition {
+                min: 5,
+                max: Some(5),
+                greedy: true,
+                sub: Box::new(Hir::char('a')),
+            }),
+        );
+        assert_eq!(
+            p(r"a{ 5 , 9 }"),
+            Hir::repetition(hir::Repetition {
+                min: 5,
+                max: Some(9),
+                greedy: true,
+                sub: Box::new(Hir::char('a')),
+            }),
+        );
+    }
+
+    #[test]
+    fn ok_group_unnamed() {
+        assert_eq!(p("(a)"), cap(1, Hir::char('a')));
+        assert_eq!(
+            p("(ab)"),
+            cap(1, Hir::concat(vec![Hir::char('a'), Hir::char('b')]))
+        );
+    }
+
+    #[test]
+    fn ok_group_named() {
+        assert_eq!(p("(?P<foo>a)"), named_cap(1, "foo", Hir::char('a')));
+        assert_eq!(p("(?<foo>a)"), named_cap(1, "foo", Hir::char('a')));
+
+        assert_eq!(
+            p("(?P<foo>ab)"),
+            named_cap(
+                1,
+                "foo",
+                Hir::concat(vec![Hir::char('a'), Hir::char('b')])
+            )
+        );
+        assert_eq!(
+            p("(?<foo>ab)"),
+            named_cap(
+                1,
+                "foo",
+                Hir::concat(vec![Hir::char('a'), Hir::char('b')])
+            )
+        );
+
+        assert_eq!(p(r"(?<a>z)"), named_cap(1, "a", Hir::char('z')));
+        assert_eq!(p(r"(?P<a>z)"), named_cap(1, "a", Hir::char('z')));
+
+        assert_eq!(p(r"(?<a_1>z)"), named_cap(1, "a_1", Hir::char('z')));
+        assert_eq!(p(r"(?P<a_1>z)"), named_cap(1, "a_1", Hir::char('z')));
+
+        assert_eq!(p(r"(?<a.1>z)"), named_cap(1, "a.1", Hir::char('z')));
+        assert_eq!(p(r"(?P<a.1>z)"), named_cap(1, "a.1", Hir::char('z')));
+
+        assert_eq!(p(r"(?<a[1]>z)"), named_cap(1, "a[1]", Hir::char('z')));
+        assert_eq!(p(r"(?P<a[1]>z)"), named_cap(1, "a[1]", Hir::char('z')));
+
+        assert_eq!(p(r"(?<a¾>z)"), named_cap(1, "a¾", Hir::char('z')));
+        assert_eq!(p(r"(?P<a¾>z)"), named_cap(1, "a¾", Hir::char('z')));
+
+        assert_eq!(p(r"(?<名字>z)"), named_cap(1, "名字", Hir::char('z')));
+        assert_eq!(p(r"(?P<名字>z)"), named_cap(1, "名字", Hir::char('z')));
+    }
+
+    #[test]
+    fn ok_class() {
+        assert_eq!(p(r"[a]"), singles(['a']));
+        assert_eq!(p(r"[a\]]"), singles(['a', ']']));
+        assert_eq!(p(r"[a\-z]"), singles(['a', '-', 'z']));
+        assert_eq!(p(r"[ab]"), class([('a', 'b')]));
+        assert_eq!(p(r"[a-]"), singles(['a', '-']));
+        assert_eq!(p(r"[-a]"), singles(['a', '-']));
+        assert_eq!(p(r"[--a]"), singles(['a', '-']));
+        assert_eq!(p(r"[---a]"), singles(['a', '-']));
+        assert_eq!(p(r"[[:alnum:]]"), posix("alnum"));
+        assert_eq!(p(r"[\w]"), posix("word"));
+        assert_eq!(p(r"[a\wz]"), posix("word"));
+        assert_eq!(p(r"[\s\S]"), class([('\x00', '\u{10FFFF}')]));
+        assert_eq!(p(r"[^\s\S]"), Hir::fail());
+        assert_eq!(p(r"[a-cx-z]"), class([('a', 'c'), ('x', 'z')]));
+        assert_eq!(p(r"[☃-⛄]"), class([('☃', '⛄')]));
+        assert_eq!(p(r"[]]"), singles([']']));
+        assert_eq!(p(r"[]a]"), singles([']', 'a']));
+        assert_eq!(p(r"[]\[]"), singles(['[', ']']));
+        assert_eq!(p(r"[\[]"), singles(['[']));
+
+        assert_eq!(p(r"(?i)[a]"), singles(['A', 'a']));
+        assert_eq!(p(r"(?i)[A]"), singles(['A', 'a']));
+        assert_eq!(p(r"(?i)[k]"), singles(['K', 'k']));
+        assert_eq!(p(r"(?i)[s]"), singles(['S', 's']));
+        assert_eq!(p(r"(?i)[β]"), singles(['β']));
+
+        assert_eq!(p(r"[^^]"), class([('\x00', ']'), ('_', '\u{10FFFF}')]));
+        assert_eq!(
+            p(r"[^-a]"),
+            class([('\x00', ','), ('.', '`'), ('b', '\u{10FFFF}')])
+        );
+
+        assert_eq!(
+            p(r"[-]a]"),
+            Hir::concat(vec![singles(['-']), Hir::char('a'), Hir::char(']')])
+        );
+    }
+
+    #[test]
+    fn ok_verbatim() {
+        assert_eq!(
+            p(r"(?x)a{5,9} ?"),
+            Hir::repetition(hir::Repetition {
+                min: 5,
+                max: Some(9),
+                greedy: false,
+                sub: Box::new(Hir::char('a')),
+            })
+        );
+        assert_eq!(p(r"(?x)[   a]"), singles(['a']));
+        assert_eq!(
+            p(r"(?x)[ ^  a]"),
+            class([('\x00', '`'), ('b', '\u{10FFFF}')])
+        );
+        assert_eq!(p(r"(?x)[ - a]"), singles(['a', '-']));
+        assert_eq!(p(r"(?x)[ ] a]"), singles([']', 'a']));
+
+        assert_eq!(
+            p(r"(?x)a b"),
+            Hir::concat(vec![Hir::char('a'), Hir::char('b')])
+        );
+        assert_eq!(
+            p(r"(?x)a b(?-x)a b"),
+            Hir::concat(vec![
+                Hir::char('a'),
+                Hir::char('b'),
+                Hir::char('a'),
+                Hir::char(' '),
+                Hir::char('b'),
+            ])
+        );
+        assert_eq!(
+            p(r"a (?x:a )a "),
+            Hir::concat(vec![
+                Hir::char('a'),
+                Hir::char(' '),
+                Hir::char('a'),
+                Hir::char('a'),
+                Hir::char(' '),
+            ])
+        );
+        assert_eq!(
+            p(r"(?x)( ?P<foo> a )"),
+            named_cap(1, "foo", Hir::char('a')),
+        );
+        assert_eq!(p(r"(?x)(  a )"), cap(1, Hir::char('a')));
+        assert_eq!(p(r"(?x)(   ?:  a )"), Hir::char('a'));
+        assert_eq!(p(r"(?x)\x { 53 }"), Hir::char('\x53'));
+        assert_eq!(p(r"(?x)\ "), Hir::char(' '));
+    }
+
+    #[test]
+    fn ok_comments() {
+        let pat = "(?x)
+# This is comment 1.
+foo # This is comment 2.
+  # This is comment 3.
+bar
+# This is comment 4.";
+        assert_eq!(
+            p(pat),
+            Hir::concat(vec![
+                Hir::char('f'),
+                Hir::char('o'),
+                Hir::char('o'),
+                Hir::char('b'),
+                Hir::char('a'),
+                Hir::char('r'),
+            ])
+        );
+    }
+
+    #[test]
+    fn err_standard() {
+        assert_eq!(
+            ERR_TOO_MUCH_NESTING,
+            perr("(((((((((((((((((((((((((((((((((((((((((((((((((a)))))))))))))))))))))))))))))))))))))))))))))))))"),
+        );
+        // This one is tricky, because the only way it can happen is if the
+        // number of captures overflows u32. Perhaps we should allow setting a
+        // lower limit?
+        // assert_eq!(ERR_TOO_MANY_CAPTURES, perr(""));
+        assert_eq!(ERR_DUPLICATE_CAPTURE_NAME, perr(r"(?P<a>y)(?P<a>z)"));
+        assert_eq!(ERR_UNCLOSED_GROUP, perr("("));
+        assert_eq!(ERR_UNCLOSED_GROUP_QUESTION, perr("(?"));
+        assert_eq!(ERR_UNOPENED_GROUP, perr(")"));
+        assert_eq!(ERR_LOOK_UNSUPPORTED, perr(r"(?=a)"));
+        assert_eq!(ERR_LOOK_UNSUPPORTED, perr(r"(?!a)"));
+        assert_eq!(ERR_LOOK_UNSUPPORTED, perr(r"(?<=a)"));
+        assert_eq!(ERR_LOOK_UNSUPPORTED, perr(r"(?<!a)"));
+        assert_eq!(ERR_EMPTY_FLAGS, perr(r"(?)"));
+        assert_eq!(ERR_MISSING_GROUP_NAME, perr(r"(?P<"));
+        assert_eq!(ERR_MISSING_GROUP_NAME, perr(r"(?<"));
+        assert_eq!(ERR_INVALID_GROUP_NAME, perr(r"(?P<1abc>z)"));
+        assert_eq!(ERR_INVALID_GROUP_NAME, perr(r"(?<1abc>z)"));
+        assert_eq!(ERR_INVALID_GROUP_NAME, perr(r"(?<¾>z)"));
+        assert_eq!(ERR_INVALID_GROUP_NAME, perr(r"(?<¾a>z)"));
+        assert_eq!(ERR_INVALID_GROUP_NAME, perr(r"(?<☃>z)"));
+        assert_eq!(ERR_INVALID_GROUP_NAME, perr(r"(?<a☃>z)"));
+        assert_eq!(ERR_UNCLOSED_GROUP_NAME, perr(r"(?P<foo"));
+        assert_eq!(ERR_UNCLOSED_GROUP_NAME, perr(r"(?<foo"));
+        assert_eq!(ERR_EMPTY_GROUP_NAME, perr(r"(?P<>z)"));
+        assert_eq!(ERR_EMPTY_GROUP_NAME, perr(r"(?<>z)"));
+        assert_eq!(ERR_FLAG_UNRECOGNIZED, perr(r"(?z:foo)"));
+        assert_eq!(ERR_FLAG_REPEATED_NEGATION, perr(r"(?s-i-R)"));
+        assert_eq!(ERR_FLAG_DUPLICATE, perr(r"(?isi)"));
+        assert_eq!(ERR_FLAG_DUPLICATE, perr(r"(?is-i)"));
+        assert_eq!(ERR_FLAG_UNEXPECTED_EOF, perr(r"(?is"));
+        assert_eq!(ERR_FLAG_DANGLING_NEGATION, perr(r"(?is-:foo)"));
+        assert_eq!(ERR_HEX_BRACE_INVALID_DIGIT, perr(r"\x{Z}"));
+        assert_eq!(ERR_HEX_BRACE_UNEXPECTED_EOF, perr(r"\x{"));
+        assert_eq!(ERR_HEX_BRACE_UNEXPECTED_EOF, perr(r"\x{A"));
+        assert_eq!(ERR_HEX_BRACE_EMPTY, perr(r"\x{}"));
+        assert_eq!(ERR_HEX_BRACE_INVALID, perr(r"\x{FFFFFFFFFFFFFFFFF}"));
+        assert_eq!(ERR_HEX_FIXED_UNEXPECTED_EOF, perr(r"\xA"));
+        assert_eq!(ERR_HEX_FIXED_INVALID_DIGIT, perr(r"\xZ"));
+        assert_eq!(ERR_HEX_FIXED_INVALID_DIGIT, perr(r"\xZA"));
+        assert_eq!(ERR_HEX_FIXED_INVALID_DIGIT, perr(r"\xAZ"));
+        assert_eq!(ERR_HEX_FIXED_INVALID, perr(r"\uD800"));
+        assert_eq!(ERR_HEX_FIXED_INVALID, perr(r"\UFFFFFFFF"));
+        assert_eq!(ERR_HEX_UNEXPECTED_EOF, perr(r"\x"));
+        assert_eq!(ERR_ESCAPE_UNEXPECTED_EOF, perr(r"\"));
+        assert_eq!(ERR_BACKREF_UNSUPPORTED, perr(r"\0"));
+        assert_eq!(ERR_BACKREF_UNSUPPORTED, perr(r"\1"));
+        assert_eq!(ERR_BACKREF_UNSUPPORTED, perr(r"\8"));
+        assert_eq!(ERR_UNICODE_CLASS_UNSUPPORTED, perr(r"\pL"));
+        assert_eq!(ERR_UNICODE_CLASS_UNSUPPORTED, perr(r"\p{L}"));
+        assert_eq!(ERR_ESCAPE_UNRECOGNIZED, perr(r"\i"));
+        assert_eq!(ERR_ESCAPE_UNRECOGNIZED, perr(r"\<"));
+        assert_eq!(ERR_ESCAPE_UNRECOGNIZED, perr(r"\>"));
+        assert_eq!(ERR_UNCOUNTED_REP_SUB_MISSING, perr(r"?"));
+        assert_eq!(ERR_UNCOUNTED_REP_SUB_MISSING, perr(r"*"));
+        assert_eq!(ERR_UNCOUNTED_REP_SUB_MISSING, perr(r"+"));
+        assert_eq!(ERR_UNCOUNTED_REP_SUB_MISSING, perr(r"(+)"));
+        assert_eq!(ERR_UNCOUNTED_REP_SUB_MISSING, perr(r"|?"));
+        assert_eq!(ERR_UNCOUNTED_REP_SUB_MISSING, perr(r"(?i)?"));
+        assert_eq!(ERR_COUNTED_REP_SUB_MISSING, perr(r"{5}"));
+        assert_eq!(ERR_COUNTED_REP_SUB_MISSING, perr(r"({5})"));
+        assert_eq!(ERR_COUNTED_REP_SUB_MISSING, perr(r"(?i){5}"));
+        assert_eq!(ERR_COUNTED_REP_UNCLOSED, perr(r"a{"));
+        assert_eq!(ERR_COUNTED_REP_MIN_UNCLOSED, perr(r"a{5"));
+        assert_eq!(ERR_COUNTED_REP_COMMA_UNCLOSED, perr(r"a{5,"));
+        assert_eq!(ERR_COUNTED_REP_MIN_MAX_UNCLOSED, perr(r"a{5,6"));
+        assert_eq!(ERR_COUNTED_REP_INVALID, perr(r"a{5,6Z"));
+        assert_eq!(ERR_COUNTED_REP_INVALID_RANGE, perr(r"a{6,5}"));
+        assert_eq!(ERR_DECIMAL_NO_DIGITS, perr(r"a{}"));
+        assert_eq!(ERR_DECIMAL_NO_DIGITS, perr(r"a{]}"));
+        assert_eq!(ERR_DECIMAL_INVALID, perr(r"a{999999999999999}"));
+        assert_eq!(ERR_CLASS_UNCLOSED_AFTER_ITEM, perr(r"[a"));
+        assert_eq!(ERR_CLASS_INVALID_RANGE_ITEM, perr(r"[\w-a]"));
+        assert_eq!(ERR_CLASS_INVALID_RANGE_ITEM, perr(r"[a-\w]"));
+        assert_eq!(ERR_CLASS_INVALID_ITEM, perr(r"[\b]"));
+        assert_eq!(ERR_CLASS_UNCLOSED_AFTER_DASH, perr(r"[a-"));
+        assert_eq!(ERR_CLASS_UNCLOSED_AFTER_NEGATION, perr(r"[^"));
+        assert_eq!(ERR_CLASS_UNCLOSED_AFTER_CLOSING, perr(r"[]"));
+        assert_eq!(ERR_CLASS_INVALID_RANGE, perr(r"[z-a]"));
+        assert_eq!(ERR_CLASS_UNCLOSED, perr(r"["));
+        assert_eq!(ERR_CLASS_UNCLOSED, perr(r"[a-z"));
+        assert_eq!(ERR_CLASS_NEST_UNSUPPORTED, perr(r"[a-z[A-Z]]"));
+        assert_eq!(ERR_CLASS_NEST_UNSUPPORTED, perr(r"[[:alnum]]"));
+        assert_eq!(ERR_CLASS_INTERSECTION_UNSUPPORTED, perr(r"[a&&b]"));
+        assert_eq!(ERR_CLASS_DIFFERENCE_UNSUPPORTED, perr(r"[a--b]"));
+        assert_eq!(ERR_CLASS_SYMDIFFERENCE_UNSUPPORTED, perr(r"[a~~b]"));
+    }
+
+    #[test]
+    fn err_verbatim() {
+        // See: https://github.com/rust-lang/regex/issues/792
+        assert_eq!(ERR_CLASS_UNCLOSED_AFTER_DASH, perr(r"(?x)[-#]"));
+        assert_eq!(ERR_CLASS_UNCLOSED_AFTER_ITEM, perr(r"(?x)[a "));
+        assert_eq!(ERR_CLASS_UNCLOSED_AFTER_DASH, perr(r"(?x)[a- "));
+        assert_eq!(ERR_CLASS_UNCLOSED, perr(r"(?x)[         "));
+    }
+
+    // This tests a bug fix where the nest limit checker wasn't decrementing
+    // its depth during post-traversal, which causes long regexes to trip
+    // the default limit too aggressively.
+    #[test]
+    fn regression_454_nest_too_big() {
+        let pattern = r#"
+        2(?:
+          [45]\d{3}|
+          7(?:
+            1[0-267]|
+            2[0-289]|
+            3[0-29]|
+            4[01]|
+            5[1-3]|
+            6[013]|
+            7[0178]|
+            91
+          )|
+          8(?:
+            0[125]|
+            [139][1-6]|
+            2[0157-9]|
+            41|
+            6[1-35]|
+            7[1-5]|
+            8[1-8]|
+            90
+          )|
+          9(?:
+            0[0-2]|
+            1[0-4]|
+            2[568]|
+            3[3-6]|
+            5[5-7]|
+            6[0167]|
+            7[15]|
+            8[0146-9]
+          )
+        )\d{4}
+        "#;
+        p(pattern);
+    }
+
+    // This tests that we treat a trailing `-` in a character class as a
+    // literal `-` even when whitespace mode is enabled and there is whitespace
+    // after the trailing `-`.
+    #[test]
+    fn regression_455_trailing_dash_ignore_whitespace() {
+        p("(?x)[ / - ]");
+        p("(?x)[ a - ]");
+        p("(?x)[
+            a
+            - ]
+        ");
+        p("(?x)[
+            a # wat
+            - ]
+        ");
+
+        perr("(?x)[ / -");
+        perr("(?x)[ / - ");
+        perr(
+            "(?x)[
+            / -
+        ",
+        );
+        perr(
+            "(?x)[
+            / - # wat
+        ",
+        );
+    }
+}
diff --git a/regex-lite/src/int.rs b/regex-lite/src/int.rs
new file mode 100644
index 000000000..c369f0429
--- /dev/null
+++ b/regex-lite/src/int.rs
@@ -0,0 +1,56 @@
+use core::num::NonZeroUsize;
+
+/// An extension trait that adds routines to the `u32` primitive type.
+pub(crate) trait U32 {
+    fn as_usize(self) -> usize;
+}
+
+impl U32 for u32 {
+    fn as_usize(self) -> usize {
+        // OK because we require 32 or 64 bit targets. Therefore, every u32
+        // necessarily fits into a usize.
+        self as usize
+    }
+}
+
+/// A `usize` that can never be `usize::MAX`.
+///
+/// This is similar to `core::num::NonZeroUsize`, but instead of not permitting
+/// a zero value, this does not permit a max value.
+///
+/// This is useful in certain contexts where one wants to optimize the memory
+/// usage of things that contain match offsets. Namely, since Rust slices
+/// are guaranteed to never have a length exceeding `isize::MAX`, we can use
+/// `usize::MAX` as a sentinel to indicate that no match was found. Indeed,
+/// types like `Option<NonMaxUsize>` have exactly the same size in memory as a
+/// `usize`.
+///
+/// This type is defined to be `repr(transparent)` for
+/// `core::num::NonZeroUsize`, which is in turn defined to be
+/// `repr(transparent)` for `usize`.
+#[derive(Clone, Copy, Eq, Hash, PartialEq, PartialOrd, Ord)]
+#[repr(transparent)]
+pub(crate) struct NonMaxUsize(NonZeroUsize);
+
+impl NonMaxUsize {
+    /// Create a new `NonMaxUsize` from the given value.
+    ///
+    /// This returns `None` only when the given value is equal to `usize::MAX`.
+    pub(crate) fn new(value: usize) -> Option<NonMaxUsize> {
+        NonZeroUsize::new(value.wrapping_add(1)).map(NonMaxUsize)
+    }
+
+    /// Return the underlying `usize` value. The returned value is guaranteed
+    /// to not equal `usize::MAX`.
+    pub(crate) fn get(self) -> usize {
+        self.0.get().wrapping_sub(1)
+    }
+}
+
+// We provide our own Debug impl because seeing the internal repr can be quite
+// surprising if you aren't expecting it. e.g., 'NonMaxUsize(5)' vs just '5'.
+impl core::fmt::Debug for NonMaxUsize {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(f, "{:?}", self.get())
+    }
+}
diff --git a/regex-lite/src/lib.rs b/regex-lite/src/lib.rs
new file mode 100644
index 000000000..a5747a20f
--- /dev/null
+++ b/regex-lite/src/lib.rs
@@ -0,0 +1,31 @@
+/*!
+TODO
+*/
+
+#![allow(warnings)]
+#![no_std]
+#![forbid(unsafe_code)]
+// #![deny(missing_docs, rustdoc::broken_intra_doc_links)]
+#![warn(missing_debug_implementations)]
+#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+
+#[cfg(not(any(target_pointer_width = "32", target_pointer_width = "64")))]
+compile_error!("not supported on non-{32,64}, please file an issue");
+
+extern crate alloc;
+#[cfg(any(test, feature = "std"))]
+extern crate std;
+
+pub use self::{
+    string::Regex,
+    util::{is_escapeable_character, is_meta_character},
+};
+
+mod error;
+mod hir;
+mod int;
+mod nfa;
+mod pikevm;
+mod string;
+mod utf8;
+mod util;
diff --git a/regex-lite/src/nfa.rs b/regex-lite/src/nfa.rs
new file mode 100644
index 000000000..30c680f2a
--- /dev/null
+++ b/regex-lite/src/nfa.rs
@@ -0,0 +1,678 @@
+use core::{cell::RefCell, mem::size_of};
+
+use alloc::{sync::Arc, vec, vec::Vec};
+
+use crate::{
+    error::Error,
+    hir::{self, Hir, HirKind},
+    int::U32,
+};
+
+pub(crate) type StateID = u32;
+
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct Config {
+    size_limit: Option<usize>,
+}
+
+impl Default for Config {
+    fn default() -> Config {
+        Config { size_limit: Some(10 * (1 << 20)) }
+    }
+}
+
+#[derive(Clone)]
+pub(crate) struct NFA {
+    /// The states that make up this NFA.
+    states: Vec<State>,
+    /// The ID of the start state.
+    start: StateID,
+    /// Whether this NFA can only match at the beginning of a haystack.
+    is_start_anchored: bool,
+    /// Whether this NFA can match the empty string.
+    is_match_empty: bool,
+    /// A map from capture group name to its corresponding index.
+    cap_name_to_index: CaptureNameMap,
+    /// A map from capture group index to the corresponding name, if one
+    /// exists.
+    cap_index_to_name: Vec<Option<Arc<str>>>,
+    /// Heap memory used indirectly by NFA states and other things (like the
+    /// various capturing group representations above). Since each state
+    /// might use a different amount of heap, we need to keep track of this
+    /// incrementally.
+    memory_extra: usize,
+}
+
+impl NFA {
+    /// Creates a new NFA from the given configuration and HIR.
+    pub(crate) fn new(config: Config, hir: &Hir) -> Result<NFA, Error> {
+        Compiler::new(config).compile(hir)
+    }
+
+    /// Returns the state corresponding to the given ID.
+    ///
+    /// # Panics
+    ///
+    /// If the ID does not refer to a valid state, then this panics.
+    pub(crate) fn state(&self, id: StateID) -> &State {
+        &self.states[id.as_usize()]
+    }
+
+    /// Returns the total number of states in this NFA.
+    pub(crate) fn len(&self) -> usize {
+        self.states.len()
+    }
+
+    /// Returns the ID of the starting state for this NFA.
+    pub(crate) fn start(&self) -> StateID {
+        self.start
+    }
+
+    /// Returns the capture group index for the corresponding named group.
+    /// If no such group with the given name exists, then `None` is returned.
+    pub(crate) fn to_index(&self, name: &str) -> Option<usize> {
+        self.cap_name_to_index.get(name).cloned().map(|i| i.as_usize())
+    }
+
+    /// Returns the capture group name for the corresponding capture group
+    /// index. If no such group exists or it is unnamed, `None` is returned.
+    pub(crate) fn to_name(&self, index: usize) -> Option<&str> {
+        self.cap_index_to_name.get(index)?.as_deref()
+    }
+
+    /// Returns an iterator over all of the capture groups, along with their
+    /// names if they exist, in this NFA.
+    pub(crate) fn capture_names(&self) -> CaptureNames<'_> {
+        CaptureNames { it: self.cap_index_to_name.iter() }
+    }
+
+    /// Returns the total number of capture groups, including the first and
+    /// implicit group, in this NFA.
+    pub(crate) fn group_len(&self) -> usize {
+        self.cap_index_to_name.len()
+    }
+
+    /// Returns true if and only if this NFA can only match at the beginning of
+    /// a haystack.
+    pub(crate) fn is_start_anchored(&self) -> bool {
+        self.is_start_anchored
+    }
+
+    /// Returns true if and only if this NFA can match the empty string.
+    pub(crate) fn is_match_empty(&self) -> bool {
+        self.is_match_empty
+    }
+
+    /// Returns the heap memory usage, in bytes, used by this NFA.
+    fn memory_usage(&self) -> usize {
+        (self.states.len() * size_of::<State>())
+            + (self.cap_index_to_name.len() * size_of::<Option<Arc<str>>>())
+            + self.memory_extra
+    }
+}
+
+impl core::fmt::Debug for NFA {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        writeln!(f, "NFA(")?;
+        for (sid, state) in self.states.iter().enumerate() {
+            writeln!(f, "{:06?}: {:?}", sid, state)?;
+        }
+        writeln!(f, ")")?;
+        Ok(())
+    }
+}
+
+/// An iterator over all capture groups in an NFA.
+///
+/// If a particular group has a name, then it is yielded. Otherwise, `None`
+/// is yielded.
+#[derive(Clone, Debug)]
+pub(crate) struct CaptureNames<'a> {
+    it: core::slice::Iter<'a, Option<Arc<str>>>,
+}
+
+impl<'a> Iterator for CaptureNames<'a> {
+    type Item = Option<&'a str>;
+
+    fn next(&mut self) -> Option<Option<&'a str>> {
+        self.it.next().map(|n| n.as_deref())
+    }
+}
+
+#[derive(Clone, Eq, PartialEq)]
+pub(crate) enum State {
+    Char { target: StateID, ch: char },
+    Ranges { target: StateID, ranges: Vec<(char, char)> },
+    Splits { targets: Vec<StateID>, reverse: bool },
+    Goto { target: StateID, look: Option<hir::Look> },
+    Capture { target: StateID, slot: u32 },
+    Fail,
+    Match,
+}
+
+impl State {
+    /// Returns the heap memory usage of this NFA state in bytes.
+    fn memory_usage(&self) -> usize {
+        match *self {
+            State::Char { .. }
+            | State::Goto { .. }
+            | State::Capture { .. }
+            | State::Fail { .. }
+            | State::Match => 0,
+            State::Splits { ref targets, .. } => {
+                targets.len() * size_of::<StateID>()
+            }
+            State::Ranges { ref ranges, .. } => {
+                ranges.len() * size_of::<(char, char)>()
+            }
+        }
+    }
+
+    /// Returns an iterator over the given split targets. The iterator
+    /// yields the targets in reverse order when `reverse` is true.
+    pub(crate) fn iter_splits<'a>(
+        splits: &'a [StateID],
+        reverse: bool,
+    ) -> impl Iterator<Item = StateID> + 'a {
+        let mut it = splits.iter();
+        core::iter::from_fn(move || {
+            if reverse { it.next_back() } else { it.next() }.copied()
+        })
+    }
+}
+
+impl core::fmt::Debug for State {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        match *self {
+            State::Char { target, ch } => {
+                write!(f, "{:?} => {:?}", ch, target)
+            }
+            State::Ranges { target, ref ranges } => {
+                for (i, &(start, end)) in ranges.iter().enumerate() {
+                    if i > 0 {
+                        write!(f, ", ")?;
+                    }
+                    write!(f, "{:?}-{:?} => {:?}", start, end, target)?;
+                }
+                Ok(())
+            }
+            State::Splits { ref targets, reverse } => {
+                write!(f, "splits(")?;
+                for (i, sid) in
+                    State::iter_splits(targets, reverse).enumerate()
+                {
+                    if i > 0 {
+                        write!(f, ", ")?;
+                    }
+                    write!(f, "{:?}", sid)?;
+                }
+                write!(f, ")")
+            }
+            State::Goto { target, look } => {
+                write!(f, "{:?} => {:?}", look, target)
+            }
+            State::Capture { target, slot } => {
+                write!(f, "capture(slot={:?}) => {:?}", slot, target,)
+            }
+            State::Fail => write!(f, "FAIL"),
+            State::Match => {
+                write!(f, "MATCH")
+            }
+        }
+    }
+}
+
+/// A map from capture group name to its corresponding capture group index.
+///
+/// We define a type alias here so that we can transparently use a `HashMap`
+/// whenever it's available. We do so presumably because it's faster, although
+/// there are no benchmarks verifying this.
+#[cfg(feature = "std")]
+type CaptureNameMap = std::collections::HashMap<Arc<str>, u32>;
+#[cfg(not(feature = "std"))]
+type CaptureNameMap = alloc::collections::BTreeMap<Arc<str>, u32>;
+
+#[derive(Debug)]
+struct Compiler {
+    config: Config,
+    nfa: RefCell<NFA>,
+}
+
+impl Compiler {
+    fn new(config: Config) -> Compiler {
+        let nfa = RefCell::new(NFA {
+            states: vec![],
+            start: 0,
+            is_start_anchored: false,
+            is_match_empty: false,
+            cap_name_to_index: CaptureNameMap::default(),
+            cap_index_to_name: vec![],
+            memory_extra: 0,
+        });
+        Compiler { config, nfa }
+    }
+
+    fn compile(self, hir: &Hir) -> Result<NFA, Error> {
+        self.nfa.borrow_mut().is_start_anchored = hir.is_start_anchored();
+        self.nfa.borrow_mut().is_match_empty = hir.is_match_empty();
+        let compiled = self.c_capture(0, None, hir)?;
+        let mat = self.add(State::Match)?;
+        self.patch(compiled.end, mat);
+        self.nfa.borrow_mut().start = compiled.start;
+        Ok(self.nfa.into_inner())
+    }
+
+    fn c(&self, hir: &Hir) -> Result<ThompsonRef, Error> {
+        match *hir.kind() {
+            HirKind::Empty => self.c_empty(),
+            HirKind::Char(ch) => self.c_char(ch),
+            HirKind::Class(ref class) => self.c_class(class),
+            HirKind::Look(ref look) => self.c_look(look),
+            HirKind::Repetition(ref rep) => self.c_repetition(rep),
+            HirKind::Capture(ref cap) => {
+                self.c_capture(cap.index, cap.name.as_deref(), &cap.sub)
+            }
+            HirKind::Concat(ref subs) => {
+                self.c_concat(subs.iter().map(|s| self.c(s)))
+            }
+            HirKind::Alternation(ref subs) => {
+                self.c_alternation(subs.iter().map(|s| self.c(s)))
+            }
+        }
+    }
+
+    /// Compile a "fail" state that can never be transitioned out of.
+    fn c_fail(&self) -> Result<ThompsonRef, Error> {
+        let id = self.add(State::Fail)?;
+        Ok(ThompsonRef { start: id, end: id })
+    }
+
+    /// Compile an "empty" state with one unconditional epsilon transition.
+    ///
+    /// Both the `start` and `end` locations point to the state created.
+    /// Callers will likely want to keep the `start`, but patch the `end` to
+    /// point to some other state.
+    fn c_empty(&self) -> Result<ThompsonRef, Error> {
+        let id = self.add_empty()?;
+        Ok(ThompsonRef { start: id, end: id })
+    }
+
+    /// Compile the given literal char to an NFA.
+    fn c_char(&self, ch: char) -> Result<ThompsonRef, Error> {
+        let id = self.add(State::Char { target: 0, ch })?;
+        Ok(ThompsonRef { start: id, end: id })
+    }
+
+    /// Compile a character class into a one-state NFA sub-graph.
+    ///
+    /// A class with no ranges is compiled to a `Fail` state.
+    fn c_class(&self, class: &hir::Class) -> Result<ThompsonRef, Error> {
+        let state = if class.ranges.is_empty() {
+            // An empty `Ranges` state would never match any input and so
+            // would already behave like `Fail`. Emitting `Fail` directly
+            // costs nothing extra and states the intent explicitly.
+            State::Fail
+        } else {
+            State::Ranges {
+                target: 0,
+                ranges: class.ranges.iter().map(|r| (r.start, r.end)).collect(),
+            }
+        };
+        let id = self.add(state)?;
+        Ok(ThompsonRef { start: id, end: id })
+    }
+
+    /// Compile an HIR look-around assertion into its NFA form: a `Goto`
+    /// state whose transition is conditional on the assertion.
+    fn c_look(&self, look: &hir::Look) -> Result<ThompsonRef, Error> {
+        let conditional = State::Goto { target: 0, look: Some(*look) };
+        self.add(conditional).map(|id| ThompsonRef { start: id, end: id })
+    }
+
+    /// Compile a repetition expression by dispatching on its bounds. Every
+    /// combination of minimum, maximum and greediness is handled here.
+    fn c_repetition(
+        &self,
+        rep: &hir::Repetition,
+    ) -> Result<ThompsonRef, Error> {
+        let (sub, greedy, min) = (&rep.sub, rep.greedy, rep.min);
+        match rep.max {
+            // No upper bound: `x*`, `x+`, `x{n,}`.
+            None => self.c_at_least(sub, greedy, min),
+            // `x?`
+            Some(1) if min == 0 => self.c_zero_or_one(sub, greedy),
+            // `x{n}` (greediness is irrelevant for a fixed count).
+            Some(max) if max == min => self.c_exactly(sub, min),
+            // `x{m,n}`
+            Some(max) => self.c_bounded(sub, greedy, min, max),
+        }
+    }
+
+    /// Compile the given expression so that it matches no fewer than `min`
+    /// and no more than `max` times.
+    ///
+    /// When `greedy` is true, the expression prefers to match as much as
+    /// possible. Otherwise, it prefers to match as little as possible.
+    fn c_bounded(
+        &self,
+        hir: &Hir,
+        greedy: bool,
+        min: u32,
+        max: u32,
+    ) -> Result<ThompsonRef, Error> {
+        let required = self.c_exactly(hir, min)?;
+        if max == min {
+            return Ok(required);
+        }
+
+        // It is tempting here to compile the rest here as a concatenation
+        // of zero-or-one matches. i.e., for `a{2,5}`, compile it as if it
+        // were `aaa?a?a?`. The problem here is that it leads to this program:
+        //
+        //     >000000: 61 => 01
+        //      000001: 61 => 02
+        //      000002: union(03, 04)
+        //      000003: 61 => 04
+        //      000004: union(05, 06)
+        //      000005: 61 => 06
+        //      000006: union(07, 08)
+        //      000007: 61 => 08
+        //      000008: MATCH
+        //
+        // And effectively, once you hit state 2, the epsilon closure will
+        // include states 3, 5, 6, 7 and 8, which is quite a bit. It is better
+        // to instead compile it like so:
+        //
+        //     >000000: 61 => 01
+        //      000001: 61 => 02
+        //      000002: union(03, 08)
+        //      000003: 61 => 04
+        //      000004: union(05, 08)
+        //      000005: 61 => 06
+        //      000006: union(07, 08)
+        //      000007: 61 => 08
+        //      000008: MATCH
+        //
+        // So that the epsilon closure of state 2 is now just 3 and 8.
+        let exit = self.add_empty()?;
+        let reverse = !greedy;
+        let mut tail = required.end;
+        for _ in min..max {
+            let branch = self.add(State::Splits { targets: vec![], reverse })?;
+            let optional = self.c(hir)?;
+            self.patch(tail, branch)?;
+            // Order matters: the first target is "take another copy" and
+            // the second is "bail out to the shared exit".
+            self.patch(branch, optional.start)?;
+            self.patch(branch, exit)?;
+            tail = optional.end;
+        }
+        self.patch(tail, exit)?;
+        Ok(ThompsonRef { start: required.start, end: exit })
+    }
+
+    /// Compile the given expression so that it matches `n` or more times,
+    /// for any integer `n`. (A particularly large `n` is likely to run afoul
+    /// of any configured size limit, though.)
+    ///
+    /// When `greedy` is true, the expression prefers to match as much as
+    /// possible. Otherwise, it prefers to match as little as possible.
+    fn c_at_least(
+        &self,
+        hir: &Hir,
+        greedy: bool,
+        n: u32,
+    ) -> Result<ThompsonRef, Error> {
+        let reverse = !greedy;
+        match n {
+            // When the expression cannot match the empty string, then we
+            // can get away with something much simpler: just one 'alt'
+            // instruction that optionally repeats itself. But if the expr
+            // can match the empty string... see the next arm.
+            0 if !hir.is_match_empty() => {
+                let looper =
+                    self.add(State::Splits { targets: vec![], reverse })?;
+                let body = self.c(hir)?;
+                self.patch(looper, body.start)?;
+                self.patch(body.end, looper)?;
+                Ok(ThompsonRef { start: looper, end: looper })
+            }
+            // What's going on here? Shouldn't x* be simpler than this? It
+            // turns out that when implementing leftmost-first (Perl-like)
+            // match semantics, x* results in an incorrect preference order
+            // when computing the transitive closure of states if and only if
+            // 'x' can match the empty string. So instead, we compile x* as
+            // (x+)?, which preserves the correct preference order.
+            //
+            // See: /~https://github.com/rust-lang/regex/issues/779
+            0 => {
+                let body = self.c(hir)?;
+                let plus =
+                    self.add(State::Splits { targets: vec![], reverse })?;
+                self.patch(body.end, plus)?;
+                self.patch(plus, body.start)?;
+
+                let question =
+                    self.add(State::Splits { targets: vec![], reverse })?;
+                let exit = self.add_empty()?;
+                self.patch(question, body.start)?;
+                self.patch(question, exit)?;
+                self.patch(plus, exit)?;
+                Ok(ThompsonRef { start: question, end: exit })
+            }
+            // x+: one mandatory copy followed by a split that loops back.
+            1 => {
+                let body = self.c(hir)?;
+                let looper =
+                    self.add(State::Splits { targets: vec![], reverse })?;
+                self.patch(body.end, looper)?;
+                self.patch(looper, body.start)?;
+                Ok(ThompsonRef { start: body.start, end: looper })
+            }
+            // x{n,} for n >= 2: n-1 mandatory copies followed by x+.
+            _ => {
+                let required = self.c_exactly(hir, n - 1)?;
+                let body = self.c(hir)?;
+                let looper =
+                    self.add(State::Splits { targets: vec![], reverse })?;
+                self.patch(required.end, body.start)?;
+                self.patch(body.end, looper)?;
+                self.patch(looper, body.start)?;
+                Ok(ThompsonRef { start: required.start, end: looper })
+            }
+        }
+    }
+
+    /// Compile the given expression so that it matches either zero times or
+    /// exactly once.
+    ///
+    /// When `greedy` is true, the expression prefers to match as much as
+    /// possible. Otherwise, it prefers to match as little as possible.
+    fn c_zero_or_one(
+        &self,
+        hir: &Hir,
+        greedy: bool,
+    ) -> Result<ThompsonRef, Error> {
+        let reverse = !greedy;
+        let branch = self.add(State::Splits { targets: vec![], reverse })?;
+        let body = self.c(hir)?;
+        let exit = self.add_empty()?;
+        // First target: take the expression. Second target: skip it.
+        self.patch(branch, body.start)?;
+        self.patch(branch, exit)?;
+        self.patch(body.end, exit)?;
+        Ok(ThompsonRef { start: branch, end: exit })
+    }
+
+    /// Compile `n` back-to-back copies of the given HIR expression. When `n`
+    /// is zero, this is whatever `c_concat` produces for an empty iterator.
+    fn c_exactly(&self, hir: &Hir, n: u32) -> Result<ThompsonRef, Error> {
+        // The iterator is lazy: each copy is compiled only when `c_concat`
+        // asks for it.
+        let copies = (0..n).map(|_| self.c(hir));
+        self.c_concat(copies)
+    }
+
+    /// Compile the given expression and insert capturing states at the
+    /// beginning and end of it.
+    ///
+    /// Every capture group owns two slots: slot `2 * index` records where
+    /// the group starts and slot `2 * index + 1` records where it ends.
+    ///
+    /// If `name` is given, the group is also registered in the NFA's
+    /// name-to-index and index-to-name tables.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the slot numbers for `index` do not fit in a
+    /// `u32`, or if adding states or compiling `hir` fails.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `index` is not equal to the number of groups registered so
+    /// far, i.e. if groups are not compiled in increasing index order.
+    fn c_capture(
+        &self,
+        index: u32,
+        name: Option<&str>,
+        hir: &Hir,
+    ) -> Result<ThompsonRef, Error> {
+        // Register this group *before* compiling its sub-expression. The
+        // sub-expression may itself contain capture groups with larger
+        // indices, and each of them checks that every earlier group has
+        // already been registered.
+        assert_eq!(
+            index.as_usize(),
+            self.nfa.borrow().cap_index_to_name.len(),
+            "captures compiled in wrong order"
+        );
+        if let Some(name) = name {
+            let name = Arc::from(name);
+            let mut nfa = self.nfa.borrow_mut();
+            nfa.cap_name_to_index.insert(Arc::clone(&name), index);
+            nfa.cap_index_to_name.push(Some(Arc::clone(&name)));
+            // This is an approximation of the heap used by the name.
+            nfa.memory_extra += name.len() + size_of::<u32>();
+        } else {
+            self.nfa.borrow_mut().cap_index_to_name.push(None);
+        }
+
+        // Two slots per group, so the start slot is `2 * index`. Using any
+        // smaller multiplier would make the end slot of one group collide
+        // with the start slot of the next.
+        let Some(slot) = index.checked_mul(2) else {
+            return Err(Error::new("capture group slots exhausted"));
+        };
+        let start = self.add(State::Capture { target: 0, slot })?;
+        let inner = self.c(hir)?;
+        let Some(slot) = slot.checked_add(1) else {
+            return Err(Error::new("capture group slots exhausted"));
+        };
+        let end = self.add(State::Capture { target: 0, slot })?;
+        self.patch(start, inner.start)?;
+        self.patch(inner.end, end)?;
+        Ok(ThompsonRef { start, end })
+    }
+
+    /// Compile a concatenation of the sub-expressions produced by the given
+    /// iterator, chaining each one's `end` to the next one's `start`. An
+    /// iterator that yields nothing compiles down to an "empty" state that
+    /// always matches.
+    fn c_concat<I>(&self, mut it: I) -> Result<ThompsonRef, Error>
+    where
+        I: Iterator<Item = Result<ThompsonRef, Error>>,
+    {
+        let Some(head) = it.next() else {
+            return self.c_empty();
+        };
+        let head = head?;
+        // Thread the current tail through the remaining pieces, stopping at
+        // the first error.
+        let end = it.try_fold(
+            head.end,
+            |tail, piece| -> Result<StateID, Error> {
+                let piece = piece?;
+                self.patch(tail, piece.start)?;
+                Ok(piece.end)
+            },
+        )?;
+        Ok(ThompsonRef { start: head.start, end })
+    }
+
+    /// Compile an alternation in which every item produced by the given
+    /// iterator is one branch. An iterator that yields nothing compiles down
+    /// to a "fail" state, and a single branch is returned as-is.
+    ///
+    /// Branches that appear earlier are "preferred" at match time over
+    /// branches that appear later. (This is currently always true, as this
+    /// crate only supports leftmost-first semantics.)
+    fn c_alternation<I>(&self, mut it: I) -> Result<ThompsonRef, Error>
+    where
+        I: Iterator<Item = Result<ThompsonRef, Error>>,
+    {
+        let Some(first) = it.next() else {
+            return self.c_fail();
+        };
+        let first = first?;
+        let Some(second) = it.next() else {
+            return Ok(first);
+        };
+        let second = second?;
+
+        // Only now that we know there are at least two branches do we pay
+        // for the split state and the shared exit.
+        let splits =
+            self.add(State::Splits { targets: vec![], reverse: false })?;
+        let end = self.add_empty()?;
+        let leading: [Result<ThompsonRef, Error>; 2] =
+            [Ok(first), Ok(second)];
+        for branch in leading.into_iter().chain(it) {
+            let branch = branch?;
+            self.patch(splits, branch.start)?;
+            self.patch(branch.end, end)?;
+        }
+        Ok(ThompsonRef { start: splits, end })
+    }
+
+    /// Add an empty state, i.e. an unconditional epsilon transition, and
+    /// return its ID. Such states make NFA construction considerably
+    /// simpler.
+    ///
+    /// (In the regex crate, we do a second pass to remove these, but don't
+    /// bother with that here.)
+    fn add_empty(&self) -> Result<StateID, Error> {
+        let epsilon = State::Goto { target: 0, look: None };
+        self.add(epsilon)
+    }
+
+    /// The shared implementation of "add a state." It owns state ID
+    /// allocation, and therefore reports both state ID exhaustion and a
+    /// violation of the configured size limit.
+    fn add(&self, state: State) -> Result<StateID, Error> {
+        let count = self.nfa.borrow().states.len();
+        let Ok(id) = u32::try_from(count) else {
+            return Err(Error::new("exhausted state IDs, too many states"));
+        };
+        {
+            // Keep the mutable borrow in its own scope so that it is released
+            // before the size limit check borrows the NFA again.
+            let mut nfa = self.nfa.borrow_mut();
+            nfa.memory_extra += state.memory_usage();
+            nfa.states.push(state);
+        }
+        self.check_size_limit()?;
+        Ok(id)
+    }
+
+    /// Add a transition from the state `from` to the state `to`.
+    ///
+    /// The name "patch" reflects how NFA construction usually goes: states
+    /// are first added with "dummy" target IDs, because the states they
+    /// should lead to might not exist yet, and the real IDs are "patched" in
+    /// afterwards.
+    ///
+    /// Patching a `Fail` or `Match` state does nothing.
+    ///
+    /// # Errors
+    ///
+    /// This may error if patching leads to an increase in heap usage beyond
+    /// the configured size limit. Heap usage only grows when patching adds a
+    /// new transition (as in the case of a "splits" state).
+    fn patch(&self, from: StateID, to: StateID) -> Result<(), Error> {
+        // The number of additional heap bytes this patch is accounted for.
+        let growth = {
+            let mut nfa = self.nfa.borrow_mut();
+            match nfa.states[from.as_usize()] {
+                State::Char { ref mut target, .. }
+                | State::Ranges { ref mut target, .. }
+                | State::Goto { ref mut target, .. }
+                | State::Capture { ref mut target, .. } => {
+                    *target = to;
+                    0
+                }
+                State::Splits { ref mut targets, .. } => {
+                    targets.push(to);
+                    size_of::<StateID>()
+                }
+                State::Fail | State::Match => 0,
+            }
+        };
+        if growth != 0 {
+            self.nfa.borrow_mut().memory_extra += growth;
+            self.check_size_limit()?;
+        }
+        Ok(())
+    }
+
+    /// Verify that the heap memory currently used by the NFA under
+    /// construction is within the configured size limit. When no limit is
+    /// configured, this always succeeds.
+    fn check_size_limit(&self) -> Result<(), Error> {
+        match self.config.size_limit {
+            Some(limit) if self.nfa.borrow().memory_usage() > limit => {
+                Err(Error::new("compiled regex exceeded size limit"))
+            }
+            _ => Ok(()),
+        }
+    }
+}
+
+/// A value that represents the result of compiling a sub-expression of a
+/// regex's HIR. Specifically, this represents a sub-graph of the NFA that
+/// has an initial state at `start` and a final state at `end`.
+///
+/// For a sub-graph made of a single state, `start` and `end` are the same
+/// ID.
+#[derive(Clone, Copy, Debug)]
+struct ThompsonRef {
+    /// The ID of the state through which this sub-graph is entered.
+    start: StateID,
+    /// The ID of the state through which this sub-graph is left. Sub-graphs
+    /// are connected by patching this state to point at whatever follows.
+    end: StateID,
+}
diff --git a/regex-lite/src/pikevm.rs b/regex-lite/src/pikevm.rs
new file mode 100644
index 000000000..5af4d6198
--- /dev/null
+++ b/regex-lite/src/pikevm.rs
@@ -0,0 +1,831 @@
+use alloc::{vec, vec::Vec};
+
+use crate::{
+    int::{NonMaxUsize, U32},
+    nfa::{State, StateID, NFA},
+    utf8,
+};
+
+/// A PikeVM searcher.
+///
+/// A PikeVM uses the standard Thompson NFA linear time search algorithm, but
+/// augmented to support tracking the offsets of matching capture groups.
+#[derive(Clone, Debug)]
+pub(crate) struct PikeVM {
+    /// The NFA that this searcher simulates.
+    nfa: NFA,
+}
+
+impl PikeVM {
+    /// Build a PikeVM searcher that takes ownership of, and searches with,
+    /// the given NFA.
+    pub(crate) fn new(nfa: NFA) -> PikeVM {
+        Self { nfa }
+    }
+
+    /// Return a reference to the underlying NFA used by this PikeVM, i.e.
+    /// the one that was given to `PikeVM::new`.
+    pub(crate) fn nfa(&self) -> &NFA {
+        &self.nfa
+    }
+
+    /// The implementation of standard leftmost search.
+    ///
+    /// The search considers the positions `start..=end` of `haystack` and
+    /// returns true if and only if a match was found. When `earliest` is
+    /// true, the search stops as soon as any match is seen instead of trying
+    /// to extend it.
+    ///
+    /// Capturing group spans are written to `slots`, but only if requested.
+    /// `slots` can be any length. Any slot in the NFA that is activated but
+    /// which is out of bounds for the given `slots` is ignored.
+    ///
+    /// If `start > end`, no search is performed and false is returned.
+    pub(crate) fn search(
+        &self,
+        cache: &mut Cache,
+        haystack: &[u8],
+        start: usize,
+        end: usize,
+        earliest: bool,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> bool {
+        cache.setup_search(slots.len());
+        if start > end {
+            return false;
+        }
+        // Why do we even care about this? Well, in our `slots` representation,
+        // we use usize::MAX as a sentinel to indicate "no match." This isn't
+        // problematic so long as our haystack doesn't have a maximal length.
+        // Byte slices are guaranteed by Rust to have a length that fits into
+        // isize, and so this assert should always pass. But we put it here to
+        // make our assumption explicit.
+        assert!(
+            haystack.len() < core::usize::MAX,
+            "byte slice lengths must be less than usize MAX",
+        );
+
+        let start_id = self.nfa().start();
+        let anchored = self.nfa().is_start_anchored();
+
+        let Cache { ref mut stack, ref mut curr, ref mut next } = cache;
+        let mut matched = false;
+        // Yes, our search doesn't end at `end`, but includes it. This is
+        // necessary because matches are delayed by one byte. The delay is used
+        // to handle look-behind assertions. In the case of the PikeVM, the
+        // delay is implemented by not considering a match to exist until it
+        // is visited in `nexts`. Technically, we know a match exists in the
+        // previous iteration via `epsilon_closure`.
+        let mut at = start;
+        while at <= end {
+            // If we have no states left to visit, then there are some cases
+            // where we know we can quit early or even skip ahead.
+            if curr.set.is_empty() {
+                // We have a match so we can quit.
+                if matched {
+                    break;
+                }
+                // If we're running an anchored search and we've advanced
+                // beyond the start position with no other states to try, then
+                // we will never observe a match and thus can stop.
+                if anchored && at > start {
+                    break;
+                }
+            }
+            // Instead of using a hypothetical unanchored start state in the
+            // NFA (which doesn't exist, but we could add it), we actually
+            // always use its anchored starting state. As a result, when doing
+            // an unanchored search, we need to simulate our own '(?s:.)*?'
+            // prefix, to permit a match to appear anywhere.
+            //
+            // Now, we don't *have* to do things this way. We could create
+            // a proper unanchored start state in the NFA and do one
+            // `epsilon_closure` call from that starting state before the main
+            // loop here. And that is just as correct. However, it turns out to
+            // be slower than our approach here because it slightly increases
+            // the cost of processing each byte by requiring us to visit
+            // more NFA states to deal with the additional NFA states in the
+            // unanchored prefix. By simulating it explicitly here, we lower
+            // those costs substantially. The cost is itself small, but it adds
+            // up for large haystacks.
+            //
+            // In order to simulate the '(?s:.)*?' prefix---which is not
+            // greedy---we are careful not to perform an epsilon closure on
+            // the start state if we already have a match. Namely, if we
+            // did otherwise, we would never reach a terminating condition
+            // because there would always be additional states to process.
+            if !matched {
+                // Since we are adding to the 'curr' active states and since
+                // this is for the start ID, we use a slots slice that is
+                // guaranteed to have the right length but where every element
+                // is absent. This is exactly what we want, because this
+                // epsilon closure is responsible for simulating an unanchored
+                // '(?s:.)*?' prefix. It is specifically outside of any
+                // capturing groups, and thus, using slots that are always
+                // absent is correct.
+                //
+                // Note though that we can't just use `&mut []` here, since
+                // this epsilon closure may traverse through `Capture` states
+                // transitions, and thus must be able to write offsets to the
+                // slots given which are later copied to slot values in `curr`.
+                let slots = next.slot_table.all_absent();
+                self.epsilon_closure(
+                    stack, slots, curr, haystack, at, start_id,
+                );
+            }
+            let (ch, len) = utf8::decode_lossy(&haystack[at..]);
+            // Only ever *set* `matched` here, never clear it. After a match
+            // has been recorded, higher priority threads may stay alive in
+            // an attempt to extend it. If they all die without reaching a
+            // match state, `nexts` returns false, but the match recorded
+            // earlier (and its slots) is still the answer. For example, for
+            // `a(bc)?` on `abd`, the match `a` must survive the failed
+            // attempt to also match `bc`.
+            if self.nexts(stack, curr, next, haystack, at, ch, len, slots) {
+                matched = true;
+            }
+            // Unless the caller asked us to return early, we need to mush
+            // on to see if we can extend our match. (But note that 'nexts'
+            // will quit right after seeing a match, as is consistent with
+            // leftmost-first match priority.)
+            if (earliest && matched) || len == 0 {
+                break;
+            }
+            core::mem::swap(curr, next);
+            next.set.clear();
+            at += len;
+        }
+        matched
+    }
+
+    /// Step every active state in 'curr' over the character at `at`,
+    /// writing the states to process at the following position into 'next'.
+    ///
+    /// 'stack' is scratch space for the depth first traversal of the NFA
+    /// performed when computing an epsilon closure.
+    ///
+    /// Returns true if a match state was found in 'curr'. In that case, the
+    /// slots of that match state (in 'curr') are copied into `slots`, and
+    /// the remaining (lower priority) states in 'curr' are not processed, as
+    /// is consistent with leftmost-first match priority.
+    ///
+    /// `at_ch` is the Unicode scalar value whose UTF-8 encoding begins at `at`
+    /// in `haystack`.
+    ///
+    /// `at_len` is the number of bytes consumed by `at_ch`. This is usually
+    /// equal to `at_ch.len_utf8()`, but not always. For example, in the case
+    /// where `at_ch` is the replacement codepoint that results from decoding
+    /// invalid UTF-8. In that case, `at_len` can be 1, 2 or 3.
+    fn nexts(
+        &self,
+        stack: &mut Vec<FollowEpsilon>,
+        curr: &mut ActiveStates,
+        next: &mut ActiveStates,
+        haystack: &[u8],
+        at: usize,
+        at_ch: char,
+        at_len: usize,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> bool {
+        let ActiveStates { ref set, ref mut slot_table } = *curr;
+        // Visit states in priority order and stop at the first match state.
+        let winner = set.iter().find(|&sid| {
+            self.next(
+                stack, slot_table, next, haystack, at, at_ch, at_len, sid,
+            )
+        });
+        match winner {
+            Some(sid) => {
+                slots.copy_from_slice(slot_table.for_state(sid));
+                true
+            }
+            None => false,
+        }
+    }
+
+    /// Step the single state `sid` over the character at position `at`. If
+    /// `sid` has a transition defined for that character, then the state
+    /// transitioned to, along with its epsilon closure, is added to the
+    /// `next` set of states to explore.
+    ///
+    /// Returns true if and only if `sid` is a match state.
+    ///
+    /// `stack` is used by the epsilon closure computation to perform a depth
+    /// first traversal of the NFA.
+    ///
+    /// `curr_slot_table` should be the table of slots for the current set of
+    /// states being explored. If there is a transition out of `sid`, then
+    /// sid's row in the slot table is used to perform the epsilon closure.
+    ///
+    /// `at_ch` is the Unicode scalar value whose UTF-8 encoding begins at `at`
+    /// in `haystack`. The caller provides it so that this routine doesn't
+    /// need to re-decode it. (Since it's expected that this routine is called
+    /// multiple times for each position.)
+    ///
+    /// `at_len` is the number of bytes consumed by `at_ch`. This is usually
+    /// equal to `at_ch.len_utf8()`, but not always. For example, in the case
+    /// where `at_ch` is the replacement codepoint that results from decoding
+    /// invalid UTF-8. In that case, `at_len` can be 1, 2 or 3.
+    fn next(
+        &self,
+        stack: &mut Vec<FollowEpsilon>,
+        curr_slot_table: &mut SlotTable,
+        next: &mut ActiveStates,
+        haystack: &[u8],
+        at: usize,
+        at_ch: char,
+        at_len: usize,
+        sid: StateID,
+    ) -> bool {
+        match *self.nfa.state(sid) {
+            State::Match => true,
+            State::Char { target, ch } => {
+                // A zero length means there was nothing left to decode, so
+                // there is no character to consume.
+                if at_len > 0 && ch == at_ch {
+                    let slots = curr_slot_table.for_state(sid);
+                    // OK because `at_len` is always derived from the number
+                    // of bytes read from `at` that make up `at_ch`. So this
+                    // will never wrap.
+                    let after = at.wrapping_add(at_len);
+                    self.epsilon_closure(
+                        stack, slots, next, haystack, after, target,
+                    );
+                }
+                false
+            }
+            State::Ranges { target, ref ranges } => {
+                for &(lo, hi) in ranges.iter() {
+                    // No range from here on can contain `at_ch`.
+                    if lo > at_ch {
+                        break;
+                    }
+                    // This range ends before `at_ch`; try the next one.
+                    if at_ch > hi {
+                        continue;
+                    }
+                    if at_len == 0 {
+                        return false;
+                    }
+                    let slots = curr_slot_table.for_state(sid);
+                    // OK because `at_len` is always derived from the
+                    // number of bytes read from `at` that make up `at_ch`.
+                    // So this will never wrap.
+                    let after = at.wrapping_add(at_len);
+                    self.epsilon_closure(
+                        stack, slots, next, haystack, after, target,
+                    );
+                }
+                false
+            }
+            // These states consume no input. They are handled entirely by
+            // the epsilon closure.
+            State::Fail
+            | State::Goto { .. }
+            | State::Splits { .. }
+            | State::Capture { .. } => false,
+        }
+    }
+
+    /// Compute the epsilon closure of `sid` and record it in `next`. The
+    /// slot values in `curr_slots`, which should be the slot values that
+    /// correspond to `sid`, are copied into the corresponding states in
+    /// `next`.
+    ///
+    /// The given `stack` drives a depth first traversal of the NFA that
+    /// follows all epsilon transitions out of `sid`. Conditional epsilon
+    /// transitions are followed if and only if they are satisfied for the
+    /// position `at` in `haystack`.
+    ///
+    /// While this routine may write to `curr_slots`, once it returns, any
+    /// writes are undone and the original values (even if absent) are
+    /// restored.
+    fn epsilon_closure(
+        &self,
+        stack: &mut Vec<FollowEpsilon>,
+        curr_slots: &mut [Option<NonMaxUsize>],
+        next: &mut ActiveStates,
+        haystack: &[u8],
+        at: usize,
+        sid: StateID,
+    ) {
+        stack.push(FollowEpsilon::Explore(sid));
+        loop {
+            match stack.pop() {
+                // Nothing left to explore or restore: the closure is done.
+                None => break,
+                Some(FollowEpsilon::Explore(sid)) => {
+                    self.epsilon_closure_explore(
+                        stack, curr_slots, next, haystack, at, sid,
+                    );
+                }
+                // Undo a slot write made while exploring a `Capture` state.
+                Some(FollowEpsilon::RestoreCapture { slot, offset }) => {
+                    curr_slots[slot.as_usize()] = offset;
+                }
+            }
+        }
+    }
+
+    /// Explore all of the epsilon transitions out of `sid`. This is mostly
+    /// split out from `epsilon_closure` in order to clearly delineate
+    /// the actual work of computing an epsilon closure from the stack
+    /// book-keeping.
+    ///
+    /// This will push any additional explorations needed on to `stack`.
+    ///
+    /// `curr_slots` should refer to the slots for the currently active NFA
+    /// state. That is, the current state we are stepping through. These
+    /// slots are mutated in place as new `Captures` states are traversed
+    /// during epsilon closure, but the slots are restored to their original
+    /// values once the full epsilon closure is completed. The ultimate use of
+    /// `curr_slots` is to copy them to the corresponding `next_slots`, so that
+    /// the capturing group spans are forwarded from the currently active state
+    /// to the next.
+    ///
+    /// `next` refers to the next set of active states. Computing an epsilon
+    /// closure may increase the next set of active states.
+    ///
+    /// `haystack` refers to the what we're searching and `at` refers to the
+    /// current position in the haystack. These are used to check whether
+    /// conditional epsilon transitions (like look-around) are satisfied at
+    /// the current position. If they aren't, then the epsilon closure won't
+    /// include them.
+    fn epsilon_closure_explore(
+        &self,
+        stack: &mut Vec<FollowEpsilon>,
+        curr_slots: &mut [Option<NonMaxUsize>],
+        next: &mut ActiveStates,
+        haystack: &[u8],
+        at: usize,
+        mut sid: StateID,
+    ) {
+        // We can avoid pushing some state IDs on to our stack in precisely
+        // the cases where a 'push(x)' would be immediately followed by a 'x
+        // = pop()'. This is achieved by this outer-loop. We simply set 'sid'
+        // to be the next state ID we want to explore once we're done with
+        // our initial exploration. In practice, this avoids a lot of stack
+        // thrashing.
+        loop {
+            // Record this state as part of our next set of active states. If
+            // we've already explored it, then no need to do it again.
+            if !next.set.insert(sid) {
+                return;
+            }
+            match *self.nfa.state(sid) {
+                // These states have no epsilon transitions, so the
+                // traversal ends here. Save the slots as they are right now,
+                // since this is the state that will be stepped over (or
+                // reported as a match) at the next position.
+                State::Fail
+                | State::Match { .. }
+                | State::Char { .. }
+                | State::Ranges { .. } => {
+                    next.slot_table.for_state(sid).copy_from_slice(curr_slots);
+                    return;
+                }
+                // An unconditional epsilon transition: just follow it.
+                State::Goto { target, look: None } => {
+                    sid = target;
+                }
+                // A conditional epsilon transition: follow it only if the
+                // look-around assertion holds at the current position.
+                State::Goto { target, look: Some(look) } => {
+                    if !look.is_match(haystack, at) {
+                        return;
+                    }
+                    sid = target;
+                }
+                // Targets are explored in order: the first one is followed
+                // immediately and the rest are pushed in reverse, so that
+                // they are popped (LIFO) in their original order.
+                State::Splits { ref targets, reverse: false } => {
+                    sid = match targets.get(0) {
+                        None => return,
+                        Some(&sid) => sid,
+                    };
+                    stack.extend(
+                        targets[1..]
+                            .iter()
+                            .copied()
+                            .rev()
+                            .map(FollowEpsilon::Explore),
+                    );
+                }
+                // Targets are explored in reverse order: the last one is
+                // followed immediately and the rest are pushed as-is, so
+                // that they are popped (LIFO) from last to first.
+                State::Splits { ref targets, reverse: true } => {
+                    sid = match targets.last() {
+                        None => return,
+                        Some(&sid) => sid,
+                    };
+                    stack.extend(
+                        targets[..targets.len() - 1]
+                            .iter()
+                            .copied()
+                            .map(FollowEpsilon::Explore),
+                    );
+                }
+                State::Capture { target, slot } => {
+                    // There's no need to do anything with slots that
+                    // ultimately won't be copied into the caller-provided
+                    // 'Captures' value. So we just skip dealing with them at
+                    // all.
+                    if slot.as_usize() < curr_slots.len() {
+                        // Remember the old value so that `epsilon_closure`
+                        // can restore it once everything reachable through
+                        // this capture state has been explored.
+                        stack.push(FollowEpsilon::RestoreCapture {
+                            slot,
+                            offset: curr_slots[slot.as_usize()],
+                        });
+                        // OK because length of a slice must fit into an isize.
+                        curr_slots[slot.as_usize()] =
+                            Some(NonMaxUsize::new(at).unwrap());
+                    }
+                    sid = target;
+                }
+            }
+        }
+    }
+}
+
+/// A cache represents mutable state that a `PikeVM` requires during a search.
+///
+/// For a given `PikeVM`, its corresponding cache may be created either via
+/// `PikeVM::create_cache`, or via `Cache::new`. They are equivalent in every
+/// way, except the former does not require explicitly importing `Cache`.
+///
+/// A particular `Cache` is coupled with the `PikeVM` from which it was
+/// created. It may only be used with that `PikeVM`. A cache and its
+/// allocations may be re-purposed via `Cache::reset`, in which case, it can
+/// only be used with the new `PikeVM` (and not the old one).
+#[derive(Clone, Debug)]
+pub(crate) struct Cache {
+    /// Stack of `FollowEpsilon` frames used while computing an epsilon
+    /// closure. It moves what is more naturally expressed through recursion
+    /// to the heap, and is cleared at the start of every search.
+    stack: Vec<FollowEpsilon>,
+    /// The current active states being explored for the current byte in the
+    /// haystack.
+    curr: ActiveStates,
+    /// The next set of states we're building that will be explored for the
+    /// next byte in the haystack.
+    next: ActiveStates,
+}
+
+impl Cache {
+    /// Builds a cache sized for the given `PikeVM`.
+    ///
+    /// Both sets of active states are allocated up front for `re`, while the
+    /// epsilon closure stack starts out empty.
+    ///
+    /// The returned `Cache` is tied to `re`. To use it with a different
+    /// `PikeVM`, call `Cache::reset` with that `PikeVM` first, which reuses
+    /// the existing allocations.
+    pub(crate) fn new(re: &PikeVM) -> Cache {
+        let curr = ActiveStates::new(re);
+        let next = ActiveStates::new(re);
+        Cache { stack: Vec::new(), curr, next }
+    }
+
+    /// Re-purposes this cache so it can be used to search with `re`, which
+    /// may be a different `PikeVM` than the one it was created with.
+    ///
+    /// Both sets of active states are reset in place, which lets memory
+    /// already allocated by this cache be reused.
+    pub(crate) fn reset(&mut self, re: &PikeVM) {
+        for states in [&mut self.curr, &mut self.next] {
+            states.reset(re);
+        }
+    }
+
+    /// Returns the number of heap bytes this cache accounts for.
+    ///
+    /// The size of the `Cache` value itself is **not** included. Add
+    /// `core::mem::size_of::<Cache>()` if that is wanted as well.
+    pub(crate) fn memory_usage(&self) -> usize {
+        let stack_bytes =
+            self.stack.len() * core::mem::size_of::<FollowEpsilon>();
+        stack_bytes + self.curr.memory_usage() + self.next.memory_usage()
+    }
+
+    /// Prepares this cache for a new search. This must be called at the start
+    /// of every search so that no state leaks over from a previous one.
+    ///
+    /// `captures_slot_len` is the number of slots in the caller-provided
+    /// slots for the search about to run. It is forwarded to both sets of
+    /// active states, where it bounds the length of the slices handed out by
+    /// 'SlotTable::for_state'. It may be smaller than the total number of
+    /// slots available, e.g., when only the overall match offsets are
+    /// wanted, in which case fewer capturing group offsets are copied around
+    /// by the PikeVM.
+    fn setup_search(&mut self, captures_slot_len: usize) {
+        self.stack.clear();
+        for states in [&mut self.curr, &mut self.next] {
+            states.setup_search(captures_slot_len);
+        }
+    }
+}
+
+/// A set of active states used to "simulate" the execution of an NFA via the
+/// PikeVM.
+///
+/// There are two sets of these used during NFA simulation. One set corresponds
+/// to the "current" set of states being traversed for the current position
+/// in a haystack. The other set corresponds to the "next" set of states being
+/// built, which will become the new "current" set for the next position in the
+/// haystack. These two sets correspond to CLIST and NLIST in Thompson's
+/// original paper on regexes: https://dl.acm.org/doi/pdf/10.1145/363347.363387
+///
+/// In addition to representing a set of NFA states, this also maintains slot
+/// values for each state. These slot values are what turn the NFA simulation
+/// into the "Pike VM." Namely, they track capturing group values for each
+/// state. During the computation of epsilon closure, we copy slot values from
+/// states in the "current" set to the "next" set. Eventually, once a match
+/// is found, the slot values for that match state are what we write to the
+/// caller provided slots.
+#[derive(Clone, Debug)]
+struct ActiveStates {
+    /// The set of active NFA states. This set preserves insertion order, which
+    /// is critical for simulating the match semantics of backtracking regex
+    /// engines.
+    set: SparseSet,
+    /// The slots for every NFA state, where each slot stores a (possibly
+    /// absent) offset. Every capturing group has two slots. One for a start
+    /// offset and one for an end offset.
+    slot_table: SlotTable,
+}
+
+impl ActiveStates {
+    /// Creates the active states for the given PikeVM. An empty set and slot
+    /// table are built first and then sized via 'reset', so the result may
+    /// only be used with the given PikeVM until 'reset' is called again.
+    fn new(re: &PikeVM) -> ActiveStates {
+        let (set, slot_table) = (SparseSet::new(0), SlotTable::new());
+        let mut states = ActiveStates { set, slot_table };
+        states.reset(re);
+        states
+    }
+
+    /// Re-sizes both the state set and the slot table for the given PikeVM.
+    /// Afterwards, these active states may only be used with that PikeVM.
+    fn reset(&mut self, re: &PikeVM) {
+        let nfa_len = re.nfa().len();
+        self.set.resize(nfa_len);
+        self.slot_table.reset(re);
+    }
+
+    /// Returns the heap memory, in bytes, used by the state set and the slot
+    /// table combined.
+    ///
+    /// The stack size of this value itself is not included.
+    fn memory_usage(&self) -> usize {
+        let set_bytes = self.set.memory_usage();
+        set_bytes + self.slot_table.memory_usage()
+    }
+
+    /// Prepares these active states for a new search by emptying the set of
+    /// states. 'captures_slot_len' is the number of slots in the
+    /// caller-provided 'Captures' and is allowed to be zero.
+    fn setup_search(&mut self, captures_slot_len: usize) {
+        self.slot_table.setup_search(captures_slot_len);
+        self.set.clear();
+    }
+}
+
+/// A table of slots, where each row represents a state in an NFA. Thus, the
+/// table has room for storing slots for every single state in an NFA.
+///
+/// This table is represented with a single contiguous allocation. In general,
+/// the notion of "capturing group" doesn't really exist at this level of
+/// abstraction, hence the name "slot" instead. (Indeed, every capturing group
+/// maps to a pair of slots, one for the start offset and one for the end
+/// offset.) Slots are indexed by the `Capture` NFA state.
+#[derive(Clone, Debug)]
+struct SlotTable {
+    /// The actual table of offsets.
+    table: Vec<Option<NonMaxUsize>>,
+    /// The number of slots per state, i.e., the table's stride or the length
+    /// of each row.
+    slots_per_state: usize,
+    /// The number of slots in the caller-provided `Captures` value for the
+    /// current search. Setting this to `slots_per_state` is always correct,
+    /// but may be wasteful.
+    slots_for_captures: usize,
+}
+
+impl SlotTable {
+    /// Create a new slot table.
+    ///
+    /// One should call 'reset' with the corresponding PikeVM before use.
+    fn new() -> SlotTable {
+        SlotTable { table: vec![], slots_for_captures: 0, slots_per_state: 0 }
+    }
+
+    /// Reset this slot table such that it can be used with the given PikeVM
+    /// (and only that PikeVM). Every slot in the table is absent afterwards.
+    fn reset(&mut self, re: &PikeVM) {
+        let nfa = re.nfa();
+        // OK because NFA construction would have failed if this overflowed.
+        self.slots_per_state = nfa.group_len().checked_mul(2).unwrap();
+        // This is always correct, but may be reduced for a particular search
+        // if fewer slots were given by the caller, e.g., none at all or only
+        // slots for the overall match.
+        self.slots_for_captures = self.slots_per_state;
+        let len = nfa
+            .len()
+            // We add 1 so that our last row is always empty. We use it as
+            // "scratch" space for computing the epsilon closure off of the
+            // starting state.
+            .checked_add(1)
+            .and_then(|x| x.checked_mul(self.slots_per_state))
+            // This could conceivably panic on legitimate inputs on 32-bit
+            // targets. If you're tripping this, please file a bug.
+            .expect("slot table length doesn't overflow");
+        // Clear first: 'resize' alone keeps old offsets, and with a new
+        // length or stride some of them could land in the scratch row.
+        self.table.clear();
+        self.table.resize(len, None);
+    }
+
+    /// Return the heap memory usage, in bytes, used by this slot table.
+    ///
+    /// This does not include the stack size of this value.
+    fn memory_usage(&self) -> usize {
+        self.table.len() * core::mem::size_of::<Option<NonMaxUsize>>()
+    }
+
+    /// Perform any per-search setup for this slot table.
+    ///
+    /// In particular, this sets the length of the number of slots used in the
+    /// slots given by the caller (if any at all). This number may be smaller
+    /// than the total number of slots available, e.g., when the caller is only
+    /// interested in tracking the overall match and not the spans of every
+    /// matching capturing group. Only tracking the overall match can save a
+    /// substantial amount of time copying capturing spans during a search.
+    fn setup_search(&mut self, captures_slot_len: usize) {
+        self.slots_for_captures = captures_slot_len;
+    }
+
+    /// Return a mutable slice of the slots for the given state.
+    ///
+    /// Note that the length of the slice returned may be less than the total
+    /// number of slots available for this state. In particular, the length
+    /// always matches the number of slots indicated via `setup_search`.
+    fn for_state(&mut self, sid: StateID) -> &mut [Option<NonMaxUsize>] {
+        let i = sid.as_usize() * self.slots_per_state;
+        &mut self.table[i..i + self.slots_for_captures]
+    }
+
+    /// Return a slice of slots of appropriate length where every slot offset
+    /// is guaranteed to be absent. This is useful in cases where you need to
+    /// compute an epsilon closure outside of the user supplied regex, and thus
+    /// never want it to have any capturing slots set.
+    fn all_absent(&mut self) -> &mut [Option<NonMaxUsize>] {
+        let i = self.table.len() - self.slots_per_state;
+        &mut self.table[i..i + self.slots_for_captures]
+    }
+}
+
+/// Represents a stack frame for use while computing an epsilon closure.
+///
+/// (An "epsilon closure" refers to the set of reachable NFA states from a
+/// single state without consuming any input. That is, the set of all epsilon
+/// transitions not only from that single state, but from every other state
+/// reachable by an epsilon transition as well. This is why it's called a
+/// "closure.")
+///
+/// Computing the epsilon closure in a Thompson NFA proceeds via a depth
+/// first traversal over all epsilon transitions from a particular state.
+/// (A depth first traversal is important because it emulates the same priority
+/// of matches that is typically found in backtracking regex engines.) This
+/// depth first traversal is naturally expressed using recursion, but to avoid
+/// a call stack size proportional to the size of a regex, we put our stack on
+/// the heap instead.
+///
+/// This stack thus consists of call frames. The typical call frame is
+/// `Explore`, which instructs epsilon closure to explore the epsilon
+/// transitions from that state. (Subsequent epsilon transitions are then
+/// pushed on to the stack as more `Explore` frames.) If the state ID being
+/// explored has no epsilon transitions, then the capturing group slots are
+/// copied from the original state that sparked the epsilon closure (from the
+/// 'step' routine) to the state ID being explored. This way, capturing group
+/// slots are forwarded from the previous state to the next.
+///
+/// The other stack frame, `RestoreCapture`, instructs the epsilon closure to
+/// set the position for a particular slot back to some particular offset. This
+/// frame is pushed when `Explore` sees a `Capture` transition. `Explore` will
+/// set the offset of the slot indicated in `Capture` to the current offset,
+/// and then push the old offset on to the stack as a `RestoreCapture` frame.
+/// Thus, the new offset is only used until the epsilon closure reverts back to
+/// the `RestoreCapture` frame. In effect, this gives the `Capture` epsilon
+/// transition its "scope" to only states that come "after" it during depth
+/// first traversal.
+#[derive(Clone, Debug)]
+enum FollowEpsilon {
+    /// Explore the epsilon transitions from a state ID.
+    Explore(StateID),
+    /// Reset the given `slot` to the given `offset` (which might be `None`).
+    RestoreCapture { slot: u32, offset: Option<NonMaxUsize> },
+}
+
+/// A sparse set used for representing ordered NFA states.
+///
+/// This supports constant time addition and membership testing. Clearing an
+/// entire set can also be done in constant time. Iteration yields elements
+/// in the order in which they were inserted.
+///
+/// The data structure is based on: https://research.swtch.com/sparse
+/// Note though that we don't actually use uninitialized memory. We generally
+/// reuse sparse sets, so the initial allocation cost is bearable. However, its
+/// other properties listed above are extremely useful.
+#[derive(Clone)]
+struct SparseSet {
+    /// The number of elements currently in this set.
+    len: usize,
+    /// Dense contains the ids in the order in which they were inserted.
+    dense: Vec<StateID>,
+    /// Sparse maps ids to their location in dense.
+    ///
+    /// A state ID is in the set if and only if
+    /// sparse[id] < len && id == dense[sparse[id]].
+    ///
+    /// Note that these are indices into 'dense'. It's a little weird to use
+    /// StateID here, but we know our length can never exceed the bounds of
+    /// StateID (enforced by 'resize') and StateID will be at most 4 bytes
+    /// whereas a usize is likely double that in most cases.
+    sparse: Vec<StateID>,
+}
+
+impl SparseSet {
+    /// Create a new sparse set with the given capacity.
+    ///
+    /// A sparse set does not grow on insertion. Its capacity only changes
+    /// via `resize`, and inserting an ID that is not smaller than the
+    /// capacity results in a panic.
+    ///
+    /// This panics if the capacity given is bigger than `u32::MAX`.
+    fn new(capacity: usize) -> SparseSet {
+        let mut set = SparseSet { len: 0, dense: vec![], sparse: vec![] };
+        set.resize(capacity);
+        set
+    }
+
+    /// Resizes this sparse set to have the new capacity given.
+    ///
+    /// This set is automatically cleared.
+    ///
+    /// This panics if the capacity given is bigger than `u32::MAX`.
+    fn resize(&mut self, new_capacity: usize) {
+        assert!(
+            new_capacity <= u32::MAX.as_usize(),
+            "sparse set capacity cannot exceed {:?}",
+            u32::MAX,
+        );
+        self.clear();
+        self.dense.resize(new_capacity, 0);
+        self.sparse.resize(new_capacity, 0);
+    }
+
+    /// Returns the capacity of this set.
+    ///
+    /// The capacity is the limit on the number of distinct elements that are
+    /// allowed in this set. It only changes when `resize` is called.
+    fn capacity(&self) -> usize {
+        self.dense.len()
+    }
+
+    /// Returns the number of elements in this set.
+    fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Returns true if and only if this set is empty.
+    fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Insert the state ID value into this set and return true if the given
+    /// state ID was not previously in this set.
+    ///
+    /// This operation is idempotent. If the given value is already in this
+    /// set, then this is a no-op.
+    ///
+    /// # Panics
+    ///
+    /// This panics if the given ID is not smaller than `capacity`, or if more
+    /// than `capacity` ids are inserted.
+    fn insert(&mut self, id: StateID) -> bool {
+        if self.contains(id) {
+            return false;
+        }
+
+        let index = self.len();
+        assert!(
+            index < self.capacity(),
+            "{:?} exceeds capacity of {:?} when inserting {:?}",
+            index,
+            self.capacity(),
+            id,
+        );
+        self.dense[index] = id;
+        // OK because we don't permit the capacity to be set higher than
+        // u32::MAX.
+        self.sparse[id.as_usize()] = u32::try_from(index).unwrap();
+        self.len += 1;
+        true
+    }
+
+    /// Returns true if and only if this set contains the given value.
+    fn contains(&self, id: StateID) -> bool {
+        let index = self.sparse[id.as_usize()];
+        index.as_usize() < self.len() && self.dense[index.as_usize()] == id
+    }
+
+    /// Clear this set such that it has no members.
+    fn clear(&mut self) {
+        self.len = 0;
+    }
+
+    /// Returns an iterator over all the state IDs in this set in the order in
+    /// which they were inserted.
+    fn iter(&self) -> SparseSetIter<'_> {
+        SparseSetIter(self.dense[..self.len()].iter())
+    }
+
+    /// Returns the heap memory usage, in bytes, used by this sparse set.
+    fn memory_usage(&self) -> usize {
+        let idsize = core::mem::size_of::<StateID>();
+        (self.dense.len() * idsize) + (self.sparse.len() * idsize)
+    }
+}
+
+impl core::fmt::Debug for SparseSet {
+    // Renders only the members (in insertion order), not the raw storage.
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        f.debug_tuple("SparseSet").field(&Vec::from_iter(self.iter())).finish()
+    }
+}
+
+/// The iterator returned by `SparseSet::iter`, yielding state IDs by value.
+///
+/// The lifetime `'a` is that of the borrow of the underlying sparse set.
+#[derive(Debug)]
+struct SparseSetIter<'a>(core::slice::Iter<'a, StateID>);
+
+impl<'a> Iterator for SparseSetIter<'a> {
+    type Item = StateID;
+
+    fn next(&mut self) -> Option<StateID> {
+        self.0.next().copied()
+    }
+}
diff --git a/regex-lite/src/string.rs b/regex-lite/src/string.rs
new file mode 100644
index 000000000..e358db9be
--- /dev/null
+++ b/regex-lite/src/string.rs
@@ -0,0 +1,80 @@
+use core::cell::RefCell;
+
+use alloc::sync::Arc;
+
+use crate::{
+    error::Error,
+    hir::{self, Hir},
+    nfa::{self, NFA},
+    pikevm::{Cache, PikeVM},
+};
+
+/// A compiled regular expression for searching `&str` haystacks.
+#[derive(Clone, Debug)]
+pub struct Regex {
+    pikevm: Arc<PikeVM>,
+    // TODO: Replace with pool.
+    cache: RefCell<Cache>,
+}
+
+impl Regex {
+    /// Compiles `pattern`, or returns an error if it is not a valid regex.
+    pub fn new(pattern: &str) -> Result<Regex, Error> {
+        let hir = Hir::parse(hir::Config::default(), pattern)?;
+        let nfa = NFA::new(nfa::Config::default(), &hir)?;
+        let pikevm = PikeVM::new(nfa);
+        let cache = Cache::new(&pikevm);
+        Ok(Regex { pikevm: Arc::new(pikevm), cache: RefCell::new(cache) })
+    }
+
+    /// Returns true if and only if this regex matches in `haystack`.
+    pub fn is_match(&self, haystack: &str) -> bool {
+        let mut cache = self.cache.borrow_mut();
+        self.pikevm.search(
+            &mut cache,
+            haystack.as_bytes(),
+            0,
+            haystack.len(),
+            true,
+            &mut [],
+        )
+    }
+
+    /// Returns the `(start, end)` byte offsets of a match, if there is one.
+    pub fn find(&self, haystack: &str) -> Option<(usize, usize)> {
+        let mut cache = self.cache.borrow_mut();
+        let mut slots = [None, None];
+        let matched = self.pikevm.search(
+            &mut cache,
+            haystack.as_bytes(),
+            0,
+            haystack.len(),
+            false,
+            &mut slots,
+        );
+        matched.then(|| (slots[0].unwrap().get(), slots[1].unwrap().get()))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // A small smoke test for `Regex::find`: a plain literal, a literal found
+    // in the middle of the haystack, `^` anchoring and the `(?Rm)` flags
+    // against a haystack with CRLF line terminators.
+    #[test]
+    fn scratch() {
+        let cases: [(&str, &str, Option<(usize, usize)>); 5] = [
+            ("abc", "abc", Some((0, 3))),
+            ("abc", "foo abc", Some((4, 7))),
+            ("^abc", "abc", Some((0, 3))),
+            ("^abc", "foo abc", None),
+            ("(?Rm)^foo$", "\r\nfoo\r\n", Some((2, 5))),
+        ];
+        for (pattern, haystack, expected) in cases {
+            let re = Regex::new(pattern).unwrap();
+            assert_eq!(expected, re.find(haystack));
+        }
+    }
+}
diff --git a/regex-lite/src/utf8.rs b/regex-lite/src/utf8.rs
new file mode 100644
index 000000000..cb361ac5a
--- /dev/null
+++ b/regex-lite/src/utf8.rs
@@ -0,0 +1,445 @@
+/// Returns true if and only if the given byte is an ASCII word byte. That is,
+/// `_`, an ASCII digit or an ASCII letter.
+///
+/// No byte outside of the ASCII range is ever considered a word byte, so
+/// this does not account for Unicode word characters.
+pub(crate) fn is_word_byte(b: u8) -> bool {
+    /// Builds the lookup table at compile time, with one entry per possible
+    /// byte value.
+    const fn mkwordset() -> [bool; 256] {
+        let mut set = [false; 256];
+        // FIXME: Use as_usize() once const functions in traits are stable.
+        let mut i = 0usize;
+        while i < 256 {
+            // 'i' never exceeds 255 here, so the cast below is lossless.
+            set[i] = matches!(
+                i as u8,
+                b'_' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z'
+            );
+            i += 1;
+        }
+        set
+    }
+
+    // The table is a constant, which makes the check below a single table
+    // lookup.
+    const WORD: [bool; 256] = mkwordset();
+    WORD[b as usize]
+}
+
+/// The accept state index, which doubles as the start state of `decode`. On
+/// entering it, a complete valid Unicode scalar value has been decoded.
+const ACCEPT: usize = 12;
+/// The reject state index. On entering it, the bytes seen so far are known
+/// to be invalid UTF-8.
+const REJECT: usize = 0;
+
+/// Decodes like `decode`, except that a failed decode yields the Unicode
+/// replacement codepoint (`U+FFFD`) instead of `None`.
+///
+/// The number of bytes reported is whatever `decode` reported.
+pub(crate) fn decode_lossy<B: AsRef<[u8]>>(slice: B) -> (char, usize) {
+    let (decoded, size) = decode(slice);
+    (decoded.unwrap_or('\u{FFFD}'), size)
+}
+
+/// Decodes like `decode_last`, except that a failed decode yields the Unicode
+/// replacement codepoint (`U+FFFD`) instead of `None`.
+///
+/// The number of bytes reported is whatever `decode_last` reported.
+pub(crate) fn decode_last_lossy<B: AsRef<[u8]>>(slice: B) -> (char, usize) {
+    let (decoded, size) = decode_last(slice);
+    (decoded.unwrap_or('\u{FFFD}'), size)
+}
+
+/// Decodes a single UTF-8 encoded Unicode scalar value from the start of the
+/// given slice.
+///
+/// On success, the decoded scalar value is returned together with the number
+/// of bytes it was encoded with, which is always 1, 2, 3 or 4.
+///
+/// On failure, `None` is returned together with the length of the maximal
+/// prefix of a valid UTF-8 code unit sequence. That length is always between
+/// 0 and 3, inclusive, and 0 is only reported when `slice` is empty. In
+/// particular, a non-empty slice always makes progress.
+pub(crate) fn decode<B: AsRef<[u8]>>(slice: B) -> (Option<char>, usize) {
+    let slice = slice.as_ref();
+    // Handle the empty and ASCII cases without running the automaton.
+    match slice.first() {
+        None => return (None, 0),
+        Some(&b) if b <= 0x7F => return (Some(b as char), 1),
+        Some(_) => {}
+    }
+
+    let (mut state, mut cp) = (ACCEPT, 0);
+    // 'consumed' counts the bytes fed to the automaton so far, including
+    // the current one.
+    for (consumed, &byte) in (1..).zip(slice) {
+        decode_step(&mut state, &mut cp, byte);
+        match state {
+            // OK since `decode_step` guarantees that `cp` is a valid Unicode
+            // scalar value in an ACCEPT state. Safe code is used here since
+            // perf isn't the primary objective in regex-lite.
+            ACCEPT => return (Some(char::from_u32(cp).unwrap()), consumed),
+            // The byte that caused the rejection is not counted, but we
+            // always want to advance by at least one byte.
+            REJECT => return (None, core::cmp::max(1, consumed - 1)),
+            _ => {}
+        }
+    }
+    // The input ended in the middle of a sequence.
+    (None, slice.len())
+}
+
+/// Like `decode`, but decodes backwards from the end of the given slice.
+pub(crate) fn decode_last<B: AsRef<[u8]>>(slice: B) -> (Option<char>, usize) {
+    // TODO: We could implement this by reversing the UTF-8 automaton, but for
+    // now, we do it the slow way by using the forward automaton.
+
+    let slice = slice.as_ref();
+    if slice.is_empty() {
+        return (None, 0);
+    }
+    // Look at no more than the last 4 bytes. Decoding starts at the last
+    // byte after 'limit' that is a leading or an invalid byte, or at 'limit'
+    // itself if there is no such byte.
+    let limit = slice.len().saturating_sub(4);
+    let start = slice[limit + 1..]
+        .iter()
+        .rposition(|&b| is_leading_or_invalid_utf8_byte(b))
+        .map_or(limit, |i| limit + 1 + i);
+    match decode(&slice[start..]) {
+        // If not all of the bytes were consumed, then there is at least one
+        // stray byte at the end, so only that one byte is stepped over.
+        (ch, size) if start + size == slice.len() => (ch, size),
+        _ => (None, 1),
+    }
+}
+
+/// Feeds `b` to the UTF-8 automaton, updating both `state` and `cp`.
+fn decode_step(state: &mut usize, cp: &mut u32, b: u8) {
+    // Maps each byte to one of 12 equivalence classes (the columns of
+    // STATES_FORWARD). Bytes in the same class can never discriminate between
+    // whether a particular sequence is valid UTF-8 or not.
+    #[cfg_attr(rustfmt, rustfmt::skip)]
+    const CLASSES: [u8; 256] = [
+       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+       1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+       7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+       8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+      10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+    ];
+
+    // A state machine taken from `bstr` which was in turn adapted from:
+    // https://bjoern.hoehrmann.de/utf-8/decoder/dfa/
+    #[cfg_attr(rustfmt, rustfmt::skip)]
+    const STATES_FORWARD: &'static [u8] = &[
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      12, 0, 24, 36, 60, 96, 84, 0, 0, 0, 48, 72,
+      0, 12, 0, 0, 0, 0, 0, 12, 0, 12, 0, 0,
+      0, 24, 0, 0, 0, 0, 0, 24, 0, 24, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 0,
+      0, 24, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 36, 0, 36, 0, 0,
+      0, 36, 0, 0, 0, 0, 0, 36, 0, 36, 0, 0,
+      0, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    ];
+
+    let class = CLASSES[b as usize];
+    if *state == ACCEPT {
+        *cp = (0xFF >> class) & (b as u32); // keep only the payload bits
+    } else {
+        *cp = (b as u32 & 0b111111) | (*cp << 6); // shift in low 6 bits of b
+    }
+    *state = STATES_FORWARD[*state + class as usize] as usize;
+}
+
+/// Reports whether the given byte either starts a UTF-8 sequence or can
+/// never appear anywhere in valid UTF-8. Put differently, this returns false
+/// only for continuation bytes.
+fn is_leading_or_invalid_utf8_byte(b: u8) -> bool {
+    // A continuation byte is the only kind of byte whose two most
+    // significant bits are '10':
+    //
+    //     ASCII byte      :: 0xxxxxxx
+    //     continuation    :: 10xxxxxx
+    //     2-byte leader   :: 110xxxxx
+    //     3-byte leader   :: 1110xxxx
+    //     4-byte leader   :: 11110xxx
+    //
+    // The bytes that can never appear anywhere in valid UTF-8 all have their
+    // two most significant bits set, and so are never mistaken for a
+    // continuation byte either:
+    //
+    //     \xC0 ..= \xC1 :: 1100000x
+    //     \xF5 ..= \xF7 :: 111101xx (with at least one 'x' set)
+    //     \xF8 ..= \xFF :: 11111xxx
+    //
+    // Thus, everything outside of the range below is either a leading byte
+    // or an invalid byte.
+    !matches!(b, 0b1000_0000..=0b1011_1111)
+}
+
+#[cfg(test)]
+mod tests {
+    use alloc::{vec, vec::Vec};
+
+    use super::*;
+
+    #[test]
+    fn decode_valid() {
+        fn d(mut s: &str) -> Vec<char> {
+            let mut chars = vec![];
+            while !s.is_empty() {
+                let (ch, size) = decode(s.as_bytes());
+                s = &s[size..];
+                chars.push(ch.unwrap());
+            }
+            chars
+        }
+
+        assert_eq!(vec!['☃'], d("☃"));
+        assert_eq!(vec!['☃', '☃'], d("☃☃"));
+        assert_eq!(vec!['α', 'β', 'γ', 'δ', 'ε'], d("αβγδε"));
+        assert_eq!(vec!['☃', '⛄', '⛇'], d("☃⛄⛇"));
+        assert_eq!(vec!['𝗮', '𝗯', '𝗰', '𝗱', '𝗲'], d("𝗮𝗯𝗰𝗱𝗲"));
+    }
+
+    #[test]
+    fn decode_invalid() {
+        let (ch, size) = decode(b"");
+        assert_eq!(None, ch);
+        assert_eq!(0, size);
+
+        let (ch, size) = decode(b"\xFF");
+        assert_eq!(None, ch);
+        assert_eq!(1, size);
+
+        let (ch, size) = decode(b"\xCE\xF0");
+        assert_eq!(None, ch);
+        assert_eq!(1, size);
+
+        let (ch, size) = decode(b"\xE2\x98\xF0");
+        assert_eq!(None, ch);
+        assert_eq!(2, size);
+
+        let (ch, size) = decode(b"\xF0\x9D\x9D");
+        assert_eq!(None, ch);
+        assert_eq!(3, size);
+
+        let (ch, size) = decode(b"\xF0\x9D\x9D\xF0");
+        assert_eq!(None, ch);
+        assert_eq!(3, size);
+
+        let (ch, size) = decode(b"\xF0\x82\x82\xAC");
+        assert_eq!(None, ch);
+        assert_eq!(1, size);
+
+        let (ch, size) = decode(b"\xED\xA0\x80");
+        assert_eq!(None, ch);
+        assert_eq!(1, size);
+
+        let (ch, size) = decode(b"\xCEa");
+        assert_eq!(None, ch);
+        assert_eq!(1, size);
+
+        let (ch, size) = decode(b"\xE2\x98a");
+        assert_eq!(None, ch);
+        assert_eq!(2, size);
+
+        let (ch, size) = decode(b"\xF0\x9D\x9Ca");
+        assert_eq!(None, ch);
+        assert_eq!(3, size);
+    }
+
+    #[test]
+    fn decode_lossily() {
+        // Asserts that `decode_lossy` substitutes U+FFFD for `bytes` and
+        // reports `want` as the size. `#[track_caller]` makes a failed
+        // assertion point at the case below instead of at this helper.
+        #[track_caller]
+        fn replaced(bytes: &[u8], want: usize) {
+            let (ch, size) = decode_lossy(bytes);
+            assert_eq!('\u{FFFD}', ch);
+            assert_eq!(want, size);
+        }
+
+        // Empty input.
+        replaced(b"", 0);
+
+        // 0xFF never occurs in UTF-8.
+        replaced(b"\xFF", 1);
+
+        // 2-byte lead followed by another lead byte.
+        replaced(b"\xCE\xF0", 1);
+
+        // 3-byte lead, one continuation byte, then another lead byte.
+        replaced(b"\xE2\x98\xF0", 2);
+
+        // 4-byte lead, two continuation bytes, then another lead byte.
+        replaced(b"\xF0\x9D\x9D\xF0", 3);
+
+        // Overlong encoding of U+20AC.
+        replaced(b"\xF0\x82\x82\xAC", 1);
+
+        // Encoding of the surrogate code point U+D800.
+        replaced(b"\xED\xA0\x80", 1);
+
+        // 2-byte lead followed by an ASCII byte.
+        replaced(b"\xCEa", 1);
+
+        // 3-byte lead, one continuation byte, then an ASCII byte.
+        replaced(b"\xE2\x98a", 2);
+
+        // 4-byte lead, two continuation bytes, then an ASCII byte.
+        replaced(b"\xF0\x9D\x9Ca", 3);
+    }
+
+    #[test]
+    fn decode_last_valid() {
+        // Peels scalar values off the end of `text`, last one first.
+        fn reversed(text: &str) -> Vec<char> {
+            let (mut rest, mut seen) = (text, Vec::new());
+            while !rest.is_empty() {
+                let (ch, size) = decode_last(rest.as_bytes());
+                rest = &rest[..rest.len() - size];
+                seen.push(ch.unwrap());
+            }
+            seen
+        }
+        assert_eq!(vec!['☃'], reversed("☃"));
+        assert_eq!(vec!['☃', '☃'], reversed("☃☃"));
+        assert_eq!(vec!['ε', 'δ', 'γ', 'β', 'α'], reversed("αβγδε"));
+        assert_eq!(vec!['⛇', '⛄', '☃'], reversed("☃⛄⛇"));
+        assert_eq!(vec!['𝗲', '𝗱', '𝗰', '𝗯', '𝗮'], reversed("𝗮𝗯𝗰𝗱𝗲"));
+    }
+
+    #[test]
+    fn decode_last_invalid() {
+        // Asserts that `decode_last` yields no character for `bytes` and
+        // reports `want` as the size. `#[track_caller]` makes a failed
+        // assertion point at the case below instead of at this helper.
+        #[track_caller]
+        fn rejected(bytes: &[u8], want: usize) {
+            let (ch, size) = decode_last(bytes);
+            assert_eq!(None, ch);
+            assert_eq!(want, size);
+        }
+
+        // Empty input.
+        rejected(b"", 0);
+
+        // 0xFF never occurs in UTF-8.
+        rejected(b"\xFF", 1);
+
+        // Ends with a lone 4-byte lead that follows a 2-byte lead.
+        rejected(b"\xCE\xF0", 1);
+
+        // A 2-byte lead with its continuation byte missing.
+        rejected(b"\xCE", 1);
+
+        // Ends with a lone 4-byte lead that follows a truncated 3-byte
+        // sequence.
+        rejected(b"\xE2\x98\xF0", 1);
+
+        // A 3-byte sequence with its final byte missing.
+        rejected(b"\xE2\x98", 2);
+
+        // Ends with a lone 4-byte lead that follows a truncated 4-byte
+        // sequence.
+        rejected(b"\xF0\x9D\x9D\xF0", 1);
+
+        // A 4-byte sequence with its final byte missing.
+        rejected(b"\xF0\x9D\x9D", 3);
+
+        // Overlong encoding of U+20AC: 0xF0 must be followed by a byte in
+        // 0x90..=0xBF.
+        rejected(b"\xF0\x82\x82\xAC", 1);
+
+        // Encoding of the surrogate code point U+D800: 0xED must be followed
+        // by a byte in 0x80..=0x9F.
+        rejected(b"\xED\xA0\x80", 1);
+
+        // The surrogate encoding above with its final byte missing.
+        rejected(b"\xED\xA0", 1);
+
+        // The lead byte of that encoding on its own.
+        rejected(b"\xED", 1);
+
+        // Truncated sequences like those above, now preceded by a valid
+        // ASCII byte: a 2-byte lead on its own...
+        rejected(b"a\xCE", 1);
+
+        // ...a 3-byte sequence with its final byte missing...
+        rejected(b"a\xE2\x98", 2);
+
+        // ...and a 4-byte sequence with its final byte missing.
+        rejected(b"a\xF0\x9D\x9C", 3);
+    }
+
+    #[test]
+    fn decode_last_lossily() {
+        // Asserts that `decode_last_lossy` substitutes U+FFFD for `bytes`
+        // and reports `want` as the size. `#[track_caller]` makes a failed
+        // assertion point at the case below instead of at this helper.
+        #[track_caller]
+        fn replaced(bytes: &[u8], want: usize) {
+            let (ch, size) = decode_last_lossy(bytes);
+            assert_eq!('\u{FFFD}', ch);
+            assert_eq!(want, size);
+        }
+
+        // Empty input.
+        replaced(b"", 0);
+
+        // 0xFF never occurs in UTF-8.
+        replaced(b"\xFF", 1);
+
+        // Ends with a lone 4-byte lead that follows a 2-byte lead.
+        replaced(b"\xCE\xF0", 1);
+
+        // A 2-byte lead with its continuation byte missing.
+        replaced(b"\xCE", 1);
+
+        // Ends with a lone 4-byte lead that follows a truncated 3-byte
+        // sequence.
+        replaced(b"\xE2\x98\xF0", 1);
+
+        // A 3-byte sequence with its final byte missing.
+        replaced(b"\xE2\x98", 2);
+
+        // Ends with a lone 4-byte lead that follows a truncated 4-byte
+        // sequence.
+        replaced(b"\xF0\x9D\x9D\xF0", 1);
+
+        // A 4-byte sequence with its final byte missing.
+        replaced(b"\xF0\x9D\x9D", 3);
+
+        // Overlong encoding of U+20AC: 0xF0 must be followed by a byte in
+        // 0x90..=0xBF.
+        replaced(b"\xF0\x82\x82\xAC", 1);
+
+        // Encoding of the surrogate code point U+D800: 0xED must be followed
+        // by a byte in 0x80..=0x9F.
+        replaced(b"\xED\xA0\x80", 1);
+
+        // The surrogate encoding above with its final byte missing.
+        replaced(b"\xED\xA0", 1);
+
+        // The lead byte of that encoding on its own.
+        replaced(b"\xED", 1);
+
+        // Truncated sequences like those above, now preceded by a valid
+        // ASCII byte: a 2-byte lead on its own...
+        replaced(b"a\xCE", 1);
+
+        // ...a 3-byte sequence with its final byte missing...
+        replaced(b"a\xE2\x98", 2);
+
+        // ...and a 4-byte sequence with its final byte missing.
+        replaced(b"a\xF0\x9D\x9C", 3);
+    }
+}
diff --git a/regex-lite/src/util.rs b/regex-lite/src/util.rs
new file mode 100644
index 000000000..f186b7a95
--- /dev/null
+++ b/regex-lite/src/util.rs
@@ -0,0 +1,108 @@
+/// Returns true if the given character has significance in a regex.
+///
+/// Generally speaking, meta characters are the only characters that _must_
+/// be escaped in order to match their literal meaning. For example, a
+/// literal `|` is matched by writing `\|`. Escaping a meta character is not
+/// always necessary, though. For example, `-` is a meta character because
+/// of its significance for writing ranges inside of character classes, yet
+/// the regex `-` matches a literal `-` because `-` has no special meaning
+/// outside of character classes.
+///
+/// To determine whether a character _may_ be escaped at all, use
+/// [`is_escapeable_character`] instead. That routine returns true for every
+/// meta character, and additionally for some characters that are _not_ meta
+/// characters. For example, `%` and `\%` both match a literal `%` in all
+/// contexts, so `%` is escapeable even though it is not a meta character. In
+/// other words, `is_escapeable_character` includes "superfluous" escapes.
+///
+/// The meta characters are `\`, `.`, `+`, `*`, `?`, `(`, `)`, `|`, `[`, `]`,
+/// `{`, `}`, `^`, `$`, `#`, `&`, `-` and `~`. The set of characters for
+/// which this function returns `true` or `false` is fixed and won't change
+/// in a semver compatible release of this crate, since reducing or expanding
+/// the set of meta characters would be a breaking change.
+///
+/// # Example
+///
+/// ```
+/// use regex_lite::is_meta_character;
+///
+/// assert!(is_meta_character('?'));
+/// assert!(is_meta_character('-'));
+/// assert!(is_meta_character('&'));
+/// assert!(is_meta_character('#'));
+///
+/// assert!(!is_meta_character('%'));
+/// assert!(!is_meta_character('/'));
+/// assert!(!is_meta_character('!'));
+/// assert!(!is_meta_character('"'));
+/// assert!(!is_meta_character('e'));
+/// ```
+pub fn is_meta_character(c: char) -> bool {
+    matches!(
+        c,
+        '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' | '[' | ']' | '{'
+            | '}' | '^' | '$' | '#' | '&' | '-' | '~'
+    )
+}
+
+/// Returns true if the given character can be escaped in a regex.
+///
+/// Every character for which [`is_meta_character`] returns true is also
+/// escapeable, but so are some characters that are not meta characters. For
+/// example, `%` is not a meta character, yet it is escapeable: `%` and `\%`
+/// both match a literal `%` in all contexts.
+///
+/// This routine exists to answer the question of which characters may be
+/// escaped. Namely, most regex engines permit "superfluous" escapes, where
+/// a character without any special significance is escaped even though
+/// there is no actual _need_ to do so.
+///
+/// Concretely, a character is escapeable if it is a meta character, or if
+/// it is an ASCII character other than a letter, a digit, `<` or `>`. No
+/// non-ASCII character is escapeable.
+///
+/// As a result, this returns false for some characters. For example, `e` is
+/// not escapeable. Therefore, `\e` will either result in a parse error
+/// (which is true today), or it could backwards compatibly evolve into a new
+/// construct with its own meaning. Indeed, that is the purpose of banning
+/// _some_ superfluous escapes: it provides a way to evolve the syntax in a
+/// compatible manner.
+///
+/// # Example
+///
+/// ```
+/// use regex_lite::is_escapeable_character;
+///
+/// assert!(is_escapeable_character('?'));
+/// assert!(is_escapeable_character('-'));
+/// assert!(is_escapeable_character('&'));
+/// assert!(is_escapeable_character('#'));
+/// assert!(is_escapeable_character('%'));
+/// assert!(is_escapeable_character('/'));
+/// assert!(is_escapeable_character('!'));
+/// assert!(is_escapeable_character('"'));
+///
+/// assert!(!is_escapeable_character('e'));
+/// ```
+pub fn is_escapeable_character(c: char) -> bool {
+    // Every meta character can be escaped, so that case is settled first.
+    if is_meta_character(c) {
+        return true;
+    }
+    // Letters and digits are never escapeable. Things like \3 are either
+    // octal (when enabled) or an error, and we should keep it that way.
+    // Letters, meanwhile, are reserved for adding new syntax in a backwards
+    // compatible way.
+    let alphanumeric = c.is_ascii_alphanumeric();
+    // While not currently supported, \< and \> are held back as well to
+    // give us some flexibility with respect to supporting them as word
+    // boundary assertions in the future. By reporting them as not
+    // escapeable, \< and \> will result in a parse error. Thus, we can turn
+    // them into something else later without it being a backwards
+    // incompatible change.
+    let angle_bracket = matches!(c, '<' | '>');
+    // Any character that isn't ASCII is definitely not escapeable (there's
+    // no real need to allow things like \☃), and everything that is left at
+    // this point is.
+    c.is_ascii() && !alphanumeric && !angle_bracket
+}