diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5171d79363..bacb4b0873 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,7 +52,7 @@ jobs: include: - build: pinned os: ubuntu-latest - rust: 1.41.1 + rust: 1.60.0 - build: stable os: ubuntu-latest rust: stable @@ -159,11 +159,6 @@ jobs: cd regex-capi ./test - - if: matrix.build == 'nightly' - name: Compile regex-debug - run: | - ${{ env.CARGO }} build --verbose --manifest-path regex-debug/Cargo.toml $TARGET - - if: matrix.build == 'nightly' name: Run benchmarks as tests run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index 44274acac5..a07999edee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,120 @@ +1.8.0 (TBD) +=========== +This is a sizeable release that will be soon followed by another sizeable +release. Both of them combined will close over 40 existing issues and PRs. + +This first release, despite its size, essentially represents preparatory work +for the second release, which will be even bigger. Namely, this release: + +* Increases the MSRV to Rust 1.60.0, which was released about 1 year ago. +* Upgrades its dependency on `aho-corasick` to the recently released 1.0 +version. +* Upgrades its dependency on `regex-syntax` to the simultaneously released +`0.7` version. The changes to `regex-syntax` principally revolve around a +rewrite of its literal extraction code and a number of simplifications and +optimizations to its high-level intermediate representation (HIR). + +The second release, which will follow ~shortly after the release above, will +contain a soup-to-nuts rewrite of every regex engine. This will be done by +bringing [`regex-automata`](https://github.com/BurntSushi/regex-automata) into +this repository, and then changing the `regex` crate to be nothing but an API +shim layer on top of `regex-automata`'s API. 
+ +These tandem releases are the culmination of about 3 +years of on-and-off work that [began in earnest in March +2020](https://github.com/rust-lang/regex/issues/656). + +Because of the scale of changes involved in these releases, I would love to +hear about your experience. Especially if you notice undocumented changes in +behavior or performance changes (positive *or* negative). + +Most changes in the first release are listed below. For more details, please +see the commit log, which reflects a linear and decently documented history +of all changes. + +New features: + +* [FEATURE #501](https://github.com/rust-lang/regex/issues/501): +Permit many more characters to be escaped, even if they have no significance. +More specifically, any character except for `[0-9A-Za-z<>]` can now be +escaped. Also, a new routine, `is_escapeable_character`, has been added to +`regex-syntax` to query whether a character is escapeable or not. +* [FEATURE #547](https://github.com/rust-lang/regex/issues/547): +Add `Regex::captures_at`. This fills a hole in the API, but doesn't otherwise +introduce any new expressive power. +* [FEATURE #595](https://github.com/rust-lang/regex/issues/595): +Capture group names are now Unicode-aware. They can now begin with either a `_` +or any "alphabetic" codepoint. After the first codepoint, subsequent codepoints +can be any sequence of alpha-numeric codepoints, along with `_`, `.`, `[` and +`]`. Note that replacement syntax has not changed. +* [FEATURE #810](https://github.com/rust-lang/regex/issues/810): +Add `Match::is_empty` and `Match::len` APIs. +* [FEATURE #905](https://github.com/rust-lang/regex/issues/905): +Add an `impl Default for RegexSet`, with the default being the empty set. 
+* [FEATURE #908](https://github.com/rust-lang/regex/issues/908): +A new method, `Regex::static_captures_len`, has been added which returns the +number of capture groups in the pattern if and only if every possible match +always contains the same number of matching groups. +* [FEATURE #955](https://github.com/rust-lang/regex/issues/955): +Named captures can now be written as `(?<name>re)` in addition to +`(?P<name>re)`. +* FEATURE: `regex-syntax` now supports empty character classes. +* FEATURE: `regex-syntax` now has an optional `std` feature. (This will come +to `regex` in the second release.) +* FEATURE: The `Hir` type in `regex-syntax` has had a number of simplifications +made to it. +* FEATURE: `regex-syntax` has support for a new `R` flag for enabling CRLF +mode. This will be supported in `regex` proper in the second release. +* FEATURE: `regex-syntax` now has proper support for "regex that never +matches" via `Hir::fail()`. +* FEATURE: The `hir::literal` module of `regex-syntax` has been completely +re-worked. It now has more documentation, examples and advice. +* FEATURE: The `allow_invalid_utf8` option in `regex-syntax` has been renamed +to `utf8`, and the meaning of the boolean has been flipped. + +Performance improvements: + +Bug fixes: + +* [BUG #514](https://github.com/rust-lang/regex/issues/514): +Improve `Debug` impl for `Match` so that it doesn't show the entire haystack. +* BUGS [#516](https://github.com/rust-lang/regex/issues/516), +[#731](https://github.com/rust-lang/regex/issues/731): +Fix a number of issues with printing `Hir` values as regex patterns. +* [BUG #610](https://github.com/rust-lang/regex/issues/610): +Add explicit example of `foo|bar` in the regex syntax docs. +* [BUG #625](https://github.com/rust-lang/regex/issues/625): +Clarify that `SetMatches::len` does not (regrettably) refer to the number of +matches in the set. +* [BUG #660](https://github.com/rust-lang/regex/issues/660): +Clarify "verbose mode" in regex syntax documentation. 
+* BUG [#738](https://github.com/rust-lang/regex/issues/738), +[#950](https://github.com/rust-lang/regex/issues/950): +Fix `CaptureLocations::get` so that it never panics. +* [BUG #747](https://github.com/rust-lang/regex/issues/747): +Clarify documentation for `Regex::shortest_match`. +* [BUG #835](https://github.com/rust-lang/regex/issues/835): +Fix `\p{Sc}` so that it is equivalent to `\p{Currency_Symbol}`. +* [BUG #846](https://github.com/rust-lang/regex/issues/846): +Add more clarifying documentation to the `CompiledTooBig` error variant. +* [BUG #854](https://github.com/rust-lang/regex/issues/854): +Clarify that `regex::Regex` searches as if the haystack is a sequence of +Unicode scalar values. +* [BUG #884](https://github.com/rust-lang/regex/issues/884): +Replace `__Nonexhaustive` variants with `#[non_exhaustive]` attribute. +* [BUG #893](https://github.com/rust-lang/regex/pull/893): +Optimize case folding since it can get quite slow in some pathological cases. +* [BUG #895](https://github.com/rust-lang/regex/issues/895): +Reject `(?-u:\W)` in `regex::Regex` APIs. +* [BUG #942](https://github.com/rust-lang/regex/issues/942): +Add a missing `void` keyword to indicate "no parameters" in C API. +* [BUG #965](https://github.com/rust-lang/regex/issues/965): +Fix `\p{Lc}` so that it is equivalent to `\p{Cased_Letter}`. +* [BUG #975](https://github.com/rust-lang/regex/issues/975): +Clarify documentation for `\pX` syntax. + + + 1.7.3 (2023-03-24) ================== This is a small release that fixes a bug in `Regex::shortest_match_at` that @@ -743,7 +860,7 @@ Bug gixes: ================== This release includes a ground-up rewrite of the regex-syntax crate, which has been in development for over a year. - + New features: * Error messages for invalid regexes have been greatly improved. 
diff --git a/Cargo.toml b/Cargo.toml index 4c5bd1cc11..f4c70aa1a0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,11 +14,12 @@ finite automata and guarantees linear time matching on all inputs. categories = ["text-processing"] autotests = false exclude = ["/scripts/*", "/.github/*"] -edition = "2018" +edition = "2021" +rust-version = "1.60.0" [workspace] members = [ - "bench", "regex-capi", "regex-debug", "regex-syntax", + "bench", "regex-capi", "regex-syntax", ] [lib] @@ -106,7 +107,7 @@ pattern = [] # For very fast prefix literal matching. [dependencies.aho-corasick] -version = "0.7.18" +version = "1.0.0" optional = true # For skipping along search text quickly when a leading byte is known. diff --git a/bench/log/10-last-frontier/rust-after-literal.log b/bench/log/10-last-frontier/rust-after-literal.log new file mode 100644 index 0000000000..c45b55cac7 --- /dev/null +++ b/bench/log/10-last-frontier/rust-after-literal.log @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 20 ns/iter (+/- 0) = 19500 MB/s +test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 0) = 1444 MB/s +test misc::anchored_literal_short_non_match ... bench: 20 ns/iter (+/- 0) = 1300 MB/s +test misc::easy0_1K ... bench: 51 ns/iter (+/- 2) = 20607 MB/s +test misc::easy0_1MB ... bench: 56 ns/iter (+/- 1) = 18725053 MB/s +test misc::easy0_32 ... bench: 51 ns/iter (+/- 0) = 1156 MB/s +test misc::easy0_32K ... bench: 53 ns/iter (+/- 1) = 618773 MB/s +test misc::easy1_1K ... bench: 41 ns/iter (+/- 0) = 25463 MB/s +test misc::easy1_1MB ... bench: 44 ns/iter (+/- 1) = 23831727 MB/s +test misc::easy1_32 ... bench: 40 ns/iter (+/- 1) = 1300 MB/s +test misc::easy1_32K ... bench: 40 ns/iter (+/- 1) = 819700 MB/s +test misc::hard_1K ... bench: 51 ns/iter (+/- 2) = 20607 MB/s +test misc::hard_1MB ... 
bench: 56 ns/iter (+/- 1) = 18725053 MB/s +test misc::hard_32 ... bench: 51 ns/iter (+/- 2) = 1156 MB/s +test misc::hard_32K ... bench: 51 ns/iter (+/- 1) = 643039 MB/s +test misc::is_match_set ... bench: 61 ns/iter (+/- 2) = 409 MB/s +test misc::literal ... bench: 13 ns/iter (+/- 0) = 3923 MB/s +test misc::long_needle1 ... bench: 3,242 ns/iter (+/- 79) = 30845 MB/s +test misc::long_needle2 ... bench: 350,572 ns/iter (+/- 6,860) = 285 MB/s +test misc::match_class ... bench: 62 ns/iter (+/- 6) = 1306 MB/s +test misc::match_class_in_range ... bench: 14 ns/iter (+/- 0) = 5785 MB/s +test misc::match_class_unicode ... bench: 259 ns/iter (+/- 15) = 621 MB/s +test misc::matches_set ... bench: 462 ns/iter (+/- 9) = 54 MB/s +test misc::medium_1K ... bench: 53 ns/iter (+/- 0) = 19849 MB/s +test misc::medium_1MB ... bench: 58 ns/iter (+/- 1) = 18079379 MB/s +test misc::medium_32 ... bench: 53 ns/iter (+/- 1) = 1132 MB/s +test misc::medium_32K ... bench: 53 ns/iter (+/- 1) = 618792 MB/s +test misc::no_exponential ... bench: 423 ns/iter (+/- 13) = 236 MB/s +test misc::not_literal ... bench: 89 ns/iter (+/- 0) = 573 MB/s +test misc::one_pass_long_prefix ... bench: 52 ns/iter (+/- 0) = 500 MB/s +test misc::one_pass_long_prefix_not ... bench: 52 ns/iter (+/- 1) = 500 MB/s +test misc::one_pass_short ... bench: 38 ns/iter (+/- 1) = 447 MB/s +test misc::one_pass_short_not ... bench: 41 ns/iter (+/- 1) = 414 MB/s +test misc::reallyhard2_1K ... bench: 81 ns/iter (+/- 1) = 12839 MB/s +test misc::reallyhard_1K ... bench: 1,592 ns/iter (+/- 1) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,575,822 ns/iter (+/- 39,203) = 665 MB/s +test misc::reallyhard_32 ... bench: 102 ns/iter (+/- 0) = 578 MB/s +test misc::reallyhard_32K ... bench: 49,328 ns/iter (+/- 2,598) = 664 MB/s +test misc::replace_all ... bench: 132 ns/iter (+/- 3) +test misc::reverse_suffix_no_quadratic ... bench: 4,171 ns/iter (+/- 134) = 1918 MB/s +test misc::short_haystack_1000000x ... 
bench: 132,251 ns/iter (+/- 729) = 60491 MB/s +test misc::short_haystack_100000x ... bench: 13,184 ns/iter (+/- 408) = 60680 MB/s +test misc::short_haystack_10000x ... bench: 6,036 ns/iter (+/- 167) = 13255 MB/s +test misc::short_haystack_1000x ... bench: 602 ns/iter (+/- 14) = 13307 MB/s +test misc::short_haystack_100x ... bench: 230 ns/iter (+/- 7) = 3526 MB/s +test misc::short_haystack_10x ... bench: 218 ns/iter (+/- 3) = 417 MB/s +test misc::short_haystack_1x ... bench: 210 ns/iter (+/- 8) = 90 MB/s +test misc::short_haystack_2x ... bench: 225 ns/iter (+/- 6) = 120 MB/s +test misc::short_haystack_3x ... bench: 211 ns/iter (+/- 8) = 165 MB/s +test misc::short_haystack_4x ... bench: 212 ns/iter (+/- 6) = 202 MB/s +test regexdna::find_new_lines ... bench: 12,245,066 ns/iter (+/- 117,141) = 415 MB/s +test regexdna::subst1 ... bench: 786,357 ns/iter (+/- 14,200) = 6464 MB/s +test regexdna::subst10 ... bench: 788,550 ns/iter (+/- 26,456) = 6446 MB/s +test regexdna::subst11 ... bench: 782,161 ns/iter (+/- 15,583) = 6499 MB/s +test regexdna::subst2 ... bench: 784,902 ns/iter (+/- 23,379) = 6476 MB/s +test regexdna::subst3 ... bench: 786,640 ns/iter (+/- 27,063) = 6462 MB/s +test regexdna::subst4 ... bench: 785,591 ns/iter (+/- 20,498) = 6470 MB/s +test regexdna::subst5 ... bench: 787,447 ns/iter (+/- 20,892) = 6455 MB/s +test regexdna::subst6 ... bench: 784,994 ns/iter (+/- 19,687) = 6475 MB/s +test regexdna::subst7 ... bench: 801,921 ns/iter (+/- 15,391) = 6339 MB/s +test regexdna::subst8 ... bench: 785,541 ns/iter (+/- 11,908) = 6471 MB/s +test regexdna::subst9 ... bench: 785,848 ns/iter (+/- 28,020) = 6468 MB/s +test regexdna::variant1 ... bench: 2,195,058 ns/iter (+/- 44,066) = 2315 MB/s +test regexdna::variant2 ... bench: 3,219,968 ns/iter (+/- 59,372) = 1578 MB/s +test regexdna::variant3 ... bench: 3,776,467 ns/iter (+/- 54,326) = 1346 MB/s +test regexdna::variant4 ... bench: 3,803,674 ns/iter (+/- 95,281) = 1336 MB/s +test regexdna::variant5 ... 
bench: 2,661,333 ns/iter (+/- 46,408) = 1910 MB/s +test regexdna::variant6 ... bench: 2,645,716 ns/iter (+/- 38,659) = 1921 MB/s +test regexdna::variant7 ... bench: 3,228,352 ns/iter (+/- 69,155) = 1574 MB/s +test regexdna::variant8 ... bench: 3,305,563 ns/iter (+/- 59,321) = 1537 MB/s +test regexdna::variant9 ... bench: 3,225,039 ns/iter (+/- 49,720) = 1576 MB/s +test rust_compile::compile_huge ... bench: 100,381 ns/iter (+/- 2,052) +test rust_compile::compile_huge_bytes ... bench: 5,899,989 ns/iter (+/- 114,363) +test rust_compile::compile_huge_full ... bench: 11,650,995 ns/iter (+/- 172,285) +test rust_compile::compile_simple ... bench: 4,082 ns/iter (+/- 88) +test rust_compile::compile_simple_bytes ... bench: 4,153 ns/iter (+/- 120) +test rust_compile::compile_simple_full ... bench: 20,414 ns/iter (+/- 1,860) +test rust_compile::compile_small ... bench: 9,114 ns/iter (+/- 216) +test rust_compile::compile_small_bytes ... bench: 183,049 ns/iter (+/- 9,917) +test rust_compile::compile_small_full ... bench: 361,291 ns/iter (+/- 11,045) +test sherlock::before_after_holmes ... bench: 907,103 ns/iter (+/- 12,165) = 655 MB/s +test sherlock::before_holmes ... bench: 62,501 ns/iter (+/- 1,880) = 9518 MB/s +test sherlock::everything_greedy ... bench: 2,062,116 ns/iter (+/- 41,900) = 288 MB/s +test sherlock::everything_greedy_nl ... bench: 894,529 ns/iter (+/- 38,723) = 665 MB/s +test sherlock::holmes_cochar_watson ... bench: 103,305 ns/iter (+/- 3,798) = 5758 MB/s +test sherlock::holmes_coword_watson ... bench: 479,423 ns/iter (+/- 13,924) = 1240 MB/s +test sherlock::ing_suffix ... bench: 318,300 ns/iter (+/- 6,846) = 1869 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,066,300 ns/iter (+/- 19,375) = 557 MB/s +test sherlock::letters ... bench: 21,777,358 ns/iter (+/- 230,478) = 27 MB/s +test sherlock::letters_lower ... bench: 21,152,019 ns/iter (+/- 203,617) = 28 MB/s +test sherlock::letters_upper ... 
bench: 1,777,626 ns/iter (+/- 26,243) = 334 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,509 ns/iter (+/- 24,983) = 662 MB/s +test sherlock::name_alt1 ... bench: 32,255 ns/iter (+/- 681) = 18444 MB/s +test sherlock::name_alt2 ... bench: 86,369 ns/iter (+/- 2,494) = 6888 MB/s +test sherlock::name_alt3 ... bench: 97,618 ns/iter (+/- 564) = 6094 MB/s +test sherlock::name_alt3_nocase ... bench: 944,848 ns/iter (+/- 31,039) = 629 MB/s +test sherlock::name_alt4 ... bench: 122,029 ns/iter (+/- 2,716) = 4875 MB/s +test sherlock::name_alt4_nocase ... bench: 225,544 ns/iter (+/- 5,783) = 2637 MB/s +test sherlock::name_alt5 ... bench: 91,897 ns/iter (+/- 3,796) = 6473 MB/s +test sherlock::name_alt5_nocase ... bench: 936,420 ns/iter (+/- 15,092) = 635 MB/s +test sherlock::name_holmes ... bench: 33,448 ns/iter (+/- 959) = 17786 MB/s +test sherlock::name_holmes_nocase ... bench: 115,864 ns/iter (+/- 1,645) = 5134 MB/s +test sherlock::name_sherlock ... bench: 22,474 ns/iter (+/- 674) = 26472 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,184 ns/iter (+/- 497) = 26818 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 99,629 ns/iter (+/- 2,398) = 5971 MB/s +test sherlock::name_sherlock_nocase ... bench: 99,523 ns/iter (+/- 2,674) = 5977 MB/s +test sherlock::name_whitespace ... bench: 30,815 ns/iter (+/- 107) = 19306 MB/s +test sherlock::no_match_common ... bench: 19,661 ns/iter (+/- 656) = 30259 MB/s +test sherlock::no_match_really_common ... bench: 27,544 ns/iter (+/- 527) = 21599 MB/s +test sherlock::no_match_uncommon ... bench: 19,553 ns/iter (+/- 31) = 30426 MB/s +test sherlock::quotes ... bench: 369,144 ns/iter (+/- 45,316) = 1611 MB/s +test sherlock::repeated_class_negation ... bench: 68,838,857 ns/iter (+/- 330,544) = 8 MB/s +test sherlock::the_lower ... bench: 321,692 ns/iter (+/- 5,418) = 1849 MB/s +test sherlock::the_nocase ... bench: 507,936 ns/iter (+/- 3,080) = 1171 MB/s +test sherlock::the_upper ... 
bench: 43,705 ns/iter (+/- 788) = 13612 MB/s +test sherlock::the_whitespace ... bench: 819,179 ns/iter (+/- 20,071) = 726 MB/s +test sherlock::word_ending_n ... bench: 1,700,300 ns/iter (+/- 36,623) = 349 MB/s +test sherlock::words ... bench: 8,249,767 ns/iter (+/- 75,015) = 72 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 111.55s + diff --git a/bench/log/10-last-frontier/rust-before-literal.log b/bench/log/10-last-frontier/rust-before-literal.log new file mode 100644 index 0000000000..98b3496ae9 --- /dev/null +++ b/bench/log/10-last-frontier/rust-before-literal.log @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s +test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 0) = 1444 MB/s +test misc::anchored_literal_short_non_match ... bench: 18 ns/iter (+/- 0) = 1444 MB/s +test misc::easy0_1K ... bench: 15 ns/iter (+/- 0) = 70066 MB/s +test misc::easy0_1MB ... bench: 21 ns/iter (+/- 0) = 49933476 MB/s +test misc::easy0_32 ... bench: 15 ns/iter (+/- 0) = 3933 MB/s +test misc::easy0_32K ... bench: 14 ns/iter (+/- 0) = 2342500 MB/s +test misc::easy1_1K ... bench: 40 ns/iter (+/- 1) = 26100 MB/s +test misc::easy1_1MB ... bench: 45 ns/iter (+/- 1) = 23302133 MB/s +test misc::easy1_32 ... bench: 40 ns/iter (+/- 5) = 1300 MB/s +test misc::easy1_32K ... bench: 40 ns/iter (+/- 1) = 819700 MB/s +test misc::hard_1K ... bench: 51 ns/iter (+/- 1) = 20607 MB/s +test misc::hard_1MB ... bench: 56 ns/iter (+/- 0) = 18725053 MB/s +test misc::hard_32 ... bench: 51 ns/iter (+/- 3) = 1156 MB/s +test misc::hard_32K ... bench: 51 ns/iter (+/- 1) = 643039 MB/s +test misc::is_match_set ... bench: 61 ns/iter (+/- 2) = 409 MB/s +test misc::literal ... bench: 13 ns/iter (+/- 0) = 3923 MB/s +test misc::long_needle1 ... 
bench: 3,259 ns/iter (+/- 86) = 30684 MB/s +test misc::long_needle2 ... bench: 350,722 ns/iter (+/- 6,984) = 285 MB/s +test misc::match_class ... bench: 60 ns/iter (+/- 1) = 1350 MB/s +test misc::match_class_in_range ... bench: 14 ns/iter (+/- 0) = 5785 MB/s +test misc::match_class_unicode ... bench: 255 ns/iter (+/- 0) = 631 MB/s +test misc::matches_set ... bench: 481 ns/iter (+/- 11) = 51 MB/s +test misc::medium_1K ... bench: 15 ns/iter (+/- 0) = 70133 MB/s +test misc::medium_1MB ... bench: 22 ns/iter (+/- 0) = 47663818 MB/s +test misc::medium_32 ... bench: 15 ns/iter (+/- 0) = 4000 MB/s +test misc::medium_32K ... bench: 15 ns/iter (+/- 0) = 2186400 MB/s +test misc::no_exponential ... bench: 442 ns/iter (+/- 13) = 226 MB/s +test misc::not_literal ... bench: 89 ns/iter (+/- 1) = 573 MB/s +test misc::one_pass_long_prefix ... bench: 54 ns/iter (+/- 1) = 481 MB/s +test misc::one_pass_long_prefix_not ... bench: 52 ns/iter (+/- 1) = 500 MB/s +test misc::one_pass_short ... bench: 39 ns/iter (+/- 0) = 435 MB/s +test misc::one_pass_short_not ... bench: 42 ns/iter (+/- 0) = 404 MB/s +test misc::reallyhard2_1K ... bench: 83 ns/iter (+/- 6) = 12530 MB/s +test misc::reallyhard_1K ... bench: 1,592 ns/iter (+/- 4) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,575,691 ns/iter (+/- 29,668) = 665 MB/s +test misc::reallyhard_32 ... bench: 101 ns/iter (+/- 5) = 584 MB/s +test misc::reallyhard_32K ... bench: 49,325 ns/iter (+/- 1,734) = 664 MB/s +test misc::replace_all ... bench: 134 ns/iter (+/- 2) +test misc::reverse_suffix_no_quadratic ... bench: 4,189 ns/iter (+/- 274) = 1909 MB/s +test misc::short_haystack_1000000x ... bench: 132,182 ns/iter (+/- 4,966) = 60522 MB/s +test misc::short_haystack_100000x ... bench: 13,344 ns/iter (+/- 275) = 59952 MB/s +test misc::short_haystack_10000x ... bench: 6,119 ns/iter (+/- 285) = 13075 MB/s +test misc::short_haystack_1000x ... bench: 617 ns/iter (+/- 15) = 12983 MB/s +test misc::short_haystack_100x ... 
bench: 230 ns/iter (+/- 7) = 3526 MB/s +test misc::short_haystack_10x ... bench: 207 ns/iter (+/- 8) = 439 MB/s +test misc::short_haystack_1x ... bench: 213 ns/iter (+/- 7) = 89 MB/s +test misc::short_haystack_2x ... bench: 206 ns/iter (+/- 6) = 131 MB/s +test misc::short_haystack_3x ... bench: 207 ns/iter (+/- 10) = 169 MB/s +test misc::short_haystack_4x ... bench: 208 ns/iter (+/- 7) = 206 MB/s +test regexdna::find_new_lines ... bench: 12,275,804 ns/iter (+/- 145,331) = 414 MB/s +test regexdna::subst1 ... bench: 793,517 ns/iter (+/- 44,203) = 6406 MB/s +test regexdna::subst10 ... bench: 794,922 ns/iter (+/- 23,459) = 6394 MB/s +test regexdna::subst11 ... bench: 790,525 ns/iter (+/- 23,010) = 6430 MB/s +test regexdna::subst2 ... bench: 790,637 ns/iter (+/- 17,962) = 6429 MB/s +test regexdna::subst3 ... bench: 793,559 ns/iter (+/- 17,575) = 6405 MB/s +test regexdna::subst4 ... bench: 792,738 ns/iter (+/- 15,237) = 6412 MB/s +test regexdna::subst5 ... bench: 795,060 ns/iter (+/- 26,172) = 6393 MB/s +test regexdna::subst6 ... bench: 792,357 ns/iter (+/- 15,067) = 6415 MB/s +test regexdna::subst7 ... bench: 797,006 ns/iter (+/- 27,928) = 6378 MB/s +test regexdna::subst8 ... bench: 790,603 ns/iter (+/- 22,754) = 6429 MB/s +test regexdna::subst9 ... bench: 793,055 ns/iter (+/- 13,202) = 6409 MB/s +test regexdna::variant1 ... bench: 2,204,304 ns/iter (+/- 50,669) = 2306 MB/s +test regexdna::variant2 ... bench: 3,224,798 ns/iter (+/- 45,705) = 1576 MB/s +test regexdna::variant3 ... bench: 3,802,774 ns/iter (+/- 86,530) = 1336 MB/s +test regexdna::variant4 ... bench: 3,805,916 ns/iter (+/- 69,737) = 1335 MB/s +test regexdna::variant5 ... bench: 2,662,373 ns/iter (+/- 61,259) = 1909 MB/s +test regexdna::variant6 ... bench: 2,654,072 ns/iter (+/- 51,095) = 1915 MB/s +test regexdna::variant7 ... bench: 3,232,369 ns/iter (+/- 67,147) = 1572 MB/s +test regexdna::variant8 ... bench: 3,311,225 ns/iter (+/- 66,086) = 1535 MB/s +test regexdna::variant9 ... 
bench: 3,241,601 ns/iter (+/- 68,394) = 1568 MB/s +test rust_compile::compile_huge ... bench: 100,955 ns/iter (+/- 2,466) +test rust_compile::compile_huge_bytes ... bench: 5,936,732 ns/iter (+/- 126,993) +test rust_compile::compile_huge_full ... bench: 11,880,838 ns/iter (+/- 211,387) +test rust_compile::compile_simple ... bench: 4,575 ns/iter (+/- 139) +test rust_compile::compile_simple_bytes ... bench: 4,653 ns/iter (+/- 122) +test rust_compile::compile_simple_full ... bench: 20,656 ns/iter (+/- 535) +test rust_compile::compile_small ... bench: 9,613 ns/iter (+/- 992) +test rust_compile::compile_small_bytes ... bench: 188,349 ns/iter (+/- 4,733) +test rust_compile::compile_small_full ... bench: 341,554 ns/iter (+/- 9,774) +test sherlock::before_after_holmes ... bench: 907,419 ns/iter (+/- 11,645) = 655 MB/s +test sherlock::before_holmes ... bench: 62,036 ns/iter (+/- 1,854) = 9590 MB/s +test sherlock::everything_greedy ... bench: 2,072,694 ns/iter (+/- 45,192) = 287 MB/s +test sherlock::everything_greedy_nl ... bench: 884,483 ns/iter (+/- 25,710) = 672 MB/s +test sherlock::holmes_cochar_watson ... bench: 103,873 ns/iter (+/- 1,310) = 5727 MB/s +test sherlock::holmes_coword_watson ... bench: 481,491 ns/iter (+/- 11,516) = 1235 MB/s +test sherlock::ing_suffix ... bench: 323,119 ns/iter (+/- 7,438) = 1841 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,067,293 ns/iter (+/- 18,661) = 557 MB/s +test sherlock::letters ... bench: 21,732,526 ns/iter (+/- 253,563) = 27 MB/s +test sherlock::letters_lower ... bench: 21,187,465 ns/iter (+/- 191,023) = 28 MB/s +test sherlock::letters_upper ... bench: 1,766,003 ns/iter (+/- 17,494) = 336 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,387 ns/iter (+/- 26,674) = 662 MB/s +test sherlock::name_alt1 ... bench: 34,183 ns/iter (+/- 885) = 17404 MB/s +test sherlock::name_alt2 ... bench: 87,151 ns/iter (+/- 2,139) = 6826 MB/s +test sherlock::name_alt3 ... 
bench: 99,293 ns/iter (+/- 1,938) = 5991 MB/s +test sherlock::name_alt3_nocase ... bench: 379,228 ns/iter (+/- 22,539) = 1568 MB/s +test sherlock::name_alt4 ... bench: 123,040 ns/iter (+/- 2,676) = 4835 MB/s +test sherlock::name_alt4_nocase ... bench: 186,045 ns/iter (+/- 403) = 3197 MB/s +test sherlock::name_alt5 ... bench: 91,679 ns/iter (+/- 2,543) = 6489 MB/s +test sherlock::name_alt5_nocase ... bench: 343,668 ns/iter (+/- 6,807) = 1731 MB/s +test sherlock::name_holmes ... bench: 33,802 ns/iter (+/- 936) = 17600 MB/s +test sherlock::name_holmes_nocase ... bench: 136,208 ns/iter (+/- 4,317) = 4367 MB/s +test sherlock::name_sherlock ... bench: 22,534 ns/iter (+/- 462) = 26401 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,514 ns/iter (+/- 697) = 26425 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 97,796 ns/iter (+/- 2,037) = 6083 MB/s +test sherlock::name_sherlock_nocase ... bench: 95,809 ns/iter (+/- 1,538) = 6209 MB/s +test sherlock::name_whitespace ... bench: 30,959 ns/iter (+/- 968) = 19216 MB/s +test sherlock::no_match_common ... bench: 19,568 ns/iter (+/- 616) = 30403 MB/s +test sherlock::no_match_really_common ... bench: 26,273 ns/iter (+/- 1,143) = 22644 MB/s +test sherlock::no_match_uncommon ... bench: 19,643 ns/iter (+/- 496) = 30287 MB/s +test sherlock::quotes ... bench: 371,876 ns/iter (+/- 2,494) = 1599 MB/s +test sherlock::repeated_class_negation ... bench: 76,963,104 ns/iter (+/- 277,311) = 7 MB/s +test sherlock::the_lower ... bench: 331,250 ns/iter (+/- 8,588) = 1796 MB/s +test sherlock::the_nocase ... bench: 516,528 ns/iter (+/- 40,826) = 1151 MB/s +test sherlock::the_upper ... bench: 44,206 ns/iter (+/- 1,277) = 13458 MB/s +test sherlock::the_whitespace ... bench: 822,577 ns/iter (+/- 23,649) = 723 MB/s +test sherlock::word_ending_n ... bench: 1,685,110 ns/iter (+/- 34,615) = 353 MB/s +test sherlock::words ... bench: 8,333,499 ns/iter (+/- 152,757) = 71 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 124.94s + diff --git a/bench/log/10-last-frontier/rust-bytes-after-literal.log b/bench/log/10-last-frontier/rust-bytes-after-literal.log new file mode 100644 index 0000000000..470e09b9c8 --- /dev/null +++ b/bench/log/10-last-frontier/rust-bytes-after-literal.log @@ -0,0 +1,112 @@ + +running 107 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 20 ns/iter (+/- 1) = 19500 MB/s +test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 0) = 1444 MB/s +test misc::anchored_literal_short_non_match ... bench: 20 ns/iter (+/- 0) = 1300 MB/s +test misc::easy0_1K ... bench: 54 ns/iter (+/- 2) = 19462 MB/s +test misc::easy0_1MB ... bench: 56 ns/iter (+/- 1) = 18725053 MB/s +test misc::easy0_32 ... bench: 51 ns/iter (+/- 1) = 1156 MB/s +test misc::easy0_32K ... bench: 51 ns/iter (+/- 2) = 643039 MB/s +test misc::easy1_1K ... bench: 41 ns/iter (+/- 1) = 25463 MB/s +test misc::easy1_1MB ... bench: 44 ns/iter (+/- 1) = 23831727 MB/s +test misc::easy1_32 ... bench: 40 ns/iter (+/- 2) = 1300 MB/s +test misc::easy1_32K ... bench: 40 ns/iter (+/- 1) = 819700 MB/s +test misc::hard_1K ... bench: 52 ns/iter (+/- 1) = 20211 MB/s +test misc::hard_1MB ... bench: 57 ns/iter (+/- 0) = 18396543 MB/s +test misc::hard_32 ... bench: 51 ns/iter (+/- 0) = 1156 MB/s +test misc::hard_32K ... bench: 51 ns/iter (+/- 3) = 643039 MB/s +test misc::is_match_set ... bench: 61 ns/iter (+/- 2) = 409 MB/s +test misc::literal ... bench: 14 ns/iter (+/- 0) = 3642 MB/s +test misc::long_needle1 ... bench: 3,249 ns/iter (+/- 87) = 30779 MB/s +test misc::long_needle2 ... bench: 350,559 ns/iter (+/- 7,154) = 285 MB/s +test misc::match_class ... bench: 61 ns/iter (+/- 4) = 1327 MB/s +test misc::match_class_in_range ... bench: 14 ns/iter (+/- 0) = 5785 MB/s +test misc::matches_set ... 
bench: 401 ns/iter (+/- 17) = 62 MB/s +test misc::medium_1K ... bench: 53 ns/iter (+/- 0) = 19849 MB/s +test misc::medium_1MB ... bench: 58 ns/iter (+/- 0) = 18079379 MB/s +test misc::medium_32 ... bench: 53 ns/iter (+/- 0) = 1132 MB/s +test misc::medium_32K ... bench: 53 ns/iter (+/- 2) = 618792 MB/s +test misc::no_exponential ... bench: 421 ns/iter (+/- 8) = 237 MB/s +test misc::not_literal ... bench: 90 ns/iter (+/- 0) = 566 MB/s +test misc::one_pass_long_prefix ... bench: 53 ns/iter (+/- 1) = 490 MB/s +test misc::one_pass_long_prefix_not ... bench: 53 ns/iter (+/- 0) = 490 MB/s +test misc::one_pass_short ... bench: 38 ns/iter (+/- 0) = 447 MB/s +test misc::one_pass_short_not ... bench: 42 ns/iter (+/- 3) = 404 MB/s +test misc::reallyhard2_1K ... bench: 77 ns/iter (+/- 1) = 13506 MB/s +test misc::reallyhard_1K ... bench: 1,592 ns/iter (+/- 1) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,575,759 ns/iter (+/- 49,997) = 665 MB/s +test misc::reallyhard_32 ... bench: 102 ns/iter (+/- 2) = 578 MB/s +test misc::reallyhard_32K ... bench: 49,326 ns/iter (+/- 1,055) = 664 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,161 ns/iter (+/- 94) = 1922 MB/s +test regexdna::find_new_lines ... bench: 12,344,799 ns/iter (+/- 188,054) = 411 MB/s +test regexdna::subst1 ... bench: 780,449 ns/iter (+/- 14,474) = 6513 MB/s +test regexdna::subst10 ... bench: 795,203 ns/iter (+/- 40,742) = 6392 MB/s +test regexdna::subst11 ... bench: 816,444 ns/iter (+/- 23,334) = 6226 MB/s +test regexdna::subst2 ... bench: 777,546 ns/iter (+/- 19,625) = 6537 MB/s +test regexdna::subst3 ... bench: 783,295 ns/iter (+/- 8,266) = 6489 MB/s +test regexdna::subst4 ... bench: 775,154 ns/iter (+/- 21,350) = 6557 MB/s +test regexdna::subst5 ... bench: 781,414 ns/iter (+/- 21,057) = 6505 MB/s +test regexdna::subst6 ... bench: 783,595 ns/iter (+/- 23,835) = 6487 MB/s +test regexdna::subst7 ... bench: 821,620 ns/iter (+/- 46,131) = 6187 MB/s +test regexdna::subst8 ... 
bench: 818,402 ns/iter (+/- 21,350) = 6211 MB/s +test regexdna::subst9 ... bench: 779,115 ns/iter (+/- 21,335) = 6524 MB/s +test regexdna::variant1 ... bench: 2,189,308 ns/iter (+/- 32,528) = 2321 MB/s +test regexdna::variant2 ... bench: 3,217,478 ns/iter (+/- 36,011) = 1579 MB/s +test regexdna::variant3 ... bench: 3,771,330 ns/iter (+/- 74,944) = 1347 MB/s +test regexdna::variant4 ... bench: 3,787,593 ns/iter (+/- 37,825) = 1342 MB/s +test regexdna::variant5 ... bench: 2,669,799 ns/iter (+/- 69,777) = 1904 MB/s +test regexdna::variant6 ... bench: 2,651,559 ns/iter (+/- 33,895) = 1917 MB/s +test regexdna::variant7 ... bench: 3,222,991 ns/iter (+/- 41,014) = 1577 MB/s +test regexdna::variant8 ... bench: 3,298,048 ns/iter (+/- 41,331) = 1541 MB/s +test regexdna::variant9 ... bench: 3,218,486 ns/iter (+/- 50,318) = 1579 MB/s +test rust_compile::compile_huge ... bench: 100,031 ns/iter (+/- 3,464) +test rust_compile::compile_huge_bytes ... bench: 5,885,102 ns/iter (+/- 130,016) +test rust_compile::compile_huge_full ... bench: 11,641,251 ns/iter (+/- 147,700) +test rust_compile::compile_simple ... bench: 4,263 ns/iter (+/- 116) +test rust_compile::compile_simple_bytes ... bench: 4,236 ns/iter (+/- 91) +test rust_compile::compile_simple_full ... bench: 22,349 ns/iter (+/- 2,085) +test rust_compile::compile_small ... bench: 9,537 ns/iter (+/- 298) +test rust_compile::compile_small_bytes ... bench: 178,561 ns/iter (+/- 3,796) +test rust_compile::compile_small_full ... bench: 363,343 ns/iter (+/- 9,481) +test sherlock::before_after_holmes ... bench: 907,022 ns/iter (+/- 19,133) = 655 MB/s +test sherlock::before_holmes ... bench: 63,729 ns/iter (+/- 1,830) = 9335 MB/s +test sherlock::everything_greedy ... bench: 2,181,593 ns/iter (+/- 46,002) = 272 MB/s +test sherlock::everything_greedy_nl ... bench: 884,811 ns/iter (+/- 26,211) = 672 MB/s +test sherlock::holmes_cochar_watson ... bench: 105,610 ns/iter (+/- 3,120) = 5633 MB/s +test sherlock::holmes_coword_watson ... 
bench: 480,986 ns/iter (+/- 13,228) = 1236 MB/s +test sherlock::ing_suffix ... bench: 322,921 ns/iter (+/- 3,555) = 1842 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,065,372 ns/iter (+/- 21,242) = 558 MB/s +test sherlock::letters ... bench: 22,109,015 ns/iter (+/- 146,243) = 26 MB/s +test sherlock::letters_lower ... bench: 21,686,153 ns/iter (+/- 206,041) = 27 MB/s +test sherlock::letters_upper ... bench: 1,778,225 ns/iter (+/- 25,935) = 334 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,355 ns/iter (+/- 26,781) = 662 MB/s +test sherlock::name_alt1 ... bench: 31,927 ns/iter (+/- 633) = 18634 MB/s +test sherlock::name_alt2 ... bench: 87,040 ns/iter (+/- 1,859) = 6835 MB/s +test sherlock::name_alt3 ... bench: 97,715 ns/iter (+/- 2,109) = 6088 MB/s +test sherlock::name_alt3_nocase ... bench: 944,955 ns/iter (+/- 26,503) = 629 MB/s +test sherlock::name_alt4 ... bench: 120,935 ns/iter (+/- 2,399) = 4919 MB/s +test sherlock::name_alt4_nocase ... bench: 228,597 ns/iter (+/- 7,137) = 2602 MB/s +test sherlock::name_alt5 ... bench: 91,174 ns/iter (+/- 1,096) = 6525 MB/s +test sherlock::name_alt5_nocase ... bench: 937,189 ns/iter (+/- 23,839) = 634 MB/s +test sherlock::name_holmes ... bench: 34,020 ns/iter (+/- 752) = 17487 MB/s +test sherlock::name_holmes_nocase ... bench: 117,194 ns/iter (+/- 3,444) = 5076 MB/s +test sherlock::name_sherlock ... bench: 22,557 ns/iter (+/- 388) = 26374 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,428 ns/iter (+/- 683) = 26526 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 99,637 ns/iter (+/- 636) = 5971 MB/s +test sherlock::name_sherlock_nocase ... bench: 97,895 ns/iter (+/- 1,875) = 6077 MB/s +test sherlock::name_whitespace ... bench: 30,772 ns/iter (+/- 1,591) = 19333 MB/s +test sherlock::no_match_common ... bench: 19,665 ns/iter (+/- 296) = 30253 MB/s +test sherlock::no_match_really_common ... bench: 27,403 ns/iter (+/- 2,507) = 21710 MB/s +test sherlock::no_match_uncommon ... 
bench: 19,601 ns/iter (+/- 293) = 30352 MB/s +test sherlock::quotes ... bench: 370,323 ns/iter (+/- 1,345) = 1606 MB/s +test sherlock::repeated_class_negation ... bench: 68,414,794 ns/iter (+/- 342,428) = 8 MB/s +test sherlock::the_lower ... bench: 327,767 ns/iter (+/- 5,493) = 1815 MB/s +test sherlock::the_nocase ... bench: 507,818 ns/iter (+/- 1,796) = 1171 MB/s +test sherlock::the_upper ... bench: 45,045 ns/iter (+/- 1,400) = 13207 MB/s +test sherlock::the_whitespace ... bench: 822,080 ns/iter (+/- 16,581) = 723 MB/s +test sherlock::word_ending_n ... bench: 1,690,084 ns/iter (+/- 40,361) = 352 MB/s +test sherlock::words ... bench: 8,573,617 ns/iter (+/- 143,313) = 69 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 107 measured; 0 filtered out; finished in 110.03s + diff --git a/bench/log/10-last-frontier/rust-bytes-before-literal.log b/bench/log/10-last-frontier/rust-bytes-before-literal.log new file mode 100644 index 0000000000..7016e3c565 --- /dev/null +++ b/bench/log/10-last-frontier/rust-bytes-before-literal.log @@ -0,0 +1,112 @@ + +running 107 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 19 ns/iter (+/- 1) = 20526 MB/s +test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 0) = 1444 MB/s +test misc::anchored_literal_short_non_match ... bench: 20 ns/iter (+/- 0) = 1300 MB/s +test misc::easy0_1K ... bench: 14 ns/iter (+/- 0) = 75071 MB/s +test misc::easy0_1MB ... bench: 21 ns/iter (+/- 0) = 49933476 MB/s +test misc::easy0_32 ... bench: 14 ns/iter (+/- 0) = 4214 MB/s +test misc::easy0_32K ... bench: 14 ns/iter (+/- 0) = 2342500 MB/s +test misc::easy1_1K ... bench: 41 ns/iter (+/- 0) = 25463 MB/s +test misc::easy1_1MB ... bench: 48 ns/iter (+/- 0) = 21845750 MB/s +test misc::easy1_32 ... bench: 41 ns/iter (+/- 0) = 1268 MB/s +test misc::easy1_32K ... bench: 41 ns/iter (+/- 1) = 799707 MB/s +test misc::hard_1K ... 
bench: 51 ns/iter (+/- 1) = 20607 MB/s +test misc::hard_1MB ... bench: 56 ns/iter (+/- 2) = 18725053 MB/s +test misc::hard_32 ... bench: 51 ns/iter (+/- 6) = 1156 MB/s +test misc::hard_32K ... bench: 51 ns/iter (+/- 1) = 643039 MB/s +test misc::is_match_set ... bench: 62 ns/iter (+/- 2) = 403 MB/s +test misc::literal ... bench: 13 ns/iter (+/- 0) = 3923 MB/s +test misc::long_needle1 ... bench: 2,825 ns/iter (+/- 57) = 35398 MB/s +test misc::long_needle2 ... bench: 350,755 ns/iter (+/- 11,905) = 285 MB/s +test misc::match_class ... bench: 64 ns/iter (+/- 1) = 1265 MB/s +test misc::match_class_in_range ... bench: 13 ns/iter (+/- 0) = 6230 MB/s +test misc::matches_set ... bench: 422 ns/iter (+/- 12) = 59 MB/s +test misc::medium_1K ... bench: 15 ns/iter (+/- 0) = 70133 MB/s +test misc::medium_1MB ... bench: 21 ns/iter (+/- 0) = 49933523 MB/s +test misc::medium_32 ... bench: 15 ns/iter (+/- 0) = 4000 MB/s +test misc::medium_32K ... bench: 14 ns/iter (+/- 0) = 2342571 MB/s +test misc::no_exponential ... bench: 443 ns/iter (+/- 12) = 225 MB/s +test misc::not_literal ... bench: 89 ns/iter (+/- 1) = 573 MB/s +test misc::one_pass_long_prefix ... bench: 52 ns/iter (+/- 1) = 500 MB/s +test misc::one_pass_long_prefix_not ... bench: 52 ns/iter (+/- 1) = 500 MB/s +test misc::one_pass_short ... bench: 40 ns/iter (+/- 1) = 425 MB/s +test misc::one_pass_short_not ... bench: 42 ns/iter (+/- 0) = 404 MB/s +test misc::reallyhard2_1K ... bench: 80 ns/iter (+/- 0) = 13000 MB/s +test misc::reallyhard_1K ... bench: 1,592 ns/iter (+/- 1) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,575,789 ns/iter (+/- 34,236) = 665 MB/s +test misc::reallyhard_32 ... bench: 101 ns/iter (+/- 2) = 584 MB/s +test misc::reallyhard_32K ... bench: 49,321 ns/iter (+/- 2,718) = 664 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,158 ns/iter (+/- 93) = 1924 MB/s +test regexdna::find_new_lines ... bench: 12,391,732 ns/iter (+/- 180,913) = 410 MB/s +test regexdna::subst1 ... 
bench: 781,690 ns/iter (+/- 29,637) = 6503 MB/s +test regexdna::subst10 ... bench: 778,306 ns/iter (+/- 22,706) = 6531 MB/s +test regexdna::subst11 ... bench: 777,716 ns/iter (+/- 24,635) = 6536 MB/s +test regexdna::subst2 ... bench: 791,786 ns/iter (+/- 15,778) = 6420 MB/s +test regexdna::subst3 ... bench: 783,470 ns/iter (+/- 25,543) = 6488 MB/s +test regexdna::subst4 ... bench: 814,902 ns/iter (+/- 14,146) = 6238 MB/s +test regexdna::subst5 ... bench: 781,464 ns/iter (+/- 19,532) = 6504 MB/s +test regexdna::subst6 ... bench: 780,116 ns/iter (+/- 16,558) = 6516 MB/s +test regexdna::subst7 ... bench: 795,982 ns/iter (+/- 11,254) = 6386 MB/s +test regexdna::subst8 ... bench: 781,746 ns/iter (+/- 24,996) = 6502 MB/s +test regexdna::subst9 ... bench: 783,793 ns/iter (+/- 14,943) = 6485 MB/s +test regexdna::variant1 ... bench: 2,188,940 ns/iter (+/- 42,308) = 2322 MB/s +test regexdna::variant2 ... bench: 3,218,011 ns/iter (+/- 50,700) = 1579 MB/s +test regexdna::variant3 ... bench: 3,778,907 ns/iter (+/- 90,543) = 1345 MB/s +test regexdna::variant4 ... bench: 3,803,852 ns/iter (+/- 68,319) = 1336 MB/s +test regexdna::variant5 ... bench: 2,660,949 ns/iter (+/- 55,488) = 1910 MB/s +test regexdna::variant6 ... bench: 2,647,131 ns/iter (+/- 26,846) = 1920 MB/s +test regexdna::variant7 ... bench: 3,235,032 ns/iter (+/- 37,599) = 1571 MB/s +test regexdna::variant8 ... bench: 3,305,124 ns/iter (+/- 67,109) = 1538 MB/s +test regexdna::variant9 ... bench: 3,231,033 ns/iter (+/- 55,626) = 1573 MB/s +test rust_compile::compile_huge ... bench: 99,387 ns/iter (+/- 2,366) +test rust_compile::compile_huge_bytes ... bench: 5,865,693 ns/iter (+/- 62,255) +test rust_compile::compile_huge_full ... bench: 11,752,845 ns/iter (+/- 195,440) +test rust_compile::compile_simple ... bench: 4,117 ns/iter (+/- 141) +test rust_compile::compile_simple_bytes ... bench: 4,162 ns/iter (+/- 67) +test rust_compile::compile_simple_full ... 
bench: 19,955 ns/iter (+/- 622) +test rust_compile::compile_small ... bench: 9,140 ns/iter (+/- 112) +test rust_compile::compile_small_bytes ... bench: 165,990 ns/iter (+/- 5,876) +test rust_compile::compile_small_full ... bench: 342,897 ns/iter (+/- 13,730) +test sherlock::before_after_holmes ... bench: 906,789 ns/iter (+/- 13,931) = 656 MB/s +test sherlock::before_holmes ... bench: 62,319 ns/iter (+/- 790) = 9546 MB/s +test sherlock::everything_greedy ... bench: 2,175,424 ns/iter (+/- 47,720) = 273 MB/s +test sherlock::everything_greedy_nl ... bench: 884,406 ns/iter (+/- 22,679) = 672 MB/s +test sherlock::holmes_cochar_watson ... bench: 105,261 ns/iter (+/- 3,536) = 5651 MB/s +test sherlock::holmes_coword_watson ... bench: 479,524 ns/iter (+/- 7,749) = 1240 MB/s +test sherlock::ing_suffix ... bench: 321,401 ns/iter (+/- 9,123) = 1851 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,069,722 ns/iter (+/- 16,366) = 556 MB/s +test sherlock::letters ... bench: 21,959,896 ns/iter (+/- 204,695) = 27 MB/s +test sherlock::letters_lower ... bench: 21,462,457 ns/iter (+/- 207,449) = 27 MB/s +test sherlock::letters_upper ... bench: 1,768,026 ns/iter (+/- 41,459) = 336 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,197 ns/iter (+/- 14,349) = 663 MB/s +test sherlock::name_alt1 ... bench: 34,037 ns/iter (+/- 719) = 17479 MB/s +test sherlock::name_alt2 ... bench: 86,788 ns/iter (+/- 1,203) = 6855 MB/s +test sherlock::name_alt3 ... bench: 98,225 ns/iter (+/- 1,589) = 6056 MB/s +test sherlock::name_alt3_nocase ... bench: 377,597 ns/iter (+/- 14,840) = 1575 MB/s +test sherlock::name_alt4 ... bench: 122,440 ns/iter (+/- 8,123) = 4858 MB/s +test sherlock::name_alt4_nocase ... bench: 187,282 ns/iter (+/- 5,176) = 3176 MB/s +test sherlock::name_alt5 ... bench: 91,429 ns/iter (+/- 1,944) = 6507 MB/s +test sherlock::name_alt5_nocase ... bench: 348,111 ns/iter (+/- 12,721) = 1709 MB/s +test sherlock::name_holmes ... 
bench: 33,547 ns/iter (+/- 1,119) = 17734 MB/s +test sherlock::name_holmes_nocase ... bench: 132,342 ns/iter (+/- 3,974) = 4495 MB/s +test sherlock::name_sherlock ... bench: 22,562 ns/iter (+/- 364) = 26368 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,313 ns/iter (+/- 579) = 26663 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 97,556 ns/iter (+/- 2,092) = 6098 MB/s +test sherlock::name_sherlock_nocase ... bench: 95,917 ns/iter (+/- 4,054) = 6202 MB/s +test sherlock::name_whitespace ... bench: 30,997 ns/iter (+/- 1,039) = 19193 MB/s +test sherlock::no_match_common ... bench: 19,690 ns/iter (+/- 378) = 30214 MB/s +test sherlock::no_match_really_common ... bench: 27,629 ns/iter (+/- 465) = 21532 MB/s +test sherlock::no_match_uncommon ... bench: 19,681 ns/iter (+/- 291) = 30228 MB/s +test sherlock::quotes ... bench: 368,290 ns/iter (+/- 1,508) = 1615 MB/s +test sherlock::repeated_class_negation ... bench: 73,004,024 ns/iter (+/- 1,040,743) = 8 MB/s +test sherlock::the_lower ... bench: 320,929 ns/iter (+/- 12,287) = 1853 MB/s +test sherlock::the_nocase ... bench: 514,946 ns/iter (+/- 11,241) = 1155 MB/s +test sherlock::the_upper ... bench: 43,816 ns/iter (+/- 1,719) = 13577 MB/s +test sherlock::the_whitespace ... bench: 825,245 ns/iter (+/- 20,797) = 720 MB/s +test sherlock::word_ending_n ... bench: 1,676,908 ns/iter (+/- 40,650) = 354 MB/s +test sherlock::words ... bench: 8,449,099 ns/iter (+/- 123,842) = 70 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 107 measured; 0 filtered out; finished in 128.47s + diff --git a/regex-capi/include/rure.h b/regex-capi/include/rure.h index a87be61a89..01173b4518 100644 --- a/regex-capi/include/rure.h +++ b/regex-capi/include/rure.h @@ -408,7 +408,7 @@ size_t rure_captures_len(rure_captures *captures); * safe to call rure_compile from multiple threads simultaneously using the * same options pointer. 
*/ -rure_options *rure_options_new(); +rure_options *rure_options_new(void); /* * rure_options_free frees the given options. @@ -536,7 +536,7 @@ size_t rure_set_len(rure_set *re); * It is not safe to use errors from multiple threads simultaneously. An error * value may be reused on subsequent calls to rure_compile. */ -rure_error *rure_error_new(); +rure_error *rure_error_new(void); /* * rure_error_free frees the error given. diff --git a/regex-debug/Cargo.toml b/regex-debug/Cargo.toml deleted file mode 100644 index 1db4036b98..0000000000 --- a/regex-debug/Cargo.toml +++ /dev/null @@ -1,18 +0,0 @@ -[package] -publish = false -name = "regex-debug" -version = "0.1.0" -authors = ["The Rust Project Developers"] -license = "MIT OR Apache-2.0" -repository = "https://github.com/rust-lang/regex" -documentation = "https://docs.rs/regex" -homepage = "https://github.com/rust-lang/regex" -description = "A tool useful for debugging regular expressions." -workspace = ".." -edition = "2018" - -[dependencies] -docopt = "1" -regex = { version = "1.1", path = ".." } -regex-syntax = { version = "0.6", path = "../regex-syntax" } -serde = { version = "1", features = ["derive"] } diff --git a/regex-debug/src/main.rs b/regex-debug/src/main.rs deleted file mode 100644 index a7dd453e1f..0000000000 --- a/regex-debug/src/main.rs +++ /dev/null @@ -1,376 +0,0 @@ -use std::error; -use std::io::{self, Write}; -use std::process; -use std::result; - -use docopt::Docopt; -use regex::internal::{Compiler, LiteralSearcher}; -use regex_syntax::hir::literal::Literals; -use regex_syntax::hir::Hir; - -const USAGE: &'static str = " -Usage: - regex-debug [options] ast - regex-debug [options] hir - regex-debug [options] prefixes ... - regex-debug [options] suffixes ... - regex-debug [options] anchors - regex-debug [options] captures - regex-debug [options] compile ... - regex-debug [options] utf8-ranges - regex-debug [options] utf8-ranges-rev - regex-debug --help - -Options: - --help Show this usage message. 
- --size-limit ARG An approximate size limit on the total size (in bytes) - of a compiled regular expression program. - [default: 10485760] - --bytes Show the instruction codes for byte oriented programs. - (As opposed to Unicode oriented programs.) - --dfa Show the instruction codes for a DFA. - --dfa-reverse Show the instruction codes for a reverse DFA. - This implies --dfa. - -a, --all-literals Shows all literals extracted. - By default, only unambiguous literals are shown. - --literal-limit ARG An approximate limit on the total size (in bytes) - of all literals extracted. [default: 250] - --class-limit ARG A limit on the size of character classes used to - extract literals. [default: 10] - --literal-bytes Show raw literal bytes instead of Unicode chars. - --lcp Show the longest common prefix of all the literals - extracted. - --lcs Show the longest common suffix of all the literals - extracted. - --searcher Show the debug output for the literal searcher - constructed by the literals found. - --quiet Show less output. 
-"; - -#[derive(serde::Deserialize)] -struct Args { - cmd_ast: bool, - cmd_hir: bool, - cmd_prefixes: bool, - cmd_suffixes: bool, - cmd_anchors: bool, - cmd_captures: bool, - cmd_compile: bool, - cmd_utf8_ranges: bool, - cmd_utf8_ranges_rev: bool, - - arg_pattern: String, - arg_patterns: Vec, - arg_class: String, - - flag_size_limit: usize, - flag_bytes: bool, - flag_dfa: bool, - flag_dfa_reverse: bool, - flag_all_literals: bool, - flag_literal_limit: usize, - flag_class_limit: usize, - flag_literal_bytes: bool, - flag_lcp: bool, - flag_lcs: bool, - flag_searcher: bool, - flag_quiet: bool, -} - -type Result = result::Result>; - -fn main() { - let mut args: Args = Docopt::new(USAGE) - .and_then(|d| d.deserialize()) - .unwrap_or_else(|e| e.exit()); - if args.flag_dfa_reverse { - args.flag_dfa = true; - } - match run(&args) { - Ok(_) => process::exit(0), - Err(err) => { - let _ = writeln!(&mut io::stderr(), "{}", err); - process::exit(1) - } - } -} - -fn run(args: &Args) -> Result<()> { - if args.cmd_ast { - cmd_ast(args) - } else if args.cmd_hir { - cmd_hir(args) - } else if args.cmd_prefixes { - cmd_literals(args) - } else if args.cmd_suffixes { - cmd_literals(args) - } else if args.cmd_anchors { - cmd_anchors(args) - } else if args.cmd_captures { - cmd_captures(args) - } else if args.cmd_compile { - cmd_compile(args) - } else if args.cmd_utf8_ranges { - cmd_utf8_ranges(args) - } else if args.cmd_utf8_ranges_rev { - cmd_utf8_ranges_rev(args) - } else { - unreachable!() - } -} - -fn cmd_ast(args: &Args) -> Result<()> { - use regex_syntax::ast::parse::Parser; - - let mut parser = Parser::new(); - let ast = parser.parse(&args.arg_pattern)?; - println!("{:#?}", ast); - Ok(()) -} - -fn cmd_hir(args: &Args) -> Result<()> { - use regex_syntax::ParserBuilder; - - let mut parser = ParserBuilder::new().allow_invalid_utf8(false).build(); - let hir = parser.parse(&args.arg_pattern)?; - println!("{:#?}", hir); - Ok(()) -} - -fn cmd_literals(args: &Args) -> Result<()> { - let 
exprs = args.parse_many()?; - let mut lits = if args.cmd_prefixes { - args.literals(&exprs, |lits, e| lits.union_prefixes(e)) - } else { - args.literals(&exprs, |lits, e| lits.union_suffixes(e)) - }; - if !args.flag_all_literals { - if args.cmd_prefixes { - lits = lits.unambiguous_prefixes(); - } else { - lits = lits.unambiguous_suffixes(); - } - } - if args.flag_searcher { - if args.cmd_prefixes { - println!("{:?}", LiteralSearcher::prefixes(lits)) - } else { - println!("{:?}", LiteralSearcher::suffixes(lits)) - } - } else if args.flag_lcp { - println!("{}", escape_unicode(lits.longest_common_prefix())); - } else if args.flag_lcs { - println!("{}", escape_unicode(lits.longest_common_suffix())); - } else { - for lit in lits.literals() { - if args.flag_literal_bytes { - if lit.is_cut() { - println!("Cut({})", escape_bytes(lit)); - } else { - println!("Complete({})", escape_bytes(lit)); - } - } else { - println!("{:?}", lit); - } - } - } - Ok(()) -} - -fn cmd_anchors(args: &Args) -> Result<()> { - let expr = args.parse_one()?; - if expr.is_anchored_start() { - println!("start"); - } - if expr.is_anchored_end() { - println!("end"); - } - Ok(()) -} - -fn cmd_captures(args: &Args) -> Result<()> { - let expr = args.parse_one()?; - let prog = args.compiler().only_utf8(false).compile(&[expr])?; - for (i, name) in prog.captures.iter().enumerate() { - match *name { - None => println!("{}", i), - Some(ref name) => println!("{}:{}", i, name), - } - } - Ok(()) -} - -fn cmd_compile(args: &Args) -> Result<()> { - let exprs = args.parse_many()?; - let compiler = args - .compiler() - .bytes(args.flag_bytes) - .only_utf8(!args.flag_bytes) - .dfa(args.flag_dfa) - .reverse(args.flag_dfa_reverse); - let prog = compiler.compile(&exprs)?; - if !args.flag_quiet { - print!("{:?}", prog); - } else { - println!("instruction count: {}", prog.insts.len()); - } - Ok(()) -} - -fn cmd_utf8_ranges(args: &Args) -> Result<()> { - use regex_syntax::hir::{self, HirKind}; - use 
regex_syntax::utf8::Utf8Sequences; - use regex_syntax::ParserBuilder; - - let hir = ParserBuilder::new() - .build() - .parse(&format!("[{}]", args.arg_class))?; - let cls = match hir.into_kind() { - HirKind::Class(hir::Class::Unicode(cls)) => cls, - _ => { - return Err( - format!("unexpected HIR, expected Unicode class").into() - ) - } - }; - let mut char_count = 0; - for (i, range) in cls.iter().enumerate() { - if i > 0 { - println!("----------------------------"); - } - char_count += (range.end() as u32) - (range.start() as u32) + 1; - for seq in Utf8Sequences::new(range.start(), range.end()) { - for utf8_range in seq.into_iter() { - print!("[{:02X}-{:02X}]", utf8_range.start, utf8_range.end); - } - println!(); - } - } - println!("codepoint count: {}", char_count); - Ok(()) -} - -fn cmd_utf8_ranges_rev(args: &Args) -> Result<()> { - use regex_syntax::hir::{self, HirKind}; - use regex_syntax::utf8::Utf8Sequences; - use regex_syntax::ParserBuilder; - - let hir = ParserBuilder::new() - .build() - .parse(&format!("[{}]", args.arg_class))?; - let cls = match hir.into_kind() { - HirKind::Class(hir::Class::Unicode(cls)) => cls, - _ => { - return Err( - format!("unexpected HIR, expected Unicode class").into() - ) - } - }; - let mut char_count = 0; - let mut seqs = vec![]; - for (_, range) in cls.iter().enumerate() { - char_count += (range.end() as u32) - (range.start() as u32) + 1; - for seq in Utf8Sequences::new(range.start(), range.end()) { - let mut seq = seq.as_slice().to_vec(); - seq.reverse(); - seqs.push(seq); - } - } - seqs.sort(); - for seq in seqs { - for utf8_range in seq.into_iter() { - print!("[{:02X}-{:02X}]", utf8_range.start, utf8_range.end); - } - println!(); - } - println!("codepoint count: {}", char_count); - Ok(()) -} - -impl Args { - fn parse_one(&self) -> Result { - parse(&self.arg_pattern) - } - - fn parse_many(&self) -> Result> { - self.arg_patterns.iter().map(|s| parse(s)).collect() - } - - fn literals bool>( - &self, - exprs: &[Hir], - 
get_literals: F, - ) -> Literals { - let mut lits = Some(self.empty_literals()); - for e in exprs { - lits = lits.and_then(|mut lits| { - if !get_literals(&mut lits, e) { - None - } else { - Some(lits) - } - }); - } - lits.unwrap_or(self.empty_literals()) - } - - fn empty_literals(&self) -> Literals { - let mut lits = Literals::empty(); - lits.set_limit_size(self.flag_literal_limit); - lits.set_limit_class(self.flag_class_limit); - lits - } - - fn compiler(&self) -> Compiler { - Compiler::new().size_limit(self.flag_size_limit) - } -} - -fn parse(re: &str) -> Result { - use regex_syntax::ParserBuilder; - ParserBuilder::new() - .allow_invalid_utf8(true) - .build() - .parse(re) - .map_err(From::from) -} - -fn escape_unicode(bytes: &[u8]) -> String { - let show = match ::std::str::from_utf8(bytes) { - Ok(v) => v.to_string(), - Err(_) => escape_bytes(bytes), - }; - let mut space_escaped = String::new(); - for c in show.chars() { - if c.is_whitespace() { - let escaped = if c as u32 <= 0x7F { - escape_byte(c as u8) - } else { - if c as u32 <= 0xFFFF { - format!(r"\u{{{:04x}}}", c as u32) - } else { - format!(r"\U{{{:08x}}}", c as u32) - } - }; - space_escaped.push_str(&escaped); - } else { - space_escaped.push(c); - } - } - space_escaped -} - -fn escape_bytes(bytes: &[u8]) -> String { - let mut s = String::new(); - for &b in bytes { - s.push_str(&escape_byte(b)); - } - s -} - -fn escape_byte(byte: u8) -> String { - use std::ascii::escape_default; - - let escaped: Vec = escape_default(byte).collect(); - String::from_utf8_lossy(&escaped).into_owned() -} diff --git a/regex-syntax/Cargo.toml b/regex-syntax/Cargo.toml index be9aeb5689..6ae9b84852 100644 --- a/regex-syntax/Cargo.toml +++ b/regex-syntax/Cargo.toml @@ -8,12 +8,14 @@ documentation = "https://docs.rs/regex-syntax" homepage = "https://github.com/rust-lang/regex" description = "A regular expression parser." workspace = ".." 
-edition = "2018" +edition = "2021" +rust-version = "1.60.0" # Features are documented in the "Crate features" section of the crate docs: # https://docs.rs/regex-syntax/*/#crate-features [features] -default = ["unicode"] +default = ["std", "unicode"] +std = [] unicode = [ "unicode-age", @@ -31,3 +33,16 @@ unicode-gencat = [] unicode-perl = [] unicode-script = [] unicode-segment = [] + +[package.metadata.docs.rs] +# We want to document all features. +all-features = true +# Since this crate's feature setup is pretty complicated, it is worth opting +# into a nightly unstable option to show the features that need to be enabled +# for public API items. To do that, we set 'docsrs', and when that's enabled, +# we enable the 'doc_auto_cfg' feature. +# +# To test this locally, run: +# +# RUSTDOCFLAGS="--cfg docsrs" cargo +nightly doc --all-features +rustdoc-args = ["--cfg", "docsrs"] diff --git a/regex-syntax/README.md b/regex-syntax/README.md index 592f842686..ff4fe094c3 100644 --- a/regex-syntax/README.md +++ b/regex-syntax/README.md @@ -30,13 +30,12 @@ concrete syntax that produced the `Hir`. This example shows how to parse a pattern string into its HIR: ```rust -use regex_syntax::Parser; -use regex_syntax::hir::{self, Hir}; +use regex_syntax::{hir::Hir, parse}; -let hir = Parser::new().parse("a|b").unwrap(); +let hir = parse("a|b").unwrap(); assert_eq!(hir, Hir::alternation(vec![ - Hir::literal(hir::Literal::Unicode('a')), - Hir::literal(hir::Literal::Unicode('b')), + Hir::literal("a".as_bytes()), + Hir::literal("b".as_bytes()), ])); ``` diff --git a/regex-syntax/src/ast/mod.rs b/regex-syntax/src/ast/mod.rs index 9db9afaf17..faabca2c1c 100644 --- a/regex-syntax/src/ast/mod.rs +++ b/regex-syntax/src/ast/mod.rs @@ -2,9 +2,9 @@ Defines an abstract syntax for regular expressions. 
*/ -use std::cmp::Ordering; -use std::error; -use std::fmt; +use core::cmp::Ordering; + +use alloc::{boxed::Box, string::String, vec, vec::Vec}; pub use crate::ast::visitor::{visit, Visitor}; @@ -65,6 +65,10 @@ impl Error { } /// The type of an error that occurred while building an AST. +/// +/// This error type is marked as `non_exhaustive`. This means that adding a +/// new variant is not considered a breaking change. +#[non_exhaustive] #[derive(Clone, Debug, Eq, PartialEq)] pub enum ErrorKind { /// The capturing group limit was exceeded. @@ -169,71 +173,26 @@ pub enum ErrorKind { /// `(? &str { - use self::ErrorKind::*; - match self.kind { - CaptureLimitExceeded => "capture group limit exceeded", - ClassEscapeInvalid => "invalid escape sequence in character class", - ClassRangeInvalid => "invalid character class range", - ClassRangeLiteral => "invalid range boundary, must be a literal", - ClassUnclosed => "unclosed character class", - DecimalEmpty => "empty decimal literal", - DecimalInvalid => "invalid decimal literal", - EscapeHexEmpty => "empty hexadecimal literal", - EscapeHexInvalid => "invalid hexadecimal literal", - EscapeHexInvalidDigit => "invalid hexadecimal digit", - EscapeUnexpectedEof => "unexpected eof (escape sequence)", - EscapeUnrecognized => "unrecognized escape sequence", - FlagDanglingNegation => "dangling flag negation operator", - FlagDuplicate { .. } => "duplicate flag", - FlagRepeatedNegation { .. } => "repeated negation", - FlagUnexpectedEof => "unexpected eof (flag)", - FlagUnrecognized => "unrecognized flag", - GroupNameDuplicate { .. 
} => "duplicate capture group name", - GroupNameEmpty => "empty capture group name", - GroupNameInvalid => "invalid capture group name", - GroupNameUnexpectedEof => "unclosed capture group name", - GroupUnclosed => "unclosed group", - GroupUnopened => "unopened group", - NestLimitExceeded(_) => "nest limit exceeded", - RepetitionCountInvalid => "invalid repetition count range", - RepetitionCountUnclosed => "unclosed counted repetition", - RepetitionMissing => "repetition operator missing expression", - UnicodeClassInvalid => "invalid Unicode character class", - UnsupportedBackreference => "backreferences are not supported", - UnsupportedLookAround => "look-around is not supported", - _ => unreachable!(), - } - } -} +#[cfg(feature = "std")] +impl std::error::Error for Error {} -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { crate::error::Formatter::from(self).fmt(f) } } -impl fmt::Display for ErrorKind { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for ErrorKind { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { use self::ErrorKind::*; match *self { CaptureLimitExceeded => write!( f, "exceeded the maximum number of \ capturing groups ({})", - ::std::u32::MAX + u32::MAX ), ClassEscapeInvalid => { write!(f, "invalid escape sequence found in character class") @@ -310,7 +269,6 @@ impl fmt::Display for ErrorKind { "look-around, including look-ahead and look-behind, \ is not supported" ), - _ => unreachable!(), } } } @@ -327,8 +285,8 @@ pub struct Span { pub end: Position, } -impl fmt::Debug for Span { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Debug for Span { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!(f, "Span({:?}, {:?})", self.start, self.end) } } @@ -360,8 +318,8 @@ pub struct 
Position { pub column: usize, } -impl fmt::Debug for Position { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Debug for Position { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!( f, "Position(o: {:?}, l: {:?}, c: {:?})", @@ -541,8 +499,8 @@ impl Ast { /// /// This implementation uses constant stack space and heap space proportional /// to the size of the `Ast`. -impl fmt::Display for Ast { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for Ast { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { use crate::ast::print::Printer; Printer::new().print(self, f) } @@ -615,11 +573,12 @@ impl Literal { /// If this literal was written as a `\x` hex escape, then this returns /// the corresponding byte value. Otherwise, this returns `None`. pub fn byte(&self) -> Option { - let short_hex = LiteralKind::HexFixed(HexLiteralKind::X); - if self.c as u32 <= 255 && self.kind == short_hex { - Some(self.c as u8) - } else { - None + match self.kind { + LiteralKind::HexFixed(HexLiteralKind::X) => { + // MSRV(1.59): Use 'u8::try_from(self.c)' instead. + u8::try_from(u32::from(self.c)).ok() + } + _ => None, } } } @@ -629,9 +588,12 @@ impl Literal { pub enum LiteralKind { /// The literal is written verbatim, e.g., `a` or `☃`. Verbatim, - /// The literal is written as an escape because it is punctuation, e.g., - /// `\*` or `\[`. - Punctuation, + /// The literal is written as an escape because it is otherwise a special + /// regex meta character, e.g., `\*` or `\[`. + Meta, + /// The literal is written as an escape despite the fact that the escape is + /// unnecessary, e.g., `\%` or `\/`. + Superfluous, /// The literal is written as an octal escape, e.g., `\141`. Octal, /// The literal is written as a hex code with a fixed number of digits @@ -1203,7 +1165,7 @@ impl Group { /// Returns true if and only if this group is capturing. 
pub fn is_capturing(&self) -> bool { match self.kind { - GroupKind::CaptureIndex(_) | GroupKind::CaptureName(_) => true, + GroupKind::CaptureIndex(_) | GroupKind::CaptureName { .. } => true, GroupKind::NonCapturing(_) => false, } } @@ -1214,7 +1176,7 @@ impl Group { pub fn capture_index(&self) -> Option { match self.kind { GroupKind::CaptureIndex(i) => Some(i), - GroupKind::CaptureName(ref x) => Some(x.index), + GroupKind::CaptureName { ref name, .. } => Some(name.index), GroupKind::NonCapturing(_) => None, } } @@ -1225,8 +1187,13 @@ impl Group { pub enum GroupKind { /// `(a)` CaptureIndex(u32), - /// `(?Pa)` - CaptureName(CaptureName), + /// `(?a)` or `(?Pa)` + CaptureName { + /// True if the `?P<` syntax is used and false if the `?<` syntax is used. + starts_with_p: bool, + /// The capture name. + name: CaptureName, + }, /// `(?:a)` and `(?i:a)` NonCapturing(Flags), } @@ -1350,6 +1317,8 @@ pub enum Flag { SwapGreed, /// `u` Unicode, + /// `R` + CRLF, /// `x` IgnoreWhitespace, } @@ -1358,7 +1327,7 @@ pub enum Flag { /// space but heap space proportional to the depth of the `Ast`. impl Drop for Ast { fn drop(&mut self) { - use std::mem; + use core::mem; match *self { Ast::Empty(_) @@ -1408,7 +1377,7 @@ impl Drop for Ast { /// stack space but heap space proportional to the depth of the `ClassSet`. impl Drop for ClassSet { fn drop(&mut self) { - use std::mem; + use core::mem; match *self { ClassSet::Item(ref item) => match *item { diff --git a/regex-syntax/src/ast/parse.rs b/regex-syntax/src/ast/parse.rs index 6e9c9aca06..9cf64e9ec7 100644 --- a/regex-syntax/src/ast/parse.rs +++ b/regex-syntax/src/ast/parse.rs @@ -2,17 +2,26 @@ This module provides a regular expression parser. 
*/ -use std::borrow::Borrow; -use std::cell::{Cell, RefCell}; -use std::mem; -use std::result; - -use crate::ast::{self, Ast, Position, Span}; -use crate::either::Either; - -use crate::is_meta_character; - -type Result = result::Result; +use core::{ + borrow::Borrow, + cell::{Cell, RefCell}, + mem, +}; + +use alloc::{ + boxed::Box, + string::{String, ToString}, + vec, + vec::Vec, +}; + +use crate::{ + ast::{self, Ast, Position, Span}, + either::Either, + is_escapeable_character, is_meta_character, +}; + +type Result = core::result::Result; /// A primitive is an expression with no sub-expressions. This includes /// literals, assertions and non-set character classes. This representation @@ -100,11 +109,11 @@ fn is_hex(c: char) -> bool { /// If `first` is true, then `c` is treated as the first character in the /// group name (which must be alphabetic or underscore). fn is_capture_char(c: char, first: bool) -> bool { - c == '_' - || (!first - && (('0' <= c && c <= '9') || c == '.' || c == '[' || c == ']')) - || ('A' <= c && c <= 'Z') - || ('a' <= c && c <= 'z') + if first { + c == '_' || c.is_alphabetic() + } else { + c == '_' || c == '.' || c == '[' || c == ']' || c.is_alphanumeric() + } } /// A builder for a regular expression parser. @@ -162,7 +171,7 @@ impl ParserBuilder { /// constant stack space and moving the call stack to the heap), other /// crates may. /// - /// This limit is not checked until the entire Ast is parsed. Therefore, + /// This limit is not checked until the entire AST is parsed. Therefore, /// if callers want to put a limit on the amount of heap space used, then /// they should impose a limit on the length, in bytes, of the concrete /// pattern string. In particular, this is viable since this parser @@ -220,8 +229,7 @@ impl ParserBuilder { /// abstract syntax tree. The size of the tree is proportional to the length /// of the regular expression pattern. 
/// -/// A `Parser` can be configured in more detail via a -/// [`ParserBuilder`](struct.ParserBuilder.html). +/// A `Parser` can be configured in more detail via a [`ParserBuilder`]. #[derive(Clone, Debug)] pub struct Parser { /// The current position of the parser. @@ -327,8 +335,7 @@ impl Parser { /// The parser can be run with either the `parse` or `parse_with_comments` /// methods. The parse methods return an abstract syntax tree. /// - /// To set configuration options on the parser, use - /// [`ParserBuilder`](struct.ParserBuilder.html). + /// To set configuration options on the parser, use [`ParserBuilder`]. pub fn new() -> Parser { ParserBuilder::new().build() } @@ -1195,12 +1202,16 @@ impl<'s, P: Borrow> ParserI<'s, P> { )); } let inner_span = self.span(); - if self.bump_if("?P<") { + let mut starts_with_p = true; + if self.bump_if("?P<") || { + starts_with_p = false; + self.bump_if("?<") + } { let capture_index = self.next_capture_index(open_span)?; - let cap = self.parse_capture_name(capture_index)?; + let name = self.parse_capture_name(capture_index)?; Ok(Either::Right(ast::Group { span: open_span, - kind: ast::GroupKind::CaptureName(cap), + kind: ast::GroupKind::CaptureName { starts_with_p, name }, ast: Box::new(Ast::Empty(self.span())), })) } else if self.bump_if("?") { @@ -1370,6 +1381,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { 's' => Ok(ast::Flag::DotMatchesNewLine), 'U' => Ok(ast::Flag::SwapGreed), 'u' => Ok(ast::Flag::Unicode), + 'R' => Ok(ast::Flag::CRLF), 'x' => Ok(ast::Flag::IgnoreWhitespace), _ => { Err(self @@ -1483,7 +1495,14 @@ impl<'s, P: Borrow> ParserI<'s, P> { if is_meta_character(c) { return Ok(Primitive::Literal(ast::Literal { span, - kind: ast::LiteralKind::Punctuation, + kind: ast::LiteralKind::Meta, + c, + })); + } + if is_escapeable_character(c) { + return Ok(Primitive::Literal(ast::Literal { + span, + kind: ast::LiteralKind::Superfluous, c, })); } @@ -1501,9 +1520,6 @@ impl<'s, P: Borrow> ParserI<'s, P> { 'n' => 
special(ast::SpecialLiteralKind::LineFeed, '\n'), 'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'), 'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'), - ' ' if self.ignore_whitespace() => { - special(ast::SpecialLiteralKind::Space, ' ') - } 'A' => Ok(Primitive::Assertion(ast::Assertion { span, kind: ast::AssertionKind::StartText, @@ -1533,9 +1549,6 @@ impl<'s, P: Borrow> ParserI<'s, P> { /// Assuming the preconditions are met, this routine can never fail. #[inline(never)] fn parse_octal(&self) -> ast::Literal { - use std::char; - use std::u32; - assert!(self.parser().octal); assert!('0' <= self.char() && self.char() <= '7'); let start = self.pos(); @@ -1600,9 +1613,6 @@ impl<'s, P: Borrow> ParserI<'s, P> { &self, kind: ast::HexLiteralKind, ) -> Result { - use std::char; - use std::u32; - let mut scratch = self.parser().scratch.borrow_mut(); scratch.clear(); @@ -1646,9 +1656,6 @@ impl<'s, P: Borrow> ParserI<'s, P> { &self, kind: ast::HexLiteralKind, ) -> Result { - use std::char; - use std::u32; - let mut scratch = self.parser().scratch.borrow_mut(); scratch.clear(); @@ -2146,7 +2153,7 @@ impl<'p, 's, P: Borrow> NestLimiter<'p, 's, P> { let new = self.depth.checked_add(1).ok_or_else(|| { self.p.error( span.clone(), - ast::ErrorKind::NestLimitExceeded(::std::u32::MAX), + ast::ErrorKind::NestLimitExceeded(u32::MAX), ) })?; let limit = self.p.parser().nest_limit; @@ -2297,11 +2304,14 @@ fn specialize_err( #[cfg(test)] mod tests { - use std::ops::Range; + use core::ops::Range; + + use alloc::format; - use super::{Parser, ParserBuilder, ParserI, Primitive}; use crate::ast::{self, Ast, Position, Span}; + use super::*; + // Our own assert_eq, which has slightly better formatting (but honestly // still kind of crappy). macro_rules! assert_eq { @@ -2414,13 +2424,9 @@ mod tests { lit_with(c, span(start..start + c.len_utf8())) } - /// Create a punctuation literal starting at the given position. 
- fn punct_lit(c: char, span: Span) -> Ast { - Ast::Literal(ast::Literal { - span, - kind: ast::LiteralKind::Punctuation, - c, - }) + /// Create a meta literal starting at the given position. + fn meta_lit(c: char, span: Span) -> Ast { + Ast::Literal(ast::Literal { span, kind: ast::LiteralKind::Meta, c }) } /// Create a verbatim literal with the given span. @@ -2704,24 +2710,24 @@ bar Ok(concat( 0..36, vec![ - punct_lit('\\', span(0..2)), - punct_lit('.', span(2..4)), - punct_lit('+', span(4..6)), - punct_lit('*', span(6..8)), - punct_lit('?', span(8..10)), - punct_lit('(', span(10..12)), - punct_lit(')', span(12..14)), - punct_lit('|', span(14..16)), - punct_lit('[', span(16..18)), - punct_lit(']', span(18..20)), - punct_lit('{', span(20..22)), - punct_lit('}', span(22..24)), - punct_lit('^', span(24..26)), - punct_lit('$', span(26..28)), - punct_lit('#', span(28..30)), - punct_lit('&', span(30..32)), - punct_lit('-', span(32..34)), - punct_lit('~', span(34..36)), + meta_lit('\\', span(0..2)), + meta_lit('.', span(2..4)), + meta_lit('+', span(4..6)), + meta_lit('*', span(6..8)), + meta_lit('?', span(8..10)), + meta_lit('(', span(10..12)), + meta_lit(')', span(12..14)), + meta_lit('|', span(14..16)), + meta_lit('[', span(16..18)), + meta_lit(']', span(18..20)), + meta_lit('{', span(20..22)), + meta_lit('}', span(22..24)), + meta_lit('^', span(24..26)), + meta_lit('$', span(26..28)), + meta_lit('#', span(28..30)), + meta_lit('&', span(30..32)), + meta_lit('-', span(32..34)), + meta_lit('~', span(34..36)), ] )) ); @@ -2799,11 +2805,14 @@ bar flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), Ast::Group(ast::Group { span: span_range(pat, 4..pat.len()), - kind: ast::GroupKind::CaptureName(ast::CaptureName { - span: span_range(pat, 9..12), - name: s("foo"), - index: 1, - }), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: ast::CaptureName { + span: span_range(pat, 9..12), + name: s("foo"), + index: 1, + } + }, ast: Box::new(lit_with('a', 
span_range(pat, 14..15))), }), ] @@ -2870,23 +2879,12 @@ bar flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), Ast::Literal(ast::Literal { span: span_range(pat, 4..6), - kind: ast::LiteralKind::Special( - ast::SpecialLiteralKind::Space - ), + kind: ast::LiteralKind::Superfluous, c: ' ', }), ] )) ); - // ... but only when `x` mode is enabled. - let pat = r"\ "; - assert_eq!( - parser(pat).parse().unwrap_err(), - TestError { - span: span_range(pat, 0..2), - kind: ast::ErrorKind::EscapeUnrecognized, - } - ); } #[test] @@ -3818,15 +3816,33 @@ bar #[test] fn parse_capture_name() { + assert_eq!( + parser("(?z)").parse(), + Ok(Ast::Group(ast::Group { + span: span(0..7), + kind: ast::GroupKind::CaptureName { + starts_with_p: false, + name: ast::CaptureName { + span: span(3..4), + name: s("a"), + index: 1, + } + }, + ast: Box::new(lit('z', 5)), + })) + ); assert_eq!( parser("(?Pz)").parse(), Ok(Ast::Group(ast::Group { span: span(0..8), - kind: ast::GroupKind::CaptureName(ast::CaptureName { - span: span(4..5), - name: s("a"), - index: 1, - }), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: ast::CaptureName { + span: span(4..5), + name: s("a"), + index: 1, + } + }, ast: Box::new(lit('z', 6)), })) ); @@ -3834,11 +3850,14 @@ bar parser("(?Pz)").parse(), Ok(Ast::Group(ast::Group { span: span(0..10), - kind: ast::GroupKind::CaptureName(ast::CaptureName { - span: span(4..7), - name: s("abc"), - index: 1, - }), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: ast::CaptureName { + span: span(4..7), + name: s("abc"), + index: 1, + } + }, ast: Box::new(lit('z', 8)), })) ); @@ -3847,11 +3866,14 @@ bar parser("(?Pz)").parse(), Ok(Ast::Group(ast::Group { span: span(0..10), - kind: ast::GroupKind::CaptureName(ast::CaptureName { - span: span(4..7), - name: s("a_1"), - index: 1, - }), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: ast::CaptureName { + span: span(4..7), + name: s("a_1"), + index: 1, + } + }, ast: 
Box::new(lit('z', 8)), })) ); @@ -3860,11 +3882,14 @@ bar parser("(?Pz)").parse(), Ok(Ast::Group(ast::Group { span: span(0..10), - kind: ast::GroupKind::CaptureName(ast::CaptureName { - span: span(4..7), - name: s("a.1"), - index: 1, - }), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: ast::CaptureName { + span: span(4..7), + name: s("a.1"), + index: 1, + } + }, ast: Box::new(lit('z', 8)), })) ); @@ -3873,15 +3898,67 @@ bar parser("(?Pz)").parse(), Ok(Ast::Group(ast::Group { span: span(0..11), - kind: ast::GroupKind::CaptureName(ast::CaptureName { - span: span(4..8), - name: s("a[1]"), - index: 1, - }), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: ast::CaptureName { + span: span(4..8), + name: s("a[1]"), + index: 1, + } + }, ast: Box::new(lit('z', 9)), })) ); + assert_eq!( + parser("(?P)").parse(), + Ok(Ast::Group(ast::Group { + span: Span::new( + Position::new(0, 1, 1), + Position::new(9, 1, 9), + ), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: ast::CaptureName { + span: Span::new( + Position::new(4, 1, 5), + Position::new(7, 1, 7), + ), + name: s("a¾"), + index: 1, + } + }, + ast: Box::new(Ast::Empty(Span::new( + Position::new(8, 1, 8), + Position::new(8, 1, 8), + ))), + })) + ); + assert_eq!( + parser("(?P<名字>)").parse(), + Ok(Ast::Group(ast::Group { + span: Span::new( + Position::new(0, 1, 1), + Position::new(12, 1, 9), + ), + kind: ast::GroupKind::CaptureName { + starts_with_p: true, + name: ast::CaptureName { + span: Span::new( + Position::new(4, 1, 5), + Position::new(10, 1, 7), + ), + name: s("名字"), + index: 1, + } + }, + ast: Box::new(Ast::Empty(Span::new( + Position::new(11, 1, 8), + Position::new(11, 1, 8), + ))), + })) + ); + assert_eq!( parser("(?P<").parse().unwrap_err(), TestError { @@ -3940,6 +4017,60 @@ bar }, } ); + assert_eq!( + parser("(?P<5>)").parse().unwrap_err(), + TestError { + span: span(4..5), + kind: ast::ErrorKind::GroupNameInvalid, + } + ); + assert_eq!( + 
parser("(?P<5a>)").parse().unwrap_err(), + TestError { + span: span(4..5), + kind: ast::ErrorKind::GroupNameInvalid, + } + ); + assert_eq!( + parser("(?P<¾>)").parse().unwrap_err(), + TestError { + span: Span::new( + Position::new(4, 1, 5), + Position::new(6, 1, 6), + ), + kind: ast::ErrorKind::GroupNameInvalid, + } + ); + assert_eq!( + parser("(?P<¾a>)").parse().unwrap_err(), + TestError { + span: Span::new( + Position::new(4, 1, 5), + Position::new(6, 1, 6), + ), + kind: ast::ErrorKind::GroupNameInvalid, + } + ); + assert_eq!( + parser("(?P<☃>)").parse().unwrap_err(), + TestError { + span: Span::new( + Position::new(4, 1, 5), + Position::new(7, 1, 6), + ), + kind: ast::ErrorKind::GroupNameInvalid, + } + ); + assert_eq!( + parser("(?P)").parse().unwrap_err(), + TestError { + span: Span::new( + Position::new(5, 1, 6), + Position::new(8, 1, 7), + ), + kind: ast::ErrorKind::GroupNameInvalid, + } + ); } #[test] @@ -4046,6 +4177,34 @@ bar ], }) ); + assert_eq!( + parser("i-sR:").parse_flags(), + Ok(ast::Flags { + span: span(0..4), + items: vec![ + ast::FlagsItem { + span: span(0..1), + kind: ast::FlagsItemKind::Flag( + ast::Flag::CaseInsensitive + ), + }, + ast::FlagsItem { + span: span(1..2), + kind: ast::FlagsItemKind::Negation, + }, + ast::FlagsItem { + span: span(2..3), + kind: ast::FlagsItemKind::Flag( + ast::Flag::DotMatchesNewLine + ), + }, + ast::FlagsItem { + span: span(3..4), + kind: ast::FlagsItemKind::Flag(ast::Flag::CRLF), + }, + ], + }) + ); assert_eq!( parser("isU").parse_flags().unwrap_err(), @@ -4107,6 +4266,7 @@ bar assert_eq!(parser("s").parse_flag(), Ok(ast::Flag::DotMatchesNewLine)); assert_eq!(parser("U").parse_flag(), Ok(ast::Flag::SwapGreed)); assert_eq!(parser("u").parse_flag(), Ok(ast::Flag::Unicode)); + assert_eq!(parser("R").parse_flag(), Ok(ast::Flag::CRLF)); assert_eq!(parser("x").parse_flag(), Ok(ast::Flag::IgnoreWhitespace)); assert_eq!( @@ -4178,7 +4338,7 @@ bar parser(r"\|").parse_primitive(), Ok(Primitive::Literal(ast::Literal { span: 
span(0..2), - kind: ast::LiteralKind::Punctuation, + kind: ast::LiteralKind::Meta, c: '|', })) ); @@ -4229,11 +4389,26 @@ bar })) ); + // We also support superfluous escapes in most cases now too. + for c in ['!', '@', '%', '"', '\'', '/', ' '] { + let pat = format!(r"\{}", c); + assert_eq!( + parser(&pat).parse_primitive(), + Ok(Primitive::Literal(ast::Literal { + span: span(0..2), + kind: ast::LiteralKind::Superfluous, + c, + })) + ); + } + + // Some superfluous escapes, namely [0-9A-Za-z], are still banned. This + // gives flexibility for future evolution. assert_eq!( - parser(r"\").parse_escape().unwrap_err(), + parser(r"\e").parse_escape().unwrap_err(), TestError { - span: span(0..1), - kind: ast::ErrorKind::EscapeUnexpectedEof, + span: span(0..2), + kind: ast::ErrorKind::EscapeUnrecognized, } ); assert_eq!( @@ -4243,6 +4418,31 @@ bar kind: ast::ErrorKind::EscapeUnrecognized, } ); + // But also, < and > are banned, so that we may evolve them into + // start/end word boundary assertions. (Not sure if we will...) + assert_eq!( + parser(r"\<").parse_escape().unwrap_err(), + TestError { + span: span(0..2), + kind: ast::ErrorKind::EscapeUnrecognized, + } + ); + assert_eq!( + parser(r"\>").parse_escape().unwrap_err(), + TestError { + span: span(0..2), + kind: ast::ErrorKind::EscapeUnrecognized, + } + ); + + // An unfinished escape is illegal. 
+ assert_eq!( + parser(r"\").parse_escape().unwrap_err(), + TestError { + span: span(0..1), + kind: ast::ErrorKind::EscapeUnexpectedEof, + } + ); } #[test] @@ -4272,7 +4472,7 @@ bar Ok(Primitive::Literal(ast::Literal { span: span(0..pat.len()), kind: ast::LiteralKind::Octal, - c: ::std::char::from_u32(i).unwrap(), + c: char::from_u32(i).unwrap(), })) ); } @@ -4347,7 +4547,7 @@ bar Ok(Primitive::Literal(ast::Literal { span: span(0..pat.len()), kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X), - c: ::std::char::from_u32(i).unwrap(), + c: char::from_u32(i).unwrap(), })) ); } @@ -4378,7 +4578,7 @@ bar #[test] fn parse_hex_four() { for i in 0..65536 { - let c = match ::std::char::from_u32(i) { + let c = match char::from_u32(i) { None => continue, Some(c) => c, }; @@ -4442,7 +4642,7 @@ bar #[test] fn parse_hex_eight() { for i in 0..65536 { - let c = match ::std::char::from_u32(i) { + let c = match char::from_u32(i) { None => continue, Some(c) => c, }; @@ -4839,7 +5039,7 @@ bar lit(span(1..2), 'a'), ast::ClassSetItem::Literal(ast::Literal { span: span(2..4), - kind: ast::LiteralKind::Punctuation, + kind: ast::LiteralKind::Meta, c: ']', }), ] @@ -4857,7 +5057,7 @@ bar lit(span(1..2), 'a'), ast::ClassSetItem::Literal(ast::Literal { span: span(2..4), - kind: ast::LiteralKind::Punctuation, + kind: ast::LiteralKind::Meta, c: '-', }), lit(span(4..5), 'z'), @@ -5049,7 +5249,7 @@ bar span(1..6), itemset(ast::ClassSetItem::Literal(ast::Literal { span: span(1..3), - kind: ast::LiteralKind::Punctuation, + kind: ast::LiteralKind::Meta, c: '^', })), itemset(lit(span(5..6), '^')), @@ -5065,7 +5265,7 @@ bar span(1..6), itemset(ast::ClassSetItem::Literal(ast::Literal { span: span(1..3), - kind: ast::LiteralKind::Punctuation, + kind: ast::LiteralKind::Meta, c: '&', })), itemset(lit(span(5..6), '&')), @@ -5130,7 +5330,7 @@ bar lit(span(1..2), ']'), ast::ClassSetItem::Literal(ast::Literal { span: span(2..4), - kind: ast::LiteralKind::Punctuation, + kind: ast::LiteralKind::Meta, c: 
'[', }), ] @@ -5148,7 +5348,7 @@ bar kind: itemset(ast::ClassSetItem::Literal( ast::Literal { span: span(1..3), - kind: ast::LiteralKind::Punctuation, + kind: ast::LiteralKind::Meta, c: '[', } )), diff --git a/regex-syntax/src/ast/print.rs b/regex-syntax/src/ast/print.rs index 045de2eaf2..86a87e1439 100644 --- a/regex-syntax/src/ast/print.rs +++ b/regex-syntax/src/ast/print.rs @@ -2,10 +2,13 @@ This module provides a regular expression printer for `Ast`. */ -use std::fmt; +use core::fmt; -use crate::ast::visitor::{self, Visitor}; -use crate::ast::{self, Ast}; +use crate::ast::{ + self, + visitor::{self, Visitor}, + Ast, +}; /// A builder for constructing a printer. /// @@ -157,9 +160,10 @@ impl Writer { use crate::ast::GroupKind::*; match ast.kind { CaptureIndex(_) => self.wtr.write_str("("), - CaptureName(ref x) => { - self.wtr.write_str("(?P<")?; - self.wtr.write_str(&x.name)?; + CaptureName { ref name, starts_with_p } => { + let start = if starts_with_p { "(?P<" } else { "(?<" }; + self.wtr.write_str(start)?; + self.wtr.write_str(&name.name)?; self.wtr.write_str(">")?; Ok(()) } @@ -212,25 +216,25 @@ impl Writer { match ast.kind { Verbatim => self.wtr.write_char(ast.c), - Punctuation => write!(self.wtr, r"\{}", ast.c), - Octal => write!(self.wtr, r"\{:o}", ast.c as u32), + Meta | Superfluous => write!(self.wtr, r"\{}", ast.c), + Octal => write!(self.wtr, r"\{:o}", u32::from(ast.c)), HexFixed(ast::HexLiteralKind::X) => { - write!(self.wtr, r"\x{:02X}", ast.c as u32) + write!(self.wtr, r"\x{:02X}", u32::from(ast.c)) } HexFixed(ast::HexLiteralKind::UnicodeShort) => { - write!(self.wtr, r"\u{:04X}", ast.c as u32) + write!(self.wtr, r"\u{:04X}", u32::from(ast.c)) } HexFixed(ast::HexLiteralKind::UnicodeLong) => { - write!(self.wtr, r"\U{:08X}", ast.c as u32) + write!(self.wtr, r"\U{:08X}", u32::from(ast.c)) } HexBrace(ast::HexLiteralKind::X) => { - write!(self.wtr, r"\x{{{:X}}}", ast.c as u32) + write!(self.wtr, r"\x{{{:X}}}", u32::from(ast.c)) } 
HexBrace(ast::HexLiteralKind::UnicodeShort) => { - write!(self.wtr, r"\u{{{:X}}}", ast.c as u32) + write!(self.wtr, r"\u{{{:X}}}", u32::from(ast.c)) } HexBrace(ast::HexLiteralKind::UnicodeLong) => { - write!(self.wtr, r"\U{{{:X}}}", ast.c as u32) + write!(self.wtr, r"\U{{{:X}}}", u32::from(ast.c)) } Special(ast::SpecialLiteralKind::Bell) => { self.wtr.write_str(r"\a") @@ -285,6 +289,7 @@ impl Writer { Flag::DotMatchesNewLine => self.wtr.write_str("s"), Flag::SwapGreed => self.wtr.write_str("U"), Flag::Unicode => self.wtr.write_str("u"), + Flag::CRLF => self.wtr.write_str("R"), Flag::IgnoreWhitespace => self.wtr.write_str("x"), }, }?; @@ -395,9 +400,12 @@ impl Writer { #[cfg(test)] mod tests { - use super::Printer; + use alloc::string::String; + use crate::ast::parse::ParserBuilder; + use super::*; + fn roundtrip(given: &str) { roundtrip_with(|b| b, given); } @@ -499,6 +507,7 @@ mod tests { fn print_group() { roundtrip("(?i:a)"); roundtrip("(?Pa)"); + roundtrip("(?a)"); roundtrip("(a)"); } diff --git a/regex-syntax/src/ast/visitor.rs b/regex-syntax/src/ast/visitor.rs index 78ee487cff..ab136739e6 100644 --- a/regex-syntax/src/ast/visitor.rs +++ b/regex-syntax/src/ast/visitor.rs @@ -1,4 +1,4 @@ -use std::fmt; +use alloc::{vec, vec::Vec}; use crate::ast::{self, Ast}; @@ -11,15 +11,12 @@ use crate::ast::{self, Ast}; /// may be proportional to end user input. /// /// Typical usage of this trait involves providing an implementation and then -/// running it using the [`visit`](fn.visit.html) function. +/// running it using the [`visit`] function. /// /// Note that the abstract syntax tree for a regular expression is quite -/// complex. Unless you specifically need it, you might be able to use the -/// much simpler -/// [high-level intermediate representation](../hir/struct.Hir.html) -/// and its -/// [corresponding `Visitor` trait](../hir/trait.Visitor.html) -/// instead. +/// complex. 
Unless you specifically need it, you might be able to use the much +/// simpler [high-level intermediate representation](crate::hir::Hir) and its +/// [corresponding `Visitor` trait](crate::hir::Visitor) instead. pub trait Visitor { /// The result of visiting an AST. type Output; @@ -46,13 +43,12 @@ pub trait Visitor { } /// This method is called between child nodes of an - /// [`Alternation`](struct.Alternation.html). + /// [`Alternation`](ast::Alternation). fn visit_alternation_in(&mut self) -> Result<(), Self::Err> { Ok(()) } - /// This method is called on every - /// [`ClassSetItem`](enum.ClassSetItem.html) + /// This method is called on every [`ClassSetItem`](ast::ClassSetItem) /// before descending into child nodes. fn visit_class_set_item_pre( &mut self, @@ -61,8 +57,7 @@ pub trait Visitor { Ok(()) } - /// This method is called on every - /// [`ClassSetItem`](enum.ClassSetItem.html) + /// This method is called on every [`ClassSetItem`](ast::ClassSetItem) /// after descending into child nodes. fn visit_class_set_item_post( &mut self, @@ -72,8 +67,8 @@ pub trait Visitor { } /// This method is called on every - /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html) - /// before descending into child nodes. + /// [`ClassSetBinaryOp`](ast::ClassSetBinaryOp) before descending into + /// child nodes. fn visit_class_set_binary_op_pre( &mut self, _ast: &ast::ClassSetBinaryOp, @@ -82,8 +77,8 @@ pub trait Visitor { } /// This method is called on every - /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html) - /// after descending into child nodes. + /// [`ClassSetBinaryOp`](ast::ClassSetBinaryOp) after descending into child + /// nodes. fn visit_class_set_binary_op_post( &mut self, _ast: &ast::ClassSetBinaryOp, @@ -92,7 +87,7 @@ pub trait Visitor { } /// This method is called between the left hand and right hand child nodes - /// of a [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html). + /// of a [`ClassSetBinaryOp`](ast::ClassSetBinaryOp). 
fn visit_class_set_binary_op_in( &mut self, _ast: &ast::ClassSetBinaryOp, @@ -104,8 +99,7 @@ pub trait Visitor { /// Executes an implementation of `Visitor` in constant stack space. /// /// This function will visit every node in the given `Ast` while calling the -/// appropriate methods provided by the -/// [`Visitor`](trait.Visitor.html) trait. +/// appropriate methods provided by the [`Visitor`] trait. /// /// The primary use case for this method is when one wants to perform case /// analysis over an `Ast` without using a stack size proportional to the depth @@ -475,8 +469,8 @@ impl<'a> ClassInduct<'a> { } } -impl<'a> fmt::Debug for ClassFrame<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl<'a> core::fmt::Debug for ClassFrame<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let x = match *self { ClassFrame::Union { .. } => "Union", ClassFrame::Binary { .. } => "Binary", @@ -487,8 +481,8 @@ impl<'a> fmt::Debug for ClassFrame<'a> { } } -impl<'a> fmt::Debug for ClassInduct<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl<'a> core::fmt::Debug for ClassInduct<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let x = match *self { ClassInduct::Item(it) => match *it { ast::ClassSetItem::Empty(_) => "Item(Empty)", diff --git a/regex-syntax/src/debug.rs b/regex-syntax/src/debug.rs new file mode 100644 index 0000000000..a0b051b441 --- /dev/null +++ b/regex-syntax/src/debug.rs @@ -0,0 +1,107 @@ +/// A type that wraps a single byte with a convenient fmt::Debug impl that +/// escapes the byte. +pub(crate) struct Byte(pub(crate) u8); + +impl core::fmt::Debug for Byte { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + // Special case ASCII space. It's too hard to read otherwise, so + // put quotes around it. I sometimes wonder whether just '\x20' would + // be better... 
+ if self.0 == b' ' { + return write!(f, "' '"); + } + // 10 bytes is enough to cover any output from ascii::escape_default. + let mut bytes = [0u8; 10]; + let mut len = 0; + for (i, mut b) in core::ascii::escape_default(self.0).enumerate() { + // capitalize \xab to \xAB + if i >= 2 && b'a' <= b && b <= b'f' { + b -= 32; + } + bytes[len] = b; + len += 1; + } + write!(f, "{}", core::str::from_utf8(&bytes[..len]).unwrap()) + } +} + +/// A type that provides a human readable debug impl for arbitrary bytes. +/// +/// This generally works best when the bytes are presumed to be mostly UTF-8, +/// but will work for anything. +/// +/// N.B. This is copied nearly verbatim from regex-automata. Sigh. +pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]); + +impl<'a> core::fmt::Debug for Bytes<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "\"")?; + // This is a sad re-implementation of a similar impl found in bstr. + let mut bytes = self.0; + while let Some(result) = utf8_decode(bytes) { + let ch = match result { + Ok(ch) => ch, + Err(byte) => { + write!(f, r"\x{:02x}", byte)?; + bytes = &bytes[1..]; + continue; + } + }; + bytes = &bytes[ch.len_utf8()..]; + match ch { + '\0' => write!(f, "\\0")?, + // ASCII control characters except \0, \n, \r, \t + '\x01'..='\x08' + | '\x0b' + | '\x0c' + | '\x0e'..='\x19' + | '\x7f' => { + write!(f, "\\x{:02x}", u32::from(ch))?; + } + '\n' | '\r' | '\t' | _ => { + write!(f, "{}", ch.escape_debug())?; + } + } + } + write!(f, "\"")?; + Ok(()) + } +} + +/// Decodes the next UTF-8 encoded codepoint from the given byte slice. +/// +/// If no valid encoding of a codepoint exists at the beginning of the given +/// byte slice, then the first byte is returned instead. +/// +/// This returns `None` if and only if `bytes` is empty. 
+pub(crate) fn utf8_decode(bytes: &[u8]) -> Option> { + fn len(byte: u8) -> Option { + if byte <= 0x7F { + return Some(1); + } else if byte & 0b1100_0000 == 0b1000_0000 { + return None; + } else if byte <= 0b1101_1111 { + Some(2) + } else if byte <= 0b1110_1111 { + Some(3) + } else if byte <= 0b1111_0111 { + Some(4) + } else { + None + } + } + + if bytes.is_empty() { + return None; + } + let len = match len(bytes[0]) { + None => return Some(Err(bytes[0])), + Some(len) if len > bytes.len() => return Some(Err(bytes[0])), + Some(1) => return Some(Ok(char::from(bytes[0]))), + Some(len) => len, + }; + match core::str::from_utf8(&bytes[..len]) { + Ok(s) => Some(Ok(s.chars().next().unwrap())), + Err(_) => Some(Err(bytes[0])), + } +} diff --git a/regex-syntax/src/error.rs b/regex-syntax/src/error.rs index 1230d2fc5d..98869c4f79 100644 --- a/regex-syntax/src/error.rs +++ b/regex-syntax/src/error.rs @@ -1,15 +1,17 @@ -use std::cmp; -use std::error; -use std::fmt; -use std::result; +use alloc::{ + format, + string::{String, ToString}, + vec, + vec::Vec, +}; -use crate::ast; -use crate::hir; - -/// A type alias for dealing with errors returned by this crate. -pub type Result = result::Result; +use crate::{ast, hir}; /// This error type encompasses any error that can be returned by this crate. +/// +/// This error type is marked as `non_exhaustive`. This means that adding a +/// new variant is not considered a breaking change. +#[non_exhaustive] #[derive(Clone, Debug, Eq, PartialEq)] pub enum Error { /// An error that occurred while translating concrete syntax into abstract @@ -18,13 +20,6 @@ pub enum Error { /// An error that occurred while translating abstract syntax into a high /// level intermediate representation (HIR). Translate(hir::Error), - /// Hints that destructuring should not be exhaustive. - /// - /// This enum may grow additional variants, so this makes sure clients - /// don't count on exhaustive matching. 
(Otherwise, adding a new variant - /// could break existing code.) - #[doc(hidden)] - __Nonexhaustive, } impl From for Error { @@ -39,24 +34,14 @@ impl From for Error { } } -impl error::Error for Error { - // TODO: Remove this method entirely on the next breaking semver release. - #[allow(deprecated)] - fn description(&self) -> &str { - match *self { - Error::Parse(ref x) => x.description(), - Error::Translate(ref x) => x.description(), - _ => unreachable!(), - } - } -} +#[cfg(feature = "std")] +impl std::error::Error for Error {} -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match *self { Error::Parse(ref x) => x.fmt(f), Error::Translate(ref x) => x.fmt(f), - _ => unreachable!(), } } } @@ -101,8 +86,8 @@ impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> { } } -impl<'e, E: fmt::Display> fmt::Display for Formatter<'e, E> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl<'e, E: core::fmt::Display> core::fmt::Display for Formatter<'e, E> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let spans = Spans::from_formatter(self); if self.pattern.contains('\n') { let divider = repeat_char('~', 79); @@ -168,7 +153,7 @@ struct Spans<'p> { impl<'p> Spans<'p> { /// Build a sequence of spans from a formatter. 
- fn from_formatter<'e, E: fmt::Display>( + fn from_formatter<'e, E: core::fmt::Display>( fmter: &'p Formatter<'e, E>, ) -> Spans<'p> { let mut line_count = fmter.pattern.lines().count(); @@ -248,7 +233,7 @@ impl<'p> Spans<'p> { pos += 1; } let note_len = span.end.column.saturating_sub(span.start.column); - for _ in 0..cmp::max(1, note_len) { + for _ in 0..core::cmp::max(1, note_len) { notes.push('^'); pos += 1; } @@ -281,11 +266,13 @@ impl<'p> Spans<'p> { } fn repeat_char(c: char, count: usize) -> String { - ::std::iter::repeat(c).take(count).collect() + core::iter::repeat(c).take(count).collect() } #[cfg(test)] mod tests { + use alloc::string::ToString; + use crate::ast::parse::Parser; fn assert_panic_message(pattern: &str, expected_msg: &str) { diff --git a/regex-syntax/src/hir/interval.rs b/regex-syntax/src/hir/interval.rs index 56698c53af..e063390a8f 100644 --- a/regex-syntax/src/hir/interval.rs +++ b/regex-syntax/src/hir/interval.rs @@ -1,8 +1,6 @@ -use std::char; -use std::cmp; -use std::fmt::Debug; -use std::slice; -use std::u8; +use core::{char, cmp, fmt::Debug, slice}; + +use alloc::vec::Vec; use crate::unicode; @@ -32,9 +30,38 @@ use crate::unicode; // // Tests on this are relegated to the public API of HIR in src/hir.rs. -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug)] pub struct IntervalSet { + /// A sorted set of non-overlapping ranges. ranges: Vec, + /// While not required at all for correctness, we keep track of whether an + /// interval set has been case folded or not. This helps us avoid doing + /// redundant work if, for example, a set has already been cased folded. + /// And note that whether a set is folded or not is preserved through + /// all of the pairwise set operations. That is, if both interval sets + /// have been case folded, then any of difference, union, intersection or + /// symmetric difference all produce a case folded set. 
+ /// + /// Note that when this is true, it *must* be the case that the set is case + /// folded. But when it's false, the set *may* be case folded. In other + /// words, we only set this to true when we know it to be the case, but we're + /// okay with it being false if it would otherwise be costly to determine + /// whether it should be true. This means code cannot assume that a false + /// value necessarily indicates that the set is not case folded. + /// + /// Bottom line: this is a performance optimization. + folded: bool, +} + +impl Eq for IntervalSet {} + +// We implement PartialEq manually so that we don't consider the set's internal +// 'folded' property to be part of its identity. The 'folded' property is +// strictly an optimization. +impl PartialEq for IntervalSet { + fn eq(&self, other: &IntervalSet) -> bool { + self.ranges.eq(&other.ranges) + } } impl IntervalSet { @@ -44,7 +71,10 @@ impl IntervalSet { /// The given ranges do not need to be in any specific order, and ranges /// may overlap. pub fn new>(intervals: T) -> IntervalSet { - let mut set = IntervalSet { ranges: intervals.into_iter().collect() }; + let ranges: Vec = intervals.into_iter().collect(); + // An empty set is case folded. + let folded = ranges.is_empty(); + let mut set = IntervalSet { ranges, folded }; set.canonicalize(); set } @@ -55,6 +85,10 @@ impl IntervalSet { // it preserves canonicalization. self.ranges.push(interval); self.canonicalize(); + // We don't know whether the new interval added here is considered + // case folded, so we conservatively assume that the entire set is + // no longer case folded if it was previously. + self.folded = false; } /// Return an iterator over all intervals in this set. @@ -79,6 +113,9 @@ /// This returns an error if the necessary case mapping data is not /// available. 
pub fn case_fold_simple(&mut self) -> Result<(), unicode::CaseFoldError> { + if self.folded { + return Ok(()); + } let len = self.ranges.len(); for i in 0..len { let range = self.ranges[i]; @@ -88,14 +125,19 @@ } } self.canonicalize(); + self.folded = true; Ok(()) } /// Union this set with the given set, in place. pub fn union(&mut self, other: &IntervalSet) { + if other.ranges.is_empty() || self.ranges == other.ranges { + return; + } // This could almost certainly be done more efficiently. self.ranges.extend(&other.ranges); self.canonicalize(); + self.folded = self.folded && other.folded; } /// Intersect this set with the given set, in place. @@ -105,6 +147,8 @@ } if other.ranges.is_empty() { self.ranges.clear(); + // An empty set is case folded. + self.folded = true; return; } @@ -134,6 +178,7 @@ } } self.ranges.drain(..drain_end); + self.folded = self.folded && other.folded; } /// Subtract the given set from this set, in place. @@ -226,6 +271,7 @@ a += 1; } self.ranges.drain(..drain_end); + self.folded = self.folded && other.folded; } /// Compute the symmetric difference of the two sets, in place. @@ -251,6 +297,8 @@ if self.ranges.is_empty() { let (min, max) = (I::Bound::min_value(), I::Bound::max_value()); self.ranges.push(I::create(min, max)); + // The set containing everything must be case folded. + self.folded = true; return; } @@ -276,6 +324,19 @@ self.ranges.push(I::create(lower, I::Bound::max_value())); } self.ranges.drain(..drain_end); + // We don't need to update whether this set is folded or not, because + // it is conservatively preserved through negation. Namely, if a set + // is not folded, then it is possible that its negation is folded, for + // example, [^☃]. But we're fine with assuming that the set is not + // folded in that case. (`folded` permits false negatives but not false + // positives.) 
+ // + // But what about when a set is folded, is its negation also + // necessarily folded? Yes. Because if a set is folded, then for every + // character in the set, it necessarily included its equivalence class + // of case folded characters. Negating it in turn means that all + // equivalence classes in the set are negated, and any equivalence + // class that was previously not in the set is now entirely in the set. } /// Converts this set into a canonical ordering. @@ -481,7 +542,7 @@ impl Bound for u8 { u8::MAX } fn as_u32(self) -> u32 { - self as u32 + u32::from(self) } fn increment(self) -> Self { self.checked_add(1).unwrap() @@ -499,20 +560,20 @@ impl Bound for char { '\u{10FFFF}' } fn as_u32(self) -> u32 { - self as u32 + u32::from(self) } fn increment(self) -> Self { match self { '\u{D7FF}' => '\u{E000}', - c => char::from_u32((c as u32).checked_add(1).unwrap()).unwrap(), + c => char::from_u32(u32::from(c).checked_add(1).unwrap()).unwrap(), } } fn decrement(self) -> Self { match self { '\u{E000}' => '\u{D7FF}', - c => char::from_u32((c as u32).checked_sub(1).unwrap()).unwrap(), + c => char::from_u32(u32::from(c).checked_sub(1).unwrap()).unwrap(), } } } diff --git a/regex-syntax/src/hir/literal.rs b/regex-syntax/src/hir/literal.rs new file mode 100644 index 0000000000..bd3a2d143b --- /dev/null +++ b/regex-syntax/src/hir/literal.rs @@ -0,0 +1,3165 @@ +/*! +Provides literal extraction from `Hir` expressions. + +An [`Extractor`] pulls literals out of [`Hir`] expressions and returns a +[`Seq`] of [`Literal`]s. + +The purpose of literal extraction is generally to provide avenues for +optimizing regex searches. The main idea is that substring searches can be an +order of magnitude faster than a regex search. Therefore, if one can execute +a substring search to find candidate match locations and only run the regex +search at those locations, then it is possible for huge improvements in +performance to be realized. 
+
+With that said, literal optimizations are generally a black art because even
+though substring search is generally faster, if the number of candidates
+produced is high, then it can create a lot of overhead by ping-ponging between
+the substring search and the regex search.
+
+Here are some heuristics that might be used to help increase the chances of
+effective literal optimizations:
+
+* Stick to small [`Seq`]s. If you search for too many literals, it's likely
+to lead to substring search that is only a little faster than a regex search,
+and thus the overhead of using literal optimizations in the first place might
+make things slower overall.
+* The literals in your [`Seq`] shouldn't be too short. In general, longer is
+better. A sequence corresponding to single bytes that occur frequently in the
+haystack, for example, is probably a bad literal optimization because it's
+likely to produce many false positive candidates. Longer literals are less
+likely to match, and thus probably produce fewer false positives.
+* If it's possible to estimate the approximate frequency of each byte according
+to some pre-computed background distribution, it is possible to compute a score
+of how "good" a `Seq` is. If a `Seq` isn't good enough, you might consider
+skipping the literal optimization and just use the regex engine.
+
+(It should be noted that there are always pathological cases that can make
+any kind of literal optimization be a net slower result. This is why it
+might be a good idea to be conservative, or to even provide a means for
+literal optimizations to be dynamically disabled if they are determined to be
+ineffective according to some measure.)
+
+You're encouraged to explore the methods on [`Seq`], which permit shrinking
+the size of sequences in a preference-order preserving fashion.
+
+Finally, note that it isn't strictly necessary to use an [`Extractor`]. 
Namely,
+an `Extractor` only uses public APIs of the [`Seq`] and [`Literal`] types,
+so it is possible to implement your own extractor. For example, for n-grams
+or "inner" literals (i.e., not prefix or suffix literals). The `Extractor`
+is mostly responsible for the case analysis over `Hir` expressions. Much of
+the "trickier" parts are how to combine literal sequences, and that is all
+implemented on [`Seq`].
+*/
+
+use core::{cmp, mem};
+
+use alloc::{vec, vec::Vec};
+
+use crate::hir::{self, Hir};
+
+/// Extracts prefix or suffix literal sequences from [`Hir`] expressions.
+///
+/// Literal extraction is based on the following observations:
+///
+/// * Many regexes start with one or a small number of literals.
+/// * Substring search for literals is often much faster (sometimes by an order
+/// of magnitude) than a regex search.
+///
+/// Thus, in many cases, one can search for literals to find candidate starting
+/// locations of a match, and then only run the full regex engine at each such
+/// location instead of over the full haystack.
+///
+/// The main downside of literal extraction is that it can wind up causing a
+/// search to be slower overall. For example, if there are many matches or if
+/// there are many candidates that don't ultimately lead to a match, then a
+/// lot of overhead will be spent in shuffling back-and-forth between substring
+/// search and the regex engine. This is the fundamental reason why literal
+/// optimizations for regex patterns are sometimes considered a "black art."
+///
+/// # Look-around assertions
+///
+/// Literal extraction treats all look-around assertions as-if they match every
+/// empty string. So for example, the regex `\bquux\b` will yield a sequence
+/// containing a single exact literal `quux`. However, not all occurrences
+/// of `quux` correspond to a match of the regex. For example, `\bquux\b`
+/// does not match `ZquuxZ` anywhere because `quux` does not fall on a word
+/// boundary. 
+/// +/// In effect, if your regex contains look-around assertions, then a match of +/// an exact literal does not necessarily mean the regex overall matches. So +/// you may still need to run the regex engine in such cases to confirm the +/// match. +/// +/// The precise guarantee you get from a literal sequence is: if every literal +/// in the sequence is exact and the original regex contains zero look-around +/// assertions, then a preference-order multi-substring search of those +/// literals will precisely match a preference-order search of the original +/// regex. +/// +/// # Example +/// +/// This shows how to extract prefixes: +/// +/// ``` +/// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse}; +/// +/// let hir = parse(r"(a|b|c)(x|y|z)[A-Z]+foo")?; +/// let got = Extractor::new().extract(&hir); +/// // All literals returned are "inexact" because none of them reach the +/// // match state. +/// let expected = Seq::from_iter([ +/// Literal::inexact("ax"), +/// Literal::inexact("ay"), +/// Literal::inexact("az"), +/// Literal::inexact("bx"), +/// Literal::inexact("by"), +/// Literal::inexact("bz"), +/// Literal::inexact("cx"), +/// Literal::inexact("cy"), +/// Literal::inexact("cz"), +/// ]); +/// assert_eq!(expected, got); +/// +/// # Ok::<(), Box>(()) +/// ``` +/// +/// This shows how to extract suffixes: +/// +/// ``` +/// use regex_syntax::{ +/// hir::literal::{Extractor, ExtractKind, Literal, Seq}, +/// parse, +/// }; +/// +/// let hir = parse(r"foo|[A-Z]+bar")?; +/// let got = Extractor::new().kind(ExtractKind::Suffix).extract(&hir); +/// // Since 'foo' gets to a match state, it is considered exact. But 'bar' +/// // does not because of the '[A-Z]+', and thus is marked inexact. 
+/// let expected = Seq::from_iter([ +/// Literal::exact("foo"), +/// Literal::inexact("bar"), +/// ]); +/// assert_eq!(expected, got); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct Extractor { + kind: ExtractKind, + limit_class: usize, + limit_repeat: usize, + limit_literal_len: usize, + limit_total: usize, +} + +impl Extractor { + /// Create a new extractor with a default configuration. + /// + /// The extractor can be optionally configured before calling + /// [`Extractor::extract`] to get a literal sequence. + pub fn new() -> Extractor { + Extractor { + kind: ExtractKind::Prefix, + limit_class: 10, + limit_repeat: 10, + limit_literal_len: 100, + limit_total: 250, + } + } + + /// Execute the extractor and return a sequence of literals. + pub fn extract(&self, hir: &Hir) -> Seq { + use crate::hir::HirKind::*; + + match *hir.kind() { + Empty | Look(_) => Seq::singleton(self::Literal::exact(vec![])), + Literal(hir::Literal(ref bytes)) => { + let mut seq = + Seq::singleton(self::Literal::exact(bytes.to_vec())); + self.enforce_literal_len(&mut seq); + seq + } + Class(hir::Class::Unicode(ref cls)) => { + self.extract_class_unicode(cls) + } + Class(hir::Class::Bytes(ref cls)) => self.extract_class_bytes(cls), + Repetition(ref rep) => self.extract_repetition(rep), + Capture(hir::Capture { ref sub, .. }) => self.extract(sub), + Concat(ref hirs) => match self.kind { + ExtractKind::Prefix => self.extract_concat(hirs.iter()), + ExtractKind::Suffix => self.extract_concat(hirs.iter().rev()), + }, + Alternation(ref hirs) => { + // Unlike concat, we always union starting from the beginning, + // since the beginning corresponds to the highest preference, + // which doesn't change based on forwards vs reverse. + self.extract_alternation(hirs.iter()) + } + } + } + + /// Set the kind of literal sequence to extract from an [`Hir`] expression. + /// + /// The default is to extract prefixes, but suffixes can be selected + /// instead. 
The contract for prefixes is that every match of the + /// corresponding `Hir` must start with one of the literals in the sequence + /// returned. Moreover, the _order_ of the sequence returned corresponds to + /// the preference order. + /// + /// Suffixes satisfy a similar contract in that every match of the + /// corresponding `Hir` must end with one of the literals in the sequence + /// returned. However, there is no guarantee that the literals are in + /// preference order. + /// + /// Remember that a sequence can be infinite. For example, unless the + /// limits are configured to be impractically large, attempting to extract + /// prefixes (or suffixes) for the pattern `[A-Z]` will return an infinite + /// sequence. Generally speaking, if the sequence returned is infinite, + /// then it is presumed to be unwise to do prefix (or suffix) optimizations + /// for the pattern. + pub fn kind(&mut self, kind: ExtractKind) -> &mut Extractor { + self.kind = kind; + self + } + + /// Configure a limit on the length of the sequence that is permitted for + /// a character class. If a character class exceeds this limit, then the + /// sequence returned for it is infinite. + /// + /// This prevents classes like `[A-Z]` or `\pL` from getting turned into + /// huge and likely unproductive sequences of literals. + /// + /// # Example + /// + /// This example shows how this limit can be lowered to decrease the tolerance + /// for character classes being turned into literal sequences. + /// + /// ``` + /// use regex_syntax::{hir::literal::{Extractor, Seq}, parse}; + /// + /// let hir = parse(r"[0-9]")?; + /// + /// let got = Extractor::new().extract(&hir); + /// let expected = Seq::new([ + /// "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", + /// ]); + /// assert_eq!(expected, got); + /// + /// // Now let's shrink the limit and see how that changes things. 
+ /// let got = Extractor::new().limit_class(4).extract(&hir); + /// let expected = Seq::infinite(); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn limit_class(&mut self, limit: usize) -> &mut Extractor { + self.limit_class = limit; + self + } + + /// Configure a limit on the total number of repetitions that is permitted + /// before literal extraction is stopped. + /// + /// This is useful for limiting things like `(abcde){50}`, or more + /// insidiously, `(?:){1000000000}`. This limit prevents any one single + /// repetition from adding too much to a literal sequence. + /// + /// With this limit set, repetitions that exceed it will be stopped and any + /// literals extracted up to that point will be made inexact. + /// + /// # Example + /// + /// This shows how to decrease the limit and compares it with the default. + /// + /// ``` + /// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse}; + /// + /// let hir = parse(r"(abc){8}")?; + /// + /// let got = Extractor::new().extract(&hir); + /// let expected = Seq::new(["abcabcabcabcabcabcabcabc"]); + /// assert_eq!(expected, got); + /// + /// // Now let's shrink the limit and see how that changes things. + /// let got = Extractor::new().limit_repeat(4).extract(&hir); + /// let expected = Seq::from_iter([ + /// Literal::inexact("abcabcabcabc"), + /// ]); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn limit_repeat(&mut self, limit: usize) -> &mut Extractor { + self.limit_repeat = limit; + self + } + + /// Configure a limit on the maximum length of any literal in a sequence. + /// + /// This is useful for limiting things like `(abcde){5}{5}{5}{5}`. While + /// each repetition or literal in that regex is small, when all the + /// repetitions are applied, one ends up with a literal of length `5^4 = + /// 625`. + /// + /// With this limit set, literals that exceed it will be made inexact and + /// thus prevented from growing. 
+ /// + /// # Example + /// + /// This shows how to decrease the limit and compares it with the default. + /// + /// ``` + /// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse}; + /// + /// let hir = parse(r"(abc){2}{2}{2}")?; + /// + /// let got = Extractor::new().extract(&hir); + /// let expected = Seq::new(["abcabcabcabcabcabcabcabc"]); + /// assert_eq!(expected, got); + /// + /// // Now let's shrink the limit and see how that changes things. + /// let got = Extractor::new().limit_literal_len(14).extract(&hir); + /// let expected = Seq::from_iter([ + /// Literal::inexact("abcabcabcabcab"), + /// ]); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn limit_literal_len(&mut self, limit: usize) -> &mut Extractor { + self.limit_literal_len = limit; + self + } + + /// Configure a limit on the total number of literals that will be + /// returned. + /// + /// This is useful as a practical measure for avoiding the creation of + /// large sequences of literals. While the extractor will automatically + /// handle local creations of large sequences (for example, `[A-Z]` yields + /// an infinite sequence by default), large sequences can be created + /// through non-local means as well. + /// + /// For example, `[ab]{3}{3}` would yield a sequence of length `512 = 2^9` + /// despite each of the repetitions being small on their own. This limit + /// thus represents a "catch all" for avoiding locally small sequences from + /// combining into large sequences. + /// + /// # Example + /// + /// This example shows how reducing the limit will change the literal + /// sequence returned. 
+ /// + /// ``` + /// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse}; + /// + /// let hir = parse(r"[ab]{2}{2}")?; + /// + /// let got = Extractor::new().extract(&hir); + /// let expected = Seq::new([ + /// "aaaa", "aaab", "aaba", "aabb", + /// "abaa", "abab", "abba", "abbb", + /// "baaa", "baab", "baba", "babb", + /// "bbaa", "bbab", "bbba", "bbbb", + /// ]); + /// assert_eq!(expected, got); + /// + /// // The default limit is not too big, but big enough to extract all + /// // literals from '[ab]{2}{2}'. If we shrink the limit to less than 16, + /// // then we'll get a truncated set. Notice that it returns a sequence of + /// // length 4 even though our limit was 10. This is because the sequence + /// // is difficult to increase without blowing the limit. Notice also + /// // that every literal in the sequence is now inexact because they were + /// // stripped of some suffix. + /// let got = Extractor::new().limit_total(10).extract(&hir); + /// let expected = Seq::from_iter([ + /// Literal::inexact("aa"), + /// Literal::inexact("ab"), + /// Literal::inexact("ba"), + /// Literal::inexact("bb"), + /// ]); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn limit_total(&mut self, limit: usize) -> &mut Extractor { + self.limit_total = limit; + self + } + + /// Extract a sequence from the given concatenation. Sequences from each of + /// the child HIR expressions are combined via cross product. + /// + /// This short circuits once the cross product turns into a sequence + /// containing only inexact literals. + fn extract_concat<'a, I: Iterator>(&self, it: I) -> Seq { + let mut seq = Seq::singleton(self::Literal::exact(vec![])); + for hir in it { + // If every element in the sequence is inexact, then a cross + // product will always be a no-op. Thus, there is nothing else we + // can add to it and can quit early. Note that this also includes + // infinite sequences. 
+ if seq.is_inexact() { + break; + } + // Note that 'cross' also dispatches based on whether we're + // extracting prefixes or suffixes. + seq = self.cross(seq, &mut self.extract(hir)); + } + seq + } + + /// Extract a sequence from the given alternation. + /// + /// This short circuits once the union turns into an infinite sequence. + fn extract_alternation<'a, I: Iterator>( + &self, + it: I, + ) -> Seq { + let mut seq = Seq::empty(); + for hir in it { + // Once our 'seq' is infinite, every subsequent union + // operation on it will itself always result in an + // infinite sequence. Thus, it can never change and we can + // short-circuit. + if !seq.is_finite() { + break; + } + seq = self.union(seq, &mut self.extract(hir)); + } + seq + } + + /// Extract a sequence of literals from the given repetition. We do our + /// best, Some examples: + /// + /// 'a*' => [inexact(a), exact("")] + /// 'a*?' => [exact(""), inexact(a)] + /// 'a+' => [inexact(a)] + /// 'a{3}' => [exact(aaa)] + /// 'a{3,5} => [inexact(aaa)] + /// + /// The key here really is making sure we get the 'inexact' vs 'exact' + /// attributes correct on each of the literals we add. For example, the + /// fact that 'a*' gives us an inexact 'a' and an exact empty string means + /// that a regex like 'ab*c' will result in [inexact(ab), exact(ac)] + /// literals being extracted, which might actually be a better prefilter + /// than just 'a'. + fn extract_repetition(&self, rep: &hir::Repetition) -> Seq { + let mut subseq = self.extract(&rep.sub); + match *rep { + hir::Repetition { min: 0, max, greedy, .. } => { + // When 'max=1', we can retain exactness, since 'a?' is + // equivalent to 'a|'. Similarly below, 'a??' is equivalent to + // '|a'. + if max != Some(1) { + subseq.make_inexact(); + } + let mut empty = Seq::singleton(Literal::exact(vec![])); + if !greedy { + mem::swap(&mut subseq, &mut empty); + } + self.union(subseq, &mut empty) + } + hir::Repetition { min, max: Some(max), .. 
} if min == max => { + assert!(min > 0); // handled above + let limit = + u32::try_from(self.limit_repeat).unwrap_or(u32::MAX); + let mut seq = Seq::singleton(Literal::exact(vec![])); + for _ in 0..cmp::min(min, limit) { + if seq.is_inexact() { + break; + } + seq = self.cross(seq, &mut subseq.clone()); + } + if usize::try_from(min).is_err() || min > limit { + seq.make_inexact(); + } + seq + } + hir::Repetition { min, max: Some(max), .. } if min < max => { + assert!(min > 0); // handled above + let limit = + u32::try_from(self.limit_repeat).unwrap_or(u32::MAX); + let mut seq = Seq::singleton(Literal::exact(vec![])); + for _ in 0..cmp::min(min, limit) { + if seq.is_inexact() { + break; + } + seq = self.cross(seq, &mut subseq.clone()); + } + seq.make_inexact(); + seq + } + hir::Repetition { .. } => { + subseq.make_inexact(); + subseq + } + } + } + + /// Convert the given Unicode class into a sequence of literals if the + /// class is small enough. If the class is too big, return an infinite + /// sequence. + fn extract_class_unicode(&self, cls: &hir::ClassUnicode) -> Seq { + if self.class_over_limit_unicode(cls) { + return Seq::infinite(); + } + let mut seq = Seq::empty(); + for r in cls.iter() { + for ch in r.start()..=r.end() { + seq.push(Literal::from(ch)); + } + } + self.enforce_literal_len(&mut seq); + seq + } + + /// Convert the given byte class into a sequence of literals if the class + /// is small enough. If the class is too big, return an infinite sequence. + fn extract_class_bytes(&self, cls: &hir::ClassBytes) -> Seq { + if self.class_over_limit_bytes(cls) { + return Seq::infinite(); + } + let mut seq = Seq::empty(); + for r in cls.iter() { + for b in r.start()..=r.end() { + seq.push(Literal::from(b)); + } + } + self.enforce_literal_len(&mut seq); + seq + } + + /// Returns true if the given Unicode class exceeds the configured limits + /// on this extractor. 
+    fn class_over_limit_unicode(&self, cls: &hir::ClassUnicode) -> bool {
+        let mut count = 0;
+        for r in cls.iter() {
+            if count > self.limit_class {
+                return true;
+            }
+            count += r.len();
+        }
+        count > self.limit_class
+    }
+
+    /// Returns true if the given byte class exceeds the configured limits on
+    /// this extractor.
+    fn class_over_limit_bytes(&self, cls: &hir::ClassBytes) -> bool {
+        let mut count = 0;
+        for r in cls.iter() {
+            if count > self.limit_class {
+                return true;
+            }
+            count += r.len();
+        }
+        count > self.limit_class
+    }
+
+    /// Compute the cross product of the two sequences if the result would be
+    /// within configured limits. Otherwise, make `seq2` infinite and cross the
+    /// infinite sequence with `seq1`.
+    fn cross(&self, mut seq1: Seq, seq2: &mut Seq) -> Seq {
+        if seq1.max_cross_len(seq2).map_or(false, |len| len > self.limit_total)
+        {
+            seq2.make_infinite();
+        }
+        if let ExtractKind::Suffix = self.kind {
+            seq1.cross_reverse(seq2);
+        } else {
+            seq1.cross_forward(seq2);
+        }
+        assert!(seq1.len().map_or(true, |x| x <= self.limit_total));
+        self.enforce_literal_len(&mut seq1);
+        seq1
+    }
+
+    /// Union the two sequences if the result would be within configured
+    /// limits. Otherwise, make `seq2` infinite and union the infinite sequence
+    /// with `seq1`.
+    fn union(&self, mut seq1: Seq, seq2: &mut Seq) -> Seq {
+        if seq1.max_union_len(seq2).map_or(false, |len| len > self.limit_total)
+        {
+            // We try to trim our literal sequences to see if we can make
+            // room for more literals. The idea is that we'd rather trim down
+            // literals already in our sequence if it means we can add a few
+            // more and retain a finite sequence. Otherwise, we'll union with
+            // an infinite sequence and that infects everything and effectively
+            // stops literal extraction in its tracks.
+            //
+            // Why do we keep 4 bytes here? Well, it's a bit of an abstraction
+            // leakage. 
Downstream, the literals may wind up getting fed to + // the Teddy algorithm, which supports searching literals up to + // length 4. So that's why we pick that number here. Arguably this + // should be a tuneable parameter, but it seems a little tricky to + // describe. And I'm still unsure if this is the right way to go + // about culling literal sequences. + match self.kind { + ExtractKind::Prefix => { + seq1.keep_first_bytes(4); + seq2.keep_first_bytes(4); + } + ExtractKind::Suffix => { + seq1.keep_last_bytes(4); + seq2.keep_last_bytes(4); + } + } + seq1.dedup(); + seq2.dedup(); + if seq1 + .max_union_len(seq2) + .map_or(false, |len| len > self.limit_total) + { + seq2.make_infinite(); + } + } + seq1.union(seq2); + assert!(seq1.len().map_or(true, |x| x <= self.limit_total)); + seq1 + } + + /// Applies the literal length limit to the given sequence. If none of the + /// literals in the sequence exceed the limit, then this is a no-op. + fn enforce_literal_len(&self, seq: &mut Seq) { + let len = self.limit_literal_len; + match self.kind { + ExtractKind::Prefix => seq.keep_first_bytes(len), + ExtractKind::Suffix => seq.keep_last_bytes(len), + } + } +} + +impl Default for Extractor { + fn default() -> Extractor { + Extractor::new() + } +} + +/// The kind of literals to extract from an [`Hir`] expression. +/// +/// The default extraction kind is `Prefix`. +#[non_exhaustive] +#[derive(Clone, Debug)] +pub enum ExtractKind { + /// Extracts only prefix literals from a regex. + Prefix, + /// Extracts only suffix literals from a regex. + /// + /// Note that the sequence returned by suffix literals currently may + /// not correctly represent leftmost-first or "preference" order match + /// semantics. + Suffix, +} + +impl ExtractKind { + /// Returns true if this kind is the `Prefix` variant. + pub fn is_prefix(&self) -> bool { + matches!(*self, ExtractKind::Prefix) + } + + /// Returns true if this kind is the `Suffix` variant. 
+    pub fn is_suffix(&self) -> bool {
+        matches!(*self, ExtractKind::Suffix)
+    }
+}
+
+impl Default for ExtractKind {
+    fn default() -> ExtractKind {
+        ExtractKind::Prefix
+    }
+}
+
+/// A sequence of literals.
+///
+/// A `Seq` is very much like a set in that it represents a union of its
+/// members. That is, it corresponds to a set of literals where at least one
+/// must match in order for a particular [`Hir`] expression to match. (Whether
+/// this corresponds to the entire `Hir` expression, a prefix of it or a suffix
+/// of it depends on how the `Seq` was extracted from the `Hir`.)
+///
+/// It is also unlike a set in that multiple identical literals may appear,
+/// and that the order of the literals in the `Seq` matters. For example, if
+/// the sequence is `[sam, samwise]` and leftmost-first matching is used, then
+/// `samwise` can never match and the sequence is equivalent to `[sam]`.
+///
+/// # States of a sequence
+///
+/// A `Seq` has a few different logical states to consider:
+///
+/// * The sequence can represent "any" literal. When this happens, the set does
+/// not have a finite size. The purpose of this state is to inhibit callers
+/// from making assumptions about what literals are required in order to match
+/// a particular [`Hir`] expression. Generally speaking, when a set is in this
+/// state, literal optimizations are inhibited. A good example of a regex that
+/// will cause this sort of set to appear is `[A-Za-z]`. The character class
+/// is just too big (and also too narrow) to be usefully expanded into 52
+/// different literals. (Note that the decision for when a seq should become
+/// infinite is determined by the caller. A seq itself has no hard-coded
+/// limits.)
+/// * The sequence can be empty, in which case, it is an affirmative statement
+/// that there are no literals that can match the corresponding `Hir`.
+/// Consequently, the `Hir` never matches any input. For example, `[a&&b]`. 
+/// * The sequence can be non-empty, in which case, at least one of the +/// literals must match in order for the corresponding `Hir` to match. +/// +/// # Example +/// +/// This example shows how literal sequences can be simplified by stripping +/// suffixes and minimizing while maintaining preference order. +/// +/// ``` +/// use regex_syntax::hir::literal::{Literal, Seq}; +/// +/// let mut seq = Seq::new(&[ +/// "farm", +/// "appliance", +/// "faraway", +/// "apple", +/// "fare", +/// "gap", +/// "applicant", +/// "applaud", +/// ]); +/// seq.keep_first_bytes(3); +/// seq.minimize_by_preference(); +/// // Notice that 'far' comes before 'app', which matches the order in the +/// // original sequence. This guarantees that leftmost-first semantics are +/// // not altered by simplifying the set. +/// let expected = Seq::from_iter([ +/// Literal::inexact("far"), +/// Literal::inexact("app"), +/// Literal::exact("gap"), +/// ]); +/// assert_eq!(expected, seq); +/// ``` +#[derive(Clone, Eq, PartialEq)] +pub struct Seq { + /// The members of this seq. + /// + /// When `None`, the seq represents all possible literals. That is, it + /// prevents one from making assumptions about specific literals in the + /// seq, and forces one to treat it as if any literal might be in the seq. + /// + /// Note that `Some(vec![])` is valid and corresponds to the empty seq of + /// literals, i.e., a regex that can never match. For example, `[a&&b]`. + /// It is distinct from `Some(vec![""])`, which corresponds to the seq + /// containing an empty string, which matches at every position. + literals: Option>, +} + +impl Seq { + /// Returns an empty sequence. + /// + /// An empty sequence matches zero literals, and thus corresponds to a + /// regex that itself can never match. + #[inline] + pub fn empty() -> Seq { + Seq { literals: Some(vec![]) } + } + + /// Returns a sequence of literals without a finite size and may contain + /// any literal. 
+    ///
+    /// A sequence without finite size does not reveal anything about the
+    /// characteristics of the literals in its set. There are no fixed prefixes
+    /// or suffixes, nor are lower or upper bounds on the length of the literals
+    /// in the set known.
+    ///
+    /// This is useful to represent constructs in a regex that are "too big"
+    /// to usefully represent as a sequence of literals. For example,
+    /// `[A-Za-z]`.
+    /// When sequences get too big, they lose their discriminating nature and
+    /// are more likely to produce false positives, which in turn makes them
+    /// less likely to speed up searches.
+    ///
+    /// More pragmatically, for many regexes, enumerating all possible literals
+    /// is itself not possible or might otherwise use too many resources. So
+    /// constraining the size of sets during extraction is a practical trade
+    /// off to make.
+    #[inline]
+    pub fn infinite() -> Seq {
+        Seq { literals: None }
+    }
+
+    /// Returns a sequence containing a single literal.
+    #[inline]
+    pub fn singleton(lit: Literal) -> Seq {
+        Seq { literals: Some(vec![lit]) }
+    }
+
+    /// Returns a sequence of exact literals from the given byte strings.
+    #[inline]
+    pub fn new<I, B>(it: I) -> Seq
+    where
+        I: IntoIterator<Item = B>,
+        B: AsRef<[u8]>,
+    {
+        it.into_iter().map(|b| Literal::exact(b.as_ref())).collect()
+    }
+
+    /// If this is a finite sequence, return its members as a slice of
+    /// literals.
+    ///
+    /// The slice returned may be empty, in which case, there are no literals
+    /// that can match this sequence.
+    #[inline]
+    pub fn literals(&self) -> Option<&[Literal]> {
+        self.literals.as_deref()
+    }
+
+    /// Push a literal to the end of this sequence.
+    ///
+    /// If this sequence is not finite, then this is a no-op.
+    ///
+    /// Similarly, if the most recently added item of this sequence is
+    /// equivalent to the literal given, then it is not added. This reflects
+    /// a `Seq`'s "set like" behavior, and represents a practical trade off. 
+ /// Namely, there is never any need to have two adjacent and equivalent + /// literals in the same sequence, _and_ it is easy to detect in some + /// cases. + #[inline] + pub fn push(&mut self, lit: Literal) { + let lits = match self.literals { + None => return, + Some(ref mut lits) => lits, + }; + if lits.last().map_or(false, |m| m == &lit) { + return; + } + lits.push(lit); + } + + /// Make all of the literals in this sequence inexact. + /// + /// This is a no-op if this sequence is not finite. + #[inline] + pub fn make_inexact(&mut self) { + let lits = match self.literals { + None => return, + Some(ref mut lits) => lits, + }; + for lit in lits.iter_mut() { + lit.make_inexact(); + } + } + + /// Converts this sequence to an infinite sequence. + /// + /// This is a no-op if the sequence is already infinite. + #[inline] + pub fn make_infinite(&mut self) { + self.literals = None; + } + + /// Modify this sequence to contain the cross product between it and the + /// sequence given. + /// + /// The cross product only considers literals in this sequence that are + /// exact. That is, inexact literals are not extended. + /// + /// The literals are always drained from `other`, even if none are used. + /// This permits callers to reuse the sequence allocation elsewhere. + /// + /// If this sequence is infinite, then this is a no-op, regardless of what + /// `other` contains (and in this case, the literals are still drained from + /// `other`). If `other` is infinite and this sequence is finite, then this + /// is a no-op, unless this sequence contains a zero-length literal. In + /// which case, the infiniteness of `other` infects this sequence, and this + /// sequence is itself made infinite. + /// + /// Like [`Seq::union`], this may attempt to deduplicate literals. See + /// [`Seq::dedup`] for how deduplication deals with exact and inexact + /// literals. + /// + /// # Example + /// + /// This example shows basic usage and how exact and inexact literals + /// interact. 
+ /// + /// ``` + /// use regex_syntax::hir::literal::{Literal, Seq}; + /// + /// let mut seq1 = Seq::from_iter([ + /// Literal::exact("foo"), + /// Literal::inexact("bar"), + /// ]); + /// let mut seq2 = Seq::from_iter([ + /// Literal::inexact("quux"), + /// Literal::exact("baz"), + /// ]); + /// seq1.cross_forward(&mut seq2); + /// + /// // The literals are pulled out of seq2. + /// assert_eq!(Some(0), seq2.len()); + /// + /// let expected = Seq::from_iter([ + /// Literal::inexact("fooquux"), + /// Literal::exact("foobaz"), + /// Literal::inexact("bar"), + /// ]); + /// assert_eq!(expected, seq1); + /// ``` + /// + /// This example shows the behavior of when `other` is an infinite + /// sequence. + /// + /// ``` + /// use regex_syntax::hir::literal::{Literal, Seq}; + /// + /// let mut seq1 = Seq::from_iter([ + /// Literal::exact("foo"), + /// Literal::inexact("bar"), + /// ]); + /// let mut seq2 = Seq::infinite(); + /// seq1.cross_forward(&mut seq2); + /// + /// // When seq2 is infinite, cross product doesn't add anything, but + /// // ensures all members of seq1 are inexact. + /// let expected = Seq::from_iter([ + /// Literal::inexact("foo"), + /// Literal::inexact("bar"), + /// ]); + /// assert_eq!(expected, seq1); + /// ``` + /// + /// This example is like the one above, but shows what happens when this + /// sequence contains an empty string. In this case, an infinite `other` + /// sequence infects this sequence (because the empty string means that + /// there are no finite prefixes): + /// + /// ``` + /// use regex_syntax::hir::literal::{Literal, Seq}; + /// + /// let mut seq1 = Seq::from_iter([ + /// Literal::exact("foo"), + /// Literal::exact(""), // inexact provokes same behavior + /// Literal::inexact("bar"), + /// ]); + /// let mut seq2 = Seq::infinite(); + /// seq1.cross_forward(&mut seq2); + /// + /// // seq1 is now infinite! + /// assert!(!seq1.is_finite()); + /// ``` + /// + /// This example shows the behavior of this sequence is infinite. 
+ /// + /// ``` + /// use regex_syntax::hir::literal::{Literal, Seq}; + /// + /// let mut seq1 = Seq::infinite(); + /// let mut seq2 = Seq::from_iter([ + /// Literal::exact("foo"), + /// Literal::inexact("bar"), + /// ]); + /// seq1.cross_forward(&mut seq2); + /// + /// // seq1 remains unchanged. + /// assert!(!seq1.is_finite()); + /// // Even though the literals in seq2 weren't used, it was still drained. + /// assert_eq!(Some(0), seq2.len()); + /// ``` + #[inline] + pub fn cross_forward(&mut self, other: &mut Seq) { + let (lits1, lits2) = match self.cross_preamble(other) { + None => return, + Some((lits1, lits2)) => (lits1, lits2), + }; + let newcap = lits1.len().saturating_mul(lits2.len()); + for selflit in mem::replace(lits1, Vec::with_capacity(newcap)) { + if !selflit.is_exact() { + lits1.push(selflit); + continue; + } + for otherlit in lits2.iter() { + let mut newlit = Literal::exact(Vec::with_capacity( + selflit.len() + otherlit.len(), + )); + newlit.extend(&selflit); + newlit.extend(&otherlit); + if !otherlit.is_exact() { + newlit.make_inexact(); + } + lits1.push(newlit); + } + } + lits2.drain(..); + self.dedup(); + } + + /// Modify this sequence to contain the cross product between it and + /// the sequence given, where the sequences are treated as suffixes + /// instead of prefixes. Namely, the sequence `other` is *prepended* + /// to `self` (as opposed to `other` being *appended* to `self` in + /// [`Seq::cross_forward`]). + /// + /// The cross product only considers literals in this sequence that are + /// exact. That is, inexact literals are not extended. + /// + /// The literals are always drained from `other`, even if none are used. + /// This permits callers to reuse the sequence allocation elsewhere. + /// + /// If this sequence is infinite, then this is a no-op, regardless of what + /// `other` contains (and in this case, the literals are still drained from + /// `other`). 
If `other` is infinite and this sequence is finite, then this + /// is a no-op, unless this sequence contains a zero-length literal. In + /// which case, the infiniteness of `other` infects this sequence, and this + /// sequence is itself made infinite. + /// + /// Like [`Seq::union`], this may attempt to deduplicate literals. See + /// [`Seq::dedup`] for how deduplication deals with exact and inexact + /// literals. + /// + /// # Example + /// + /// This example shows basic usage and how exact and inexact literals + /// interact. + /// + /// ``` + /// use regex_syntax::hir::literal::{Literal, Seq}; + /// + /// let mut seq1 = Seq::from_iter([ + /// Literal::exact("foo"), + /// Literal::inexact("bar"), + /// ]); + /// let mut seq2 = Seq::from_iter([ + /// Literal::inexact("quux"), + /// Literal::exact("baz"), + /// ]); + /// seq1.cross_reverse(&mut seq2); + /// + /// // The literals are pulled out of seq2. + /// assert_eq!(Some(0), seq2.len()); + /// + /// let expected = Seq::from_iter([ + /// Literal::inexact("quuxfoo"), + /// Literal::inexact("bar"), + /// Literal::exact("bazfoo"), + /// ]); + /// assert_eq!(expected, seq1); + /// ``` + /// + /// This example shows the behavior of when `other` is an infinite + /// sequence. + /// + /// ``` + /// use regex_syntax::hir::literal::{Literal, Seq}; + /// + /// let mut seq1 = Seq::from_iter([ + /// Literal::exact("foo"), + /// Literal::inexact("bar"), + /// ]); + /// let mut seq2 = Seq::infinite(); + /// seq1.cross_reverse(&mut seq2); + /// + /// // When seq2 is infinite, cross product doesn't add anything, but + /// // ensures all members of seq1 are inexact. + /// let expected = Seq::from_iter([ + /// Literal::inexact("foo"), + /// Literal::inexact("bar"), + /// ]); + /// assert_eq!(expected, seq1); + /// ``` + /// + /// This example is like the one above, but shows what happens when this + /// sequence contains an empty string. 
In this case, an infinite `other` + /// sequence infects this sequence (because the empty string means that + /// there are no finite suffixes): + /// + /// ``` + /// use regex_syntax::hir::literal::{Literal, Seq}; + /// + /// let mut seq1 = Seq::from_iter([ + /// Literal::exact("foo"), + /// Literal::exact(""), // inexact provokes same behavior + /// Literal::inexact("bar"), + /// ]); + /// let mut seq2 = Seq::infinite(); + /// seq1.cross_reverse(&mut seq2); + /// + /// // seq1 is now infinite! + /// assert!(!seq1.is_finite()); + /// ``` + /// + /// This example shows the behavior when this sequence is infinite. + /// + /// ``` + /// use regex_syntax::hir::literal::{Literal, Seq}; + /// + /// let mut seq1 = Seq::infinite(); + /// let mut seq2 = Seq::from_iter([ + /// Literal::exact("foo"), + /// Literal::inexact("bar"), + /// ]); + /// seq1.cross_reverse(&mut seq2); + /// + /// // seq1 remains unchanged. + /// assert!(!seq1.is_finite()); + /// // Even though the literals in seq2 weren't used, it was still drained. + /// assert_eq!(Some(0), seq2.len()); + /// ``` + #[inline] + pub fn cross_reverse(&mut self, other: &mut Seq) { + let (lits1, lits2) = match self.cross_preamble(other) { + None => return, + Some((lits1, lits2)) => (lits1, lits2), + }; + // We basically proceed as we do in 'cross_forward' at this point, + // except that the outer loop is now 'other' and the inner loop is now + // 'self'. That's because 'self' corresponds to suffixes and 'other' + // corresponds to the sequence we want to *prepend* to the suffixes. + let newcap = lits1.len().saturating_mul(lits2.len()); + let selflits = mem::replace(lits1, Vec::with_capacity(newcap)); + for (i, otherlit) in lits2.drain(..).enumerate() { + for selflit in selflits.iter() { + if !selflit.is_exact() { + // If the suffix isn't exact, then we can't prepend + // anything to it. However, we still want to keep it. But + // we only want to keep one of them, to avoid duplication. 
+ // (The duplication is okay from a correctness perspective,
+ // but wasteful.)
+ if i == 0 {
+ lits1.push(selflit.clone());
+ }
+ continue;
+ }
+ let mut newlit = Literal::exact(Vec::with_capacity(
+ otherlit.len() + selflit.len(),
+ ));
+ newlit.extend(&otherlit);
+ newlit.extend(&selflit);
+ if !otherlit.is_exact() {
+ newlit.make_inexact();
+ }
+ lits1.push(newlit);
+ }
+ }
+ self.dedup();
+ }
+
+ /// A helper function that corresponds to the subtle preamble for both
+ /// `cross_forward` and `cross_reverse`. In effect, it handles the cases
+ /// of infinite sequences for both `self` and `other`, as well as ensuring
+ /// that literals from `other` are drained even if they aren't used.
+ fn cross_preamble<'a>(
+ &'a mut self,
+ other: &'a mut Seq,
+ ) -> Option<(&'a mut Vec<Literal>, &'a mut Vec<Literal>)> {
+ let lits2 = match other.literals {
+ None => {
+ // If our current seq contains the empty string and the seq
+ // we're adding matches any literal, then it follows that the
+ // current seq must now also match any literal.
+ //
+ // Otherwise, we just have to make sure everything in this
+ // sequence is inexact.
+ if self.min_literal_len() == Some(0) {
+ *self = Seq::infinite();
+ } else {
+ self.make_inexact();
+ }
+ return None;
+ }
+ Some(ref mut lits) => lits,
+ };
+ let lits1 = match self.literals {
+ None => {
+ // If we aren't going to make it to the end of this routine
+ // where lits2 is drained, then we need to do it now.
+ lits2.drain(..);
+ return None;
+ }
+ Some(ref mut lits) => lits,
+ };
+ Some((lits1, lits2))
+ }
+
+ /// Unions the `other` sequence into this one.
+ ///
+ /// The literals are always drained out of the given `other` sequence,
+ /// even if they are being unioned into an infinite sequence. This permits
+ /// the caller to reuse the `other` sequence in another context.
+ ///
+ /// Some literal deduping may be performed. If any deduping happens,
+ /// any leftmost-first or "preference" order match semantics will be
+ /// preserved.
+ /// + /// # Example + /// + /// This example shows basic usage. + /// + /// ``` + /// use regex_syntax::hir::literal::Seq; + /// + /// let mut seq1 = Seq::new(&["foo", "bar"]); + /// let mut seq2 = Seq::new(&["bar", "quux", "foo"]); + /// seq1.union(&mut seq2); + /// + /// // The literals are pulled out of seq2. + /// assert_eq!(Some(0), seq2.len()); + /// + /// // Adjacent literals are deduped, but non-adjacent literals may not be. + /// assert_eq!(Seq::new(&["foo", "bar", "quux", "foo"]), seq1); + /// ``` + /// + /// This example shows that literals are drained from `other` even when + /// they aren't necessarily used. + /// + /// ``` + /// use regex_syntax::hir::literal::Seq; + /// + /// let mut seq1 = Seq::infinite(); + /// // Infinite sequences have no finite length. + /// assert_eq!(None, seq1.len()); + /// + /// let mut seq2 = Seq::new(&["bar", "quux", "foo"]); + /// seq1.union(&mut seq2); + /// + /// // seq1 is still infinite and seq2 has been drained. + /// assert_eq!(None, seq1.len()); + /// assert_eq!(Some(0), seq2.len()); + /// ``` + #[inline] + pub fn union(&mut self, other: &mut Seq) { + let lits2 = match other.literals { + None => { + // Unioning with an infinite sequence always results in an + // infinite sequence. + self.make_infinite(); + return; + } + Some(ref mut lits) => lits.drain(..), + }; + let lits1 = match self.literals { + None => return, + Some(ref mut lits) => lits, + }; + lits1.extend(lits2); + self.dedup(); + } + + /// Unions the `other` sequence into this one by splice the `other` + /// sequence at the position of the first zero-length literal. + /// + /// This is useful for preserving preference order semantics when combining + /// two literal sequences. For example, in the regex `(a||f)+foo`, the + /// correct preference order prefix sequence is `[a, foo, f]`. + /// + /// The literals are always drained out of the given `other` sequence, + /// even if they are being unioned into an infinite sequence. 
This permits + /// the caller to reuse the `other` sequence in another context. Note that + /// the literals are drained even if no union is performed as well, i.e., + /// when this sequence does not contain a zero-length literal. + /// + /// Some literal deduping may be performed. If any deduping happens, + /// any leftmost-first or "preference" order match semantics will be + /// preserved. + /// + /// # Example + /// + /// This example shows basic usage. + /// + /// ``` + /// use regex_syntax::hir::literal::Seq; + /// + /// let mut seq1 = Seq::new(&["a", "", "f", ""]); + /// let mut seq2 = Seq::new(&["foo"]); + /// seq1.union_into_empty(&mut seq2); + /// + /// // The literals are pulled out of seq2. + /// assert_eq!(Some(0), seq2.len()); + /// // 'foo' gets spliced into seq1 where the first empty string occurs. + /// assert_eq!(Seq::new(&["a", "foo", "f"]), seq1); + /// ``` + /// + /// This example shows that literals are drained from `other` even when + /// they aren't necessarily used. + /// + /// ``` + /// use regex_syntax::hir::literal::Seq; + /// + /// let mut seq1 = Seq::new(&["foo", "bar"]); + /// let mut seq2 = Seq::new(&["bar", "quux", "foo"]); + /// seq1.union_into_empty(&mut seq2); + /// + /// // seq1 has no zero length literals, so no splicing happens. + /// assert_eq!(Seq::new(&["foo", "bar"]), seq1); + /// // Even though no splicing happens, seq2 is still drained. + /// assert_eq!(Some(0), seq2.len()); + /// ``` + #[inline] + pub fn union_into_empty(&mut self, other: &mut Seq) { + let lits2 = other.literals.as_mut().map(|lits| lits.drain(..)); + let lits1 = match self.literals { + None => return, + Some(ref mut lits) => lits, + }; + let first_empty = match lits1.iter().position(|m| m.is_empty()) { + None => return, + Some(i) => i, + }; + let lits2 = match lits2 { + None => { + // Note that we are only here if we've found an empty literal, + // which implies that an infinite sequence infects this seq and + // also turns it into an infinite sequence. 
+ self.literals = None; + return; + } + Some(lits) => lits, + }; + // Clearing out the empties needs to come before the splice because + // the splice might add more empties that we don't want to get rid + // of. Since we're splicing into the position of the first empty, the + // 'first_empty' position computed above is still correct. + lits1.retain(|m| !m.is_empty()); + lits1.splice(first_empty..first_empty, lits2); + self.dedup(); + } + + /// Deduplicate adjacent equivalent literals in this sequence. + /// + /// If adjacent literals are equivalent strings but one is exact and the + /// other inexact, the inexact literal is kept and the exact one is + /// removed. + /// + /// Deduping an infinite sequence is a no-op. + /// + /// # Example + /// + /// This example shows how literals that are duplicate byte strings but + /// are not equivalent with respect to exactness are resolved. + /// + /// ``` + /// use regex_syntax::hir::literal::{Literal, Seq}; + /// + /// let mut seq = Seq::from_iter([ + /// Literal::exact("foo"), + /// Literal::inexact("foo"), + /// ]); + /// seq.dedup(); + /// + /// assert_eq!(Seq::from_iter([Literal::inexact("foo")]), seq); + /// ``` + #[inline] + pub fn dedup(&mut self) { + if let Some(ref mut lits) = self.literals { + lits.dedup_by(|lit1, lit2| { + if lit1.as_bytes() != lit2.as_bytes() { + return false; + } + if lit1.is_exact() != lit2.is_exact() { + lit1.make_inexact(); + lit2.make_inexact(); + } + true + }); + } + } + + /// Sorts this sequence of literals lexicographically. + /// + /// Note that if, before sorting, if a literal that is a prefix of another + /// literal appears after it, then after sorting, the sequence will not + /// represent the same preference order match semantics. For example, + /// sorting the sequence `[samwise, sam]` yields the sequence `[sam, + /// samwise]`. Under preference order semantics, the latter sequence will + /// never match `samwise` where as the first sequence can. 
+ /// + /// # Example + /// + /// This example shows basic usage. + /// + /// ``` + /// use regex_syntax::hir::literal::Seq; + /// + /// let mut seq = Seq::new(&["foo", "quux", "bar"]); + /// seq.sort(); + /// + /// assert_eq!(Seq::new(&["bar", "foo", "quux"]), seq); + /// ``` + #[inline] + pub fn sort(&mut self) { + if let Some(ref mut lits) = self.literals { + lits.sort(); + } + } + + /// Reverses all of the literals in this sequence. + /// + /// The order of the sequence itself is preserved. + /// + /// # Example + /// + /// This example shows basic usage. + /// + /// ``` + /// use regex_syntax::hir::literal::Seq; + /// + /// let mut seq = Seq::new(&["oof", "rab"]); + /// seq.reverse_literals(); + /// assert_eq!(Seq::new(&["foo", "bar"]), seq); + /// ``` + #[inline] + pub fn reverse_literals(&mut self) { + if let Some(ref mut lits) = self.literals { + for lit in lits.iter_mut() { + lit.reverse(); + } + } + } + + /// Shrinks this seq to its minimal size while respecting the preference + /// order of its literals. + /// + /// While this routine will remove duplicate literals from this seq, it + /// will also remove literals that can never match in a leftmost-first or + /// "preference order" search. Similar to [`Seq::dedup`], if a literal is + /// deduped, then the one that remains is made inexact. + /// + /// This is a no-op on seqs that are empty or not finite. + /// + /// # Example + /// + /// This example shows the difference between `{sam, samwise}` and + /// `{samwise, sam}`. + /// + /// ``` + /// use regex_syntax::hir::literal::{Literal, Seq}; + /// + /// // If 'sam' comes before 'samwise' and a preference order search is + /// // executed, then 'samwise' can never match. + /// let mut seq = Seq::new(&["sam", "samwise"]); + /// seq.minimize_by_preference(); + /// assert_eq!(Seq::from_iter([Literal::inexact("sam")]), seq); + /// + /// // But if they are reversed, then it's possible for 'samwise' to match + /// // since it is given higher preference. 
+ /// let mut seq = Seq::new(&["samwise", "sam"]); + /// seq.minimize_by_preference(); + /// assert_eq!(Seq::new(&["samwise", "sam"]), seq); + /// ``` + /// + /// This example shows that if an empty string is in this seq, then + /// anything that comes after it can never match. + /// + /// ``` + /// use regex_syntax::hir::literal::{Literal, Seq}; + /// + /// // An empty string is a prefix of all strings, so it automatically + /// // inhibits any subsequent strings from matching. + /// let mut seq = Seq::new(&["foo", "bar", "", "quux", "fox"]); + /// seq.minimize_by_preference(); + /// let expected = Seq::from_iter([ + /// Literal::exact("foo"), + /// Literal::exact("bar"), + /// Literal::inexact(""), + /// ]); + /// assert_eq!(expected, seq); + /// + /// // And of course, if it's at the beginning, then it makes it impossible + /// // for anything else to match. + /// let mut seq = Seq::new(&["", "foo", "quux", "fox"]); + /// seq.minimize_by_preference(); + /// assert_eq!(Seq::from_iter([Literal::inexact("")]), seq); + /// ``` + #[inline] + pub fn minimize_by_preference(&mut self) { + if let Some(ref mut lits) = self.literals { + PreferenceTrie::minimize(lits, false); + } + } + + /// Trims all literals in this seq such that only the first `len` bytes + /// remain. If a literal has less than or equal to `len` bytes, then it + /// remains unchanged. Otherwise, it is trimmed and made inexact. 
+ /// + /// # Example + /// + /// ``` + /// use regex_syntax::hir::literal::{Literal, Seq}; + /// + /// let mut seq = Seq::new(&["a", "foo", "quux"]); + /// seq.keep_first_bytes(2); + /// + /// let expected = Seq::from_iter([ + /// Literal::exact("a"), + /// Literal::inexact("fo"), + /// Literal::inexact("qu"), + /// ]); + /// assert_eq!(expected, seq); + /// ``` + #[inline] + pub fn keep_first_bytes(&mut self, len: usize) { + if let Some(ref mut lits) = self.literals { + for m in lits.iter_mut() { + m.keep_first_bytes(len); + } + } + } + + /// Trims all literals in this seq such that only the last `len` bytes + /// remain. If a literal has less than or equal to `len` bytes, then it + /// remains unchanged. Otherwise, it is trimmed and made inexact. + /// + /// # Example + /// + /// ``` + /// use regex_syntax::hir::literal::{Literal, Seq}; + /// + /// let mut seq = Seq::new(&["a", "foo", "quux"]); + /// seq.keep_last_bytes(2); + /// + /// let expected = Seq::from_iter([ + /// Literal::exact("a"), + /// Literal::inexact("oo"), + /// Literal::inexact("ux"), + /// ]); + /// assert_eq!(expected, seq); + /// ``` + #[inline] + pub fn keep_last_bytes(&mut self, len: usize) { + if let Some(ref mut lits) = self.literals { + for m in lits.iter_mut() { + m.keep_last_bytes(len); + } + } + } + + /// Returns true if this sequence is finite. + /// + /// When false, this sequence is infinite and must be treated as if it + /// contains every possible literal. + #[inline] + pub fn is_finite(&self) -> bool { + self.literals.is_some() + } + + /// Returns true if and only if this sequence is finite and empty. + /// + /// An empty sequence never matches anything. It can only be produced by + /// literal extraction when the corresponding regex itself cannot match. + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == Some(0) + } + + /// Returns the number of literals in this sequence if the sequence is + /// finite. If the sequence is infinite, then `None` is returned. 
+ #[inline]
+ pub fn len(&self) -> Option<usize> {
+ self.literals.as_ref().map(|lits| lits.len())
+ }
+
+ /// Returns true if and only if all literals in this sequence are exact.
+ ///
+ /// This returns false if the sequence is infinite.
+ #[inline]
+ pub fn is_exact(&self) -> bool {
+ self.literals().map_or(false, |lits| lits.iter().all(|x| x.is_exact()))
+ }
+
+ /// Returns true if and only if all literals in this sequence are inexact.
+ ///
+ /// This returns true if the sequence is infinite.
+ #[inline]
+ pub fn is_inexact(&self) -> bool {
+ self.literals().map_or(true, |lits| lits.iter().all(|x| !x.is_exact()))
+ }
+
+ /// Return the maximum length of the sequence that would result from
+ /// unioning `self` with `other`. If either set is infinite, then this
+ /// returns `None`.
+ #[inline]
+ fn max_union_len(&self, other: &Seq) -> Option<usize> {
+ let len1 = self.len()?;
+ let len2 = other.len()?;
+ Some(len1.saturating_add(len2))
+ }
+
+ /// Return the maximum length of the sequence that would result from the
+ /// cross product of `self` with `other`. If either set is infinite, then
+ /// this returns `None`.
+ #[inline]
+ fn max_cross_len(&self, other: &Seq) -> Option<usize> {
+ let len1 = self.len()?;
+ let len2 = other.len()?;
+ Some(len1.saturating_mul(len2))
+ }
+
+ /// Returns the length of the shortest literal in this sequence.
+ ///
+ /// If the sequence is infinite or empty, then this returns `None`.
+ #[inline]
+ pub fn min_literal_len(&self) -> Option<usize> {
+ self.literals.as_ref()?.iter().map(|x| x.len()).min()
+ }
+
+ /// Returns the length of the longest literal in this sequence.
+ ///
+ /// If the sequence is infinite or empty, then this returns `None`.
+ #[inline]
+ pub fn max_literal_len(&self) -> Option<usize> {
+ self.literals.as_ref()?.iter().map(|x| x.len()).max()
+ }
+
+ /// Returns the longest common prefix from this seq.
+ ///
+ /// If the seq matches any literal or otherwise contains no literals, then
+ /// there is no meaningful prefix and this returns `None`.
+ /// + /// # Example + /// + /// This shows some example seqs and their longest common prefix. + /// + /// ``` + /// use regex_syntax::hir::literal::Seq; + /// + /// let seq = Seq::new(&["foo", "foobar", "fo"]); + /// assert_eq!(Some(&b"fo"[..]), seq.longest_common_prefix()); + /// let seq = Seq::new(&["foo", "foo"]); + /// assert_eq!(Some(&b"foo"[..]), seq.longest_common_prefix()); + /// let seq = Seq::new(&["foo", "bar"]); + /// assert_eq!(Some(&b""[..]), seq.longest_common_prefix()); + /// let seq = Seq::new(&[""]); + /// assert_eq!(Some(&b""[..]), seq.longest_common_prefix()); + /// + /// let seq = Seq::infinite(); + /// assert_eq!(None, seq.longest_common_prefix()); + /// let seq = Seq::empty(); + /// assert_eq!(None, seq.longest_common_prefix()); + /// ``` + #[inline] + pub fn longest_common_prefix(&self) -> Option<&[u8]> { + // If we match everything or match nothing, then there's no meaningful + // longest common prefix. + let lits = match self.literals { + None => return None, + Some(ref lits) => lits, + }; + if lits.len() == 0 { + return None; + } + let base = lits[0].as_bytes(); + let mut len = base.len(); + for m in lits.iter().skip(1) { + len = m + .as_bytes() + .iter() + .zip(base[..len].iter()) + .take_while(|&(a, b)| a == b) + .count(); + if len == 0 { + return Some(&[]); + } + } + Some(&base[..len]) + } + + /// Returns the longest common suffix from this seq. + /// + /// If the seq matches any literal or other contains no literals, then + /// there is no meaningful suffix and this returns `None`. + /// + /// # Example + /// + /// This shows some example seqs and their longest common suffix. 
+ /// + /// ``` + /// use regex_syntax::hir::literal::Seq; + /// + /// let seq = Seq::new(&["oof", "raboof", "of"]); + /// assert_eq!(Some(&b"of"[..]), seq.longest_common_suffix()); + /// let seq = Seq::new(&["foo", "foo"]); + /// assert_eq!(Some(&b"foo"[..]), seq.longest_common_suffix()); + /// let seq = Seq::new(&["foo", "bar"]); + /// assert_eq!(Some(&b""[..]), seq.longest_common_suffix()); + /// let seq = Seq::new(&[""]); + /// assert_eq!(Some(&b""[..]), seq.longest_common_suffix()); + /// + /// let seq = Seq::infinite(); + /// assert_eq!(None, seq.longest_common_suffix()); + /// let seq = Seq::empty(); + /// assert_eq!(None, seq.longest_common_suffix()); + /// ``` + #[inline] + pub fn longest_common_suffix(&self) -> Option<&[u8]> { + // If we match everything or match nothing, then there's no meaningful + // longest common suffix. + let lits = match self.literals { + None => return None, + Some(ref lits) => lits, + }; + if lits.len() == 0 { + return None; + } + let base = lits[0].as_bytes(); + let mut len = base.len(); + for m in lits.iter().skip(1) { + len = m + .as_bytes() + .iter() + .rev() + .zip(base[base.len() - len..].iter().rev()) + .take_while(|&(a, b)| a == b) + .count(); + if len == 0 { + return Some(&[]); + } + } + Some(&base[base.len() - len..]) + } + + /// Optimizes this seq while treating its literals as prefixes and + /// respecting the preference order of its literals. + /// + /// The specific way "optimization" works is meant to be an implementation + /// detail, as it essentially represents a set of heuristics. The goal + /// that optimization tries to accomplish is to make the literals in this + /// set reflect inputs that will result in a more effective prefilter. + /// Principally by reducing the false positive rate of candidates found by + /// the literals in this sequence. That is, when a match of a literal is + /// found, we would like it to be a strong predictor of the overall match + /// of the regex. 
If it isn't, then much time will be spent starting and + /// stopping the prefilter search and attempting to confirm the match only + /// to have it fail. + /// + /// Some of those heuristics might be: + /// + /// * Identifying a common prefix from a larger sequence of literals, and + /// shrinking the sequence down to that single common prefix. + /// * Rejecting the sequence entirely if it is believed to result in very + /// high false positive rate. When this happens, the sequence is made + /// infinite. + /// * Shrinking the sequence to a smaller number of literals representing + /// prefixes, but not shrinking it so much as to make literals too short. + /// (A sequence with very short literals, of 1 or 2 bytes, will typically + /// result in a higher false positive rate.) + /// + /// Optimization should only be run once extraction is complete. Namely, + /// optimization may make assumptions that do not compose with other + /// operations in the middle of extraction. For example, optimization will + /// reduce `[E(sam), E(samwise)]` to `[E(sam)]`, but such a transformation + /// is only valid if no other extraction will occur. If other extraction + /// may occur, then the correct transformation would be to `[I(sam)]`. + /// + /// The [`Seq::optimize_for_suffix_by_preference`] does the same thing, but + /// for suffixes. + /// + /// # Example + /// + /// This shows how optimization might transform a sequence. Note that + /// the specific behavior is not a documented guarantee. The heuristics + /// used are an implementation detail and may change over time in semver + /// compatible releases. 
+ /// + /// ``` + /// use regex_syntax::hir::literal::{Seq, Literal}; + /// + /// let mut seq = Seq::new(&[ + /// "samantha", + /// "sam", + /// "samwise", + /// "frodo", + /// ]); + /// seq.optimize_for_prefix_by_preference(); + /// assert_eq!(Seq::from_iter([ + /// Literal::exact("samantha"), + /// // Kept exact even though 'samwise' got pruned + /// // because optimization assumes literal extraction + /// // has finished. + /// Literal::exact("sam"), + /// Literal::exact("frodo"), + /// ]), seq); + /// ``` + /// + /// # Example: optimization may make the sequence infinite + /// + /// If the heuristics deem that the sequence could cause a very high false + /// positive rate, then it may make the sequence infinite, effectively + /// disabling its use as a prefilter. + /// + /// ``` + /// use regex_syntax::hir::literal::{Seq, Literal}; + /// + /// let mut seq = Seq::new(&[ + /// "samantha", + /// // An empty string matches at every position, + /// // thus rendering the prefilter completely + /// // ineffective. + /// "", + /// "sam", + /// "samwise", + /// "frodo", + /// ]); + /// seq.optimize_for_prefix_by_preference(); + /// assert!(!seq.is_finite()); + /// ``` + /// + /// Do note that just because there is a `" "` in the sequence, that + /// doesn't mean the sequence will always be made infinite after it is + /// optimized. Namely, if the sequence is considered exact (any match + /// corresponds to an overall match of the original regex), then any match + /// is an overall match, and so the false positive rate is always `0`. + /// + /// To demonstrate this, we remove `samwise` from our sequence. This + /// results in no optimization happening and all literals remain exact. 
+ /// Thus the entire sequence is exact, and it is kept as-is, even though + /// one is an ASCII space: + /// + /// ``` + /// use regex_syntax::hir::literal::{Seq, Literal}; + /// + /// let mut seq = Seq::new(&[ + /// "samantha", + /// " ", + /// "sam", + /// "frodo", + /// ]); + /// seq.optimize_for_prefix_by_preference(); + /// assert!(seq.is_finite()); + /// ``` + #[inline] + pub fn optimize_for_prefix_by_preference(&mut self) { + self.optimize_by_preference(true); + } + + /// Optimizes this seq while treating its literals as suffixes and + /// respecting the preference order of its literals. + /// + /// Optimization should only be run once extraction is complete. + /// + /// The [`Seq::optimize_for_prefix_by_preference`] does the same thing, but + /// for prefixes. See its documentation for more explanation. + #[inline] + pub fn optimize_for_suffix_by_preference(&mut self) { + self.optimize_by_preference(false); + } + + fn optimize_by_preference(&mut self, prefix: bool) { + let origlen = match self.len() { + None => return, + Some(len) => len, + }; + // Make sure we start with the smallest sequence possible. We use a + // special version of preference minimization that retains exactness. + // This is legal because optimization is only expected to occur once + // extraction is complete. + if prefix { + if let Some(ref mut lits) = self.literals { + PreferenceTrie::minimize(lits, true); + } + } + + // Look for a common prefix (or suffix). If we found one of those and + // it's long enough, then it's a good bet that it will be our fastest + // possible prefilter since single-substring search is so fast. + let fix = if prefix { + self.longest_common_prefix() + } else { + self.longest_common_suffix() + }; + if let Some(fix) = fix { + // As a special case, if we have a common prefix and the leading + // byte of that prefix is one that we think probably occurs rarely, + // then strip everything down to just that single byte. This should + // promote the use of memchr. 
+        //
+        // ... we only do this though if our sequence has more than one
+        // literal. Otherwise, we'd rather just stick with a single literal
+        // scan. That is, using memchr is probably better than looking
+        // for 2 or more literals, but probably not as good as a straight
+        // memmem search.
+        //
+        // ... and also only do this when the prefix is short and probably
+        // not too discriminatory anyway. If it's longer, then it's
+        // probably quite discriminatory and thus is likely to have a low
+        // false positive rate.
+        if prefix
+            && origlen > 1
+            && fix.len() >= 1
+            && fix.len() <= 3
+            && rank(fix[0]) < 200
+        {
+            self.keep_first_bytes(1);
+            self.dedup();
+            return;
+        }
+        // We only strip down to the common prefix/suffix if we think
+        // the existing set of literals isn't great, or if the common
+        // prefix/suffix is expected to be particularly discriminatory.
+        let isfast =
+            self.is_exact() && self.len().map_or(false, |len| len <= 16);
+        let usefix = fix.len() > 4 || (fix.len() > 1 && !isfast);
+        if usefix {
+            // If we keep exactly the number of bytes equal to the length
+            // of the prefix (or suffix), then by the definition of a
+            // prefix, every literal in the sequence will be equivalent.
+            // Thus, 'dedup' will leave us with one literal.
+            //
+            // We do it this way to avoid an alloc, but also to make sure
+            // the exactness of literals is kept (or not).
+            if prefix {
+                self.keep_first_bytes(fix.len());
+            } else {
+                self.keep_last_bytes(fix.len());
+            }
+            self.dedup();
+            assert_eq!(Some(1), self.len());
+            // We still fall through here. In particular, we want our
+            // longest common prefix to be subject to the poison check.
+        }
+        // Everything below this check is more-or-less about trying to
+        // heuristically reduce the false positive rate of a prefilter. But
+        // if our sequence is completely exact, then it's possible the regex
+        // engine can be skipped entirely. In this case, the false positive
+        // rate is zero because every literal match corresponds to a regex
+        // match.
+        //
+        // This is OK even if the sequence contains a poison literal. Remember,
+        // a literal is only poisonous because of what we assume about its
+        // impact on the false positive rate. However, we do still check for
+        // an empty string. Empty strings are weird and it's best to let the
+        // regex engine handle those.
+        //
+        // We currently do this check after the longest common prefix (or
+        // suffix) check, under the theory that single-substring search is so
+        // fast that we want that even if we'd end up turning an exact sequence
+        // into an inexact one. But this might be wrong...
+        if self.is_exact()
+            && self.min_literal_len().map_or(false, |len| len > 0)
+        {
+            return;
+        }
+        // Now we attempt to shorten the sequence. The idea here is that we
+        // don't want to look for too many literals, but we want to shorten
+        // our sequence enough to improve our odds of using better algorithms
+        // downstream (such as Teddy).
+        const ATTEMPTS: [(usize, usize); 5] =
+            [(5, 64), (4, 64), (3, 64), (2, 64), (1, 10)];
+        for (keep, limit) in ATTEMPTS {
+            let len = match self.len() {
+                None => break,
+                Some(len) => len,
+            };
+            if len <= limit {
+                break;
+            }
+            if prefix {
+                self.keep_first_bytes(keep);
+            } else {
+                self.keep_last_bytes(keep);
+            }
+            self.minimize_by_preference();
+        }
+        // Check for a poison literal. A poison literal is one that is short
+        // and is believed to have a very high match count. These poisons
+        // generally lead to a prefilter with a very high false positive rate,
+        // and thus overall worse performance.
+        //
+        // We do this last because we could have gone from a non-poisonous
+        // sequence to a poisonous one. Perhaps we should add some code to
+        // prevent such transitions in the first place, but then again, we
+        // likely only made the transition in the first place if the sequence
+        // was itself huge. And huge sequences are themselves poisonous. So...
+        if let Some(lits) = self.literals() {
+            if lits.iter().any(|lit| lit.is_poisonous()) {
+                self.make_infinite();
+            }
+        }
+    }
+}
+
+impl core::fmt::Debug for Seq {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(f, "Seq")?;
+        if let Some(lits) = self.literals() {
+            f.debug_list().entries(lits.iter()).finish()
+        } else {
+            write!(f, "[∅]")
+        }
+    }
+}
+
+impl FromIterator<Literal> for Seq {
+    fn from_iter<T: IntoIterator<Item = Literal>>(it: T) -> Seq {
+        let mut seq = Seq::empty();
+        for literal in it {
+            seq.push(literal);
+        }
+        seq
+    }
+}
+
+/// A single literal extracted from an [`Hir`] expression.
+///
+/// A literal is composed of two things:
+///
+/// * A sequence of bytes. No guarantees with respect to UTF-8 are provided.
+/// In particular, even if the regex a literal is extracted from is UTF-8, the
+/// literal extracted may not be valid UTF-8. (For example, if an [`Extractor`]
+/// limit resulted in trimming a literal in a way that splits a codepoint.)
+/// * Whether the literal is "exact" or not. An "exact" literal means that it
+/// has not been trimmed, and may continue to be extended. If a literal is
+/// "exact" after visiting the entire `Hir` expression, then this implies that
+/// the literal leads to a match state. (Although it doesn't necessarily imply
+/// all occurrences of the literal correspond to a match of the regex, since
+/// literal extraction ignores look-around assertions.)
+#[derive(Clone, Eq, PartialEq, PartialOrd, Ord)]
+pub struct Literal {
+    bytes: Vec<u8>,
+    exact: bool,
+}
+
+impl Literal {
+    /// Returns a new exact literal containing the bytes given.
+    #[inline]
+    pub fn exact<B: Into<Vec<u8>>>(bytes: B) -> Literal {
+        Literal { bytes: bytes.into(), exact: true }
+    }
+
+    /// Returns a new inexact literal containing the bytes given.
+    #[inline]
+    pub fn inexact<B: Into<Vec<u8>>>(bytes: B) -> Literal {
+        Literal { bytes: bytes.into(), exact: false }
+    }
+
+    /// Returns the bytes in this literal.
+    #[inline]
+    pub fn as_bytes(&self) -> &[u8] {
+        &self.bytes
+    }
+
+    /// Yields ownership of the bytes inside this literal.
+    ///
+    /// Note that this throws away whether the literal is "exact" or not.
+    #[inline]
+    pub fn into_bytes(self) -> Vec<u8> {
+        self.bytes
+    }
+
+    /// Returns the length of this literal in bytes.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.as_bytes().len()
+    }
+
+    /// Returns true if and only if this literal has zero bytes.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Returns true if and only if this literal is exact.
+    #[inline]
+    pub fn is_exact(&self) -> bool {
+        self.exact
+    }
+
+    /// Marks this literal as inexact.
+    ///
+    /// Inexact literals can never be extended. For example,
+    /// [`Seq::cross_forward`] will not extend inexact literals.
+    #[inline]
+    pub fn make_inexact(&mut self) {
+        self.exact = false;
+    }
+
+    /// Reverse the bytes in this literal.
+    #[inline]
+    pub fn reverse(&mut self) {
+        self.bytes.reverse();
+    }
+
+    /// Extend this literal with the literal given.
+    ///
+    /// If this literal is inexact, then this is a no-op.
+    #[inline]
+    pub fn extend(&mut self, lit: &Literal) {
+        if !self.is_exact() {
+            return;
+        }
+        self.bytes.extend_from_slice(&lit.bytes);
+    }
+
+    /// Trims this literal such that only the first `len` bytes remain. If
+    /// this literal has fewer than `len` bytes, then it remains unchanged.
+    /// Otherwise, the literal is marked as inexact.
+    #[inline]
+    pub fn keep_first_bytes(&mut self, len: usize) {
+        if len >= self.len() {
+            return;
+        }
+        self.make_inexact();
+        self.bytes.truncate(len);
+    }
+
+    /// Trims this literal such that only the last `len` bytes remain. If this
+    /// literal has fewer than `len` bytes, then it remains unchanged.
+    /// Otherwise, the literal is marked as inexact.
+    #[inline]
+    pub fn keep_last_bytes(&mut self, len: usize) {
+        if len >= self.len() {
+            return;
+        }
+        self.make_inexact();
+        self.bytes.drain(..self.len() - len);
+    }
+
+    /// Returns true if it is believed that this literal is likely to match
+    /// very frequently, and is thus not a good candidate for a prefilter.
+    fn is_poisonous(&self) -> bool {
+        self.is_empty() || (self.len() == 1 && rank(self.as_bytes()[0]) >= 250)
+    }
+}
+
+impl From<u8> for Literal {
+    fn from(byte: u8) -> Literal {
+        Literal::exact(vec![byte])
+    }
+}
+
+impl From<char> for Literal {
+    fn from(ch: char) -> Literal {
+        use alloc::string::ToString;
+        Literal::exact(ch.encode_utf8(&mut [0; 4]).to_string())
+    }
+}
+
+impl AsRef<[u8]> for Literal {
+    fn as_ref(&self) -> &[u8] {
+        self.as_bytes()
+    }
+}
+
+impl core::fmt::Debug for Literal {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        let tag = if self.exact { "E" } else { "I" };
+        f.debug_tuple(tag)
+            .field(&crate::debug::Bytes(self.as_bytes()))
+            .finish()
+    }
+}
+
+/// A "preference" trie that rejects literals that will never match when
+/// executing a leftmost first or "preference" search.
+///
+/// For example, if 'sam' is inserted, then trying to insert 'samwise' will be
+/// rejected because 'samwise' can never match since 'sam' will always take
+/// priority. However, if 'samwise' is inserted first, then inserting 'sam'
+/// after it is accepted. In this case, either 'samwise' or 'sam' can match in
+/// a "preference" search.
+///
+/// Note that we only use this trie as a "set." That is, given a sequence of
+/// literals, we insert each one in order. An `insert` will reject a literal
+/// if a prefix of that literal already exists in the trie. Thus, to rebuild
+/// the "minimal" sequence, we simply only keep literals that were successfully
+/// inserted. (Since we don't need traversal, one wonders whether we can make
+/// some simplifications here, but I haven't given it a ton of thought and I've
+/// never seen this show up on a profile. Because of the heuristic limits
+/// imposed on literal extractions, the size of the inputs here is usually
+/// very small.)
+#[derive(Debug, Default)]
+struct PreferenceTrie {
+    /// The states in this trie. The index of a state in this vector is its ID.
+    states: Vec<State>,
+    /// The index to allocate to the next literal added to this trie. Starts at
+    /// 0 and increments by 1 for every literal successfully added to the trie.
+    next_literal_index: usize,
+}
+
+/// A single state in a trie. Uses a sparse representation for its transitions.
+#[derive(Debug, Default)]
+struct State {
+    /// Sparse representation of the transitions out of this state. Transitions
+    /// are sorted by byte. There is at most one such transition for any
+    /// particular byte.
+    trans: Vec<(u8, usize)>,
+    /// Whether this is a matching state or not. If it is, then it contains the
+    /// index to the matching literal.
+    literal_index: Option<usize>,
+}
+
+impl PreferenceTrie {
+    /// Minimizes the given sequence of literals while preserving preference
+    /// order semantics.
+    ///
+    /// When `keep_exact` is true, the exactness of every literal retained is
+    /// kept. This is useful when dealing with a fully extracted `Seq` that
+    /// only contains exact literals. In that case, we can keep all retained
+    /// literals as exact because we know we'll never need to match anything
+    /// after them and because any removed literals are guaranteed to never
+    /// match.
+    fn minimize(literals: &mut Vec<Literal>, keep_exact: bool) {
+        use core::cell::RefCell;
+
+        // MSRV(1.61): Use retain_mut here to avoid interior mutability.
+        let trie = RefCell::new(PreferenceTrie::default());
+        let mut make_inexact = vec![];
+        literals.retain(|lit| {
+            match trie.borrow_mut().insert(lit.as_bytes()) {
+                Ok(_) => true,
+                Err(i) => {
+                    if !keep_exact {
+                        make_inexact.push(i);
+                    }
+                    false
+                }
+            }
+        });
+        for i in make_inexact {
+            literals[i].make_inexact();
+        }
+    }
+
+    /// Returns `Ok` if the given byte string is accepted into this trie and
+    /// `Err` otherwise. The index for the success case corresponds to the
+    /// index of the literal added. The index for the error case corresponds to
+    /// the index of the literal already in the trie that prevented the given
+    /// byte string from being added. (Which implies it is a prefix of the one
+    /// given.)
+    ///
+    /// In short, the byte string given is accepted into the trie if and only
+    /// if it is possible for it to match when executing a preference order
+    /// search.
+    fn insert(&mut self, bytes: &[u8]) -> Result<usize, usize> {
+        let mut prev = self.root();
+        if let Some(idx) = self.states[prev].literal_index {
+            return Err(idx);
+        }
+        for &b in bytes.iter() {
+            match self.states[prev].trans.binary_search_by_key(&b, |t| t.0) {
+                Ok(i) => {
+                    prev = self.states[prev].trans[i].1;
+                    if let Some(idx) = self.states[prev].literal_index {
+                        return Err(idx);
+                    }
+                }
+                Err(i) => {
+                    let next = self.create_state();
+                    self.states[prev].trans.insert(i, (b, next));
+                    prev = next;
+                }
+            }
+        }
+        let idx = self.next_literal_index;
+        self.next_literal_index += 1;
+        self.states[prev].literal_index = Some(idx);
+        Ok(idx)
+    }
+
+    /// Returns the root state ID, and if it doesn't exist, creates it.
+    fn root(&mut self) -> usize {
+        if !self.states.is_empty() {
+            0
+        } else {
+            self.create_state()
+        }
+    }
+
+    /// Creates a new empty state and returns its ID.
+    fn create_state(&mut self) -> usize {
+        let id = self.states.len();
+        self.states.push(State::default());
+        id
+    }
+}
+
+/// Returns the "rank" of the given byte.
+/// +/// The minimum rank value is `0` and the maximum rank value is `255`. +/// +/// The rank of a byte is derived from a heuristic background distribution of +/// relative frequencies of bytes. The heuristic says that lower the rank of a +/// byte, the less likely that byte is to appear in any arbitrary haystack. +pub fn rank(byte: u8) -> u8 { + crate::rank::BYTE_FREQUENCIES[usize::from(byte)] +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse(pattern: &str) -> Hir { + crate::ParserBuilder::new().utf8(false).build().parse(pattern).unwrap() + } + + fn prefixes(pattern: &str) -> Seq { + Extractor::new().kind(ExtractKind::Prefix).extract(&parse(pattern)) + } + + fn suffixes(pattern: &str) -> Seq { + Extractor::new().kind(ExtractKind::Suffix).extract(&parse(pattern)) + } + + fn e(pattern: &str) -> (Seq, Seq) { + (prefixes(pattern), suffixes(pattern)) + } + + #[allow(non_snake_case)] + fn E(x: &str) -> Literal { + Literal::exact(x.as_bytes()) + } + + #[allow(non_snake_case)] + fn I(x: &str) -> Literal { + Literal::inexact(x.as_bytes()) + } + + fn seq>(it: I) -> Seq { + Seq::from_iter(it) + } + + fn infinite() -> (Seq, Seq) { + (Seq::infinite(), Seq::infinite()) + } + + fn inexact(it1: I1, it2: I2) -> (Seq, Seq) + where + I1: IntoIterator, + I2: IntoIterator, + { + (Seq::from_iter(it1), Seq::from_iter(it2)) + } + + fn exact, I: IntoIterator>(it: I) -> (Seq, Seq) { + let s1 = Seq::new(it); + let s2 = s1.clone(); + (s1, s2) + } + + fn opt, I: IntoIterator>(it: I) -> (Seq, Seq) { + let (mut p, mut s) = exact(it); + p.optimize_for_prefix_by_preference(); + s.optimize_for_suffix_by_preference(); + (p, s) + } + + #[test] + fn literal() { + assert_eq!(exact(["a"]), e("a")); + assert_eq!(exact(["aaaaa"]), e("aaaaa")); + assert_eq!(exact(["A", "a"]), e("(?i-u)a")); + assert_eq!(exact(["AB", "Ab", "aB", "ab"]), e("(?i-u)ab")); + assert_eq!(exact(["abC", "abc"]), e("ab(?i-u)c")); + + assert_eq!(exact([b"\xFF"]), e(r"(?-u:\xFF)")); + + #[cfg(feature = "unicode-case")] + 
{ + assert_eq!(exact(["☃"]), e("☃")); + assert_eq!(exact(["☃"]), e("(?i)☃")); + assert_eq!(exact(["☃☃☃☃☃"]), e("☃☃☃☃☃")); + + assert_eq!(exact(["Δ"]), e("Δ")); + assert_eq!(exact(["δ"]), e("δ")); + assert_eq!(exact(["Δ", "δ"]), e("(?i)Δ")); + assert_eq!(exact(["Δ", "δ"]), e("(?i)δ")); + + assert_eq!(exact(["S", "s", "ſ"]), e("(?i)S")); + assert_eq!(exact(["S", "s", "ſ"]), e("(?i)s")); + assert_eq!(exact(["S", "s", "ſ"]), e("(?i)ſ")); + } + + let letters = "ͱͳͷΐάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋ"; + assert_eq!(exact([letters]), e(letters)); + } + + #[test] + fn class() { + assert_eq!(exact(["a", "b", "c"]), e("[abc]")); + assert_eq!(exact(["a1b", "a2b", "a3b"]), e("a[123]b")); + assert_eq!(exact(["δ", "ε"]), e("[εδ]")); + #[cfg(feature = "unicode-case")] + { + assert_eq!(exact(["Δ", "Ε", "δ", "ε", "ϵ"]), e(r"(?i)[εδ]")); + } + } + + #[test] + fn look() { + assert_eq!(exact(["ab"]), e(r"a\Ab")); + assert_eq!(exact(["ab"]), e(r"a\zb")); + assert_eq!(exact(["ab"]), e(r"a(?m:^)b")); + assert_eq!(exact(["ab"]), e(r"a(?m:$)b")); + assert_eq!(exact(["ab"]), e(r"a\bb")); + assert_eq!(exact(["ab"]), e(r"a\Bb")); + assert_eq!(exact(["ab"]), e(r"a(?-u:\b)b")); + assert_eq!(exact(["ab"]), e(r"a(?-u:\B)b")); + + assert_eq!(exact(["ab"]), e(r"^ab")); + assert_eq!(exact(["ab"]), e(r"$ab")); + assert_eq!(exact(["ab"]), e(r"(?m:^)ab")); + assert_eq!(exact(["ab"]), e(r"(?m:$)ab")); + assert_eq!(exact(["ab"]), e(r"\bab")); + assert_eq!(exact(["ab"]), e(r"\Bab")); + assert_eq!(exact(["ab"]), e(r"(?-u:\b)ab")); + assert_eq!(exact(["ab"]), e(r"(?-u:\B)ab")); + + assert_eq!(exact(["ab"]), e(r"ab^")); + assert_eq!(exact(["ab"]), e(r"ab$")); + assert_eq!(exact(["ab"]), e(r"ab(?m:^)")); + assert_eq!(exact(["ab"]), e(r"ab(?m:$)")); + assert_eq!(exact(["ab"]), e(r"ab\b")); + assert_eq!(exact(["ab"]), e(r"ab\B")); + assert_eq!(exact(["ab"]), e(r"ab(?-u:\b)")); + assert_eq!(exact(["ab"]), e(r"ab(?-u:\B)")); + + let expected = (seq([I("aZ"), E("ab")]), seq([I("Zb"), E("ab")])); + 
assert_eq!(expected, e(r"^aZ*b")); + } + + #[test] + fn repetition() { + assert_eq!(exact(["a", ""]), e(r"a?")); + assert_eq!(exact(["", "a"]), e(r"a??")); + assert_eq!(inexact([I("a"), E("")], [I("a"), E("")]), e(r"a*")); + assert_eq!(inexact([E(""), I("a")], [E(""), I("a")]), e(r"a*?")); + assert_eq!(inexact([I("a")], [I("a")]), e(r"a+")); + assert_eq!(inexact([I("a")], [I("a")]), e(r"(a+)+")); + + assert_eq!(exact(["ab"]), e(r"aZ{0}b")); + assert_eq!(exact(["aZb", "ab"]), e(r"aZ?b")); + assert_eq!(exact(["ab", "aZb"]), e(r"aZ??b")); + assert_eq!( + inexact([I("aZ"), E("ab")], [I("Zb"), E("ab")]), + e(r"aZ*b") + ); + assert_eq!( + inexact([E("ab"), I("aZ")], [E("ab"), I("Zb")]), + e(r"aZ*?b") + ); + assert_eq!(inexact([I("aZ")], [I("Zb")]), e(r"aZ+b")); + assert_eq!(inexact([I("aZ")], [I("Zb")]), e(r"aZ+?b")); + + assert_eq!(exact(["aZZb"]), e(r"aZ{2}b")); + assert_eq!(inexact([I("aZZ")], [I("ZZb")]), e(r"aZ{2,3}b")); + + assert_eq!(exact(["abc", ""]), e(r"(abc)?")); + assert_eq!(exact(["", "abc"]), e(r"(abc)??")); + + assert_eq!(inexact([I("a"), E("b")], [I("ab"), E("b")]), e(r"a*b")); + assert_eq!(inexact([E("b"), I("a")], [E("b"), I("ab")]), e(r"a*?b")); + assert_eq!(inexact([I("ab")], [I("b")]), e(r"ab+")); + assert_eq!(inexact([I("a"), I("b")], [I("b")]), e(r"a*b+")); + + // FIXME: The suffixes for this don't look quite right to me. I think + // the right suffixes would be: [I(ac), I(bc), E(c)]. The main issue I + // think is that suffixes are computed by iterating over concatenations + // in reverse, and then [bc, ac, c] ordering is indeed correct from + // that perspective. We also test a few more equivalent regexes, and + // we get the same result, so it is consistent at least I suppose. + // + // The reason why this isn't an issue is that it only messes up + // preference order, and currently, suffixes are never used in a + // context where preference order matters. 
For prefixes it matters + // because we sometimes want to use prefilters without confirmation + // when all of the literals are exact (and there's no look-around). But + // we never do that for suffixes. Any time we use suffixes, we always + // include a confirmation step. If that ever changes, then it's likely + // this bug will need to be fixed, but last time I looked, it appears + // hard to do so. + assert_eq!( + inexact([I("a"), I("b"), E("c")], [I("bc"), I("ac"), E("c")]), + e(r"a*b*c") + ); + assert_eq!( + inexact([I("a"), I("b"), E("c")], [I("bc"), I("ac"), E("c")]), + e(r"(a+)?(b+)?c") + ); + assert_eq!( + inexact([I("a"), I("b"), E("c")], [I("bc"), I("ac"), E("c")]), + e(r"(a+|)(b+|)c") + ); + // A few more similarish but not identical regexes. These may have a + // similar problem as above. + assert_eq!( + inexact( + [I("a"), I("b"), I("c"), E("")], + [I("c"), I("b"), I("a"), E("")] + ), + e(r"a*b*c*") + ); + assert_eq!(inexact([I("a"), I("b"), I("c")], [I("c")]), e(r"a*b*c+")); + assert_eq!(inexact([I("a"), I("b")], [I("bc")]), e(r"a*b+c")); + assert_eq!(inexact([I("a"), I("b")], [I("c"), I("b")]), e(r"a*b+c*")); + assert_eq!(inexact([I("ab"), E("a")], [I("b"), E("a")]), e(r"ab*")); + assert_eq!( + inexact([I("ab"), E("ac")], [I("bc"), E("ac")]), + e(r"ab*c") + ); + assert_eq!(inexact([I("ab")], [I("b")]), e(r"ab+")); + assert_eq!(inexact([I("ab")], [I("bc")]), e(r"ab+c")); + + assert_eq!( + inexact([I("z"), E("azb")], [I("zazb"), E("azb")]), + e(r"z*azb") + ); + + let expected = + exact(["aaa", "aab", "aba", "abb", "baa", "bab", "bba", "bbb"]); + assert_eq!(expected, e(r"[ab]{3}")); + let expected = inexact( + [ + I("aaa"), + I("aab"), + I("aba"), + I("abb"), + I("baa"), + I("bab"), + I("bba"), + I("bbb"), + ], + [ + I("aaa"), + I("aab"), + I("aba"), + I("abb"), + I("baa"), + I("bab"), + I("bba"), + I("bbb"), + ], + ); + assert_eq!(expected, e(r"[ab]{3,4}")); + } + + #[test] + fn concat() { + let empty: [&str; 0] = []; + + assert_eq!(exact(["abcxyz"]), 
e(r"abc()xyz")); + assert_eq!(exact(["abcxyz"]), e(r"(abc)(xyz)")); + assert_eq!(exact(["abcmnoxyz"]), e(r"abc()mno()xyz")); + assert_eq!(exact(empty), e(r"abc[a&&b]xyz")); + assert_eq!(exact(["abcxyz"]), e(r"abc[a&&b]*xyz")); + } + + #[test] + fn alternation() { + assert_eq!(exact(["abc", "mno", "xyz"]), e(r"abc|mno|xyz")); + assert_eq!( + inexact( + [E("abc"), I("mZ"), E("mo"), E("xyz")], + [E("abc"), I("Zo"), E("mo"), E("xyz")] + ), + e(r"abc|mZ*o|xyz") + ); + assert_eq!(exact(["abc", "xyz"]), e(r"abc|M[a&&b]N|xyz")); + assert_eq!(exact(["abc", "MN", "xyz"]), e(r"abc|M[a&&b]*N|xyz")); + + assert_eq!(exact(["aaa", "aaaaa"]), e(r"(?:|aa)aaa")); + assert_eq!( + inexact( + [I("aaa"), E(""), I("aaaaa"), E("aa")], + [I("aaa"), E(""), E("aa")] + ), + e(r"(?:|aa)(?:aaa)*") + ); + assert_eq!( + inexact( + [E(""), I("aaa"), E("aa"), I("aaaaa")], + [E(""), I("aaa"), E("aa")] + ), + e(r"(?:|aa)(?:aaa)*?") + ); + + assert_eq!( + inexact([E("a"), I("b"), E("")], [E("a"), I("b"), E("")]), + e(r"a|b*") + ); + assert_eq!(inexact([E("a"), I("b")], [E("a"), I("b")]), e(r"a|b+")); + + assert_eq!( + inexact([I("a"), E("b"), E("c")], [I("ab"), E("b"), E("c")]), + e(r"a*b|c") + ); + + assert_eq!( + inexact( + [E("a"), E("b"), I("c"), E("")], + [E("a"), E("b"), I("c"), E("")] + ), + e(r"a|(?:b|c*)") + ); + + assert_eq!( + inexact( + [I("a"), I("b"), E("c"), I("a"), I("ab"), E("c")], + [I("ac"), I("bc"), E("c"), I("ac"), I("abc"), E("c")], + ), + e(r"(a|b)*c|(a|ab)*c") + ); + + assert_eq!( + exact(["abef", "abgh", "cdef", "cdgh"]), + e(r"(ab|cd)(ef|gh)") + ); + assert_eq!( + exact([ + "abefij", "abefkl", "abghij", "abghkl", "cdefij", "cdefkl", + "cdghij", "cdghkl", + ]), + e(r"(ab|cd)(ef|gh)(ij|kl)") + ); + } + + #[test] + fn impossible() { + let empty: [&str; 0] = []; + + assert_eq!(exact(empty), e(r"[a&&b]")); + assert_eq!(exact(empty), e(r"a[a&&b]")); + assert_eq!(exact(empty), e(r"[a&&b]b")); + assert_eq!(exact(empty), e(r"a[a&&b]b")); + assert_eq!(exact(["a", "b"]), 
e(r"a|[a&&b]|b")); + assert_eq!(exact(["a", "b"]), e(r"a|c[a&&b]|b")); + assert_eq!(exact(["a", "b"]), e(r"a|[a&&b]d|b")); + assert_eq!(exact(["a", "b"]), e(r"a|c[a&&b]d|b")); + assert_eq!(exact([""]), e(r"[a&&b]*")); + assert_eq!(exact(["MN"]), e(r"M[a&&b]*N")); + } + + // This tests patterns that contain something that defeats literal + // detection, usually because it would blow some limit on the total number + // of literals that can be returned. + // + // The main idea is that when literal extraction sees something that + // it knows will blow a limit, it replaces it with a marker that says + // "any literal will match here." While not necessarily true, the + // over-estimation is just fine for the purposes of literal extraction, + // because the imprecision doesn't matter: too big is too big. + // + // This is one of the trickier parts of literal extraction, since we need + // to make sure all of our literal extraction operations correctly compose + // with the markers. + #[test] + fn anything() { + assert_eq!(infinite(), e(r".")); + assert_eq!(infinite(), e(r"(?s).")); + assert_eq!(infinite(), e(r"[A-Za-z]")); + assert_eq!(infinite(), e(r"[A-Z]")); + assert_eq!(exact([""]), e(r"[A-Z]{0}")); + assert_eq!(infinite(), e(r"[A-Z]?")); + assert_eq!(infinite(), e(r"[A-Z]*")); + assert_eq!(infinite(), e(r"[A-Z]+")); + assert_eq!((seq([I("1")]), Seq::infinite()), e(r"1[A-Z]")); + assert_eq!((seq([I("1")]), seq([I("2")])), e(r"1[A-Z]2")); + assert_eq!((Seq::infinite(), seq([I("123")])), e(r"[A-Z]+123")); + assert_eq!(infinite(), e(r"[A-Z]+123[A-Z]+")); + assert_eq!(infinite(), e(r"1|[A-Z]|3")); + assert_eq!( + (seq([E("1"), I("2"), E("3")]), Seq::infinite()), + e(r"1|2[A-Z]|3"), + ); + assert_eq!( + (Seq::infinite(), seq([E("1"), I("2"), E("3")])), + e(r"1|[A-Z]2|3"), + ); + assert_eq!( + (seq([E("1"), I("2"), E("4")]), seq([E("1"), I("3"), E("4")])), + e(r"1|2[A-Z]3|4"), + ); + assert_eq!((Seq::infinite(), seq([I("2")])), e(r"(?:|1)[A-Z]2")); + 
assert_eq!(inexact([I("a")], [I("z")]), e(r"a.z")); + } + + // Like the 'anything' test, but it uses smaller limits in order to test + // the logic for effectively aborting literal extraction when the seqs get + // too big. + #[test] + fn anything_small_limits() { + fn prefixes(pattern: &str) -> Seq { + Extractor::new() + .kind(ExtractKind::Prefix) + .limit_total(10) + .extract(&parse(pattern)) + } + + fn suffixes(pattern: &str) -> Seq { + Extractor::new() + .kind(ExtractKind::Suffix) + .limit_total(10) + .extract(&parse(pattern)) + } + + fn e(pattern: &str) -> (Seq, Seq) { + (prefixes(pattern), suffixes(pattern)) + } + + assert_eq!( + ( + seq([ + I("aaa"), + I("aab"), + I("aba"), + I("abb"), + I("baa"), + I("bab"), + I("bba"), + I("bbb") + ]), + seq([ + I("aaa"), + I("aab"), + I("aba"), + I("abb"), + I("baa"), + I("bab"), + I("bba"), + I("bbb") + ]) + ), + e(r"[ab]{3}{3}") + ); + + assert_eq!(infinite(), e(r"ab|cd|ef|gh|ij|kl|mn|op|qr|st|uv|wx|yz")); + } + + #[test] + fn empty() { + assert_eq!(exact([""]), e(r"")); + assert_eq!(exact([""]), e(r"^")); + assert_eq!(exact([""]), e(r"$")); + assert_eq!(exact([""]), e(r"(?m:^)")); + assert_eq!(exact([""]), e(r"(?m:$)")); + assert_eq!(exact([""]), e(r"\b")); + assert_eq!(exact([""]), e(r"\B")); + assert_eq!(exact([""]), e(r"(?-u:\b)")); + assert_eq!(exact([""]), e(r"(?-u:\B)")); + } + + #[test] + fn odds_and_ends() { + assert_eq!((Seq::infinite(), seq([I("a")])), e(r".a")); + assert_eq!((seq([I("a")]), Seq::infinite()), e(r"a.")); + assert_eq!(infinite(), e(r"a|.")); + assert_eq!(infinite(), e(r".|a")); + + let pat = r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]"; + let expected = inexact( + ["Mo'am", "Moam", "Mu'am", "Muam"].map(I), + [ + "ddafi", "ddafy", "dhafi", "dhafy", "dzafi", "dzafy", "dafi", + "dafy", "tdafi", "tdafy", "thafi", "thafy", "tzafi", "tzafy", + "tafi", "tafy", "zdafi", "zdafy", "zhafi", "zhafy", "zzafi", + "zzafy", "zafi", "zafy", + ] + .map(I), + ); + assert_eq!(expected, 
e(pat)); + + assert_eq!( + (seq(["fn is_", "fn as_"].map(I)), Seq::infinite()), + e(r"fn is_([A-Z]+)|fn as_([A-Z]+)"), + ); + assert_eq!( + inexact([I("foo")], [I("quux")]), + e(r"foo[A-Z]+bar[A-Z]+quux") + ); + assert_eq!(infinite(), e(r"[A-Z]+bar[A-Z]+")); + assert_eq!( + exact(["Sherlock Holmes"]), + e(r"(?m)^Sherlock Holmes|Sherlock Holmes$") + ); + + assert_eq!(exact(["sa", "sb"]), e(r"\bs(?:[ab])")); + } + + // This tests a specific regex along with some heuristic steps to reduce + // the sequences extracted. This is meant to roughly correspond to the + // types of heuristics used to shrink literal sets in practice. (Shrinking + // is done because you want to balance "spend too much work looking for + // too many literals" and "spend too much work processing false positive + // matches from short literals.") + #[test] + #[cfg(feature = "unicode-case")] + fn holmes() { + let expected = inexact( + ["HOL", "HOl", "HoL", "Hol", "hOL", "hOl", "hoL", "hol"].map(I), + [ + "MES", "MEs", "Eſ", "MeS", "Mes", "eſ", "mES", "mEs", "meS", + "mes", + ] + .map(I), + ); + let (mut prefixes, mut suffixes) = e(r"(?i)Holmes"); + prefixes.keep_first_bytes(3); + suffixes.keep_last_bytes(3); + prefixes.minimize_by_preference(); + suffixes.minimize_by_preference(); + assert_eq!(expected, (prefixes, suffixes)); + } + + // This tests that we get some kind of literals extracted for a beefier + // alternation with case insensitive mode enabled. At one point during + // development, this returned nothing, and motivated some special case + // code in Extractor::union to try and trim down the literal sequences + // if the union would blow the limits set. 
+ #[test] + #[cfg(feature = "unicode-case")] + fn holmes_alt() { + let mut pre = + prefixes(r"(?i)Sherlock|Holmes|Watson|Irene|Adler|John|Baker"); + assert!(pre.len().unwrap() > 0); + pre.optimize_for_prefix_by_preference(); + assert!(pre.len().unwrap() > 0); + } + + // See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8 + // See: CVE-2022-24713 + // + // We test this here to ensure literal extraction completes in reasonable + // time and isn't materially impacted by these sorts of pathological + // repeats. + #[test] + fn crazy_repeats() { + assert_eq!(inexact([I("")], [I("")]), e(r"(?:){4294967295}")); + assert_eq!( + inexact([I("")], [I("")]), + e(r"(?:){64}{64}{64}{64}{64}{64}") + ); + assert_eq!(inexact([I("")], [I("")]), e(r"x{0}{4294967295}")); + assert_eq!(inexact([I("")], [I("")]), e(r"(?:|){4294967295}")); + + assert_eq!( + inexact([E("")], [E("")]), + e(r"(?:){8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}") + ); + let repa = "a".repeat(100); + assert_eq!( + inexact([I(&repa)], [I(&repa)]), + e(r"a{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}") + ); + } + + #[test] + fn huge() { + let pat = r#"(?-u) + 2(?: + [45]\d{3}| + 7(?: + 1[0-267]| + 2[0-289]| + 3[0-29]| + 4[01]| + 5[1-3]| + 6[013]| + 7[0178]| + 91 + )| + 8(?: + 0[125]| + [139][1-6]| + 2[0157-9]| + 41| + 6[1-35]| + 7[1-5]| + 8[1-8]| + 90 + )| + 9(?: + 0[0-2]| + 1[0-4]| + 2[568]| + 3[3-6]| + 5[5-7]| + 6[0167]| + 7[15]| + 8[0146-9] + ) + )\d{4}| + 3(?: + 12?[5-7]\d{2}| + 0(?: + 2(?: + [025-79]\d| + [348]\d{1,2} + )| + 3(?: + [2-4]\d| + [56]\d? + ) + )| + 2(?: + 1\d{2}| + 2(?: + [12]\d| + [35]\d{1,2}| + 4\d? 
+ ) + )| + 3(?: + 1\d{2}| + 2(?: + [2356]\d| + 4\d{1,2} + ) + )| + 4(?: + 1\d{2}| + 2(?: + 2\d{1,2}| + [47]| + 5\d{2} + ) + )| + 5(?: + 1\d{2}| + 29 + )| + [67]1\d{2}| + 8(?: + 1\d{2}| + 2(?: + 2\d{2}| + 3| + 4\d + ) + ) + )\d{3}| + 4(?: + 0(?: + 2(?: + [09]\d| + 7 + )| + 33\d{2} + )| + 1\d{3}| + 2(?: + 1\d{2}| + 2(?: + [25]\d?| + [348]\d| + [67]\d{1,2} + ) + )| + 3(?: + 1\d{2}(?: + \d{2} + )?| + 2(?: + [045]\d| + [236-9]\d{1,2} + )| + 32\d{2} + )| + 4(?: + [18]\d{2}| + 2(?: + [2-46]\d{2}| + 3 + )| + 5[25]\d{2} + )| + 5(?: + 1\d{2}| + 2(?: + 3\d| + 5 + ) + )| + 6(?: + [18]\d{2}| + 2(?: + 3(?: + \d{2} + )?| + [46]\d{1,2}| + 5\d{2}| + 7\d + )| + 5(?: + 3\d?| + 4\d| + [57]\d{1,2}| + 6\d{2}| + 8 + ) + )| + 71\d{2}| + 8(?: + [18]\d{2}| + 23\d{2}| + 54\d{2} + )| + 9(?: + [18]\d{2}| + 2[2-5]\d{2}| + 53\d{1,2} + ) + )\d{3}| + 5(?: + 02[03489]\d{2}| + 1\d{2}| + 2(?: + 1\d{2}| + 2(?: + 2(?: + \d{2} + )?| + [457]\d{2} + ) + )| + 3(?: + 1\d{2}| + 2(?: + [37](?: + \d{2} + )?| + [569]\d{2} + ) + )| + 4(?: + 1\d{2}| + 2[46]\d{2} + )| + 5(?: + 1\d{2}| + 26\d{1,2} + )| + 6(?: + [18]\d{2}| + 2| + 53\d{2} + )| + 7(?: + 1| + 24 + )\d{2}| + 8(?: + 1| + 26 + )\d{2}| + 91\d{2} + )\d{3}| + 6(?: + 0(?: + 1\d{2}| + 2(?: + 3\d{2}| + 4\d{1,2} + ) + )| + 2(?: + 2[2-5]\d{2}| + 5(?: + [3-5]\d{2}| + 7 + )| + 8\d{2} + )| + 3(?: + 1| + 2[3478] + )\d{2}| + 4(?: + 1| + 2[34] + )\d{2}| + 5(?: + 1| + 2[47] + )\d{2}| + 6(?: + [18]\d{2}| + 6(?: + 2(?: + 2\d| + [34]\d{2} + )| + 5(?: + [24]\d{2}| + 3\d| + 5\d{1,2} + ) + ) + )| + 72[2-5]\d{2}| + 8(?: + 1\d{2}| + 2[2-5]\d{2} + )| + 9(?: + 1\d{2}| + 2[2-6]\d{2} + ) + )\d{3}| + 7(?: + (?: + 02| + [3-589]1| + 6[12]| + 72[24] + )\d{2}| + 21\d{3}| + 32 + )\d{3}| + 8(?: + (?: + 4[12]| + [5-7]2| + 1\d? 
+ )| + (?: + 0| + 3[12]| + [5-7]1| + 217 + )\d + )\d{4}| + 9(?: + [35]1| + (?: + [024]2| + 81 + )\d| + (?: + 1| + [24]1 + )\d{2} + )\d{3} + "#; + // TODO: This is a good candidate of a seq of literals that could be + // shrunk quite a bit and still be very productive with respect to + // literal optimizations. + let (prefixes, suffixes) = e(pat); + assert!(!suffixes.is_finite()); + assert_eq!(Some(243), prefixes.len()); + } + + #[test] + fn optimize() { + // This gets a common prefix that isn't too short. + let (p, s) = + opt(["foobarfoobar", "foobar", "foobarzfoobar", "foobarfoobar"]); + assert_eq!(seq([I("foobar")]), p); + assert_eq!(seq([I("foobar")]), s); + + // This also finds a common prefix, but since it's only one byte, it + // prefers the multiple literals. + let (p, s) = opt(["abba", "akka", "abccba"]); + assert_eq!(exact(["abba", "akka", "abccba"]), (p, s)); + + let (p, s) = opt(["sam", "samwise"]); + assert_eq!((seq([E("sam")]), seq([E("sam"), E("samwise")])), (p, s)); + + // The empty string is poisonous, so our seq becomes infinite, even + // though all literals are exact. + let (p, s) = opt(["foobarfoo", "foo", "", "foozfoo", "foofoo"]); + assert!(!p.is_finite()); + assert!(!s.is_finite()); + + // A space is also poisonous, so our seq becomes infinite. But this + // only gets triggered when we don't have a completely exact sequence. + // When the sequence is exact, spaces are okay, since we presume that + // any prefilter will match a space more quickly than the regex engine. + // (When the sequence is exact, there's a chance of the prefilter being + // used without needing the regex engine at all.) 
+ let mut p = seq([E("foobarfoo"), I("foo"), E(" "), E("foofoo")]); + p.optimize_for_prefix_by_preference(); + assert!(!p.is_finite()); + } +} diff --git a/regex-syntax/src/hir/literal/mod.rs b/regex-syntax/src/hir/literal/mod.rs deleted file mode 100644 index fbc5d3c975..0000000000 --- a/regex-syntax/src/hir/literal/mod.rs +++ /dev/null @@ -1,1686 +0,0 @@ -/*! -Provides routines for extracting literal prefixes and suffixes from an `Hir`. -*/ - -use std::cmp; -use std::fmt; -use std::iter; -use std::mem; -use std::ops; - -use crate::hir::{self, Hir, HirKind}; - -/// A set of literal byte strings extracted from a regular expression. -/// -/// Every member of the set is a `Literal`, which is represented by a -/// `Vec`. (Notably, it may contain invalid UTF-8.) Every member is -/// said to be either *complete* or *cut*. A complete literal means that -/// it extends until the beginning (or end) of the regular expression. In -/// some circumstances, this can be used to indicate a match in the regular -/// expression. -/// -/// A key aspect of literal extraction is knowing when to stop. It is not -/// feasible to blindly extract all literals from a regular expression, even if -/// there are finitely many. For example, the regular expression `[0-9]{10}` -/// has `10^10` distinct literals. For this reason, literal extraction is -/// bounded to some low number by default using heuristics, but the limits can -/// be tweaked. -/// -/// **WARNING**: Literal extraction uses stack space proportional to the size -/// of the `Hir` expression. At some point, this drawback will be eliminated. -/// To protect yourself, set a reasonable -/// [`nest_limit` on your `Parser`](../../struct.ParserBuilder.html#method.nest_limit). -/// This is done for you by default. -#[derive(Clone, Eq, PartialEq)] -pub struct Literals { - lits: Vec, - limit_size: usize, - limit_class: usize, -} - -/// A single member of a set of literals extracted from a regular expression. 
-/// -/// This type has `Deref` and `DerefMut` impls to `Vec` so that all slice -/// and `Vec` operations are available. -#[derive(Clone, Eq, Ord)] -pub struct Literal { - v: Vec, - cut: bool, -} - -impl Literals { - /// Returns a new empty set of literals using default limits. - pub fn empty() -> Literals { - Literals { lits: vec![], limit_size: 250, limit_class: 10 } - } - - /// Returns a set of literal prefixes extracted from the given `Hir`. - pub fn prefixes(expr: &Hir) -> Literals { - let mut lits = Literals::empty(); - lits.union_prefixes(expr); - lits - } - - /// Returns a set of literal suffixes extracted from the given `Hir`. - pub fn suffixes(expr: &Hir) -> Literals { - let mut lits = Literals::empty(); - lits.union_suffixes(expr); - lits - } - - /// Get the approximate size limit (in bytes) of this set. - pub fn limit_size(&self) -> usize { - self.limit_size - } - - /// Set the approximate size limit (in bytes) of this set. - /// - /// If extracting a literal would put the set over this limit, then - /// extraction stops. - /// - /// The new limits will only apply to additions to this set. Existing - /// members remain unchanged, even if the set exceeds the new limit. - pub fn set_limit_size(&mut self, size: usize) -> &mut Literals { - self.limit_size = size; - self - } - - /// Get the character class size limit for this set. - pub fn limit_class(&self) -> usize { - self.limit_class - } - - /// Limits the size of character(or byte) classes considered. - /// - /// A value of `0` prevents all character classes from being considered. - /// - /// This limit also applies to case insensitive literals, since each - /// character in the case insensitive literal is converted to a class, and - /// then case folded. - /// - /// The new limits will only apply to additions to this set. Existing - /// members remain unchanged, even if the set exceeds the new limit. 
- pub fn set_limit_class(&mut self, size: usize) -> &mut Literals { - self.limit_class = size; - self - } - - /// Returns the set of literals as a slice. Its order is unspecified. - pub fn literals(&self) -> &[Literal] { - &self.lits - } - - /// Returns the length of the smallest literal. - /// - /// Returns None is there are no literals in the set. - pub fn min_len(&self) -> Option { - let mut min = None; - for lit in &self.lits { - match min { - None => min = Some(lit.len()), - Some(m) if lit.len() < m => min = Some(lit.len()), - _ => {} - } - } - min - } - - /// Returns true if all members in this set are complete. - pub fn all_complete(&self) -> bool { - !self.lits.is_empty() && self.lits.iter().all(|l| !l.is_cut()) - } - - /// Returns true if any member in this set is complete. - pub fn any_complete(&self) -> bool { - self.lits.iter().any(|lit| !lit.is_cut()) - } - - /// Returns true if this set contains an empty literal. - pub fn contains_empty(&self) -> bool { - self.lits.iter().any(|lit| lit.is_empty()) - } - - /// Returns true if this set is empty or if all of its members is empty. - pub fn is_empty(&self) -> bool { - self.lits.is_empty() || self.lits.iter().all(|lit| lit.is_empty()) - } - - /// Returns a new empty set of literals using this set's limits. - pub fn to_empty(&self) -> Literals { - let mut lits = Literals::empty(); - lits.set_limit_size(self.limit_size).set_limit_class(self.limit_class); - lits - } - - /// Returns the longest common prefix of all members in this set. - pub fn longest_common_prefix(&self) -> &[u8] { - if self.is_empty() { - return &[]; - } - let lit0 = &*self.lits[0]; - let mut len = lit0.len(); - for lit in &self.lits[1..] { - len = cmp::min( - len, - lit.iter().zip(lit0).take_while(|&(a, b)| a == b).count(), - ); - } - &self.lits[0][..len] - } - - /// Returns the longest common suffix of all members in this set. 
- pub fn longest_common_suffix(&self) -> &[u8] { - if self.is_empty() { - return &[]; - } - let lit0 = &*self.lits[0]; - let mut len = lit0.len(); - for lit in &self.lits[1..] { - len = cmp::min( - len, - lit.iter() - .rev() - .zip(lit0.iter().rev()) - .take_while(|&(a, b)| a == b) - .count(), - ); - } - &self.lits[0][self.lits[0].len() - len..] - } - - /// Returns a new set of literals with the given number of bytes trimmed - /// from the suffix of each literal. - /// - /// If any literal would be cut out completely by trimming, then None is - /// returned. - /// - /// Any duplicates that are created as a result of this transformation are - /// removed. - pub fn trim_suffix(&self, num_bytes: usize) -> Option { - if self.min_len().map(|len| len <= num_bytes).unwrap_or(true) { - return None; - } - let mut new = self.to_empty(); - for mut lit in self.lits.iter().cloned() { - let new_len = lit.len() - num_bytes; - lit.truncate(new_len); - lit.cut(); - new.lits.push(lit); - } - new.lits.sort(); - new.lits.dedup(); - Some(new) - } - - /// Returns a new set of prefixes of this set of literals that are - /// guaranteed to be unambiguous. - /// - /// Any substring match with a member of the set is returned is guaranteed - /// to never overlap with a substring match of another member of the set - /// at the same starting position. - /// - /// Given any two members of the returned set, neither is a substring of - /// the other. - pub fn unambiguous_prefixes(&self) -> Literals { - if self.lits.is_empty() { - return self.to_empty(); - } - let mut old = self.lits.to_vec(); - let mut new = self.to_empty(); - 'OUTER: while let Some(mut candidate) = old.pop() { - if candidate.is_empty() { - continue; - } - if new.lits.is_empty() { - new.lits.push(candidate); - continue; - } - for lit2 in &mut new.lits { - if lit2.is_empty() { - continue; - } - if &candidate == lit2 { - // If the literal is already in the set, then we can - // just drop it. 
But make sure that cut literals are - // infectious! - candidate.cut = candidate.cut || lit2.cut; - lit2.cut = candidate.cut; - continue 'OUTER; - } - if candidate.len() < lit2.len() { - if let Some(i) = position(&candidate, &lit2) { - candidate.cut(); - let mut lit3 = lit2.clone(); - lit3.truncate(i); - lit3.cut(); - old.push(lit3); - lit2.clear(); - } - } else if let Some(i) = position(&lit2, &candidate) { - lit2.cut(); - let mut new_candidate = candidate.clone(); - new_candidate.truncate(i); - new_candidate.cut(); - old.push(new_candidate); - candidate.clear(); - } - // Oops, the candidate is already represented in the set. - if candidate.is_empty() { - continue 'OUTER; - } - } - new.lits.push(candidate); - } - new.lits.retain(|lit| !lit.is_empty()); - new.lits.sort(); - new.lits.dedup(); - new - } - - /// Returns a new set of suffixes of this set of literals that are - /// guaranteed to be unambiguous. - /// - /// Any substring match with a member of the set is returned is guaranteed - /// to never overlap with a substring match of another member of the set - /// at the same ending position. - /// - /// Given any two members of the returned set, neither is a substring of - /// the other. - pub fn unambiguous_suffixes(&self) -> Literals { - // This is a touch wasteful... - let mut lits = self.clone(); - lits.reverse(); - let mut unamb = lits.unambiguous_prefixes(); - unamb.reverse(); - unamb - } - - /// Unions the prefixes from the given expression to this set. - /// - /// If prefixes could not be added (for example, this set would exceed its - /// size limits or the set of prefixes from `expr` includes the empty - /// string), then false is returned. - /// - /// Note that prefix literals extracted from `expr` are said to be complete - /// if and only if the literal extends from the beginning of `expr` to the - /// end of `expr`. 
- pub fn union_prefixes(&mut self, expr: &Hir) -> bool { - let mut lits = self.to_empty(); - prefixes(expr, &mut lits); - !lits.is_empty() && !lits.contains_empty() && self.union(lits) - } - - /// Unions the suffixes from the given expression to this set. - /// - /// If suffixes could not be added (for example, this set would exceed its - /// size limits or the set of suffixes from `expr` includes the empty - /// string), then false is returned. - /// - /// Note that prefix literals extracted from `expr` are said to be complete - /// if and only if the literal extends from the end of `expr` to the - /// beginning of `expr`. - pub fn union_suffixes(&mut self, expr: &Hir) -> bool { - let mut lits = self.to_empty(); - suffixes(expr, &mut lits); - lits.reverse(); - !lits.is_empty() && !lits.contains_empty() && self.union(lits) - } - - /// Unions this set with another set. - /// - /// If the union would cause the set to exceed its limits, then the union - /// is skipped and it returns false. Otherwise, if the union succeeds, it - /// returns true. - pub fn union(&mut self, lits: Literals) -> bool { - if self.num_bytes() + lits.num_bytes() > self.limit_size { - return false; - } - if lits.is_empty() { - self.lits.push(Literal::empty()); - } else { - self.lits.extend(lits.lits); - } - true - } - - /// Extends this set with another set. - /// - /// The set of literals is extended via a cross product. - /// - /// If a cross product would cause this set to exceed its limits, then the - /// cross product is skipped and it returns false. Otherwise, if the cross - /// product succeeds, it returns true. - pub fn cross_product(&mut self, lits: &Literals) -> bool { - if lits.is_empty() { - return true; - } - // Check that we make sure we stay in our limits. 
- let mut size_after; - if self.is_empty() || !self.any_complete() { - size_after = self.num_bytes(); - for lits_lit in lits.literals() { - size_after += lits_lit.len(); - } - } else { - size_after = self.lits.iter().fold(0, |accum, lit| { - accum + if lit.is_cut() { lit.len() } else { 0 } - }); - for lits_lit in lits.literals() { - for self_lit in self.literals() { - if !self_lit.is_cut() { - size_after += self_lit.len() + lits_lit.len(); - } - } - } - } - if size_after > self.limit_size { - return false; - } - - let mut base = self.remove_complete(); - if base.is_empty() { - base = vec![Literal::empty()]; - } - for lits_lit in lits.literals() { - for mut self_lit in base.clone() { - self_lit.extend(&**lits_lit); - self_lit.cut = lits_lit.cut; - self.lits.push(self_lit); - } - } - true - } - - /// Extends each literal in this set with the bytes given. - /// - /// If the set is empty, then the given literal is added to the set. - /// - /// If adding any number of bytes to all members of this set causes a limit - /// to be exceeded, then no bytes are added and false is returned. If a - /// prefix of `bytes` can be fit into this set, then it is used and all - /// resulting literals are cut. - pub fn cross_add(&mut self, bytes: &[u8]) -> bool { - // N.B. This could be implemented by simply calling cross_product with - // a literal set containing just `bytes`, but we can be smarter about - // taking shorter prefixes of `bytes` if they'll fit. 
- if bytes.is_empty() { - return true; - } - if self.lits.is_empty() { - let i = cmp::min(self.limit_size, bytes.len()); - self.lits.push(Literal::new(bytes[..i].to_owned())); - self.lits[0].cut = i < bytes.len(); - return !self.lits[0].is_cut(); - } - let size = self.num_bytes(); - if size + self.lits.len() >= self.limit_size { - return false; - } - let mut i = 1; - while size + (i * self.lits.len()) <= self.limit_size - && i < bytes.len() - { - i += 1; - } - for lit in &mut self.lits { - if !lit.is_cut() { - lit.extend(&bytes[..i]); - if i < bytes.len() { - lit.cut(); - } - } - } - true - } - - /// Adds the given literal to this set. - /// - /// Returns false if adding this literal would cause the class to be too - /// big. - pub fn add(&mut self, lit: Literal) -> bool { - if self.num_bytes() + lit.len() > self.limit_size { - return false; - } - self.lits.push(lit); - true - } - - /// Extends each literal in this set with the character class given. - /// - /// Returns false if the character class was too big to add. - pub fn add_char_class(&mut self, cls: &hir::ClassUnicode) -> bool { - self._add_char_class(cls, false) - } - - /// Extends each literal in this set with the character class given, - /// writing the bytes of each character in reverse. - /// - /// Returns false if the character class was too big to add. 
- fn add_char_class_reverse(&mut self, cls: &hir::ClassUnicode) -> bool { - self._add_char_class(cls, true) - } - - fn _add_char_class( - &mut self, - cls: &hir::ClassUnicode, - reverse: bool, - ) -> bool { - use std::char; - - if self.class_exceeds_limits(cls_char_count(cls)) { - return false; - } - let mut base = self.remove_complete(); - if base.is_empty() { - base = vec![Literal::empty()]; - } - for r in cls.iter() { - let (s, e) = (r.start as u32, r.end as u32 + 1); - for c in (s..e).filter_map(char::from_u32) { - for mut lit in base.clone() { - let mut bytes = c.to_string().into_bytes(); - if reverse { - bytes.reverse(); - } - lit.extend(&bytes); - self.lits.push(lit); - } - } - } - true - } - - /// Extends each literal in this set with the byte class given. - /// - /// Returns false if the byte class was too big to add. - pub fn add_byte_class(&mut self, cls: &hir::ClassBytes) -> bool { - if self.class_exceeds_limits(cls_byte_count(cls)) { - return false; - } - let mut base = self.remove_complete(); - if base.is_empty() { - base = vec![Literal::empty()]; - } - for r in cls.iter() { - let (s, e) = (r.start as u32, r.end as u32 + 1); - for b in (s..e).map(|b| b as u8) { - for mut lit in base.clone() { - lit.push(b); - self.lits.push(lit); - } - } - } - true - } - - /// Cuts every member of this set. When a member is cut, it can never - /// be extended. - pub fn cut(&mut self) { - for lit in &mut self.lits { - lit.cut(); - } - } - - /// Reverses all members in place. - pub fn reverse(&mut self) { - for lit in &mut self.lits { - lit.reverse(); - } - } - - /// Clears this set of all members. - pub fn clear(&mut self) { - self.lits.clear(); - } - - /// Pops all complete literals out of this set. - fn remove_complete(&mut self) -> Vec { - let mut base = vec![]; - for lit in mem::replace(&mut self.lits, vec![]) { - if lit.is_cut() { - self.lits.push(lit); - } else { - base.push(lit); - } - } - base - } - - /// Returns the total number of bytes in this set. 
- fn num_bytes(&self) -> usize { - self.lits.iter().fold(0, |accum, lit| accum + lit.len()) - } - - /// Returns true if a character class with the given size would cause this - /// set to exceed its limits. - /// - /// The size given should correspond to the number of items in the class. - fn class_exceeds_limits(&self, size: usize) -> bool { - if size > self.limit_class { - return true; - } - // This is an approximation since codepoints in a char class can encode - // to 1-4 bytes. - let new_byte_count = if self.lits.is_empty() { - size - } else { - self.lits.iter().fold(0, |accum, lit| { - accum - + if lit.is_cut() { - // If the literal is cut, then we'll never add - // anything to it, so don't count it. - 0 - } else { - (lit.len() + 1) * size - } - }) - }; - new_byte_count > self.limit_size - } -} - -fn prefixes(expr: &Hir, lits: &mut Literals) { - match *expr.kind() { - HirKind::Literal(hir::Literal::Unicode(c)) => { - let mut buf = [0; 4]; - lits.cross_add(c.encode_utf8(&mut buf).as_bytes()); - } - HirKind::Literal(hir::Literal::Byte(b)) => { - lits.cross_add(&[b]); - } - HirKind::Class(hir::Class::Unicode(ref cls)) => { - if !lits.add_char_class(cls) { - lits.cut(); - } - } - HirKind::Class(hir::Class::Bytes(ref cls)) => { - if !lits.add_byte_class(cls) { - lits.cut(); - } - } - HirKind::Group(hir::Group { ref hir, .. 
}) => { - prefixes(&**hir, lits); - } - HirKind::Repetition(ref x) => match x.kind { - hir::RepetitionKind::ZeroOrOne => { - repeat_zero_or_one_literals(&x.hir, lits, prefixes); - } - hir::RepetitionKind::ZeroOrMore => { - repeat_zero_or_more_literals(&x.hir, lits, prefixes); - } - hir::RepetitionKind::OneOrMore => { - repeat_one_or_more_literals(&x.hir, lits, prefixes); - } - hir::RepetitionKind::Range(ref rng) => { - let (min, max) = match *rng { - hir::RepetitionRange::Exactly(m) => (m, Some(m)), - hir::RepetitionRange::AtLeast(m) => (m, None), - hir::RepetitionRange::Bounded(m, n) => (m, Some(n)), - }; - repeat_range_literals( - &x.hir, min, max, x.greedy, lits, prefixes, - ) - } - }, - HirKind::Concat(ref es) if es.is_empty() => {} - HirKind::Concat(ref es) if es.len() == 1 => prefixes(&es[0], lits), - HirKind::Concat(ref es) => { - for e in es { - if let HirKind::Anchor(hir::Anchor::StartText) = *e.kind() { - if !lits.is_empty() { - lits.cut(); - break; - } - lits.add(Literal::empty()); - continue; - } - let mut lits2 = lits.to_empty(); - prefixes(e, &mut lits2); - if !lits.cross_product(&lits2) || !lits2.any_complete() { - // If this expression couldn't yield any literal that - // could be extended, then we need to quit. Since we're - // short-circuiting, we also need to freeze every member. 
- lits.cut(); - break; - } - } - } - HirKind::Alternation(ref es) => { - alternate_literals(es, lits, prefixes); - } - _ => lits.cut(), - } -} - -fn suffixes(expr: &Hir, lits: &mut Literals) { - match *expr.kind() { - HirKind::Literal(hir::Literal::Unicode(c)) => { - let mut buf = [0u8; 4]; - let i = c.encode_utf8(&mut buf).len(); - let buf = &mut buf[..i]; - buf.reverse(); - lits.cross_add(buf); - } - HirKind::Literal(hir::Literal::Byte(b)) => { - lits.cross_add(&[b]); - } - HirKind::Class(hir::Class::Unicode(ref cls)) => { - if !lits.add_char_class_reverse(cls) { - lits.cut(); - } - } - HirKind::Class(hir::Class::Bytes(ref cls)) => { - if !lits.add_byte_class(cls) { - lits.cut(); - } - } - HirKind::Group(hir::Group { ref hir, .. }) => { - suffixes(&**hir, lits); - } - HirKind::Repetition(ref x) => match x.kind { - hir::RepetitionKind::ZeroOrOne => { - repeat_zero_or_one_literals(&x.hir, lits, suffixes); - } - hir::RepetitionKind::ZeroOrMore => { - repeat_zero_or_more_literals(&x.hir, lits, suffixes); - } - hir::RepetitionKind::OneOrMore => { - repeat_one_or_more_literals(&x.hir, lits, suffixes); - } - hir::RepetitionKind::Range(ref rng) => { - let (min, max) = match *rng { - hir::RepetitionRange::Exactly(m) => (m, Some(m)), - hir::RepetitionRange::AtLeast(m) => (m, None), - hir::RepetitionRange::Bounded(m, n) => (m, Some(n)), - }; - repeat_range_literals( - &x.hir, min, max, x.greedy, lits, suffixes, - ) - } - }, - HirKind::Concat(ref es) if es.is_empty() => {} - HirKind::Concat(ref es) if es.len() == 1 => suffixes(&es[0], lits), - HirKind::Concat(ref es) => { - for e in es.iter().rev() { - if let HirKind::Anchor(hir::Anchor::EndText) = *e.kind() { - if !lits.is_empty() { - lits.cut(); - break; - } - lits.add(Literal::empty()); - continue; - } - let mut lits2 = lits.to_empty(); - suffixes(e, &mut lits2); - if !lits.cross_product(&lits2) || !lits2.any_complete() { - // If this expression couldn't yield any literal that - // could be extended, then we need to quit. 
Since we're - // short-circuiting, we also need to freeze every member. - lits.cut(); - break; - } - } - } - HirKind::Alternation(ref es) => { - alternate_literals(es, lits, suffixes); - } - _ => lits.cut(), - } -} - -fn repeat_zero_or_one_literals( - e: &Hir, - lits: &mut Literals, - mut f: F, -) { - f( - &Hir::repetition(hir::Repetition { - kind: hir::RepetitionKind::ZeroOrMore, - // FIXME: Our literal extraction doesn't care about greediness. - // Which is partially why we're treating 'e?' as 'e*'. Namely, - // 'ab??' yields [Complete(ab), Complete(a)], but it should yield - // [Complete(a), Complete(ab)] because of the non-greediness. - greedy: true, - hir: Box::new(e.clone()), - }), - lits, - ); -} - -fn repeat_zero_or_more_literals( - e: &Hir, - lits: &mut Literals, - mut f: F, -) { - let (mut lits2, mut lits3) = (lits.clone(), lits.to_empty()); - lits3.set_limit_size(lits.limit_size() / 2); - f(e, &mut lits3); - - if lits3.is_empty() || !lits2.cross_product(&lits3) { - lits.cut(); - return; - } - lits2.cut(); - lits2.add(Literal::empty()); - if !lits.union(lits2) { - lits.cut(); - } -} - -fn repeat_one_or_more_literals( - e: &Hir, - lits: &mut Literals, - mut f: F, -) { - f(e, lits); - lits.cut(); -} - -fn repeat_range_literals( - e: &Hir, - min: u32, - max: Option, - greedy: bool, - lits: &mut Literals, - mut f: F, -) { - if min == 0 { - // This is a bit conservative. If `max` is set, then we could - // treat this as a finite set of alternations. For now, we - // just treat it as `e*`. 
- f( - &Hir::repetition(hir::Repetition { - kind: hir::RepetitionKind::ZeroOrMore, - greedy, - hir: Box::new(e.clone()), - }), - lits, - ); - } else { - if min > 0 { - let n = cmp::min(lits.limit_size, min as usize); - let es = iter::repeat(e.clone()).take(n).collect(); - f(&Hir::concat(es), lits); - if n < min as usize || lits.contains_empty() { - lits.cut(); - } - } - if max.map_or(true, |max| min < max) { - lits.cut(); - } - } -} - -fn alternate_literals( - es: &[Hir], - lits: &mut Literals, - mut f: F, -) { - let mut lits2 = lits.to_empty(); - for e in es { - let mut lits3 = lits.to_empty(); - lits3.set_limit_size(lits.limit_size() / 5); - f(e, &mut lits3); - if lits3.is_empty() || !lits2.union(lits3) { - // If we couldn't find suffixes for *any* of the - // alternates, then the entire alternation has to be thrown - // away and any existing members must be frozen. Similarly, - // if the union couldn't complete, stop and freeze. - lits.cut(); - return; - } - } - if !lits.cross_product(&lits2) { - lits.cut(); - } -} - -impl fmt::Debug for Literals { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Literals") - .field("lits", &self.lits) - .field("limit_size", &self.limit_size) - .field("limit_class", &self.limit_class) - .finish() - } -} - -impl Literal { - /// Returns a new complete literal with the bytes given. - pub fn new(bytes: Vec) -> Literal { - Literal { v: bytes, cut: false } - } - - /// Returns a new complete empty literal. - pub fn empty() -> Literal { - Literal { v: vec![], cut: false } - } - - /// Returns true if this literal was "cut." - pub fn is_cut(&self) -> bool { - self.cut - } - - /// Cuts this literal. 
- pub fn cut(&mut self) { - self.cut = true; - } -} - -impl PartialEq for Literal { - fn eq(&self, other: &Literal) -> bool { - self.v == other.v - } -} - -impl PartialOrd for Literal { - fn partial_cmp(&self, other: &Literal) -> Option { - self.v.partial_cmp(&other.v) - } -} - -impl fmt::Debug for Literal { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.is_cut() { - write!(f, "Cut({})", escape_unicode(&self.v)) - } else { - write!(f, "Complete({})", escape_unicode(&self.v)) - } - } -} - -impl AsRef<[u8]> for Literal { - fn as_ref(&self) -> &[u8] { - &self.v - } -} - -impl ops::Deref for Literal { - type Target = Vec; - fn deref(&self) -> &Vec { - &self.v - } -} - -impl ops::DerefMut for Literal { - fn deref_mut(&mut self) -> &mut Vec { - &mut self.v - } -} - -fn position(needle: &[u8], mut haystack: &[u8]) -> Option { - let mut i = 0; - while haystack.len() >= needle.len() { - if needle == &haystack[..needle.len()] { - return Some(i); - } - i += 1; - haystack = &haystack[1..]; - } - None -} - -fn escape_unicode(bytes: &[u8]) -> String { - let show = match ::std::str::from_utf8(bytes) { - Ok(v) => v.to_string(), - Err(_) => escape_bytes(bytes), - }; - let mut space_escaped = String::new(); - for c in show.chars() { - if c.is_whitespace() { - let escaped = if c as u32 <= 0x7F { - escape_byte(c as u8) - } else if c as u32 <= 0xFFFF { - format!(r"\u{{{:04x}}}", c as u32) - } else { - format!(r"\U{{{:08x}}}", c as u32) - }; - space_escaped.push_str(&escaped); - } else { - space_escaped.push(c); - } - } - space_escaped -} - -fn escape_bytes(bytes: &[u8]) -> String { - let mut s = String::new(); - for &b in bytes { - s.push_str(&escape_byte(b)); - } - s -} - -fn escape_byte(byte: u8) -> String { - use std::ascii::escape_default; - - let escaped: Vec = escape_default(byte).collect(); - String::from_utf8_lossy(&escaped).into_owned() -} - -fn cls_char_count(cls: &hir::ClassUnicode) -> usize { - cls.iter().map(|&r| 1 + (r.end as u32) - (r.start as 
u32)).sum::() - as usize -} - -fn cls_byte_count(cls: &hir::ClassBytes) -> usize { - cls.iter().map(|&r| 1 + (r.end as u32) - (r.start as u32)).sum::() - as usize -} - -#[cfg(test)] -mod tests { - use std::fmt; - - use super::{escape_bytes, Literal, Literals}; - use crate::hir::Hir; - use crate::ParserBuilder; - - // To make test failures easier to read. - #[derive(Debug, Eq, PartialEq)] - struct Bytes(Vec); - #[derive(Debug, Eq, PartialEq)] - struct Unicode(Vec); - - fn escape_lits(blits: &[Literal]) -> Vec { - let mut ulits = vec![]; - for blit in blits { - ulits - .push(ULiteral { v: escape_bytes(&blit), cut: blit.is_cut() }); - } - ulits - } - - fn create_lits>(it: I) -> Literals { - Literals { - lits: it.into_iter().collect(), - limit_size: 0, - limit_class: 0, - } - } - - // Needs to be pub for 1.3? - #[derive(Clone, Eq, PartialEq)] - pub struct ULiteral { - v: String, - cut: bool, - } - - impl ULiteral { - fn is_cut(&self) -> bool { - self.cut - } - } - - impl fmt::Debug for ULiteral { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.is_cut() { - write!(f, "Cut({})", self.v) - } else { - write!(f, "Complete({})", self.v) - } - } - } - - impl PartialEq for ULiteral { - fn eq(&self, other: &Literal) -> bool { - self.v.as_bytes() == &*other.v && self.is_cut() == other.is_cut() - } - } - - impl PartialEq for Literal { - fn eq(&self, other: &ULiteral) -> bool { - &*self.v == other.v.as_bytes() && self.is_cut() == other.is_cut() - } - } - - #[allow(non_snake_case)] - fn C(s: &'static str) -> ULiteral { - ULiteral { v: s.to_owned(), cut: true } - } - #[allow(non_snake_case)] - fn M(s: &'static str) -> ULiteral { - ULiteral { v: s.to_owned(), cut: false } - } - - fn prefixes(lits: &mut Literals, expr: &Hir) { - lits.union_prefixes(expr); - } - - fn suffixes(lits: &mut Literals, expr: &Hir) { - lits.union_suffixes(expr); - } - - macro_rules! 
assert_lit_eq { - ($which:ident, $got_lits:expr, $($expected_lit:expr),*) => {{ - let expected: Vec = vec![$($expected_lit),*]; - let lits = $got_lits; - assert_eq!( - $which(expected.clone()), - $which(escape_lits(lits.literals()))); - assert_eq!( - !expected.is_empty() && expected.iter().all(|l| !l.is_cut()), - lits.all_complete()); - assert_eq!( - expected.iter().any(|l| !l.is_cut()), - lits.any_complete()); - }}; - } - - macro_rules! test_lit { - ($name:ident, $which:ident, $re:expr) => { - test_lit!($name, $which, $re,); - }; - ($name:ident, $which:ident, $re:expr, $($lit:expr),*) => { - #[test] - fn $name() { - let expr = ParserBuilder::new() - .build() - .parse($re) - .unwrap(); - let lits = Literals::$which(&expr); - assert_lit_eq!(Unicode, lits, $($lit),*); - - let expr = ParserBuilder::new() - .allow_invalid_utf8(true) - .unicode(false) - .build() - .parse($re) - .unwrap(); - let lits = Literals::$which(&expr); - assert_lit_eq!(Bytes, lits, $($lit),*); - } - }; - } - - // ************************************************************************ - // Tests for prefix literal extraction. - // ************************************************************************ - - // Elementary tests. 
- test_lit!(pfx_one_lit1, prefixes, "a", M("a")); - test_lit!(pfx_one_lit2, prefixes, "abc", M("abc")); - test_lit!(pfx_one_lit3, prefixes, "(?u)☃", M("\\xe2\\x98\\x83")); - #[cfg(feature = "unicode-case")] - test_lit!(pfx_one_lit4, prefixes, "(?ui)☃", M("\\xe2\\x98\\x83")); - test_lit!(pfx_class1, prefixes, "[1-4]", M("1"), M("2"), M("3"), M("4")); - test_lit!( - pfx_class2, - prefixes, - "(?u)[☃Ⅰ]", - M("\\xe2\\x85\\xa0"), - M("\\xe2\\x98\\x83") - ); - #[cfg(feature = "unicode-case")] - test_lit!( - pfx_class3, - prefixes, - "(?ui)[☃Ⅰ]", - M("\\xe2\\x85\\xa0"), - M("\\xe2\\x85\\xb0"), - M("\\xe2\\x98\\x83") - ); - test_lit!(pfx_one_lit_casei1, prefixes, "(?i-u)a", M("A"), M("a")); - test_lit!( - pfx_one_lit_casei2, - prefixes, - "(?i-u)abc", - M("ABC"), - M("aBC"), - M("AbC"), - M("abC"), - M("ABc"), - M("aBc"), - M("Abc"), - M("abc") - ); - test_lit!(pfx_group1, prefixes, "(a)", M("a")); - test_lit!(pfx_rep_zero_or_one1, prefixes, "a?"); - test_lit!(pfx_rep_zero_or_one2, prefixes, "(?:abc)?"); - test_lit!(pfx_rep_zero_or_one_cat1, prefixes, "ab?", C("ab"), M("a")); - // FIXME: This should return [M("a"), M("ab")] because of the non-greedy - // repetition. As a work-around, we rewrite ab?? as ab*?, and thus we get - // a cut literal. 
- test_lit!(pfx_rep_zero_or_one_cat2, prefixes, "ab??", C("ab"), M("a")); - test_lit!(pfx_rep_zero_or_more1, prefixes, "a*"); - test_lit!(pfx_rep_zero_or_more2, prefixes, "(?:abc)*"); - test_lit!(pfx_rep_one_or_more1, prefixes, "a+", C("a")); - test_lit!(pfx_rep_one_or_more2, prefixes, "(?:abc)+", C("abc")); - test_lit!(pfx_rep_nested_one_or_more, prefixes, "(?:a+)+", C("a")); - test_lit!(pfx_rep_range1, prefixes, "a{0}"); - test_lit!(pfx_rep_range2, prefixes, "a{0,}"); - test_lit!(pfx_rep_range3, prefixes, "a{0,1}"); - test_lit!(pfx_rep_range4, prefixes, "a{1}", M("a")); - test_lit!(pfx_rep_range5, prefixes, "a{2}", M("aa")); - test_lit!(pfx_rep_range6, prefixes, "a{1,2}", C("a")); - test_lit!(pfx_rep_range7, prefixes, "a{2,3}", C("aa")); - - // Test regexes with concatenations. - test_lit!(pfx_cat1, prefixes, "(?:a)(?:b)", M("ab")); - test_lit!(pfx_cat2, prefixes, "[ab]z", M("az"), M("bz")); - test_lit!( - pfx_cat3, - prefixes, - "(?i-u)[ab]z", - M("AZ"), - M("BZ"), - M("aZ"), - M("bZ"), - M("Az"), - M("Bz"), - M("az"), - M("bz") - ); - test_lit!( - pfx_cat4, - prefixes, - "[ab][yz]", - M("ay"), - M("by"), - M("az"), - M("bz") - ); - test_lit!(pfx_cat5, prefixes, "a*b", C("a"), M("b")); - test_lit!(pfx_cat6, prefixes, "a*b*c", C("a"), C("b"), M("c")); - test_lit!(pfx_cat7, prefixes, "a*b*c+", C("a"), C("b"), C("c")); - test_lit!(pfx_cat8, prefixes, "a*b+c", C("a"), C("b")); - test_lit!(pfx_cat9, prefixes, "a*b+c*", C("a"), C("b")); - test_lit!(pfx_cat10, prefixes, "ab*", C("ab"), M("a")); - test_lit!(pfx_cat11, prefixes, "ab*c", C("ab"), M("ac")); - test_lit!(pfx_cat12, prefixes, "ab+", C("ab")); - test_lit!(pfx_cat13, prefixes, "ab+c", C("ab")); - test_lit!(pfx_cat14, prefixes, "a^", C("a")); - test_lit!(pfx_cat15, prefixes, "$a"); - test_lit!(pfx_cat16, prefixes, r"ab*c", C("ab"), M("ac")); - test_lit!(pfx_cat17, prefixes, r"ab+c", C("ab")); - test_lit!(pfx_cat18, prefixes, r"z*azb", C("z"), M("azb")); - test_lit!(pfx_cat19, prefixes, "a.z", C("a")); - - // 
Test regexes with alternations. - test_lit!(pfx_alt1, prefixes, "a|b", M("a"), M("b")); - test_lit!(pfx_alt2, prefixes, "[1-3]|b", M("1"), M("2"), M("3"), M("b")); - test_lit!(pfx_alt3, prefixes, "y(?:a|b)z", M("yaz"), M("ybz")); - test_lit!(pfx_alt4, prefixes, "a|b*"); - test_lit!(pfx_alt5, prefixes, "a|b+", M("a"), C("b")); - test_lit!(pfx_alt6, prefixes, "a|(?:b|c*)"); - test_lit!( - pfx_alt7, - prefixes, - "(a|b)*c|(a|ab)*c", - C("a"), - C("b"), - M("c"), - C("a"), - C("ab"), - M("c") - ); - test_lit!(pfx_alt8, prefixes, "a*b|c", C("a"), M("b"), M("c")); - - // Test regexes with empty assertions. - test_lit!(pfx_empty1, prefixes, "^a", M("a")); - test_lit!(pfx_empty2, prefixes, "a${2}", C("a")); - test_lit!(pfx_empty3, prefixes, "^abc", M("abc")); - test_lit!(pfx_empty4, prefixes, "(?:^abc)|(?:^z)", M("abc"), M("z")); - - // Make sure some curious regexes have no prefixes. - test_lit!(pfx_nothing1, prefixes, "."); - test_lit!(pfx_nothing2, prefixes, "(?s)."); - test_lit!(pfx_nothing3, prefixes, "^"); - test_lit!(pfx_nothing4, prefixes, "$"); - test_lit!(pfx_nothing6, prefixes, "(?m)$"); - test_lit!(pfx_nothing7, prefixes, r"\b"); - test_lit!(pfx_nothing8, prefixes, r"\B"); - - // Test a few regexes that defeat any prefix literal detection. 
- test_lit!(pfx_defeated1, prefixes, ".a"); - test_lit!(pfx_defeated2, prefixes, "(?s).a"); - test_lit!(pfx_defeated3, prefixes, "a*b*c*"); - test_lit!(pfx_defeated4, prefixes, "a|."); - test_lit!(pfx_defeated5, prefixes, ".|a"); - test_lit!(pfx_defeated6, prefixes, "a|^"); - test_lit!(pfx_defeated7, prefixes, ".(?:a(?:b)(?:c))"); - test_lit!(pfx_defeated8, prefixes, "$a"); - test_lit!(pfx_defeated9, prefixes, "(?m)$a"); - test_lit!(pfx_defeated10, prefixes, r"\ba"); - test_lit!(pfx_defeated11, prefixes, r"\Ba"); - test_lit!(pfx_defeated12, prefixes, "^*a"); - test_lit!(pfx_defeated13, prefixes, "^+a"); - - test_lit!( - pfx_crazy1, - prefixes, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - C("Mo\\'"), - C("Mu\\'"), - C("Moam"), - C("Muam") - ); - - // ************************************************************************ - // Tests for quiting prefix literal search. - // ************************************************************************ - - macro_rules! test_exhausted { - ($name:ident, $which:ident, $re:expr) => { - test_exhausted!($name, $which, $re,); - }; - ($name:ident, $which:ident, $re:expr, $($lit:expr),*) => { - #[test] - fn $name() { - let expr = ParserBuilder::new() - .build() - .parse($re) - .unwrap(); - let mut lits = Literals::empty(); - lits.set_limit_size(20).set_limit_class(10); - $which(&mut lits, &expr); - assert_lit_eq!(Unicode, lits, $($lit),*); - - let expr = ParserBuilder::new() - .allow_invalid_utf8(true) - .unicode(false) - .build() - .parse($re) - .unwrap(); - let mut lits = Literals::empty(); - lits.set_limit_size(20).set_limit_class(10); - $which(&mut lits, &expr); - assert_lit_eq!(Bytes, lits, $($lit),*); - } - }; - } - - // These test use a much lower limit than the default so that we can - // write test cases of reasonable size. 
- test_exhausted!(pfx_exhausted1, prefixes, "[a-z]"); - test_exhausted!(pfx_exhausted2, prefixes, "[a-z]*A"); - test_exhausted!(pfx_exhausted3, prefixes, "A[a-z]Z", C("A")); - test_exhausted!( - pfx_exhausted4, - prefixes, - "(?i-u)foobar", - C("FO"), - C("fO"), - C("Fo"), - C("fo") - ); - test_exhausted!( - pfx_exhausted5, - prefixes, - "(?:ab){100}", - C("abababababababababab") - ); - test_exhausted!( - pfx_exhausted6, - prefixes, - "(?:(?:ab){100})*cd", - C("ababababab"), - M("cd") - ); - test_exhausted!( - pfx_exhausted7, - prefixes, - "z(?:(?:ab){100})*cd", - C("zababababab"), - M("zcd") - ); - test_exhausted!( - pfx_exhausted8, - prefixes, - "aaaaaaaaaaaaaaaaaaaaz", - C("aaaaaaaaaaaaaaaaaaaa") - ); - - // ************************************************************************ - // Tests for suffix literal extraction. - // ************************************************************************ - - // Elementary tests. - test_lit!(sfx_one_lit1, suffixes, "a", M("a")); - test_lit!(sfx_one_lit2, suffixes, "abc", M("abc")); - test_lit!(sfx_one_lit3, suffixes, "(?u)☃", M("\\xe2\\x98\\x83")); - #[cfg(feature = "unicode-case")] - test_lit!(sfx_one_lit4, suffixes, "(?ui)☃", M("\\xe2\\x98\\x83")); - test_lit!(sfx_class1, suffixes, "[1-4]", M("1"), M("2"), M("3"), M("4")); - test_lit!( - sfx_class2, - suffixes, - "(?u)[☃Ⅰ]", - M("\\xe2\\x85\\xa0"), - M("\\xe2\\x98\\x83") - ); - #[cfg(feature = "unicode-case")] - test_lit!( - sfx_class3, - suffixes, - "(?ui)[☃Ⅰ]", - M("\\xe2\\x85\\xa0"), - M("\\xe2\\x85\\xb0"), - M("\\xe2\\x98\\x83") - ); - test_lit!(sfx_one_lit_casei1, suffixes, "(?i-u)a", M("A"), M("a")); - test_lit!( - sfx_one_lit_casei2, - suffixes, - "(?i-u)abc", - M("ABC"), - M("ABc"), - M("AbC"), - M("Abc"), - M("aBC"), - M("aBc"), - M("abC"), - M("abc") - ); - test_lit!(sfx_group1, suffixes, "(a)", M("a")); - test_lit!(sfx_rep_zero_or_one1, suffixes, "a?"); - test_lit!(sfx_rep_zero_or_one2, suffixes, "(?:abc)?"); - test_lit!(sfx_rep_zero_or_more1, suffixes, 
"a*"); - test_lit!(sfx_rep_zero_or_more2, suffixes, "(?:abc)*"); - test_lit!(sfx_rep_one_or_more1, suffixes, "a+", C("a")); - test_lit!(sfx_rep_one_or_more2, suffixes, "(?:abc)+", C("abc")); - test_lit!(sfx_rep_nested_one_or_more, suffixes, "(?:a+)+", C("a")); - test_lit!(sfx_rep_range1, suffixes, "a{0}"); - test_lit!(sfx_rep_range2, suffixes, "a{0,}"); - test_lit!(sfx_rep_range3, suffixes, "a{0,1}"); - test_lit!(sfx_rep_range4, suffixes, "a{1}", M("a")); - test_lit!(sfx_rep_range5, suffixes, "a{2}", M("aa")); - test_lit!(sfx_rep_range6, suffixes, "a{1,2}", C("a")); - test_lit!(sfx_rep_range7, suffixes, "a{2,3}", C("aa")); - - // Test regexes with concatenations. - test_lit!(sfx_cat1, suffixes, "(?:a)(?:b)", M("ab")); - test_lit!(sfx_cat2, suffixes, "[ab]z", M("az"), M("bz")); - test_lit!( - sfx_cat3, - suffixes, - "(?i-u)[ab]z", - M("AZ"), - M("Az"), - M("BZ"), - M("Bz"), - M("aZ"), - M("az"), - M("bZ"), - M("bz") - ); - test_lit!( - sfx_cat4, - suffixes, - "[ab][yz]", - M("ay"), - M("az"), - M("by"), - M("bz") - ); - test_lit!(sfx_cat5, suffixes, "a*b", C("ab"), M("b")); - test_lit!(sfx_cat6, suffixes, "a*b*c", C("bc"), C("ac"), M("c")); - test_lit!(sfx_cat7, suffixes, "a*b*c+", C("c")); - test_lit!(sfx_cat8, suffixes, "a*b+c", C("bc")); - test_lit!(sfx_cat9, suffixes, "a*b+c*", C("c"), C("b")); - test_lit!(sfx_cat10, suffixes, "ab*", C("b"), M("a")); - test_lit!(sfx_cat11, suffixes, "ab*c", C("bc"), M("ac")); - test_lit!(sfx_cat12, suffixes, "ab+", C("b")); - test_lit!(sfx_cat13, suffixes, "ab+c", C("bc")); - test_lit!(sfx_cat14, suffixes, "a^"); - test_lit!(sfx_cat15, suffixes, "$a", C("a")); - test_lit!(sfx_cat16, suffixes, r"ab*c", C("bc"), M("ac")); - test_lit!(sfx_cat17, suffixes, r"ab+c", C("bc")); - test_lit!(sfx_cat18, suffixes, r"z*azb", C("zazb"), M("azb")); - test_lit!(sfx_cat19, suffixes, "a.z", C("z")); - - // Test regexes with alternations. 
- test_lit!(sfx_alt1, suffixes, "a|b", M("a"), M("b")); - test_lit!(sfx_alt2, suffixes, "[1-3]|b", M("1"), M("2"), M("3"), M("b")); - test_lit!(sfx_alt3, suffixes, "y(?:a|b)z", M("yaz"), M("ybz")); - test_lit!(sfx_alt4, suffixes, "a|b*"); - test_lit!(sfx_alt5, suffixes, "a|b+", M("a"), C("b")); - test_lit!(sfx_alt6, suffixes, "a|(?:b|c*)"); - test_lit!( - sfx_alt7, - suffixes, - "(a|b)*c|(a|ab)*c", - C("ac"), - C("bc"), - M("c"), - C("ac"), - C("abc"), - M("c") - ); - test_lit!(sfx_alt8, suffixes, "a*b|c", C("ab"), M("b"), M("c")); - - // Test regexes with empty assertions. - test_lit!(sfx_empty1, suffixes, "a$", M("a")); - test_lit!(sfx_empty2, suffixes, "${2}a", C("a")); - - // Make sure some curious regexes have no suffixes. - test_lit!(sfx_nothing1, suffixes, "."); - test_lit!(sfx_nothing2, suffixes, "(?s)."); - test_lit!(sfx_nothing3, suffixes, "^"); - test_lit!(sfx_nothing4, suffixes, "$"); - test_lit!(sfx_nothing6, suffixes, "(?m)$"); - test_lit!(sfx_nothing7, suffixes, r"\b"); - test_lit!(sfx_nothing8, suffixes, r"\B"); - - // Test a few regexes that defeat any suffix literal detection. - test_lit!(sfx_defeated1, suffixes, "a."); - test_lit!(sfx_defeated2, suffixes, "(?s)a."); - test_lit!(sfx_defeated3, suffixes, "a*b*c*"); - test_lit!(sfx_defeated4, suffixes, "a|."); - test_lit!(sfx_defeated5, suffixes, ".|a"); - test_lit!(sfx_defeated6, suffixes, "a|^"); - test_lit!(sfx_defeated7, suffixes, "(?:a(?:b)(?:c))."); - test_lit!(sfx_defeated8, suffixes, "a^"); - test_lit!(sfx_defeated9, suffixes, "(?m)a$"); - test_lit!(sfx_defeated10, suffixes, r"a\b"); - test_lit!(sfx_defeated11, suffixes, r"a\B"); - test_lit!(sfx_defeated12, suffixes, "a^*"); - test_lit!(sfx_defeated13, suffixes, "a^+"); - - // These test use a much lower limit than the default so that we can - // write test cases of reasonable size. 
- test_exhausted!(sfx_exhausted1, suffixes, "[a-z]"); - test_exhausted!(sfx_exhausted2, suffixes, "A[a-z]*"); - test_exhausted!(sfx_exhausted3, suffixes, "A[a-z]Z", C("Z")); - test_exhausted!( - sfx_exhausted4, - suffixes, - "(?i-u)foobar", - C("AR"), - C("Ar"), - C("aR"), - C("ar") - ); - test_exhausted!( - sfx_exhausted5, - suffixes, - "(?:ab){100}", - C("abababababababababab") - ); - test_exhausted!( - sfx_exhausted6, - suffixes, - "cd(?:(?:ab){100})*", - C("ababababab"), - M("cd") - ); - test_exhausted!( - sfx_exhausted7, - suffixes, - "cd(?:(?:ab){100})*z", - C("abababababz"), - M("cdz") - ); - test_exhausted!( - sfx_exhausted8, - suffixes, - "zaaaaaaaaaaaaaaaaaaaa", - C("aaaaaaaaaaaaaaaaaaaa") - ); - - // ************************************************************************ - // Tests for generating unambiguous literal sets. - // ************************************************************************ - - macro_rules! test_unamb { - ($name:ident, $given:expr, $expected:expr) => { - #[test] - fn $name() { - let given: Vec = $given - .into_iter() - .map(|ul| { - let cut = ul.is_cut(); - Literal { v: ul.v.into_bytes(), cut: cut } - }) - .collect(); - let lits = create_lits(given); - let got = lits.unambiguous_prefixes(); - assert_eq!($expected, escape_lits(got.literals())); - } - }; - } - - test_unamb!(unambiguous1, vec![M("z"), M("azb")], vec![C("a"), C("z")]); - test_unamb!( - unambiguous2, - vec![M("zaaaaaa"), M("aa")], - vec![C("aa"), C("z")] - ); - test_unamb!( - unambiguous3, - vec![M("Sherlock"), M("Watson")], - vec![M("Sherlock"), M("Watson")] - ); - test_unamb!(unambiguous4, vec![M("abc"), M("bc")], vec![C("a"), C("bc")]); - test_unamb!(unambiguous5, vec![M("bc"), M("abc")], vec![C("a"), C("bc")]); - test_unamb!(unambiguous6, vec![M("a"), M("aa")], vec![C("a")]); - test_unamb!(unambiguous7, vec![M("aa"), M("a")], vec![C("a")]); - test_unamb!(unambiguous8, vec![M("ab"), M("a")], vec![C("a")]); - test_unamb!( - unambiguous9, - vec![M("ac"), M("bc"), 
M("c"), M("ac"), M("abc"), M("c")], - vec![C("a"), C("b"), C("c")] - ); - test_unamb!( - unambiguous10, - vec![M("Mo'"), M("Mu'"), M("Mo"), M("Mu")], - vec![C("Mo"), C("Mu")] - ); - test_unamb!( - unambiguous11, - vec![M("zazb"), M("azb")], - vec![C("a"), C("z")] - ); - test_unamb!(unambiguous12, vec![M("foo"), C("foo")], vec![C("foo")]); - test_unamb!( - unambiguous13, - vec![M("ABCX"), M("CDAX"), M("BCX")], - vec![C("A"), C("BCX"), C("CD")] - ); - test_unamb!( - unambiguous14, - vec![M("IMGX"), M("MVIX"), M("MGX"), M("DSX")], - vec![M("DSX"), C("I"), C("MGX"), C("MV")] - ); - test_unamb!( - unambiguous15, - vec![M("IMG_"), M("MG_"), M("CIMG")], - vec![C("C"), C("I"), C("MG_")] - ); - - // ************************************************************************ - // Tests for suffix trimming. - // ************************************************************************ - macro_rules! test_trim { - ($name:ident, $trim:expr, $given:expr, $expected:expr) => { - #[test] - fn $name() { - let given: Vec = $given - .into_iter() - .map(|ul| { - let cut = ul.is_cut(); - Literal { v: ul.v.into_bytes(), cut: cut } - }) - .collect(); - let lits = create_lits(given); - let got = lits.trim_suffix($trim).unwrap(); - assert_eq!($expected, escape_lits(got.literals())); - } - }; - } - - test_trim!(trim1, 1, vec![M("ab"), M("yz")], vec![C("a"), C("y")]); - test_trim!(trim2, 1, vec![M("abc"), M("abd")], vec![C("ab")]); - test_trim!(trim3, 2, vec![M("abc"), M("abd")], vec![C("a")]); - test_trim!(trim4, 2, vec![M("abc"), M("ghij")], vec![C("a"), C("gh")]); - - // ************************************************************************ - // Tests for longest common prefix. - // ************************************************************************ - - macro_rules! 
test_lcp { - ($name:ident, $given:expr, $expected:expr) => { - #[test] - fn $name() { - let given: Vec = $given - .into_iter() - .map(|s: &str| Literal { - v: s.to_owned().into_bytes(), - cut: false, - }) - .collect(); - let lits = create_lits(given); - let got = lits.longest_common_prefix(); - assert_eq!($expected, escape_bytes(got)); - } - }; - } - - test_lcp!(lcp1, vec!["a"], "a"); - test_lcp!(lcp2, vec![], ""); - test_lcp!(lcp3, vec!["a", "b"], ""); - test_lcp!(lcp4, vec!["ab", "ab"], "ab"); - test_lcp!(lcp5, vec!["ab", "a"], "a"); - test_lcp!(lcp6, vec!["a", "ab"], "a"); - test_lcp!(lcp7, vec!["ab", "b"], ""); - test_lcp!(lcp8, vec!["b", "ab"], ""); - test_lcp!(lcp9, vec!["foobar", "foobaz"], "fooba"); - test_lcp!(lcp10, vec!["foobar", "foobaz", "a"], ""); - test_lcp!(lcp11, vec!["a", "foobar", "foobaz"], ""); - test_lcp!(lcp12, vec!["foo", "flub", "flab", "floo"], "f"); - - // ************************************************************************ - // Tests for longest common suffix. - // ************************************************************************ - - macro_rules! 
test_lcs { - ($name:ident, $given:expr, $expected:expr) => { - #[test] - fn $name() { - let given: Vec = $given - .into_iter() - .map(|s: &str| Literal { - v: s.to_owned().into_bytes(), - cut: false, - }) - .collect(); - let lits = create_lits(given); - let got = lits.longest_common_suffix(); - assert_eq!($expected, escape_bytes(got)); - } - }; - } - - test_lcs!(lcs1, vec!["a"], "a"); - test_lcs!(lcs2, vec![], ""); - test_lcs!(lcs3, vec!["a", "b"], ""); - test_lcs!(lcs4, vec!["ab", "ab"], "ab"); - test_lcs!(lcs5, vec!["ab", "a"], ""); - test_lcs!(lcs6, vec!["a", "ab"], ""); - test_lcs!(lcs7, vec!["ab", "b"], "b"); - test_lcs!(lcs8, vec!["b", "ab"], "b"); - test_lcs!(lcs9, vec!["barfoo", "bazfoo"], "foo"); - test_lcs!(lcs10, vec!["barfoo", "bazfoo", "a"], ""); - test_lcs!(lcs11, vec!["a", "barfoo", "bazfoo"], ""); - test_lcs!(lcs12, vec!["flub", "bub", "boob", "dub"], "b"); -} diff --git a/regex-syntax/src/hir/mod.rs b/regex-syntax/src/hir/mod.rs index 156bcc2844..a198083173 100644 --- a/regex-syntax/src/hir/mod.rs +++ b/regex-syntax/src/hir/mod.rs @@ -1,19 +1,42 @@ /*! -Defines a high-level intermediate representation for regular expressions. +Defines a high-level intermediate (HIR) representation for regular expressions. + +The HIR is represented by the [`Hir`] type, and it principally constructed via +[translation](translate) from an [`Ast`](crate::ast::Ast). Alternatively, users +may use the smart constructors defined on `Hir` to build their own by hand. The +smart constructors simultaneously simplify and "optimize" the HIR, and are also +the same routines used by translation. + +Most regex engines only have an HIR like this, and usually construct it +directly from the concrete syntax. This crate however first parses the +concrete syntax into an `Ast`, and only then creates the HIR from the `Ast`, +as mentioned above. 
It's done this way to facilitate better error reporting, +and to have a structured representation of a regex that faithfully represents +its concrete syntax. Namely, while an `Hir` value can be converted back to an +equivalent regex pattern string, it is unlikely to look like the original due +to its simplified structure. */ -use std::char; -use std::cmp; -use std::error; -use std::fmt; -use std::result; -use std::u8; -use crate::ast::Span; -use crate::hir::interval::{Interval, IntervalSet, IntervalSetIter}; -use crate::unicode; +use core::{char, cmp}; -pub use crate::hir::visitor::{visit, Visitor}; -pub use crate::unicode::CaseFoldError; +use alloc::{ + boxed::Box, + format, + string::{String, ToString}, + vec, + vec::Vec, +}; + +use crate::{ + ast::Span, + hir::interval::{Interval, IntervalSet, IntervalSetIter}, + unicode, +}; + +pub use crate::{ + hir::visitor::{visit, Visitor}, + unicode::CaseFoldError, +}; mod interval; pub mod literal; @@ -53,13 +76,17 @@ impl Error { } /// The type of an error that occurred while building an `Hir`. +/// +/// This error type is marked as `non_exhaustive`. This means that adding a +/// new variant is not considered a breaking change. +#[non_exhaustive] #[derive(Clone, Debug, Eq, PartialEq)] pub enum ErrorKind { /// This error occurs when a Unicode feature is used when Unicode /// support is disabled. For example `(?-u:\pL)` would trigger this error. UnicodeNotAllowed, /// This error occurs when translating a pattern that could match a byte - /// sequence that isn't UTF-8 and `allow_invalid_utf8` was disabled. + /// sequence that isn't UTF-8 and `utf8` was enabled. InvalidUtf8, /// This occurs when an unrecognized Unicode property name could not /// be found. @@ -75,27 +102,22 @@ pub enum ErrorKind { /// available, and the regular expression required Unicode aware case /// insensitivity. UnicodeCaseUnavailable, - /// This occurs when the translator attempts to construct a character class - /// that is empty. 
- /// - /// Note that this restriction in the translator may be removed in the - /// future. - EmptyClassNotAllowed, - /// Hints that destructuring should not be exhaustive. - /// - /// This enum may grow additional variants, so this makes sure clients - /// don't count on exhaustive matching. (Otherwise, adding a new variant - /// could break existing code.) - #[doc(hidden)] - __Nonexhaustive, } -impl ErrorKind { - // TODO: Remove this method entirely on the next breaking semver release. - #[allow(deprecated)] - fn description(&self) -> &str { +#[cfg(feature = "std")] +impl std::error::Error for Error {} + +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + crate::error::Formatter::from(self).fmt(f) + } +} + +impl core::fmt::Display for ErrorKind { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { use self::ErrorKind::*; - match *self { + + let msg = match *self { UnicodeNotAllowed => "Unicode not allowed here", InvalidUtf8 => "pattern can match invalid UTF-8", UnicodePropertyNotFound => "Unicode property not found", @@ -108,112 +130,82 @@ impl ErrorKind { "Unicode-aware case insensitivity matching is not available \ (make sure the unicode-case feature is enabled)" } - EmptyClassNotAllowed => "empty character classes are not allowed", - __Nonexhaustive => unreachable!(), - } - } -} - -impl error::Error for Error { - // TODO: Remove this method entirely on the next breaking semver release. - #[allow(deprecated)] - fn description(&self) -> &str { - self.kind.description() - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - crate::error::Formatter::from(self).fmt(f) - } -} - -impl fmt::Display for ErrorKind { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - // TODO: Remove this on the next breaking semver release. 
- #[allow(deprecated)] - f.write_str(self.description()) + }; + f.write_str(msg) } } /// A high-level intermediate representation (HIR) for a regular expression. /// -/// The HIR of a regular expression represents an intermediate step between its -/// abstract syntax (a structured description of the concrete syntax) and -/// compiled byte codes. The purpose of HIR is to make regular expressions +/// An HIR value is a combination of a [`HirKind`] and a set of [`Properties`]. +/// An `HirKind` indicates what kind of regular expression it is (a literal, +/// a repetition, a look-around assertion, etc.), where as a `Properties` +/// describes various facts about the regular expression. For example, whether +/// it matches UTF-8 or if it matches the empty string. +/// +/// The HIR of a regular expression represents an intermediate step between +/// its abstract syntax (a structured description of the concrete syntax) and +/// an actual regex matcher. The purpose of HIR is to make regular expressions /// easier to analyze. In particular, the AST is much more complex than the /// HIR. For example, while an AST supports arbitrarily nested character /// classes, the HIR will flatten all nested classes into a single set. The HIR /// will also "compile away" every flag present in the concrete syntax. For /// example, users of HIR expressions never need to worry about case folding; -/// it is handled automatically by the translator (e.g., by translating `(?i)A` -/// to `[aA]`). -/// -/// If the HIR was produced by a translator that disallows invalid UTF-8, then -/// the HIR is guaranteed to match UTF-8 exclusively. -/// -/// This type defines its own destructor that uses constant stack space and -/// heap space proportional to the size of the HIR. +/// it is handled automatically by the translator (e.g., by translating +/// `(?i:A)` to `[aA]`). /// /// The specific type of an HIR expression can be accessed via its `kind` /// or `into_kind` methods. 
This extra level of indirection exists for two
/// reasons:
///
-/// 1. Construction of an HIR expression *must* use the constructor methods
-/// on this `Hir` type instead of building the `HirKind` values directly.
-/// This permits construction to enforce invariants like "concatenations
-/// always consist of two or more sub-expressions."
+/// 1. Construction of an HIR expression *must* use the constructor methods on
+/// this `Hir` type instead of building the `HirKind` values directly. This
+/// permits construction to enforce invariants like "concatenations always
+/// consist of two or more sub-expressions."
 /// 2. Every HIR expression contains attributes that are defined inductively,
-/// and can be computed cheaply during the construction process. For
-/// example, one such attribute is whether the expression must match at the
-/// beginning of the text.
+/// and can be computed cheaply during the construction process. For example,
+/// one such attribute is whether the expression must match at the beginning of
+/// the haystack.
+///
+/// In particular, if you have an `HirKind` value, then there is intentionally
+/// no way to build an `Hir` value from it. You instead need to do case
+/// analysis on the `HirKind` value and build the `Hir` value using its smart
+/// constructors.
+///
+/// # UTF-8
+///
+/// If the HIR was produced by a translator with
+/// [`TranslatorBuilder::utf8`](translate::TranslatorBuilder::utf8) enabled,
+/// then the HIR is guaranteed to match UTF-8 exclusively for all non-empty
+/// matches.
+///
+/// For empty matches, those can occur at any position. It is the
+/// responsibility of the regex engine to determine whether empty matches are
+/// permitted between the code units of a single codepoint.
+///
+/// # Stack space
+///
+/// This type defines its own destructor that uses constant stack space and
+/// heap space proportional to the size of the HIR.
/// /// Also, an `Hir`'s `fmt::Display` implementation prints an HIR as a regular /// expression pattern string, and uses constant stack space and heap space -/// proportional to the size of the `Hir`. -#[derive(Clone, Debug, Eq, PartialEq)] +/// proportional to the size of the `Hir`. The regex it prints is guaranteed to +/// be _semantically_ equivalent to the original concrete syntax, but it may +/// look very different. (And potentially not practically readable by a human.) +/// +/// An `Hir`'s `fmt::Debug` implementation currently does not use constant +/// stack space. The implementation will also suppress some details (such as +/// the `Properties` inlined into every `Hir` value to make it less noisy). +#[derive(Clone, Eq, PartialEq)] pub struct Hir { /// The underlying HIR kind. kind: HirKind, /// Analysis info about this HIR, computed during construction. - info: HirInfo, -} - -/// The kind of an arbitrary `Hir` expression. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum HirKind { - /// The empty regular expression, which matches everything, including the - /// empty string. - Empty, - /// A single literal character that matches exactly this character. - Literal(Literal), - /// A single character class that matches any of the characters in the - /// class. A class can either consist of Unicode scalar values as - /// characters, or it can use bytes. - Class(Class), - /// An anchor assertion. An anchor assertion match always has zero length. - Anchor(Anchor), - /// A word boundary assertion, which may or may not be Unicode aware. A - /// word boundary assertion match always has zero length. - WordBoundary(WordBoundary), - /// A repetition operation applied to a child expression. - Repetition(Repetition), - /// A possibly capturing group, which contains a child expression. - Group(Group), - /// A concatenation of expressions. A concatenation always has at least two - /// child expressions. 
- /// - /// A concatenation matches only if each of its child expression matches - /// one after the other. - Concat(Vec), - /// An alternation of expressions. An alternation always has at least two - /// child expressions. - /// - /// An alternation matches only if at least one of its child expression - /// matches. If multiple expressions match, then the leftmost is preferred. - Alternation(Vec), + props: Properties, } +/// Methods for accessing the underlying `HirKind` and `Properties`. impl Hir { /// Returns a reference to the underlying HIR kind. pub fn kind(&self) -> &HirKind { @@ -223,543 +215,560 @@ impl Hir { /// Consumes ownership of this HIR expression and returns its underlying /// `HirKind`. pub fn into_kind(mut self) -> HirKind { - use std::mem; - mem::replace(&mut self.kind, HirKind::Empty) + core::mem::replace(&mut self.kind, HirKind::Empty) + } + + /// Returns the properties computed for this `Hir`. + pub fn properties(&self) -> &Properties { + &self.props } + /// Splits this HIR into its constituent parts. + /// + /// This is useful because `let Hir { kind, props } = hir;` does not work + /// because of `Hir`'s custom `Drop` implementation. + fn into_parts(mut self) -> (HirKind, Properties) { + ( + core::mem::replace(&mut self.kind, HirKind::Empty), + core::mem::replace(&mut self.props, Properties::empty()), + ) + } +} + +/// Smart constructors for HIR values. +/// +/// These constructors are called "smart" because they do inductive work or +/// simplifications. For example, calling `Hir::repetition` with a repetition +/// like `a{0}` will actually return a `Hir` with a `HirKind::Empty` kind +/// since it is equivalent to an empty regex. Another example is calling +/// `Hir::concat(vec![expr])`. Instead of getting a `HirKind::Concat`, you'll +/// just get back the original `expr` since it's precisely equivalent. 
+///
+/// Smart constructors enable maintaining invariants about the HIR data type
+/// while also simultaneously keeping the representation as simple as possible.
+impl Hir {
     /// Returns an empty HIR expression.
     ///
     /// An empty HIR expression always matches, including the empty string.
+    #[inline]
     pub fn empty() -> Hir {
-        let mut info = HirInfo::new();
-        info.set_always_utf8(true);
-        info.set_all_assertions(true);
-        info.set_anchored_start(false);
-        info.set_anchored_end(false);
-        info.set_line_anchored_start(false);
-        info.set_line_anchored_end(false);
-        info.set_any_anchored_start(false);
-        info.set_any_anchored_end(false);
-        info.set_match_empty(true);
-        info.set_literal(false);
-        info.set_alternation_literal(false);
-        Hir { kind: HirKind::Empty, info }
+        let props = Properties::empty();
+        Hir { kind: HirKind::Empty, props }
+    }
+
+    /// Returns an HIR expression that can never match anything. That is,
+    /// the size of the set of strings in the language described by the HIR
+    /// returned is `0`.
+    ///
+    /// This is distinct from [`Hir::empty`] in that the empty string matches
+    /// the HIR returned by `Hir::empty`. That is, the set of strings in the
+    /// language described by `Hir::empty` is non-empty.
+    ///
+    /// Note that currently, the HIR returned uses an empty character class to
+    /// indicate that nothing can match. An equivalent expression that cannot
+    /// match is an empty alternation, but all such "fail" expressions are
+    /// normalized (via smart constructors) to empty character classes. This is
+    /// because empty character classes can be spelled in the concrete syntax
+    /// of a regex (e.g., `\P{any}` or `(?-u:[^\x00-\xFF])` or `[a&&b]`), but
+    /// empty alternations cannot.
+ #[inline] + pub fn fail() -> Hir { + let class = Class::Bytes(ClassBytes::empty()); + let props = Properties::class(&class); + // We can't just call Hir::class here because it defers to Hir::fail + // in order to canonicalize the Hir value used to represent "cannot + // match." + Hir { kind: HirKind::Class(class), props } } /// Creates a literal HIR expression. /// - /// If the given literal has a `Byte` variant with an ASCII byte, then this - /// method panics. This enforces the invariant that `Byte` variants are - /// only used to express matching of invalid UTF-8. - pub fn literal(lit: Literal) -> Hir { - if let Literal::Byte(b) = lit { - assert!(b > 0x7F); + /// This accepts anything that can be converted into a `Box<[u8]>`. + /// + /// Note that there is no mechanism for storing a `char` or a `Box` + /// in an HIR. Everything is "just bytes." Whether a `Literal` (or + /// any HIR node) matches valid UTF-8 exclusively can be queried via + /// [`Properties::is_utf8`]. + /// + /// # Example + /// + /// This example shows that concatenations of `Literal` HIR values will + /// automatically get flattened and combined together. So for example, even + /// if you concat multiple `Literal` values that are themselves not valid + /// UTF-8, they might add up to valid UTF-8. This also demonstrates just + /// how "smart" Hir's smart constructors are. + /// + /// ``` + /// use regex_syntax::hir::{Hir, HirKind, Literal}; + /// + /// let literals = vec![ + /// Hir::literal([0xE2]), + /// Hir::literal([0x98]), + /// Hir::literal([0x83]), + /// ]; + /// // Each literal, on its own, is invalid UTF-8. + /// assert!(literals.iter().all(|hir| !hir.properties().is_utf8())); + /// + /// let concat = Hir::concat(literals); + /// // But the concatenation is valid UTF-8! + /// assert!(concat.properties().is_utf8()); + /// + /// // And also notice that the literals have been concatenated into a + /// // single `Literal`, to the point where there is no explicit `Concat`! 
+    /// let expected = HirKind::Literal(Literal(Box::from("☃".as_bytes())));
+    /// assert_eq!(&expected, concat.kind());
+    /// ```
+    #[inline]
+    pub fn literal<B: Into<Box<[u8]>>>(lit: B) -> Hir {
+        let bytes = lit.into();
+        if bytes.is_empty() {
+            return Hir::empty();
         }
-        let mut info = HirInfo::new();
-        info.set_always_utf8(lit.is_unicode());
-        info.set_all_assertions(false);
-        info.set_anchored_start(false);
-        info.set_anchored_end(false);
-        info.set_line_anchored_start(false);
-        info.set_line_anchored_end(false);
-        info.set_any_anchored_start(false);
-        info.set_any_anchored_end(false);
-        info.set_match_empty(false);
-        info.set_literal(true);
-        info.set_alternation_literal(true);
-        Hir { kind: HirKind::Literal(lit), info }
-    }
-
-    /// Creates a class HIR expression.
+        let lit = Literal(bytes);
+        let props = Properties::literal(&lit);
+        Hir { kind: HirKind::Literal(lit), props }
+    }
+
+    /// Creates a class HIR expression. The class may either be defined over
+    /// ranges of Unicode codepoints or ranges of raw byte values.
+    ///
+    /// Note that an empty class is permitted. An empty class is equivalent to
+    /// `Hir::fail()`.
+    #[inline]
     pub fn class(class: Class) -> Hir {
-        let mut info = HirInfo::new();
-        info.set_always_utf8(class.is_always_utf8());
-        info.set_all_assertions(false);
-        info.set_anchored_start(false);
-        info.set_anchored_end(false);
-        info.set_line_anchored_start(false);
-        info.set_line_anchored_end(false);
-        info.set_any_anchored_start(false);
-        info.set_any_anchored_end(false);
-        info.set_match_empty(false);
-        info.set_literal(false);
-        info.set_alternation_literal(false);
-        Hir { kind: HirKind::Class(class), info }
-    }
-
-    /// Creates an anchor assertion HIR expression.
- pub fn anchor(anchor: Anchor) -> Hir { - let mut info = HirInfo::new(); - info.set_always_utf8(true); - info.set_all_assertions(true); - info.set_anchored_start(false); - info.set_anchored_end(false); - info.set_line_anchored_start(false); - info.set_line_anchored_end(false); - info.set_any_anchored_start(false); - info.set_any_anchored_end(false); - info.set_match_empty(true); - info.set_literal(false); - info.set_alternation_literal(false); - if let Anchor::StartText = anchor { - info.set_anchored_start(true); - info.set_line_anchored_start(true); - info.set_any_anchored_start(true); - } - if let Anchor::EndText = anchor { - info.set_anchored_end(true); - info.set_line_anchored_end(true); - info.set_any_anchored_end(true); - } - if let Anchor::StartLine = anchor { - info.set_line_anchored_start(true); - } - if let Anchor::EndLine = anchor { - info.set_line_anchored_end(true); + if class.is_empty() { + return Hir::fail(); + } else if let Some(bytes) = class.literal() { + return Hir::literal(bytes); } - Hir { kind: HirKind::Anchor(anchor), info } - } - - /// Creates a word boundary assertion HIR expression. - pub fn word_boundary(word_boundary: WordBoundary) -> Hir { - let mut info = HirInfo::new(); - info.set_always_utf8(true); - info.set_all_assertions(true); - info.set_anchored_start(false); - info.set_anchored_end(false); - info.set_line_anchored_start(false); - info.set_line_anchored_end(false); - info.set_any_anchored_start(false); - info.set_any_anchored_end(false); - info.set_literal(false); - info.set_alternation_literal(false); - // A negated word boundary matches '', so that's fine. But \b does not - // match \b, so why do we say it can match the empty string? Well, - // because, if you search for \b against 'a', it will report [0, 0) and - // [1, 1) as matches, and both of those matches correspond to the empty - // string. Thus, only *certain* empty strings match \b, which similarly - // applies to \B. 
- info.set_match_empty(true); - // Negated ASCII word boundaries can match invalid UTF-8. - if let WordBoundary::AsciiNegate = word_boundary { - info.set_always_utf8(false); - } - Hir { kind: HirKind::WordBoundary(word_boundary), info } + let props = Properties::class(&class); + Hir { kind: HirKind::Class(class), props } + } + + /// Creates a look-around assertion HIR expression. + #[inline] + pub fn look(look: Look) -> Hir { + let props = Properties::look(look); + Hir { kind: HirKind::Look(look), props } } /// Creates a repetition HIR expression. + #[inline] pub fn repetition(rep: Repetition) -> Hir { - let mut info = HirInfo::new(); - info.set_always_utf8(rep.hir.is_always_utf8()); - info.set_all_assertions(rep.hir.is_all_assertions()); - // If this operator can match the empty string, then it can never - // be anchored. - info.set_anchored_start( - !rep.is_match_empty() && rep.hir.is_anchored_start(), - ); - info.set_anchored_end( - !rep.is_match_empty() && rep.hir.is_anchored_end(), - ); - info.set_line_anchored_start( - !rep.is_match_empty() && rep.hir.is_anchored_start(), - ); - info.set_line_anchored_end( - !rep.is_match_empty() && rep.hir.is_anchored_end(), - ); - info.set_any_anchored_start(rep.hir.is_any_anchored_start()); - info.set_any_anchored_end(rep.hir.is_any_anchored_end()); - info.set_match_empty(rep.is_match_empty() || rep.hir.is_match_empty()); - info.set_literal(false); - info.set_alternation_literal(false); - Hir { kind: HirKind::Repetition(rep), info } - } - - /// Creates a group HIR expression. 
- pub fn group(group: Group) -> Hir { - let mut info = HirInfo::new(); - info.set_always_utf8(group.hir.is_always_utf8()); - info.set_all_assertions(group.hir.is_all_assertions()); - info.set_anchored_start(group.hir.is_anchored_start()); - info.set_anchored_end(group.hir.is_anchored_end()); - info.set_line_anchored_start(group.hir.is_line_anchored_start()); - info.set_line_anchored_end(group.hir.is_line_anchored_end()); - info.set_any_anchored_start(group.hir.is_any_anchored_start()); - info.set_any_anchored_end(group.hir.is_any_anchored_end()); - info.set_match_empty(group.hir.is_match_empty()); - info.set_literal(false); - info.set_alternation_literal(false); - Hir { kind: HirKind::Group(group), info } + // The regex 'a{0}' is always equivalent to the empty regex. This is + // true even when 'a' is an expression that never matches anything + // (like '\P{any}'). + // + // Additionally, the regex 'a{1}' is always equivalent to 'a'. + if rep.min == 0 && rep.max == Some(0) { + return Hir::empty(); + } else if rep.min == 1 && rep.max == Some(1) { + return *rep.sub; + } + let props = Properties::repetition(&rep); + Hir { kind: HirKind::Repetition(rep), props } + } + + /// Creates a capture HIR expression. + /// + /// Note that there is no explicit HIR value for a non-capturing group. + /// Since a non-capturing group only exists to override precedence in the + /// concrete syntax and since an HIR already does its own grouping based on + /// what is parsed, there is no need to explicitly represent non-capturing + /// groups in the HIR. + #[inline] + pub fn capture(capture: Capture) -> Hir { + let props = Properties::capture(&capture); + Hir { kind: HirKind::Capture(capture), props } } /// Returns the concatenation of the given expressions. /// - /// This flattens the concatenation as appropriate. 
-    pub fn concat(mut exprs: Vec<Hir>) -> Hir {
-        match exprs.len() {
-            0 => Hir::empty(),
-            1 => exprs.pop().unwrap(),
-            _ => {
-                let mut info = HirInfo::new();
-                info.set_always_utf8(true);
-                info.set_all_assertions(true);
-                info.set_any_anchored_start(false);
-                info.set_any_anchored_end(false);
-                info.set_match_empty(true);
-                info.set_literal(true);
-                info.set_alternation_literal(true);
-
-                // Some attributes require analyzing all sub-expressions.
-                for e in &exprs {
-                    let x = info.is_always_utf8() && e.is_always_utf8();
-                    info.set_always_utf8(x);
-
-                    let x = info.is_all_assertions() && e.is_all_assertions();
-                    info.set_all_assertions(x);
-
-                    let x = info.is_any_anchored_start()
-                        || e.is_any_anchored_start();
-                    info.set_any_anchored_start(x);
-
-                    let x =
-                        info.is_any_anchored_end() || e.is_any_anchored_end();
-                    info.set_any_anchored_end(x);
-
-                    let x = info.is_match_empty() && e.is_match_empty();
-                    info.set_match_empty(x);
-
-                    let x = info.is_literal() && e.is_literal();
-                    info.set_literal(x);
-
-                    let x = info.is_alternation_literal()
-                        && e.is_alternation_literal();
-                    info.set_alternation_literal(x);
+    /// This attempts to flatten and simplify the concatenation as appropriate.
+    ///
+    /// # Example
+    ///
+    /// This shows a simple example of basic flattening of both concatenations
+    /// and literals.
+    ///
+    /// ```
+    /// use regex_syntax::hir::Hir;
+    ///
+    /// let hir = Hir::concat(vec![
+    ///     Hir::concat(vec![
+    ///         Hir::literal([b'a']),
+    ///         Hir::literal([b'b']),
+    ///         Hir::literal([b'c']),
+    ///     ]),
+    ///     Hir::concat(vec![
+    ///         Hir::literal([b'x']),
+    ///         Hir::literal([b'y']),
+    ///         Hir::literal([b'z']),
+    ///     ]),
+    /// ]);
+    /// let expected = Hir::literal("abcxyz".as_bytes());
+    /// assert_eq!(expected, hir);
+    /// ```
+    pub fn concat(subs: Vec<Hir>) -> Hir {
+        // We rebuild the concatenation by simplifying it. Would be nice to do
+        // it in place, but that seems a little tricky?
+        let mut new = vec![];
+        // This gobbles up any adjacent literals in a concatenation and smushes
+        // them together. Basically, when we see a literal, we add its bytes
+        // to 'prior_lit', and whenever we see anything else, we first take
+        // any bytes in 'prior_lit' and add it to the 'new' concatenation.
+        let mut prior_lit: Option<Vec<u8>> = None;
+        for sub in subs {
+            let (kind, props) = sub.into_parts();
+            match kind {
+                HirKind::Literal(Literal(bytes)) => {
+                    if let Some(ref mut prior_bytes) = prior_lit {
+                        prior_bytes.extend_from_slice(&bytes);
+                    } else {
+                        prior_lit = Some(bytes.to_vec());
+                    }
+                }
+                // We also flatten concats that are direct children of another
+                // concat. We only need to do this one level deep since
+                // Hir::concat is the only way to build concatenations, and so
+                // flattening happens inductively.
+                HirKind::Concat(subs2) => {
+                    for sub2 in subs2 {
+                        let (kind2, props2) = sub2.into_parts();
+                        match kind2 {
+                            HirKind::Literal(Literal(bytes)) => {
+                                if let Some(ref mut prior_bytes) = prior_lit {
+                                    prior_bytes.extend_from_slice(&bytes);
+                                } else {
+                                    prior_lit = Some(bytes.to_vec());
+                                }
+                            }
+                            kind2 => {
+                                if let Some(prior_bytes) = prior_lit.take() {
+                                    new.push(Hir::literal(prior_bytes));
+                                }
+                                new.push(Hir { kind: kind2, props: props2 });
+                            }
+                        }
+                    }
+                }
+                // We can just skip empty HIRs.
+                HirKind::Empty => {}
+                kind => {
+                    if let Some(prior_bytes) = prior_lit.take() {
+                        new.push(Hir::literal(prior_bytes));
+                    }
+                    new.push(Hir { kind, props });
                 }
-                // Anchored attributes require something slightly more
-                // sophisticated. Normally, WLOG, to determine whether an
-                // expression is anchored to the start, we'd only need to check
-                // the first expression of a concatenation. However,
-                // expressions like `$\b^` are still anchored to the start,
-                // but the first expression in the concatenation *isn't*
-                // anchored to the start.
So the "first" expression to look at - // is actually one that is either not an assertion or is - // specifically the StartText assertion. - info.set_anchored_start( - exprs - .iter() - .take_while(|e| { - e.is_anchored_start() || e.is_all_assertions() - }) - .any(|e| e.is_anchored_start()), - ); - // Similarly for the end anchor, but in reverse. - info.set_anchored_end( - exprs - .iter() - .rev() - .take_while(|e| { - e.is_anchored_end() || e.is_all_assertions() - }) - .any(|e| e.is_anchored_end()), - ); - // Repeat the process for line anchors. - info.set_line_anchored_start( - exprs - .iter() - .take_while(|e| { - e.is_line_anchored_start() || e.is_all_assertions() - }) - .any(|e| e.is_line_anchored_start()), - ); - info.set_line_anchored_end( - exprs - .iter() - .rev() - .take_while(|e| { - e.is_line_anchored_end() || e.is_all_assertions() - }) - .any(|e| e.is_line_anchored_end()), - ); - Hir { kind: HirKind::Concat(exprs), info } } } + if let Some(prior_bytes) = prior_lit.take() { + new.push(Hir::literal(prior_bytes)); + } + if new.is_empty() { + return Hir::empty(); + } else if new.len() == 1 { + return new.pop().unwrap(); + } + let props = Properties::concat(&new); + Hir { kind: HirKind::Concat(new), props } } /// Returns the alternation of the given expressions. /// - /// This flattens the alternation as appropriate. - pub fn alternation(mut exprs: Vec) -> Hir { - match exprs.len() { - 0 => Hir::empty(), - 1 => exprs.pop().unwrap(), - _ => { - let mut info = HirInfo::new(); - info.set_always_utf8(true); - info.set_all_assertions(true); - info.set_anchored_start(true); - info.set_anchored_end(true); - info.set_line_anchored_start(true); - info.set_line_anchored_end(true); - info.set_any_anchored_start(false); - info.set_any_anchored_end(false); - info.set_match_empty(false); - info.set_literal(false); - info.set_alternation_literal(true); - - // Some attributes require analyzing all sub-expressions. 
- for e in &exprs { - let x = info.is_always_utf8() && e.is_always_utf8(); - info.set_always_utf8(x); - - let x = info.is_all_assertions() && e.is_all_assertions(); - info.set_all_assertions(x); - - let x = info.is_anchored_start() && e.is_anchored_start(); - info.set_anchored_start(x); - - let x = info.is_anchored_end() && e.is_anchored_end(); - info.set_anchored_end(x); - - let x = info.is_line_anchored_start() - && e.is_line_anchored_start(); - info.set_line_anchored_start(x); - - let x = info.is_line_anchored_end() - && e.is_line_anchored_end(); - info.set_line_anchored_end(x); - - let x = info.is_any_anchored_start() - || e.is_any_anchored_start(); - info.set_any_anchored_start(x); - - let x = - info.is_any_anchored_end() || e.is_any_anchored_end(); - info.set_any_anchored_end(x); - - let x = info.is_match_empty() || e.is_match_empty(); - info.set_match_empty(x); - - let x = info.is_alternation_literal() && e.is_literal(); - info.set_alternation_literal(x); + /// This flattens and simplifies the alternation as appropriate. This may + /// include factoring out common prefixes or even rewriting the alternation + /// as a character class. + /// + /// Note that an empty alternation is equivalent to `Hir::fail()`. (It + /// is not possible for one to write an empty alternation, or even an + /// alternation with a single sub-expression, in the concrete syntax of a + /// regex.) + /// + /// # Example + /// + /// This is a simple example showing how an alternation might get + /// simplified. 
+    ///
+    /// ```
+    /// use regex_syntax::hir::{Hir, Class, ClassUnicode, ClassUnicodeRange};
+    ///
+    /// let hir = Hir::alternation(vec![
+    ///     Hir::literal([b'a']),
+    ///     Hir::literal([b'b']),
+    ///     Hir::literal([b'c']),
+    ///     Hir::literal([b'd']),
+    ///     Hir::literal([b'e']),
+    ///     Hir::literal([b'f']),
+    /// ]);
+    /// let expected = Hir::class(Class::Unicode(ClassUnicode::new([
+    ///     ClassUnicodeRange::new('a', 'f'),
+    /// ])));
+    /// assert_eq!(expected, hir);
+    /// ```
+    ///
+    /// And another example showing how common prefixes might get factored
+    /// out.
+    ///
+    /// ```
+    /// use regex_syntax::hir::{Hir, Class, ClassUnicode, ClassUnicodeRange};
+    ///
+    /// let hir = Hir::alternation(vec![
+    ///     Hir::concat(vec![
+    ///         Hir::literal("abc".as_bytes()),
+    ///         Hir::class(Class::Unicode(ClassUnicode::new([
+    ///             ClassUnicodeRange::new('A', 'Z'),
+    ///         ]))),
+    ///     ]),
+    ///     Hir::concat(vec![
+    ///         Hir::literal("abc".as_bytes()),
+    ///         Hir::class(Class::Unicode(ClassUnicode::new([
+    ///             ClassUnicodeRange::new('a', 'z'),
+    ///         ]))),
+    ///     ]),
+    /// ]);
+    /// let expected = Hir::concat(vec![
+    ///     Hir::literal("abc".as_bytes()),
+    ///     Hir::alternation(vec![
+    ///         Hir::class(Class::Unicode(ClassUnicode::new([
+    ///             ClassUnicodeRange::new('A', 'Z'),
+    ///         ]))),
+    ///         Hir::class(Class::Unicode(ClassUnicode::new([
+    ///             ClassUnicodeRange::new('a', 'z'),
+    ///         ]))),
+    ///     ]),
+    /// ]);
+    /// assert_eq!(expected, hir);
+    /// ```
+    ///
+    /// Note that these sorts of simplifications are not guaranteed.
+    pub fn alternation(subs: Vec<Hir>) -> Hir {
+        // We rebuild the alternation by simplifying it. We proceed similarly
+        // as the concatenation case. But in this case, there's no literal
+        // simplification happening. We're just flattening alternations.
+ let mut new = vec![]; + for sub in subs { + let (kind, props) = sub.into_parts(); + match kind { + HirKind::Alternation(subs2) => { + new.extend(subs2); + } + kind => { + new.push(Hir { kind, props }); } - Hir { kind: HirKind::Alternation(exprs), info } } } - } - - /// Build an HIR expression for `.`. - /// - /// A `.` expression matches any character except for `\n`. To build an - /// expression that matches any character, including `\n`, use the `any` - /// method. - /// - /// If `bytes` is `true`, then this assumes characters are limited to a - /// single byte. - pub fn dot(bytes: bool) -> Hir { - if bytes { - let mut cls = ClassBytes::empty(); - cls.push(ClassBytesRange::new(b'\0', b'\x09')); - cls.push(ClassBytesRange::new(b'\x0B', b'\xFF')); - Hir::class(Class::Bytes(cls)) - } else { - let mut cls = ClassUnicode::empty(); - cls.push(ClassUnicodeRange::new('\0', '\x09')); - cls.push(ClassUnicodeRange::new('\x0B', '\u{10FFFF}')); - Hir::class(Class::Unicode(cls)) + if new.is_empty() { + return Hir::fail(); + } else if new.len() == 1 { + return new.pop().unwrap(); + } + // Now that it's completely flattened, look for the special case of + // 'char1|char2|...|charN' and collapse that into a class. Note that + // we look for 'char' first and then bytes. The issue here is that if + // we find both non-ASCII codepoints and non-ASCII singleton bytes, + // then it isn't actually possible to smush them into a single class. + // (Because classes are either "all codepoints" or "all bytes." You + // can have a class that both matches non-ASCII but valid UTF-8 and + // invalid UTF-8.) So we look for all chars and then all bytes, and + // don't handle anything else. 
+ if let Some(singletons) = singleton_chars(&new) { + let it = singletons + .into_iter() + .map(|ch| ClassUnicodeRange { start: ch, end: ch }); + return Hir::class(Class::Unicode(ClassUnicode::new(it))); + } + if let Some(singletons) = singleton_bytes(&new) { + let it = singletons + .into_iter() + .map(|b| ClassBytesRange { start: b, end: b }); + return Hir::class(Class::Bytes(ClassBytes::new(it))); } + // Similar to singleton chars, we can also look for alternations of + // classes. Those can be smushed into a single class. + if let Some(cls) = class_chars(&new) { + return Hir::class(cls); + } + if let Some(cls) = class_bytes(&new) { + return Hir::class(cls); + } + // Factor out a common prefix if we can, which might potentially + // simplify the expression and unlock other optimizations downstream. + // It also might generally make NFA matching and DFA construction + // faster by reducing the scope of branching in the regex. + new = match lift_common_prefix(new) { + Ok(hir) => return hir, + Err(unchanged) => unchanged, + }; + let props = Properties::alternation(&new); + Hir { kind: HirKind::Alternation(new), props } } - /// Build an HIR expression for `(?s).`. + /// Returns an HIR expression for `.`. /// - /// A `(?s).` expression matches any character, including `\n`. To build an - /// expression that matches any character except for `\n`, then use the - /// `dot` method. + /// * [`Dot::AnyChar`] maps to `(?su-R:.)`. + /// * [`Dot::AnyByte`] maps to `(?s-Ru:.)`. + /// * [`Dot::AnyCharExceptLF`] maps to `(?u-Rs:.)`. + /// * [`Dot::AnyCharExceptCRLF`] maps to `(?Ru-s:.)`. + /// * [`Dot::AnyByteExceptLF`] maps to `(?-Rsu:.)`. + /// * [`Dot::AnyByteExceptCRLF`] maps to `(?R-su:.)`. /// - /// If `bytes` is `true`, then this assumes characters are limited to a - /// single byte. 
- pub fn any(bytes: bool) -> Hir { - if bytes { - let mut cls = ClassBytes::empty(); - cls.push(ClassBytesRange::new(b'\0', b'\xFF')); - Hir::class(Class::Bytes(cls)) - } else { - let mut cls = ClassUnicode::empty(); - cls.push(ClassUnicodeRange::new('\0', '\u{10FFFF}')); - Hir::class(Class::Unicode(cls)) - } - } - - /// Return true if and only if this HIR will always match valid UTF-8. + /// # Example /// - /// When this returns false, then it is possible for this HIR expression - /// to match invalid UTF-8. - pub fn is_always_utf8(&self) -> bool { - self.info.is_always_utf8() - } - - /// Returns true if and only if this entire HIR expression is made up of - /// zero-width assertions. + /// Note that this is a convenience routine for constructing the correct + /// character class based on the value of `Dot`. There is no explicit "dot" + /// HIR value. It is just an abbreviation for a common character class. /// - /// This includes expressions like `^$\b\A\z` and even `((\b)+())*^`, but - /// not `^a`. - pub fn is_all_assertions(&self) -> bool { - self.info.is_all_assertions() - } - - /// Return true if and only if this HIR is required to match from the - /// beginning of text. This includes expressions like `^foo`, `^(foo|bar)`, - /// `^foo|^bar` but not `^foo|bar`. - pub fn is_anchored_start(&self) -> bool { - self.info.is_anchored_start() - } - - /// Return true if and only if this HIR is required to match at the end - /// of text. This includes expressions like `foo$`, `(foo|bar)$`, - /// `foo$|bar$` but not `foo$|bar`. - pub fn is_anchored_end(&self) -> bool { - self.info.is_anchored_end() - } - - /// Return true if and only if this HIR is required to match from the - /// beginning of text or the beginning of a line. This includes expressions - /// like `^foo`, `(?m)^foo`, `^(foo|bar)`, `^(foo|bar)`, `(?m)^foo|^bar` - /// but not `^foo|bar` or `(?m)^foo|bar`. 
+ /// ``` + /// use regex_syntax::hir::{Hir, Dot, Class, ClassBytes, ClassBytesRange}; /// - /// Note that if `is_anchored_start` is `true`, then - /// `is_line_anchored_start` will also be `true`. The reverse implication - /// is not true. For example, `(?m)^foo` is line anchored, but not - /// `is_anchored_start`. - pub fn is_line_anchored_start(&self) -> bool { - self.info.is_line_anchored_start() + /// let hir = Hir::dot(Dot::AnyByte); + /// let expected = Hir::class(Class::Bytes(ClassBytes::new([ + /// ClassBytesRange::new(0x00, 0xFF), + /// ]))); + /// assert_eq!(expected, hir); + /// ``` + #[inline] + pub fn dot(dot: Dot) -> Hir { + match dot { + Dot::AnyChar => { + let mut cls = ClassUnicode::empty(); + cls.push(ClassUnicodeRange::new('\0', '\u{10FFFF}')); + Hir::class(Class::Unicode(cls)) + } + Dot::AnyByte => { + let mut cls = ClassBytes::empty(); + cls.push(ClassBytesRange::new(b'\0', b'\xFF')); + Hir::class(Class::Bytes(cls)) + } + Dot::AnyCharExceptLF => { + let mut cls = ClassUnicode::empty(); + cls.push(ClassUnicodeRange::new('\0', '\x09')); + cls.push(ClassUnicodeRange::new('\x0B', '\u{10FFFF}')); + Hir::class(Class::Unicode(cls)) + } + Dot::AnyCharExceptCRLF => { + let mut cls = ClassUnicode::empty(); + cls.push(ClassUnicodeRange::new('\0', '\x09')); + cls.push(ClassUnicodeRange::new('\x0B', '\x0C')); + cls.push(ClassUnicodeRange::new('\x0E', '\u{10FFFF}')); + Hir::class(Class::Unicode(cls)) + } + Dot::AnyByteExceptLF => { + let mut cls = ClassBytes::empty(); + cls.push(ClassBytesRange::new(b'\0', b'\x09')); + cls.push(ClassBytesRange::new(b'\x0B', b'\xFF')); + Hir::class(Class::Bytes(cls)) + } + Dot::AnyByteExceptCRLF => { + let mut cls = ClassBytes::empty(); + cls.push(ClassBytesRange::new(b'\0', b'\x09')); + cls.push(ClassBytesRange::new(b'\x0B', b'\x0C')); + cls.push(ClassBytesRange::new(b'\x0E', b'\xFF')); + Hir::class(Class::Bytes(cls)) + } + } } +} - /// Return true if and only if this HIR is required to match at the - /// end of text or the 
end of a line. This includes expressions like
-    /// `foo$`, `(?m)foo$`, `(foo|bar)$`, `(?m)(foo|bar)$`, `foo$|bar$`,
-    /// `(?m)(foo|bar)$`, but not `foo$|bar` or `(?m)foo$|bar`.
+/// The underlying kind of an arbitrary [`Hir`] expression.
+///
+/// An `HirKind` is principally useful for doing case analysis on the type
+/// of a regular expression. If you're looking to build new `Hir` values,
+/// then you _must_ use the smart constructors defined on `Hir`, like
+/// [`Hir::repetition`], to build new `Hir` values. The API intentionally does
+/// not expose any way of building an `Hir` directly from an `HirKind`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum HirKind {
+    /// The empty regular expression, which matches everything, including the
+    /// empty string.
+    Empty,
+    /// A literal string that matches exactly these bytes.
+    Literal(Literal),
+    /// A single character class that matches any of the characters in the
+    /// class. A class can either consist of Unicode scalar values as
+    /// characters, or it can use bytes.
     ///
-    /// Note that if `is_anchored_end` is `true`, then
-    /// `is_line_anchored_end` will also be `true`. The reverse implication
-    /// is not true. For example, `(?m)foo$` is line anchored, but not
-    /// `is_anchored_end`.
-    pub fn is_line_anchored_end(&self) -> bool {
-        self.info.is_line_anchored_end()
-    }
-
-    /// Return true if and only if this HIR contains any sub-expression that
-    /// is required to match at the beginning of text. Specifically, this
-    /// returns true if the `^` symbol (when multiline mode is disabled) or the
-    /// `\A` escape appear anywhere in the regex.
-    pub fn is_any_anchored_start(&self) -> bool {
-        self.info.is_any_anchored_start()
-    }
-
-    /// Return true if and only if this HIR contains any sub-expression that is
-    /// required to match at the end of text. Specifically, this returns true
-    /// if the `$` symbol (when multiline mode is disabled) or the `\z` escape
-    /// appear anywhere in the regex.
-    pub fn is_any_anchored_end(&self) -> bool {
-        self.info.is_any_anchored_end()
-    }
-
-    /// Return true if and only if the empty string is part of the language
-    /// matched by this regular expression.
+    /// A class may be empty. In which case, it matches nothing.
+    Class(Class),
+    /// A look-around assertion. A look-around match always has zero length.
+    Look(Look),
+    /// A repetition operation applied to a sub-expression.
+    Repetition(Repetition),
+    /// A capturing group, which contains a sub-expression.
+    Capture(Capture),
+    /// A concatenation of expressions.
     ///
-    /// This includes `a*`, `a?b*`, `a{0}`, `()`, `()+`, `^$`, `a|b?`, `\b`
-    /// and `\B`, but not `a` or `a+`.
-    pub fn is_match_empty(&self) -> bool {
-        self.info.is_match_empty()
-    }
-
-    /// Return true if and only if this HIR is a simple literal. This is only
-    /// true when this HIR expression is either itself a `Literal` or a
-    /// concatenation of only `Literal`s.
+    /// A concatenation matches only if each of its sub-expressions match one
+    /// after the other.
     ///
-    /// For example, `f` and `foo` are literals, but `f+`, `(foo)`, `foo()`,
-    /// `` are not (even though that contain sub-expressions that are literals).
-    pub fn is_literal(&self) -> bool {
-        self.info.is_literal()
-    }
-
-    /// Return true if and only if this HIR is either a simple literal or an
-    /// alternation of simple literals. This is only
-    /// true when this HIR expression is either itself a `Literal` or a
-    /// concatenation of only `Literal`s or an alternation of only `Literal`s.
+    /// Concatenations are guaranteed by `Hir`'s smart constructors to always
+    /// have at least two sub-expressions.
+    Concat(Vec<Hir>),
+    /// An alternation of expressions.
     ///
-    /// For example, `f`, `foo`, `a|b|c`, and `foo|bar|baz` are alternation
-    /// literals, but `f+`, `(foo)`, `foo()`, ``
-    /// are not (even though that contain sub-expressions that are literals).
-    pub fn is_alternation_literal(&self) -> bool {
-        self.info.is_alternation_literal()
-    }
+    /// An alternation matches only if at least one of its sub-expressions
+    /// match. If multiple sub-expressions match, then the leftmost is
+    /// preferred.
+    ///
+    /// Alternations are guaranteed by `Hir`'s smart constructors to always
+    /// have at least two sub-expressions.
+    Alternation(Vec<Hir>),
 }

 impl HirKind {
-    /// Return true if and only if this HIR is the empty regular expression.
-    ///
-    /// Note that this is not defined inductively. That is, it only tests if
-    /// this kind is the `Empty` variant. To get the inductive definition,
-    /// use the `is_match_empty` method on [`Hir`](struct.Hir.html).
-    pub fn is_empty(&self) -> bool {
-        match *self {
-            HirKind::Empty => true,
-            _ => false,
-        }
-    }
+    /// Returns a slice of this kind's sub-expressions, if any.
+    pub fn subs(&self) -> &[Hir] {
+        use core::slice::from_ref;

-    /// Returns true if and only if this kind has any (including possibly
-    /// empty) subexpressions.
-    pub fn has_subexprs(&self) -> bool {
         match *self {
             HirKind::Empty
             | HirKind::Literal(_)
             | HirKind::Class(_)
-            | HirKind::Anchor(_)
-            | HirKind::WordBoundary(_) => false,
-            HirKind::Group(_)
-            | HirKind::Repetition(_)
-            | HirKind::Concat(_)
-            | HirKind::Alternation(_) => true,
+            | HirKind::Look(_) => &[],
+            HirKind::Repetition(Repetition { ref sub, .. }) => from_ref(sub),
+            HirKind::Capture(Capture { ref sub, .. }) => from_ref(sub),
+            HirKind::Concat(ref subs) => subs,
+            HirKind::Alternation(ref subs) => subs,
         }
     }
 }

+impl core::fmt::Debug for Hir {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        self.kind.fmt(f)
+    }
+}
+
 /// Print a display representation of this Hir.
 ///
 /// The result of this is a valid regular expression pattern string.
 ///
 /// This implementation uses constant stack space and heap space proportional
 /// to the size of the `Hir`.
-impl fmt::Display for Hir { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - use crate::hir::print::Printer; - Printer::new().print(self, f) +impl core::fmt::Display for Hir { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + crate::hir::print::Printer::new().print(self, f) } } /// The high-level intermediate representation of a literal. /// -/// A literal corresponds to a single character, where a character is either -/// defined by a Unicode scalar value or an arbitrary byte. Unicode characters -/// are preferred whenever possible. In particular, a `Byte` variant is only -/// ever produced when it could match invalid UTF-8. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum Literal { - /// A single character represented by a Unicode scalar value. - Unicode(char), - /// A single character represented by an arbitrary byte. - Byte(u8), -} +/// A literal corresponds to `0` or more bytes that should be matched +/// literally. The smart constructors defined on `Hir` will automatically +/// concatenate adjacent literals into one literal, and will even automatically +/// replace empty literals with `Hir::empty()`. +/// +/// Note that despite a literal being represented by a sequence of bytes, its +/// `Debug` implementation will attempt to print it as a normal string. (That +/// is, not a sequence of decimal numbers.) +#[derive(Clone, Eq, PartialEq)] +pub struct Literal(pub Box<[u8]>); -impl Literal { - /// Returns true if and only if this literal corresponds to a Unicode - /// scalar value. 
- pub fn is_unicode(&self) -> bool { - match *self { - Literal::Unicode(_) => true, - Literal::Byte(b) if b <= 0x7F => true, - Literal::Byte(_) => false, - } +impl core::fmt::Debug for Literal { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + crate::debug::Bytes(&self.0).fmt(f) } } @@ -773,13 +782,12 @@ impl Literal { /// A character class, regardless of its character type, is represented by a /// sequence of non-overlapping non-adjacent ranges of characters. /// -/// Note that unlike [`Literal`](enum.Literal.html), a `Bytes` variant may -/// be produced even when it exclusively matches valid UTF-8. This is because -/// a `Bytes` variant represents an intention by the author of the regular -/// expression to disable Unicode mode, which in turn impacts the semantics of -/// case insensitive matching. For example, `(?i)k` and `(?i-u)k` will not -/// match the same set of strings. -#[derive(Clone, Debug, Eq, PartialEq)] +/// Note that `Bytes` variant may be produced even when it exclusively matches +/// valid UTF-8. This is because a `Bytes` variant represents an intention by +/// the author of the regular expression to disable Unicode mode, which in turn +/// impacts the semantics of case insensitive matching. For example, `(?i)k` +/// and `(?i-u)k` will not match the same set of strings. +#[derive(Clone, Eq, PartialEq)] pub enum Class { /// A set of characters represented by Unicode scalar values. Unicode(ClassUnicode), @@ -795,6 +803,15 @@ impl Class { /// /// If this is a byte oriented character class, then this will be limited /// to the ASCII ranges `A-Z` and `a-z`. + /// + /// # Panics + /// + /// This routine panics when the case mapping data necessary for this + /// routine to complete is unavailable. This occurs when the `unicode-case` + /// feature is not enabled and the underlying class is Unicode oriented. + /// + /// Callers should prefer using `try_case_fold_simple` instead, which will + /// return an error instead of panicking. 
pub fn case_fold_simple(&mut self) { match *self { Class::Unicode(ref mut x) => x.case_fold_simple(), @@ -802,6 +819,29 @@ impl Class { } } + /// Apply Unicode simple case folding to this character class, in place. + /// The character class will be expanded to include all simple case folded + /// character variants. + /// + /// If this is a byte oriented character class, then this will be limited + /// to the ASCII ranges `A-Z` and `a-z`. + /// + /// # Error + /// + /// This routine returns an error when the case mapping data necessary + /// for this routine to complete is unavailable. This occurs when the + /// `unicode-case` feature is not enabled and the underlying class is + /// Unicode oriented. + pub fn try_case_fold_simple( + &mut self, + ) -> core::result::Result<(), CaseFoldError> { + match *self { + Class::Unicode(ref mut x) => x.try_case_fold_simple()?, + Class::Bytes(ref mut x) => x.case_fold_simple(), + } + Ok(()) + } + /// Negate this character class in place. /// /// After completion, this character class will contain precisely the @@ -824,38 +864,177 @@ impl Class { /// 2. Unicode mode (via the `u` flag) was disabled either in the concrete /// syntax or in the parser builder. By default, Unicode mode is /// enabled. - pub fn is_always_utf8(&self) -> bool { + pub fn is_utf8(&self) -> bool { match *self { Class::Unicode(_) => true, - Class::Bytes(ref x) => x.is_all_ascii(), + Class::Bytes(ref x) => x.is_ascii(), } } -} - -/// A set of characters represented by Unicode scalar values. -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct ClassUnicode { - set: IntervalSet, -} -impl ClassUnicode { - /// Create a new class from a sequence of ranges. + /// Returns the length, in bytes, of the smallest string matched by this + /// character class. /// - /// The given ranges do not need to be in any specific order, and ranges - /// may overlap. 
- pub fn new(ranges: I) -> ClassUnicode - where - I: IntoIterator, - { - ClassUnicode { set: IntervalSet::new(ranges) } + /// For non-empty byte oriented classes, this always returns `1`. For + /// non-empty Unicode oriented classes, this can return `1`, `2`, `3` or + /// `4`. For empty classes, `None` is returned. It is impossible for `0` to + /// be returned. + /// + /// # Example + /// + /// This example shows some examples of regexes and their corresponding + /// minimum length, if any. + /// + /// ``` + /// use regex_syntax::{hir::Properties, parse}; + /// + /// // The empty string has a min length of 0. + /// let hir = parse(r"")?; + /// assert_eq!(Some(0), hir.properties().minimum_len()); + /// // As do other types of regexes that only match the empty string. + /// let hir = parse(r"^$\b\B")?; + /// assert_eq!(Some(0), hir.properties().minimum_len()); + /// // A regex that can match the empty string but match more is still 0. + /// let hir = parse(r"a*")?; + /// assert_eq!(Some(0), hir.properties().minimum_len()); + /// // A regex that matches nothing has no minimum defined. + /// let hir = parse(r"[a&&b]")?; + /// assert_eq!(None, hir.properties().minimum_len()); + /// // Character classes usually have a minimum length of 1. + /// let hir = parse(r"\w")?; + /// assert_eq!(Some(1), hir.properties().minimum_len()); + /// // But sometimes Unicode classes might be bigger! + /// let hir = parse(r"\p{Cyrillic}")?; + /// assert_eq!(Some(2), hir.properties().minimum_len()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn minimum_len(&self) -> Option { + match *self { + Class::Unicode(ref x) => x.minimum_len(), + Class::Bytes(ref x) => x.minimum_len(), + } } - /// Create a new class with no ranges. - pub fn empty() -> ClassUnicode { - ClassUnicode::new(vec![]) + /// Returns the length, in bytes, of the longest string matched by this + /// character class. + /// + /// For non-empty byte oriented classes, this always returns `1`. 
For + /// non-empty Unicode oriented classes, this can return `1`, `2`, `3` or + /// `4`. For empty classes, `None` is returned. It is impossible for `0` to + /// be returned. + /// + /// # Example + /// + /// This example shows some examples of regexes and their corresponding + /// maximum length, if any. + /// + /// ``` + /// use regex_syntax::{hir::Properties, parse}; + /// + /// // The empty string has a max length of 0. + /// let hir = parse(r"")?; + /// assert_eq!(Some(0), hir.properties().maximum_len()); + /// // As do other types of regexes that only match the empty string. + /// let hir = parse(r"^$\b\B")?; + /// assert_eq!(Some(0), hir.properties().maximum_len()); + /// // A regex that matches nothing has no maximum defined. + /// let hir = parse(r"[a&&b]")?; + /// assert_eq!(None, hir.properties().maximum_len()); + /// // Bounded repeats work as you expect. + /// let hir = parse(r"x{2,10}")?; + /// assert_eq!(Some(10), hir.properties().maximum_len()); + /// // An unbounded repeat means there is no maximum. + /// let hir = parse(r"x{2,}")?; + /// assert_eq!(None, hir.properties().maximum_len()); + /// // With Unicode enabled, \w can match up to 4 bytes! + /// let hir = parse(r"\w")?; + /// assert_eq!(Some(4), hir.properties().maximum_len()); + /// // Without Unicode enabled, \w matches at most 1 byte. + /// let hir = parse(r"(?-u)\w")?; + /// assert_eq!(Some(1), hir.properties().maximum_len()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn maximum_len(&self) -> Option<usize> { + match *self { + Class::Unicode(ref x) => x.maximum_len(), + Class::Bytes(ref x) => x.maximum_len(), + } } - /// Add a new range to this set. + /// Returns true if and only if this character class is empty. That is, + /// it has no elements. + /// + /// An empty character class can never match anything, including an empty string. 
+ pub fn is_empty(&self) -> bool { + match *self { + Class::Unicode(ref x) => x.ranges().is_empty(), + Class::Bytes(ref x) => x.ranges().is_empty(), + } + } + + /// If this class consists of exactly one element (whether a codepoint or a + /// byte), then return it as a literal byte string. + /// + /// If this class is empty or contains more than one element, then `None` + /// is returned. + pub fn literal(&self) -> Option> { + match *self { + Class::Unicode(ref x) => x.literal(), + Class::Bytes(ref x) => x.literal(), + } + } +} + +impl core::fmt::Debug for Class { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + use crate::debug::Byte; + + let mut fmter = f.debug_set(); + match *self { + Class::Unicode(ref cls) => { + for r in cls.ranges().iter() { + fmter.entry(&(r.start..=r.end)); + } + } + Class::Bytes(ref cls) => { + for r in cls.ranges().iter() { + fmter.entry(&(Byte(r.start)..=Byte(r.end))); + } + } + } + fmter.finish() + } +} + +/// A set of characters represented by Unicode scalar values. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct ClassUnicode { + set: IntervalSet, +} + +impl ClassUnicode { + /// Create a new class from a sequence of ranges. + /// + /// The given ranges do not need to be in any specific order, and ranges + /// may overlap. Ranges will automatically be sorted into a canonical + /// non-overlapping order. + pub fn new(ranges: I) -> ClassUnicode + where + I: IntoIterator, + { + ClassUnicode { set: IntervalSet::new(ranges) } + } + + /// Create a new class with no ranges. + /// + /// An empty class matches nothing. That is, it is equivalent to + /// [`Hir::fail`]. + pub fn empty() -> ClassUnicode { + ClassUnicode::new(vec![]) + } + + /// Add a new range to this set. pub fn push(&mut self, range: ClassUnicodeRange) { self.set.push(range); } @@ -903,7 +1082,7 @@ impl ClassUnicode { /// `unicode-case` feature is not enabled. 
pub fn try_case_fold_simple( &mut self, - ) -> result::Result<(), CaseFoldError> { + ) -> core::result::Result<(), CaseFoldError> { self.set.case_fold_simple() } @@ -946,9 +1125,60 @@ impl ClassUnicode { /// Returns true if and only if this character class will either match /// nothing or only ASCII bytes. Stated differently, this returns false /// if and only if this class contains a non-ASCII codepoint. - pub fn is_all_ascii(&self) -> bool { + pub fn is_ascii(&self) -> bool { self.set.intervals().last().map_or(true, |r| r.end <= '\x7F') } + + /// Returns the length, in bytes, of the smallest string matched by this + /// character class. + /// + /// Returns `None` when the class is empty. + pub fn minimum_len(&self) -> Option<usize> { + let first = self.ranges().get(0)?; + // Correct because c1 < c2 implies c1.len_utf8() <= c2.len_utf8(). + Some(first.start.len_utf8()) + } + + /// Returns the length, in bytes, of the longest string matched by this + /// character class. + /// + /// Returns `None` when the class is empty. + pub fn maximum_len(&self) -> Option<usize> { + let last = self.ranges().last()?; + // Correct because c1 < c2 implies c1.len_utf8() <= c2.len_utf8(). + Some(last.end.len_utf8()) + } + + /// If this class consists of exactly one codepoint, then return it as + /// a literal byte string. + /// + /// If this class is empty or contains more than one codepoint, then `None` + /// is returned. + pub fn literal(&self) -> Option<Vec<u8>> { + let rs = self.ranges(); + if rs.len() == 1 && rs[0].start == rs[0].end { + Some(rs[0].start.encode_utf8(&mut [0; 4]).to_string().into_bytes()) + } else { + None + } + } + + /// If this class consists of only ASCII ranges, then return its + /// corresponding and equivalent byte class. 
+ pub fn to_byte_class(&self) -> Option { + if !self.is_ascii() { + return None; + } + Some(ClassBytes::new(self.ranges().iter().map(|r| { + // Since we are guaranteed that our codepoint range is ASCII, the + // 'u8::try_from' calls below are guaranteed to be correct. + ClassBytesRange { + // MSRV(1.59): Use 'u8::try_from(c)' instead. + start: u8::try_from(u32::from(r.start)).unwrap(), + end: u8::try_from(u32::from(r.end)).unwrap(), + } + }))) + } } /// An iterator over all ranges in a Unicode character class. @@ -975,18 +1205,18 @@ pub struct ClassUnicodeRange { end: char, } -impl fmt::Debug for ClassUnicodeRange { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Debug for ClassUnicodeRange { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let start = if !self.start.is_whitespace() && !self.start.is_control() { self.start.to_string() } else { - format!("0x{:X}", self.start as u32) + format!("0x{:X}", u32::from(self.start)) }; let end = if !self.end.is_whitespace() && !self.end.is_control() { self.end.to_string() } else { - format!("0x{:X}", self.end as u32) + format!("0x{:X}", u32::from(self.end)) }; f.debug_struct("ClassUnicodeRange") .field("start", &start) @@ -1023,24 +1253,13 @@ impl Interval for ClassUnicodeRange { &self, ranges: &mut Vec, ) -> Result<(), unicode::CaseFoldError> { - if !unicode::contains_simple_case_mapping(self.start, self.end)? { + let mut folder = unicode::SimpleCaseFolder::new()?; + if !folder.overlaps(self.start, self.end) { return Ok(()); } - let start = self.start as u32; - let end = (self.end as u32).saturating_add(1); - let mut next_simple_cp = None; - for cp in (start..end).filter_map(char::from_u32) { - if next_simple_cp.map_or(false, |next| cp < next) { - continue; - } - let it = match unicode::simple_fold(cp)? 
{ - Ok(it) => it, - Err(next) => { - next_simple_cp = next; - continue; - } - }; - for cp_folded in it { + let (start, end) = (u32::from(self.start), u32::from(self.end)); + for cp in (start..=end).filter_map(char::from_u32) { + for &cp_folded in folder.mapping(cp) { ranges.push(ClassUnicodeRange::new(cp_folded, cp_folded)); } } @@ -1072,6 +1291,18 @@ impl ClassUnicodeRange { pub fn end(&self) -> char { self.end } + + /// Returns the number of codepoints in this range. + pub fn len(&self) -> usize { + let diff = 1 + u32::from(self.end) - u32::from(self.start); + // This is likely to panic in 16-bit targets since a usize can only fit + // 2^16. It's not clear what to do here, other than to return an error + // when building a Unicode class that contains a range whose length + // overflows usize. (Which, to be honest, is probably quite common on + // 16-bit targets. For example, this would imply that '.' and '\p{any}' + // would be impossible to build.) + usize::try_from(diff).expect("char class len fits in usize") + } } /// A set of characters represented by arbitrary bytes (where one byte @@ -1085,7 +1316,8 @@ impl ClassBytes { /// Create a new class from a sequence of ranges. /// /// The given ranges do not need to be in any specific order, and ranges - /// may overlap. + /// may overlap. Ranges will automatically be sorted into a canonical + /// non-overlapping order. pub fn new(ranges: I) -> ClassBytes where I: IntoIterator, @@ -1094,6 +1326,9 @@ impl ClassBytes { } /// Create a new class with no ranges. + /// + /// An empty class matches nothing. That is, it is equivalent to + /// [`Hir::fail`]. pub fn empty() -> ClassBytes { ClassBytes::new(vec![]) } @@ -1115,410 +1350,1535 @@ impl ClassBytes { self.set.intervals() } - /// Expand this character class such that it contains all case folded - /// characters. 
For example, if this class consists of the range `a-z`, - /// then applying case folding will result in the class containing both the - /// ranges `a-z` and `A-Z`. - /// - /// Note that this only applies ASCII case folding, which is limited to the - /// characters `a-z` and `A-Z`. - pub fn case_fold_simple(&mut self) { - self.set.case_fold_simple().expect("ASCII case folding never fails"); + /// Expand this character class such that it contains all case folded + /// characters. For example, if this class consists of the range `a-z`, + /// then applying case folding will result in the class containing both the + /// ranges `a-z` and `A-Z`. + /// + /// Note that this only applies ASCII case folding, which is limited to the + /// characters `a-z` and `A-Z`. + pub fn case_fold_simple(&mut self) { + self.set.case_fold_simple().expect("ASCII case folding never fails"); + } + + /// Negate this byte class. + /// + /// For all `b` where `b` is any byte, if `b` was in this set, then it + /// will not be in this set after negation. + pub fn negate(&mut self) { + self.set.negate(); + } + + /// Union this byte class with the given byte class, in place. + pub fn union(&mut self, other: &ClassBytes) { + self.set.union(&other.set); + } + + /// Intersect this byte class with the given byte class, in place. + pub fn intersect(&mut self, other: &ClassBytes) { + self.set.intersect(&other.set); + } + + /// Subtract the given byte class from this byte class, in place. + pub fn difference(&mut self, other: &ClassBytes) { + self.set.difference(&other.set); + } + + /// Compute the symmetric difference of the given byte classes, in place. + /// + /// This computes the symmetric difference of two byte classes. This + /// removes all elements in this class that are also in the given class, + /// but also adds all elements from the given class that aren't in this + /// class. 
That is, the class will contain all elements in either class, + /// but will not contain any elements that are in both classes. + pub fn symmetric_difference(&mut self, other: &ClassBytes) { + self.set.symmetric_difference(&other.set); + } + + /// Returns true if and only if this character class will either match + /// nothing or only ASCII bytes. Stated differently, this returns false + /// if and only if this class contains a non-ASCII byte. + pub fn is_ascii(&self) -> bool { + self.set.intervals().last().map_or(true, |r| r.end <= 0x7F) + } + + /// Returns the length, in bytes, of the smallest string matched by this + /// character class. + /// + /// Returns `None` when the class is empty. + pub fn minimum_len(&self) -> Option { + if self.ranges().is_empty() { + None + } else { + Some(1) + } + } + + /// Returns the length, in bytes, of the longest string matched by this + /// character class. + /// + /// Returns `None` when the class is empty. + pub fn maximum_len(&self) -> Option { + if self.ranges().is_empty() { + None + } else { + Some(1) + } + } + + /// If this class consists of exactly one byte, then return it as + /// a literal byte string. + /// + /// If this class is empty or contains more than one byte, then `None` + /// is returned. + pub fn literal(&self) -> Option> { + let rs = self.ranges(); + if rs.len() == 1 && rs[0].start == rs[0].end { + Some(vec![rs[0].start]) + } else { + None + } + } + + /// If this class consists of only ASCII ranges, then return its + /// corresponding and equivalent Unicode class. + pub fn to_unicode_class(&self) -> Option { + if !self.is_ascii() { + return None; + } + Some(ClassUnicode::new(self.ranges().iter().map(|r| { + // Since we are guaranteed that our byte range is ASCII, the + // 'char::from' calls below are correct and will not erroneously + // convert a raw byte value into its corresponding codepoint. 
+ ClassUnicodeRange { + start: char::from(r.start), + end: char::from(r.end), + } + }))) + } +} + +/// An iterator over all ranges in a byte character class. +/// +/// The lifetime `'a` refers to the lifetime of the underlying class. +#[derive(Debug)] +pub struct ClassBytesIter<'a>(IntervalSetIter<'a, ClassBytesRange>); + +impl<'a> Iterator for ClassBytesIter<'a> { + type Item = &'a ClassBytesRange; + + fn next(&mut self) -> Option<&'a ClassBytesRange> { + self.0.next() + } +} + +/// A single range of characters represented by arbitrary bytes. +/// +/// The range is closed. That is, the start and end of the range are included +/// in the range. +#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)] +pub struct ClassBytesRange { + start: u8, + end: u8, +} + +impl Interval for ClassBytesRange { + type Bound = u8; + + #[inline] + fn lower(&self) -> u8 { + self.start + } + #[inline] + fn upper(&self) -> u8 { + self.end + } + #[inline] + fn set_lower(&mut self, bound: u8) { + self.start = bound; + } + #[inline] + fn set_upper(&mut self, bound: u8) { + self.end = bound; + } + + /// Apply simple case folding to this byte range. Only ASCII case mappings + /// (for a-z) are applied. + /// + /// Additional ranges are appended to the given vector. Canonical ordering + /// is *not* maintained in the given vector. + fn case_fold_simple( + &self, + ranges: &mut Vec, + ) -> Result<(), unicode::CaseFoldError> { + if !ClassBytesRange::new(b'a', b'z').is_intersection_empty(self) { + let lower = cmp::max(self.start, b'a'); + let upper = cmp::min(self.end, b'z'); + ranges.push(ClassBytesRange::new(lower - 32, upper - 32)); + } + if !ClassBytesRange::new(b'A', b'Z').is_intersection_empty(self) { + let lower = cmp::max(self.start, b'A'); + let upper = cmp::min(self.end, b'Z'); + ranges.push(ClassBytesRange::new(lower + 32, upper + 32)); + } + Ok(()) + } +} + +impl ClassBytesRange { + /// Create a new byte range for a character class. 
+ /// + /// The returned range is always in a canonical form. That is, the range + /// returned always satisfies the invariant that `start <= end`. + pub fn new(start: u8, end: u8) -> ClassBytesRange { + ClassBytesRange::create(start, end) + } + + /// Return the start of this range. + /// + /// The start of a range is always less than or equal to the end of the + /// range. + pub fn start(&self) -> u8 { + self.start + } + + /// Return the end of this range. + /// + /// The end of a range is always greater than or equal to the start of the + /// range. + pub fn end(&self) -> u8 { + self.end + } + + /// Returns the number of bytes in this range. + pub fn len(&self) -> usize { + usize::from(self.end.checked_sub(self.start).unwrap()) + .checked_add(1) + .unwrap() + } +} + +impl core::fmt::Debug for ClassBytesRange { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("ClassBytesRange") + .field("start", &crate::debug::Byte(self.start)) + .field("end", &crate::debug::Byte(self.end)) + .finish() + } +} + +/// The high-level intermediate representation for a look-around assertion. +/// +/// An assertion match is always zero-length. Also called an "empty match." +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum Look { + /// Match the beginning of text. Specifically, this matches at the starting + /// position of the input. + Start = 1 << 0, + /// Match the end of text. Specifically, this matches at the ending + /// position of the input. + End = 1 << 1, + /// Match the beginning of a line or the beginning of text. Specifically, + /// this matches at the starting position of the input, or at the position + /// immediately following a `\n` character. + StartLF = 1 << 2, + /// Match the end of a line or the end of text. Specifically, this matches + /// at the end position of the input, or at the position immediately + /// preceding a `\n` character. + EndLF = 1 << 3, + /// Match the beginning of a line or the beginning of text. 
Specifically, + /// this matches at the starting position of the input, or at the position + /// immediately following either a `\r` or `\n` character, but never after + /// a `\r` when a `\n` follows. + StartCRLF = 1 << 4, + /// Match the end of a line or the end of text. Specifically, this matches + /// at the end position of the input, or at the position immediately + /// preceding a `\r` or `\n` character, but never before a `\n` when a `\r` + /// precedes it. + EndCRLF = 1 << 5, + /// Match an ASCII-only word boundary. That is, this matches a position + /// where the left adjacent character and right adjacent character + /// correspond to a word and non-word or a non-word and word character. + WordAscii = 1 << 6, + /// Match an ASCII-only negation of a word boundary. + WordAsciiNegate = 1 << 7, + /// Match a Unicode-aware word boundary. That is, this matches a position + /// where the left adjacent character and right adjacent character + /// correspond to a word and non-word or a non-word and word character. + WordUnicode = 1 << 8, + /// Match a Unicode-aware negation of a word boundary. + WordUnicodeNegate = 1 << 9, +} + +impl Look { + /// Flip the look-around assertion to its equivalent for reverse searches. + /// For example, `StartLF` gets translated to `EndLF`. + /// + /// Some assertions, such as `WordUnicode`, remain the same since they + /// match the same positions regardless of the direction of the search. + #[inline] + pub const fn reversed(self) -> Look { + match self { + Look::Start => Look::End, + Look::End => Look::Start, + Look::StartLF => Look::EndLF, + Look::EndLF => Look::StartLF, + Look::StartCRLF => Look::EndCRLF, + Look::EndCRLF => Look::StartCRLF, + Look::WordAscii => Look::WordAscii, + Look::WordAsciiNegate => Look::WordAsciiNegate, + Look::WordUnicode => Look::WordUnicode, + Look::WordUnicodeNegate => Look::WordUnicodeNegate, + } + } + + /// Return the underlying representation of this look-around enumeration + /// as an integer. 
Giving the return value to the [`Look::from_repr`] + /// constructor is guaranteed to return the same look-around variant that + /// one started with within a semver compatible release of this crate. + #[inline] + pub const fn as_repr(self) -> u16 { + // AFAIK, 'as' is the only way to zero-cost convert an int enum to an + // actual int. + self as u16 + } + + /// Given the underlying representation of a `Look` value, return the + /// corresponding `Look` value if the representation is valid. Otherwise + /// `None` is returned. + #[inline] + pub const fn from_repr(repr: u16) -> Option { + match repr { + 0b00_0000_0001 => Some(Look::Start), + 0b00_0000_0010 => Some(Look::End), + 0b00_0000_0100 => Some(Look::StartLF), + 0b00_0000_1000 => Some(Look::EndLF), + 0b00_0001_0000 => Some(Look::StartCRLF), + 0b00_0010_0000 => Some(Look::EndCRLF), + 0b00_0100_0000 => Some(Look::WordAscii), + 0b00_1000_0000 => Some(Look::WordAsciiNegate), + 0b01_0000_0000 => Some(Look::WordUnicode), + 0b10_0000_0000 => Some(Look::WordUnicodeNegate), + _ => None, + } + } + + /// Returns a convenient single codepoint representation of this + /// look-around assertion. Each assertion is guaranteed to be represented + /// by a distinct character. + /// + /// This is useful for succinctly representing a look-around assertion in + /// human friendly but succinct output intended for a programmer working on + /// regex internals. + #[inline] + pub const fn as_char(self) -> char { + match self { + Look::Start => 'A', + Look::End => 'z', + Look::StartLF => '^', + Look::EndLF => '$', + Look::StartCRLF => 'r', + Look::EndCRLF => 'R', + Look::WordAscii => 'b', + Look::WordAsciiNegate => 'B', + Look::WordUnicode => '𝛃', + Look::WordUnicodeNegate => '𝚩', + } + } +} + +/// The high-level intermediate representation for a capturing group. +/// +/// A capturing group always has an index and a child expression. It may +/// also have a name associated with it (e.g., `(?P\w)`), but it's not +/// necessary. 
+/// +/// Note that there is no explicit representation of a non-capturing group +/// in a `Hir`. Instead, non-capturing grouping is handled automatically by +/// the recursive structure of the `Hir` itself. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Capture { + /// The capture index of the capture. + pub index: u32, + /// The name of the capture, if it exists. + pub name: Option>, + /// The expression inside the capturing group, which may be empty. + pub sub: Box, +} + +/// The high-level intermediate representation of a repetition operator. +/// +/// A repetition operator permits the repetition of an arbitrary +/// sub-expression. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Repetition { + /// The minimum range of the repetition. + /// + /// Note that special cases like `?`, `+` and `*` all get translated into + /// the ranges `{0,1}`, `{1,}` and `{0,}`, respectively. + /// + /// When `min` is zero, this expression can match the empty string + /// regardless of what its sub-expression is. + pub min: u32, + /// The maximum range of the repetition. + /// + /// Note that when `max` is `None`, `min` acts as a lower bound but where + /// there is no upper bound. For something like `x{5}` where the min and + /// max are equivalent, `min` will be set to `5` and `max` will be set to + /// `Some(5)`. + pub max: Option, + /// Whether this repetition operator is greedy or not. A greedy operator + /// will match as much as it can. A non-greedy operator will match as + /// little as it can. + /// + /// Typically, operators are greedy by default and are only non-greedy when + /// a `?` suffix is used, e.g., `(expr)*` is greedy while `(expr)*?` is + /// not. However, this can be inverted via the `U` "ungreedy" flag. + pub greedy: bool, + /// The expression being repeated. + pub sub: Box, +} + +impl Repetition { + /// Returns a new repetition with the same `min`, `max` and `greedy` + /// values, but with its sub-expression replaced with the one given. 
+ pub fn with(&self, sub: Hir) -> Repetition { + Repetition { + min: self.min, + max: self.max, + greedy: self.greedy, + sub: Box::new(sub), + } + } +} + +/// A type describing the different flavors of `.`. +/// +/// This type is meant to be used with [`Hir::dot`], which is a convenience +/// routine for building HIR values derived from the `.` regex. +#[non_exhaustive] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum Dot { + /// Matches the UTF-8 encoding of any Unicode scalar value. + /// + /// This is equivalent to `(?su:.)` and also `\p{any}`. + AnyChar, + /// Matches any byte value. + /// + /// This is equivalent to `(?s-u:.)` and also `(?-u:[\x00-\xFF])`. + AnyByte, + /// Matches the UTF-8 encoding of any Unicode scalar value except for `\n`. + /// + /// This is equivalent to `(?u-s:.)` and also `[\p{any}--\n]`. + AnyCharExceptLF, + /// Matches the UTF-8 encoding of any Unicode scalar value except for `\r` + /// and `\n`. + /// + /// This is equivalent to `(?uR-s:.)` and also `[\p{any}--\r\n]`. + AnyCharExceptCRLF, + /// Matches any byte value except for `\n`. + /// + /// This is equivalent to `(?-su:.)` and also `(?-u:[[\x00-\xFF]--\n])`. + AnyByteExceptLF, + /// Matches any byte value except for `\r` and `\n`. + /// + /// This is equivalent to `(?R-su:.)` and also `(?-u:[[\x00-\xFF]--\r\n])`. + AnyByteExceptCRLF, +} + +/// A custom `Drop` impl is used for `HirKind` such that it uses constant stack +/// space but heap space proportional to the depth of the total `Hir`. 
+impl Drop for Hir { + fn drop(&mut self) { + use core::mem; + + match *self.kind() { + HirKind::Empty + | HirKind::Literal(_) + | HirKind::Class(_) + | HirKind::Look(_) => return, + HirKind::Capture(ref x) if x.sub.kind.subs().is_empty() => return, + HirKind::Repetition(ref x) if x.sub.kind.subs().is_empty() => { + return + } + HirKind::Concat(ref x) if x.is_empty() => return, + HirKind::Alternation(ref x) if x.is_empty() => return, + _ => {} + } + + let mut stack = vec![mem::replace(self, Hir::empty())]; + while let Some(mut expr) = stack.pop() { + match expr.kind { + HirKind::Empty + | HirKind::Literal(_) + | HirKind::Class(_) + | HirKind::Look(_) => {} + HirKind::Capture(ref mut x) => { + stack.push(mem::replace(&mut x.sub, Hir::empty())); + } + HirKind::Repetition(ref mut x) => { + stack.push(mem::replace(&mut x.sub, Hir::empty())); + } + HirKind::Concat(ref mut x) => { + stack.extend(x.drain(..)); + } + HirKind::Alternation(ref mut x) => { + stack.extend(x.drain(..)); + } + } + } + } +} + +/// A type that collects various properties of an HIR value. +/// +/// Properties are always scalar values and represent meta data that is +/// computed inductively on an HIR value. Properties are defined for all +/// HIR values. +/// +/// All methods on a `Properties` value take constant time and are meant to +/// be cheap to call. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Properties(Box); + +/// The property definition. It is split out so that we can box it, and +/// there by make `Properties` use less stack size. This is kind-of important +/// because every HIR value has a `Properties` attached to it. +/// +/// This does have the unfortunate consequence that creating any HIR value +/// always leads to at least one alloc for properties, but this is generally +/// true anyway (for pretty much all HirKinds except for look-arounds). 
+#[derive(Clone, Debug, Eq, PartialEq)] +struct PropertiesI { + minimum_len: Option, + maximum_len: Option, + look_set: LookSet, + look_set_prefix: LookSet, + look_set_suffix: LookSet, + utf8: bool, + explicit_captures_len: usize, + static_explicit_captures_len: Option, + literal: bool, + alternation_literal: bool, +} + +impl Properties { + /// Returns the length (in bytes) of the smallest string matched by this + /// HIR. + /// + /// A return value of `0` is possible and occurs when the HIR can match an + /// empty string. + /// + /// `None` is returned when there is no minimum length. This occurs in + /// precisely the cases where the HIR matches nothing. i.e., The language + /// the regex matches is empty. An example of such a regex is `\P{any}`. + #[inline] + pub fn minimum_len(&self) -> Option { + self.0.minimum_len + } + + /// Returns the length (in bytes) of the longest string matched by this + /// HIR. + /// + /// A return value of `0` is possible and occurs when nothing longer than + /// the empty string is in the language described by this HIR. + /// + /// `None` is returned when there is no longest matching string. This + /// occurs when the HIR matches nothing or when there is no upper bound on + /// the length of matching strings. Example of such regexes are `\P{any}` + /// (matches nothing) and `a+` (has no upper bound). + #[inline] + pub fn maximum_len(&self) -> Option { + self.0.maximum_len + } + + /// Returns a set of all look-around assertions that appear at least once + /// in this HIR value. + #[inline] + pub fn look_set(&self) -> LookSet { + self.0.look_set + } + + /// Returns a set of all look-around assertions that appear as a prefix for + /// this HIR value. That is, the set returned corresponds to the set of + /// assertions that must be passed before matching any bytes in a haystack. + /// + /// For example, `hir.look_set_prefix().contains(Look::Start)` returns true + /// if and only if the HIR is fully anchored at the start. 
+ #[inline] + pub fn look_set_prefix(&self) -> LookSet { + self.0.look_set_prefix + } + + /// Returns a set of all look-around assertions that appear as a suffix for + /// this HIR value. That is, the set returned corresponds to the set of + /// assertions that must be passed in order to be considered a match after + /// all other consuming HIR expressions. + /// + /// For example, `hir.look_set_suffix().contains(Look::End)` returns true + /// if and only if the HIR is fully anchored at the end. + #[inline] + pub fn look_set_suffix(&self) -> LookSet { + self.0.look_set_suffix + } + + /// Return true if and only if the corresponding HIR will always match + /// valid UTF-8. + /// + /// When this returns false, then it is possible for this HIR expression to + /// match invalid UTF-8, including by matching between the code units of + /// a single UTF-8 encoded codepoint. + /// + /// Note that this returns true even when the corresponding HIR can match + /// the empty string. Since an empty string can technically appear between + /// UTF-8 code units, it is possible for a match to be reported that splits + /// a codepoint which could in turn be considered matching invalid UTF-8. + /// However, it is generally assumed that such empty matches are handled + /// specially by the search routine if it is absolutely required that + /// matches not split a codepoint. + /// + /// # Example + /// + /// This code example shows the UTF-8 property of a variety of patterns. + /// + /// ``` + /// use regex_syntax::{ParserBuilder, parse}; + /// + /// // Examples of 'is_utf8() == true'. 
+ /// assert!(parse(r"a")?.properties().is_utf8()); + /// assert!(parse(r"[^a]")?.properties().is_utf8()); + /// assert!(parse(r".")?.properties().is_utf8()); + /// assert!(parse(r"\W")?.properties().is_utf8()); + /// assert!(parse(r"\b")?.properties().is_utf8()); + /// assert!(parse(r"\B")?.properties().is_utf8()); + /// assert!(parse(r"(?-u)\b")?.properties().is_utf8()); + /// assert!(parse(r"(?-u)\B")?.properties().is_utf8()); + /// // Unicode mode is enabled by default, and in + /// // that mode, all \x hex escapes are treated as + /// // codepoints. So this actually matches the UTF-8 + /// // encoding of U+00FF. + /// assert!(parse(r"\xFF")?.properties().is_utf8()); + /// + /// // Now we show examples of 'is_utf8() == false'. + /// // The only way to do this is to force the parser + /// // to permit invalid UTF-8, otherwise all of these + /// // would fail to parse! + /// let parse = |pattern| { + /// ParserBuilder::new().utf8(false).build().parse(pattern) + /// }; + /// assert!(!parse(r"(?-u)[^a]")?.properties().is_utf8()); + /// assert!(!parse(r"(?-u).")?.properties().is_utf8()); + /// assert!(!parse(r"(?-u)\W")?.properties().is_utf8()); + /// // Conversely to the equivalent example above, + /// // when Unicode mode is disabled, \x hex escapes + /// // are treated as their raw byte values. + /// assert!(!parse(r"(?-u)\xFF")?.properties().is_utf8()); + /// // Note that just because we disabled UTF-8 in the + /// // parser doesn't mean we still can't use Unicode. + /// // It is enabled by default, so \xFF is still + /// // equivalent to matching the UTF-8 encoding of + /// // U+00FF by default. + /// assert!(parse(r"\xFF")?.properties().is_utf8()); + /// // Even though we use raw bytes that individually + /// // are not valid UTF-8, when combined together, the + /// // overall expression *does* match valid UTF-8! 
+ /// assert!(parse(r"(?-u)\xE2\x98\x83")?.properties().is_utf8()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn is_utf8(&self) -> bool { + self.0.utf8 + } + + /// Returns the total number of explicit capturing groups in the + /// corresponding HIR. + /// + /// Note that this does not include the implicit capturing group + /// corresponding to the entire match that is typically included by regex + /// engines. + /// + /// # Example + /// + /// This method will return `0` for `a` and `1` for `(a)`: + /// + /// ``` + /// use regex_syntax::parse; + /// + /// assert_eq!(0, parse("a")?.properties().explicit_captures_len()); + /// assert_eq!(1, parse("(a)")?.properties().explicit_captures_len()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn explicit_captures_len(&self) -> usize { + self.0.explicit_captures_len + } + + /// Returns the total number of explicit capturing groups that appear in + /// every possible match. + /// + /// If the number of capture groups can vary depending on the match, then + /// this returns `None`. That is, a value is only returned when the number + /// of matching groups is invariant or "static." + /// + /// Note that this does not include the implicit capturing group + /// corresponding to the entire match. + /// + /// # Example + /// + /// This shows a few cases where a static number of capture groups is + /// available and a few cases where it is not. 
+ /// + /// ``` + /// use regex_syntax::parse; + /// + /// let len = |pattern| { + /// parse(pattern).map(|h| { + /// h.properties().static_explicit_captures_len() + /// }) + /// }; + /// + /// assert_eq!(Some(0), len("a")?); + /// assert_eq!(Some(1), len("(a)")?); + /// assert_eq!(Some(1), len("(a)|(b)")?); + /// assert_eq!(Some(2), len("(a)(b)|(c)(d)")?); + /// assert_eq!(None, len("(a)|b")?); + /// assert_eq!(None, len("a|(b)")?); + /// assert_eq!(None, len("(b)*")?); + /// assert_eq!(Some(1), len("(b)+")?); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn static_explicit_captures_len(&self) -> Option { + self.0.static_explicit_captures_len + } + + /// Return true if and only if this HIR is a simple literal. This is + /// only true when this HIR expression is either itself a `Literal` or a + /// concatenation of only `Literal`s. + /// + /// For example, `f` and `foo` are literals, but `f+`, `(foo)`, `foo()` and + /// the empty string are not (even though they contain sub-expressions that + /// are literals). + #[inline] + pub fn is_literal(&self) -> bool { + self.0.literal + } + + /// Return true if and only if this HIR is either a simple literal or an + /// alternation of simple literals. This is only + /// true when this HIR expression is either itself a `Literal` or a + /// concatenation of only `Literal`s or an alternation of only `Literal`s. + /// + /// For example, `f`, `foo`, `a|b|c`, and `foo|bar|baz` are alternation + /// literals, but `f+`, `(foo)`, `foo()`, `` + /// are not (even though that contain sub-expressions that are literals). + #[inline] + pub fn is_alternation_literal(&self) -> bool { + self.0.alternation_literal + } + + /// Returns the total amount of heap memory usage, in bytes, used by this + /// `Properties` value. + #[inline] + pub fn memory_usage(&self) -> usize { + core::mem::size_of::() + } + + /// Returns a new set of properties that corresponds to the union of the + /// iterator of properties given. 
+ /// + /// This is useful when one has multiple `Hir` expressions and wants + /// to combine them into a single alternation without constructing the + /// corresponding `Hir`. This routine provides a way of combining the + /// properties of each `Hir` expression into one set of properties + /// representing the union of those expressions. + /// + /// # Example: union with HIRs that never match + /// + /// This example shows that unioning properties together with one that + /// represents a regex that never matches will "poison" certain attributes, + /// like the minimum and maximum lengths. + /// + /// ``` + /// use regex_syntax::{hir::Properties, parse}; + /// + /// let hir1 = parse("ab?c?")?; + /// assert_eq!(Some(1), hir1.properties().minimum_len()); + /// assert_eq!(Some(3), hir1.properties().maximum_len()); + /// + /// let hir2 = parse(r"[a&&b]")?; + /// assert_eq!(None, hir2.properties().minimum_len()); + /// assert_eq!(None, hir2.properties().maximum_len()); + /// + /// let hir3 = parse(r"wxy?z?")?; + /// assert_eq!(Some(2), hir3.properties().minimum_len()); + /// assert_eq!(Some(4), hir3.properties().maximum_len()); + /// + /// let unioned = Properties::union([ + /// hir1.properties(), + /// hir2.properties(), + /// hir3.properties(), + /// ]); + /// assert_eq!(None, unioned.minimum_len()); + /// assert_eq!(None, unioned.maximum_len()); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// The maximum length can also be "poisoned" by a pattern that has no + /// upper bound on the length of a match. 
The minimum length remains + /// unaffected: + /// + /// ``` + /// use regex_syntax::{hir::Properties, parse}; + /// + /// let hir1 = parse("ab?c?")?; + /// assert_eq!(Some(1), hir1.properties().minimum_len()); + /// assert_eq!(Some(3), hir1.properties().maximum_len()); + /// + /// let hir2 = parse(r"a+")?; + /// assert_eq!(Some(1), hir2.properties().minimum_len()); + /// assert_eq!(None, hir2.properties().maximum_len()); + /// + /// let hir3 = parse(r"wxy?z?")?; + /// assert_eq!(Some(2), hir3.properties().minimum_len()); + /// assert_eq!(Some(4), hir3.properties().maximum_len()); + /// + /// let unioned = Properties::union([ + /// hir1.properties(), + /// hir2.properties(), + /// hir3.properties(), + /// ]); + /// assert_eq!(Some(1), unioned.minimum_len()); + /// assert_eq!(None, unioned.maximum_len()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn union(props: I) -> Properties + where + I: IntoIterator, + P: core::borrow::Borrow, + { + let mut it = props.into_iter().peekable(); + // While empty alternations aren't possible, we still behave as if they + // are. When we have an empty alternate, then clearly the look-around + // prefix and suffix is empty. Otherwise, it is the intersection of all + // prefixes and suffixes (respectively) of the branches. + let fix = if it.peek().is_none() { + LookSet::empty() + } else { + LookSet::full() + }; + // And also, an empty alternate means we have 0 static capture groups, + // but we otherwise start with the number corresponding to the first + // alternate. If any subsequent alternate has a different number of + // static capture groups, then we overall have a variation and not a + // static number of groups. + let static_explicit_captures_len = + it.peek().and_then(|p| p.borrow().static_explicit_captures_len()); + // The base case is an empty alternation, which matches nothing. 
+ // Note though that empty alternations aren't possible, because the + // Hir::alternation smart constructor rewrites those as empty character + // classes. + let mut props = PropertiesI { + minimum_len: None, + maximum_len: None, + look_set: LookSet::empty(), + look_set_prefix: fix, + look_set_suffix: fix, + utf8: true, + explicit_captures_len: 0, + static_explicit_captures_len, + literal: false, + alternation_literal: true, + }; + let (mut min_poisoned, mut max_poisoned) = (false, false); + // Handle properties that need to visit every child hir. + for prop in it { + let p = prop.borrow(); + props.look_set.set_union(p.look_set()); + props.look_set_prefix.set_intersect(p.look_set_prefix()); + props.look_set_suffix.set_intersect(p.look_set_suffix()); + props.utf8 = props.utf8 && p.is_utf8(); + props.explicit_captures_len = props + .explicit_captures_len + .saturating_add(p.explicit_captures_len()); + if props.static_explicit_captures_len + != p.static_explicit_captures_len() + { + props.static_explicit_captures_len = None; + } + props.alternation_literal = + props.alternation_literal && p.is_alternation_literal(); + if !min_poisoned { + if let Some(xmin) = p.minimum_len() { + if props.minimum_len.map_or(true, |pmin| xmin < pmin) { + props.minimum_len = Some(xmin); + } + } else { + props.minimum_len = None; + min_poisoned = true; + } + } + if !max_poisoned { + if let Some(xmax) = p.maximum_len() { + if props.maximum_len.map_or(true, |pmax| xmax > pmax) { + props.maximum_len = Some(xmax); + } + } else { + props.maximum_len = None; + max_poisoned = true; + } + } + } + Properties(Box::new(props)) + } +} + +impl Properties { + /// Create a new set of HIR properties for an empty regex. 
+ fn empty() -> Properties { + let inner = PropertiesI { + minimum_len: Some(0), + maximum_len: Some(0), + look_set: LookSet::empty(), + look_set_prefix: LookSet::empty(), + look_set_suffix: LookSet::empty(), + // It is debatable whether an empty regex always matches at valid + // UTF-8 boundaries. Strictly speaking, at a byte oriented view, + // it is clearly false. There are, for example, many empty strings + // between the bytes encoding a '☃'. + // + // However, when Unicode mode is enabled, the fundamental atom + // of matching is really a codepoint. And in that scenario, an + // empty regex is defined to only match at valid UTF-8 boundaries + // and to never split a codepoint. It just so happens that this + // enforcement is somewhat tricky to do for regexes that match + // the empty string inside regex engines themselves. It usually + // requires some layer above the regex engine to filter out such + // matches. + // + // In any case, 'true' is really the only coherent option. If it + // were false, for example, then 'a*' would also need to be false + // since it too can match the empty string. + utf8: true, + explicit_captures_len: 0, + static_explicit_captures_len: Some(0), + literal: false, + alternation_literal: false, + }; + Properties(Box::new(inner)) + } + + /// Create a new set of HIR properties for a literal regex. + fn literal(lit: &Literal) -> Properties { + let inner = PropertiesI { + minimum_len: Some(lit.0.len()), + maximum_len: Some(lit.0.len()), + look_set: LookSet::empty(), + look_set_prefix: LookSet::empty(), + look_set_suffix: LookSet::empty(), + utf8: core::str::from_utf8(&lit.0).is_ok(), + explicit_captures_len: 0, + static_explicit_captures_len: Some(0), + literal: true, + alternation_literal: true, + }; + Properties(Box::new(inner)) + } + + /// Create a new set of HIR properties for a character class. 
+ fn class(class: &Class) -> Properties { + let inner = PropertiesI { + minimum_len: class.minimum_len(), + maximum_len: class.maximum_len(), + look_set: LookSet::empty(), + look_set_prefix: LookSet::empty(), + look_set_suffix: LookSet::empty(), + utf8: class.is_utf8(), + explicit_captures_len: 0, + static_explicit_captures_len: Some(0), + literal: false, + alternation_literal: false, + }; + Properties(Box::new(inner)) + } + + /// Create a new set of HIR properties for a look-around assertion. + fn look(look: Look) -> Properties { + let inner = PropertiesI { + minimum_len: Some(0), + maximum_len: Some(0), + look_set: LookSet::singleton(look), + look_set_prefix: LookSet::singleton(look), + look_set_suffix: LookSet::singleton(look), + // This requires a little explanation. Basically, we don't consider + // matching an empty string to be equivalent to matching invalid + // UTF-8, even though technically matching every empty string will + // split the UTF-8 encoding of a single codepoint when treating a + // UTF-8 encoded string as a sequence of bytes. Our defense here is + // that in such a case, a codepoint should logically be treated as + // the fundamental atom for matching, and thus the only valid match + // points are between codepoints and not bytes. + // + // More practically, this is true here because it's also true + // for 'Hir::empty()', otherwise something like 'a*' would be + // considered to match invalid UTF-8. That in turn makes this + // property borderline useless. + utf8: true, + explicit_captures_len: 0, + static_explicit_captures_len: Some(0), + literal: false, + alternation_literal: false, + }; + Properties(Box::new(inner)) + } + + /// Create a new set of HIR properties for a repetition. 
+    fn repetition(rep: &Repetition) -> Properties {
+        let p = rep.sub.properties();
+        let minimum_len = p.minimum_len().map(|child_min| {
+            let rep_min = usize::try_from(rep.min).unwrap_or(usize::MAX);
+            child_min.saturating_mul(rep_min)
+        });
+        let maximum_len = rep.max.and_then(|rep_max| {
+            let rep_max = usize::try_from(rep_max).ok()?;
+            let child_max = p.maximum_len()?;
+            child_max.checked_mul(rep_max)
+        });
+
+        let mut inner = PropertiesI {
+            minimum_len,
+            maximum_len,
+            look_set: p.look_set(),
+            look_set_prefix: LookSet::empty(),
+            look_set_suffix: LookSet::empty(),
+            utf8: p.is_utf8(),
+            explicit_captures_len: p.explicit_captures_len(),
+            static_explicit_captures_len: p.static_explicit_captures_len(),
+            literal: false,
+            alternation_literal: false,
+        };
+        // If the repetition operator can match the empty string, then its
+        // lookset prefix and suffixes themselves remain empty since they are
+        // no longer required to match.
+        if rep.min > 0 {
+            inner.look_set_prefix = p.look_set_prefix();
+            inner.look_set_suffix = p.look_set_suffix();
+        }
+        // If the static captures len of the sub-expression is not known or is
+        // zero, then it automatically propagates to the repetition, regardless
+        // of the repetition. Otherwise, it might change, but only when the
+        // repetition can match 0 times.
+        if rep.min == 0
+            && inner.static_explicit_captures_len.map_or(false, |len| len > 0)
+        {
+            // If we require a match 0 times, then our captures len is
+            // guaranteed to be zero. Otherwise, if we *can* match the empty
+            // string, then it's impossible to know how many captures will be
+            // in the resulting match.
+            if rep.max == Some(0) {
+                inner.static_explicit_captures_len = Some(0);
+            } else {
+                inner.static_explicit_captures_len = None;
+            }
+        }
+        Properties(Box::new(inner))
+    }
+
+    /// Create a new set of HIR properties for a capture.
+ fn capture(capture: &Capture) -> Properties { + let p = capture.sub.properties(); + Properties(Box::new(PropertiesI { + explicit_captures_len: p.explicit_captures_len().saturating_add(1), + static_explicit_captures_len: p + .static_explicit_captures_len() + .map(|len| len.saturating_add(1)), + literal: false, + alternation_literal: false, + ..*p.0.clone() + })) + } + + /// Create a new set of HIR properties for a concatenation. + fn concat(concat: &[Hir]) -> Properties { + // The base case is an empty concatenation, which matches the empty + // string. Note though that empty concatenations aren't possible, + // because the Hir::concat smart constructor rewrites those as + // Hir::empty. + let mut props = PropertiesI { + minimum_len: Some(0), + maximum_len: Some(0), + look_set: LookSet::empty(), + look_set_prefix: LookSet::empty(), + look_set_suffix: LookSet::empty(), + utf8: true, + explicit_captures_len: 0, + static_explicit_captures_len: Some(0), + literal: true, + alternation_literal: true, + }; + // Handle properties that need to visit every child hir. 
+        for x in concat.iter() {
+            let p = x.properties();
+            props.look_set.set_union(p.look_set());
+            props.utf8 = props.utf8 && p.is_utf8();
+            props.explicit_captures_len = props
+                .explicit_captures_len
+                .saturating_add(p.explicit_captures_len());
+            props.static_explicit_captures_len = p
+                .static_explicit_captures_len()
+                .and_then(|len1| {
+                    Some((len1, props.static_explicit_captures_len?))
+                })
+                .and_then(|(len1, len2)| Some(len1.saturating_add(len2)));
+            props.literal = props.literal && p.is_literal();
+            props.alternation_literal =
+                props.alternation_literal && p.is_alternation_literal();
+            if let Some(ref mut minimum_len) = props.minimum_len {
+                match p.minimum_len() {
+                    None => props.minimum_len = None,
+                    Some(len) => *minimum_len += len,
+                }
+            }
+            if let Some(ref mut maximum_len) = props.maximum_len {
+                match p.maximum_len() {
+                    None => props.maximum_len = None,
+                    Some(len) => *maximum_len += len,
+                }
+            }
+        }
+        // Handle the prefix properties, which only requires visiting
+        // child exprs until one matches more than the empty string.
+        let mut it = concat.iter();
+        while let Some(x) = it.next() {
+            props.look_set_prefix.set_union(x.properties().look_set_prefix());
+            if x.properties().maximum_len().map_or(true, |x| x > 0) {
+                break;
+            }
+        }
+        // Same thing for the suffix properties, but in reverse.
+        let mut it = concat.iter().rev();
+        while let Some(x) = it.next() {
+            props.look_set_suffix.set_union(x.properties().look_set_suffix());
+            if x.properties().maximum_len().map_or(true, |x| x > 0) {
+                break;
+            }
+        }
+        Properties(Box::new(props))
+    }
+
+    /// Create a new set of HIR properties for an alternation.
+    fn alternation(alts: &[Hir]) -> Properties {
+        Properties::union(alts.iter().map(|hir| hir.properties()))
+    }
+}
+
+/// A set of look-around assertions.
+///
+/// This is useful for efficiently tracking look-around assertions. For
+/// example, an [`Hir`] provides properties that return `LookSet`s.
+#[derive(Clone, Copy, Default, Eq, PartialEq)]
+pub struct LookSet {
+    /// The underlying representation this set is exposed to make it possible
+    /// to store it somewhere efficiently. The representation is that
+    /// of a bitset, where each assertion occupies bit `i` where `i =
+    /// Look::as_repr()`.
+    ///
+    /// Note that users of this internal representation must permit the full
+    /// range of `u16` values to be represented. For example, even if the
+    /// current implementation only makes use of the 10 least significant bits,
+    /// it may use more bits in a future semver compatible release.
+    pub bits: u16,
+}
+
+impl LookSet {
+    /// Create an empty set of look-around assertions.
+    #[inline]
+    pub fn empty() -> LookSet {
+        LookSet { bits: 0 }
+    }
+
+    /// Create a full set of look-around assertions.
+    ///
+    /// This set contains all possible look-around assertions.
+    #[inline]
+    pub fn full() -> LookSet {
+        LookSet { bits: !0 }
+    }
+
+    /// Create a look-around set containing the look-around assertion given.
+    ///
+    /// This is a convenience routine for creating an empty set and inserting
+    /// one look-around assertion.
+    #[inline]
+    pub fn singleton(look: Look) -> LookSet {
+        LookSet::empty().insert(look)
+    }
+
+    /// Returns the total number of look-around assertions in this set.
+    #[inline]
+    pub fn len(self) -> usize {
+        // OK because max value always fits in a u8, which in turn always
+        // fits in a usize, regardless of target.
+        usize::try_from(self.bits.count_ones()).unwrap()
+    }
+
+    /// Returns true if and only if this set is empty.
+    #[inline]
+    pub fn is_empty(self) -> bool {
+        self.len() == 0
+    }
+
+    /// Returns true if and only if the given look-around assertion is in this
+    /// set.
+    #[inline]
+    pub fn contains(self, look: Look) -> bool {
+        self.bits & look.as_repr() != 0
+    }
+
+    /// Returns true if and only if this set contains any anchor assertions.
+    /// This includes both "start/end of haystack" and "start/end of line."
+ #[inline] + pub fn contains_anchor(&self) -> bool { + self.contains_anchor_haystack() || self.contains_anchor_line() + } + + /// Returns true if and only if this set contains any "start/end of + /// haystack" anchors. This doesn't include "start/end of line" anchors. + #[inline] + pub fn contains_anchor_haystack(&self) -> bool { + self.contains(Look::Start) || self.contains(Look::End) + } + + /// Returns true if and only if this set contains any "start/end of line" + /// anchors. This doesn't include "start/end of haystack" anchors. This + /// includes both `\n` line anchors and CRLF (`\r\n`) aware line anchors. + #[inline] + pub fn contains_anchor_line(&self) -> bool { + self.contains(Look::StartLF) + || self.contains(Look::EndLF) + || self.contains(Look::StartCRLF) + || self.contains(Look::EndCRLF) } - /// Negate this byte class. - /// - /// For all `b` where `b` is a any byte, if `b` was in this set, then it - /// will not be in this set after negation. - pub fn negate(&mut self) { - self.set.negate(); + /// Returns true if and only if this set contains any "start/end of line" + /// anchors that only treat `\n` as line terminators. This does not include + /// haystack anchors or CRLF aware line anchors. + #[inline] + pub fn contains_anchor_lf(&self) -> bool { + self.contains(Look::StartLF) || self.contains(Look::EndLF) } - /// Union this byte class with the given byte class, in place. - pub fn union(&mut self, other: &ClassBytes) { - self.set.union(&other.set); + /// Returns true if and only if this set contains any "start/end of line" + /// anchors that are CRLF-aware. This doesn't include "start/end of + /// haystack" or "start/end of line-feed" anchors. + #[inline] + pub fn contains_anchor_crlf(&self) -> bool { + self.contains(Look::StartCRLF) || self.contains(Look::EndCRLF) } - /// Intersect this byte class with the given byte class, in place. 
- pub fn intersect(&mut self, other: &ClassBytes) { - self.set.intersect(&other.set); + /// Returns true if and only if this set contains any word boundary or + /// negated word boundary assertions. This include both Unicode and ASCII + /// word boundaries. + #[inline] + pub fn contains_word(self) -> bool { + self.contains_word_unicode() || self.contains_word_ascii() } - /// Subtract the given byte class from this byte class, in place. - pub fn difference(&mut self, other: &ClassBytes) { - self.set.difference(&other.set); + /// Returns true if and only if this set contains any Unicode word boundary + /// or negated Unicode word boundary assertions. + #[inline] + pub fn contains_word_unicode(self) -> bool { + self.contains(Look::WordUnicode) + || self.contains(Look::WordUnicodeNegate) } - /// Compute the symmetric difference of the given byte classes, in place. - /// - /// This computes the symmetric difference of two byte classes. This - /// removes all elements in this class that are also in the given class, - /// but all adds all elements from the given class that aren't in this - /// class. That is, the class will contain all elements in either class, - /// but will not contain any elements that are in both classes. - pub fn symmetric_difference(&mut self, other: &ClassBytes) { - self.set.symmetric_difference(&other.set); + /// Returns true if and only if this set contains any ASCII word boundary + /// or negated ASCII word boundary assertions. + #[inline] + pub fn contains_word_ascii(self) -> bool { + self.contains(Look::WordAscii) || self.contains(Look::WordAsciiNegate) } - /// Returns true if and only if this character class will either match - /// nothing or only ASCII bytes. Stated differently, this returns false - /// if and only if this class contains a non-ASCII byte. - pub fn is_all_ascii(&self) -> bool { - self.set.intervals().last().map_or(true, |r| r.end <= 0x7F) + /// Returns an iterator over all of the look-around assertions in this set. 
+ #[inline] + pub fn iter(self) -> LookSetIter { + LookSetIter { set: self } } -} - -/// An iterator over all ranges in a byte character class. -/// -/// The lifetime `'a` refers to the lifetime of the underlying class. -#[derive(Debug)] -pub struct ClassBytesIter<'a>(IntervalSetIter<'a, ClassBytesRange>); -impl<'a> Iterator for ClassBytesIter<'a> { - type Item = &'a ClassBytesRange; + /// Return a new set that is equivalent to the original, but with the given + /// assertion added to it. If the assertion is already in the set, then the + /// returned set is equivalent to the original. + #[inline] + pub fn insert(self, look: Look) -> LookSet { + LookSet { bits: self.bits | look.as_repr() } + } - fn next(&mut self) -> Option<&'a ClassBytesRange> { - self.0.next() + /// Updates this set in place with the result of inserting the given + /// assertion into this set. + #[inline] + pub fn set_insert(&mut self, look: Look) { + *self = self.insert(look); } -} -/// A single range of characters represented by arbitrary bytes. -/// -/// The range is closed. That is, the start and end of the range are included -/// in the range. -#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)] -pub struct ClassBytesRange { - start: u8, - end: u8, -} + /// Return a new set that is equivalent to the original, but with the given + /// assertion removed from it. If the assertion is not in the set, then the + /// returned set is equivalent to the original. + #[inline] + pub fn remove(self, look: Look) -> LookSet { + LookSet { bits: self.bits & !look.as_repr() } + } -impl Interval for ClassBytesRange { - type Bound = u8; + /// Updates this set in place with the result of removing the given + /// assertion from this set. + #[inline] + pub fn set_remove(&mut self, look: Look) { + *self = self.remove(look); + } + /// Returns a new set that is the result of subtracting the given set from + /// this set. 
#[inline] - fn lower(&self) -> u8 { - self.start + pub fn subtract(self, other: LookSet) -> LookSet { + LookSet { bits: self.bits & !other.bits } } + + /// Updates this set in place with the result of subtracting the given set + /// from this set. #[inline] - fn upper(&self) -> u8 { - self.end + pub fn set_subtract(&mut self, other: LookSet) { + *self = self.subtract(other); } + + /// Returns a new set that is the union of this and the one given. #[inline] - fn set_lower(&mut self, bound: u8) { - self.start = bound; + pub fn union(self, other: LookSet) -> LookSet { + LookSet { bits: self.bits | other.bits } } + + /// Updates this set in place with the result of unioning it with the one + /// given. #[inline] - fn set_upper(&mut self, bound: u8) { - self.end = bound; + pub fn set_union(&mut self, other: LookSet) { + *self = self.union(other); } - /// Apply simple case folding to this byte range. Only ASCII case mappings - /// (for a-z) are applied. - /// - /// Additional ranges are appended to the given vector. Canonical ordering - /// is *not* maintained in the given vector. - fn case_fold_simple( - &self, - ranges: &mut Vec, - ) -> Result<(), unicode::CaseFoldError> { - if !ClassBytesRange::new(b'a', b'z').is_intersection_empty(self) { - let lower = cmp::max(self.start, b'a'); - let upper = cmp::min(self.end, b'z'); - ranges.push(ClassBytesRange::new(lower - 32, upper - 32)); - } - if !ClassBytesRange::new(b'A', b'Z').is_intersection_empty(self) { - let lower = cmp::max(self.start, b'A'); - let upper = cmp::min(self.end, b'Z'); - ranges.push(ClassBytesRange::new(lower + 32, upper + 32)); - } - Ok(()) + /// Returns a new set that is the intersection of this and the one given. + #[inline] + pub fn intersect(self, other: LookSet) -> LookSet { + LookSet { bits: self.bits & other.bits } } -} -impl ClassBytesRange { - /// Create a new byte range for a character class. - /// - /// The returned range is always in a canonical form. 
That is, the range - /// returned always satisfies the invariant that `start <= end`. - pub fn new(start: u8, end: u8) -> ClassBytesRange { - ClassBytesRange::create(start, end) + /// Updates this set in place with the result of intersecting it with the + /// one given. + #[inline] + pub fn set_intersect(&mut self, other: LookSet) { + *self = self.intersect(other); } - /// Return the start of this range. + /// Return a `LookSet` from the slice given as a native endian 16-bit + /// integer. /// - /// The start of a range is always less than or equal to the end of the - /// range. - pub fn start(&self) -> u8 { - self.start + /// # Panics + /// + /// This panics if `slice.len() < 2`. + #[inline] + pub fn read_repr(slice: &[u8]) -> LookSet { + let bits = u16::from_ne_bytes(slice[..2].try_into().unwrap()); + LookSet { bits } } - /// Return the end of this range. + /// Write a `LookSet` as a native endian 16-bit integer to the beginning + /// of the slice given. /// - /// The end of a range is always greater than or equal to the start of the - /// range. - pub fn end(&self) -> u8 { - self.end + /// # Panics + /// + /// This panics if `slice.len() < 2`. 
+ #[inline] + pub fn write_repr(self, slice: &mut [u8]) { + let raw = self.bits.to_ne_bytes(); + slice[0] = raw[0]; + slice[1] = raw[1]; } } -impl fmt::Debug for ClassBytesRange { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut debug = f.debug_struct("ClassBytesRange"); - if self.start <= 0x7F { - debug.field("start", &(self.start as char)); - } else { - debug.field("start", &self.start); +impl core::fmt::Debug for LookSet { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + if self.is_empty() { + return write!(f, "∅"); } - if self.end <= 0x7F { - debug.field("end", &(self.end as char)); - } else { - debug.field("end", &self.end); + for look in self.iter() { + write!(f, "{}", look.as_char())?; } - debug.finish() + Ok(()) } } -/// The high-level intermediate representation for an anchor assertion. +/// An iterator over all look-around assertions in a [`LookSet`]. /// -/// A matching anchor assertion is always zero-length. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum Anchor { - /// Match the beginning of a line or the beginning of text. Specifically, - /// this matches at the starting position of the input, or at the position - /// immediately following a `\n` character. - StartLine, - /// Match the end of a line or the end of text. Specifically, - /// this matches at the end position of the input, or at the position - /// immediately preceding a `\n` character. - EndLine, - /// Match the beginning of text. Specifically, this matches at the starting - /// position of the input. - StartText, - /// Match the end of text. Specifically, this matches at the ending - /// position of the input. - EndText, +/// This iterator is created by [`LookSet::iter`]. +#[derive(Clone, Debug)] +pub struct LookSetIter { + set: LookSet, } -/// The high-level intermediate representation for a word-boundary assertion. -/// -/// A matching word boundary assertion is always zero-length. 
-#[derive(Clone, Debug, Eq, PartialEq)] -pub enum WordBoundary { - /// Match a Unicode-aware word boundary. That is, this matches a position - /// where the left adjacent character and right adjacent character - /// correspond to a word and non-word or a non-word and word character. - Unicode, - /// Match a Unicode-aware negation of a word boundary. - UnicodeNegate, - /// Match an ASCII-only word boundary. That is, this matches a position - /// where the left adjacent character and right adjacent character - /// correspond to a word and non-word or a non-word and word character. - Ascii, - /// Match an ASCII-only negation of a word boundary. - AsciiNegate, -} +impl Iterator for LookSetIter { + type Item = Look; -impl WordBoundary { - /// Returns true if and only if this word boundary assertion is negated. - pub fn is_negated(&self) -> bool { - match *self { - WordBoundary::Unicode | WordBoundary::Ascii => false, - WordBoundary::UnicodeNegate | WordBoundary::AsciiNegate => true, + #[inline] + fn next(&mut self) -> Option { + if self.set.is_empty() { + return None; } + // We'll never have more than u8::MAX distinct look-around assertions, + // so 'repr' will always fit into a u16. + let repr = u16::try_from(self.set.bits.trailing_zeros()).unwrap(); + let look = Look::from_repr(1 << repr)?; + self.set = self.set.remove(look); + Some(look) } } -/// The high-level intermediate representation for a group. -/// -/// This represents one of three possible group types: -/// -/// 1. A non-capturing group (e.g., `(?:expr)`). -/// 2. A capturing group (e.g., `(expr)`). -/// 3. A named capturing group (e.g., `(?Pexpr)`). -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct Group { - /// The kind of this group. If it is a capturing group, then the kind - /// contains the capture group index (and the name, if it is a named - /// group). - pub kind: GroupKind, - /// The expression inside the capturing group, which may be empty. - pub hir: Box, -} - -/// The kind of group. 
-#[derive(Clone, Debug, Eq, PartialEq)] -pub enum GroupKind { - /// A normal unnamed capturing group. - /// - /// The value is the capture index of the group. - CaptureIndex(u32), - /// A named capturing group. - CaptureName { - /// The name of the group. - name: String, - /// The capture index of the group. - index: u32, - }, - /// A non-capturing group. - NonCapturing, +/// Given a sequence of HIR values where each value corresponds to a Unicode +/// class (or an all-ASCII byte class), return a single Unicode class +/// corresponding to the union of the classes found. +fn class_chars(hirs: &[Hir]) -> Option { + let mut cls = ClassUnicode::new(vec![]); + for hir in hirs.iter() { + match *hir.kind() { + HirKind::Class(Class::Unicode(ref cls2)) => { + cls.union(cls2); + } + HirKind::Class(Class::Bytes(ref cls2)) => { + cls.union(&cls2.to_unicode_class()?); + } + _ => return None, + }; + } + Some(Class::Unicode(cls)) } -/// The high-level intermediate representation of a repetition operator. -/// -/// A repetition operator permits the repetition of an arbitrary -/// sub-expression. -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct Repetition { - /// The kind of this repetition operator. - pub kind: RepetitionKind, - /// Whether this repetition operator is greedy or not. A greedy operator - /// will match as much as it can. A non-greedy operator will match as - /// little as it can. - /// - /// Typically, operators are greedy by default and are only non-greedy when - /// a `?` suffix is used, e.g., `(expr)*` is greedy while `(expr)*?` is - /// not. However, this can be inverted via the `U` "ungreedy" flag. - pub greedy: bool, - /// The expression being repeated. - pub hir: Box, +/// Given a sequence of HIR values where each value corresponds to a byte class +/// (or an all-ASCII Unicode class), return a single byte class corresponding +/// to the union of the classes found. 
+fn class_bytes(hirs: &[Hir]) -> Option { + let mut cls = ClassBytes::new(vec![]); + for hir in hirs.iter() { + match *hir.kind() { + HirKind::Class(Class::Unicode(ref cls2)) => { + cls.union(&cls2.to_byte_class()?); + } + HirKind::Class(Class::Bytes(ref cls2)) => { + cls.union(cls2); + } + _ => return None, + }; + } + Some(Class::Bytes(cls)) } -impl Repetition { - /// Returns true if and only if this repetition operator makes it possible - /// to match the empty string. - /// - /// Note that this is not defined inductively. For example, while `a*` - /// will report `true`, `()+` will not, even though `()` matches the empty - /// string and one or more occurrences of something that matches the empty - /// string will always match the empty string. In order to get the - /// inductive definition, see the corresponding method on - /// [`Hir`](struct.Hir.html). - pub fn is_match_empty(&self) -> bool { - match self.kind { - RepetitionKind::ZeroOrOne => true, - RepetitionKind::ZeroOrMore => true, - RepetitionKind::OneOrMore => false, - RepetitionKind::Range(RepetitionRange::Exactly(m)) => m == 0, - RepetitionKind::Range(RepetitionRange::AtLeast(m)) => m == 0, - RepetitionKind::Range(RepetitionRange::Bounded(m, _)) => m == 0, +/// Given a sequence of HIR values where each value corresponds to a literal +/// that is a single `char`, return that sequence of `char`s. Otherwise return +/// None. No deduplication is done. +fn singleton_chars(hirs: &[Hir]) -> Option> { + let mut singletons = vec![]; + for hir in hirs.iter() { + let literal = match *hir.kind() { + HirKind::Literal(Literal(ref bytes)) => bytes, + _ => return None, + }; + let ch = match crate::debug::utf8_decode(literal) { + None => return None, + Some(Err(_)) => return None, + Some(Ok(ch)) => ch, + }; + if literal.len() != ch.len_utf8() { + return None; } + singletons.push(ch); } + Some(singletons) } -/// The kind of a repetition operator. 
-#[derive(Clone, Debug, Eq, PartialEq)] -pub enum RepetitionKind { - /// Matches a sub-expression zero or one times. - ZeroOrOne, - /// Matches a sub-expression zero or more times. - ZeroOrMore, - /// Matches a sub-expression one or more times. - OneOrMore, - /// Matches a sub-expression within a bounded range of times. - Range(RepetitionRange), -} - -/// The kind of a counted repetition operator. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum RepetitionRange { - /// Matches a sub-expression exactly this many times. - Exactly(u32), - /// Matches a sub-expression at least this many times. - AtLeast(u32), - /// Matches a sub-expression at least `m` times and at most `n` times. - Bounded(u32, u32), -} - -/// A custom `Drop` impl is used for `HirKind` such that it uses constant stack -/// space but heap space proportional to the depth of the total `Hir`. -impl Drop for Hir { - fn drop(&mut self) { - use std::mem; - - match *self.kind() { - HirKind::Empty - | HirKind::Literal(_) - | HirKind::Class(_) - | HirKind::Anchor(_) - | HirKind::WordBoundary(_) => return, - HirKind::Group(ref x) if !x.hir.kind.has_subexprs() => return, - HirKind::Repetition(ref x) if !x.hir.kind.has_subexprs() => return, - HirKind::Concat(ref x) if x.is_empty() => return, - HirKind::Alternation(ref x) if x.is_empty() => return, - _ => {} - } - - let mut stack = vec![mem::replace(self, Hir::empty())]; - while let Some(mut expr) = stack.pop() { - match expr.kind { - HirKind::Empty - | HirKind::Literal(_) - | HirKind::Class(_) - | HirKind::Anchor(_) - | HirKind::WordBoundary(_) => {} - HirKind::Group(ref mut x) => { - stack.push(mem::replace(&mut x.hir, Hir::empty())); - } - HirKind::Repetition(ref mut x) => { - stack.push(mem::replace(&mut x.hir, Hir::empty())); - } - HirKind::Concat(ref mut x) => { - stack.extend(x.drain(..)); - } - HirKind::Alternation(ref mut x) => { - stack.extend(x.drain(..)); - } - } +/// Given a sequence of HIR values where each value corresponds to a literal +/// that is 
a single byte, return that sequence of bytes. Otherwise return +/// None. No deduplication is done. +fn singleton_bytes(hirs: &[Hir]) -> Option> { + let mut singletons = vec![]; + for hir in hirs.iter() { + let literal = match *hir.kind() { + HirKind::Literal(Literal(ref bytes)) => bytes, + _ => return None, + }; + if literal.len() != 1 { + return None; } + singletons.push(literal[0]); } + Some(singletons) } -/// A type that documents various attributes of an HIR expression. +/// Looks for a common prefix in the list of alternation branches given. If one +/// is found, then an equivalent but (hopefully) simplified Hir is returned. +/// Otherwise, the original given list of branches is returned unmodified. /// -/// These attributes are typically defined inductively on the HIR. -#[derive(Clone, Debug, Eq, PartialEq)] -struct HirInfo { - /// Represent yes/no questions by a bitfield to conserve space, since - /// this is included in every HIR expression. - /// - /// If more attributes need to be added, it is OK to increase the size of - /// this as appropriate. - bools: u16, -} - -// A simple macro for defining bitfield accessors/mutators. -macro_rules! define_bool { - ($bit:expr, $is_fn_name:ident, $set_fn_name:ident) => { - fn $is_fn_name(&self) -> bool { - self.bools & (0b1 << $bit) > 0 +/// This is not quite as good as it could be. Right now, it requires that +/// all branches are 'Concat' expressions. It also doesn't do well with +/// literals. For example, given 'foofoo|foobar', it will not refactor it to +/// 'foo(?:foo|bar)' because literals are flattened into their own special +/// concatenation. (One wonders if perhaps 'Literal' should be a single atom +/// instead of a string of bytes because of this. Otherwise, handling the +/// current representation in this routine will be pretty gnarly. Sigh.) 
+fn lift_common_prefix(hirs: Vec) -> Result> { + if hirs.len() <= 1 { + return Err(hirs); + } + let mut prefix = match hirs[0].kind() { + HirKind::Concat(ref xs) => &**xs, + _ => return Err(hirs), + }; + if prefix.is_empty() { + return Err(hirs); + } + for h in hirs.iter().skip(1) { + let concat = match h.kind() { + HirKind::Concat(ref xs) => xs, + _ => return Err(hirs), + }; + let common_len = prefix + .iter() + .zip(concat.iter()) + .take_while(|(x, y)| x == y) + .count(); + prefix = &prefix[..common_len]; + if prefix.is_empty() { + return Err(hirs); } - - fn $set_fn_name(&mut self, yes: bool) { - if yes { - self.bools |= 1 << $bit; - } else { - self.bools &= !(1 << $bit); - } + } + let len = prefix.len(); + assert_ne!(0, len); + let mut prefix_concat = vec![]; + let mut suffix_alts = vec![]; + for h in hirs { + let mut concat = match h.into_kind() { + HirKind::Concat(xs) => xs, + // We required all sub-expressions to be + // concats above, so we're only here if we + // have a concat. 
+ _ => unreachable!(), + }; + suffix_alts.push(Hir::concat(concat.split_off(len))); + if prefix_concat.is_empty() { + prefix_concat = concat; } - }; -} - -impl HirInfo { - fn new() -> HirInfo { - HirInfo { bools: 0 } - } - - define_bool!(0, is_always_utf8, set_always_utf8); - define_bool!(1, is_all_assertions, set_all_assertions); - define_bool!(2, is_anchored_start, set_anchored_start); - define_bool!(3, is_anchored_end, set_anchored_end); - define_bool!(4, is_line_anchored_start, set_line_anchored_start); - define_bool!(5, is_line_anchored_end, set_line_anchored_end); - define_bool!(6, is_any_anchored_start, set_any_anchored_start); - define_bool!(7, is_any_anchored_end, set_any_anchored_end); - define_bool!(8, is_match_empty, set_match_empty); - define_bool!(9, is_literal, set_literal); - define_bool!(10, is_alternation_literal, set_alternation_literal); + } + let mut concat = prefix_concat; + concat.push(Hir::alternation(suffix_alts)); + Ok(Hir::concat(concat)) } #[cfg(test)] @@ -2244,12 +3604,6 @@ mod tests { assert_eq!(expected, bsymdifference(&cls1, &cls2)); } - #[test] - #[should_panic] - fn hir_byte_literal_non_ascii() { - Hir::literal(Literal::Byte(b'a')); - } - // We use a thread with an explicit stack size to test that our destructor // for Hir can handle arbitrarily sized expressions in constant stack // space. 
In case we run on a platform without threads (WASM?), we limit @@ -2262,26 +3616,28 @@ mod tests { let run = || { let mut expr = Hir::empty(); for _ in 0..100 { - expr = Hir::group(Group { - kind: GroupKind::NonCapturing, - hir: Box::new(expr), + expr = Hir::capture(Capture { + index: 1, + name: None, + sub: Box::new(expr), }); expr = Hir::repetition(Repetition { - kind: RepetitionKind::ZeroOrOne, + min: 0, + max: Some(1), greedy: true, - hir: Box::new(expr), + sub: Box::new(expr), }); expr = Hir { kind: HirKind::Concat(vec![expr]), - info: HirInfo::new(), + props: Properties::empty(), }; expr = Hir { kind: HirKind::Alternation(vec![expr]), - info: HirInfo::new(), + props: Properties::empty(), }; } - assert!(!expr.kind.is_empty()); + assert!(!matches!(*expr.kind(), HirKind::Empty)); }; // We run our test on a thread with a small stack size so we can @@ -2296,4 +3652,31 @@ mod tests { .join() .unwrap(); } + + #[test] + fn look_set_iter() { + let set = LookSet::empty(); + assert_eq!(0, set.iter().count()); + + let set = LookSet::full(); + assert_eq!(10, set.iter().count()); + + let set = + LookSet::empty().insert(Look::StartLF).insert(Look::WordUnicode); + assert_eq!(2, set.iter().count()); + + let set = LookSet::empty().insert(Look::StartLF); + assert_eq!(1, set.iter().count()); + + let set = LookSet::empty().insert(Look::WordAsciiNegate); + assert_eq!(1, set.iter().count()); + } + + #[test] + fn look_set_debug() { + let res = format!("{:?}", LookSet::empty()); + assert_eq!("∅", res); + let res = format!("{:?}", LookSet::full()); + assert_eq!("Az^$rRbB𝛃𝚩", res); + } } diff --git a/regex-syntax/src/hir/print.rs b/regex-syntax/src/hir/print.rs index b71f3897cf..fcb7cd252b 100644 --- a/regex-syntax/src/hir/print.rs +++ b/regex-syntax/src/hir/print.rs @@ -2,11 +2,16 @@ This module provides a regular expression printer for `Hir`. 
*/ -use std::fmt; +use core::fmt; -use crate::hir::visitor::{self, Visitor}; -use crate::hir::{self, Hir, HirKind}; -use crate::is_meta_character; +use crate::{ + hir::{ + self, + visitor::{self, Visitor}, + Hir, HirKind, + }, + is_meta_character, +}; /// A builder for constructing a printer. /// @@ -84,21 +89,54 @@ impl Visitor for Writer { fn visit_pre(&mut self, hir: &Hir) -> fmt::Result { match *hir.kind() { - HirKind::Empty - | HirKind::Repetition(_) - | HirKind::Concat(_) - | HirKind::Alternation(_) => {} - HirKind::Literal(hir::Literal::Unicode(c)) => { - self.write_literal_char(c)?; - } - HirKind::Literal(hir::Literal::Byte(b)) => { - self.write_literal_byte(b)?; + // Empty is represented by nothing in the concrete syntax, and + // repetition operators are strictly suffix oriented. + HirKind::Empty | HirKind::Repetition(_) => {} + HirKind::Literal(hir::Literal(ref bytes)) => { + // See the comment on the 'Concat' and 'Alternation' case below + // for why we put parens here. Literals are, conceptually, + // a special case of concatenation where each element is a + // character. The HIR flattens this into a Box<[u8]>, but we + // still need to treat it like a concatenation for correct + // printing. As a special case, we don't write parens if there + // is only one character. One character means there is no + // concat so we don't need parens. Adding parens would still be + // correct, but we drop them here because it tends to create + // rather noisy regexes even in simple cases. 
+ let result = core::str::from_utf8(bytes); + let len = result.map_or(bytes.len(), |s| s.chars().count()); + if len > 1 { + self.wtr.write_str(r"(?:")?; + } + match result { + Ok(string) => { + for c in string.chars() { + self.write_literal_char(c)?; + } + } + Err(_) => { + for &b in bytes.iter() { + self.write_literal_byte(b)?; + } + } + } + if len > 1 { + self.wtr.write_str(r")")?; + } } HirKind::Class(hir::Class::Unicode(ref cls)) => { + if cls.ranges().is_empty() { + return self.wtr.write_str("[a&&b]"); + } self.wtr.write_str("[")?; for range in cls.iter() { if range.start() == range.end() { self.write_literal_char(range.start())?; + } else if u32::from(range.start()) + 1 + == u32::from(range.end()) + { + self.write_literal_char(range.start())?; + self.write_literal_char(range.end())?; } else { self.write_literal_char(range.start())?; self.wtr.write_str("-")?; @@ -108,10 +146,16 @@ impl Visitor for Writer { self.wtr.write_str("]")?; } HirKind::Class(hir::Class::Bytes(ref cls)) => { + if cls.ranges().is_empty() { + return self.wtr.write_str("[a&&b]"); + } self.wtr.write_str("(?-u:[")?; for range in cls.iter() { if range.start() == range.end() { self.write_literal_class_byte(range.start())?; + } else if range.start() + 1 == range.end() { + self.write_literal_class_byte(range.start())?; + self.write_literal_class_byte(range.end())?; } else { self.write_literal_class_byte(range.start())?; self.wtr.write_str("-")?; @@ -120,41 +164,60 @@ impl Visitor for Writer { } self.wtr.write_str("])")?; } - HirKind::Anchor(hir::Anchor::StartLine) => { - self.wtr.write_str("(?m:^)")?; - } - HirKind::Anchor(hir::Anchor::EndLine) => { - self.wtr.write_str("(?m:$)")?; - } - HirKind::Anchor(hir::Anchor::StartText) => { - self.wtr.write_str(r"\A")?; - } - HirKind::Anchor(hir::Anchor::EndText) => { - self.wtr.write_str(r"\z")?; - } - HirKind::WordBoundary(hir::WordBoundary::Unicode) => { - self.wtr.write_str(r"\b")?; - } - HirKind::WordBoundary(hir::WordBoundary::UnicodeNegate) => { - 
self.wtr.write_str(r"\B")?; - } - HirKind::WordBoundary(hir::WordBoundary::Ascii) => { - self.wtr.write_str(r"(?-u:\b)")?; - } - HirKind::WordBoundary(hir::WordBoundary::AsciiNegate) => { - self.wtr.write_str(r"(?-u:\B)")?; - } - HirKind::Group(ref x) => match x.kind { - hir::GroupKind::CaptureIndex(_) => { - self.wtr.write_str("(")?; + HirKind::Look(ref look) => match *look { + hir::Look::Start => { + self.wtr.write_str(r"\A")?; + } + hir::Look::End => { + self.wtr.write_str(r"\z")?; + } + hir::Look::StartLF => { + self.wtr.write_str("(?m:^)")?; + } + hir::Look::EndLF => { + self.wtr.write_str("(?m:$)")?; + } + hir::Look::StartCRLF => { + self.wtr.write_str("(?mR:^)")?; } - hir::GroupKind::CaptureName { ref name, .. } => { - write!(self.wtr, "(?P<{}>", name)?; + hir::Look::EndCRLF => { + self.wtr.write_str("(?mR:$)")?; } - hir::GroupKind::NonCapturing => { - self.wtr.write_str("(?:")?; + hir::Look::WordAscii => { + self.wtr.write_str(r"(?-u:\b)")?; + } + hir::Look::WordAsciiNegate => { + self.wtr.write_str(r"(?-u:\B)")?; + } + hir::Look::WordUnicode => { + self.wtr.write_str(r"\b")?; + } + hir::Look::WordUnicodeNegate => { + self.wtr.write_str(r"\B")?; } }, + HirKind::Capture(hir::Capture { ref name, .. }) => { + self.wtr.write_str("(")?; + if let Some(ref name) = *name { + write!(self.wtr, "?P<{}>", name)?; + } + } + // Why do this? Wrapping concats and alts in non-capturing groups + // is not *always* necessary, but is sometimes necessary. For + // example, 'concat(a, alt(b, c))' should be written as 'a(?:b|c)' + // and not 'ab|c'. The former is clearly the intended meaning, but + // the latter is actually 'alt(concat(a, b), c)'. + // + // It would be possible to only group these things in cases where + // it's strictly necessary, but it requires knowing the parent + // expression. And since this technique is simpler and always + // correct, we take this route. More to the point, it is a non-goal + // of an HIR printer to show a nice easy-to-read regex. 
Indeed, + // its construction forbids it from doing so. Therefore, inserting + // extra groups where they aren't necessary is perfectly okay. + HirKind::Concat(_) | HirKind::Alternation(_) => { + self.wtr.write_str(r"(?:")?; + } } Ok(()) } @@ -165,39 +228,42 @@ impl Visitor for Writer { HirKind::Empty | HirKind::Literal(_) | HirKind::Class(_) - | HirKind::Anchor(_) - | HirKind::WordBoundary(_) - | HirKind::Concat(_) - | HirKind::Alternation(_) => {} + | HirKind::Look(_) => {} HirKind::Repetition(ref x) => { - match x.kind { - hir::RepetitionKind::ZeroOrOne => { + match (x.min, x.max) { + (0, Some(1)) => { self.wtr.write_str("?")?; } - hir::RepetitionKind::ZeroOrMore => { + (0, None) => { self.wtr.write_str("*")?; } - hir::RepetitionKind::OneOrMore => { + (1, None) => { self.wtr.write_str("+")?; } - hir::RepetitionKind::Range(ref x) => match *x { - hir::RepetitionRange::Exactly(m) => { - write!(self.wtr, "{{{}}}", m)?; - } - hir::RepetitionRange::AtLeast(m) => { - write!(self.wtr, "{{{},}}", m)?; - } - hir::RepetitionRange::Bounded(m, n) => { - write!(self.wtr, "{{{},{}}}", m, n)?; - } - }, + (1, Some(1)) => { + // 'a{1}' and 'a{1}?' are exactly equivalent to 'a'. + return Ok(()); + } + (m, None) => { + write!(self.wtr, "{{{},}}", m)?; + } + (m, Some(n)) if m == n => { + write!(self.wtr, "{{{}}}", m)?; + // a{m} and a{m}? are always exactly equivalent. 
+ return Ok(()); + } + (m, Some(n)) => { + write!(self.wtr, "{{{},{}}}", m, n)?; + } } if !x.greedy { self.wtr.write_str("?")?; } } - HirKind::Group(_) => { - self.wtr.write_str(")")?; + HirKind::Capture(_) + | HirKind::Concat(_) + | HirKind::Alternation(_) => { + self.wtr.write_str(r")")?; } } Ok(()) @@ -217,18 +283,16 @@ impl Writer { } fn write_literal_byte(&mut self, b: u8) -> fmt::Result { - let c = b as char; - if c <= 0x7F as char && !c.is_control() && !c.is_whitespace() { - self.write_literal_char(c) + if b <= 0x7F && !b.is_ascii_control() && !b.is_ascii_whitespace() { + self.write_literal_char(char::try_from(b).unwrap()) } else { write!(self.wtr, "(?-u:\\x{:02X})", b) } } fn write_literal_class_byte(&mut self, b: u8) -> fmt::Result { - let c = b as char; - if c <= 0x7F as char && !c.is_control() && !c.is_whitespace() { - self.write_literal_char(c) + if b <= 0x7F && !b.is_ascii_control() && !b.is_ascii_whitespace() { + self.write_literal_char(char::try_from(b).unwrap()) } else { write!(self.wtr, "\\x{:02X}", b) } @@ -237,15 +301,21 @@ impl Writer { #[cfg(test)] mod tests { - use super::Printer; + use alloc::{ + boxed::Box, + string::{String, ToString}, + }; + use crate::ParserBuilder; + use super::*; + fn roundtrip(given: &str, expected: &str) { roundtrip_with(|b| b, given, expected); } fn roundtrip_bytes(given: &str, expected: &str) { - roundtrip_with(|b| b.allow_invalid_utf8(true), given, expected); + roundtrip_with(|b| b.utf8(false), given, expected); } fn roundtrip_with(mut f: F, given: &str, expected: &str) @@ -277,28 +347,35 @@ mod tests { #[test] fn print_class() { - roundtrip(r"[a]", r"[a]"); + roundtrip(r"[a]", r"a"); + roundtrip(r"[ab]", r"[ab]"); roundtrip(r"[a-z]", r"[a-z]"); roundtrip(r"[a-z--b-c--x-y]", r"[ad-wz]"); - roundtrip(r"[^\x01-\u{10FFFF}]", "[\u{0}]"); - roundtrip(r"[-]", r"[\-]"); + roundtrip(r"[^\x01-\u{10FFFF}]", "\u{0}"); + roundtrip(r"[-]", r"\-"); roundtrip(r"[☃-⛄]", r"[☃-⛄]"); - roundtrip(r"(?-u)[a]", r"(?-u:[a])"); + 
roundtrip(r"(?-u)[a]", r"a"); + roundtrip(r"(?-u)[ab]", r"(?-u:[ab])"); roundtrip(r"(?-u)[a-z]", r"(?-u:[a-z])"); roundtrip_bytes(r"(?-u)[a-\xFF]", r"(?-u:[a-\xFF])"); // The following test that the printer escapes meta characters // in character classes. - roundtrip(r"[\[]", r"[\[]"); + roundtrip(r"[\[]", r"\["); roundtrip(r"[Z-_]", r"[Z-_]"); roundtrip(r"[Z-_--Z]", r"[\[-_]"); // The following test that the printer escapes meta characters // in byte oriented character classes. - roundtrip_bytes(r"(?-u)[\[]", r"(?-u:[\[])"); + roundtrip_bytes(r"(?-u)[\[]", r"\["); roundtrip_bytes(r"(?-u)[Z-_]", r"(?-u:[Z-_])"); roundtrip_bytes(r"(?-u)[Z-_--Z]", r"(?-u:[\[-_])"); + + // This tests that an empty character class is correctly roundtripped. + #[cfg(feature = "unicode-gencat")] + roundtrip(r"\P{any}", r"[a&&b]"); + roundtrip_bytes(r"(?-u)[^\x00-\xFF]", r"[a&&b]"); } #[test] @@ -331,37 +408,170 @@ mod tests { roundtrip("a+?", "a+?"); roundtrip("(?U)a+", "a+?"); - roundtrip("a{1}", "a{1}"); - roundtrip("a{1,}", "a{1,}"); + roundtrip("a{1}", "a"); + roundtrip("a{2}", "a{2}"); + roundtrip("a{1,}", "a+"); roundtrip("a{1,5}", "a{1,5}"); - roundtrip("a{1}?", "a{1}?"); - roundtrip("a{1,}?", "a{1,}?"); + roundtrip("a{1}?", "a"); + roundtrip("a{2}?", "a{2}"); + roundtrip("a{1,}?", "a+?"); roundtrip("a{1,5}?", "a{1,5}?"); - roundtrip("(?U)a{1}", "a{1}?"); - roundtrip("(?U)a{1,}", "a{1,}?"); + roundtrip("(?U)a{1}", "a"); + roundtrip("(?U)a{2}", "a{2}"); + roundtrip("(?U)a{1,}", "a+?"); roundtrip("(?U)a{1,5}", "a{1,5}?"); + + // Test that various zero-length repetitions always translate to an + // empty regex. This is more a property of HIR's smart constructors + // than the printer though. 
+ roundtrip("a{0}", ""); + roundtrip("(?:ab){0}", ""); + #[cfg(feature = "unicode-gencat")] + { + roundtrip(r"\p{any}{0}", ""); + roundtrip(r"\P{any}{0}", ""); + } } #[test] fn print_group() { roundtrip("()", "()"); roundtrip("(?P)", "(?P)"); - roundtrip("(?:)", "(?:)"); + roundtrip("(?:)", ""); roundtrip("(a)", "(a)"); roundtrip("(?Pa)", "(?Pa)"); - roundtrip("(?:a)", "(?:a)"); + roundtrip("(?:a)", "a"); roundtrip("((((a))))", "((((a))))"); } #[test] fn print_alternation() { - roundtrip("|", "|"); - roundtrip("||", "||"); + roundtrip("|", "(?:|)"); + roundtrip("||", "(?:||)"); + + roundtrip("a|b", "[ab]"); + roundtrip("ab|cd", "(?:(?:ab)|(?:cd))"); + roundtrip("a|b|c", "[a-c]"); + roundtrip("ab|cd|ef", "(?:(?:ab)|(?:cd)|(?:ef))"); + roundtrip("foo|bar|quux", "(?:(?:foo)|(?:bar)|(?:quux))"); + } - roundtrip("a|b", "a|b"); - roundtrip("a|b|c", "a|b|c"); - roundtrip("foo|bar|quux", "foo|bar|quux"); + // This is a regression test that stresses a peculiarity of how the HIR + // is both constructed and printed. Namely, it is legal for a repetition + // to directly contain a concatenation. This particular construct isn't + // really possible to build from the concrete syntax directly, since you'd + // be forced to put the concatenation into (at least) a non-capturing + // group. Concurrently, the printer doesn't consider this case and just + // kind of naively prints the child expression and tacks on the repetition + // operator. + // + // As a result, if you attached '+' to a 'concat(a, b)', the printer gives + // you 'ab+', but clearly it really should be '(?:ab)+'. + // + // This bug isn't easy to surface because most ways of building an HIR + // come directly from the concrete syntax, and as mentioned above, it just + // isn't possible to build this kind of HIR from the concrete syntax. + // Nevertheless, this is definitely a bug. 
+ // + // See: https://github.com/rust-lang/regex/issues/731 + #[test] + fn regression_repetition_concat() { + let expr = Hir::concat(alloc::vec![ + Hir::literal("x".as_bytes()), + Hir::repetition(hir::Repetition { + min: 1, + max: None, + greedy: true, + sub: Box::new(Hir::literal("ab".as_bytes())), + }), + Hir::literal("y".as_bytes()), + ]); + assert_eq!(r"(?:x(?:ab)+y)", expr.to_string()); + + let expr = Hir::concat(alloc::vec![ + Hir::look(hir::Look::Start), + Hir::repetition(hir::Repetition { + min: 1, + max: None, + greedy: true, + sub: Box::new(Hir::concat(alloc::vec![ + Hir::look(hir::Look::Start), + Hir::look(hir::Look::End), + ])), + }), + Hir::look(hir::Look::End), + ]); + assert_eq!(r"(?:\A(?:\A\z)+\z)", expr.to_string()); + } + + // Just like regression_repetition_concat, but with the repetition using + // an alternation as a child expression instead. + // + // See: https://github.com/rust-lang/regex/issues/731 + #[test] + fn regression_repetition_alternation() { + let expr = Hir::concat(alloc::vec![ + Hir::literal("ab".as_bytes()), + Hir::repetition(hir::Repetition { + min: 1, + max: None, + greedy: true, + sub: Box::new(Hir::alternation(alloc::vec![ + Hir::literal("cd".as_bytes()), + Hir::literal("ef".as_bytes()), + ])), + }), + Hir::literal("gh".as_bytes()), + ]); + assert_eq!(r"(?:(?:ab)(?:(?:cd)|(?:ef))+(?:gh))", expr.to_string()); + + let expr = Hir::concat(alloc::vec![ + Hir::look(hir::Look::Start), + Hir::repetition(hir::Repetition { + min: 1, + max: None, + greedy: true, + sub: Box::new(Hir::alternation(alloc::vec![ + Hir::look(hir::Look::Start), + Hir::look(hir::Look::End), + ])), + }), + Hir::look(hir::Look::End), + ]); + assert_eq!(r"(?:\A(?:\A|\z)+\z)", expr.to_string()); + } + + // This regression test is very similar in flavor to + // regression_repetition_concat in that the root of the issue lies in a + // peculiarity of how the HIR is represented and how the printer writes it + // out. 
Like the other regression, this one is also rooted in the fact that + // you can't produce the peculiar HIR from the concrete syntax. Namely, you + // just can't have a 'concat(a, alt(b, c))' because the 'alt' will normally + // be in (at least) a non-capturing group. Why? Because the '|' has very + // low precedence (lower that concatenation), and so something like 'ab|c' + // is actually 'alt(ab, c)'. + // + // See: https://github.com/rust-lang/regex/issues/516 + #[test] + fn regression_alternation_concat() { + let expr = Hir::concat(alloc::vec![ + Hir::literal("ab".as_bytes()), + Hir::alternation(alloc::vec![ + Hir::literal("mn".as_bytes()), + Hir::literal("xy".as_bytes()), + ]), + ]); + assert_eq!(r"(?:(?:ab)(?:(?:mn)|(?:xy)))", expr.to_string()); + + let expr = Hir::concat(alloc::vec![ + Hir::look(hir::Look::Start), + Hir::alternation(alloc::vec![ + Hir::look(hir::Look::Start), + Hir::look(hir::Look::End), + ]), + ]); + assert_eq!(r"(?:\A(?:\A|\z))", expr.to_string()); } } diff --git a/regex-syntax/src/hir/translate.rs b/regex-syntax/src/hir/translate.rs index 890e1608b3..b22861fc7c 100644 --- a/regex-syntax/src/hir/translate.rs +++ b/regex-syntax/src/hir/translate.rs @@ -2,19 +2,23 @@ Defines a translator that converts an `Ast` to an `Hir`. */ -use std::cell::{Cell, RefCell}; -use std::result; +use core::cell::{Cell, RefCell}; -use crate::ast::{self, Ast, Span, Visitor}; -use crate::hir::{self, Error, ErrorKind, Hir}; -use crate::unicode::{self, ClassQuery}; +use alloc::{boxed::Box, string::ToString, vec, vec::Vec}; -type Result = result::Result; +use crate::{ + ast::{self, Ast, Span, Visitor}, + either::Either, + hir::{self, Error, ErrorKind, Hir, HirKind}, + unicode::{self, ClassQuery}, +}; + +type Result = core::result::Result; /// A builder for constructing an AST->HIR translator. 
#[derive(Clone, Debug)] pub struct TranslatorBuilder { - allow_invalid_utf8: bool, + utf8: bool, flags: Flags, } @@ -27,10 +31,7 @@ impl Default for TranslatorBuilder { impl TranslatorBuilder { /// Create a new translator builder with a default configuration. pub fn new() -> TranslatorBuilder { - TranslatorBuilder { - allow_invalid_utf8: false, - flags: Flags::default(), - } + TranslatorBuilder { utf8: true, flags: Flags::default() } } /// Build a translator using the current configuration. @@ -38,23 +39,27 @@ impl TranslatorBuilder { Translator { stack: RefCell::new(vec![]), flags: Cell::new(self.flags), - allow_invalid_utf8: self.allow_invalid_utf8, + utf8: self.utf8, } } - /// When enabled, translation will permit the construction of a regular + /// When disabled, translation will permit the construction of a regular /// expression that may match invalid UTF-8. /// - /// When disabled (the default), the translator is guaranteed to produce - /// an expression that will only ever match valid UTF-8 (otherwise, the - /// translator will return an error). + /// When enabled (the default), the translator is guaranteed to produce an + /// expression that, for non-empty matches, will only ever produce spans + /// that are entirely valid UTF-8 (otherwise, the translator will return an + /// error). /// - /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII - /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause - /// the parser to return an error. Namely, a negated ASCII word boundary - /// can result in matching positions that aren't valid UTF-8 boundaries. - pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder { - self.allow_invalid_utf8 = yes; + /// Perhaps surprisingly, when UTF-8 is enabled, an empty regex or even + /// a negated ASCII word boundary (uttered as `(?-u:\B)` in the concrete + /// syntax) will be allowed even though they can produce matches that split + /// a UTF-8 encoded codepoint.
This only applies to zero-width or "empty" + /// matches, and it is expected that the regex engine itself must handle + /// these cases if necessary (perhaps by suppressing any zero-width matches + /// that split a codepoint). + pub fn utf8(&mut self, yes: bool) -> &mut TranslatorBuilder { + self.utf8 = yes; self } @@ -80,6 +85,12 @@ impl TranslatorBuilder { self } + /// Enable or disable the CRLF mode flag (`R`) by default. + pub fn crlf(&mut self, yes: bool) -> &mut TranslatorBuilder { + self.flags.crlf = if yes { Some(true) } else { None }; + self + } + /// Enable or disable the "swap greed" flag (`U`) by default. pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder { self.flags.swap_greed = if yes { Some(true) } else { None }; @@ -100,7 +111,7 @@ impl TranslatorBuilder { /// many abstract syntax trees. /// /// A `Translator` can be configured in more detail via a -/// [`TranslatorBuilder`](struct.TranslatorBuilder.html). +/// [`TranslatorBuilder`]. #[derive(Clone, Debug)] pub struct Translator { /// Our call stack, but on the heap. @@ -108,7 +119,7 @@ pub struct Translator { /// The current flag settings. flags: Cell, /// Whether we're allowed to produce HIR that can match arbitrary bytes. - allow_invalid_utf8: bool, + utf8: bool, } impl Translator { @@ -143,6 +154,12 @@ enum HirFrame { /// case in the Ast. They get popped after an inductive (i.e., recursive) /// step is complete. Expr(Hir), + /// A literal that is being constructed, character by character, from the + /// AST. We need this because the AST gives each individual character its + /// own node. So as we see characters, we peek at the top-most HirFrame. + /// If it's a literal, then we add to it. Otherwise, we push a new literal. + /// When it comes time to pop it, we convert it to an Hir via Hir::literal. + Literal(Vec), /// A Unicode character class. This frame is mutated as we descend into /// the Ast of a character class (which is itself its own mini recursive /// structure). 
@@ -152,10 +169,17 @@ enum HirFrame { /// recursive structure). /// /// Byte character classes are created when Unicode mode (`u`) is disabled. - /// If `allow_invalid_utf8` is disabled (the default), then a byte - /// character is only permitted to match ASCII text. + /// If `utf8` is enabled (the default), then a byte character is only + /// permitted to match ASCII text. ClassBytes(hir::ClassBytes), - /// This is pushed on to the stack upon first seeing any kind of group, + /// This is pushed whenever a repetition is observed. After visiting every + /// sub-expression in the repetition, the translator's stack is expected to + /// have this sentinel at the top. + /// + /// This sentinel only exists to stop other things (like flattening + /// literals) from reaching across repetition operators. + Repetition, + /// This is pushed on to the stack upon first seeing any kind of capture, /// indicated by parentheses (including non-capturing groups). It is popped /// upon leaving a group. Group { @@ -181,6 +205,14 @@ enum HirFrame { /// every sub-expression in the alternation, the translator's stack is /// popped until it sees an Alternation frame. Alternation, + /// This is pushed immediately before each sub-expression in an + /// alternation. This separates the branches of an alternation on the + /// stack and prevents literal flattening from reaching across alternation + /// branches. + /// + /// It is popped after each expression in a branch until an 'Alternation' + /// frame is observed when doing a post visit on an alternation. + AlternationBranch, } impl HirFrame { @@ -188,6 +220,7 @@ impl HirFrame { fn unwrap_expr(self) -> Hir { match self { HirFrame::Expr(expr) => expr, + HirFrame::Literal(lit) => Hir::literal(lit), _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self), } } @@ -218,6 +251,20 @@ impl HirFrame { } } + /// Assert that the current stack frame is a repetition sentinel. If it + /// isn't, then panic. 
+ fn unwrap_repetition(self) { + match self { + HirFrame::Repetition => {} + _ => { + panic!( + "tried to unwrap repetition from HirFrame, got: {:?}", + self + ) + } + } + } + /// Assert that the current stack frame is a group indicator and return /// its corresponding flags (the flags that were active at the time the /// group was entered). @@ -229,6 +276,20 @@ impl HirFrame { } } } + + /// Assert that the current stack frame is an alternation pipe sentinel. If + /// it isn't, then panic. + fn unwrap_alternation_pipe(self) { + match self { + HirFrame::AlternationBranch => {} + _ => { + panic!( + "tried to unwrap alt pipe from HirFrame, got: {:?}", + self + ) + } + } + } } impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { @@ -252,6 +313,7 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { self.push(HirFrame::ClassBytes(cls)); } } + Ast::Repetition(_) => self.push(HirFrame::Repetition), Ast::Group(ref x) => { let old_flags = x .flags() @@ -266,6 +328,7 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { Ast::Alternation(ref x) if x.asts.is_empty() => {} Ast::Alternation(_) => { self.push(HirFrame::Alternation); + self.push(HirFrame::AlternationBranch); } _ => {} } @@ -291,7 +354,20 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { self.push(HirFrame::Expr(Hir::empty())); } Ast::Literal(ref x) => { - self.push(HirFrame::Expr(self.hir_literal(x)?)); + match self.ast_literal_to_scalar(x)? { + Either::Right(byte) => self.push_byte(byte), + Either::Left(ch) => { + if !self.flags().unicode() && ch.len_utf8() > 1 { + return Err(self + .error(x.span, ErrorKind::UnicodeNotAllowed)); + } + match self.case_fold_char(x.span, ch)? 
{ + None => self.push_char(ch), + Some(expr) => self.push(HirFrame::Expr(expr)), + } + } + } + // self.push(HirFrame::Expr(self.hir_literal(x)?)); } Ast::Dot(span) => { self.push(HirFrame::Expr(self.hir_dot(span)?)); @@ -305,7 +381,7 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { let hcls = hir::Class::Unicode(cls); self.push(HirFrame::Expr(Hir::class(hcls))); } else { - let cls = self.hir_perl_byte_class(x); + let cls = self.hir_perl_byte_class(x)?; let hcls = hir::Class::Bytes(cls); self.push(HirFrame::Expr(Hir::class(hcls))); } @@ -322,12 +398,6 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { ast.negated, &mut cls, )?; - if cls.ranges().is_empty() { - return Err(self.error( - ast.span, - ErrorKind::EmptyClassNotAllowed, - )); - } let expr = Hir::class(hir::Class::Unicode(cls)); self.push(HirFrame::Expr(expr)); } else { @@ -337,31 +407,25 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { ast.negated, &mut cls, )?; - if cls.ranges().is_empty() { - return Err(self.error( - ast.span, - ErrorKind::EmptyClassNotAllowed, - )); - } - let expr = Hir::class(hir::Class::Bytes(cls)); self.push(HirFrame::Expr(expr)); } } Ast::Repetition(ref x) => { let expr = self.pop().unwrap().unwrap_expr(); + self.pop().unwrap().unwrap_repetition(); self.push(HirFrame::Expr(self.hir_repetition(x, expr))); } Ast::Group(ref x) => { let expr = self.pop().unwrap().unwrap_expr(); let old_flags = self.pop().unwrap().unwrap_group(); self.trans().flags.set(old_flags); - self.push(HirFrame::Expr(self.hir_group(x, expr))); + self.push(HirFrame::Expr(self.hir_capture(x, expr))); } Ast::Concat(_) => { let mut exprs = vec![]; - while let Some(HirFrame::Expr(expr)) = self.pop() { - if !expr.kind().is_empty() { + while let Some(expr) = self.pop_concat_expr() { + if !matches!(*expr.kind(), HirKind::Empty) { exprs.push(expr); } } @@ -370,7 +434,8 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { } Ast::Alternation(_) => { let mut exprs = vec![]; - while let Some(HirFrame::Expr(expr)) = self.pop() { + 
while let Some(expr) = self.pop_alt_expr() { + self.pop().unwrap().unwrap_alternation_pipe(); exprs.push(expr); } exprs.reverse(); @@ -380,6 +445,11 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { Ok(()) } + fn visit_alternation_in(&mut self) -> Result<()> { + self.push(HirFrame::AlternationBranch); + Ok(()) + } + fn visit_class_set_item_pre( &mut self, ast: &ast::ClassSetItem, @@ -458,7 +528,7 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { cls.union(&xcls); self.push(HirFrame::ClassUnicode(cls)); } else { - let xcls = self.hir_perl_byte_class(x); + let xcls = self.hir_perl_byte_class(x)?; let mut cls = self.pop().unwrap().unwrap_class_bytes(); cls.union(&xcls); self.push(HirFrame::ClassBytes(cls)); @@ -602,11 +672,103 @@ impl<'t, 'p> TranslatorI<'t, 'p> { self.trans().stack.borrow_mut().push(frame); } + /// Push the given literal char on to the call stack. + /// + /// If the top-most element of the stack is a literal, then the char + /// is appended to the end of that literal. Otherwise, a new literal + /// containing just the given char is pushed to the top of the stack. + fn push_char(&self, ch: char) { + let mut buf = [0; 4]; + let bytes = ch.encode_utf8(&mut buf).as_bytes(); + let mut stack = self.trans().stack.borrow_mut(); + if let Some(HirFrame::Literal(ref mut literal)) = stack.last_mut() { + literal.extend_from_slice(bytes); + } else { + stack.push(HirFrame::Literal(bytes.to_vec())); + } + } + + /// Push the given literal byte on to the call stack. + /// + /// If the top-most element of the stack is a literal, then the byte + /// is appended to the end of that literal. Otherwise, a new literal + /// containing just the given byte is pushed to the top of the stack. + fn push_byte(&self, byte: u8) { + let mut stack = self.trans().stack.borrow_mut(); + if let Some(HirFrame::Literal(ref mut literal)) = stack.last_mut() { + literal.push(byte); + } else { + stack.push(HirFrame::Literal(vec![byte])); + } + } + /// Pop the top of the call stack. 
If the call stack is empty, return None. fn pop(&self) -> Option<HirFrame> { self.trans().stack.borrow_mut().pop() } + /// Pop an HIR expression from the top of the stack for a concatenation. + /// + /// This returns None if the stack is empty or when a concat frame is seen. + /// Otherwise, it panics if it could not find an HIR expression. + fn pop_concat_expr(&self) -> Option<Hir> { + let frame = self.pop()?; + match frame { + HirFrame::Concat => None, + HirFrame::Expr(expr) => Some(expr), + HirFrame::Literal(lit) => Some(Hir::literal(lit)), + HirFrame::ClassUnicode(_) => { + unreachable!("expected expr or concat, got Unicode class") + } + HirFrame::ClassBytes(_) => { + unreachable!("expected expr or concat, got byte class") + } + HirFrame::Repetition => { + unreachable!("expected expr or concat, got repetition") + } + HirFrame::Group { .. } => { + unreachable!("expected expr or concat, got group") + } + HirFrame::Alternation => { + unreachable!("expected expr or concat, got alt marker") + } + HirFrame::AlternationBranch => { + unreachable!("expected expr or concat, got alt branch marker") + } + } + } + + /// Pop an HIR expression from the top of the stack for an alternation. + /// + /// This returns None if the stack is empty or when an alternation frame is + /// seen. Otherwise, it panics if it could not find an HIR expression. + fn pop_alt_expr(&self) -> Option<Hir> { + let frame = self.pop()?; + match frame { + HirFrame::Alternation => None, + HirFrame::Expr(expr) => Some(expr), + HirFrame::Literal(lit) => Some(Hir::literal(lit)), + HirFrame::ClassUnicode(_) => { + unreachable!("expected expr or alt, got Unicode class") + } + HirFrame::ClassBytes(_) => { + unreachable!("expected expr or alt, got byte class") + } + HirFrame::Repetition => { + unreachable!("expected expr or alt, got repetition") + } + HirFrame::Group { .. 
} => { + unreachable!("expected expr or alt, got group") + } + HirFrame::Concat => { + unreachable!("expected expr or alt, got concat marker") + } + HirFrame::AlternationBranch => { + unreachable!("expected expr or alt, got alt branch marker") + } + } + } + /// Create a new error with the given span and error type. fn error(&self, span: Span, kind: ErrorKind) -> Error { Error { kind, pattern: self.pattern.to_string(), span } @@ -627,63 +789,48 @@ impl<'t, 'p> TranslatorI<'t, 'p> { old_flags } - fn hir_literal(&self, lit: &ast::Literal) -> Result { - let ch = match self.literal_to_char(lit)? { - byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)), - hir::Literal::Unicode(ch) => ch, - }; - if self.flags().case_insensitive() { - self.hir_from_char_case_insensitive(lit.span, ch) - } else { - self.hir_from_char(lit.span, ch) - } - } - /// Convert an Ast literal to its scalar representation. /// /// When Unicode mode is enabled, then this always succeeds and returns a /// `char` (Unicode scalar value). /// - /// When Unicode mode is disabled, then a raw byte is returned. If that - /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns - /// an error. - fn literal_to_char(&self, lit: &ast::Literal) -> Result { + /// When Unicode mode is disabled, then a `char` will still be returned + /// whenever possible. A byte is returned only when invalid UTF-8 is + /// allowed and when the byte is not ASCII. Otherwise, a non-ASCII byte + /// will result in an error when invalid UTF-8 is not allowed. 
+ fn ast_literal_to_scalar( + &self, + lit: &ast::Literal, + ) -> Result<Either<char, u8>> { if self.flags().unicode() { - return Ok(hir::Literal::Unicode(lit.c)); + return Ok(Either::Left(lit.c)); } let byte = match lit.byte() { - None => return Ok(hir::Literal::Unicode(lit.c)), + None => return Ok(Either::Left(lit.c)), Some(byte) => byte, }; if byte <= 0x7F { - return Ok(hir::Literal::Unicode(byte as char)); + return Ok(Either::Left(char::try_from(byte).unwrap())); } - if !self.trans().allow_invalid_utf8 { + if self.trans().utf8 { return Err(self.error(lit.span, ErrorKind::InvalidUtf8)); } - Ok(hir::Literal::Byte(byte)) + Ok(Either::Right(byte)) } - fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> { - if !self.flags().unicode() && c.len_utf8() > 1 { - return Err(self.error(span, ErrorKind::UnicodeNotAllowed)); + fn case_fold_char(&self, span: Span, c: char) -> Result<Option<Hir>> { + if !self.flags().case_insensitive() { + return Ok(None); } - Ok(Hir::literal(hir::Literal::Unicode(c))) - } - - fn hir_from_char_case_insensitive( - &self, - span: Span, - c: char, - ) -> Result<Hir> { if self.flags().unicode() { // If case folding won't do anything, then don't bother trying. - let map = - unicode::contains_simple_case_mapping(c, c).map_err(|_| { + let map = unicode::SimpleCaseFolder::new() + .map(|f| f.overlaps(c, c)) + .map_err(|_| { self.error(span, ErrorKind::UnicodeCaseUnavailable) })?; if !map { - return self.hir_from_char(span, c); + return Ok(None); } let mut cls = hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new( @@ -692,7 +839,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { cls.try_case_fold_simple().map_err(|_| { self.error(span, ErrorKind::UnicodeCaseUnavailable) })?; - Ok(Hir::class(hir::Class::Unicode(cls))) + Ok(Some(Hir::class(hir::Class::Unicode(cls)))) } else { if c.len_utf8() > 1 { return Err(self.error(span, ErrorKind::UnicodeNotAllowed)); @@ -700,109 +847,104 @@ impl<'t, 'p> TranslatorI<'t, 'p> { // If case folding won't do anything, then don't bother trying. 
match c { 'A'..='Z' | 'a'..='z' => {} - _ => return self.hir_from_char(span, c), + _ => return Ok(None), } let mut cls = hir::ClassBytes::new(vec![hir::ClassBytesRange::new( - c as u8, c as u8, + // OK because 'c.len_utf8() == 1' which in turn implies + // that 'c' is ASCII. + // + // MSRV(1.59): Use 'u8::try_from(c)' instead. + u8::try_from(u32::from(c)).unwrap(), + u8::try_from(u32::from(c)).unwrap(), )]); cls.case_fold_simple(); - Ok(Hir::class(hir::Class::Bytes(cls))) + Ok(Some(Hir::class(hir::Class::Bytes(cls)))) } } fn hir_dot(&self, span: Span) -> Result { - let unicode = self.flags().unicode(); - if !unicode && !self.trans().allow_invalid_utf8 { + if !self.flags().unicode() && self.trans().utf8 { return Err(self.error(span, ErrorKind::InvalidUtf8)); } - Ok(if self.flags().dot_matches_new_line() { - Hir::any(!unicode) - } else { - Hir::dot(!unicode) - }) + Ok(Hir::dot(self.flags().dot())) } fn hir_assertion(&self, asst: &ast::Assertion) -> Result { let unicode = self.flags().unicode(); let multi_line = self.flags().multi_line(); + let crlf = self.flags().crlf(); Ok(match asst.kind { - ast::AssertionKind::StartLine => Hir::anchor(if multi_line { - hir::Anchor::StartLine + ast::AssertionKind::StartLine => Hir::look(if multi_line { + if crlf { + hir::Look::StartCRLF + } else { + hir::Look::StartLF + } } else { - hir::Anchor::StartText + hir::Look::Start }), - ast::AssertionKind::EndLine => Hir::anchor(if multi_line { - hir::Anchor::EndLine + ast::AssertionKind::EndLine => Hir::look(if multi_line { + if crlf { + hir::Look::EndCRLF + } else { + hir::Look::EndLF + } } else { - hir::Anchor::EndText + hir::Look::End + }), + ast::AssertionKind::StartText => Hir::look(hir::Look::Start), + ast::AssertionKind::EndText => Hir::look(hir::Look::End), + ast::AssertionKind::WordBoundary => Hir::look(if unicode { + hir::Look::WordUnicode + } else { + hir::Look::WordAscii + }), + ast::AssertionKind::NotWordBoundary => Hir::look(if unicode { + hir::Look::WordUnicodeNegate + } 
else { + hir::Look::WordAsciiNegate }), - ast::AssertionKind::StartText => { - Hir::anchor(hir::Anchor::StartText) - } - ast::AssertionKind::EndText => Hir::anchor(hir::Anchor::EndText), - ast::AssertionKind::WordBoundary => { - Hir::word_boundary(if unicode { - hir::WordBoundary::Unicode - } else { - hir::WordBoundary::Ascii - }) - } - ast::AssertionKind::NotWordBoundary => { - Hir::word_boundary(if unicode { - hir::WordBoundary::UnicodeNegate - } else { - // It is possible for negated ASCII word boundaries to - // match at invalid UTF-8 boundaries, even when searching - // valid UTF-8. - if !self.trans().allow_invalid_utf8 { - return Err( - self.error(asst.span, ErrorKind::InvalidUtf8) - ); - } - hir::WordBoundary::AsciiNegate - }) - } }) } - fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir { - let kind = match group.kind { - ast::GroupKind::CaptureIndex(idx) => { - hir::GroupKind::CaptureIndex(idx) - } - ast::GroupKind::CaptureName(ref capname) => { - hir::GroupKind::CaptureName { - name: capname.name.clone(), - index: capname.index, - } + fn hir_capture(&self, group: &ast::Group, expr: Hir) -> Hir { + let (index, name) = match group.kind { + ast::GroupKind::CaptureIndex(index) => (index, None), + ast::GroupKind::CaptureName { ref name, .. } => { + (name.index, Some(name.name.clone().into_boxed_str())) } - ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing, + // The HIR doesn't need to use non-capturing groups, since the way + // in which the data type is defined handles this automatically. 
+ ast::GroupKind::NonCapturing(_) => return expr, }; - Hir::group(hir::Group { kind, hir: Box::new(expr) }) + Hir::capture(hir::Capture { index, name, sub: Box::new(expr) }) } fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir { - let kind = match rep.op.kind { - ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne, - ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore, - ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore, + let (min, max) = match rep.op.kind { + ast::RepetitionKind::ZeroOrOne => (0, Some(1)), + ast::RepetitionKind::ZeroOrMore => (0, None), + ast::RepetitionKind::OneOrMore => (1, None), ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => { - hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m)) + (m, Some(m)) } ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => { - hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m)) + (m, None) } ast::RepetitionKind::Range(ast::RepetitionRange::Bounded( m, n, - )) => { - hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n)) - } + )) => (m, Some(n)), }; let greedy = if self.flags().swap_greed() { !rep.greedy } else { rep.greedy }; - Hir::repetition(hir::Repetition { kind, greedy, hir: Box::new(expr) }) + Hir::repetition(hir::Repetition { + min, + max, + greedy, + sub: Box::new(expr), + }) } fn hir_unicode_class( @@ -834,11 +976,6 @@ impl<'t, 'p> TranslatorI<'t, 'p> { ast_class.negated, class, )?; - if class.ranges().is_empty() { - let err = self - .error(ast_class.span, ErrorKind::EmptyClassNotAllowed); - return Err(err); - } } result } @@ -848,9 +985,8 @@ impl<'t, 'p> TranslatorI<'t, 'p> { ast: &ast::ClassAscii, ) -> Result { let mut cls = hir::ClassUnicode::new( - ascii_class(&ast.kind) - .iter() - .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)), + ascii_class_as_chars(&ast.kind) + .map(|(s, e)| hir::ClassUnicodeRange::new(s, e)), ); self.unicode_fold_and_negate(&ast.span, ast.negated, &mut cls)?; Ok(cls) 
@@ -862,8 +998,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { ) -> Result { let mut cls = hir::ClassBytes::new( ascii_class(&ast.kind) - .iter() - .map(|&(s, e)| hir::ClassBytesRange::new(s as u8, e as u8)), + .map(|(s, e)| hir::ClassBytesRange::new(s, e)), ); self.bytes_fold_and_negate(&ast.span, ast.negated, &mut cls)?; Ok(cls) @@ -894,7 +1029,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { fn hir_perl_byte_class( &self, ast_class: &ast::ClassPerl, - ) -> hir::ClassBytes { + ) -> Result { use crate::ast::ClassPerlKind::*; assert!(!self.flags().unicode()); @@ -908,7 +1043,13 @@ impl<'t, 'p> TranslatorI<'t, 'p> { if ast_class.negated { class.negate(); } - class + // Negating a Perl byte class is likely to cause it to match invalid + // UTF-8. That's only OK if the translator is configured to allow such + // things. + if self.trans().utf8 && !class.is_ascii() { + return Err(self.error(ast_class.span, ErrorKind::InvalidUtf8)); + } + Ok(class) } /// Converts the given Unicode specific error to an HIR translation error. @@ -918,7 +1059,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { fn convert_unicode_class_error( &self, span: &Span, - result: unicode::Result, + result: core::result::Result, ) -> Result { result.map_err(|err| { let sp = span.clone(); @@ -943,7 +1084,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { class: &mut hir::ClassUnicode, ) -> Result<()> { // Note that we must apply case folding before negation! - // Consider `(?i)[^x]`. If we applied negation field, then + // Consider `(?i)[^x]`. If we applied negation first, then // the result would be the character class that matched any // Unicode scalar value. 
if self.flags().case_insensitive() { @@ -973,7 +1114,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { if negated { class.negate(); } - if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() { + if self.trans().utf8 && !class.is_ascii() { return Err(self.error(span.clone(), ErrorKind::InvalidUtf8)); } Ok(()) @@ -982,11 +1123,12 @@ impl<'t, 'p> TranslatorI<'t, 'p> { /// Return a scalar byte value suitable for use as a literal in a byte /// character class. fn class_literal_byte(&self, ast: &ast::Literal) -> Result { - match self.literal_to_char(ast)? { - hir::Literal::Byte(byte) => Ok(byte), - hir::Literal::Unicode(ch) => { - if ch <= 0x7F as char { - Ok(ch as u8) + match self.ast_literal_to_scalar(ast)? { + Either::Right(byte) => Ok(byte), + Either::Left(ch) => { + let cp = u32::from(ch); + if cp <= 0x7F { + Ok(u8::try_from(cp).unwrap()) } else { // We can't feasibly support Unicode in // byte oriented classes. Byte classes don't @@ -1010,6 +1152,7 @@ struct Flags { dot_matches_new_line: Option, swap_greed: Option, unicode: Option, + crlf: Option, // Note that `ignore_whitespace` is omitted here because it is handled // entirely in the parser. 
} @@ -1038,6 +1181,9 @@ impl Flags { ast::FlagsItemKind::Flag(ast::Flag::Unicode) => { flags.unicode = Some(enable); } + ast::FlagsItemKind::Flag(ast::Flag::CRLF) => { + flags.crlf = Some(enable); + } ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {} } } @@ -1060,6 +1206,33 @@ impl Flags { if self.unicode.is_none() { self.unicode = previous.unicode; } + if self.crlf.is_none() { + self.crlf = previous.crlf; + } + } + + fn dot(&self) -> hir::Dot { + if self.dot_matches_new_line() { + if self.unicode() { + hir::Dot::AnyChar + } else { + hir::Dot::AnyByte + } + } else { + if self.unicode() { + if self.crlf() { + hir::Dot::AnyCharExceptCRLF + } else { + hir::Dot::AnyCharExceptLF + } + } else { + if self.crlf() { + hir::Dot::AnyByteExceptCRLF + } else { + hir::Dot::AnyByteExceptLF + } + } + } } fn case_insensitive(&self) -> bool { @@ -1081,52 +1254,63 @@ impl Flags { fn unicode(&self) -> bool { self.unicode.unwrap_or(true) } + + fn crlf(&self) -> bool { + self.crlf.unwrap_or(false) + } } fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes { let ranges: Vec<_> = ascii_class(kind) - .iter() - .cloned() - .map(|(s, e)| hir::ClassBytesRange::new(s as u8, e as u8)) + .map(|(s, e)| hir::ClassBytesRange::new(s, e)) .collect(); hir::ClassBytes::new(ranges) } -fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] { +fn ascii_class(kind: &ast::ClassAsciiKind) -> impl Iterator { use crate::ast::ClassAsciiKind::*; - match *kind { - Alnum => &[('0', '9'), ('A', 'Z'), ('a', 'z')], - Alpha => &[('A', 'Z'), ('a', 'z')], - Ascii => &[('\x00', '\x7F')], - Blank => &[('\t', '\t'), (' ', ' ')], - Cntrl => &[('\x00', '\x1F'), ('\x7F', '\x7F')], - Digit => &[('0', '9')], - Graph => &[('!', '~')], - Lower => &[('a', 'z')], - Print => &[(' ', '~')], - Punct => &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')], + + let slice: &'static [(u8, u8)] = match *kind { + Alnum => &[(b'0', b'9'), (b'A', b'Z'), (b'a', b'z')], + Alpha => &[(b'A', b'Z'), (b'a', 
b'z')], + Ascii => &[(b'\x00', b'\x7F')], + Blank => &[(b'\t', b'\t'), (b' ', b' ')], + Cntrl => &[(b'\x00', b'\x1F'), (b'\x7F', b'\x7F')], + Digit => &[(b'0', b'9')], + Graph => &[(b'!', b'~')], + Lower => &[(b'a', b'z')], + Print => &[(b' ', b'~')], + Punct => &[(b'!', b'/'), (b':', b'@'), (b'[', b'`'), (b'{', b'~')], Space => &[ - ('\t', '\t'), - ('\n', '\n'), - ('\x0B', '\x0B'), - ('\x0C', '\x0C'), - ('\r', '\r'), - (' ', ' '), + (b'\t', b'\t'), + (b'\n', b'\n'), + (b'\x0B', b'\x0B'), + (b'\x0C', b'\x0C'), + (b'\r', b'\r'), + (b' ', b' '), ], - Upper => &[('A', 'Z')], - Word => &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')], - Xdigit => &[('0', '9'), ('A', 'F'), ('a', 'f')], - } + Upper => &[(b'A', b'Z')], + Word => &[(b'0', b'9'), (b'A', b'Z'), (b'_', b'_'), (b'a', b'z')], + Xdigit => &[(b'0', b'9'), (b'A', b'F'), (b'a', b'f')], + }; + slice.iter().copied() +} + +fn ascii_class_as_chars( + kind: &ast::ClassAsciiKind, +) -> impl Iterator { + ascii_class(kind).map(|(s, e)| (char::from(s), char::from(e))) } #[cfg(test)] mod tests { - use crate::ast::parse::ParserBuilder; - use crate::ast::{self, Ast, Position, Span}; - use crate::hir::{self, Hir, HirKind}; - use crate::unicode::{self, ClassQuery}; + use crate::{ + ast::{self, parse::ParserBuilder, Ast, Position, Span}, + hir::{self, Hir, HirKind, Look, Properties}, + unicode::{self, ClassQuery}, + }; - use super::{ascii_class, TranslatorBuilder}; + use super::*; // We create these errors to compare with real hir::Errors in the tests. 
// We define equality between TestError and hir::Error to disregard the @@ -1155,7 +1339,7 @@ mod tests { fn t(pattern: &str) -> Hir { TranslatorBuilder::new() - .allow_invalid_utf8(false) + .utf8(true) .build() .translate(pattern, &parse(pattern)) .unwrap() @@ -1163,7 +1347,7 @@ mod tests { fn t_err(pattern: &str) -> hir::Error { TranslatorBuilder::new() - .allow_invalid_utf8(false) + .utf8(true) .build() .translate(pattern, &parse(pattern)) .unwrap_err() @@ -1171,95 +1355,73 @@ mod tests { fn t_bytes(pattern: &str) -> Hir { TranslatorBuilder::new() - .allow_invalid_utf8(true) + .utf8(false) .build() .translate(pattern, &parse(pattern)) .unwrap() } - fn hir_lit(s: &str) -> Hir { - match s.len() { - 0 => Hir::empty(), - _ => { - let lits = s - .chars() - .map(hir::Literal::Unicode) - .map(Hir::literal) - .collect(); - Hir::concat(lits) - } - } + fn props(pattern: &str) -> Properties { + t(pattern).properties().clone() } - fn hir_blit(s: &[u8]) -> Hir { - match s.len() { - 0 => Hir::empty(), - 1 => Hir::literal(hir::Literal::Byte(s[0])), - _ => { - let lits = s - .iter() - .cloned() - .map(hir::Literal::Byte) - .map(Hir::literal) - .collect(); - Hir::concat(lits) - } - } + fn props_bytes(pattern: &str) -> Properties { + t_bytes(pattern).properties().clone() } - fn hir_group(i: u32, expr: Hir) -> Hir { - Hir::group(hir::Group { - kind: hir::GroupKind::CaptureIndex(i), - hir: Box::new(expr), - }) + fn hir_lit(s: &str) -> Hir { + hir_blit(s.as_bytes()) } - fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir { - Hir::group(hir::Group { - kind: hir::GroupKind::CaptureName { - name: name.to_string(), - index: i, - }, - hir: Box::new(expr), - }) + fn hir_blit(s: &[u8]) -> Hir { + Hir::literal(s) + } + + fn hir_capture(index: u32, expr: Hir) -> Hir { + Hir::capture(hir::Capture { index, name: None, sub: Box::new(expr) }) } - fn hir_group_nocap(expr: Hir) -> Hir { - Hir::group(hir::Group { - kind: hir::GroupKind::NonCapturing, - hir: Box::new(expr), + fn 
hir_capture_name(index: u32, name: &str, expr: Hir) -> Hir { + Hir::capture(hir::Capture { + index, + name: Some(name.into()), + sub: Box::new(expr), }) } fn hir_quest(greedy: bool, expr: Hir) -> Hir { Hir::repetition(hir::Repetition { - kind: hir::RepetitionKind::ZeroOrOne, + min: 0, + max: Some(1), greedy, - hir: Box::new(expr), + sub: Box::new(expr), }) } fn hir_star(greedy: bool, expr: Hir) -> Hir { Hir::repetition(hir::Repetition { - kind: hir::RepetitionKind::ZeroOrMore, + min: 0, + max: None, greedy, - hir: Box::new(expr), + sub: Box::new(expr), }) } fn hir_plus(greedy: bool, expr: Hir) -> Hir { Hir::repetition(hir::Repetition { - kind: hir::RepetitionKind::OneOrMore, + min: 1, + max: None, greedy, - hir: Box::new(expr), + sub: Box::new(expr), }) } - fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir { + fn hir_range(greedy: bool, min: u32, max: Option, expr: Hir) -> Hir { Hir::repetition(hir::Repetition { - kind: hir::RepetitionKind::Range(range), + min, + max, greedy, - hir: Box::new(expr), + sub: Box::new(expr), }) } @@ -1281,32 +1443,25 @@ mod tests { Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap())) } - fn hir_uclass(ranges: &[(char, char)]) -> Hir { - let ranges: Vec = ranges - .iter() - .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)) - .collect(); - Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(ranges))) + fn hir_ascii_uclass(kind: &ast::ClassAsciiKind) -> Hir { + Hir::class(hir::Class::Unicode(hir::ClassUnicode::new( + ascii_class_as_chars(kind) + .map(|(s, e)| hir::ClassUnicodeRange::new(s, e)), + ))) } - fn hir_bclass(ranges: &[(u8, u8)]) -> Hir { - let ranges: Vec = ranges - .iter() - .map(|&(s, e)| hir::ClassBytesRange::new(s, e)) - .collect(); - Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges))) + fn hir_ascii_bclass(kind: &ast::ClassAsciiKind) -> Hir { + Hir::class(hir::Class::Bytes(hir::ClassBytes::new( + ascii_class(kind).map(|(s, e)| hir::ClassBytesRange::new(s, e)), + ))) } - fn 
hir_bclass_from_char(ranges: &[(char, char)]) -> Hir { - let ranges: Vec = ranges - .iter() - .map(|&(s, e)| { - assert!(s as u32 <= 0x7F); - assert!(e as u32 <= 0x7F); - hir::ClassBytesRange::new(s as u8, e as u8) - }) - .collect(); - Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges))) + fn hir_uclass(ranges: &[(char, char)]) -> Hir { + Hir::class(uclass(ranges)) + } + + fn hir_bclass(ranges: &[(u8, u8)]) -> Hir { + Hir::class(bclass(ranges)) } fn hir_case_fold(expr: Hir) -> Hir { @@ -1329,6 +1484,33 @@ mod tests { } } + fn uclass(ranges: &[(char, char)]) -> hir::Class { + let ranges: Vec = ranges + .iter() + .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)) + .collect(); + hir::Class::Unicode(hir::ClassUnicode::new(ranges)) + } + + fn bclass(ranges: &[(u8, u8)]) -> hir::Class { + let ranges: Vec = ranges + .iter() + .map(|&(s, e)| hir::ClassBytesRange::new(s, e)) + .collect(); + hir::Class::Bytes(hir::ClassBytes::new(ranges)) + } + + #[cfg(feature = "unicode-case")] + fn class_case_fold(mut cls: hir::Class) -> Hir { + cls.case_fold_simple(); + Hir::class(cls) + } + + fn class_negate(mut cls: hir::Class) -> Hir { + cls.negate(); + Hir::class(cls) + } + #[allow(dead_code)] fn hir_union(expr1: Hir, expr2: Hir) -> Hir { use crate::hir::Class::{Bytes, Unicode}; @@ -1363,47 +1545,43 @@ mod tests { } } - fn hir_anchor(anchor: hir::Anchor) -> Hir { - Hir::anchor(anchor) - } - - fn hir_word(wb: hir::WordBoundary) -> Hir { - Hir::word_boundary(wb) + fn hir_look(look: hir::Look) -> Hir { + Hir::look(look) } #[test] fn empty() { assert_eq!(t(""), Hir::empty()); assert_eq!(t("(?i)"), Hir::empty()); - assert_eq!(t("()"), hir_group(1, Hir::empty())); - assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty())); - assert_eq!(t("(?P)"), hir_group_name(1, "wat", Hir::empty())); + assert_eq!(t("()"), hir_capture(1, Hir::empty())); + assert_eq!(t("(?:)"), Hir::empty()); + assert_eq!(t("(?P)"), hir_capture_name(1, "wat", Hir::empty())); assert_eq!(t("|"), hir_alt(vec![Hir::empty(), 
Hir::empty()])); assert_eq!( t("()|()"), hir_alt(vec![ - hir_group(1, Hir::empty()), - hir_group(2, Hir::empty()), + hir_capture(1, Hir::empty()), + hir_capture(2, Hir::empty()), ]) ); assert_eq!( t("(|b)"), - hir_group(1, hir_alt(vec![Hir::empty(), hir_lit("b"),])) + hir_capture(1, hir_alt(vec![Hir::empty(), hir_lit("b"),])) ); assert_eq!( t("(a|)"), - hir_group(1, hir_alt(vec![hir_lit("a"), Hir::empty(),])) + hir_capture(1, hir_alt(vec![hir_lit("a"), Hir::empty(),])) ); assert_eq!( t("(a||c)"), - hir_group( + hir_capture( 1, hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c"),]) ) ); assert_eq!( t("(||)"), - hir_group( + hir_capture( 1, hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty(),]) ) @@ -1449,10 +1627,7 @@ mod tests { #[cfg(feature = "unicode-case")] assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),])); #[cfg(feature = "unicode-case")] - assert_eq!( - t("(?i:a)"), - hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')],)) - ); + assert_eq!(t("(?i:a)"), hir_uclass(&[('A', 'A'), ('a', 'a')])); #[cfg(feature = "unicode-case")] assert_eq!( t("a(?i)a(?-i)a"), @@ -1528,14 +1703,32 @@ mod tests { fn dot() { assert_eq!( t("."), - hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}'),]) + hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}')]) ); - assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}'),])); + assert_eq!( + t("(?R)."), + hir_uclass(&[ + ('\0', '\t'), + ('\x0B', '\x0C'), + ('\x0E', '\u{10FFFF}'), + ]) + ); + assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}')])); + assert_eq!(t("(?Rs)."), hir_uclass(&[('\0', '\u{10FFFF}')])); assert_eq!( t_bytes("(?-u)."), - hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF'),]) + hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF')]) + ); + assert_eq!( + t_bytes("(?R-u)."), + hir_bclass(&[ + (b'\0', b'\t'), + (b'\x0B', b'\x0C'), + (b'\x0E', b'\xFF'), + ]) ); assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', b'\xFF'),])); + assert_eq!(t_bytes("(?Rs-u)."), hir_bclass(&[(b'\0', b'\xFF'),])); // If invalid UTF-8 
isn't allowed, then non-Unicode `.` isn't allowed. assert_eq!( @@ -1549,7 +1742,7 @@ mod tests { } ); assert_eq!( - t_err("(?s-u)."), + t_err("(?R-u)."), TestError { kind: hir::ErrorKind::InvalidUtf8, span: Span::new( @@ -1558,94 +1751,123 @@ mod tests { ), } ); - } - - #[test] - fn assertions() { - assert_eq!(t("^"), hir_anchor(hir::Anchor::StartText)); - assert_eq!(t("$"), hir_anchor(hir::Anchor::EndText)); - assert_eq!(t(r"\A"), hir_anchor(hir::Anchor::StartText)); - assert_eq!(t(r"\z"), hir_anchor(hir::Anchor::EndText)); - assert_eq!(t("(?m)^"), hir_anchor(hir::Anchor::StartLine)); - assert_eq!(t("(?m)$"), hir_anchor(hir::Anchor::EndLine)); - assert_eq!(t(r"(?m)\A"), hir_anchor(hir::Anchor::StartText)); - assert_eq!(t(r"(?m)\z"), hir_anchor(hir::Anchor::EndText)); - - assert_eq!(t(r"\b"), hir_word(hir::WordBoundary::Unicode)); - assert_eq!(t(r"\B"), hir_word(hir::WordBoundary::UnicodeNegate)); - assert_eq!(t(r"(?-u)\b"), hir_word(hir::WordBoundary::Ascii)); assert_eq!( - t_bytes(r"(?-u)\B"), - hir_word(hir::WordBoundary::AsciiNegate) + t_err("(?s-u)."), + TestError { + kind: hir::ErrorKind::InvalidUtf8, + span: Span::new( + Position::new(6, 1, 7), + Position::new(7, 1, 8) + ), + } ); - assert_eq!( - t_err(r"(?-u)\B"), + t_err("(?Rs-u)."), TestError { kind: hir::ErrorKind::InvalidUtf8, span: Span::new( - Position::new(5, 1, 6), - Position::new(7, 1, 8) + Position::new(7, 1, 8), + Position::new(8, 1, 9) ), } ); } + #[test] + fn assertions() { + assert_eq!(t("^"), hir_look(hir::Look::Start)); + assert_eq!(t("$"), hir_look(hir::Look::End)); + assert_eq!(t(r"\A"), hir_look(hir::Look::Start)); + assert_eq!(t(r"\z"), hir_look(hir::Look::End)); + assert_eq!(t("(?m)^"), hir_look(hir::Look::StartLF)); + assert_eq!(t("(?m)$"), hir_look(hir::Look::EndLF)); + assert_eq!(t(r"(?m)\A"), hir_look(hir::Look::Start)); + assert_eq!(t(r"(?m)\z"), hir_look(hir::Look::End)); + + assert_eq!(t(r"\b"), hir_look(hir::Look::WordUnicode)); + assert_eq!(t(r"\B"), 
hir_look(hir::Look::WordUnicodeNegate)); + assert_eq!(t(r"(?-u)\b"), hir_look(hir::Look::WordAscii)); + assert_eq!(t(r"(?-u)\B"), hir_look(hir::Look::WordAsciiNegate)); + } + #[test] fn group() { - assert_eq!(t("(a)"), hir_group(1, hir_lit("a"))); + assert_eq!(t("(a)"), hir_capture(1, hir_lit("a"))); assert_eq!( t("(a)(b)"), hir_cat(vec![ - hir_group(1, hir_lit("a")), - hir_group(2, hir_lit("b")), + hir_capture(1, hir_lit("a")), + hir_capture(2, hir_lit("b")), ]) ); assert_eq!( t("(a)|(b)"), hir_alt(vec![ - hir_group(1, hir_lit("a")), - hir_group(2, hir_lit("b")), + hir_capture(1, hir_lit("a")), + hir_capture(2, hir_lit("b")), ]) ); - assert_eq!(t("(?P)"), hir_group_name(1, "foo", Hir::empty())); - assert_eq!(t("(?Pa)"), hir_group_name(1, "foo", hir_lit("a"))); + assert_eq!(t("(?P)"), hir_capture_name(1, "foo", Hir::empty())); + assert_eq!(t("(?Pa)"), hir_capture_name(1, "foo", hir_lit("a"))); assert_eq!( t("(?Pa)(?Pb)"), hir_cat(vec![ - hir_group_name(1, "foo", hir_lit("a")), - hir_group_name(2, "bar", hir_lit("b")), + hir_capture_name(1, "foo", hir_lit("a")), + hir_capture_name(2, "bar", hir_lit("b")), ]) ); - assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty())); - assert_eq!(t("(?:a)"), hir_group_nocap(hir_lit("a"))); + assert_eq!(t("(?:)"), Hir::empty()); + assert_eq!(t("(?:a)"), hir_lit("a")); assert_eq!( t("(?:a)(b)"), - hir_cat(vec![ - hir_group_nocap(hir_lit("a")), - hir_group(1, hir_lit("b")), - ]) + hir_cat(vec![hir_lit("a"), hir_capture(1, hir_lit("b")),]) ); assert_eq!( t("(a)(?:b)(c)"), hir_cat(vec![ - hir_group(1, hir_lit("a")), - hir_group_nocap(hir_lit("b")), - hir_group(2, hir_lit("c")), + hir_capture(1, hir_lit("a")), + hir_lit("b"), + hir_capture(2, hir_lit("c")), ]) ); assert_eq!( t("(a)(?Pb)(c)"), hir_cat(vec![ - hir_group(1, hir_lit("a")), - hir_group_name(2, "foo", hir_lit("b")), - hir_group(3, hir_lit("c")), + hir_capture(1, hir_lit("a")), + hir_capture_name(2, "foo", hir_lit("b")), + hir_capture(3, hir_lit("c")), ]) ); - assert_eq!(t("()"), 
hir_group(1, Hir::empty())); - assert_eq!(t("((?i))"), hir_group(1, Hir::empty())); - assert_eq!(t("((?x))"), hir_group(1, Hir::empty())); - assert_eq!(t("(((?x)))"), hir_group(1, hir_group(2, Hir::empty()))); + assert_eq!(t("()"), hir_capture(1, Hir::empty())); + assert_eq!(t("((?i))"), hir_capture(1, Hir::empty())); + assert_eq!(t("((?x))"), hir_capture(1, Hir::empty())); + assert_eq!( + t("(((?x)))"), + hir_capture(1, hir_capture(2, Hir::empty())) + ); + } + + #[test] + fn line_anchors() { + assert_eq!(t("^"), hir_look(hir::Look::Start)); + assert_eq!(t("$"), hir_look(hir::Look::End)); + assert_eq!(t(r"\A"), hir_look(hir::Look::Start)); + assert_eq!(t(r"\z"), hir_look(hir::Look::End)); + + assert_eq!(t(r"(?m)\A"), hir_look(hir::Look::Start)); + assert_eq!(t(r"(?m)\z"), hir_look(hir::Look::End)); + assert_eq!(t("(?m)^"), hir_look(hir::Look::StartLF)); + assert_eq!(t("(?m)$"), hir_look(hir::Look::EndLF)); + + assert_eq!(t(r"(?R)\A"), hir_look(hir::Look::Start)); + assert_eq!(t(r"(?R)\z"), hir_look(hir::Look::End)); + assert_eq!(t("(?R)^"), hir_look(hir::Look::Start)); + assert_eq!(t("(?R)$"), hir_look(hir::Look::End)); + + assert_eq!(t(r"(?Rm)\A"), hir_look(hir::Look::Start)); + assert_eq!(t(r"(?Rm)\z"), hir_look(hir::Look::End)); + assert_eq!(t("(?Rm)^"), hir_look(hir::Look::StartCRLF)); + assert_eq!(t("(?Rm)$"), hir_look(hir::Look::EndCRLF)); } #[test] @@ -1653,46 +1875,44 @@ mod tests { #[cfg(feature = "unicode-case")] assert_eq!( t("(?i:a)a"), - hir_cat(vec![ - hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')])), - hir_lit("a"), - ]) + hir_cat( + vec![hir_uclass(&[('A', 'A'), ('a', 'a')]), hir_lit("a"),] + ) ); assert_eq!( t("(?i-u:a)β"), hir_cat(vec![ - hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])), + hir_bclass(&[(b'A', b'A'), (b'a', b'a')]), hir_lit("β"), ]) ); assert_eq!( t("(?:(?i-u)a)b"), hir_cat(vec![ - hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])), + hir_bclass(&[(b'A', b'A'), (b'a', b'a')]), hir_lit("b"), ]) ); assert_eq!( 
t("((?i-u)a)b"), hir_cat(vec![ - hir_group(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])), + hir_capture(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])), hir_lit("b"), ]) ); #[cfg(feature = "unicode-case")] assert_eq!( t("(?i)(?-i:a)a"), - hir_cat(vec![ - hir_group_nocap(hir_lit("a")), - hir_uclass(&[('A', 'A'), ('a', 'a')]), - ]) + hir_cat( + vec![hir_lit("a"), hir_uclass(&[('A', 'A'), ('a', 'a')]),] + ) ); #[cfg(feature = "unicode-case")] assert_eq!( t("(?im)a^"), hir_cat(vec![ hir_uclass(&[('A', 'A'), ('a', 'a')]), - hir_anchor(hir::Anchor::StartLine), + hir_look(hir::Look::StartLF), ]) ); #[cfg(feature = "unicode-case")] @@ -1700,9 +1920,9 @@ mod tests { t("(?im)a^(?i-m)a^"), hir_cat(vec![ hir_uclass(&[('A', 'A'), ('a', 'a')]), - hir_anchor(hir::Anchor::StartLine), + hir_look(hir::Look::StartLF), hir_uclass(&[('A', 'A'), ('a', 'a')]), - hir_anchor(hir::Anchor::StartText), + hir_look(hir::Look::Start), ]) ); assert_eq!( @@ -1718,10 +1938,10 @@ mod tests { assert_eq!( t("(?:a(?i)a)a"), hir_cat(vec![ - hir_group_nocap(hir_cat(vec![ + hir_cat(vec![ hir_lit("a"), hir_uclass(&[('A', 'A'), ('a', 'a')]), - ])), + ]), hir_lit("a"), ]) ); @@ -1729,10 +1949,10 @@ mod tests { assert_eq!( t("(?i)(?:a(?-i)a)a"), hir_cat(vec![ - hir_group_nocap(hir_cat(vec![ + hir_cat(vec![ hir_uclass(&[('A', 'A'), ('a', 'a')]), hir_lit("a"), - ])), + ]), hir_uclass(&[('A', 'A'), ('a', 'a')]), ]) ); @@ -1755,46 +1975,18 @@ mod tests { assert_eq!(t("a*?"), hir_star(false, hir_lit("a"))); assert_eq!(t("a+?"), hir_plus(false, hir_lit("a"))); - assert_eq!( - t("a{1}"), - hir_range(true, hir::RepetitionRange::Exactly(1), hir_lit("a"),) - ); - assert_eq!( - t("a{1,}"), - hir_range(true, hir::RepetitionRange::AtLeast(1), hir_lit("a"),) - ); - assert_eq!( - t("a{1,2}"), - hir_range(true, hir::RepetitionRange::Bounded(1, 2), hir_lit("a"),) - ); - assert_eq!( - t("a{1}?"), - hir_range(false, hir::RepetitionRange::Exactly(1), hir_lit("a"),) - ); - assert_eq!( - t("a{1,}?"), - hir_range(false, 
hir::RepetitionRange::AtLeast(1), hir_lit("a"),) - ); - assert_eq!( - t("a{1,2}?"), - hir_range( - false, - hir::RepetitionRange::Bounded(1, 2), - hir_lit("a"), - ) - ); + assert_eq!(t("a{1}"), hir_range(true, 1, Some(1), hir_lit("a"),)); + assert_eq!(t("a{1,}"), hir_range(true, 1, None, hir_lit("a"),)); + assert_eq!(t("a{1,2}"), hir_range(true, 1, Some(2), hir_lit("a"),)); + assert_eq!(t("a{1}?"), hir_range(false, 1, Some(1), hir_lit("a"),)); + assert_eq!(t("a{1,}?"), hir_range(false, 1, None, hir_lit("a"),)); + assert_eq!(t("a{1,2}?"), hir_range(false, 1, Some(2), hir_lit("a"),)); assert_eq!( t("ab?"), hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),]) ); - assert_eq!( - t("(ab)?"), - hir_quest( - true, - hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),])) - ) - ); + assert_eq!(t("(ab)?"), hir_quest(true, hir_capture(1, hir_lit("ab")))); assert_eq!( t("a|b?"), hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),]) @@ -1803,48 +1995,49 @@ mod tests { #[test] fn cat_alt() { + let a = || hir_look(hir::Look::Start); + let b = || hir_look(hir::Look::End); + let c = || hir_look(hir::Look::WordUnicode); + let d = || hir_look(hir::Look::WordUnicodeNegate); + + assert_eq!(t("(^$)"), hir_capture(1, hir_cat(vec![a(), b()]))); + assert_eq!(t("^|$"), hir_alt(vec![a(), b()])); + assert_eq!(t(r"^|$|\b"), hir_alt(vec![a(), b(), c()])); assert_eq!( - t("(ab)"), - hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),])) - ); - assert_eq!(t("a|b"), hir_alt(vec![hir_lit("a"), hir_lit("b"),])); - assert_eq!( - t("a|b|c"), - hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),]) - ); - assert_eq!( - t("ab|bc|cd"), - hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),]) - ); - assert_eq!( - t("(a|b)"), - hir_group(1, hir_alt(vec![hir_lit("a"), hir_lit("b"),])) + t(r"^$|$\b|\b\B"), + hir_alt(vec![ + hir_cat(vec![a(), b()]), + hir_cat(vec![b(), c()]), + hir_cat(vec![c(), d()]), + ]) ); + assert_eq!(t("(^|$)"), hir_capture(1, hir_alt(vec![a(), b()]))); 
assert_eq!( - t("(a|b|c)"), - hir_group( - 1, - hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),]) - ) + t(r"(^|$|\b)"), + hir_capture(1, hir_alt(vec![a(), b(), c()])) ); assert_eq!( - t("(ab|bc|cd)"), - hir_group( + t(r"(^$|$\b|\b\B)"), + hir_capture( 1, - hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),]) + hir_alt(vec![ + hir_cat(vec![a(), b()]), + hir_cat(vec![b(), c()]), + hir_cat(vec![c(), d()]), + ]) ) ); assert_eq!( - t("(ab|(bc|(cd)))"), - hir_group( + t(r"(^$|($\b|(\b\B)))"), + hir_capture( 1, hir_alt(vec![ - hir_lit("ab"), - hir_group( + hir_cat(vec![a(), b()]), + hir_capture( 2, hir_alt(vec![ - hir_lit("bc"), - hir_group(3, hir_lit("cd")), + hir_cat(vec![b(), c()]), + hir_capture(3, hir_cat(vec![c(), d()])), ]) ), ]) @@ -1852,68 +2045,107 @@ mod tests { ); } + // Tests the HIR transformation of things like '[a-z]|[A-Z]' into + // '[A-Za-z]'. In other words, an alternation of just classes is always + // equivalent to a single class corresponding to the union of the branches + // in that class. (Unless some branches match invalid UTF-8 and others + // match non-ASCII Unicode.) + #[test] + fn cat_class_flattened() { + assert_eq!(t(r"[a-z]|[A-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')])); + // Combining all of the letter properties should give us the one giant + // letter property. + #[cfg(feature = "unicode-gencat")] + assert_eq!( + t(r"(?x) + \p{Lowercase_Letter} + |\p{Uppercase_Letter} + |\p{Titlecase_Letter} + |\p{Modifier_Letter} + |\p{Other_Letter} + "), + hir_uclass_query(ClassQuery::Binary("letter")) + ); + // Byte classes that can truly match invalid UTF-8 cannot be combined + // with Unicode classes. + assert_eq!( + t_bytes(r"[Δδ]|(?-u:[\x90-\xFF])|[Λλ]"), + hir_alt(vec![ + hir_uclass(&[('Δ', 'Δ'), ('δ', 'δ')]), + hir_bclass(&[(b'\x90', b'\xFF')]), + hir_uclass(&[('Λ', 'Λ'), ('λ', 'λ')]), + ]) + ); + // Byte classes on their own can be combined, even if some are ASCII + // and others are invalid UTF-8. 
+ assert_eq!( + t_bytes(r"[a-z]|(?-u:[\x90-\xFF])|[A-Z]"), + hir_bclass(&[(b'A', b'Z'), (b'a', b'z'), (b'\x90', b'\xFF')]), + ); + } + #[test] fn class_ascii() { assert_eq!( t("[[:alnum:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum)) + hir_ascii_uclass(&ast::ClassAsciiKind::Alnum) ); assert_eq!( t("[[:alpha:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Alpha)) + hir_ascii_uclass(&ast::ClassAsciiKind::Alpha) ); assert_eq!( t("[[:ascii:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Ascii)) + hir_ascii_uclass(&ast::ClassAsciiKind::Ascii) ); assert_eq!( t("[[:blank:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Blank)) + hir_ascii_uclass(&ast::ClassAsciiKind::Blank) ); assert_eq!( t("[[:cntrl:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Cntrl)) + hir_ascii_uclass(&ast::ClassAsciiKind::Cntrl) ); assert_eq!( t("[[:digit:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Digit)) + hir_ascii_uclass(&ast::ClassAsciiKind::Digit) ); assert_eq!( t("[[:graph:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Graph)) + hir_ascii_uclass(&ast::ClassAsciiKind::Graph) ); assert_eq!( t("[[:lower:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower)) + hir_ascii_uclass(&ast::ClassAsciiKind::Lower) ); assert_eq!( t("[[:print:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Print)) + hir_ascii_uclass(&ast::ClassAsciiKind::Print) ); assert_eq!( t("[[:punct:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Punct)) + hir_ascii_uclass(&ast::ClassAsciiKind::Punct) ); assert_eq!( t("[[:space:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Space)) + hir_ascii_uclass(&ast::ClassAsciiKind::Space) ); assert_eq!( t("[[:upper:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Upper)) + hir_ascii_uclass(&ast::ClassAsciiKind::Upper) ); assert_eq!( t("[[:word:]]"), - hir_uclass(ascii_class(&ast::ClassAsciiKind::Word)) + hir_ascii_uclass(&ast::ClassAsciiKind::Word) ); assert_eq!( t("[[:xdigit:]]"), - 
hir_uclass(ascii_class(&ast::ClassAsciiKind::Xdigit)) + hir_ascii_uclass(&ast::ClassAsciiKind::Xdigit) ); assert_eq!( t("[[:^lower:]]"), - hir_negate(hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower))) + hir_negate(hir_ascii_uclass(&ast::ClassAsciiKind::Lower)) ); #[cfg(feature = "unicode-case")] assert_eq!( @@ -1928,13 +2160,11 @@ mod tests { assert_eq!( t("(?-u)[[:lower:]]"), - hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Lower)) + hir_ascii_bclass(&ast::ClassAsciiKind::Lower) ); assert_eq!( t("(?i-u)[[:lower:]]"), - hir_case_fold(hir_bclass_from_char(ascii_class( - &ast::ClassAsciiKind::Lower - ))) + hir_case_fold(hir_ascii_bclass(&ast::ClassAsciiKind::Lower)) ); assert_eq!( @@ -1965,14 +2195,14 @@ mod tests { assert_eq!( t("[[:alnum:][:^ascii:]]"), hir_union( - hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum)), + hir_ascii_uclass(&ast::ClassAsciiKind::Alnum), hir_uclass(&[('\u{80}', '\u{10FFFF}')]), ), ); assert_eq!( t_bytes("(?-u)[[:alnum:][:^ascii:]]"), hir_union( - hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Alnum)), + hir_ascii_bclass(&ast::ClassAsciiKind::Alnum), hir_bclass(&[(0x80, 0xFF)]), ), ); @@ -1980,7 +2210,7 @@ mod tests { #[test] #[cfg(feature = "unicode-perl")] - fn class_perl() { + fn class_perl_unicode() { // Unicode assert_eq!(t(r"\d"), hir_uclass_query(ClassQuery::Binary("digit"))); assert_eq!(t(r"\s"), hir_uclass_query(ClassQuery::Binary("space"))); @@ -2020,69 +2250,124 @@ mod tests { ); #[cfg(feature = "unicode-case")] assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word())); + } + #[test] + fn class_perl_ascii() { // ASCII only assert_eq!( t(r"(?-u)\d"), - hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit)) + hir_ascii_bclass(&ast::ClassAsciiKind::Digit) ); assert_eq!( t(r"(?-u)\s"), - hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space)) + hir_ascii_bclass(&ast::ClassAsciiKind::Space) ); assert_eq!( t(r"(?-u)\w"), - hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word)) + 
hir_ascii_bclass(&ast::ClassAsciiKind::Word) ); assert_eq!( t(r"(?i-u)\d"), - hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit)) + hir_ascii_bclass(&ast::ClassAsciiKind::Digit) ); assert_eq!( t(r"(?i-u)\s"), - hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space)) + hir_ascii_bclass(&ast::ClassAsciiKind::Space) ); assert_eq!( t(r"(?i-u)\w"), - hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word)) + hir_ascii_bclass(&ast::ClassAsciiKind::Word) ); // ASCII only, negated assert_eq!( - t(r"(?-u)\D"), - hir_negate(hir_bclass_from_char(ascii_class( - &ast::ClassAsciiKind::Digit - ))) + t_bytes(r"(?-u)\D"), + hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit)) ); assert_eq!( - t(r"(?-u)\S"), - hir_negate(hir_bclass_from_char(ascii_class( - &ast::ClassAsciiKind::Space - ))) + t_bytes(r"(?-u)\S"), + hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Space)) ); assert_eq!( - t(r"(?-u)\W"), - hir_negate(hir_bclass_from_char(ascii_class( - &ast::ClassAsciiKind::Word - ))) + t_bytes(r"(?-u)\W"), + hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word)) ); assert_eq!( - t(r"(?i-u)\D"), - hir_negate(hir_bclass_from_char(ascii_class( - &ast::ClassAsciiKind::Digit - ))) + t_bytes(r"(?i-u)\D"), + hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit)) ); assert_eq!( - t(r"(?i-u)\S"), - hir_negate(hir_bclass_from_char(ascii_class( - &ast::ClassAsciiKind::Space - ))) + t_bytes(r"(?i-u)\S"), + hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Space)) ); assert_eq!( - t(r"(?i-u)\W"), - hir_negate(hir_bclass_from_char(ascii_class( - &ast::ClassAsciiKind::Word - ))) + t_bytes(r"(?i-u)\W"), + hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word)) + ); + + // ASCII only, negated, with UTF-8 mode enabled. + // In this case, negating any Perl class results in an error because + // all such classes can match invalid UTF-8. 
+ assert_eq!( + t_err(r"(?-u)\D"), + TestError { + kind: hir::ErrorKind::InvalidUtf8, + span: Span::new( + Position::new(5, 1, 6), + Position::new(7, 1, 8), + ), + }, + ); + assert_eq!( + t_err(r"(?-u)\S"), + TestError { + kind: hir::ErrorKind::InvalidUtf8, + span: Span::new( + Position::new(5, 1, 6), + Position::new(7, 1, 8), + ), + }, + ); + assert_eq!( + t_err(r"(?-u)\W"), + TestError { + kind: hir::ErrorKind::InvalidUtf8, + span: Span::new( + Position::new(5, 1, 6), + Position::new(7, 1, 8), + ), + }, + ); + assert_eq!( + t_err(r"(?i-u)\D"), + TestError { + kind: hir::ErrorKind::InvalidUtf8, + span: Span::new( + Position::new(6, 1, 7), + Position::new(8, 1, 9), + ), + }, + ); + assert_eq!( + t_err(r"(?i-u)\S"), + TestError { + kind: hir::ErrorKind::InvalidUtf8, + span: Span::new( + Position::new(6, 1, 7), + Position::new(8, 1, 9), + ), + }, + ); + assert_eq!( + t_err(r"(?i-u)\W"), + TestError { + kind: hir::ErrorKind::InvalidUtf8, + span: Span::new( + Position::new(6, 1, 7), + Position::new(8, 1, 9), + ), + }, ); } @@ -2360,16 +2645,7 @@ mod tests { #[test] #[cfg(feature = "unicode-gencat")] fn class_unicode_any_empty() { - assert_eq!( - t_err(r"\P{any}"), - TestError { - kind: hir::ErrorKind::EmptyClassNotAllowed, - span: Span::new( - Position::new(0, 1, 1), - Position::new(7, 1, 8) - ), - } - ); + assert_eq!(t(r"\P{any}"), hir_uclass(&[]),); } #[test] @@ -2389,8 +2665,9 @@ mod tests { #[test] fn class_bracketed() { - assert_eq!(t("[a]"), hir_uclass(&[('a', 'a')])); - assert_eq!(t("[^[a]]"), hir_negate(hir_uclass(&[('a', 'a')]))); + assert_eq!(t("[a]"), hir_lit("a")); + assert_eq!(t("[ab]"), hir_uclass(&[('a', 'b')])); + assert_eq!(t("[^[a]]"), class_negate(uclass(&[('a', 'a')]))); assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')])); assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')])); assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')])); @@ -2453,11 +2730,11 @@ mod tests { ); assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),])); - 
assert_eq!(t("[^a]"), hir_negate(hir_uclass(&[('a', 'a')]))); - assert_eq!(t(r"[^\x00]"), hir_negate(hir_uclass(&[('\0', '\0')]))); + assert_eq!(t("[^a]"), class_negate(uclass(&[('a', 'a')]))); + assert_eq!(t(r"[^\x00]"), class_negate(uclass(&[('\0', '\0')]))); assert_eq!( t_bytes("(?-u)[^a]"), - hir_negate(hir_bclass(&[(b'a', b'a')])) + class_negate(bclass(&[(b'a', b'a')])) ); #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))] assert_eq!( @@ -2521,27 +2798,9 @@ mod tests { } ); #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))] - assert_eq!( - t_err(r"[^\s\S]"), - TestError { - kind: hir::ErrorKind::EmptyClassNotAllowed, - span: Span::new( - Position::new(0, 1, 1), - Position::new(7, 1, 8) - ), - } - ); + assert_eq!(t(r"[^\s\S]"), hir_uclass(&[]),); #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))] - assert_eq!( - t_err(r"(?-u)[^\s\S]"), - TestError { - kind: hir::ErrorKind::EmptyClassNotAllowed, - span: Span::new( - Position::new(5, 1, 6), - Position::new(12, 1, 13) - ), - } - ); + assert_eq!(t_bytes(r"(?-u)[^\s\S]"), hir_bclass(&[]),); } #[test] @@ -2663,9 +2922,9 @@ mod tests { #[test] fn class_bracketed_nested() { - assert_eq!(t(r"[a[^c]]"), hir_negate(hir_uclass(&[('c', 'c')]))); - assert_eq!(t(r"[a-b[^c]]"), hir_negate(hir_uclass(&[('c', 'c')]))); - assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[]))); + assert_eq!(t(r"[a[^c]]"), class_negate(uclass(&[('c', 'c')]))); + assert_eq!(t(r"[a-b[^c]]"), class_negate(uclass(&[('c', 'c')]))); + assert_eq!(t(r"[a-c[^c]]"), class_negate(uclass(&[]))); assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')])); assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')])); @@ -2673,12 +2932,12 @@ mod tests { #[cfg(feature = "unicode-case")] assert_eq!( t(r"(?i)[a[^c]]"), - hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')]))) + hir_negate(class_case_fold(uclass(&[('c', 'c')]))) ); #[cfg(feature = "unicode-case")] assert_eq!( t(r"(?i)[a-b[^c]]"), - 
hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')]))) + hir_negate(class_case_fold(uclass(&[('c', 'c')]))) ); #[cfg(feature = "unicode-case")] @@ -2689,27 +2948,9 @@ mod tests { hir_uclass(&[('C', 'C'), ('c', 'c')]) ); - assert_eq!( - t_err(r"[^a-c[^c]]"), - TestError { - kind: hir::ErrorKind::EmptyClassNotAllowed, - span: Span::new( - Position::new(0, 1, 1), - Position::new(10, 1, 11) - ), - } - ); + assert_eq!(t(r"[^a-c[^c]]"), hir_uclass(&[]),); #[cfg(feature = "unicode-case")] - assert_eq!( - t_err(r"(?i)[^a-c[^c]]"), - TestError { - kind: hir::ErrorKind::EmptyClassNotAllowed, - span: Span::new( - Position::new(4, 1, 5), - Position::new(14, 1, 15) - ), - } - ); + assert_eq!(t(r"(?i)[^a-c[^c]]"), hir_uclass(&[]),); } #[test] @@ -2826,9 +3067,7 @@ mod tests { #[cfg(feature = "unicode-perl")] assert_eq!( t_bytes(r"(?-u)[^\w&&\d]"), - hir_negate(hir_bclass_from_char(ascii_class( - &ast::ClassAsciiKind::Digit - ))) + hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit)) ); assert_eq!( t_bytes(r"(?-u)[^[a-z&&a-c]]"), @@ -2836,19 +3075,15 @@ mod tests { ); assert_eq!( t_bytes(r"(?-u)[^[\w&&\d]]"), - hir_negate(hir_bclass_from_char(ascii_class( - &ast::ClassAsciiKind::Digit - ))) + hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit)) ); assert_eq!( t_bytes(r"(?-u)[^[^\w&&\d]]"), - hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit)) + hir_ascii_bclass(&ast::ClassAsciiKind::Digit) ); assert_eq!( t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"), - hir_negate(hir_bclass_from_char(ascii_class( - &ast::ClassAsciiKind::Word - ))) + hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word)) ); } @@ -2924,284 +3159,420 @@ mod tests { , # comment 10 # comment } # comment"), - hir_range( - true, - hir::RepetitionRange::Bounded(5, 10), - hir_lit("a") - ) + hir_range(true, 5, Some(10), hir_lit("a")) ); assert_eq!(t(r"(?x)a\ # hi there"), hir_lit("a ")); } #[test] - fn analysis_is_always_utf8() { + fn analysis_is_utf8() { // Positive examples. 
- assert!(t_bytes(r"a").is_always_utf8()); - assert!(t_bytes(r"ab").is_always_utf8()); - assert!(t_bytes(r"(?-u)a").is_always_utf8()); - assert!(t_bytes(r"(?-u)ab").is_always_utf8()); - assert!(t_bytes(r"\xFF").is_always_utf8()); - assert!(t_bytes(r"\xFF\xFF").is_always_utf8()); - assert!(t_bytes(r"[^a]").is_always_utf8()); - assert!(t_bytes(r"[^a][^a]").is_always_utf8()); - assert!(t_bytes(r"\b").is_always_utf8()); - assert!(t_bytes(r"\B").is_always_utf8()); - assert!(t_bytes(r"(?-u)\b").is_always_utf8()); + assert!(props_bytes(r"a").is_utf8()); + assert!(props_bytes(r"ab").is_utf8()); + assert!(props_bytes(r"(?-u)a").is_utf8()); + assert!(props_bytes(r"(?-u)ab").is_utf8()); + assert!(props_bytes(r"\xFF").is_utf8()); + assert!(props_bytes(r"\xFF\xFF").is_utf8()); + assert!(props_bytes(r"[^a]").is_utf8()); + assert!(props_bytes(r"[^a][^a]").is_utf8()); + assert!(props_bytes(r"\b").is_utf8()); + assert!(props_bytes(r"\B").is_utf8()); + assert!(props_bytes(r"(?-u)\b").is_utf8()); + assert!(props_bytes(r"(?-u)\B").is_utf8()); // Negative examples. 
- assert!(!t_bytes(r"(?-u)\xFF").is_always_utf8()); - assert!(!t_bytes(r"(?-u)\xFF\xFF").is_always_utf8()); - assert!(!t_bytes(r"(?-u)[^a]").is_always_utf8()); - assert!(!t_bytes(r"(?-u)[^a][^a]").is_always_utf8()); - assert!(!t_bytes(r"(?-u)\B").is_always_utf8()); + assert!(!props_bytes(r"(?-u)\xFF").is_utf8()); + assert!(!props_bytes(r"(?-u)\xFF\xFF").is_utf8()); + assert!(!props_bytes(r"(?-u)[^a]").is_utf8()); + assert!(!props_bytes(r"(?-u)[^a][^a]").is_utf8()); + } + + #[test] + fn analysis_captures_len() { + assert_eq!(0, props(r"a").explicit_captures_len()); + assert_eq!(0, props(r"(?:a)").explicit_captures_len()); + assert_eq!(0, props(r"(?i-u:a)").explicit_captures_len()); + assert_eq!(0, props(r"(?i-u)a").explicit_captures_len()); + assert_eq!(1, props(r"(a)").explicit_captures_len()); + assert_eq!(1, props(r"(?Pa)").explicit_captures_len()); + assert_eq!(1, props(r"()").explicit_captures_len()); + assert_eq!(1, props(r"()a").explicit_captures_len()); + assert_eq!(1, props(r"(a)+").explicit_captures_len()); + assert_eq!(2, props(r"(a)(b)").explicit_captures_len()); + assert_eq!(2, props(r"(a)|(b)").explicit_captures_len()); + assert_eq!(2, props(r"((a))").explicit_captures_len()); + assert_eq!(1, props(r"([a&&b])").explicit_captures_len()); + } + + #[test] + fn analysis_static_captures_len() { + let len = |pattern| props(pattern).static_explicit_captures_len(); + assert_eq!(Some(0), len(r"")); + assert_eq!(Some(0), len(r"foo|bar")); + assert_eq!(None, len(r"(foo)|bar")); + assert_eq!(None, len(r"foo|(bar)")); + assert_eq!(Some(1), len(r"(foo|bar)")); + assert_eq!(Some(1), len(r"(a|b|c|d|e|f)")); + assert_eq!(Some(1), len(r"(a)|(b)|(c)|(d)|(e)|(f)")); + assert_eq!(Some(2), len(r"(a)(b)|(c)(d)|(e)(f)")); + assert_eq!(Some(6), len(r"(a)(b)(c)(d)(e)(f)")); + assert_eq!(Some(3), len(r"(a)(b)(extra)|(a)(b)()")); + assert_eq!(Some(3), len(r"(a)(b)((?:extra)?)")); + assert_eq!(None, len(r"(a)(b)(extra)?")); + assert_eq!(Some(1), len(r"(foo)|(bar)")); + 
assert_eq!(Some(2), len(r"(foo)(bar)")); + assert_eq!(Some(2), len(r"(foo)+(bar)")); + assert_eq!(None, len(r"(foo)*(bar)")); + assert_eq!(Some(0), len(r"(foo)?{0}")); + assert_eq!(None, len(r"(foo)?{1}")); + assert_eq!(Some(1), len(r"(foo){1}")); + assert_eq!(Some(1), len(r"(foo){1,}")); + assert_eq!(Some(1), len(r"(foo){1,}?")); + assert_eq!(None, len(r"(foo){1,}??")); + assert_eq!(None, len(r"(foo){0,}")); + assert_eq!(Some(1), len(r"(foo)(?:bar)")); + assert_eq!(Some(2), len(r"(foo(?:bar)+)(?:baz(boo))")); + assert_eq!(Some(2), len(r"(?Pfoo)(?:bar)(bal|loon)")); + assert_eq!( + Some(2), + len(r#"<(a)[^>]+href="([^"]+)"|<(img)[^>]+src="([^"]+)""#) + ); } #[test] fn analysis_is_all_assertions() { // Positive examples. - assert!(t(r"\b").is_all_assertions()); - assert!(t(r"\B").is_all_assertions()); - assert!(t(r"^").is_all_assertions()); - assert!(t(r"$").is_all_assertions()); - assert!(t(r"\A").is_all_assertions()); - assert!(t(r"\z").is_all_assertions()); - assert!(t(r"$^\z\A\b\B").is_all_assertions()); - assert!(t(r"$|^|\z|\A|\b|\B").is_all_assertions()); - assert!(t(r"^$|$^").is_all_assertions()); - assert!(t(r"((\b)+())*^").is_all_assertions()); + let p = props(r"\b"); + assert!(!p.look_set().is_empty()); + assert_eq!(p.minimum_len(), Some(0)); + + let p = props(r"\B"); + assert!(!p.look_set().is_empty()); + assert_eq!(p.minimum_len(), Some(0)); + + let p = props(r"^"); + assert!(!p.look_set().is_empty()); + assert_eq!(p.minimum_len(), Some(0)); + + let p = props(r"$"); + assert!(!p.look_set().is_empty()); + assert_eq!(p.minimum_len(), Some(0)); + + let p = props(r"\A"); + assert!(!p.look_set().is_empty()); + assert_eq!(p.minimum_len(), Some(0)); + + let p = props(r"\z"); + assert!(!p.look_set().is_empty()); + assert_eq!(p.minimum_len(), Some(0)); + + let p = props(r"$^\z\A\b\B"); + assert!(!p.look_set().is_empty()); + assert_eq!(p.minimum_len(), Some(0)); + + let p = props(r"$|^|\z|\A|\b|\B"); + assert!(!p.look_set().is_empty()); + 
assert_eq!(p.minimum_len(), Some(0)); + + let p = props(r"^$|$^"); + assert!(!p.look_set().is_empty()); + assert_eq!(p.minimum_len(), Some(0)); + + let p = props(r"((\b)+())*^"); + assert!(!p.look_set().is_empty()); + assert_eq!(p.minimum_len(), Some(0)); // Negative examples. - assert!(!t(r"^a").is_all_assertions()); + let p = props(r"^a"); + assert!(!p.look_set().is_empty()); + assert_eq!(p.minimum_len(), Some(1)); } #[test] fn analysis_is_anchored() { + let is_start = |p| props(p).look_set_prefix().contains(Look::Start); + let is_end = |p| props(p).look_set_suffix().contains(Look::End); + // Positive examples. - assert!(t(r"^").is_anchored_start()); - assert!(t(r"$").is_anchored_end()); - assert!(t(r"^").is_line_anchored_start()); - assert!(t(r"$").is_line_anchored_end()); - - assert!(t(r"^^").is_anchored_start()); - assert!(t(r"$$").is_anchored_end()); - assert!(t(r"^^").is_line_anchored_start()); - assert!(t(r"$$").is_line_anchored_end()); - - assert!(t(r"^$").is_anchored_start()); - assert!(t(r"^$").is_anchored_end()); - assert!(t(r"^$").is_line_anchored_start()); - assert!(t(r"^$").is_line_anchored_end()); - - assert!(t(r"^foo").is_anchored_start()); - assert!(t(r"foo$").is_anchored_end()); - assert!(t(r"^foo").is_line_anchored_start()); - assert!(t(r"foo$").is_line_anchored_end()); - - assert!(t(r"^foo|^bar").is_anchored_start()); - assert!(t(r"foo$|bar$").is_anchored_end()); - assert!(t(r"^foo|^bar").is_line_anchored_start()); - assert!(t(r"foo$|bar$").is_line_anchored_end()); - - assert!(t(r"^(foo|bar)").is_anchored_start()); - assert!(t(r"(foo|bar)$").is_anchored_end()); - assert!(t(r"^(foo|bar)").is_line_anchored_start()); - assert!(t(r"(foo|bar)$").is_line_anchored_end()); - - assert!(t(r"^+").is_anchored_start()); - assert!(t(r"$+").is_anchored_end()); - assert!(t(r"^+").is_line_anchored_start()); - assert!(t(r"$+").is_line_anchored_end()); - assert!(t(r"^++").is_anchored_start()); - assert!(t(r"$++").is_anchored_end()); - 
assert!(t(r"^++").is_line_anchored_start()); - assert!(t(r"$++").is_line_anchored_end()); - assert!(t(r"(^)+").is_anchored_start()); - assert!(t(r"($)+").is_anchored_end()); - assert!(t(r"(^)+").is_line_anchored_start()); - assert!(t(r"($)+").is_line_anchored_end()); - - assert!(t(r"$^").is_anchored_start()); - assert!(t(r"$^").is_anchored_start()); - assert!(t(r"$^").is_line_anchored_end()); - assert!(t(r"$^").is_line_anchored_end()); - assert!(t(r"$^|^$").is_anchored_start()); - assert!(t(r"$^|^$").is_anchored_end()); - assert!(t(r"$^|^$").is_line_anchored_start()); - assert!(t(r"$^|^$").is_line_anchored_end()); - - assert!(t(r"\b^").is_anchored_start()); - assert!(t(r"$\b").is_anchored_end()); - assert!(t(r"\b^").is_line_anchored_start()); - assert!(t(r"$\b").is_line_anchored_end()); - assert!(t(r"^(?m:^)").is_anchored_start()); - assert!(t(r"(?m:$)$").is_anchored_end()); - assert!(t(r"^(?m:^)").is_line_anchored_start()); - assert!(t(r"(?m:$)$").is_line_anchored_end()); - assert!(t(r"(?m:^)^").is_anchored_start()); - assert!(t(r"$(?m:$)").is_anchored_end()); - assert!(t(r"(?m:^)^").is_line_anchored_start()); - assert!(t(r"$(?m:$)").is_line_anchored_end()); + assert!(is_start(r"^")); + assert!(is_end(r"$")); - // Negative examples. 
- assert!(!t(r"(?m)^").is_anchored_start()); - assert!(!t(r"(?m)$").is_anchored_end()); - assert!(!t(r"(?m:^$)|$^").is_anchored_start()); - assert!(!t(r"(?m:^$)|$^").is_anchored_end()); - assert!(!t(r"$^|(?m:^$)").is_anchored_start()); - assert!(!t(r"$^|(?m:^$)").is_anchored_end()); - - assert!(!t(r"a^").is_anchored_start()); - assert!(!t(r"$a").is_anchored_start()); - assert!(!t(r"a^").is_line_anchored_start()); - assert!(!t(r"$a").is_line_anchored_start()); - - assert!(!t(r"a^").is_anchored_end()); - assert!(!t(r"$a").is_anchored_end()); - assert!(!t(r"a^").is_line_anchored_end()); - assert!(!t(r"$a").is_line_anchored_end()); - - assert!(!t(r"^foo|bar").is_anchored_start()); - assert!(!t(r"foo|bar$").is_anchored_end()); - assert!(!t(r"^foo|bar").is_line_anchored_start()); - assert!(!t(r"foo|bar$").is_line_anchored_end()); - - assert!(!t(r"^*").is_anchored_start()); - assert!(!t(r"$*").is_anchored_end()); - assert!(!t(r"^*").is_line_anchored_start()); - assert!(!t(r"$*").is_line_anchored_end()); - assert!(!t(r"^*+").is_anchored_start()); - assert!(!t(r"$*+").is_anchored_end()); - assert!(!t(r"^*+").is_line_anchored_start()); - assert!(!t(r"$*+").is_line_anchored_end()); - assert!(!t(r"^+*").is_anchored_start()); - assert!(!t(r"$+*").is_anchored_end()); - assert!(!t(r"^+*").is_line_anchored_start()); - assert!(!t(r"$+*").is_line_anchored_end()); - assert!(!t(r"(^)*").is_anchored_start()); - assert!(!t(r"($)*").is_anchored_end()); - assert!(!t(r"(^)*").is_line_anchored_start()); - assert!(!t(r"($)*").is_line_anchored_end()); - } + assert!(is_start(r"^^")); + assert!(props(r"$$").look_set_suffix().contains(Look::End)); - #[test] - fn analysis_is_line_anchored() { - assert!(t(r"(?m)^(foo|bar)").is_line_anchored_start()); - assert!(t(r"(?m)(foo|bar)$").is_line_anchored_end()); + assert!(is_start(r"^$")); + assert!(is_end(r"^$")); - assert!(t(r"(?m)^foo|^bar").is_line_anchored_start()); - assert!(t(r"(?m)foo$|bar$").is_line_anchored_end()); + assert!(is_start(r"^foo")); 
+ assert!(is_end(r"foo$")); + + assert!(is_start(r"^foo|^bar")); + assert!(is_end(r"foo$|bar$")); + + assert!(is_start(r"^(foo|bar)")); + assert!(is_end(r"(foo|bar)$")); + + assert!(is_start(r"^+")); + assert!(is_end(r"$+")); + assert!(is_start(r"^++")); + assert!(is_end(r"$++")); + assert!(is_start(r"(^)+")); + assert!(is_end(r"($)+")); + + assert!(is_start(r"$^")); + assert!(is_start(r"$^")); + assert!(is_start(r"$^|^$")); + assert!(is_end(r"$^|^$")); + + assert!(is_start(r"\b^")); + assert!(is_end(r"$\b")); + assert!(is_start(r"^(?m:^)")); + assert!(is_end(r"(?m:$)$")); + assert!(is_start(r"(?m:^)^")); + assert!(is_end(r"$(?m:$)")); + + // Negative examples. + assert!(!is_start(r"(?m)^")); + assert!(!is_end(r"(?m)$")); + assert!(!is_start(r"(?m:^$)|$^")); + assert!(!is_end(r"(?m:^$)|$^")); + assert!(!is_start(r"$^|(?m:^$)")); + assert!(!is_end(r"$^|(?m:^$)")); - assert!(t(r"(?m)^").is_line_anchored_start()); - assert!(t(r"(?m)$").is_line_anchored_end()); + assert!(!is_start(r"a^")); + assert!(!is_start(r"$a")); - assert!(t(r"(?m:^$)|$^").is_line_anchored_start()); - assert!(t(r"(?m:^$)|$^").is_line_anchored_end()); + assert!(!is_end(r"a^")); + assert!(!is_end(r"$a")); - assert!(t(r"$^|(?m:^$)").is_line_anchored_start()); - assert!(t(r"$^|(?m:^$)").is_line_anchored_end()); + assert!(!is_start(r"^foo|bar")); + assert!(!is_end(r"foo|bar$")); + + assert!(!is_start(r"^*")); + assert!(!is_end(r"$*")); + assert!(!is_start(r"^*+")); + assert!(!is_end(r"$*+")); + assert!(!is_start(r"^+*")); + assert!(!is_end(r"$+*")); + assert!(!is_start(r"(^)*")); + assert!(!is_end(r"($)*")); } #[test] fn analysis_is_any_anchored() { + let is_start = |p| props(p).look_set().contains(Look::Start); + let is_end = |p| props(p).look_set().contains(Look::End); + // Positive examples. 
- assert!(t(r"^").is_any_anchored_start()); - assert!(t(r"$").is_any_anchored_end()); - assert!(t(r"\A").is_any_anchored_start()); - assert!(t(r"\z").is_any_anchored_end()); + assert!(is_start(r"^")); + assert!(is_end(r"$")); + assert!(is_start(r"\A")); + assert!(is_end(r"\z")); // Negative examples. - assert!(!t(r"(?m)^").is_any_anchored_start()); - assert!(!t(r"(?m)$").is_any_anchored_end()); - assert!(!t(r"$").is_any_anchored_start()); - assert!(!t(r"^").is_any_anchored_end()); + assert!(!is_start(r"(?m)^")); + assert!(!is_end(r"(?m)$")); + assert!(!is_start(r"$")); + assert!(!is_end(r"^")); } #[test] - fn analysis_is_match_empty() { + fn analysis_can_empty() { // Positive examples. - assert!(t(r"").is_match_empty()); - assert!(t(r"()").is_match_empty()); - assert!(t(r"()*").is_match_empty()); - assert!(t(r"()+").is_match_empty()); - assert!(t(r"()?").is_match_empty()); - assert!(t(r"a*").is_match_empty()); - assert!(t(r"a?").is_match_empty()); - assert!(t(r"a{0}").is_match_empty()); - assert!(t(r"a{0,}").is_match_empty()); - assert!(t(r"a{0,1}").is_match_empty()); - assert!(t(r"a{0,10}").is_match_empty()); + let assert_empty = + |p| assert_eq!(Some(0), props_bytes(p).minimum_len()); + assert_empty(r""); + assert_empty(r"()"); + assert_empty(r"()*"); + assert_empty(r"()+"); + assert_empty(r"()?"); + assert_empty(r"a*"); + assert_empty(r"a?"); + assert_empty(r"a{0}"); + assert_empty(r"a{0,}"); + assert_empty(r"a{0,1}"); + assert_empty(r"a{0,10}"); #[cfg(feature = "unicode-gencat")] - assert!(t(r"\pL*").is_match_empty()); - assert!(t(r"a*|b").is_match_empty()); - assert!(t(r"b|a*").is_match_empty()); - assert!(t(r"a|").is_match_empty()); - assert!(t(r"|a").is_match_empty()); - assert!(t(r"a||b").is_match_empty()); - assert!(t(r"a*a?(abcd)*").is_match_empty()); - assert!(t(r"^").is_match_empty()); - assert!(t(r"$").is_match_empty()); - assert!(t(r"(?m)^").is_match_empty()); - assert!(t(r"(?m)$").is_match_empty()); - assert!(t(r"\A").is_match_empty()); - 
assert!(t(r"\z").is_match_empty()); - assert!(t(r"\B").is_match_empty()); - assert!(t_bytes(r"(?-u)\B").is_match_empty()); - assert!(t(r"\b").is_match_empty()); - assert!(t(r"(?-u)\b").is_match_empty()); + assert_empty(r"\pL*"); + assert_empty(r"a*|b"); + assert_empty(r"b|a*"); + assert_empty(r"a|"); + assert_empty(r"|a"); + assert_empty(r"a||b"); + assert_empty(r"a*a?(abcd)*"); + assert_empty(r"^"); + assert_empty(r"$"); + assert_empty(r"(?m)^"); + assert_empty(r"(?m)$"); + assert_empty(r"\A"); + assert_empty(r"\z"); + assert_empty(r"\B"); + assert_empty(r"(?-u)\B"); + assert_empty(r"\b"); + assert_empty(r"(?-u)\b"); // Negative examples. - assert!(!t(r"a+").is_match_empty()); - assert!(!t(r"a{1}").is_match_empty()); - assert!(!t(r"a{1,}").is_match_empty()); - assert!(!t(r"a{1,2}").is_match_empty()); - assert!(!t(r"a{1,10}").is_match_empty()); - assert!(!t(r"b|a").is_match_empty()); - assert!(!t(r"a*a+(abcd)*").is_match_empty()); + let assert_non_empty = + |p| assert_ne!(Some(0), props_bytes(p).minimum_len()); + assert_non_empty(r"a+"); + assert_non_empty(r"a{1}"); + assert_non_empty(r"a{1,}"); + assert_non_empty(r"a{1,2}"); + assert_non_empty(r"a{1,10}"); + assert_non_empty(r"b|a"); + assert_non_empty(r"a*a+(abcd)*"); + #[cfg(feature = "unicode-gencat")] + assert_non_empty(r"\P{any}"); + assert_non_empty(r"[a--a]"); + assert_non_empty(r"[a&&b]"); } #[test] fn analysis_is_literal() { // Positive examples. - assert!(t(r"a").is_literal()); - assert!(t(r"ab").is_literal()); - assert!(t(r"abc").is_literal()); - assert!(t(r"(?m)abc").is_literal()); + assert!(props(r"a").is_literal()); + assert!(props(r"ab").is_literal()); + assert!(props(r"abc").is_literal()); + assert!(props(r"(?m)abc").is_literal()); + assert!(props(r"(?:a)").is_literal()); + assert!(props(r"foo(?:a)").is_literal()); + assert!(props(r"(?:a)foo").is_literal()); + assert!(props(r"[a]").is_literal()); // Negative examples. 
- assert!(!t(r"").is_literal()); - assert!(!t(r"^").is_literal()); - assert!(!t(r"a|b").is_literal()); - assert!(!t(r"(a)").is_literal()); - assert!(!t(r"a+").is_literal()); - assert!(!t(r"foo(a)").is_literal()); - assert!(!t(r"(a)foo").is_literal()); - assert!(!t(r"[a]").is_literal()); + assert!(!props(r"").is_literal()); + assert!(!props(r"^").is_literal()); + assert!(!props(r"a|b").is_literal()); + assert!(!props(r"(a)").is_literal()); + assert!(!props(r"a+").is_literal()); + assert!(!props(r"foo(a)").is_literal()); + assert!(!props(r"(a)foo").is_literal()); + assert!(!props(r"[ab]").is_literal()); } #[test] fn analysis_is_alternation_literal() { // Positive examples. - assert!(t(r"a").is_alternation_literal()); - assert!(t(r"ab").is_alternation_literal()); - assert!(t(r"abc").is_alternation_literal()); - assert!(t(r"(?m)abc").is_alternation_literal()); - assert!(t(r"a|b").is_alternation_literal()); - assert!(t(r"a|b|c").is_alternation_literal()); - assert!(t(r"foo|bar").is_alternation_literal()); - assert!(t(r"foo|bar|baz").is_alternation_literal()); + assert!(props(r"a").is_alternation_literal()); + assert!(props(r"ab").is_alternation_literal()); + assert!(props(r"abc").is_alternation_literal()); + assert!(props(r"(?m)abc").is_alternation_literal()); + assert!(props(r"foo|bar").is_alternation_literal()); + assert!(props(r"foo|bar|baz").is_alternation_literal()); + assert!(props(r"[a]").is_alternation_literal()); + assert!(props(r"(?:ab)|cd").is_alternation_literal()); + assert!(props(r"ab|(?:cd)").is_alternation_literal()); // Negative examples. 
- assert!(!t(r"").is_alternation_literal()); - assert!(!t(r"^").is_alternation_literal()); - assert!(!t(r"(a)").is_alternation_literal()); - assert!(!t(r"a+").is_alternation_literal()); - assert!(!t(r"foo(a)").is_alternation_literal()); - assert!(!t(r"(a)foo").is_alternation_literal()); - assert!(!t(r"[a]").is_alternation_literal()); - assert!(!t(r"[a]|b").is_alternation_literal()); - assert!(!t(r"a|[b]").is_alternation_literal()); - assert!(!t(r"(a)|b").is_alternation_literal()); - assert!(!t(r"a|(b)").is_alternation_literal()); + assert!(!props(r"").is_alternation_literal()); + assert!(!props(r"^").is_alternation_literal()); + assert!(!props(r"(a)").is_alternation_literal()); + assert!(!props(r"a+").is_alternation_literal()); + assert!(!props(r"foo(a)").is_alternation_literal()); + assert!(!props(r"(a)foo").is_alternation_literal()); + assert!(!props(r"[ab]").is_alternation_literal()); + assert!(!props(r"[ab]|b").is_alternation_literal()); + assert!(!props(r"a|[ab]").is_alternation_literal()); + assert!(!props(r"(a)|b").is_alternation_literal()); + assert!(!props(r"a|(b)").is_alternation_literal()); + assert!(!props(r"a|b").is_alternation_literal()); + assert!(!props(r"a|b|c").is_alternation_literal()); + assert!(!props(r"[a]|b").is_alternation_literal()); + assert!(!props(r"a|[b]").is_alternation_literal()); + assert!(!props(r"(?:a)|b").is_alternation_literal()); + assert!(!props(r"a|(?:b)").is_alternation_literal()); + } + + // This tests that the smart Hir::concat constructor simplifies the given + // exprs in a way we expect. 
+ #[test] + fn smart_concat() { + assert_eq!(t(""), Hir::empty()); + assert_eq!(t("(?:)"), Hir::empty()); + assert_eq!(t("abc"), hir_lit("abc")); + assert_eq!(t("(?:foo)(?:bar)"), hir_lit("foobar")); + assert_eq!(t("quux(?:foo)(?:bar)baz"), hir_lit("quuxfoobarbaz")); + assert_eq!( + t("foo(?:bar^baz)quux"), + hir_cat(vec![ + hir_lit("foobar"), + hir_look(hir::Look::Start), + hir_lit("bazquux"), + ]) + ); + assert_eq!( + t("foo(?:ba(?:r^b)az)quux"), + hir_cat(vec![ + hir_lit("foobar"), + hir_look(hir::Look::Start), + hir_lit("bazquux"), + ]) + ); + } + + // This tests that the smart Hir::alternation constructor simplifies the + // given exprs in a way we expect. + #[test] + fn smart_alternation() { + assert_eq!( + t("(?:foo)|(?:bar)"), + hir_alt(vec![hir_lit("foo"), hir_lit("bar")]) + ); + assert_eq!( + t("quux|(?:abc|def|xyz)|baz"), + hir_alt(vec![ + hir_lit("quux"), + hir_lit("abc"), + hir_lit("def"), + hir_lit("xyz"), + hir_lit("baz"), + ]) + ); + assert_eq!( + t("quux|(?:abc|(?:def|mno)|xyz)|baz"), + hir_alt(vec![ + hir_lit("quux"), + hir_lit("abc"), + hir_lit("def"), + hir_lit("mno"), + hir_lit("xyz"), + hir_lit("baz"), + ]) + ); + assert_eq!( + t("a|b|c|d|e|f|x|y|z"), + hir_uclass(&[('a', 'f'), ('x', 'z')]), + ); + // Tests that we lift common prefixes out of an alternation. 
+ assert_eq!( + t("[A-Z]foo|[A-Z]quux"), + hir_cat(vec![ + hir_uclass(&[('A', 'Z')]), + hir_alt(vec![hir_lit("foo"), hir_lit("quux")]), + ]), + ); + assert_eq!( + t("[A-Z][A-Z]|[A-Z]quux"), + hir_cat(vec![ + hir_uclass(&[('A', 'Z')]), + hir_alt(vec![hir_uclass(&[('A', 'Z')]), hir_lit("quux")]), + ]), + ); + assert_eq!( + t("[A-Z][A-Z]|[A-Z][A-Z]quux"), + hir_cat(vec![ + hir_uclass(&[('A', 'Z')]), + hir_uclass(&[('A', 'Z')]), + hir_alt(vec![Hir::empty(), hir_lit("quux")]), + ]), + ); + assert_eq!( + t("[A-Z]foo|[A-Z]foobar"), + hir_cat(vec![ + hir_uclass(&[('A', 'Z')]), + hir_alt(vec![hir_lit("foo"), hir_lit("foobar")]), + ]), + ); } } diff --git a/regex-syntax/src/hir/visitor.rs b/regex-syntax/src/hir/visitor.rs index 4f5a70909c..e5f15cf1c2 100644 --- a/regex-syntax/src/hir/visitor.rs +++ b/regex-syntax/src/hir/visitor.rs @@ -1,3 +1,5 @@ +use alloc::{vec, vec::Vec}; + use crate::hir::{self, Hir, HirKind}; /// A trait for visiting the high-level IR (HIR) in depth first order. @@ -9,7 +11,7 @@ use crate::hir::{self, Hir, HirKind}; /// important since the size of an HIR may be proportional to end user input. /// /// Typical usage of this trait involves providing an implementation and then -/// running it using the [`visit`](fn.visit.html) function. +/// running it using the [`visit`] function. pub trait Visitor { /// The result of visiting an HIR. type Output; @@ -44,8 +46,7 @@ pub trait Visitor { /// Executes an implementation of `Visitor` in constant stack space. /// /// This function will visit every node in the given `Hir` while calling -/// appropriate methods provided by the -/// [`Visitor`](trait.Visitor.html) trait. +/// appropriate methods provided by the [`Visitor`] trait. /// /// The primary use case for this method is when one wants to perform case /// analysis over an `Hir` without using a stack size proportional to the depth @@ -74,9 +75,9 @@ enum Frame<'a> { /// A stack frame allocated just before descending into a repetition /// operator's child node. 
Repetition(&'a hir::Repetition), - /// A stack frame allocated just before descending into a group's child + /// A stack frame allocated just before descending into a capture's child /// node. - Group(&'a hir::Group), + Capture(&'a hir::Capture), /// The stack frame used while visiting every child node of a concatenation /// of expressions. Concat { @@ -149,7 +150,7 @@ impl<'a> HeapVisitor<'a> { fn induct(&mut self, hir: &'a Hir) -> Option> { match *hir.kind() { HirKind::Repetition(ref x) => Some(Frame::Repetition(x)), - HirKind::Group(ref x) => Some(Frame::Group(x)), + HirKind::Capture(ref x) => Some(Frame::Capture(x)), HirKind::Concat(ref x) if x.is_empty() => None, HirKind::Concat(ref x) => { Some(Frame::Concat { head: &x[0], tail: &x[1..] }) @@ -167,7 +168,7 @@ impl<'a> HeapVisitor<'a> { fn pop(&self, induct: Frame<'a>) -> Option> { match induct { Frame::Repetition(_) => None, - Frame::Group(_) => None, + Frame::Capture(_) => None, Frame::Concat { tail, .. } => { if tail.is_empty() { None @@ -194,8 +195,8 @@ impl<'a> Frame<'a> { /// child HIR node to visit. fn child(&self) -> &'a Hir { match *self { - Frame::Repetition(rep) => &rep.hir, - Frame::Group(group) => &group.hir, + Frame::Repetition(rep) => &rep.sub, + Frame::Capture(capture) => &capture.sub, Frame::Concat { head, .. } => head, Frame::Alternation { head, .. } => head, } diff --git a/regex-syntax/src/lib.rs b/regex-syntax/src/lib.rs index 1dfb38af39..4953641d73 100644 --- a/regex-syntax/src/lib.rs +++ b/regex-syntax/src/lib.rs @@ -3,14 +3,14 @@ This crate provides a robust regular expression parser. This crate defines two primary types: -* [`Ast`](ast/enum.Ast.html) is the abstract syntax of a regular expression. +* [`Ast`](ast::Ast) is the abstract syntax of a regular expression. An abstract syntax corresponds to a *structured representation* of the concrete syntax of a regular expression, where the concrete syntax is the pattern string itself (e.g., `foo(bar)+`). 
Given some abstract syntax, it can be converted back to the original concrete syntax (modulo some details, like whitespace). To a first approximation, the abstract syntax is complex and difficult to analyze. -* [`Hir`](hir/struct.Hir.html) is the high-level intermediate representation +* [`Hir`](hir::Hir) is the high-level intermediate representation ("HIR" or "high-level IR" for short) of regular expression. It corresponds to an intermediate state of a regular expression that sits between the abstract syntax and the low level compiled opcodes that are eventually responsible for @@ -22,14 +22,15 @@ This crate defines two primary types: These two types come with conversion routines: -* An [`ast::parse::Parser`](ast/parse/struct.Parser.html) converts concrete - syntax (a `&str`) to an [`Ast`](ast/enum.Ast.html). -* A [`hir::translate::Translator`](hir/translate/struct.Translator.html) - converts an [`Ast`](ast/enum.Ast.html) to a [`Hir`](hir/struct.Hir.html). +* An [`ast::parse::Parser`] converts concrete syntax (a `&str`) to an +[`Ast`](ast::Ast). +* A [`hir::translate::Translator`] converts an [`Ast`](ast::Ast) to a +[`Hir`](hir::Hir). As a convenience, the above two conversion routines are combined into one via -the top-level [`Parser`](struct.Parser.html) type. This `Parser` will first -convert your pattern to an `Ast` and then convert the `Ast` to an `Hir`. +the top-level [`Parser`] type. This `Parser` will first convert your pattern to +an `Ast` and then convert the `Ast` to an `Hir`. It's also exposed as top-level +[`parse`] free function. # Example @@ -37,14 +38,14 @@ convert your pattern to an `Ast` and then convert the `Ast` to an `Hir`. 
This example shows how to parse a pattern string into its HIR: ``` -use regex_syntax::Parser; -use regex_syntax::hir::{self, Hir}; +use regex_syntax::{hir::Hir, parse}; -let hir = Parser::new().parse("a|b").unwrap(); +let hir = parse("a|b")?; assert_eq!(hir, Hir::alternation(vec![ - Hir::literal(hir::Literal::Unicode('a')), - Hir::literal(hir::Literal::Unicode('b')), + Hir::literal("a".as_bytes()), + Hir::literal("b".as_bytes()), ])); +# Ok::<(), Box>(()) ``` @@ -81,10 +82,9 @@ in a monospace font. # Literal extraction -This crate provides limited support for -[literal extraction from `Hir` values](hir/literal/struct.Literals.html). -Be warned that literal extraction currently uses recursion, and therefore, -stack size proportional to the size of the `Hir`. +This crate provides limited support for [literal extraction from `Hir` +values](hir::literal). Be warned that literal extraction uses recursion, and +therefore, stack size proportional to the size of the `Hir`. The purpose of literal extraction is to speed up searches. That is, if you know a regular expression must match a prefix or suffix literal, then it is @@ -116,6 +116,11 @@ match semantics of a regular expression. The following features are available: +* **std** - + Enables support for the standard library. This feature is enabled by default. + When disabled, only `core` and `alloc` are used. Otherwise, enabling `std` + generally just enables `std::error::Error` trait impls for the various error + types. * **unicode** - Enables all Unicode features. This feature is enabled by default, and will always cover all Unicode features, even if more are added in the future. @@ -154,19 +159,32 @@ The following features are available: `\p{sb=ATerm}`. 
*/ -#![deny(missing_docs)] -#![warn(missing_debug_implementations)] +#![no_std] #![forbid(unsafe_code)] +#![deny(missing_docs, rustdoc::broken_intra_doc_links)] +#![warn(missing_debug_implementations)] +#![cfg_attr(docsrs, feature(doc_auto_cfg))] + +#[cfg(any(test, feature = "std"))] +extern crate std; -pub use crate::error::{Error, Result}; -pub use crate::parser::{Parser, ParserBuilder}; -pub use crate::unicode::UnicodeWordError; +extern crate alloc; + +pub use crate::{ + error::Error, + parser::{parse, Parser, ParserBuilder}, + unicode::UnicodeWordError, +}; + +use alloc::string::String; pub mod ast; +mod debug; mod either; mod error; pub mod hir; mod parser; +mod rank; mod unicode; mod unicode_tables; pub mod utf8; @@ -197,13 +215,43 @@ pub fn escape_into(text: &str, buf: &mut String) { /// Returns true if the given character has significance in a regex. /// -/// These are the only characters that are allowed to be escaped, with one -/// exception: an ASCII space character may be escaped when extended mode (with -/// the `x` flag) is enabled. In particular, `is_meta_character(' ')` returns -/// `false`. +/// Generally speaking, these are the only characters which _must_ be escaped +/// in order to match their literal meaning. For example, to match a literal +/// `|`, one could write `\|`. Sometimes escaping isn't always necessary. For +/// example, `-` is treated as a meta character because of its significance +/// for writing ranges inside of character classes, but the regex `-` will +/// match a literal `-` because `-` has no special meaning outside of character +/// classes. +/// +/// In order to determine whether a character may be escaped at all, the +/// [`is_escapeable_character`] routine should be used. The difference between +/// `is_meta_character` and `is_escapeable_character` is that the latter will +/// return true for some characters that are _not_ meta characters. For +/// example, `%` and `\%` both match a literal `%` in all contexts. 
In other +/// words, `is_escapeable_character` includes "superfluous" escapes. /// /// Note that the set of characters for which this function returns `true` or -/// `false` is fixed and won't change in a semver compatible release. +/// `false` is fixed and won't change in a semver compatible release. (In this +/// case, "semver compatible release" actually refers to the `regex` crate +/// itself, since reducing or expanding the set of meta characters would be a +/// breaking change for not just `regex-syntax` but also `regex` itself.) +/// +/// # Example +/// +/// ``` +/// use regex_syntax::is_meta_character; +/// +/// assert!(is_meta_character('?')); +/// assert!(is_meta_character('-')); +/// assert!(is_meta_character('&')); +/// assert!(is_meta_character('#')); +/// +/// assert!(!is_meta_character('%')); +/// assert!(!is_meta_character('/')); +/// assert!(!is_meta_character('!')); +/// assert!(!is_meta_character('"')); +/// assert!(!is_meta_character('e')); +/// ``` pub fn is_meta_character(c: char) -> bool { match c { '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' | '[' | ']' | '{' @@ -212,6 +260,68 @@ pub fn is_meta_character(c: char) -> bool { } } +/// Returns true if the given character can be escaped in a regex. +/// +/// This returns true in all cases that `is_meta_character` returns true, but +/// also returns true in some cases where `is_meta_character` returns false. +/// For example, `%` is not a meta character, but it is escapeable. That is, +/// `%` and `\%` both match a literal `%` in all contexts. +/// +/// The purpose of this routine is to provide knowledge about what characters +/// may be escaped. Namely, most regex engines permit "superfluous" escapes +/// where characters without any special significance may be escaped even +/// though there is no actual _need_ to do so. +/// +/// This will return false for some characters. For example, `e` is not +/// escapeable. 
Therefore, `\e` will either result in a parse error (which is +/// true today), or it could backwards compatibly evolve into a new construct +/// with its own meaning. Indeed, that is the purpose of banning _some_ +/// superfluous escapes: it provides a way to evolve the syntax in a compatible +/// manner. +/// +/// # Example +/// +/// ``` +/// use regex_syntax::is_escapeable_character; +/// +/// assert!(is_escapeable_character('?')); +/// assert!(is_escapeable_character('-')); +/// assert!(is_escapeable_character('&')); +/// assert!(is_escapeable_character('#')); +/// assert!(is_escapeable_character('%')); +/// assert!(is_escapeable_character('/')); +/// assert!(is_escapeable_character('!')); +/// assert!(is_escapeable_character('"')); +/// +/// assert!(!is_escapeable_character('e')); +/// ``` +pub fn is_escapeable_character(c: char) -> bool { + // Certainly escapeable if it's a meta character. + if is_meta_character(c) { + return true; + } + // Any character that isn't ASCII is definitely not escapeable. There's + // no real need to allow things like \☃ right? + if !c.is_ascii() { + return false; + } + // Otherwise, we basically say that everything is escapeable unless it's a + // letter or digit. Things like \3 are either octal (when enabled) or an + // error, and we should keep it that way. Otherwise, letters are reserved + // for adding new syntax in a backwards compatible way. + match c { + '0'..='9' | 'A'..='Z' | 'a'..='z' => false, + // While not currently supported, we keep these as not escapeable to + // give us some flexibility with respect to supporting the \< and + // \> word boundary assertions in the future. By rejecting them as + // escapeable, \< and \> will result in a parse error. Thus, we can + // turn them into something else in the future without it being a + // backwards incompatible change. + '<' | '>' => false, + _ => true, + } +} + /// Returns true if and only if the given character is a Unicode word /// character. 
/// @@ -224,10 +334,9 @@ pub fn is_meta_character(c: char) -> bool { /// /// # Panics /// -/// If the `unicode-perl` feature is not enabled, then this function panics. -/// For this reason, it is recommended that callers use -/// [`try_is_word_character`](fn.try_is_word_character.html) -/// instead. +/// If the `unicode-perl` feature is not enabled, then this function +/// panics. For this reason, it is recommended that callers use +/// [`try_is_word_character`] instead. pub fn is_word_character(c: char) -> bool { try_is_word_character(c).expect("unicode-perl feature must be enabled") } @@ -248,7 +357,7 @@ pub fn is_word_character(c: char) -> bool { /// returns an error. pub fn try_is_word_character( c: char, -) -> std::result::Result { +) -> core::result::Result { unicode::is_word_character(c) } @@ -265,6 +374,8 @@ pub fn is_word_byte(c: u8) -> bool { #[cfg(test)] mod tests { + use alloc::string::ToString; + use super::*; #[test] diff --git a/regex-syntax/src/parser.rs b/regex-syntax/src/parser.rs index ded95b280a..2e7a2bb80c 100644 --- a/regex-syntax/src/parser.rs +++ b/regex-syntax/src/parser.rs @@ -1,16 +1,26 @@ -use crate::ast; -use crate::hir; +use crate::{ast, hir, Error}; -use crate::Result; +/// A convenience routine for parsing a regex using default options. +/// +/// This is equivalent to `Parser::new().parse(pattern)`. +/// +/// If you need to set non-default options, then use a [`ParserBuilder`]. +/// +/// This routine returns an [`Hir`](hir::Hir) value. Namely, it automatically +/// parses the pattern as an [`Ast`](ast::Ast) and then invokes the translator +/// to convert the `Ast` into an `Hir`. If you need access to the `Ast`, then +/// you should use a [`ast::parse::Parser`]. +pub fn parse(pattern: &str) -> Result { + Parser::new().parse(pattern) +} /// A builder for a regular expression parser. /// /// This builder permits modifying configuration options for the parser. 
/// -/// This type combines the builder options for both the -/// [AST `ParserBuilder`](ast/parse/struct.ParserBuilder.html) -/// and the -/// [HIR `TranslatorBuilder`](hir/translate/struct.TranslatorBuilder.html). +/// This type combines the builder options for both the [AST +/// `ParserBuilder`](ast::parse::ParserBuilder) and the [HIR +/// `TranslatorBuilder`](hir::translate::TranslatorBuilder). #[derive(Clone, Debug, Default)] pub struct ParserBuilder { ast: ast::parse::ParserBuilder, @@ -78,19 +88,23 @@ impl ParserBuilder { self } - /// When enabled, the parser will permit the construction of a regular + /// When disabled, translation will permit the construction of a regular /// expression that may match invalid UTF-8. /// - /// When disabled (the default), the parser is guaranteed to produce - /// an expression that will only ever match valid UTF-8 (otherwise, the - /// parser will return an error). - /// - /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII - /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause - /// the parser to return an error. Namely, a negated ASCII word boundary - /// can result in matching positions that aren't valid UTF-8 boundaries. - pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut ParserBuilder { - self.hir.allow_invalid_utf8(yes); + /// When enabled (the default), the translator is guaranteed to produce an + /// expression that, for non-empty matches, will only ever produce spans + /// that are entirely valid UTF-8 (otherwise, the translator will return an + /// error). + /// + /// Perhaps surprisingly, when UTF-8 is enabled, an empty regex or even + /// a negated ASCII word boundary (uttered as `(?-u:\B)` in the concrete + /// syntax) will be allowed even though they can produce matches that split + /// a UTF-8 encoded codepoint. 
This only applies to zero-width or "empty" + /// matches, and it is expected that the regex engine itself must handle + /// these cases if necessary (perhaps by suppressing any zero-width matches + /// that split a codepoint). + pub fn utf8(&mut self, yes: bool) -> &mut ParserBuilder { + self.hir.utf8(yes); self } @@ -134,6 +148,23 @@ impl ParserBuilder { self } + /// Enable or disable the CRLF mode flag by default. + /// + /// By default this is disabled. It may alternatively be selectively + /// enabled in the regular expression itself via the `R` flag. + /// + /// When CRLF mode is enabled, the following happens: + /// + /// * Unless `dot_matches_new_line` is enabled, `.` will match any character + /// except for `\r` and `\n`. + /// * When `multi_line` mode is enabled, `^` and `$` will treat `\r\n`, + /// `\r` and `\n` as line terminators. And in particular, neither will + /// match between a `\r` and a `\n`. + pub fn crlf(&mut self, yes: bool) -> &mut ParserBuilder { + self.hir.crlf(yes); + self + } + /// Enable or disable the "swap greed" flag by default. /// /// By default this is disabled. It may alternatively be selectively @@ -148,9 +179,9 @@ impl ParserBuilder { /// By default this is **enabled**. It may alternatively be selectively /// disabled in the regular expression itself via the `u` flag. /// - /// Note that unless `allow_invalid_utf8` is enabled (it's disabled by - /// default), a regular expression will fail to parse if Unicode mode is - /// disabled and a sub-expression could possibly match invalid UTF-8. + /// Note that unless `utf8` is disabled (it's enabled by default), a + /// regular expression will fail to parse if Unicode mode is disabled and a + /// sub-expression could possibly match invalid UTF-8. pub fn unicode(&mut self, yes: bool) -> &mut ParserBuilder { self.hir.unicode(yes); self @@ -167,10 +198,9 @@ impl ParserBuilder { /// convenience for never having to deal with it at all. 
/// /// If callers have more fine grained use cases that need an AST, then please -/// see the [`ast::parse`](ast/parse/index.html) module. +/// see the [`ast::parse`] module. /// -/// A `Parser` can be configured in more detail via a -/// [`ParserBuilder`](struct.ParserBuilder.html). +/// A `Parser` can be configured in more detail via a [`ParserBuilder`]. #[derive(Clone, Debug)] pub struct Parser { ast: ast::parse::Parser, @@ -184,15 +214,14 @@ impl Parser { /// a high level intermediate representation of the given regular /// expression. /// - /// To set configuration options on the parser, use - /// [`ParserBuilder`](struct.ParserBuilder.html). + /// To set configuration options on the parser, use [`ParserBuilder`]. pub fn new() -> Parser { ParserBuilder::new().build() } /// Parse the regular expression into a high level intermediate /// representation. - pub fn parse(&mut self, pattern: &str) -> Result { + pub fn parse(&mut self, pattern: &str) -> Result { let ast = self.ast.parse(pattern)?; let hir = self.hir.translate(pattern, &ast)?; Ok(hir) diff --git a/regex-syntax/src/rank.rs b/regex-syntax/src/rank.rs new file mode 100644 index 0000000000..ccb25a20ae --- /dev/null +++ b/regex-syntax/src/rank.rs @@ -0,0 +1,258 @@ +pub(crate) const BYTE_FREQUENCIES: [u8; 256] = [ + 55, // '\x00' + 52, // '\x01' + 51, // '\x02' + 50, // '\x03' + 49, // '\x04' + 48, // '\x05' + 47, // '\x06' + 46, // '\x07' + 45, // '\x08' + 103, // '\t' + 242, // '\n' + 66, // '\x0b' + 67, // '\x0c' + 229, // '\r' + 44, // '\x0e' + 43, // '\x0f' + 42, // '\x10' + 41, // '\x11' + 40, // '\x12' + 39, // '\x13' + 38, // '\x14' + 37, // '\x15' + 36, // '\x16' + 35, // '\x17' + 34, // '\x18' + 33, // '\x19' + 56, // '\x1a' + 32, // '\x1b' + 31, // '\x1c' + 30, // '\x1d' + 29, // '\x1e' + 28, // '\x1f' + 255, // ' ' + 148, // '!' 
+ 164, // '"' + 149, // '#' + 136, // '$' + 160, // '%' + 155, // '&' + 173, // "'" + 221, // '(' + 222, // ')' + 134, // '*' + 122, // '+' + 232, // ',' + 202, // '-' + 215, // '.' + 224, // '/' + 208, // '0' + 220, // '1' + 204, // '2' + 187, // '3' + 183, // '4' + 179, // '5' + 177, // '6' + 168, // '7' + 178, // '8' + 200, // '9' + 226, // ':' + 195, // ';' + 154, // '<' + 184, // '=' + 174, // '>' + 126, // '?' + 120, // '@' + 191, // 'A' + 157, // 'B' + 194, // 'C' + 170, // 'D' + 189, // 'E' + 162, // 'F' + 161, // 'G' + 150, // 'H' + 193, // 'I' + 142, // 'J' + 137, // 'K' + 171, // 'L' + 176, // 'M' + 185, // 'N' + 167, // 'O' + 186, // 'P' + 112, // 'Q' + 175, // 'R' + 192, // 'S' + 188, // 'T' + 156, // 'U' + 140, // 'V' + 143, // 'W' + 123, // 'X' + 133, // 'Y' + 128, // 'Z' + 147, // '[' + 138, // '\\' + 146, // ']' + 114, // '^' + 223, // '_' + 151, // '`' + 249, // 'a' + 216, // 'b' + 238, // 'c' + 236, // 'd' + 253, // 'e' + 227, // 'f' + 218, // 'g' + 230, // 'h' + 247, // 'i' + 135, // 'j' + 180, // 'k' + 241, // 'l' + 233, // 'm' + 246, // 'n' + 244, // 'o' + 231, // 'p' + 139, // 'q' + 245, // 'r' + 243, // 's' + 251, // 't' + 235, // 'u' + 201, // 'v' + 196, // 'w' + 240, // 'x' + 214, // 'y' + 152, // 'z' + 182, // '{' + 205, // '|' + 181, // '}' + 127, // '~' + 27, // '\x7f' + 212, // '\x80' + 211, // '\x81' + 210, // '\x82' + 213, // '\x83' + 228, // '\x84' + 197, // '\x85' + 169, // '\x86' + 159, // '\x87' + 131, // '\x88' + 172, // '\x89' + 105, // '\x8a' + 80, // '\x8b' + 98, // '\x8c' + 96, // '\x8d' + 97, // '\x8e' + 81, // '\x8f' + 207, // '\x90' + 145, // '\x91' + 116, // '\x92' + 115, // '\x93' + 144, // '\x94' + 130, // '\x95' + 153, // '\x96' + 121, // '\x97' + 107, // '\x98' + 132, // '\x99' + 109, // '\x9a' + 110, // '\x9b' + 124, // '\x9c' + 111, // '\x9d' + 82, // '\x9e' + 108, // '\x9f' + 118, // '\xa0' + 141, // '¡' + 113, // '¢' + 129, // '£' + 119, // '¤' + 125, // '¥' + 165, // '¦' + 117, // '§' + 92, // '¨' + 106, // '©' 
+ 83, // 'ª' + 72, // '«' + 99, // '¬' + 93, // '\xad' + 65, // '®' + 79, // '¯' + 166, // '°' + 237, // '±' + 163, // '²' + 199, // '³' + 190, // '´' + 225, // 'µ' + 209, // '¶' + 203, // '·' + 198, // '¸' + 217, // '¹' + 219, // 'º' + 206, // '»' + 234, // '¼' + 248, // '½' + 158, // '¾' + 239, // '¿' + 255, // 'À' + 255, // 'Á' + 255, // 'Â' + 255, // 'Ã' + 255, // 'Ä' + 255, // 'Å' + 255, // 'Æ' + 255, // 'Ç' + 255, // 'È' + 255, // 'É' + 255, // 'Ê' + 255, // 'Ë' + 255, // 'Ì' + 255, // 'Í' + 255, // 'Î' + 255, // 'Ï' + 255, // 'Ð' + 255, // 'Ñ' + 255, // 'Ò' + 255, // 'Ó' + 255, // 'Ô' + 255, // 'Õ' + 255, // 'Ö' + 255, // '×' + 255, // 'Ø' + 255, // 'Ù' + 255, // 'Ú' + 255, // 'Û' + 255, // 'Ü' + 255, // 'Ý' + 255, // 'Þ' + 255, // 'ß' + 255, // 'à' + 255, // 'á' + 255, // 'â' + 255, // 'ã' + 255, // 'ä' + 255, // 'å' + 255, // 'æ' + 255, // 'ç' + 255, // 'è' + 255, // 'é' + 255, // 'ê' + 255, // 'ë' + 255, // 'ì' + 255, // 'í' + 255, // 'î' + 255, // 'ï' + 255, // 'ð' + 255, // 'ñ' + 255, // 'ò' + 255, // 'ó' + 255, // 'ô' + 255, // 'õ' + 255, // 'ö' + 255, // '÷' + 255, // 'ø' + 255, // 'ù' + 255, // 'ú' + 255, // 'û' + 255, // 'ü' + 255, // 'ý' + 255, // 'þ' + 255, // 'ÿ' +]; diff --git a/regex-syntax/src/unicode.rs b/regex-syntax/src/unicode.rs index 8194d7f55b..91bd4b1203 100644 --- a/regex-syntax/src/unicode.rs +++ b/regex-syntax/src/unicode.rs @@ -1,12 +1,10 @@ -use std::error; -use std::fmt; -use std::result; +use alloc::{ + string::{String, ToString}, + vec::Vec, +}; use crate::hir; -/// A type alias for errors specific to Unicode handling of classes. -pub type Result = result::Result; - /// An inclusive range of codepoints from a generated file (hence the static /// lifetime). type Range = &'static [(char, char)]; @@ -24,9 +22,6 @@ pub enum Error { PerlClassNotFound, } -/// A type alias for errors specific to Unicode case folding. -pub type FoldResult = result::Result; - /// An error that occurs when Unicode-aware simple case folding fails. 
/// /// This error can occur when the case mapping tables necessary for Unicode @@ -35,10 +30,11 @@ pub type FoldResult = result::Result; #[derive(Debug)] pub struct CaseFoldError(()); -impl error::Error for CaseFoldError {} +#[cfg(feature = "std")] +impl std::error::Error for CaseFoldError {} -impl fmt::Display for CaseFoldError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for CaseFoldError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!( f, "Unicode-aware case folding is not available \ @@ -55,10 +51,11 @@ impl fmt::Display for CaseFoldError { #[derive(Debug)] pub struct UnicodeWordError(()); -impl error::Error for UnicodeWordError {} +#[cfg(feature = "std")] +impl std::error::Error for UnicodeWordError {} -impl fmt::Display for UnicodeWordError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for UnicodeWordError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!( f, "Unicode-aware \\w class is not available \ @@ -67,74 +64,122 @@ impl fmt::Display for UnicodeWordError { } } -/// Return an iterator over the equivalence class of simple case mappings -/// for the given codepoint. The equivalence class does not include the -/// given codepoint. +/// A state oriented traverser of the simple case folding table. /// -/// If the equivalence class is empty, then this returns the next scalar -/// value that has a non-empty equivalence class, if it exists. If no such -/// scalar value exists, then `None` is returned. The point of this behavior -/// is to permit callers to avoid calling `simple_fold` more than they need -/// to, since there is some cost to fetching the equivalence class. +/// A case folder can be constructed via `SimpleCaseFolder::new()`, which will +/// return an error if the underlying case folding table is unavailable. /// -/// This returns an error if the Unicode case folding tables are not available. 
-pub fn simple_fold( - c: char, -) -> FoldResult, Option>> { - #[cfg(not(feature = "unicode-case"))] - fn imp( - _: char, - ) -> FoldResult, Option>> - { - use std::option::IntoIter; - Err::, _>, _>(CaseFoldError(())) - } +/// After construction, it is expected that callers will use +/// `SimpleCaseFolder::mapping` by calling it with codepoints in strictly +/// increasing order. For example, calling it on `b` and then on `a` is illegal +/// and will result in a panic. +/// +/// The main idea of this type is that it tries hard to make mapping lookups +/// fast by exploiting the structure of the underlying table, and the ordering +/// assumption enables this. +#[derive(Debug)] +pub struct SimpleCaseFolder { + /// The simple case fold table. It's a sorted association list, where the + /// keys are Unicode scalar values and the values are the corresponding + /// equivalence class (not including the key) of the "simple" case folded + /// Unicode scalar values. + table: &'static [(char, &'static [char])], + /// The last codepoint that was used for a lookup. + last: Option, + /// The index to the entry in `table` corresponding to the smallest key `k` + /// such that `k > k0`, where `k0` is the most recent key lookup. Note that + /// in particular, `k0` may not be in the table! + next: usize, +} - #[cfg(feature = "unicode-case")] - fn imp( - c: char, - ) -> FoldResult, Option>> - { - use crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE; - - Ok(CASE_FOLDING_SIMPLE - .binary_search_by_key(&c, |&(c1, _)| c1) - .map(|i| CASE_FOLDING_SIMPLE[i].1.iter().copied()) - .map_err(|i| { - if i >= CASE_FOLDING_SIMPLE.len() { - None - } else { - Some(CASE_FOLDING_SIMPLE[i].0) - } - })) +impl SimpleCaseFolder { + /// Create a new simple case folder, returning an error if the underlying + /// case folding table is unavailable. 
+ pub fn new() -> Result { + #[cfg(not(feature = "unicode-case"))] + { + Err(CaseFoldError(())) + } + #[cfg(feature = "unicode-case")] + { + Ok(SimpleCaseFolder { + table: crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE, + last: None, + next: 0, + }) + } } - imp(c) -} - -/// Returns true if and only if the given (inclusive) range contains at least -/// one Unicode scalar value that has a non-empty non-trivial simple case -/// mapping. -/// -/// This function panics if `end < start`. -/// -/// This returns an error if the Unicode case folding tables are not available. -pub fn contains_simple_case_mapping( - start: char, - end: char, -) -> FoldResult { - #[cfg(not(feature = "unicode-case"))] - fn imp(_: char, _: char) -> FoldResult { - Err(CaseFoldError(())) + /// Return the equivalence class of case folded codepoints for the given + /// codepoint. The equivalence class returned never includes the codepoint + /// given. If the given codepoint has no case folded codepoints (i.e., + /// no entry in the underlying case folding table), then this returns an + /// empty slice. + /// + /// # Panics + /// + /// This panics when called with a `c` that is less than or equal to the + /// previous call. In other words, callers need to use this method with + /// strictly increasing values of `c`. + pub fn mapping(&mut self, c: char) -> &'static [char] { + if let Some(last) = self.last { + assert!( + last < c, + "got codepoint U+{:X} which occurs before \ + last codepoint U+{:X}", + u32::from(c), + u32::from(last), + ); + } + self.last = Some(c); + if self.next >= self.table.len() { + return &[]; + } + let (k, v) = self.table[self.next]; + if k == c { + self.next += 1; + return v; + } + match self.get(c) { + Err(i) => { + self.next = i; + &[] + } + Ok(i) => { + // Since we require lookups to proceed + // in order, anything we find should be + // after whatever we thought might be + // next. 
Otherwise, the caller is either + // going out of order or we would have + // found our next key at 'self.next'. + assert!(i > self.next); + self.next = i + 1; + self.table[i].1 + } + } } - #[cfg(feature = "unicode-case")] - fn imp(start: char, end: char) -> FoldResult { - use crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE; - use std::cmp::Ordering; + /// Returns true if and only if the given range overlaps with any region + /// of the underlying case folding table. That is, when true, there exists + /// at least one codepoint in the inclusive range `[start, end]` that has + /// a non-trivial equivalence class of case folded codepoints. Conversely, + /// when this returns false, all codepoints in the range `[start, end]` + /// correspond to the trivial equivalence class of case folded codepoints, + /// i.e., itself. + /// + /// This is useful to call before iterating over the codepoints in the + /// range and looking up the mapping for each. If you know none of the + /// mappings will return anything, then you might be able to skip doing it + /// altogether. + /// + /// # Panics + /// + /// This panics when `end < start`. + pub fn overlaps(&self, start: char, end: char) -> bool { + use core::cmp::Ordering; assert!(start <= end); - Ok(CASE_FOLDING_SIMPLE + self.table .binary_search_by(|&(c, _)| { if start <= c && c <= end { Ordering::Equal @@ -144,10 +189,15 @@ pub fn contains_simple_case_mapping( Ordering::Less } }) - .is_ok()) + .is_ok() } - imp(start, end) + /// Returns the index at which `c` occurs in the simple case fold table. If + /// `c` does not occur, then this returns an `i` such that `table[i-1].0 < + /// c` and `table[i].0 > c`. + fn get(&self, c: char) -> Result { + self.table.binary_search_by_key(&c, |&(c1, _)| c1) + } } /// A query for finding a character class defined by Unicode. 
This supports @@ -185,7 +235,7 @@ pub enum ClassQuery<'a> { } impl<'a> ClassQuery<'a> { - fn canonicalize(&self) -> Result { + fn canonicalize(&self) -> Result { match *self { ClassQuery::OneLetter(c) => self.canonical_binary(&c.to_string()), ClassQuery::Binary(name) => self.canonical_binary(name), @@ -234,7 +284,10 @@ impl<'a> ClassQuery<'a> { } } - fn canonical_binary(&self, name: &str) -> Result { + fn canonical_binary( + &self, + name: &str, + ) -> Result { let norm = symbolic_name_normalize(name); // This is a special case where 'cf' refers to the 'Format' general @@ -243,7 +296,17 @@ impl<'a> ClassQuery<'a> { // a general category. (Currently, we don't even support the // 'Case_Folding' property. But if we do in the future, users will be // required to spell it out.) - if norm != "cf" { + // + // Also 'sc' refers to the 'Currency_Symbol' general category, but is + // also the abbreviation for the 'Script' property. So we avoid calling + // 'canonical_prop' for it too, which would erroneously normalize it + // to 'Script'. + // + // Another case: 'lc' is an abbreviation for the 'Cased_Letter' + // general category, but is also an abbreviation for the 'Lowercase_Mapping' + // property. We don't currently support the latter, so as with 'cf' + // above, we treat 'lc' as 'Cased_Letter'. + if norm != "cf" && norm != "sc" && norm != "lc" { if let Some(canon) = canonical_prop(&norm)? { return Ok(CanonicalClassQuery::Binary(canon)); } @@ -285,7 +348,7 @@ enum CanonicalClassQuery { /// Looks up a Unicode class given a query. If one doesn't exist, then /// `None` is returned. -pub fn class(query: ClassQuery<'_>) -> Result { +pub fn class(query: ClassQuery<'_>) -> Result { use self::CanonicalClassQuery::*; match query.canonicalize()? { @@ -322,14 +385,14 @@ pub fn class(query: ClassQuery<'_>) -> Result { /// Returns a Unicode aware class for \w. /// /// This returns an error if the data is not available for \w. 
-pub fn perl_word() -> Result { +pub fn perl_word() -> Result { #[cfg(not(feature = "unicode-perl"))] - fn imp() -> Result { + fn imp() -> Result { Err(Error::PerlClassNotFound) } #[cfg(feature = "unicode-perl")] - fn imp() -> Result { + fn imp() -> Result { use crate::unicode_tables::perl_word::PERL_WORD; Ok(hir_class(PERL_WORD)) } @@ -340,20 +403,20 @@ pub fn perl_word() -> Result { /// Returns a Unicode aware class for \s. /// /// This returns an error if the data is not available for \s. -pub fn perl_space() -> Result { +pub fn perl_space() -> Result { #[cfg(not(any(feature = "unicode-perl", feature = "unicode-bool")))] - fn imp() -> Result { + fn imp() -> Result { Err(Error::PerlClassNotFound) } #[cfg(all(feature = "unicode-perl", not(feature = "unicode-bool")))] - fn imp() -> Result { + fn imp() -> Result { use crate::unicode_tables::perl_space::WHITE_SPACE; Ok(hir_class(WHITE_SPACE)) } #[cfg(feature = "unicode-bool")] - fn imp() -> Result { + fn imp() -> Result { use crate::unicode_tables::property_bool::WHITE_SPACE; Ok(hir_class(WHITE_SPACE)) } @@ -364,20 +427,20 @@ pub fn perl_space() -> Result { /// Returns a Unicode aware class for \d. /// /// This returns an error if the data is not available for \d. -pub fn perl_digit() -> Result { +pub fn perl_digit() -> Result { #[cfg(not(any(feature = "unicode-perl", feature = "unicode-gencat")))] - fn imp() -> Result { + fn imp() -> Result { Err(Error::PerlClassNotFound) } #[cfg(all(feature = "unicode-perl", not(feature = "unicode-gencat")))] - fn imp() -> Result { + fn imp() -> Result { use crate::unicode_tables::perl_decimal::DECIMAL_NUMBER; Ok(hir_class(DECIMAL_NUMBER)) } #[cfg(feature = "unicode-gencat")] - fn imp() -> Result { + fn imp() -> Result { use crate::unicode_tables::general_category::DECIMAL_NUMBER; Ok(hir_class(DECIMAL_NUMBER)) } @@ -397,23 +460,24 @@ pub fn hir_class(ranges: &[(char, char)]) -> hir::ClassUnicode { /// Returns true only if the given codepoint is in the `\w` character class. 
/// /// If the `unicode-perl` feature is not enabled, then this returns an error. -pub fn is_word_character(c: char) -> result::Result { +pub fn is_word_character(c: char) -> Result { #[cfg(not(feature = "unicode-perl"))] - fn imp(_: char) -> result::Result { + fn imp(_: char) -> Result { Err(UnicodeWordError(())) } #[cfg(feature = "unicode-perl")] - fn imp(c: char) -> result::Result { - use crate::is_word_byte; - use crate::unicode_tables::perl_word::PERL_WORD; - use std::cmp::Ordering; + fn imp(c: char) -> Result { + use crate::{is_word_byte, unicode_tables::perl_word::PERL_WORD}; - if c <= 0x7F as char && is_word_byte(c as u8) { + // MSRV(1.59): Use 'u8::try_from(c)' instead. + if u8::try_from(u32::from(c)).map_or(false, is_word_byte) { return Ok(true); } Ok(PERL_WORD .binary_search_by(|&(start, end)| { + use core::cmp::Ordering; + if start <= c && c <= end { Ordering::Equal } else if start > c { @@ -435,7 +499,9 @@ pub fn is_word_character(c: char) -> result::Result { /// value. type PropertyValues = &'static [(&'static str, &'static str)]; -fn canonical_gencat(normalized_value: &str) -> Result> { +fn canonical_gencat( + normalized_value: &str, +) -> Result, Error> { Ok(match normalized_value { "any" => Some("Any"), "assigned" => Some("Assigned"), @@ -447,7 +513,9 @@ fn canonical_gencat(normalized_value: &str) -> Result> { }) } -fn canonical_script(normalized_value: &str) -> Result> { +fn canonical_script( + normalized_value: &str, +) -> Result, Error> { let scripts = property_values("Script")?.unwrap(); Ok(canonical_value(scripts, normalized_value)) } @@ -460,7 +528,9 @@ fn canonical_script(normalized_value: &str) -> Result> { /// UAX44 LM3, which can be done using `symbolic_name_normalize`. /// /// If the property names data is not available, then an error is returned. 
-fn canonical_prop(normalized_name: &str) -> Result> { +fn canonical_prop( + normalized_name: &str, +) -> Result, Error> { #[cfg(not(any( feature = "unicode-age", feature = "unicode-bool", @@ -469,7 +539,7 @@ fn canonical_prop(normalized_name: &str) -> Result> { feature = "unicode-script", feature = "unicode-segment", )))] - fn imp(_: &str) -> Result> { + fn imp(_: &str) -> Result, Error> { Err(Error::PropertyNotFound) } @@ -481,7 +551,7 @@ fn canonical_prop(normalized_name: &str) -> Result> { feature = "unicode-script", feature = "unicode-segment", ))] - fn imp(name: &str) -> Result> { + fn imp(name: &str) -> Result, Error> { use crate::unicode_tables::property_names::PROPERTY_NAMES; Ok(PROPERTY_NAMES @@ -517,7 +587,7 @@ fn canonical_value( /// If the property values data is not available, then an error is returned. fn property_values( canonical_property_name: &'static str, -) -> Result> { +) -> Result, Error> { #[cfg(not(any( feature = "unicode-age", feature = "unicode-bool", @@ -526,7 +596,7 @@ fn property_values( feature = "unicode-script", feature = "unicode-segment", )))] - fn imp(_: &'static str) -> Result> { + fn imp(_: &'static str) -> Result, Error> { Err(Error::PropertyValueNotFound) } @@ -538,7 +608,7 @@ fn property_values( feature = "unicode-script", feature = "unicode-segment", ))] - fn imp(name: &'static str) -> Result> { + fn imp(name: &'static str) -> Result, Error> { use crate::unicode_tables::property_values::PROPERTY_VALUES; Ok(PROPERTY_VALUES @@ -569,15 +639,15 @@ fn property_set( /// /// If the given age value isn't valid or if the data isn't available, then an /// error is returned instead. 
-fn ages(canonical_age: &str) -> Result> { +fn ages(canonical_age: &str) -> Result, Error> { #[cfg(not(feature = "unicode-age"))] - fn imp(_: &str) -> Result> { - use std::option::IntoIter; + fn imp(_: &str) -> Result, Error> { + use core::option::IntoIter; Err::, _>(Error::PropertyNotFound) } #[cfg(feature = "unicode-age")] - fn imp(canonical_age: &str) -> Result> { + fn imp(canonical_age: &str) -> Result, Error> { use crate::unicode_tables::age; const AGES: &[(&str, Range)] = &[ @@ -625,14 +695,14 @@ fn ages(canonical_age: &str) -> Result> { /// /// If the given general category could not be found, or if the general /// category data is not available, then an error is returned. -fn gencat(canonical_name: &'static str) -> Result { +fn gencat(canonical_name: &'static str) -> Result { #[cfg(not(feature = "unicode-gencat"))] - fn imp(_: &'static str) -> Result { + fn imp(_: &'static str) -> Result { Err(Error::PropertyNotFound) } #[cfg(feature = "unicode-gencat")] - fn imp(name: &'static str) -> Result { + fn imp(name: &'static str) -> Result { use crate::unicode_tables::general_category::BY_NAME; match name { "ASCII" => Ok(hir_class(&[('\0', '\x7F')])), @@ -660,14 +730,14 @@ fn gencat(canonical_name: &'static str) -> Result { /// /// If the given script could not be found, or if the script data is not /// available, then an error is returned. -fn script(canonical_name: &'static str) -> Result { +fn script(canonical_name: &'static str) -> Result { #[cfg(not(feature = "unicode-script"))] - fn imp(_: &'static str) -> Result { + fn imp(_: &'static str) -> Result { Err(Error::PropertyNotFound) } #[cfg(feature = "unicode-script")] - fn imp(name: &'static str) -> Result { + fn imp(name: &'static str) -> Result { use crate::unicode_tables::script::BY_NAME; property_set(BY_NAME, name) .map(hir_class) @@ -685,14 +755,14 @@ fn script(canonical_name: &'static str) -> Result { /// not available, then an error is returned. 
fn script_extension( canonical_name: &'static str, -) -> Result { +) -> Result { #[cfg(not(feature = "unicode-script"))] - fn imp(_: &'static str) -> Result { + fn imp(_: &'static str) -> Result { Err(Error::PropertyNotFound) } #[cfg(feature = "unicode-script")] - fn imp(name: &'static str) -> Result { + fn imp(name: &'static str) -> Result { use crate::unicode_tables::script_extension::BY_NAME; property_set(BY_NAME, name) .map(hir_class) @@ -709,14 +779,16 @@ fn script_extension( /// /// If the given boolean property could not be found, or if the boolean /// property data is not available, then an error is returned. -fn bool_property(canonical_name: &'static str) -> Result { +fn bool_property( + canonical_name: &'static str, +) -> Result { #[cfg(not(feature = "unicode-bool"))] - fn imp(_: &'static str) -> Result { + fn imp(_: &'static str) -> Result { Err(Error::PropertyNotFound) } #[cfg(feature = "unicode-bool")] - fn imp(name: &'static str) -> Result { + fn imp(name: &'static str) -> Result { use crate::unicode_tables::property_bool::BY_NAME; property_set(BY_NAME, name) .map(hir_class) @@ -737,14 +809,14 @@ fn bool_property(canonical_name: &'static str) -> Result { /// /// If the given property could not be found, or if the corresponding data is /// not available, then an error is returned. -fn gcb(canonical_name: &'static str) -> Result { +fn gcb(canonical_name: &'static str) -> Result { #[cfg(not(feature = "unicode-segment"))] - fn imp(_: &'static str) -> Result { + fn imp(_: &'static str) -> Result { Err(Error::PropertyNotFound) } #[cfg(feature = "unicode-segment")] - fn imp(name: &'static str) -> Result { + fn imp(name: &'static str) -> Result { use crate::unicode_tables::grapheme_cluster_break::BY_NAME; property_set(BY_NAME, name) .map(hir_class) @@ -761,14 +833,14 @@ fn gcb(canonical_name: &'static str) -> Result { /// /// If the given property could not be found, or if the corresponding data is /// not available, then an error is returned. 
-fn wb(canonical_name: &'static str) -> Result { +fn wb(canonical_name: &'static str) -> Result { #[cfg(not(feature = "unicode-segment"))] - fn imp(_: &'static str) -> Result { + fn imp(_: &'static str) -> Result { Err(Error::PropertyNotFound) } #[cfg(feature = "unicode-segment")] - fn imp(name: &'static str) -> Result { + fn imp(name: &'static str) -> Result { use crate::unicode_tables::word_break::BY_NAME; property_set(BY_NAME, name) .map(hir_class) @@ -785,14 +857,14 @@ fn wb(canonical_name: &'static str) -> Result { /// /// If the given property could not be found, or if the corresponding data is /// not available, then an error is returned. -fn sb(canonical_name: &'static str) -> Result { +fn sb(canonical_name: &'static str) -> Result { #[cfg(not(feature = "unicode-segment"))] - fn imp(_: &'static str) -> Result { + fn imp(_: &'static str) -> Result { Err(Error::PropertyNotFound) } #[cfg(feature = "unicode-segment")] - fn imp(name: &'static str) -> Result { + fn imp(name: &'static str) -> Result { use crate::unicode_tables::sentence_break::BY_NAME; property_set(BY_NAME, name) .map(hir_class) @@ -873,72 +945,45 @@ fn symbolic_name_normalize_bytes(slice: &mut [u8]) -> &mut [u8] { #[cfg(test)] mod tests { - use super::{ - contains_simple_case_mapping, simple_fold, symbolic_name_normalize, - symbolic_name_normalize_bytes, - }; + use super::*; #[cfg(feature = "unicode-case")] fn simple_fold_ok(c: char) -> impl Iterator { - simple_fold(c).unwrap().unwrap() - } - - #[cfg(feature = "unicode-case")] - fn simple_fold_err(c: char) -> Option { - match simple_fold(c).unwrap() { - Ok(_) => unreachable!("simple_fold returned Ok iterator"), - Err(next) => next, - } + SimpleCaseFolder::new().unwrap().mapping(c).iter().copied() } #[cfg(feature = "unicode-case")] fn contains_case_map(start: char, end: char) -> bool { - contains_simple_case_mapping(start, end).unwrap() + SimpleCaseFolder::new().unwrap().overlaps(start, end) } #[test] #[cfg(feature = "unicode-case")] fn 
simple_fold_k() { let xs: Vec = simple_fold_ok('k').collect(); - assert_eq!(xs, vec!['K', 'K']); + assert_eq!(xs, alloc::vec!['K', 'K']); let xs: Vec = simple_fold_ok('K').collect(); - assert_eq!(xs, vec!['k', 'K']); + assert_eq!(xs, alloc::vec!['k', 'K']); let xs: Vec = simple_fold_ok('K').collect(); - assert_eq!(xs, vec!['K', 'k']); + assert_eq!(xs, alloc::vec!['K', 'k']); } #[test] #[cfg(feature = "unicode-case")] fn simple_fold_a() { let xs: Vec = simple_fold_ok('a').collect(); - assert_eq!(xs, vec!['A']); + assert_eq!(xs, alloc::vec!['A']); let xs: Vec = simple_fold_ok('A').collect(); - assert_eq!(xs, vec!['a']); - } - - #[test] - #[cfg(feature = "unicode-case")] - fn simple_fold_empty() { - assert_eq!(Some('A'), simple_fold_err('?')); - assert_eq!(Some('A'), simple_fold_err('@')); - assert_eq!(Some('a'), simple_fold_err('[')); - assert_eq!(Some('Ⰰ'), simple_fold_err('☃')); - } - - #[test] - #[cfg(feature = "unicode-case")] - fn simple_fold_max() { - assert_eq!(None, simple_fold_err('\u{10FFFE}')); - assert_eq!(None, simple_fold_err('\u{10FFFF}')); + assert_eq!(xs, alloc::vec!['a']); } #[test] #[cfg(not(feature = "unicode-case"))] fn simple_fold_disabled() { - assert!(simple_fold('a').is_err()); + assert!(SimpleCaseFolder::new().is_err()); } #[test] @@ -957,12 +1002,6 @@ mod tests { assert!(!contains_case_map('☃', '☃')); } - #[test] - #[cfg(not(feature = "unicode-case"))] - fn range_contains_disabled() { - assert!(contains_simple_case_mapping('a', 'a').is_err()); - } - #[test] #[cfg(feature = "unicode-gencat")] fn regression_466() { diff --git a/regex-syntax/src/utf8.rs b/regex-syntax/src/utf8.rs index b9c8655320..e13b55abf0 100644 --- a/regex-syntax/src/utf8.rs +++ b/regex-syntax/src/utf8.rs @@ -3,7 +3,7 @@ Converts ranges of Unicode scalar values to equivalent ranges of UTF-8 bytes. This is sub-module is useful for constructing byte based automatons that need to embed UTF-8 decoding. 
The most common use of this module is in conjunction -with the [`hir::ClassUnicodeRange`](../hir/struct.ClassUnicodeRange.html) type. +with the [`hir::ClassUnicodeRange`](crate::hir::ClassUnicodeRange) type. See the documentation on the `Utf8Sequences` iterator for more details and an example. @@ -80,12 +80,9 @@ I also got the idea from which uses it for executing automata on their term index. */ -#![deny(missing_docs)] +use core::{char, fmt, iter::FusedIterator, slice}; -use std::char; -use std::fmt; -use std::iter::FusedIterator; -use std::slice; +use alloc::{vec, vec::Vec}; const MAX_UTF8_BYTES: usize = 4; @@ -306,7 +303,7 @@ impl Utf8Sequences { /// given. pub fn new(start: char, end: char) -> Self { let mut it = Utf8Sequences { range_stack: vec![] }; - it.push(start as u32, end as u32); + it.push(u32::from(start), u32::from(end)); it } @@ -317,7 +314,7 @@ impl Utf8Sequences { #[doc(hidden)] pub fn reset(&mut self, start: char, end: char) { self.range_stack.clear(); - self.push(start as u32, end as u32); + self.push(u32::from(start), u32::from(end)); } fn push(&mut self, start: u32, end: u32) { @@ -416,7 +413,9 @@ impl ScalarRange { /// values in this range can be encoded as a single byte. 
fn as_ascii(&self) -> Option { if self.is_ascii() { - Some(Utf8Range::new(self.start as u8, self.end as u8)) + let start = u8::try_from(self.start).unwrap(); + let end = u8::try_from(self.end).unwrap(); + Some(Utf8Range::new(start, end)) } else { None } @@ -455,7 +454,9 @@ fn max_scalar_value(nbytes: usize) -> u32 { #[cfg(test)] mod tests { - use std::char; + use core::char; + + use alloc::{vec, vec::Vec}; use crate::utf8::{Utf8Range, Utf8Sequences}; @@ -472,7 +473,11 @@ mod tests { "Sequence ({:X}, {:X}) contains range {:?}, \ which matches surrogate code point {:X} \ with encoded bytes {:?}", - start as u32, end as u32, r, cp, buf, + u32::from(start), + u32::from(end), + r, + cp, + buf, ); } } @@ -579,9 +584,9 @@ mod tests { assert!(0xD800 <= cp && cp < 0xE000); let mut dst = [0; 3]; - dst[0] = (cp >> 12 & 0x0F) as u8 | TAG_THREE_B; - dst[1] = (cp >> 6 & 0x3F) as u8 | TAG_CONT; - dst[2] = (cp & 0x3F) as u8 | TAG_CONT; + dst[0] = u8::try_from(cp >> 12 & 0x0F).unwrap() | TAG_THREE_B; + dst[1] = u8::try_from(cp >> 6 & 0x3F).unwrap() | TAG_CONT; + dst[2] = u8::try_from(cp & 0x3F).unwrap() | TAG_CONT; dst } } diff --git a/regex-syntax/test b/regex-syntax/test index 4b1b9fb1a9..a4d6cfaba5 100755 --- a/regex-syntax/test +++ b/regex-syntax/test @@ -7,6 +7,7 @@ echo "===== DEFAULT FEATURES ===" cargo test features=( + std unicode unicode-age unicode-bool @@ -17,6 +18,9 @@ features=( unicode-segment ) for f in "${features[@]}"; do - echo "===== FEATURE: $f ===" - cargo test --no-default-features --features "$f" + echo "=== FEATURE: $f ===" + # We only run library tests because I couldn't figure out how to easily + # make doc tests run in 'no_std' mode. In particular, without the Error + # trait, using '?' in doc tests seems tricky. 
+ cargo test --no-default-features --lib --features "$f" done diff --git a/src/compile.rs b/src/compile.rs index 90ca25015f..0030cfb108 100644 --- a/src/compile.rs +++ b/src/compile.rs @@ -4,7 +4,7 @@ use std::iter; use std::result; use std::sync::Arc; -use regex_syntax::hir::{self, Hir}; +use regex_syntax::hir::{self, Hir, Look}; use regex_syntax::is_word_byte; use regex_syntax::utf8::{Utf8Range, Utf8Sequence, Utf8Sequences}; @@ -142,8 +142,10 @@ impl Compiler { // Other matching engines handle this by baking the logic into the // matching engine itself. let mut dotstar_patch = Patch { hole: Hole::None, entry: 0 }; - self.compiled.is_anchored_start = expr.is_anchored_start(); - self.compiled.is_anchored_end = expr.is_anchored_end(); + self.compiled.is_anchored_start = + expr.properties().look_set_prefix().contains(Look::Start); + self.compiled.is_anchored_end = + expr.properties().look_set_suffix().contains(Look::End); if self.compiled.needs_dotstar() { dotstar_patch = self.c_dotstar()?; self.compiled.start = dotstar_patch.entry; @@ -159,6 +161,8 @@ impl Compiler { self.fill_to_next(patch.hole); self.compiled.matches = vec![self.insts.len()]; self.push_compiled(Inst::Match(0)); + self.compiled.static_captures_len = + expr.properties().static_explicit_captures_len(); self.compile_finish() } @@ -168,10 +172,12 @@ impl Compiler { ) -> result::Result { debug_assert!(exprs.len() > 1); - self.compiled.is_anchored_start = - exprs.iter().all(|e| e.is_anchored_start()); - self.compiled.is_anchored_end = - exprs.iter().all(|e| e.is_anchored_end()); + self.compiled.is_anchored_start = exprs + .iter() + .all(|e| e.properties().look_set_prefix().contains(Look::Start)); + self.compiled.is_anchored_end = exprs + .iter() + .all(|e| e.properties().look_set_suffix().contains(Look::End)); let mut dotstar_patch = Patch { hole: Hole::None, entry: 0 }; if self.compiled.needs_dotstar() { dotstar_patch = self.c_dotstar()?; @@ -272,17 +278,21 @@ impl Compiler { self.check_size()?; match 
*expr.kind() { Empty => self.c_empty(), - Literal(hir::Literal::Unicode(c)) => self.c_char(c), - Literal(hir::Literal::Byte(b)) => { - assert!(self.compiled.uses_bytes()); - self.c_byte(b) + Literal(hir::Literal(ref bytes)) => { + if self.compiled.is_reverse { + let mut bytes = bytes.to_vec(); + bytes.reverse(); + self.c_literal(&bytes) + } else { + self.c_literal(bytes) + } } Class(hir::Class::Unicode(ref cls)) => self.c_class(cls.ranges()), Class(hir::Class::Bytes(ref cls)) => { if self.compiled.uses_bytes() { self.c_class_bytes(cls.ranges()) } else { - assert!(cls.is_all_ascii()); + assert!(cls.is_ascii()); let mut char_ranges = vec![]; for r in cls.iter() { let (s, e) = (r.start() as char, r.end() as char); @@ -291,92 +301,94 @@ impl Compiler { self.c_class(&char_ranges) } } - Anchor(hir::Anchor::StartLine) if self.compiled.is_reverse => { - self.byte_classes.set_range(b'\n', b'\n'); - self.c_empty_look(prog::EmptyLook::EndLine) - } - Anchor(hir::Anchor::StartLine) => { - self.byte_classes.set_range(b'\n', b'\n'); - self.c_empty_look(prog::EmptyLook::StartLine) - } - Anchor(hir::Anchor::EndLine) if self.compiled.is_reverse => { - self.byte_classes.set_range(b'\n', b'\n'); - self.c_empty_look(prog::EmptyLook::StartLine) - } - Anchor(hir::Anchor::EndLine) => { - self.byte_classes.set_range(b'\n', b'\n'); - self.c_empty_look(prog::EmptyLook::EndLine) - } - Anchor(hir::Anchor::StartText) if self.compiled.is_reverse => { - self.c_empty_look(prog::EmptyLook::EndText) - } - Anchor(hir::Anchor::StartText) => { - self.c_empty_look(prog::EmptyLook::StartText) - } - Anchor(hir::Anchor::EndText) if self.compiled.is_reverse => { - self.c_empty_look(prog::EmptyLook::StartText) - } - Anchor(hir::Anchor::EndText) => { - self.c_empty_look(prog::EmptyLook::EndText) - } - WordBoundary(hir::WordBoundary::Unicode) => { - if !cfg!(feature = "unicode-perl") { - return Err(Error::Syntax( - "Unicode word boundaries are unavailable when \ - the unicode-perl feature is disabled" - 
.to_string(), - )); + Look(ref look) => match *look { + hir::Look::Start if self.compiled.is_reverse => { + self.c_empty_look(prog::EmptyLook::EndText) } - self.compiled.has_unicode_word_boundary = true; - self.byte_classes.set_word_boundary(); - // We also make sure that all ASCII bytes are in a different - // class from non-ASCII bytes. Otherwise, it's possible for - // ASCII bytes to get lumped into the same class as non-ASCII - // bytes. This in turn may cause the lazy DFA to falsely start - // when it sees an ASCII byte that maps to a byte class with - // non-ASCII bytes. This ensures that never happens. - self.byte_classes.set_range(0, 0x7F); - self.c_empty_look(prog::EmptyLook::WordBoundary) - } - WordBoundary(hir::WordBoundary::UnicodeNegate) => { - if !cfg!(feature = "unicode-perl") { + hir::Look::Start => { + self.c_empty_look(prog::EmptyLook::StartText) + } + hir::Look::End if self.compiled.is_reverse => { + self.c_empty_look(prog::EmptyLook::StartText) + } + hir::Look::End => self.c_empty_look(prog::EmptyLook::EndText), + hir::Look::StartLF if self.compiled.is_reverse => { + self.byte_classes.set_range(b'\n', b'\n'); + self.c_empty_look(prog::EmptyLook::EndLine) + } + hir::Look::StartLF => { + self.byte_classes.set_range(b'\n', b'\n'); + self.c_empty_look(prog::EmptyLook::StartLine) + } + hir::Look::EndLF if self.compiled.is_reverse => { + self.byte_classes.set_range(b'\n', b'\n'); + self.c_empty_look(prog::EmptyLook::StartLine) + } + hir::Look::EndLF => { + self.byte_classes.set_range(b'\n', b'\n'); + self.c_empty_look(prog::EmptyLook::EndLine) + } + hir::Look::StartCRLF | hir::Look::EndCRLF => { return Err(Error::Syntax( - "Unicode word boundaries are unavailable when \ - the unicode-perl feature is disabled" + "CRLF-aware line anchors are not supported yet" .to_string(), )); } - self.compiled.has_unicode_word_boundary = true; - self.byte_classes.set_word_boundary(); - // See comments above for why we set the ASCII range here. 
- self.byte_classes.set_range(0, 0x7F); - self.c_empty_look(prog::EmptyLook::NotWordBoundary) - } - WordBoundary(hir::WordBoundary::Ascii) => { - self.byte_classes.set_word_boundary(); - self.c_empty_look(prog::EmptyLook::WordBoundaryAscii) - } - WordBoundary(hir::WordBoundary::AsciiNegate) => { - self.byte_classes.set_word_boundary(); - self.c_empty_look(prog::EmptyLook::NotWordBoundaryAscii) - } - Group(ref g) => match g.kind { - hir::GroupKind::NonCapturing => self.c(&g.hir), - hir::GroupKind::CaptureIndex(index) => { - if index as usize >= self.compiled.captures.len() { - self.compiled.captures.push(None); + hir::Look::WordAscii => { + self.byte_classes.set_word_boundary(); + self.c_empty_look(prog::EmptyLook::WordBoundaryAscii) + } + hir::Look::WordAsciiNegate => { + self.byte_classes.set_word_boundary(); + self.c_empty_look(prog::EmptyLook::NotWordBoundaryAscii) + } + hir::Look::WordUnicode => { + if !cfg!(feature = "unicode-perl") { + return Err(Error::Syntax( + "Unicode word boundaries are unavailable when \ + the unicode-perl feature is disabled" + .to_string(), + )); } - self.c_capture(2 * index as usize, &g.hir) + self.compiled.has_unicode_word_boundary = true; + self.byte_classes.set_word_boundary(); + // We also make sure that all ASCII bytes are in a different + // class from non-ASCII bytes. Otherwise, it's possible for + // ASCII bytes to get lumped into the same class as non-ASCII + // bytes. This in turn may cause the lazy DFA to falsely start + // when it sees an ASCII byte that maps to a byte class with + // non-ASCII bytes. This ensures that never happens. 
+ self.byte_classes.set_range(0, 0x7F); + self.c_empty_look(prog::EmptyLook::WordBoundary) } - hir::GroupKind::CaptureName { index, ref name } => { - if index as usize >= self.compiled.captures.len() { - let n = name.to_string(); - self.compiled.captures.push(Some(n.clone())); - self.capture_name_idx.insert(n, index as usize); + hir::Look::WordUnicodeNegate => { + if !cfg!(feature = "unicode-perl") { + return Err(Error::Syntax( + "Unicode word boundaries are unavailable when \ + the unicode-perl feature is disabled" + .to_string(), + )); } - self.c_capture(2 * index as usize, &g.hir) + self.compiled.has_unicode_word_boundary = true; + self.byte_classes.set_word_boundary(); + // See comments above for why we set the ASCII range here. + self.byte_classes.set_range(0, 0x7F); + self.c_empty_look(prog::EmptyLook::NotWordBoundary) } }, + Capture(hir::Capture { index, ref name, ref sub }) => { + if index as usize >= self.compiled.captures.len() { + let name = match *name { + None => None, + Some(ref boxed_str) => Some(boxed_str.to_string()), + }; + self.compiled.captures.push(name.clone()); + if let Some(name) = name { + self.capture_name_idx.insert(name, index as usize); + } + } + self.c_capture(2 * index as usize, sub) + } Concat(ref es) => { if self.compiled.is_reverse { self.c_concat(es.iter().rev()) @@ -420,21 +432,19 @@ impl Compiler { } fn c_dotstar(&mut self) -> Result { - Ok(if !self.compiled.only_utf8() { - self.c(&Hir::repetition(hir::Repetition { - kind: hir::RepetitionKind::ZeroOrMore, - greedy: false, - hir: Box::new(Hir::any(true)), - }))? - .unwrap() + let hir = if self.compiled.only_utf8() { + Hir::dot(hir::Dot::AnyChar) } else { - self.c(&Hir::repetition(hir::Repetition { - kind: hir::RepetitionKind::ZeroOrMore, + Hir::dot(hir::Dot::AnyByte) + }; + Ok(self + .c(&Hir::repetition(hir::Repetition { + min: 0, + max: None, greedy: false, - hir: Box::new(Hir::any(false)), + sub: Box::new(hir), }))? 
- .unwrap() - }) + .unwrap()) } fn c_char(&mut self, c: char) -> ResultOrEmpty { @@ -457,7 +467,11 @@ impl Compiler { fn c_class(&mut self, ranges: &[hir::ClassUnicodeRange]) -> ResultOrEmpty { use std::mem::size_of; - assert!(!ranges.is_empty()); + if ranges.is_empty() { + return Err(Error::Syntax( + "empty character classes are not allowed".to_string(), + )); + } if self.compiled.uses_bytes() { Ok(Some(CompileClass { c: self, ranges }.compile()?)) } else { @@ -482,7 +496,11 @@ impl Compiler { &mut self, ranges: &[hir::ClassBytesRange], ) -> ResultOrEmpty { - debug_assert!(!ranges.is_empty()); + if ranges.is_empty() { + return Err(Error::Syntax( + "empty character classes are not allowed".to_string(), + )); + } let first_split_entry = self.insts.len(); let mut holes = vec![]; @@ -513,6 +531,52 @@ impl Compiler { Ok(Some(Patch { hole, entry: self.insts.len() - 1 })) } + fn c_literal(&mut self, bytes: &[u8]) -> ResultOrEmpty { + match core::str::from_utf8(bytes) { + Ok(string) => { + let mut it = string.chars(); + let Patch { mut hole, entry } = loop { + match it.next() { + None => return self.c_empty(), + Some(ch) => { + if let Some(p) = self.c_char(ch)? { + break p; + } + } + } + }; + for ch in it { + if let Some(p) = self.c_char(ch)? { + self.fill(hole, p.entry); + hole = p.hole; + } + } + Ok(Some(Patch { hole, entry })) + } + Err(_) => { + assert!(self.compiled.uses_bytes()); + let mut it = bytes.iter().copied(); + let Patch { mut hole, entry } = loop { + match it.next() { + None => return self.c_empty(), + Some(byte) => { + if let Some(p) = self.c_byte(byte)? { + break p; + } + } + } + }; + for byte in it { + if let Some(p) = self.c_byte(byte)? 
{ + self.fill(hole, p.entry); + hole = p.hole; + } + } + Ok(Some(Patch { hole, entry })) + } + } + } + fn c_concat<'a, I>(&mut self, exprs: I) -> ResultOrEmpty where I: IntoIterator, @@ -587,19 +651,15 @@ impl Compiler { } fn c_repeat(&mut self, rep: &hir::Repetition) -> ResultOrEmpty { - use regex_syntax::hir::RepetitionKind::*; - match rep.kind { - ZeroOrOne => self.c_repeat_zero_or_one(&rep.hir, rep.greedy), - ZeroOrMore => self.c_repeat_zero_or_more(&rep.hir, rep.greedy), - OneOrMore => self.c_repeat_one_or_more(&rep.hir, rep.greedy), - Range(hir::RepetitionRange::Exactly(min_max)) => { - self.c_repeat_range(&rep.hir, rep.greedy, min_max, min_max) - } - Range(hir::RepetitionRange::AtLeast(min)) => { - self.c_repeat_range_min_or_more(&rep.hir, rep.greedy, min) + match (rep.min, rep.max) { + (0, Some(1)) => self.c_repeat_zero_or_one(&rep.sub, rep.greedy), + (0, None) => self.c_repeat_zero_or_more(&rep.sub, rep.greedy), + (1, None) => self.c_repeat_one_or_more(&rep.sub, rep.greedy), + (min, None) => { + self.c_repeat_range_min_or_more(&rep.sub, rep.greedy, min) } - Range(hir::RepetitionRange::Bounded(min, max)) => { - self.c_repeat_range(&rep.hir, rep.greedy, min, max) + (min, Some(max)) => { + self.c_repeat_range(&rep.sub, rep.greedy, min, max) } } } diff --git a/src/dfa.rs b/src/dfa.rs index dc9952120e..78ed71021e 100644 --- a/src/dfa.rs +++ b/src/dfa.rs @@ -1576,7 +1576,7 @@ impl<'a> Fsm<'a> { /// inputs, a new state could be created for every byte of input. (This is /// bad for memory use, so we bound it with a cache.) fn approximate_size(&self) -> usize { - self.cache.size + self.prog.approximate_size() + self.cache.size } } diff --git a/src/error.rs b/src/error.rs index 3e0ec75210..6c341f604b 100644 --- a/src/error.rs +++ b/src/error.rs @@ -6,8 +6,26 @@ use std::iter::repeat; pub enum Error { /// A syntax error. Syntax(String), - /// The compiled program exceeded the set size limit. - /// The argument is the size limit imposed. 
+ /// The compiled program exceeded the set size + /// limit. The argument is the size limit imposed by + /// [`RegexBuilder::size_limit`](crate::RegexBuilder::size_limit). Even + /// when not configured explicitly, it defaults to a reasonable limit. + /// + /// If you're getting this error, it occurred because your regex has been + /// compiled to an intermediate state that is too big. It is important to + /// note that exceeding this limit does _not_ mean the regex is too big to + /// _work_, but rather, the regex is big enough that it may wind up being + /// surprisingly slow when used in a search. In other words, this error is + /// meant to be a practical heuristic for avoiding a performance footgun, + /// and especially so for the case where the regex pattern is coming from + /// an untrusted source. + /// + /// There are generally two ways to move forward if you hit this error. + /// The first is to find some way to use a smaller regex. The second is to + /// increase the size limit via `RegexBuilder::size_limit`. However, if + /// your regex pattern is not from a trusted source, then neither of these + /// approaches may be appropriate. Instead, you'll have to determine just + /// how big of a regex you want to allow. CompiledTooBig(usize), /// Hints that destructuring should not be exhaustive. /// diff --git a/src/exec.rs b/src/exec.rs index b9abcdc040..778a39d4c3 100644 --- a/src/exec.rs +++ b/src/exec.rs @@ -4,9 +4,9 @@ use std::panic::AssertUnwindSafe; use std::sync::Arc; #[cfg(feature = "perf-literal")] -use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind}; -use regex_syntax::hir::literal::Literals; -use regex_syntax::hir::Hir; +use aho_corasick::{AhoCorasick, MatchKind}; +use regex_syntax::hir::literal; +use regex_syntax::hir::{Hir, Look}; use regex_syntax::ParserBuilder; use crate::backtrack; @@ -78,15 +78,18 @@ struct ExecReadOnly { /// not supported.) 
Note that this program contains an embedded `.*?` /// preceding the first capture group, unless the regex is anchored at the /// beginning. + #[allow(dead_code)] dfa: Program, /// The same as above, except the program is reversed (and there is no /// preceding `.*?`). This is used by the DFA to find the starting location /// of matches. + #[allow(dead_code)] dfa_reverse: Program, /// A set of suffix literals extracted from the regex. /// /// Prefix literals are stored on the `Program`, since they are used inside /// the matching engines. + #[allow(dead_code)] suffixes: LiteralSearcher, /// An Aho-Corasick automaton with leftmost-first match semantics. /// @@ -98,7 +101,7 @@ struct ExecReadOnly { /// if we were to exhaust the ID space, we probably would have long /// surpassed the compilation size limit. #[cfg(feature = "perf-literal")] - ac: Option>, + ac: Option, /// match_type encodes as much upfront knowledge about how we're going to /// execute a search as possible. match_type: MatchType, @@ -121,8 +124,8 @@ pub struct ExecBuilder { /// literals. struct Parsed { exprs: Vec, - prefixes: Literals, - suffixes: Literals, + prefixes: literal::Seq, + suffixes: literal::Seq, bytes: bool, } @@ -228,8 +231,8 @@ impl ExecBuilder { /// Parse the current set of patterns into their AST and extract literals. 
fn parse(&self) -> Result { let mut exprs = Vec::with_capacity(self.options.pats.len()); - let mut prefixes = Some(Literals::empty()); - let mut suffixes = Some(Literals::empty()); + let mut prefixes = Some(literal::Seq::empty()); + let mut suffixes = Some(literal::Seq::empty()); let mut bytes = false; let is_set = self.options.pats.len() > 1; // If we're compiling a regex set and that set has any anchored @@ -243,54 +246,102 @@ impl ExecBuilder { .swap_greed(self.options.swap_greed) .ignore_whitespace(self.options.ignore_whitespace) .unicode(self.options.unicode) - .allow_invalid_utf8(!self.only_utf8) + .utf8(self.only_utf8) .nest_limit(self.options.nest_limit) .build(); let expr = parser.parse(pat).map_err(|e| Error::Syntax(e.to_string()))?; - bytes = bytes || !expr.is_always_utf8(); + let props = expr.properties(); + // This used to just check whether the HIR matched valid UTF-8 + // or not, but in regex-syntax 0.7, we changed our definition of + // "matches valid UTF-8" to exclude zero-width matches. And in + // particular, previously, we considered WordAsciiNegate (that + // is '(?-u:\B)') to be capable of matching invalid UTF-8. Our + // matcher engines were built under this assumption and fixing + // them is not worth it with the imminent plan to switch over to + // regex-automata. So for now, we retain the previous behavior by + // just explicitly treating the presence of a negated ASCII word + // boundary as forcing us to use a byte oriented automaton. + bytes = bytes + || !props.is_utf8() + || props.look_set().contains(Look::WordAsciiNegate); if cfg!(feature = "perf-literal") { - if !expr.is_anchored_start() && expr.is_any_anchored_start() { + if !props.look_set_prefix().contains(Look::Start) + && props.look_set().contains(Look::Start) + { // Partial anchors unfortunately make it hard to use // prefixes, so disable them. 
prefixes = None; - } else if is_set && expr.is_anchored_start() { + } else if is_set + && props.look_set_prefix().contains(Look::Start) + { // Regex sets with anchors do not go well with literal // optimizations. prefixes = None; + } else if props.look_set_prefix().contains_word() { + // The new literal extractor ignores look-around while + // the old one refused to extract prefixes from regexes + // that began with a \b. These old creaky regex internals + // can't deal with it, so we drop it. + prefixes = None; + } else if props.look_set().contains(Look::StartLF) { + // Similar to the reasoning for word boundaries, this old + // regex engine can't handle literal prefixes with '(?m:^)' + // at the beginning of a regex. + prefixes = None; } - prefixes = prefixes.and_then(|mut prefixes| { - if !prefixes.union_prefixes(&expr) { - None - } else { - Some(prefixes) - } - }); - if !expr.is_anchored_end() && expr.is_any_anchored_end() { + if !props.look_set_suffix().contains(Look::End) + && props.look_set().contains(Look::End) + { // Partial anchors unfortunately make it hard to use // suffixes, so disable them. suffixes = None; - } else if is_set && expr.is_anchored_end() { + } else if is_set && props.look_set_suffix().contains(Look::End) + { // Regex sets with anchors do not go well with literal // optimizations. suffixes = None; + } else if props.look_set_suffix().contains_word() { + // See the prefix case for reasoning here. + suffixes = None; + } else if props.look_set().contains(Look::EndLF) { + // See the prefix case for reasoning here. 
+ suffixes = None; } - suffixes = suffixes.and_then(|mut suffixes| { - if !suffixes.union_suffixes(&expr) { - None + + let (mut pres, mut suffs) = + if prefixes.is_none() && suffixes.is_none() { + (literal::Seq::infinite(), literal::Seq::infinite()) } else { - Some(suffixes) - } + literal_analysis(&expr) + }; + // These old creaky regex internals can't handle cases where + // the literal sequences are exact but there are look-around + // assertions. So we make sure the sequences are inexact if + // there are look-around assertions anywhere. This forces the + // regex engines to run instead of assuming that a literal + // match implies an overall match. + if !props.look_set().is_empty() { + pres.make_inexact(); + suffs.make_inexact(); + } + prefixes = prefixes.and_then(|mut prefixes| { + prefixes.union(&mut pres); + Some(prefixes) + }); + suffixes = suffixes.and_then(|mut suffixes| { + suffixes.union(&mut suffs); + Some(suffixes) }); } exprs.push(expr); } Ok(Parsed { exprs, - prefixes: prefixes.unwrap_or_else(Literals::empty), - suffixes: suffixes.unwrap_or_else(Literals::empty), + prefixes: prefixes.unwrap_or_else(literal::Seq::empty), + suffixes: suffixes.unwrap_or_else(literal::Seq::empty), bytes, }) } @@ -356,7 +407,7 @@ impl ExecBuilder { } #[cfg(feature = "perf-literal")] - fn build_aho_corasick(&self, parsed: &Parsed) -> Option> { + fn build_aho_corasick(&self, parsed: &Parsed) -> Option { if parsed.exprs.len() != 1 { return None; } @@ -370,10 +421,9 @@ impl ExecBuilder { return None; } Some( - AhoCorasickBuilder::new() + AhoCorasick::builder() .match_kind(MatchKind::LeftmostFirst) - .auto_configure(&lits) - .build_with_size::(&lits) + .build(&lits) // This should never happen because we'd long exceed the // compilation limit for regexes first. 
.expect("AC automaton too big"), @@ -1311,6 +1361,12 @@ impl Exec { pub fn capture_name_idx(&self) -> &Arc> { &self.ro.nfa.capture_name_idx } + + /// If the number of capture groups in every match is always the same, then + /// return that number. Otherwise return `None`. + pub fn static_captures_len(&self) -> Option { + self.ro.nfa.static_captures_len + } } impl Clone for Exec { @@ -1557,7 +1613,7 @@ fn alternation_literals(expr: &Hir) -> Option>> { // optimization pipeline, because this is a terribly inflexible way to go // about things. - if !expr.is_alternation_literal() { + if !expr.properties().is_alternation_literal() { return None; } let alts = match *expr.kind() { @@ -1565,25 +1621,19 @@ fn alternation_literals(expr: &Hir) -> Option>> { _ => return None, // one literal isn't worth it }; - let extendlit = |lit: &Literal, dst: &mut Vec| match *lit { - Literal::Unicode(c) => { - let mut buf = [0; 4]; - dst.extend_from_slice(c.encode_utf8(&mut buf).as_bytes()); - } - Literal::Byte(b) => { - dst.push(b); - } - }; - let mut lits = vec![]; for alt in alts { let mut lit = vec![]; match *alt.kind() { - HirKind::Literal(ref x) => extendlit(x, &mut lit), + HirKind::Literal(Literal(ref bytes)) => { + lit.extend_from_slice(bytes) + } HirKind::Concat(ref exprs) => { for e in exprs { match *e.kind() { - HirKind::Literal(ref x) => extendlit(x, &mut lit), + HirKind::Literal(Literal(ref bytes)) => { + lit.extend_from_slice(bytes); + } _ => unreachable!("expected literal, got {:?}", e), } } @@ -1595,6 +1645,48 @@ fn alternation_literals(expr: &Hir) -> Option>> { Some(lits) } +#[cfg(not(feature = "perf-literal"))] +fn literal_analysis(_: &Hir) -> (literal::Seq, literal::Seq) { + (literal::Seq::infinite(), literal::Seq::infinite()) +} + +#[cfg(feature = "perf-literal")] +fn literal_analysis(expr: &Hir) -> (literal::Seq, literal::Seq) { + const ATTEMPTS: [(usize, usize); 3] = [(5, 50), (4, 30), (3, 20)]; + + let mut prefixes = literal::Extractor::new() + 
.kind(literal::ExtractKind::Prefix) + .extract(expr); + for (keep, limit) in ATTEMPTS { + let len = match prefixes.len() { + None => break, + Some(len) => len, + }; + if len <= limit { + break; + } + prefixes.keep_first_bytes(keep); + prefixes.minimize_by_preference(); + } + + let mut suffixes = literal::Extractor::new() + .kind(literal::ExtractKind::Suffix) + .extract(expr); + for (keep, limit) in ATTEMPTS { + let len = match suffixes.len() { + None => break, + Some(len) => len, + }; + if len <= limit { + break; + } + suffixes.keep_last_bytes(keep); + suffixes.minimize_by_preference(); + } + + (prefixes, suffixes) +} + #[cfg(test)] mod test { #[test] diff --git a/src/expand.rs b/src/expand.rs index 67b514926a..98fafc949f 100644 --- a/src/expand.rs +++ b/src/expand.rs @@ -182,7 +182,8 @@ fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option> { }) } -/// Returns true if and only if the given byte is allowed in a capture name. +/// Returns true if and only if the given byte is allowed in a capture name +/// written in non-brace form. fn is_valid_cap_letter(b: u8) -> bool { match b { b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' => true, @@ -236,4 +237,11 @@ mod tests { find!(find_cap_ref17, "$x_$y", c!("x_", 3)); find!(find_cap_ref18, "${#}", c!("#", 4)); find!(find_cap_ref19, "${Z[}", c!("Z[", 5)); + find!(find_cap_ref20, "${¾}", c!("¾", 5)); + find!(find_cap_ref21, "${¾a}", c!("¾a", 6)); + find!(find_cap_ref22, "${a¾}", c!("a¾", 6)); + find!(find_cap_ref23, "${☃}", c!("☃", 6)); + find!(find_cap_ref24, "${a☃}", c!("a☃", 7)); + find!(find_cap_ref25, "${☃a}", c!("☃a", 7)); + find!(find_cap_ref26, "${名字}", c!("名字", 9)); } diff --git a/src/lib.rs b/src/lib.rs index 6b95739c5c..82c1b77ad8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -199,6 +199,8 @@ instead.) This implementation executes regular expressions **only** on valid UTF-8 while exposing match locations as byte indices into the search string. 
(To relax this restriction, use the [`bytes`](bytes/index.html) sub-module.) +Conceptually, the regex engine works by matching a haystack as if it were a +sequence of Unicode scalar values. Only simple case folding is supported. Namely, when matching case-insensitively, the characters are first mapped using the "simple" case @@ -285,9 +287,9 @@ a separate crate, [`regex-syntax`](https://docs.rs/regex-syntax). . any character except new line (includes new line with s flag) \d digit (\p{Nd}) \D not digit -\pN One-letter name Unicode character class +\pX Unicode character class identified by a one-letter name \p{Greek} Unicode character class (general category or script) -\PN Negated one-letter name Unicode character class +\PX Negated Unicode character class identified by a one-letter name \P{Greek} negated Unicode character class (general category or script) @@ -325,6 +327,25 @@ xy concatenation (x followed by y) x|y alternation (x or y, prefer x) +This example shows how an alternation works, and what it means to prefer a +branch in the alternation over subsequent branches. + +``` +use regex::Regex; + +let haystack = "samwise"; +// If 'samwise' comes first in our alternation, then it is +// preferred as a match, even if the regex engine could +// technically detect that 'sam' led to a match earlier. +let re = Regex::new(r"samwise|sam").unwrap(); +assert_eq!("samwise", re.find(haystack).unwrap().as_str()); +// But if 'sam' comes first, then it will match instead. +// In this case, it is impossible for 'samwise' to match +// because 'sam' is a prefix of it. +let re = Regex::new(r"sam|samwise").unwrap(); +assert_eq!("sam", re.find(haystack).unwrap().as_str()); +``` + ## Repetitions
@@ -360,12 +381,19 @@ regex matches `abc` at positions `0`, `1`, `2` and `3`.
 
 
 (exp)          numbered capture group (indexed by opening parenthesis)
-(?P<name>exp)  named (also numbered) capture group (allowed chars: [_0-9a-zA-Z.\[\]])
+(?P<name>exp)  named (also numbered) capture group (names must be alpha-numeric)
+(?<name>exp)   named (also numbered) capture group (names must be alpha-numeric)
 (?:exp)        non-capturing group
 (?flags)       set flags within current group
 (?flags:exp)   set flags for exp (non-capturing)
 
+Capture group names must be any sequence of alpha-numeric Unicode codepoints, +in addition to `.`, `_`, `[` and `]`. Names must start with either an `_` or +an alphabetic codepoint. Alphabetic codepoints correspond to the `Alphabetic` +Unicode property, while numeric codepoints correspond to the union of the +`Decimal_Number`, `Letter_Number` and `Other_Number` general categories. + Flags are each a single character. For example, `(?x)` sets the flag `x` and `(?-x)` clears the flag `x`. Multiple flags can be set or cleared at the same time: `(?xy)` sets both the `x` and `y` flags and `(?x-y)` sets @@ -379,9 +407,13 @@ m multi-line mode: ^ and $ match begin/end of line s allow . to match \n U swap the meaning of x* and x*? u Unicode support (enabled by default) -x ignore whitespace and allow line comments (starting with `#`) +x verbose mode, ignores whitespace and allows line comments (starting with `#`)
+Note that in verbose mode, whitespace is ignored everywhere, including within +character classes. To insert whitespace, use its escaped form or a hex literal. +For example, `\ ` or `\x20` for an ASCII space. + Flags can be toggled within a pattern. Here's an example that matches case-insensitively for the first part but case-sensitively for the second part: diff --git a/src/literal/imp.rs b/src/literal/imp.rs index 90b2f11606..75fa6e37b2 100644 --- a/src/literal/imp.rs +++ b/src/literal/imp.rs @@ -1,8 +1,8 @@ use std::mem; -use aho_corasick::{self, packed, AhoCorasick, AhoCorasickBuilder}; +use aho_corasick::{self, packed, AhoCorasick}; use memchr::{memchr, memchr2, memchr3, memmem}; -use regex_syntax::hir::literal::{Literal, Literals}; +use regex_syntax::hir::literal::{Literal, Seq}; /// A prefix extracted from a compiled regular expression. /// @@ -26,7 +26,7 @@ enum Matcher { /// A single substring, using vector accelerated routines when available. Memmem(Memmem), /// An Aho-Corasick automaton. - AC { ac: AhoCorasick, lits: Vec }, + AC { ac: AhoCorasick, lits: Vec }, /// A packed multiple substring searcher, using SIMD. /// /// Note that Aho-Corasick will actually use this packed searcher @@ -39,27 +39,26 @@ enum Matcher { impl LiteralSearcher { /// Returns a matcher that never matches and never advances the input. pub fn empty() -> Self { - Self::new(Literals::empty(), Matcher::Empty) + Self::new(Seq::infinite(), Matcher::Empty) } /// Returns a matcher for literal prefixes from the given set. - pub fn prefixes(lits: Literals) -> Self { + pub fn prefixes(lits: Seq) -> Self { let matcher = Matcher::prefixes(&lits); Self::new(lits, matcher) } /// Returns a matcher for literal suffixes from the given set. 
- pub fn suffixes(lits: Literals) -> Self { + pub fn suffixes(lits: Seq) -> Self { let matcher = Matcher::suffixes(&lits); Self::new(lits, matcher) } - fn new(lits: Literals, matcher: Matcher) -> Self { - let complete = lits.all_complete(); + fn new(lits: Seq, matcher: Matcher) -> Self { LiteralSearcher { - complete, - lcp: Memmem::new(lits.longest_common_prefix()), - lcs: Memmem::new(lits.longest_common_suffix()), + complete: lits.is_exact(), + lcp: Memmem::new(lits.longest_common_prefix().unwrap_or(b"")), + lcs: Memmem::new(lits.longest_common_suffix().unwrap_or(b"")), matcher, } } @@ -150,7 +149,7 @@ impl LiteralSearcher { Empty => 0, Bytes(ref sset) => sset.dense.len(), Memmem(_) => 1, - AC { ref ac, .. } => ac.pattern_count(), + AC { ref ac, .. } => ac.patterns_len(), Packed { ref lits, .. } => lits.len(), } } @@ -162,27 +161,31 @@ impl LiteralSearcher { Empty => 0, Bytes(ref sset) => sset.approximate_size(), Memmem(ref single) => single.approximate_size(), - AC { ref ac, .. } => ac.heap_bytes(), - Packed { ref s, .. } => s.heap_bytes(), + AC { ref ac, .. } => ac.memory_usage(), + Packed { ref s, .. } => s.memory_usage(), } } } impl Matcher { - fn prefixes(lits: &Literals) -> Self { + fn prefixes(lits: &Seq) -> Self { let sset = SingleByteSet::prefixes(lits); Matcher::new(lits, sset) } - fn suffixes(lits: &Literals) -> Self { + fn suffixes(lits: &Seq) -> Self { let sset = SingleByteSet::suffixes(lits); Matcher::new(lits, sset) } - fn new(lits: &Literals, sset: SingleByteSet) -> Self { - if lits.literals().is_empty() { + fn new(lits: &Seq, sset: SingleByteSet) -> Self { + if lits.is_empty() || lits.min_literal_len() == Some(0) { return Matcher::Empty; } + let lits = match lits.literals() { + None => return Matcher::Empty, + Some(members) => members, + }; if sset.dense.len() >= 26 { // Avoid trying to match a large number of single bytes. 
// This is *very* sensitive to a frequency analysis comparison @@ -195,26 +198,26 @@ impl Matcher { if sset.complete { return Matcher::Bytes(sset); } - if lits.literals().len() == 1 { - return Matcher::Memmem(Memmem::new(&lits.literals()[0])); + if lits.len() == 1 { + return Matcher::Memmem(Memmem::new(lits[0].as_bytes())); } - let pats = lits.literals().to_owned(); + let pats: Vec<&[u8]> = lits.iter().map(|lit| lit.as_bytes()).collect(); let is_aho_corasick_fast = sset.dense.len() <= 1 && sset.all_ascii; - if lits.literals().len() <= 100 && !is_aho_corasick_fast { + if lits.len() <= 100 && !is_aho_corasick_fast { let mut builder = packed::Config::new() .match_kind(packed::MatchKind::LeftmostFirst) .builder(); if let Some(s) = builder.extend(&pats).build() { - return Matcher::Packed { s, lits: pats }; + return Matcher::Packed { s, lits: lits.to_owned() }; } } - let ac = AhoCorasickBuilder::new() + let ac = AhoCorasick::builder() .match_kind(aho_corasick::MatchKind::LeftmostFirst) - .dfa(true) - .build_with_size::(&pats) + .kind(Some(aho_corasick::AhoCorasickKind::DFA)) + .build(&pats) .unwrap(); - Matcher::AC { ac, lits: pats } + Matcher::AC { ac, lits: lits.to_owned() } } } @@ -257,7 +260,7 @@ impl<'a> Iterator for LiteralIter<'a> { } else { let next = &lits[0]; *lits = &lits[1..]; - Some(&**next) + Some(next.as_bytes()) } } LiteralIter::Packed(ref mut lits) => { @@ -266,7 +269,7 @@ impl<'a> Iterator for LiteralIter<'a> { } else { let next = &lits[0]; *lits = &lits[1..]; - Some(&**next) + Some(next.as_bytes()) } } } @@ -291,11 +294,15 @@ impl SingleByteSet { } } - fn prefixes(lits: &Literals) -> SingleByteSet { + fn prefixes(lits: &Seq) -> SingleByteSet { let mut sset = SingleByteSet::new(); - for lit in lits.literals() { + let lits = match lits.literals() { + None => return sset, + Some(lits) => lits, + }; + for lit in lits.iter() { sset.complete = sset.complete && lit.len() == 1; - if let Some(&b) = lit.get(0) { + if let Some(&b) = lit.as_bytes().get(0) { if 
!sset.sparse[b as usize] { if b > 0x7F { sset.all_ascii = false; @@ -308,11 +315,15 @@ impl SingleByteSet { sset } - fn suffixes(lits: &Literals) -> SingleByteSet { + fn suffixes(lits: &Seq) -> SingleByteSet { let mut sset = SingleByteSet::new(); - for lit in lits.literals() { + let lits = match lits.literals() { + None => return sset, + Some(lits) => lits, + }; + for lit in lits.iter() { sset.complete = sset.complete && lit.len() == 1; - if let Some(&b) = lit.get(lit.len().checked_sub(1).unwrap()) { + if let Some(&b) = lit.as_bytes().last() { if !sset.sparse[b as usize] { if b > 0x7F { sset.all_ascii = false; diff --git a/src/literal/mod.rs b/src/literal/mod.rs index 980f523309..b9fb77aed9 100644 --- a/src/literal/mod.rs +++ b/src/literal/mod.rs @@ -6,7 +6,7 @@ mod imp; #[allow(missing_docs)] #[cfg(not(feature = "perf-literal"))] mod imp { - use regex_syntax::hir::literal::Literals; + use regex_syntax::hir::literal::Seq; #[derive(Clone, Debug)] pub struct LiteralSearcher(()); @@ -16,11 +16,11 @@ mod imp { LiteralSearcher(()) } - pub fn prefixes(_: Literals) -> Self { + pub fn prefixes(_: Seq) -> Self { LiteralSearcher(()) } - pub fn suffixes(_: Literals) -> Self { + pub fn suffixes(_: Seq) -> Self { LiteralSearcher(()) } diff --git a/src/prog.rs b/src/prog.rs index c211f71d8a..100862cf1b 100644 --- a/src/prog.rs +++ b/src/prog.rs @@ -27,6 +27,9 @@ pub struct Program { pub captures: Vec>, /// Pointers to all named capture groups into `captures`. pub capture_name_idx: Arc>, + /// If the number of capture groups is the same for all possible matches, + /// then this is that number. + pub static_captures_len: Option, /// A pointer to the start instruction. This can vary depending on how /// the program was compiled. 
For example, programs for use with the DFA /// engine have a `.*?` inserted at the beginning of unanchored regular @@ -83,6 +86,7 @@ impl Program { matches: vec![], captures: vec![], capture_name_idx: Arc::new(HashMap::new()), + static_captures_len: None, start: 0, byte_classes: vec![0; 256], only_utf8: true, diff --git a/src/re_bytes.rs b/src/re_bytes.rs index 07e9f98acc..e3a3b019b5 100644 --- a/src/re_bytes.rs +++ b/src/re_bytes.rs @@ -17,7 +17,7 @@ use crate::re_trait::{self, RegularExpression, SubCapturesPosIter}; /// Match represents a single match of a regex in a haystack. /// /// The lifetime parameter `'t` refers to the lifetime of the matched text. -#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[derive(Copy, Clone, Eq, PartialEq)] pub struct Match<'t> { text: &'t [u8], start: usize, @@ -37,6 +37,18 @@ impl<'t> Match<'t> { self.end } + /// Returns true if and only if this match has a length of zero. + #[inline] + pub fn is_empty(&self) -> bool { + self.start == self.end + } + + /// Returns the length, in bytes, of this match. + #[inline] + pub fn len(&self) -> usize { + self.end - self.start + } + /// Returns the range over the starting and ending byte offsets of the /// match in the haystack. #[inline] @@ -57,6 +69,24 @@ impl<'t> Match<'t> { } } + +impl<'t> std::fmt::Debug for Match<'t> { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let mut fmt = f.debug_struct("Match"); + fmt.field("start", &self.start).field("end", &self.end); + if let Ok(s) = std::str::from_utf8(self.as_bytes()) { + fmt.field("bytes", &s); + } else { + // FIXME: It would be nice if this could be printed as a string + // with invalid UTF-8 replaced with hex escapes. An alloc would + // probably be okay if that makes it easier, but regex-automata does + // (at time of writing) have internal routines that do this. So + // maybe we should expose them. 
+ fmt.field("bytes", &self.as_bytes()); + } + fmt.finish() + } +} + impl<'t> From> for Range { fn from(m: Match<'t>) -> Range { m.range() @@ -253,12 +283,7 @@ impl Regex { /// The `0`th capture group is always unnamed, so it must always be /// accessed with `get(0)` or `[0]`. pub fn captures<'t>(&self, text: &'t [u8]) -> Option> { - let mut locs = self.capture_locations(); - self.captures_read_at(&mut locs, text, 0).map(move |_| Captures { - text, - locs: locs.0, - named_groups: self.0.capture_name_idx().clone(), - }) + self.captures_at(text, 0) } /// Returns an iterator over all the non-overlapping capture groups matched @@ -537,7 +562,14 @@ impl Regex { /// This method may have the same performance characteristics as /// `is_match`, except it provides an end location for a match. In /// particular, the location returned *may be shorter* than the proper end - /// of the leftmost-first match. + /// of the leftmost-first match that you would find via `Regex::find`. + /// + /// Note that it is not guaranteed that this routine finds the shortest or + /// "earliest" possible match. Instead, the main idea of this API is that + /// it returns the offset at the point at which the internal regex engine + /// has determined that a match has occurred. This may vary depending on + /// which internal regex engine is used, and thus, the offset itself may + /// change. /// /// # Example /// @@ -598,6 +630,25 @@ impl Regex { .map(|(s, e)| Match::new(text, s, e)) } + /// Returns the same as [`Regex::captures`], but starts the search at the + /// given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. 
+ pub fn captures_at<'t>( + &self, + text: &'t [u8], + start: usize, + ) -> Option> { + let mut locs = self.capture_locations(); + self.captures_read_at(&mut locs, text, start).map(move |_| Captures { + text, + locs: locs.0, + named_groups: self.0.capture_name_idx().clone(), + }) + } + /// This is like `captures`, but uses /// [`CaptureLocations`](struct.CaptureLocations.html) /// instead of @@ -667,6 +718,46 @@ impl Regex { self.0.capture_names().len() } + /// Returns the total number of capturing groups that appear in every + /// possible match. + /// + /// If the number of capture groups can vary depending on the match, then + /// this returns `None`. That is, a value is only returned when the number + /// of matching groups is invariant or "static." + /// + /// Note that like [`Regex::captures_len`], this **does** include the + /// implicit capturing group corresponding to the entire match. Therefore, + /// when a non-None value is returned, it is guaranteed to be at least `1`. + /// Stated differently, a return value of `Some(0)` is impossible. + /// + /// # Example + /// + /// This shows a few cases where a static number of capture groups is + /// available and a few cases where it is not. 
+ /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let len = |pattern| { + /// Regex::new(pattern).map(|re| re.static_captures_len()) + /// }; + /// + /// assert_eq!(Some(1), len("a")?); + /// assert_eq!(Some(2), len("(a)")?); + /// assert_eq!(Some(2), len("(a)|(b)")?); + /// assert_eq!(Some(3), len("(a)(b)|(c)(d)")?); + /// assert_eq!(None, len("(a)|b")?); + /// assert_eq!(None, len("a|(b)")?); + /// assert_eq!(None, len("(b)*")?); + /// assert_eq!(Some(2), len("(b)+")?); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn static_captures_len(&self) -> Option { + self.0.static_captures_len().map(|len| len.saturating_add(1)) + } + /// Returns an empty set of capture locations that can be reused in /// multiple calls to `captures_read` or `captures_read_at`. pub fn capture_locations(&self) -> CaptureLocations { @@ -856,6 +947,27 @@ impl<'r> FusedIterator for CaptureNames<'r> {} /// In order to build a value of this type, you'll need to call the /// `capture_locations` method on the `Regex` being used to execute the search. /// The value returned can then be reused in subsequent searches. +/// +/// # Example +/// +/// This example shows how to create and use `CaptureLocations` in a search. +/// +/// ``` +/// use regex::bytes::Regex; +/// +/// let re = Regex::new(r"(?\w+)\s+(?\w+)").unwrap(); +/// let mut locs = re.capture_locations(); +/// let m = re.captures_read(&mut locs, b"Bruce Springsteen").unwrap(); +/// assert_eq!(0..17, m.range()); +/// assert_eq!(Some((0, 17)), locs.get(0)); +/// assert_eq!(Some((0, 5)), locs.get(1)); +/// assert_eq!(Some((6, 17)), locs.get(2)); +/// +/// // Asking for an invalid capture group always returns None. 
+/// assert_eq!(None, locs.get(3)); +/// assert_eq!(None, locs.get(34973498648)); +/// assert_eq!(None, locs.get(9944060567225171988)); +/// ``` #[derive(Clone, Debug)] pub struct CaptureLocations(re_trait::Locations); diff --git a/src/re_set.rs b/src/re_set.rs index a6d886d761..7c8253f0ca 100644 --- a/src/re_set.rs +++ b/src/re_set.rs @@ -289,6 +289,12 @@ impl RegexSet { } } +impl Default for RegexSet { + fn default() -> Self { + RegexSet::empty() + } +} + /// A set of matches returned by a regex set. #[derive(Clone, Debug)] pub struct SetMatches { @@ -315,6 +321,11 @@ impl SetMatches { } /// The total number of regexes in the set that created these matches. + /// + /// **WARNING:** This always returns the same value as [`RegexSet::len`]. + /// In particular, it does *not* return the number of elements yielded by + /// [`SetMatches::iter`]. The only way to determine the total number of + /// matched regexes is to iterate over them. pub fn len(&self) -> usize { self.matches.len() } diff --git a/src/re_trait.rs b/src/re_trait.rs index d0c717df5a..505810c848 100644 --- a/src/re_trait.rs +++ b/src/re_trait.rs @@ -20,7 +20,7 @@ impl Locations { /// not match anything. The positions returned are *always* byte indices /// with respect to the original string matched. pub fn pos(&self, i: usize) -> Option<(usize, usize)> { - let (s, e) = (i * 2, i * 2 + 1); + let (s, e) = (i.checked_mul(2)?, i.checked_mul(2)?.checked_add(1)?); match (self.0.get(s), self.0.get(e)) { (Some(&Some(s)), Some(&Some(e))) => Some((s, e)), _ => None, diff --git a/src/re_unicode.rs b/src/re_unicode.rs index 197510ea0d..57689086dc 100644 --- a/src/re_unicode.rs +++ b/src/re_unicode.rs @@ -25,7 +25,7 @@ pub fn escape(text: &str) -> String { /// Match represents a single match of a regex in a haystack. /// /// The lifetime parameter `'t` refers to the lifetime of the matched text. 
-#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[derive(Copy, Clone, Eq, PartialEq)] pub struct Match<'t> { text: &'t str, start: usize, @@ -45,6 +45,18 @@ impl<'t> Match<'t> { self.end } + /// Returns true if and only if this match has a length of zero. + #[inline] + pub fn is_empty(&self) -> bool { + self.start == self.end + } + + /// Returns the length, in bytes, of this match. + #[inline] + pub fn len(&self) -> usize { + self.end - self.start + } + /// Returns the range over the starting and ending byte offsets of the /// match in the haystack. #[inline] @@ -65,6 +77,16 @@ impl<'t> Match<'t> { } } +impl<'t> std::fmt::Debug for Match<'t> { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + f.debug_struct("Match") + .field("start", &self.start) + .field("end", &self.end) + .field("string", &self.as_str()) + .finish() + } +} + impl<'t> From> for &'t str { fn from(m: Match<'t>) -> &'t str { m.as_str() @@ -309,12 +331,7 @@ impl Regex { /// The `0`th capture group is always unnamed, so it must always be /// accessed with `get(0)` or `[0]`. pub fn captures<'t>(&self, text: &'t str) -> Option> { - let mut locs = self.capture_locations(); - self.captures_read_at(&mut locs, text, 0).map(move |_| Captures { - text, - locs: locs.0, - named_groups: self.0.capture_name_idx().clone(), - }) + self.captures_at(text, 0) } /// Returns an iterator over all the non-overlapping capture groups matched @@ -595,7 +612,14 @@ impl Regex { /// This method may have the same performance characteristics as /// `is_match`, except it provides an end location for a match. In /// particular, the location returned *may be shorter* than the proper end - /// of the leftmost-first match. + /// of the leftmost-first match that you would find via `Regex::find`. + /// + /// Note that it is not guaranteed that this routine finds the shortest or + /// "earliest" possible match. 
Instead, the main idea of this API is that + /// it returns the offset at the point at which the internal regex engine + /// has determined that a match has occurred. This may vary depending on + /// which internal regex engine is used, and thus, the offset itself may + /// change. /// /// # Example /// @@ -615,12 +639,12 @@ impl Regex { self.shortest_match_at(text, 0) } - /// Returns the same as shortest_match, but starts the search at the given - /// offset. + /// Returns the same as `shortest_match`, but starts the search at the + /// given offset. /// /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. + /// context into consideration. For example, the `\A` anchor can only match + /// when `start == 0`. pub fn shortest_match_at( &self, text: &str, @@ -656,6 +680,25 @@ impl Regex { .map(|(s, e)| Match::new(text, s, e)) } + /// Returns the same as [`Regex::captures`], but starts the search at the + /// given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + pub fn captures_at<'t>( + &self, + text: &'t str, + start: usize, + ) -> Option> { + let mut locs = self.capture_locations(); + self.captures_read_at(&mut locs, text, start).map(move |_| Captures { + text, + locs: locs.0, + named_groups: self.0.capture_name_idx().clone(), + }) + } + /// This is like `captures`, but uses /// [`CaptureLocations`](struct.CaptureLocations.html) /// instead of @@ -725,6 +768,46 @@ impl Regex { self.0.capture_names().len() } + /// Returns the total number of capturing groups that appear in every + /// possible match. + /// + /// If the number of capture groups can vary depending on the match, then + /// this returns `None`. 
That is, a value is only returned when the number + /// of matching groups is invariant or "static." + /// + /// Note that like [`Regex::captures_len`], this **does** include the + /// implicit capturing group corresponding to the entire match. Therefore, + /// when a non-None value is returned, it is guaranteed to be at least `1`. + /// Stated differently, a return value of `Some(0)` is impossible. + /// + /// # Example + /// + /// This shows a few cases where a static number of capture groups is + /// available and a few cases where it is not. + /// + /// ``` + /// use regex::Regex; + /// + /// let len = |pattern| { + /// Regex::new(pattern).map(|re| re.static_captures_len()) + /// }; + /// + /// assert_eq!(Some(1), len("a")?); + /// assert_eq!(Some(2), len("(a)")?); + /// assert_eq!(Some(2), len("(a)|(b)")?); + /// assert_eq!(Some(3), len("(a)(b)|(c)(d)")?); + /// assert_eq!(None, len("(a)|b")?); + /// assert_eq!(None, len("a|(b)")?); + /// assert_eq!(None, len("(b)*")?); + /// assert_eq!(Some(2), len("(b)+")?); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn static_captures_len(&self) -> Option { + self.0.static_captures_len().map(|len| len.saturating_add(1)) + } + /// Returns an empty set of capture locations that can be reused in /// multiple calls to `captures_read` or `captures_read_at`. pub fn capture_locations(&self) -> CaptureLocations { @@ -866,6 +949,27 @@ impl<'r, 't> FusedIterator for SplitN<'r, 't> {} /// In order to build a value of this type, you'll need to call the /// `capture_locations` method on the `Regex` being used to execute the search. /// The value returned can then be reused in subsequent searches. +/// +/// # Example +/// +/// This example shows how to create and use `CaptureLocations` in a search. 
+/// +/// ``` +/// use regex::Regex; +/// +/// let re = Regex::new(r"(?\w+)\s+(?\w+)").unwrap(); +/// let mut locs = re.capture_locations(); +/// let m = re.captures_read(&mut locs, "Bruce Springsteen").unwrap(); +/// assert_eq!(0..17, m.range()); +/// assert_eq!(Some((0, 17)), locs.get(0)); +/// assert_eq!(Some((0, 5)), locs.get(1)); +/// assert_eq!(Some((6, 17)), locs.get(2)); +/// +/// // Asking for an invalid capture group always returns None. +/// assert_eq!(None, locs.get(3)); +/// assert_eq!(None, locs.get(34973498648)); +/// assert_eq!(None, locs.get(9944060567225171988)); +/// ``` #[derive(Clone, Debug)] pub struct CaptureLocations(re_trait::Locations); diff --git a/tests/replace.rs b/tests/replace.rs index d65be072ff..f23c575515 100644 --- a/tests/replace.rs +++ b/tests/replace.rs @@ -15,7 +15,7 @@ replace!(all, replace_all, r"[0-9]", "age: 26", t!("Z"), "age: ZZ"); replace!( groups, replace, - r"(?-u)(\S+)\s+(\S+)", + r"([^ ]+)[ ]+([^ ]+)", "w1 w2", t!("$2 $1"), "w2 w1" @@ -23,7 +23,7 @@ replace!( replace!( double_dollar, replace, - r"(?-u)(\S+)\s+(\S+)", + r"([^ ]+)[ ]+([^ ]+)", "w1 w2", t!("$2 $$1"), "w2 $1" @@ -33,7 +33,7 @@ replace!( replace!( named, replace_all, - r"(?-u)(?P\S+)\s+(?P\S+)(?P\s*)", + r"(?P[^ ]+)[ ]+(?P[^ ]+)(?P[ ]*)", "w1 w2 w3 w4", t!("$last $first$space"), "w2 w1 w4 w3" @@ -51,7 +51,7 @@ replace!(number_hypen, replace, r"(.)(.)", "ab", t!("$1-$2"), "a-b"); replace!( simple_expand, replace_all, - r"(?-u)(\w) (\w)", + r"([a-z]) ([a-z])", "a b", t!("$2 $1"), "b a" @@ -59,7 +59,7 @@ replace!( replace!( literal_dollar1, replace_all, - r"(?-u)(\w+) (\w+)", + r"([a-z]+) ([a-z]+)", "a b", t!("$$1"), "$1" @@ -67,7 +67,7 @@ replace!( replace!( literal_dollar2, replace_all, - r"(?-u)(\w+) (\w+)", + r"([a-z]+) ([a-z]+)", "a b", t!("$2 $$c $1"), "b $c a" @@ -75,7 +75,7 @@ replace!( replace!( no_expand1, replace, - r"(?-u)(\S+)\s+(\S+)", + r"([^ ]+)[ ]+([^ ]+)", "w1 w2", no_expand!("$2 $1"), "$2 $1" @@ -83,7 +83,7 @@ replace!( replace!( 
no_expand2, replace, - r"(?-u)(\S+)\s+(\S+)", + r"([^ ]+)[ ]+([^ ]+)", "w1 w2", no_expand!("$$1"), "$$1" diff --git a/tests/set.rs b/tests/set.rs index 37fcf8700c..d1144d6623 100644 --- a/tests/set.rs +++ b/tests/set.rs @@ -65,3 +65,10 @@ fn len_and_empty() { assert_eq!(not_empty.len(), 2); assert!(!not_empty.is_empty()); } + +#[test] +fn default_set_is_empty() { + let set: regex::bytes::RegexSet = Default::default(); + assert_eq!(set.len(), 0); + assert!(set.is_empty()); +} diff --git a/tests/unicode.rs b/tests/unicode.rs index 9b32286247..d7dbdd31b8 100644 --- a/tests/unicode.rs +++ b/tests/unicode.rs @@ -35,6 +35,8 @@ mat!(uni_not_boundary_ogham, r"\d\B", "6 ", None); // We should test more, but there's a lot. Write a script to generate more of // these tests. mat!(uni_class_gencat_cased_letter, r"\p{Cased_Letter}", "A", Some((0, 3))); +mat!(uni_class_gencat_cased_letter2, r"\p{gc=LC}", "A", Some((0, 3))); +mat!(uni_class_gencat_cased_letter3, r"\p{LC}", "A", Some((0, 3))); mat!( uni_class_gencat_close_punctuation, r"\p{Close_Punctuation}", @@ -77,6 +79,7 @@ mat!(uni_class_gencat_format, r"\p{Format}", "\u{E007F}", Some((0, 4))); // See: https://github.com/rust-lang/regex/issues/719 mat!(uni_class_gencat_format_abbrev1, r"\p{cf}", "\u{E007F}", Some((0, 4))); mat!(uni_class_gencat_format_abbrev2, r"\p{gc=cf}", "\u{E007F}", Some((0, 4))); +mat!(uni_class_gencat_format_abbrev3, r"\p{Sc}", "$", Some((0, 1))); mat!( uni_class_gencat_initial_punctuation, r"\p{Initial_Punctuation}",