Skip to content

Commit

Permalink
Small cleanups in unescaping code.
Browse files Browse the repository at this point in the history
- Rename `unescape_raw_str_or_byte_str` as
  `unescape_raw_str_or_raw_byte_str`, which is more accurate.
- Remove the unused `Mode::in_single_quotes` method.
- Make some assertions more precise, and add a missing one to
  `unescape_char_or_byte`.
- Change all the assertions to `debug_assert!`, because this code is
  reasonably hot, and the assertions aren't required for memory safety,
  and any violations are likely to be sufficiently obvious that normal
  tests will trigger them.
  • Loading branch information
nnethercote committed Sep 27, 2022
1 parent c91c647 commit 94cb5e8
Showing 1 changed file with 11 additions and 14 deletions.
25 changes: 11 additions & 14 deletions compiler/rustc_lexer/src/unescape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ where
// NOTE: Raw strings do not perform any explicit character escaping, here we
// only translate CRLF to LF and produce errors on bare CR.
Mode::RawStr | Mode::RawByteStr => {
unescape_raw_str_or_byte_str(literal_text, mode, callback)
unescape_raw_str_or_raw_byte_str(literal_text, mode, callback)
}
}
}
Expand All @@ -105,7 +105,7 @@ pub fn unescape_byte_literal<F>(literal_text: &str, mode: Mode, callback: &mut F
where
F: FnMut(Range<usize>, Result<u8, EscapeError>),
{
assert!(mode.is_bytes());
debug_assert!(mode.is_bytes());
unescape_literal(literal_text, mode, &mut |range, result| {
callback(range, result.map(byte_from_char));
})
Expand All @@ -129,7 +129,7 @@ pub fn unescape_byte(literal_text: &str) -> Result<u8, (usize, EscapeError)> {
}

/// What kind of literal do we parse.
#[derive(Debug, Clone, Copy)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Mode {
Char,
Str,
Expand All @@ -140,17 +140,13 @@ pub enum Mode {
}

impl Mode {
pub fn in_single_quotes(self) -> bool {
pub fn in_double_quotes(self) -> bool {
match self {
Mode::Char | Mode::Byte => true,
Mode::Str | Mode::ByteStr | Mode::RawStr | Mode::RawByteStr => false,
Mode::Str | Mode::ByteStr | Mode::RawStr | Mode::RawByteStr => true,
Mode::Char | Mode::Byte => false,
}
}

pub fn in_double_quotes(self) -> bool {
!self.in_single_quotes()
}

pub fn is_bytes(self) -> bool {
match self {
Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true,
Expand Down Expand Up @@ -263,6 +259,7 @@ fn ascii_check(first_char: char, mode: Mode) -> Result<char, EscapeError> {
}

fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
debug_assert!(mode == Mode::Char || mode == Mode::Byte);
let first_char = chars.next().ok_or(EscapeError::ZeroChars)?;
let res = match first_char {
'\\' => scan_escape(chars, mode),
Expand All @@ -282,7 +279,7 @@ fn unescape_str_or_byte_str<F>(src: &str, mode: Mode, callback: &mut F)
where
F: FnMut(Range<usize>, Result<char, EscapeError>),
{
assert!(mode.in_double_quotes());
debug_assert!(mode == Mode::Str || mode == Mode::ByteStr);
let initial_len = src.len();
let mut chars = src.chars();
while let Some(first_char) = chars.next() {
Expand Down Expand Up @@ -344,11 +341,11 @@ where
/// sequence of characters or errors.
/// NOTE: Raw strings do not perform any explicit character escaping, here we
/// only translate CRLF to LF and produce errors on bare CR.
fn unescape_raw_str_or_byte_str<F>(literal_text: &str, mode: Mode, callback: &mut F)
fn unescape_raw_str_or_raw_byte_str<F>(literal_text: &str, mode: Mode, callback: &mut F)
where
F: FnMut(Range<usize>, Result<char, EscapeError>),
{
assert!(mode.in_double_quotes());
debug_assert!(mode == Mode::RawStr || mode == Mode::RawByteStr);
let initial_len = literal_text.len();

let mut chars = literal_text.chars();
Expand All @@ -368,7 +365,7 @@ where

fn byte_from_char(c: char) -> u8 {
let res = c as u32;
assert!(res <= u8::MAX as u32, "guaranteed because of Mode::ByteStr");
debug_assert!(res <= u8::MAX as u32, "guaranteed because of Mode::ByteStr");
res as u8
}

Expand Down

0 comments on commit 94cb5e8

Please sign in to comment.