Skip to content

Commit

Permalink
Optimize escape_ascii
Browse files Browse the repository at this point in the history
  • Loading branch information
clarfonthey committed Sep 1, 2024
1 parent a48861a commit 3af7d57
Showing 1 changed file with 94 additions and 23 deletions.
117 changes: 94 additions & 23 deletions library/core/src/escape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,38 +18,109 @@ const fn backslash<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u
(output, 0..2)
}

#[inline]
const fn hex_escape<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
const { assert!(N >= 4) };

let mut output = [ascii::Char::Null; N];

let hi = HEX_DIGITS[(byte >> 4) as usize];
let lo = HEX_DIGITS[(byte & 0xf) as usize];

output[0] = ascii::Char::ReverseSolidus;
output[1] = ascii::Char::SmallX;
output[2] = hi;
output[3] = lo;

(output, 0..4)
}

#[inline]
const fn verbatim<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u8>) {
const { assert!(N >= 1) };

let mut output = [ascii::Char::Null; N];

output[0] = a;

(output, 0..1)
}

/// Escapes an ASCII character.
///
/// Returns a buffer and the length of the escaped representation.
const fn escape_ascii<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
const { assert!(N >= 4) };

match byte {
b'\t' => backslash(ascii::Char::SmallT),
b'\r' => backslash(ascii::Char::SmallR),
b'\n' => backslash(ascii::Char::SmallN),
b'\\' => backslash(ascii::Char::ReverseSolidus),
b'\'' => backslash(ascii::Char::Apostrophe),
b'\"' => backslash(ascii::Char::QuotationMark),
byte => {
let mut output = [ascii::Char::Null; N];

if let Some(c) = byte.as_ascii()
&& !byte.is_ascii_control()
{
output[0] = c;
(output, 0..1)
} else {
let hi = HEX_DIGITS[(byte >> 4) as usize];
let lo = HEX_DIGITS[(byte & 0xf) as usize];
#[cfg(feature = "optimize_for_size")]
{
match byte {
b'\t' => backslash(ascii::Char::SmallT),
b'\r' => backslash(ascii::Char::SmallR),
b'\n' => backslash(ascii::Char::SmallN),
b'\\' => backslash(ascii::Char::ReverseSolidus),
b'\'' => backslash(ascii::Char::Apostrophe),
b'\"' => backslash(ascii::Char::QuotationMark),
0x00..=0x1F => hex_escape(byte),
_ => match ascii::Char::from_u8(byte) {
Some(a) => verbatim(a),
None => hex_escape(byte),
},
}
}

#[cfg(not(feature = "optimize_for_size"))]
{
/// Lookup table helps us determine how to display character.
///
/// Since ASCII characters will always be 7 bits, we can exploit this to store the 8th bit to
/// indicate whether the result is escaped or unescaped.
///
/// We additionally use 0x80 (escaped NUL character) to indicate hex-escaped bytes, since
/// escaped NUL will not occur.
const LOOKUP: [u8; 256] = {
let mut arr = [0; 256];
let mut idx = 0;
loop {
arr[idx as usize] = match idx {
// use 8th bit to indicate escaped
b'\t' => 0x80 | b't',
b'\r' => 0x80 | b'r',
b'\n' => 0x80 | b'n',
b'\\' => 0x80 | b'\\',
b'\'' => 0x80 | b'\'',
b'"' => 0x80 | b'"',

// use NUL to indicate hex-escaped
0x00..=0x1F | 0x7F..=0xFF => 0x80 | b'\0',

_ => idx,
};
if idx == 255 {
break;
}
idx += 1;
}
arr
};

output[0] = ascii::Char::ReverseSolidus;
output[1] = ascii::Char::SmallX;
output[2] = hi;
output[3] = lo;
let lookup = LOOKUP[byte as usize];

(output, 0..4)
// 8th bit indicates escape
let lookup_escaped = lookup & 0x80 != 0;

// SAFETY: We explicitly mask out the eighth bit to get a 7-bit ASCII character.
let lookup_ascii = unsafe { ascii::Char::from_u8_unchecked(lookup & 0x7F) };

if lookup_escaped {
// NUL indicates hex-escaped
if matches!(lookup_ascii, ascii::Char::Null) {
hex_escape(byte)
} else {
backslash(lookup_ascii)
}
} else {
verbatim(lookup_ascii)
}
}
}
Expand Down

0 comments on commit 3af7d57

Please sign in to comment.