From f1455245bfe62b8f5fe9e03298ef12967bb798e6 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sat, 9 May 2020 17:38:42 -0400 Subject: [PATCH 1/9] Convert header::name to use MaybeUninit The use of MaybeUninit to replace the deprecated mem::uninitialized() is supported because the minimum supported version of Rust is 1.39. This change moves most of the uninitialized memory related use of unsafe into the parse_hdr() function. --- src/header/name.rs | 186 +++++++++++++++++++++++---------------------- 1 file changed, 94 insertions(+), 92 deletions(-) diff --git a/src/header/name.rs b/src/header/name.rs index 4d70ff30..270c3bc5 100644 --- a/src/header/name.rs +++ b/src/header/name.rs @@ -5,8 +5,9 @@ use std::borrow::Borrow; use std::error::Error; use std::convert::{TryFrom}; use std::hash::{Hash, Hasher}; +use std::mem::MaybeUninit; use std::str::FromStr; -use std::{fmt, mem}; +use std::fmt; /// Represents an HTTP header field name /// @@ -1045,7 +1046,7 @@ macro_rules! eq { $($cmp) && * }; (($($cmp:expr,)*) $v:ident[$n:expr] == $a:tt $($rest:tt)*) => { - eq!(($($cmp,)* $v[$n] == $a,) $v[$n+1] == $($rest)*) + eq!(($($cmp,)* unsafe {*($v[$n].as_ptr())} == $a,) $v[$n+1] == $($rest)*) }; ($v:ident == $($rest:tt)+) => { eq!(() $v[0] == $($rest)+) @@ -1061,15 +1062,15 @@ macro_rules! eq { /// See https://github.com/DenisKolodin/yew/issues/478 fn parse_hdr<'a>( data: &'a [u8], - b: &'a mut [u8; 64], + b: &'a mut [MaybeUninit<u8>; SCRATCH_BUF_SIZE], table: &[u8; 256], ) -> Result<HdrName<'a>, InvalidHeaderName> { use self::StandardHeader::*; let len = data.len(); - let validate = |buf: &'a [u8], len: usize| { - let buf = &buf[..len]; + let validate = |buf: &'a [MaybeUninit<u8>]| { + let buf = unsafe {slice_assume_init(buf)}; if buf.iter().any(|&b| b == 0) { Err(InvalidHeaderName::new()) } else { @@ -1079,41 +1080,41 @@ fn parse_hdr<'a>( macro_rules! 
to_lower { - ($d:ident, $src:ident, 1) => { $d[0] = table[$src[0] as usize]; }; - ($d:ident, $src:ident, 2) => { to_lower!($d, $src, 1); $d[1] = table[$src[1] as usize]; }; - ($d:ident, $src:ident, 3) => { to_lower!($d, $src, 2); $d[2] = table[$src[2] as usize]; }; - ($d:ident, $src:ident, 4) => { to_lower!($d, $src, 3); $d[3] = table[$src[3] as usize]; }; - ($d:ident, $src:ident, 5) => { to_lower!($d, $src, 4); $d[4] = table[$src[4] as usize]; }; - ($d:ident, $src:ident, 6) => { to_lower!($d, $src, 5); $d[5] = table[$src[5] as usize]; }; - ($d:ident, $src:ident, 7) => { to_lower!($d, $src, 6); $d[6] = table[$src[6] as usize]; }; - ($d:ident, $src:ident, 8) => { to_lower!($d, $src, 7); $d[7] = table[$src[7] as usize]; }; - ($d:ident, $src:ident, 9) => { to_lower!($d, $src, 8); $d[8] = table[$src[8] as usize]; }; - ($d:ident, $src:ident, 10) => { to_lower!($d, $src, 9); $d[9] = table[$src[9] as usize]; }; - ($d:ident, $src:ident, 11) => { to_lower!($d, $src, 10); $d[10] = table[$src[10] as usize]; }; - ($d:ident, $src:ident, 12) => { to_lower!($d, $src, 11); $d[11] = table[$src[11] as usize]; }; - ($d:ident, $src:ident, 13) => { to_lower!($d, $src, 12); $d[12] = table[$src[12] as usize]; }; - ($d:ident, $src:ident, 14) => { to_lower!($d, $src, 13); $d[13] = table[$src[13] as usize]; }; - ($d:ident, $src:ident, 15) => { to_lower!($d, $src, 14); $d[14] = table[$src[14] as usize]; }; - ($d:ident, $src:ident, 16) => { to_lower!($d, $src, 15); $d[15] = table[$src[15] as usize]; }; - ($d:ident, $src:ident, 17) => { to_lower!($d, $src, 16); $d[16] = table[$src[16] as usize]; }; - ($d:ident, $src:ident, 18) => { to_lower!($d, $src, 17); $d[17] = table[$src[17] as usize]; }; - ($d:ident, $src:ident, 19) => { to_lower!($d, $src, 18); $d[18] = table[$src[18] as usize]; }; - ($d:ident, $src:ident, 20) => { to_lower!($d, $src, 19); $d[19] = table[$src[19] as usize]; }; - ($d:ident, $src:ident, 21) => { to_lower!($d, $src, 20); $d[20] = table[$src[20] as usize]; }; - ($d:ident, 
$src:ident, 22) => { to_lower!($d, $src, 21); $d[21] = table[$src[21] as usize]; }; - ($d:ident, $src:ident, 23) => { to_lower!($d, $src, 22); $d[22] = table[$src[22] as usize]; }; - ($d:ident, $src:ident, 24) => { to_lower!($d, $src, 23); $d[23] = table[$src[23] as usize]; }; - ($d:ident, $src:ident, 25) => { to_lower!($d, $src, 24); $d[24] = table[$src[24] as usize]; }; - ($d:ident, $src:ident, 26) => { to_lower!($d, $src, 25); $d[25] = table[$src[25] as usize]; }; - ($d:ident, $src:ident, 27) => { to_lower!($d, $src, 26); $d[26] = table[$src[26] as usize]; }; - ($d:ident, $src:ident, 28) => { to_lower!($d, $src, 27); $d[27] = table[$src[27] as usize]; }; - ($d:ident, $src:ident, 29) => { to_lower!($d, $src, 28); $d[28] = table[$src[28] as usize]; }; - ($d:ident, $src:ident, 30) => { to_lower!($d, $src, 29); $d[29] = table[$src[29] as usize]; }; - ($d:ident, $src:ident, 31) => { to_lower!($d, $src, 30); $d[30] = table[$src[30] as usize]; }; - ($d:ident, $src:ident, 32) => { to_lower!($d, $src, 31); $d[31] = table[$src[31] as usize]; }; - ($d:ident, $src:ident, 33) => { to_lower!($d, $src, 32); $d[32] = table[$src[32] as usize]; }; - ($d:ident, $src:ident, 34) => { to_lower!($d, $src, 33); $d[33] = table[$src[33] as usize]; }; - ($d:ident, $src:ident, 35) => { to_lower!($d, $src, 34); $d[34] = table[$src[34] as usize]; }; + ($d:ident, $src:ident, 1) => { unsafe {*($d[0].as_mut_ptr()) = table[$src[0] as usize];} }; + ($d:ident, $src:ident, 2) => { to_lower!($d, $src, 1); unsafe {*($d[1].as_mut_ptr()) = table[$src[1] as usize];} }; + ($d:ident, $src:ident, 3) => { to_lower!($d, $src, 2); unsafe {*($d[2].as_mut_ptr()) = table[$src[2] as usize];} }; + ($d:ident, $src:ident, 4) => { to_lower!($d, $src, 3); unsafe {*($d[3].as_mut_ptr()) = table[$src[3] as usize];} }; + ($d:ident, $src:ident, 5) => { to_lower!($d, $src, 4); unsafe {*($d[4].as_mut_ptr()) = table[$src[4] as usize];} }; + ($d:ident, $src:ident, 6) => { to_lower!($d, $src, 5); unsafe {*($d[5].as_mut_ptr()) = 
table[$src[5] as usize];} }; + ($d:ident, $src:ident, 7) => { to_lower!($d, $src, 6); unsafe {*($d[6].as_mut_ptr()) = table[$src[6] as usize];} }; + ($d:ident, $src:ident, 8) => { to_lower!($d, $src, 7); unsafe {*($d[7].as_mut_ptr()) = table[$src[7] as usize];} }; + ($d:ident, $src:ident, 9) => { to_lower!($d, $src, 8); unsafe {*($d[8].as_mut_ptr()) = table[$src[8] as usize];} }; + ($d:ident, $src:ident, 10) => { to_lower!($d, $src, 9); unsafe {*($d[9].as_mut_ptr()) = table[$src[9] as usize];} }; + ($d:ident, $src:ident, 11) => { to_lower!($d, $src, 10); unsafe {*($d[10].as_mut_ptr()) = table[$src[10] as usize];} }; + ($d:ident, $src:ident, 12) => { to_lower!($d, $src, 11); unsafe {*($d[11].as_mut_ptr()) = table[$src[11] as usize];} }; + ($d:ident, $src:ident, 13) => { to_lower!($d, $src, 12); unsafe {*($d[12].as_mut_ptr()) = table[$src[12] as usize];} }; + ($d:ident, $src:ident, 14) => { to_lower!($d, $src, 13); unsafe {*($d[13].as_mut_ptr()) = table[$src[13] as usize];} }; + ($d:ident, $src:ident, 15) => { to_lower!($d, $src, 14); unsafe {*($d[14].as_mut_ptr()) = table[$src[14] as usize];} }; + ($d:ident, $src:ident, 16) => { to_lower!($d, $src, 15); unsafe {*($d[15].as_mut_ptr()) = table[$src[15] as usize];} }; + ($d:ident, $src:ident, 17) => { to_lower!($d, $src, 16); unsafe {*($d[16].as_mut_ptr()) = table[$src[16] as usize];} }; + ($d:ident, $src:ident, 18) => { to_lower!($d, $src, 17); unsafe {*($d[17].as_mut_ptr()) = table[$src[17] as usize];} }; + ($d:ident, $src:ident, 19) => { to_lower!($d, $src, 18); unsafe {*($d[18].as_mut_ptr()) = table[$src[18] as usize];} }; + ($d:ident, $src:ident, 20) => { to_lower!($d, $src, 19); unsafe {*($d[19].as_mut_ptr()) = table[$src[19] as usize];} }; + ($d:ident, $src:ident, 21) => { to_lower!($d, $src, 20); unsafe {*($d[20].as_mut_ptr()) = table[$src[20] as usize];} }; + ($d:ident, $src:ident, 22) => { to_lower!($d, $src, 21); unsafe {*($d[21].as_mut_ptr()) = table[$src[21] as usize];} }; + ($d:ident, $src:ident, 23) => { 
to_lower!($d, $src, 22); unsafe {*($d[22].as_mut_ptr()) = table[$src[22] as usize];} }; + ($d:ident, $src:ident, 24) => { to_lower!($d, $src, 23); unsafe {*($d[23].as_mut_ptr()) = table[$src[23] as usize];} }; + ($d:ident, $src:ident, 25) => { to_lower!($d, $src, 24); unsafe {*($d[24].as_mut_ptr()) = table[$src[24] as usize];} }; + ($d:ident, $src:ident, 26) => { to_lower!($d, $src, 25); unsafe {*($d[25].as_mut_ptr()) = table[$src[25] as usize];} }; + ($d:ident, $src:ident, 27) => { to_lower!($d, $src, 26); unsafe {*($d[26].as_mut_ptr()) = table[$src[26] as usize];} }; + ($d:ident, $src:ident, 28) => { to_lower!($d, $src, 27); unsafe {*($d[27].as_mut_ptr()) = table[$src[27] as usize];} }; + ($d:ident, $src:ident, 29) => { to_lower!($d, $src, 28); unsafe {*($d[28].as_mut_ptr()) = table[$src[28] as usize];} }; + ($d:ident, $src:ident, 30) => { to_lower!($d, $src, 29); unsafe {*($d[29].as_mut_ptr()) = table[$src[29] as usize];} }; + ($d:ident, $src:ident, 31) => { to_lower!($d, $src, 30); unsafe {*($d[30].as_mut_ptr()) = table[$src[30] as usize];} }; + ($d:ident, $src:ident, 32) => { to_lower!($d, $src, 31); unsafe {*($d[31].as_mut_ptr()) = table[$src[31] as usize];} }; + ($d:ident, $src:ident, 33) => { to_lower!($d, $src, 32); unsafe {*($d[32].as_mut_ptr()) = table[$src[32] as usize];} }; + ($d:ident, $src:ident, 34) => { to_lower!($d, $src, 33); unsafe {*($d[33].as_mut_ptr()) = table[$src[33] as usize];} }; + ($d:ident, $src:ident, 35) => { to_lower!($d, $src, 34); unsafe {*($d[34].as_mut_ptr()) = table[$src[34] as usize];} }; } assert!(len < super::MAX_HEADER_NAME_LEN, @@ -1128,7 +1129,7 @@ fn parse_hdr<'a>( if eq!(b == b't' b'e') { Ok(Te.into()) } else { - validate(b, len) + validate(&b[..len]) } } 3 => { @@ -1141,7 +1142,7 @@ fn parse_hdr<'a>( } else if eq!(b == b'd' b'n' b't') { Ok(Dnt.into()) } else { - validate(b, len) + validate(&b[..len]) } } 4 => { @@ -1160,7 +1161,7 @@ fn parse_hdr<'a>( } else if eq!(b == b'v' b'a' b'r' b'y') { Ok(Vary.into()) } else { - 
validate(b, len) + validate(&b[..len]) } } 5 => { @@ -1171,7 +1172,7 @@ fn parse_hdr<'a>( } else if eq!(b == b'r' b'a' b'n' b'g' b'e') { Ok(Range.into()) } else { - validate(b, len) + validate(&b[..len]) } } 6 => { @@ -1187,13 +1188,13 @@ fn parse_hdr<'a>( return Ok(Origin.into()); } else if eq!(b == b'p' b'r' b'a' b'g' b'm' b'a') { return Ok(Pragma.into()); - } else if b[0] == b's' { + } else if unsafe {*(b[0].as_ptr())} == b's' { if eq!(b[1] == b'e' b'r' b'v' b'e' b'r') { return Ok(Server.into()); } } - validate(b, len) + validate(&b[..len]) } 7 => { to_lower!(b, data, 7); @@ -1213,7 +1214,7 @@ fn parse_hdr<'a>( } else if eq!(b == b'w' b'a' b'r' b'n' b'i' b'n' b'g') { Ok(Warning.into()) } else { - validate(b, len) + validate(&b[..len]) } } 8 => { @@ -1229,7 +1230,7 @@ fn parse_hdr<'a>( return Ok(Location.into()); } - validate(b, len) + validate(&b[..len]) } 9 => { to_lower!(b, data, 9); @@ -1237,7 +1238,7 @@ fn parse_hdr<'a>( if eq!(b == b'f' b'o' b'r' b'w' b'a' b'r' b'd' b'e' b'd') { Ok(Forwarded.into()) } else { - validate(b, len) + validate(&b[..len]) } } 10 => { @@ -1250,7 +1251,7 @@ fn parse_hdr<'a>( } else if eq!(b == b'u' b's' b'e' b'r' b'-' b'a' b'g' b'e' b'n' b't') { Ok(UserAgent.into()) } else { - validate(b, len) + validate(&b[..len]) } } 11 => { @@ -1259,7 +1260,7 @@ fn parse_hdr<'a>( if eq!(b == b'r' b'e' b't' b'r' b'y' b'-' b'a' b'f' b't' b'e' b'r') { Ok(RetryAfter.into()) } else { - validate(b, len) + validate(&b[..len]) } } 12 => { @@ -1270,19 +1271,19 @@ fn parse_hdr<'a>( } else if eq!(b == b'm' b'a' b'x' b'-' b'f' b'o' b'r' b'w' b'a' b'r' b'd' b's') { Ok(MaxForwards.into()) } else { - validate(b, len) + validate(&b[..len]) } } 13 => { to_lower!(b, data, 13); - if b[0] == b'a' { + if unsafe {*(b[0].as_ptr())} == b'a' { if eq!(b[1] == b'c' b'c' b'e' b'p' b't' b'-' b'r' b'a' b'n' b'g' b'e' b's') { return Ok(AcceptRanges.into()); } else if eq!(b[1] == b'u' b't' b'h' b'o' b'r' b'i' b'z' b'a' b't' b'i' b'o' b'n') { return Ok(Authorization.into()); } - 
} else if b[0] == b'c' { + } else if unsafe {*(b[0].as_ptr())} == b'c' { if eq!(b[1] == b'a' b'c' b'h' b'e' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l') { return Ok(CacheControl.into()); } else if eq!(b[1] == b'o' b'n' b't' b'e' b'n' b't' b'-' b'r' b'a' b'n' b'g' b'e' ) @@ -1295,7 +1296,7 @@ fn parse_hdr<'a>( return Ok(LastModified.into()); } - validate(b, len) + validate(&b[..len]) } 14 => { to_lower!(b, data, 14); @@ -1306,7 +1307,7 @@ fn parse_hdr<'a>( { Ok(ContentLength.into()) } else { - validate(b, len) + validate(&b[..len]) } } 15 => { @@ -1327,7 +1328,7 @@ fn parse_hdr<'a>( return Ok(ReferrerPolicy.into()) } - validate(b, len) + validate(&b[..len]) } 16 => { to_lower!(b, data, 16); @@ -1346,7 +1347,7 @@ fn parse_hdr<'a>( return Ok(XXssProtection.into()) } - validate(b, len) + validate(&b[..len]) } 17 => { to_lower!(b, data, 17); @@ -1358,7 +1359,7 @@ fn parse_hdr<'a>( } else if eq!(b == b's' b'e' b'c' b'-' b'w' b'e' b'b' b's' b'o' b'c' b'k' b'e' b't' b'-' b'k' b'e' b'y') { Ok(SecWebSocketKey.into()) } else { - validate(b, len) + validate(&b[..len]) } } 18 => { @@ -1367,7 +1368,7 @@ fn parse_hdr<'a>( if eq!(b == b'p' b'r' b'o' b'x' b'y' b'-' b'a' b'u' b't' b'h' b'e' b'n' b't' b'i' b'c' b'a' b't' b'e') { Ok(ProxyAuthenticate.into()) } else { - validate(b, len) + validate(&b[..len]) } } 19 => { @@ -1380,7 +1381,7 @@ fn parse_hdr<'a>( } else if eq!(b == b'p' b'r' b'o' b'x' b'y' b'-' b'a' b'u' b't' b'h' b'o' b'r' b'i' b'z' b'a' b't' b'i' b'o' b'n') { Ok(ProxyAuthorization.into()) } else { - validate(b, len) + validate(&b[..len]) } } 20 => { @@ -1389,7 +1390,7 @@ fn parse_hdr<'a>( if eq!(b == b's' b'e' b'c' b'-' b'w' b'e' b'b' b's' b'o' b'c' b'k' b'e' b't' b'-' b'a' b'c' b'c' b'e' b'p' b't') { Ok(SecWebSocketAccept.into()) } else { - validate(b, len) + validate(&b[..len]) } } 21 => { @@ -1398,7 +1399,7 @@ fn parse_hdr<'a>( if eq!(b == b's' b'e' b'c' b'-' b'w' b'e' b'b' b's' b'o' b'c' b'k' b'e' b't' b'-' b'v' b'e' b'r' b's' b'i' b'o' b'n') { 
Ok(SecWebSocketVersion.into()) } else { - validate(b, len) + validate(&b[..len]) } } 22 => { @@ -1413,7 +1414,7 @@ fn parse_hdr<'a>( } else if eq!(b == b's' b'e' b'c' b'-' b'w' b'e' b'b' b's' b'o' b'c' b'k' b'e' b't' b'-' b'p' b'r' b'o' b't' b'o' b'c' b'o' b'l') { Ok(SecWebSocketProtocol.into()) } else { - validate(b, len) + validate(&b[..len]) } } 23 => { @@ -1422,7 +1423,7 @@ fn parse_hdr<'a>( if eq!(b == b'c' b'o' b'n' b't' b'e' b'n' b't' b'-' b's' b'e' b'c' b'u' b'r' b'i' b't' b'y' b'-' b'p' b'o' b'l' b'i' b'c' b'y') { Ok(ContentSecurityPolicy.into()) } else { - validate(b, len) + validate(&b[..len]) } } 24 => { @@ -1431,7 +1432,7 @@ fn parse_hdr<'a>( if eq!(b == b's' b'e' b'c' b'-' b'w' b'e' b'b' b's' b'o' b'c' b'k' b'e' b't' b'-' b'e' b'x' b't' b'e' b'n' b's' b'i' b'o' b'n' b's') { Ok(SecWebSocketExtensions.into()) } else { - validate(b, len) + validate(&b[..len]) } } 25 => { @@ -1442,7 +1443,7 @@ fn parse_hdr<'a>( } else if eq!(b == b'u' b'p' b'g' b'r' b'a' b'd' b'e' b'-' b'i' b'n' b's' b'e' b'c' b'u' b'r' b'e' b'-' b'r' b'e' b'q' b'u' b'e' b's' b't' b's') { Ok(UpgradeInsecureRequests.into()) } else { - validate(b, len) + validate(&b[..len]) } } 27 => { @@ -1453,7 +1454,7 @@ fn parse_hdr<'a>( } else if eq!(b == b'p' b'u' b'b' b'l' b'i' b'c' b'-' b'k' b'e' b'y' b'-' b'p' b'i' b'n' b's' b'-' b'r' b'e' b'p' b'o' b'r' b't' b'-' b'o' b'n' b'l' b'y') { Ok(PublicKeyPinsReportOnly.into()) } else { - validate(b, len) + validate(&b[..len]) } } 28 => { @@ -1467,7 +1468,7 @@ fn parse_hdr<'a>( } } - validate(b, len) + validate(&b[..len]) } 29 => { to_lower!(b, data, 29); @@ -1480,7 +1481,7 @@ fn parse_hdr<'a>( } } - validate(b, len) + validate(&b[..len]) } 30 => { to_lower!(b, data, 30); @@ -1488,7 +1489,7 @@ fn parse_hdr<'a>( if eq!(b == b'a' b'c' b'c' b'e' b's' b's' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l' b'-' b'r' b'e' b'q' b'u' b'e' b's' b't' b'-' b'h' b'e' b'a' b'd' b'e' b'r' b's') { Ok(AccessControlRequestHeaders.into()) } else { - validate(b, len) + 
validate(&b[..len]) } } 32 => { @@ -1497,7 +1498,7 @@ fn parse_hdr<'a>( if eq!(b == b'a' b'c' b'c' b'e' b's' b's' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l' b'-' b'a' b'l' b'l' b'o' b'w' b'-' b'c' b'r' b'e' b'd' b'e' b'n' b't' b'i' b'a' b'l' b's') { Ok(AccessControlAllowCredentials.into()) } else { - validate(b, len) + validate(&b[..len]) } } 35 => { @@ -1506,16 +1507,16 @@ fn parse_hdr<'a>( if eq!(b == b'c' b'o' b'n' b't' b'e' b'n' b't' b'-' b's' b'e' b'c' b'u' b'r' b'i' b't' b'y' b'-' b'p' b'o' b'l' b'i' b'c' b'y' b'-' b'r' b'e' b'p' b'o' b'r' b't' b'-' b'o' b'n' b'l' b'y') { Ok(ContentSecurityPolicyReportOnly.into()) } else { - validate(b, len) + validate(&b[..len]) } } _ => { if len < 64 { for i in 0..len { - b[i] = table[data[i] as usize]; + unsafe {*(b[i].as_mut_ptr()) = table[data[i] as usize]; } } - validate(b, len) + validate(&b[..len]) } else { Ok(HdrName::custom(data, false)) } @@ -1527,7 +1528,7 @@ fn parse_hdr<'a>( /// This version works best in debug mode in wasm fn parse_hdr<'a>( data: &'a [u8], - b: &'a mut [u8; 64], + b: &'a mut [MaybeUninit<u8>; SCRATCH_BUF_SIZE], table: &[u8; 256], ) -> Result<HdrName<'a>, InvalidHeaderName> { use self::StandardHeader::*; @@ -1554,7 +1555,8 @@ fn parse_hdr<'a>( len if len > 64 => Ok(HdrName::custom(data, false)), len => { // Read from data into the buffer - transforming using `table` as we go - data.iter().zip(b.iter_mut()).for_each(|(index, out)| *out = table[*index as usize]); + data.iter().zip(b.iter_mut()).for_each(|(index, out)| unsafe {*(out.as_mut_ptr()) = table[*index as usize]}); + let b = unsafe {slice_assume_init(&b[..len])}; match &b[0..len] { b"te" => Ok(Te.into()), b"age" => Ok(Age.into()), @@ -1655,10 +1657,8 @@ impl HeaderName { /// Converts a slice of bytes to an HTTP header name. /// /// This function normalizes the input. 
- #[allow(deprecated)] pub fn from_bytes(src: &[u8]) -> Result<HeaderName, InvalidHeaderName> { - #[allow(deprecated)] - let mut buf = unsafe { mem::uninitialized() }; + let mut buf = uninit_u8_array(); match parse_hdr(src, &mut buf, &HEADER_CHARS)?.inner { Repr::Standard(std) => Ok(std.into()), Repr::Custom(MaybeLower { buf, lower: true }) => { @@ -1705,10 +1705,8 @@ impl HeaderName { /// // Parsing a header that contains uppercase characters /// assert!(HeaderName::from_lowercase(b"Content-Length").is_err()); /// ``` - #[allow(deprecated)] pub fn from_lowercase(src: &[u8]) -> Result<HeaderName, InvalidHeaderName> { - #[allow(deprecated)] - let mut buf = unsafe { mem::uninitialized() }; + let mut buf = uninit_u8_array(); match parse_hdr(src, &mut buf, &HEADER_CHARS_H2)?.inner { Repr::Standard(std) => Ok(std.into()), Repr::Custom(MaybeLower { buf, lower: true }) => { @@ -1765,11 +1763,9 @@ impl HeaderName { /// let a = HeaderName::from_static("foobar"); /// let b = HeaderName::from_static("FOOBAR"); // This line panics! /// ``` - #[allow(deprecated)] pub fn from_static(src: &'static str) -> HeaderName { let bytes = src.as_bytes(); - #[allow(deprecated)] - let mut buf = unsafe { mem::uninitialized() }; + let mut buf = uninit_u8_array(); match parse_hdr(bytes, &mut buf, &HEADER_CHARS_H2) { Ok(hdr_name) => match hdr_name.inner { Repr::Standard(std) => std.into(), @@ -2021,23 +2017,19 @@ impl<'a> HdrName<'a> { } } - #[allow(deprecated)] pub fn from_bytes<F, U>(hdr: &[u8], f: F) -> Result<U, InvalidHeaderName> where F: FnOnce(HdrName<'_>) -> U, { - #[allow(deprecated)] - let mut buf = unsafe { mem::uninitialized() }; + let mut buf = uninit_u8_array(); let hdr = parse_hdr(hdr, &mut buf, &HEADER_CHARS)?; Ok(f(hdr)) } - #[allow(deprecated)] pub fn from_static<F, U>(hdr: &'static str, f: F) -> U where F: FnOnce(HdrName<'_>) -> U, { - #[allow(deprecated)] - let mut buf = unsafe { mem::uninitialized() }; + let mut buf = uninit_u8_array(); let hdr = parse_hdr(hdr.as_bytes(), &mut buf, &HEADER_CHARS).expect("static str is invalid name"); f(hdr) @@ -2137,6 +2129,16 @@ fn 
eq_ignore_ascii_case(lower: &[u8], s: &[u8]) -> bool { }) } +const SCRATCH_BUF_SIZE: usize = 64; + +fn uninit_u8_array() -> [MaybeUninit<u8>; SCRATCH_BUF_SIZE] { + unsafe { MaybeUninit::<[MaybeUninit<u8>; SCRATCH_BUF_SIZE]>::uninit().assume_init() } +} + +unsafe fn slice_assume_init<T>(slice: &[MaybeUninit<T>]) -> &[T] { + &*(slice as *const [MaybeUninit<T>] as *const [T]) + } + #[cfg(test)] mod tests { use super::*; From 0a9f51b7c326d90743c8e5775c3596e11465546a Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sun, 10 May 2020 08:59:02 -0400 Subject: [PATCH 2/9] Refactor MaybeUninit support in mod header::name The general pattern follows the "Initializing an array element-by-element" example in the documentation of MaybeUninit. This change removes some unnecessary use of unsafe. --- src/header/name.rs | 103 ++++++++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 43 deletions(-) diff --git a/src/header/name.rs b/src/header/name.rs index 270c3bc5..0658e76e 100644 --- a/src/header/name.rs +++ b/src/header/name.rs @@ -1046,7 +1046,7 @@ macro_rules! eq { $($cmp) && * }; (($($cmp:expr,)*) $v:ident[$n:expr] == $a:tt $($rest:tt)*) => { - eq!(($($cmp,)* unsafe {*($v[$n].as_ptr())} == $a,) $v[$n+1] == $($rest)*) + eq!(($($cmp,)* unsafe {assume_init_eq($v[$n], $a)} ,) $v[$n+1] == $($rest)*) }; ($v:ident == $($rest:tt)+) => { eq!(() $v[0] == $($rest)+) @@ -1080,41 +1080,41 @@ fn parse_hdr<'a>( macro_rules! 
to_lower { - ($d:ident, $src:ident, 1) => { unsafe {*($d[0].as_mut_ptr()) = table[$src[0] as usize];} }; - ($d:ident, $src:ident, 2) => { to_lower!($d, $src, 1); unsafe {*($d[1].as_mut_ptr()) = table[$src[1] as usize];} }; - ($d:ident, $src:ident, 3) => { to_lower!($d, $src, 2); unsafe {*($d[2].as_mut_ptr()) = table[$src[2] as usize];} }; - ($d:ident, $src:ident, 4) => { to_lower!($d, $src, 3); unsafe {*($d[3].as_mut_ptr()) = table[$src[3] as usize];} }; - ($d:ident, $src:ident, 5) => { to_lower!($d, $src, 4); unsafe {*($d[4].as_mut_ptr()) = table[$src[4] as usize];} }; - ($d:ident, $src:ident, 6) => { to_lower!($d, $src, 5); unsafe {*($d[5].as_mut_ptr()) = table[$src[5] as usize];} }; - ($d:ident, $src:ident, 7) => { to_lower!($d, $src, 6); unsafe {*($d[6].as_mut_ptr()) = table[$src[6] as usize];} }; - ($d:ident, $src:ident, 8) => { to_lower!($d, $src, 7); unsafe {*($d[7].as_mut_ptr()) = table[$src[7] as usize];} }; - ($d:ident, $src:ident, 9) => { to_lower!($d, $src, 8); unsafe {*($d[8].as_mut_ptr()) = table[$src[8] as usize];} }; - ($d:ident, $src:ident, 10) => { to_lower!($d, $src, 9); unsafe {*($d[9].as_mut_ptr()) = table[$src[9] as usize];} }; - ($d:ident, $src:ident, 11) => { to_lower!($d, $src, 10); unsafe {*($d[10].as_mut_ptr()) = table[$src[10] as usize];} }; - ($d:ident, $src:ident, 12) => { to_lower!($d, $src, 11); unsafe {*($d[11].as_mut_ptr()) = table[$src[11] as usize];} }; - ($d:ident, $src:ident, 13) => { to_lower!($d, $src, 12); unsafe {*($d[12].as_mut_ptr()) = table[$src[12] as usize];} }; - ($d:ident, $src:ident, 14) => { to_lower!($d, $src, 13); unsafe {*($d[13].as_mut_ptr()) = table[$src[13] as usize];} }; - ($d:ident, $src:ident, 15) => { to_lower!($d, $src, 14); unsafe {*($d[14].as_mut_ptr()) = table[$src[14] as usize];} }; - ($d:ident, $src:ident, 16) => { to_lower!($d, $src, 15); unsafe {*($d[15].as_mut_ptr()) = table[$src[15] as usize];} }; - ($d:ident, $src:ident, 17) => { to_lower!($d, $src, 16); unsafe {*($d[16].as_mut_ptr()) = 
table[$src[16] as usize];} }; - ($d:ident, $src:ident, 18) => { to_lower!($d, $src, 17); unsafe {*($d[17].as_mut_ptr()) = table[$src[17] as usize];} }; - ($d:ident, $src:ident, 19) => { to_lower!($d, $src, 18); unsafe {*($d[18].as_mut_ptr()) = table[$src[18] as usize];} }; - ($d:ident, $src:ident, 20) => { to_lower!($d, $src, 19); unsafe {*($d[19].as_mut_ptr()) = table[$src[19] as usize];} }; - ($d:ident, $src:ident, 21) => { to_lower!($d, $src, 20); unsafe {*($d[20].as_mut_ptr()) = table[$src[20] as usize];} }; - ($d:ident, $src:ident, 22) => { to_lower!($d, $src, 21); unsafe {*($d[21].as_mut_ptr()) = table[$src[21] as usize];} }; - ($d:ident, $src:ident, 23) => { to_lower!($d, $src, 22); unsafe {*($d[22].as_mut_ptr()) = table[$src[22] as usize];} }; - ($d:ident, $src:ident, 24) => { to_lower!($d, $src, 23); unsafe {*($d[23].as_mut_ptr()) = table[$src[23] as usize];} }; - ($d:ident, $src:ident, 25) => { to_lower!($d, $src, 24); unsafe {*($d[24].as_mut_ptr()) = table[$src[24] as usize];} }; - ($d:ident, $src:ident, 26) => { to_lower!($d, $src, 25); unsafe {*($d[25].as_mut_ptr()) = table[$src[25] as usize];} }; - ($d:ident, $src:ident, 27) => { to_lower!($d, $src, 26); unsafe {*($d[26].as_mut_ptr()) = table[$src[26] as usize];} }; - ($d:ident, $src:ident, 28) => { to_lower!($d, $src, 27); unsafe {*($d[27].as_mut_ptr()) = table[$src[27] as usize];} }; - ($d:ident, $src:ident, 29) => { to_lower!($d, $src, 28); unsafe {*($d[28].as_mut_ptr()) = table[$src[28] as usize];} }; - ($d:ident, $src:ident, 30) => { to_lower!($d, $src, 29); unsafe {*($d[29].as_mut_ptr()) = table[$src[29] as usize];} }; - ($d:ident, $src:ident, 31) => { to_lower!($d, $src, 30); unsafe {*($d[30].as_mut_ptr()) = table[$src[30] as usize];} }; - ($d:ident, $src:ident, 32) => { to_lower!($d, $src, 31); unsafe {*($d[31].as_mut_ptr()) = table[$src[31] as usize];} }; - ($d:ident, $src:ident, 33) => { to_lower!($d, $src, 32); unsafe {*($d[32].as_mut_ptr()) = table[$src[32] as usize];} }; - ($d:ident, 
$src:ident, 34) => { to_lower!($d, $src, 33); unsafe {*($d[33].as_mut_ptr()) = table[$src[33] as usize];} }; - ($d:ident, $src:ident, 35) => { to_lower!($d, $src, 34); unsafe {*($d[34].as_mut_ptr()) = table[$src[34] as usize];} }; + ($d:ident, $src:ident, 1) => { $d[0] = MaybeUninit::new(table[$src[0] as usize]); }; + ($d:ident, $src:ident, 2) => { to_lower!($d, $src, 1); $d[1] = MaybeUninit::new(table[$src[1] as usize]); }; + ($d:ident, $src:ident, 3) => { to_lower!($d, $src, 2); $d[2] = MaybeUninit::new(table[$src[2] as usize]); }; + ($d:ident, $src:ident, 4) => { to_lower!($d, $src, 3); $d[3] = MaybeUninit::new(table[$src[3] as usize]); }; + ($d:ident, $src:ident, 5) => { to_lower!($d, $src, 4); $d[4] = MaybeUninit::new(table[$src[4] as usize]); }; + ($d:ident, $src:ident, 6) => { to_lower!($d, $src, 5); $d[5] = MaybeUninit::new(table[$src[5] as usize]); }; + ($d:ident, $src:ident, 7) => { to_lower!($d, $src, 6); $d[6] = MaybeUninit::new(table[$src[6] as usize]); }; + ($d:ident, $src:ident, 8) => { to_lower!($d, $src, 7); $d[7] = MaybeUninit::new(table[$src[7] as usize]); }; + ($d:ident, $src:ident, 9) => { to_lower!($d, $src, 8); $d[8] = MaybeUninit::new(table[$src[8] as usize]); }; + ($d:ident, $src:ident, 10) => { to_lower!($d, $src, 9); $d[9] = MaybeUninit::new(table[$src[9] as usize]); }; + ($d:ident, $src:ident, 11) => { to_lower!($d, $src, 10); $d[10] = MaybeUninit::new(table[$src[10] as usize]); }; + ($d:ident, $src:ident, 12) => { to_lower!($d, $src, 11); $d[11] = MaybeUninit::new(table[$src[11] as usize]); }; + ($d:ident, $src:ident, 13) => { to_lower!($d, $src, 12); $d[12] = MaybeUninit::new(table[$src[12] as usize]); }; + ($d:ident, $src:ident, 14) => { to_lower!($d, $src, 13); $d[13] = MaybeUninit::new(table[$src[13] as usize]); }; + ($d:ident, $src:ident, 15) => { to_lower!($d, $src, 14); $d[14] = MaybeUninit::new(table[$src[14] as usize]); }; + ($d:ident, $src:ident, 16) => { to_lower!($d, $src, 15); $d[15] = MaybeUninit::new(table[$src[15] as 
usize]); }; + ($d:ident, $src:ident, 17) => { to_lower!($d, $src, 16); $d[16] = MaybeUninit::new(table[$src[16] as usize]); }; + ($d:ident, $src:ident, 18) => { to_lower!($d, $src, 17); $d[17] = MaybeUninit::new(table[$src[17] as usize]); }; + ($d:ident, $src:ident, 19) => { to_lower!($d, $src, 18); $d[18] = MaybeUninit::new(table[$src[18] as usize]); }; + ($d:ident, $src:ident, 20) => { to_lower!($d, $src, 19); $d[19] = MaybeUninit::new(table[$src[19] as usize]); }; + ($d:ident, $src:ident, 21) => { to_lower!($d, $src, 20); $d[20] = MaybeUninit::new(table[$src[20] as usize]); }; + ($d:ident, $src:ident, 22) => { to_lower!($d, $src, 21); $d[21] = MaybeUninit::new(table[$src[21] as usize]); }; + ($d:ident, $src:ident, 23) => { to_lower!($d, $src, 22); $d[22] = MaybeUninit::new(table[$src[22] as usize]); }; + ($d:ident, $src:ident, 24) => { to_lower!($d, $src, 23); $d[23] = MaybeUninit::new(table[$src[23] as usize]); }; + ($d:ident, $src:ident, 25) => { to_lower!($d, $src, 24); $d[24] = MaybeUninit::new(table[$src[24] as usize]); }; + ($d:ident, $src:ident, 26) => { to_lower!($d, $src, 25); $d[25] = MaybeUninit::new(table[$src[25] as usize]); }; + ($d:ident, $src:ident, 27) => { to_lower!($d, $src, 26); $d[26] = MaybeUninit::new(table[$src[26] as usize]); }; + ($d:ident, $src:ident, 28) => { to_lower!($d, $src, 27); $d[27] = MaybeUninit::new(table[$src[27] as usize]); }; + ($d:ident, $src:ident, 29) => { to_lower!($d, $src, 28); $d[28] = MaybeUninit::new(table[$src[28] as usize]); }; + ($d:ident, $src:ident, 30) => { to_lower!($d, $src, 29); $d[29] = MaybeUninit::new(table[$src[29] as usize]); }; + ($d:ident, $src:ident, 31) => { to_lower!($d, $src, 30); $d[30] = MaybeUninit::new(table[$src[30] as usize]); }; + ($d:ident, $src:ident, 32) => { to_lower!($d, $src, 31); $d[31] = MaybeUninit::new(table[$src[31] as usize]); }; + ($d:ident, $src:ident, 33) => { to_lower!($d, $src, 32); $d[32] = MaybeUninit::new(table[$src[32] as usize]); }; + ($d:ident, $src:ident, 34) => 
{ to_lower!($d, $src, 33); $d[33] = MaybeUninit::new(table[$src[33] as usize]); }; + ($d:ident, $src:ident, 35) => { to_lower!($d, $src, 34); $d[34] = MaybeUninit::new(table[$src[34] as usize]); }; } assert!(len < super::MAX_HEADER_NAME_LEN, @@ -1188,7 +1188,7 @@ fn parse_hdr<'a>( return Ok(Origin.into()); } else if eq!(b == b'p' b'r' b'a' b'g' b'm' b'a') { return Ok(Pragma.into()); - } else if unsafe {*(b[0].as_ptr())} == b's' { + } else if unsafe {assume_init_eq(b[0], b's')} { if eq!(b[1] == b'e' b'r' b'v' b'e' b'r') { return Ok(Server.into()); } @@ -1277,13 +1277,13 @@ fn parse_hdr<'a>( 13 => { to_lower!(b, data, 13); - if unsafe {*(b[0].as_ptr())} == b'a' { + if unsafe {assume_init_eq(b[0], b'a')} { if eq!(b[1] == b'c' b'c' b'e' b'p' b't' b'-' b'r' b'a' b'n' b'g' b'e' b's') { return Ok(AcceptRanges.into()); } else if eq!(b[1] == b'u' b't' b'h' b'o' b'r' b'i' b'z' b'a' b't' b'i' b'o' b'n') { return Ok(Authorization.into()); } - } else if unsafe {*(b[0].as_ptr())} == b'c' { + } else if unsafe {assume_init_eq(b[0], b'c')} { if eq!(b[1] == b'a' b'c' b'h' b'e' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l') { return Ok(CacheControl.into()); } else if eq!(b[1] == b'o' b'n' b't' b'e' b'n' b't' b'-' b'r' b'a' b'n' b'g' b'e' ) @@ -1513,7 +1513,7 @@ fn parse_hdr<'a>( _ => { if len < 64 { for i in 0..len { - unsafe {*(b[i].as_mut_ptr()) = table[data[i] as usize]; } + b[i] = MaybeUninit::new(table[data[i] as usize]); } validate(&b[..len]) @@ -1555,7 +1555,7 @@ fn parse_hdr<'a>( len if len > 64 => Ok(HdrName::custom(data, false)), len => { // Read from data into the buffer - transforming using `table` as we go - data.iter().zip(b.iter_mut()).for_each(|(index, out)| unsafe {*(out.as_mut_ptr()) = table[*index as usize]}); + data.iter().zip(b.iter_mut()).for_each(|(index, out)| *out = MaybeUninit::new(table[*index as usize])); let b = unsafe {slice_assume_init(&b[..len])}; match &b[0..len] { b"te" => Ok(Te.into()), @@ -2129,15 +2129,32 @@ fn eq_ignore_ascii_case(lower: &[u8], s: 
&[u8]) -> bool { }) } +// Utility functions for MaybeUninit<>. These are drawn from unstable APIs on +// MaybeUninit<> itself. const SCRATCH_BUF_SIZE: usize = 64; fn uninit_u8_array() -> [MaybeUninit<u8>; SCRATCH_BUF_SIZE] { - unsafe { MaybeUninit::<[MaybeUninit<u8>; SCRATCH_BUF_SIZE]>::uninit().assume_init() } + let arr = MaybeUninit::<[MaybeUninit<u8>; SCRATCH_BUF_SIZE]>::uninit(); + // Safety: assume_init() is claiming that an array of MaybeUninit<> + // has been initialized, but MaybeUninit<>'s do not require initialization. + unsafe { arr.assume_init() } } +// Assuming all the elements are initialized, get a slice of them. +// +// Safety: All elements of `slice` must be initialized to prevent +// undefined behavior. unsafe fn slice_assume_init<T>(slice: &[MaybeUninit<T>]) -> &[T] { &*(slice as *const [MaybeUninit<T>] as *const [T]) - } +} + +// Compare `rhs` to `lhs` assuming the latter is initialized. +// +// Safety: `lhs` must be initialized to avoid undefined behavior. +#[cfg(any(not(debug_assertions), not(target_arch = "wasm32")))] +unsafe fn assume_init_eq<T: PartialEq>(lhs: MaybeUninit<T>, rhs: T) -> bool { + *(lhs.as_ptr()) == rhs +} #[cfg(test)] mod tests { use super::*; From 9a61c45d170a2a111da2affff4980768fea41a45 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sun, 10 May 2020 18:37:03 -0400 Subject: [PATCH 3/9] Add criterion benchmark for header::HeaderName This introduces a dev-dependency on the criterion crate. The new benchmark for HeaderName uses different standard header names of increasing lengths. 
--- Cargo.toml | 6 +++++ benches/header_name2.rs | 52 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 benches/header_name2.rs diff --git a/Cargo.toml b/Cargo.toml index 0357da6f..c813f18f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,7 @@ seahash = "3.0.5" serde = "1.0" serde_json = "1.0" doc-comment = "0.3" +criterion = "0.3.2" [[bench]] name = "header_map" @@ -43,6 +44,11 @@ path = "benches/header_map/mod.rs" name = "header_name" path = "benches/header_name.rs" +[[bench]] +name = "header_name2" +path = "benches/header_name2.rs" +harness = false + [[bench]] name = "header_value" path = "benches/header_value.rs" diff --git a/benches/header_name2.rs b/benches/header_name2.rs new file mode 100644 index 00000000..4562fd66 --- /dev/null +++ b/benches/header_name2.rs @@ -0,0 +1,52 @@ +use criterion::{criterion_group, criterion_main, BenchmarkId,Criterion, Throughput}; +use http::header::HeaderName; + +// This is a list of some of the standard headers ordered by increasing size. +// It has exactly one standard header per size (some sizes don't have a standard +// header). 
+const STANDARD_HEADERS_BY_SIZE: &[&str] = &[ + "te", + "age", + "date", + "allow", + "accept", + "alt-svc", + "if-match", + "forwarded", + "connection", + "retry-after", + "content-type", + "accept-ranges", + "accept-charset", + "accept-encoding", + "content-encoding", + "if-modified-since", + "proxy-authenticate", + "content-disposition", + "sec-websocket-accept", + "sec-websocket-version", + "access-control-max-age", + "content-security-policy", + "sec-websocket-extensions", + "strict-transport-security", + "access-control-allow-origin", + "access-control-allow-headers", + "access-control-expose-headers", + "access-control-request-headers", + "access-control-allow-credentials", + "content-security-policy-report-only", +]; + +fn header_name_by_size(c: &mut Criterion) { + let mut group = c.benchmark_group("std_hdr"); + for name in STANDARD_HEADERS_BY_SIZE { + group.throughput(Throughput::Bytes(name.len() as u64)); + group.bench_with_input(BenchmarkId::from_parameter(name), name, |b, name| { + b.iter(|| HeaderName::from_static(name) ); + }); + } + group.finish(); +} + +criterion_group!(benches, header_name_by_size); +criterion_main!(benches); From c154b5a7a96d197287622a30f948b2e870722f24 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Wed, 13 May 2020 20:18:28 -0400 Subject: [PATCH 4/9] Fix performance regression in header::name A previous commit had extracted the comparison of a presumed-initialized MaybeUninit and a static u8 to its own function. While clearer, this led to a performance regression so this commit manually inlines this method again. This restores most (but not yet all) of the performance that regressed. --- src/header/name.rs | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/header/name.rs b/src/header/name.rs index 0658e76e..99d186eb 100644 --- a/src/header/name.rs +++ b/src/header/name.rs @@ -1046,7 +1046,7 @@ macro_rules! 
eq { $($cmp) && * }; (($($cmp:expr,)*) $v:ident[$n:expr] == $a:tt $($rest:tt)*) => { - eq!(($($cmp,)* unsafe {assume_init_eq($v[$n], $a)} ,) $v[$n+1] == $($rest)*) + eq!(($($cmp,)* unsafe {*($v[$n].as_ptr())} == $a ,) $v[$n+1] == $($rest)*) }; ($v:ident == $($rest:tt)+) => { eq!(() $v[0] == $($rest)+) @@ -1188,7 +1188,7 @@ fn parse_hdr<'a>( return Ok(Origin.into()); } else if eq!(b == b'p' b'r' b'a' b'g' b'm' b'a') { return Ok(Pragma.into()); - } else if unsafe {assume_init_eq(b[0], b's')} { + } else if unsafe {*(b[0].as_ptr())} == b's' { if eq!(b[1] == b'e' b'r' b'v' b'e' b'r') { return Ok(Server.into()); } @@ -1277,13 +1277,13 @@ fn parse_hdr<'a>( 13 => { to_lower!(b, data, 13); - if unsafe {assume_init_eq(b[0], b'a')} { + if unsafe {*(b[0].as_ptr())} == b'a' { if eq!(b[1] == b'c' b'c' b'e' b'p' b't' b'-' b'r' b'a' b'n' b'g' b'e' b's') { return Ok(AcceptRanges.into()); } else if eq!(b[1] == b'u' b't' b'h' b'o' b'r' b'i' b'z' b'a' b't' b'i' b'o' b'n') { return Ok(Authorization.into()); } - } else if unsafe {assume_init_eq(b[0], b'c')} { + } else if unsafe {*(b[0].as_ptr())} == b'c' { if eq!(b[1] == b'a' b'c' b'h' b'e' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l') { return Ok(CacheControl.into()); } else if eq!(b[1] == b'o' b'n' b't' b'e' b'n' b't' b'-' b'r' b'a' b'n' b'g' b'e' ) @@ -2148,14 +2148,6 @@ unsafe fn slice_assume_init(slice: &[MaybeUninit]) -> &[T] { &*(slice as *const [MaybeUninit] as *const [T]) } -// Compare `rhs` to `lhs` assuming the latter is initilized. -// -// Safety: `lhs` must be initilized to avoid undefined behavior. -#[cfg(any(not(debug_assertions), not(target_arch = "wasm32")))] -unsafe fn assume_init_eq(lhs: MaybeUninit, rhs: T) -> bool { - *(lhs.as_ptr()) == rhs -} - #[cfg(test)] mod tests { use super::*; From 46c9e77396b8314c2b857a0d8115178864425705 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Fri, 29 May 2020 20:57:17 -0400 Subject: [PATCH 5/9] Rearrange code in header::name mod Move the eq! 
macro into the parse_hdr() function which localized all of the unsafe code related to MaybeUninit. --- src/header/name.rs | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/header/name.rs b/src/header/name.rs index 99d186eb..53e39891 100644 --- a/src/header/name.rs +++ b/src/header/name.rs @@ -1040,22 +1040,6 @@ const HEADER_CHARS_H2: [u8; 256] = [ 0, 0, 0, 0, 0, 0 // 25x ]; -#[cfg(any(not(debug_assertions), not(target_arch = "wasm32")))] -macro_rules! eq { - (($($cmp:expr,)*) $v:ident[$n:expr] ==) => { - $($cmp) && * - }; - (($($cmp:expr,)*) $v:ident[$n:expr] == $a:tt $($rest:tt)*) => { - eq!(($($cmp,)* unsafe {*($v[$n].as_ptr())} == $a ,) $v[$n+1] == $($rest)*) - }; - ($v:ident == $($rest:tt)+) => { - eq!(() $v[0] == $($rest)+) - }; - ($v:ident[$n:expr] == $($rest:tt)+) => { - eq!(() $v[$n] == $($rest)+) - }; -} - #[cfg(any(not(debug_assertions), not(target_arch = "wasm32")))] /// This version is best under optimized mode, however in a wasm debug compile, /// the `eq` macro expands to 1 + 1 + 1 + 1... and wasm explodes when this chain gets too long /// See /~https://github.com/DenisKolodin/yew/issues/478 @@ -1078,6 +1062,21 @@ fn parse_hdr<'a>( } }; + macro_rules! eq { + (($($cmp:expr,)*) $v:ident[$n:expr] ==) => { + $($cmp) && * + }; + (($($cmp:expr,)*) $v:ident[$n:expr] == $a:tt $($rest:tt)*) => { + eq!(($($cmp,)* unsafe {*($v[$n].as_ptr())} == $a ,) $v[$n+1] == $($rest)*) + }; + ($v:ident == $($rest:tt)+) => { + eq!(() $v[0] == $($rest)+) + }; + ($v:ident[$n:expr] == $($rest:tt)+) => { + eq!(() $v[$n] == $($rest)+) + }; + } + macro_rules! 
to_lower { ($d:ident, $src:ident, 1) => { $d[0] = MaybeUninit::new(table[$src[0] as usize]); }; From 4aa98febea18354f188249934ec6c917084450bc Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sat, 30 May 2020 14:34:16 -0400 Subject: [PATCH 6/9] Add safety comments to header::name module The comments describe the pre-condition and post-conditions on the various different parts of the two parse_hdr() implementations that combine to make the use of MaybeUninit in that function sound. The process of assessing the soundness of the use of MaybeUninit also included manually checking that the number of parameters to each eq!() invocation matches the number of bytes initialized by the immediately preceding invocation of to_lower!() (which is necessary to avoid undefined behavior). To avoid being overly repetitive, the general pattern that assures soundness is documented in the comments but not each instance of that pattern. Each instance, though, was checked. --- src/header/name.rs | 68 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/src/header/name.rs b/src/header/name.rs index 53e39891..c02a259f 100644 --- a/src/header/name.rs +++ b/src/header/name.rs @@ -1053,7 +1053,9 @@ fn parse_hdr<'a>( let len = data.len(); + // Precondition: each element of buf must be intitialized let validate = |buf: &'a [MaybeUninit]| { + // Safety: follows from the precondtion let buf = unsafe {slice_assume_init(buf)}; if buf.iter().any(|&b| b == 0) { Err(InvalidHeaderName::new()) @@ -1062,11 +1064,18 @@ fn parse_hdr<'a>( } }; + // Called as either eq!(b == b'a' b'b' b'c') or eq!(b[i] == b'a' b'b' b'c') + // Precondition: the first n elements of b (or the first n starting at i) + // must be intitialized, where n is the number of bytes listed after the '==' + // in the invocation. macro_rules! 
eq { (($($cmp:expr,)*) $v:ident[$n:expr] ==) => { $($cmp) && * }; (($($cmp:expr,)*) $v:ident[$n:expr] == $a:tt $($rest:tt)*) => { + // Safety: this arm is matched once for each byte after the '==' in + // the invocation (starting at 0 or i depending on the form of the call). + // By the precondtion $v[$n] is intitialized for each such match. eq!(($($cmp,)* unsafe {*($v[$n].as_ptr())} == $a ,) $v[$n+1] == $($rest)*) }; ($v:ident == $($rest:tt)+) => { @@ -1078,6 +1087,10 @@ fn parse_hdr<'a>( } + // Post-condition: the first n elements of $d are intitialized where n is the + // third paramter to the macro. Note that this macro overwrite the first n elements + // of $d without dropping the existing contents (if any) but the elements of $d + // are u8's so no drop is necessary. macro_rules! to_lower { ($d:ident, $src:ident, 1) => { $d[0] = MaybeUninit::new(table[$src[0] as usize]); }; ($d:ident, $src:ident, 2) => { to_lower!($d, $src, 1); $d[1] = MaybeUninit::new(table[$src[1] as usize]); }; @@ -1120,14 +1133,30 @@ fn parse_hdr<'a>( "header name too long -- max length is {}", super::MAX_HEADER_NAME_LEN); + // Most of the arms of the match below have a variation of the following pattern: + // to_lower!(b, data, n); + // if eq!(b == b'1' b'2' ... b'n') { + // Ok(StandardHeaderElement.into()) + // } else { + // validate(&b[..n]) + // } + // The soundness of the arms following this pattern is described once in the + // match arm for 2. The soundness of exception to this pattern are described in + // each such match arm. match len { 0 => Err(InvalidHeaderName::new()), 2 => { to_lower!(b, data, 2); + // Precondition: the post-condition on to_lower!() ensures the first 2 + // elements of b are intitialized and the eq!() call lists 2 bytes + // after the ==. if eq!(b == b't' b'e') { Ok(Te.into()) } else { + // Precondition: the post-condition on to_lower!() ensures that the + // first 2 elements of b are intitialized. len == 2 so all of + // b[..len] is intitialized. 
validate(&b[..len]) } } @@ -1175,6 +1204,7 @@ fn parse_hdr<'a>( } } 6 => { + // this arm mostly follows the pattern except as indicated to_lower!(b, data, 6); if eq!(b == b'a' b'c' b'c' b'e' b'p' b't') { @@ -1187,7 +1217,12 @@ fn parse_hdr<'a>( return Ok(Origin.into()); } else if eq!(b == b'p' b'r' b'a' b'g' b'm' b'a') { return Ok(Pragma.into()); + // Safety: the post-condtion on to_lower!() means the first 6 + // elements of b are intitialized so, in particular, b[0] is. } else if unsafe {*(b[0].as_ptr())} == b's' { + // Precondition: the post-condtion on to_lower!() means the + // first 6 elements of b (and hence the first 5 elements starting + // at b[1]) are intitialized. if eq!(b[1] == b'e' b'r' b'v' b'e' b'r') { return Ok(Server.into()); } @@ -1219,7 +1254,11 @@ fn parse_hdr<'a>( 8 => { to_lower!(b, data, 8); + // Precondition: the post-condition on to_lower!() means the first + // 8 elements of b are intitialized so, in particular, the first 3 are. if eq!(b == b'i' b'f' b'-') { + // Precondition: (here and next eq!()) the first 5 elements of b + // starting at b[3] are intitialized because the first 8 are. if eq!(b[3] == b'm' b'a' b't' b'c' b'h') { return Ok(IfMatch.into()); } else if eq!(b[3] == b'r' b'a' b'n' b'g' b'e') { @@ -1276,7 +1315,12 @@ fn parse_hdr<'a>( 13 => { to_lower!(b, data, 13); + // Safety: (here and next else if) The post-condition on to_lower!() + // means the first 13 bytes of b are intitialized so b[0] is. if unsafe {*(b[0].as_ptr())} == b'a' { + // Precondition: (here and next calls of eq!() with b[1]) the + // first 13 bytes of b are intitialized so the first 12 starting + // at b[1] are. 
if eq!(b[1] == b'c' b'c' b'e' b'p' b't' b'-' b'r' b'a' b'n' b'g' b'e' b's') { return Ok(AcceptRanges.into()); } else if eq!(b[1] == b'u' b't' b'h' b'o' b'r' b'i' b'z' b'a' b't' b'i' b'o' b'n') { @@ -1312,7 +1356,11 @@ fn parse_hdr<'a>( 15 => { to_lower!(b, data, 15); + // Precondition: The post-condition on to_lower!() ensures the first 15 + // bytes of b are intitialized so, in particular the first 7 are. if eq!(b == b'a' b'c' b'c' b'e' b'p' b't' b'-') { // accept- + // Precondition: The first 15 bytes of 5 are intitialized so the + // first 8 starting at b[7] are. if eq!(b[7] == b'e' b'n' b'c' b'o' b'd' b'i' b'n' b'g') { return Ok(AcceptEncoding.into()) } else if eq!(b[7] == b'l' b'a' b'n' b'g' b'u' b'a' b'g' b'e') { @@ -1332,7 +1380,12 @@ fn parse_hdr<'a>( 16 => { to_lower!(b, data, 16); + // Precondition: The post-condition on to_lower!() means that the first + // 16 bytes of b are intitialized so, in particular, the first 8 bytes + // are. if eq!(b == b'c' b'o' b'n' b't' b'e' b'n' b't' b'-') { + // Precondition: The first 16 bytes of b are intitialized so the + // first 8 bytes starting at b[8] are. if eq!(b[8] == b'l' b'a' b'n' b'g' b'u' b'a' b'g' b'e') { return Ok(ContentLanguage.into()) } else if eq!(b[8] == b'l' b'o' b'c' b'a' b't' b'i' b'o' b'n') { @@ -1459,7 +1512,11 @@ fn parse_hdr<'a>( 28 => { to_lower!(b, data, 28); + // Precondition: The post-condition of to_lower!() ensures that the first 28 bytes of b + // are intitialized so, in particular, the first 21 are. if eq!(b == b'a' b'c' b'c' b'e' b's' b's' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l' b'-' b'a' b'l' b'l' b'o' b'w' b'-') { + // Precondition: The first 28 bytes of b are intitialized so the first 7 bytes + // starting at b[21] are. 
if eq!(b[21] == b'h' b'e' b'a' b'd' b'e' b'r' b's') { return Ok(AccessControlAllowHeaders.into()) } else if eq!(b[21] == b'm' b'e' b't' b'h' b'o' b'd' b's') { @@ -1472,7 +1529,11 @@ fn parse_hdr<'a>( 29 => { to_lower!(b, data, 29); + // Precondition: The post-condition of to_lower!() ensures the fist 29 bytes of b are + // intitialized so, in particular, the first 15 bytes are. if eq!(b == b'a' b'c' b'c' b'e' b's' b's' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l' b'-') { + // Precondition: The fisr 29 bytes of b are intitialized so the first 14 bytes + // starting at b[15] are. if eq!(b[15] == b'e' b'x' b'p' b'o' b's' b'e' b'-' b'h' b'e' b'a' b'd' b'e' b'r' b's') { return Ok(AccessControlExposeHeaders.into()) } else if eq!(b[15] == b'r' b'e' b'q' b'u' b'e' b's' b't' b'-' b'm' b'e' b't' b'h' b'o' b'd') { @@ -1515,6 +1576,8 @@ fn parse_hdr<'a>( b[i] = MaybeUninit::new(table[data[i] as usize]); } + // Precondition: the first len bytes of b are intitialized in the loop above so + // b[..len] is intitialized. validate(&b[..len]) } else { Ok(HdrName::custom(data, false)) @@ -1553,8 +1616,11 @@ fn parse_hdr<'a>( 0 => Err(InvalidHeaderName::new()), len if len > 64 => Ok(HdrName::custom(data, false)), len => { - // Read from data into the buffer - transforming using `table` as we go + // Read from data into the buffer - transforming using `table` as we go. + // The assignment to *out ensures that each byte is intitialized. Since + // *out is a u8 it doesn't matter that we are not dropping *out before accessing it. data.iter().zip(b.iter_mut()).for_each(|(index, out)| *out = MaybeUninit::new(table[*index as usize])); + // Safety: We just intitialized the first len bytes of b in the previous line. 
let b = unsafe {slice_assume_init(&b[..len])}; match &b[0..len] { b"te" => Ok(Te.into()), From a3a7800c30521d07146329e37450dc767720a4e7 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sat, 30 May 2020 14:44:32 -0400 Subject: [PATCH 7/9] Fix spelling error in one comment --- src/header/name.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/header/name.rs b/src/header/name.rs index c02a259f..533e389f 100644 --- a/src/header/name.rs +++ b/src/header/name.rs @@ -1359,7 +1359,7 @@ fn parse_hdr<'a>( // Precondition: The post-condition on to_lower!() ensures the first 15 // bytes of b are intitialized so, in particular the first 7 are. if eq!(b == b'a' b'c' b'c' b'e' b'p' b't' b'-') { // accept- - // Precondition: The first 15 bytes of 5 are intitialized so the + // Precondition: The first 15 bytes of b are intitialized so the // first 8 starting at b[7] are. if eq!(b[7] == b'e' b'n' b'c' b'o' b'd' b'i' b'n' b'g') { return Ok(AcceptEncoding.into()) From ccf88892adf634d16fda239e6086e666f0d96b13 Mon Sep 17 00:00:00 2001 From: Steven Bosnick Date: Sat, 30 May 2020 17:15:03 -0400 Subject: [PATCH 8/9] Add safety comments to header::name module The comments document the invariant, preconditions, and post-conditions that together ensure that the use of unsafe related to UTF-8 assumptions (in calls to ByteStr::from_utf8_unchecked()) are sound. --- src/header/name.rs | 83 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 19 deletions(-) diff --git a/src/header/name.rs b/src/header/name.rs index 533e389f..2e7cc232 100644 --- a/src/header/name.rs +++ b/src/header/name.rs @@ -51,6 +51,7 @@ enum Repr { struct Custom(ByteStr); #[derive(Debug, Clone)] +// Invariant: If lower then buf is valid UTF-8. struct MaybeLower<'a> { buf: &'a [u8], lower: bool, @@ -979,6 +980,8 @@ standard_headers! 
{ /// / DIGIT / ALPHA /// ; any VCHAR, except delimiters /// ``` +// HEADER_CHARS maps every byte that is 128 or larger to 0 so everything that is +// mapped by HEADER_CHARS, maps to a valid single-byte UTF-8 codepoint. const HEADER_CHARS: [u8; 256] = [ // 0 1 2 3 4 5 6 7 8 9 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x @@ -1010,6 +1013,8 @@ const HEADER_CHARS: [u8; 256] = [ ]; /// Valid header name characters for HTTP/2.0 and HTTP/3.0 +// HEADER_CHARS_H2 maps every byte that is 128 or larger to 0 so everything that is +// mapped by HEADER_CHARS_H2, maps to a valid single-byte UTF-8 codepoint. const HEADER_CHARS_H2: [u8; 256] = [ // 0 1 2 3 4 5 6 7 8 9 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x @@ -1044,6 +1049,7 @@ const HEADER_CHARS_H2: [u8; 256] = [ /// This version is best under optimized mode, however in a wasm debug compile, /// the `eq` macro expands to 1 + 1 + 1 + 1... and wasm explodes when this chain gets too long /// See /~https://github.com/DenisKolodin/yew/issues/478 +// Precondition: table maps all bytes that are not valid single-byte UTF-8 to something that is. fn parse_hdr<'a>( data: &'a [u8], b: &'a mut [MaybeUninit; SCRATCH_BUF_SIZE], @@ -1053,30 +1059,33 @@ fn parse_hdr<'a>( let len = data.len(); - // Precondition: each element of buf must be intitialized + // Precondition: each element of buf must be intitialized and must be + // a valid single-byte UTF-8 codepoint. let validate = |buf: &'a [MaybeUninit]| { // Safety: follows from the precondtion let buf = unsafe {slice_assume_init(buf)}; if buf.iter().any(|&b| b == 0) { Err(InvalidHeaderName::new()) } else { + // Precondition: satified by the precondition of validate. Ok(HdrName::custom(buf, true)) } }; // Called as either eq!(b == b'a' b'b' b'c') or eq!(b[i] == b'a' b'b' b'c') - // Precondition: the first n elements of b (or the first n starting at i) - // must be intitialized, where n is the number of bytes listed after the '==' - // in the invocation. 
+ // Precondition: the first n elements of b (or the first n starting at i) must be + // intitialized, where n is the number of bytes listed after the '==' in the + // invocation. macro_rules! eq { (($($cmp:expr,)*) $v:ident[$n:expr] ==) => { $($cmp) && * }; (($($cmp:expr,)*) $v:ident[$n:expr] == $a:tt $($rest:tt)*) => { - // Safety: this arm is matched once for each byte after the '==' in - // the invocation (starting at 0 or i depending on the form of the call). - // By the precondtion $v[$n] is intitialized for each such match. - eq!(($($cmp,)* unsafe {*($v[$n].as_ptr())} == $a ,) $v[$n+1] == $($rest)*) + // Safety: this arm is matched once for each byte after the '==' in the + // invocation (starting at 0 or i depending on the form of the call). By + // the precondtion $v[$n] is intitialized for each such match. + eq!(($($cmp,)* unsafe {*($v[$n].as_ptr())} == $a ,) $v[$n+1] == + $($rest)*) }; ($v:ident == $($rest:tt)+) => { eq!(() $v[0] == $($rest)+) @@ -1086,11 +1095,12 @@ fn parse_hdr<'a>( }; } - - // Post-condition: the first n elements of $d are intitialized where n is the - // third paramter to the macro. Note that this macro overwrite the first n elements - // of $d without dropping the existing contents (if any) but the elements of $d - // are u8's so no drop is necessary. + // Post-condition: the first n elements of $d are intitialized to a valid + // single-byte UTF-8 codepoint where n is the third paramter to the macro. Note + // that this macro overwrite the first n elements of $d without dropping the + // existing contents (if any) but the elements of $d are u8's so no drop is + // necessary. The UTF-8 part of the post-condition follows from the precondition + // on table that is a part of parse_hdr(). macro_rules! 
to_lower { ($d:ident, $src:ident, 1) => { $d[0] = MaybeUninit::new(table[$src[0] as usize]); }; ($d:ident, $src:ident, 2) => { to_lower!($d, $src, 1); $d[1] = MaybeUninit::new(table[$src[1] as usize]); }; @@ -1155,8 +1165,9 @@ fn parse_hdr<'a>( Ok(Te.into()) } else { // Precondition: the post-condition on to_lower!() ensures that the - // first 2 elements of b are intitialized. len == 2 so all of - // b[..len] is intitialized. + // first 2 elements of b are intitialized and are valid single-byte + // UTF-8. len == 2 so all of b[..len] is intitialized and is valid + // UTF-8. validate(&b[..len]) } } @@ -1573,11 +1584,13 @@ fn parse_hdr<'a>( _ => { if len < 64 { for i in 0..len { + // The precondition on table for parse_hdr() means that b[i] is + // intitialized to a valid single-byte UTF-8 codepoint. b[i] = MaybeUninit::new(table[data[i] as usize]); } // Precondition: the first len bytes of b are intitialized in the loop above so - // b[..len] is intitialized. + // b[..len] is intitialized and is valid UTF-8. validate(&b[..len]) } else { Ok(HdrName::custom(data, false)) @@ -1588,6 +1601,7 @@ fn parse_hdr<'a>( #[cfg(all(debug_assertions, target_arch = "wasm32"))] /// This version works best in debug mode in wasm +// Precondition: table maps all bytes that are not valid single-byte UTF-8 to something that is. fn parse_hdr<'a>( data: &'a [u8], b: &'a mut [MaybeUninit; SCRATCH_BUF_SIZE], @@ -1597,11 +1611,13 @@ fn parse_hdr<'a>( let len = data.len(); + // Precondition: the first len bytes of buf are valid UTF-8. let validate = |buf: &'a [u8], len: usize| { let buf = &buf[..len]; if buf.iter().any(|&b| b == 0) { Err(InvalidHeaderName::new()) } else { + // Precondition: follows from the precondtion on validate. Ok(HdrName::custom(buf, true)) } }; @@ -1617,9 +1633,13 @@ fn parse_hdr<'a>( len if len > 64 => Ok(HdrName::custom(data, false)), len => { // Read from data into the buffer - transforming using `table` as we go. 
- // The assignment to *out ensures that each byte is intitialized. Since - // *out is a u8 it doesn't matter that we are not dropping *out before accessing it. - data.iter().zip(b.iter_mut()).for_each(|(index, out)| *out = MaybeUninit::new(table[*index as usize])); + // The assignment to *out ensures that each byte is intitialized. Since + // *out is a u8 it doesn't matter that we are not dropping *out before + // accessing it. The precondition on table for parse_hdr() means that + // each intitialized byte of b is valid UTF-8. + data.iter().zip(b.iter_mut()).for_each(|(index, out)| *out = + MaybeUninit::new(table[*index as + usize])); // Safety: We just intitialized the first len bytes of b in the previous line. let b = unsafe {slice_assume_init(&b[..len])}; match &b[0..len] { @@ -1704,6 +1724,8 @@ fn parse_hdr<'a>( b"content-security-policy-report-only" => { Ok(ContentSecurityPolicyReportOnly.into()) } + // Precondition: other is the first len bytes of b which was + // initialized to valid UTF-8 above. other => validate(other, len), } } @@ -1724,10 +1746,12 @@ impl HeaderName { /// This function normalizes the input. pub fn from_bytes(src: &[u8]) -> Result { let mut buf = uninit_u8_array(); + // Precondition: HEADER_CHARS is a valid table for parse_hdr(). match parse_hdr(src, &mut buf, &HEADER_CHARS)?.inner { Repr::Standard(std) => Ok(std.into()), Repr::Custom(MaybeLower { buf, lower: true }) => { let buf = Bytes::copy_from_slice(buf); + // Safety: the invariant on MaybeLower ensures buf is valid UTF-8. 
let val = unsafe { ByteStr::from_utf8_unchecked(buf) }; Ok(Custom(val).into()) } @@ -1736,6 +1760,7 @@ impl HeaderName { let mut dst = BytesMut::with_capacity(buf.len()); for b in buf.iter() { + // HEADER_CHARS maps all bytes to valid single-byte UTF-8 let b = HEADER_CHARS[*b as usize]; if b == 0 { @@ -1745,6 +1770,9 @@ impl HeaderName { dst.put_u8(b); } + // Safety: the loop above maps all bytes in buf to valid single byte + // UTF-8 before copying them into dst. This means that dst (and hence + // dst.freeze()) is valid UTF-8. let val = unsafe { ByteStr::from_utf8_unchecked(dst.freeze()) }; Ok(Custom(val).into()) @@ -1772,21 +1800,27 @@ impl HeaderName { /// ``` pub fn from_lowercase(src: &[u8]) -> Result { let mut buf = uninit_u8_array(); + // Precondition: HEADER_CHARS_H2 is a valid table for parse_hdr() match parse_hdr(src, &mut buf, &HEADER_CHARS_H2)?.inner { Repr::Standard(std) => Ok(std.into()), Repr::Custom(MaybeLower { buf, lower: true }) => { let buf = Bytes::copy_from_slice(buf); + // Safety: the invariant on MaybeLower ensures buf is valid UTF-8. let val = unsafe { ByteStr::from_utf8_unchecked(buf) }; Ok(Custom(val).into()) } Repr::Custom(MaybeLower { buf, lower: false }) => { for &b in buf.iter() { + // HEADER_CHARS maps all bytes that are not valid single-byte + // UTF-8 to 0 so this check returns an error for invalid UTF-8. if b != HEADER_CHARS[b as usize] { return Err(InvalidHeaderName::new()); } } let buf = Bytes::copy_from_slice(buf); + // Safety: the loop above checks that each byte of buf (either + // version) is valid UTF-8. 
let val = unsafe { ByteStr::from_utf8_unchecked(buf) }; Ok(Custom(val).into()) } @@ -1831,6 +1865,7 @@ impl HeaderName { pub fn from_static(src: &'static str) -> HeaderName { let bytes = src.as_bytes(); let mut buf = uninit_u8_array(); + // Precondition: HEADER_CHARS_H2 is a valid table for parse_hdr() match parse_hdr(bytes, &mut buf, &HEADER_CHARS_H2) { Ok(hdr_name) => match hdr_name.inner { Repr::Standard(std) => std.into(), @@ -2073,8 +2108,10 @@ impl Error for InvalidHeaderName {} // ===== HdrName ===== impl<'a> HdrName<'a> { + // Precondition: if lower then buf is valid UTF-8 fn custom(buf: &'a [u8], lower: bool) -> HdrName<'a> { HdrName { + // Invariant (on MaybeLower): follows from the precondition inner: Repr::Custom(MaybeLower { buf: buf, lower: lower, @@ -2086,6 +2123,7 @@ impl<'a> HdrName<'a> { where F: FnOnce(HdrName<'_>) -> U, { let mut buf = uninit_u8_array(); + // Precondition: HEADER_CHARS is a valid table for parse_hdr(). let hdr = parse_hdr(hdr, &mut buf, &HEADER_CHARS)?; Ok(f(hdr)) } @@ -2096,6 +2134,7 @@ impl<'a> HdrName<'a> { { let mut buf = uninit_u8_array(); let hdr = + // Precondition: HEADER_CHARS is a valid table for parse_hdr(). parse_hdr(hdr.as_bytes(), &mut buf, &HEADER_CHARS).expect("static str is invalid name"); f(hdr) } @@ -2111,6 +2150,7 @@ impl<'a> From> for HeaderName { Repr::Custom(maybe_lower) => { if maybe_lower.lower { let buf = Bytes::copy_from_slice(&maybe_lower.buf[..]); + // Safety: the invariant on MaybeLower ensures buf is valid UTF-8. let byte_str = unsafe { ByteStr::from_utf8_unchecked(buf) }; HeaderName { @@ -2121,9 +2161,14 @@ impl<'a> From> for HeaderName { let mut dst = BytesMut::with_capacity(maybe_lower.buf.len()); for b in maybe_lower.buf.iter() { + // HEADER_CHARS maps each byte to a valid single-byte UTF-8 + // codepoint. dst.put_u8(HEADER_CHARS[*b as usize]); } + // Safety: the loop above maps each byte of maybe_lower.buf to a + // valid single-byte UTF-8 codepoint before copying it into dst. 
+ // dst (and hence dst.freeze()) is thus valid UTF-8. let buf = unsafe { ByteStr::from_utf8_unchecked(dst.freeze()) }; HeaderName { From 84ad04ff3f1068d45b2f734da9ee2492414c1e17 Mon Sep 17 00:00:00 2001 From: Jed Denlea Date: Fri, 20 Aug 2021 16:46:29 -0700 Subject: [PATCH 9/9] Make HeaderName::from_static const ... plus some clean-up. It was only after I came up with the scheme using `const fn from_bytes(&[u8]) -> Option<StandardHeader>` that I noticed the debug+wasm32-wasi version of `parse_hdr`, which had something very similar. While cleaning up that function, I realized it would still panic if an attempted name was too long, which had been fixed for all other targets and profiles in #433. Then, I thought it would be worth seeing if the use of `eq!` in the primary version of `parse_hdr` still made any difference. And, it would not appear so. At least not on x86_64, nor wasm32-wasi run via wasmtime. I've run the benchmarks a number of times now, and it seems the only significant performance change anywhere is actually that of `HeaderName::from_static` itself, which now seems to run in about 2/3 the time on average. Unfortunately, `const fn` still cannot `panic!`, but I've followed the lead from `HeaderValue::from_static`. While that version required 1.46, this new function requires 1.49. That is almost 8 months old, so hopefully this isn't too controversial! --- .github/workflows/ci.yml | 2 +- Cargo.toml | 2 +- benches/header_name.rs | 139 ++++++ src/byte_str.rs | 2 +- src/header/name.rs | 884 ++++++++------------------------------- 5 files changed, 318 insertions(+), 711 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0b5bf748..9e07c21f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,7 +21,7 @@ jobs: - nightly # When updating this value, don't forget to also adjust the # `rust-version` field in the `Cargo.toml` file.
- - 1.46.0 + - 1.49.0 include: - rust: nightly diff --git a/Cargo.toml b/Cargo.toml index c143dc99..b3bc968d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ keywords = ["http"] categories = ["web-programming"] edition = "2018" # When updating this value, don't forget to also adjust the GitHub Actions config. -rust-version = "1.46.0" +rust-version = "1.49.0" [dependencies] bytes = "1" diff --git a/benches/header_name.rs b/benches/header_name.rs index d65f7d94..4249f987 100644 --- a/benches/header_name.rs +++ b/benches/header_name.rs @@ -130,6 +130,128 @@ fn make_all_known_headers() -> Vec> { ] } +static ALL_KNOWN_HEADERS: &[&str] = &[ + // Standard request headers + "a-im", + "accept", + "accept-charset", + "accept-datetime", + "accept-encoding", + "accept-language", + "access-control-request-method", + "authorization", + "cache-control", + "connection", + "permanent", + "content-length", + "content-md5", + "content-type", + "cookie", + "date", + "expect", + "forwarded", + "from", + "host", + "permanent", + "http2-settings", + "if-match", + "if-modified-since", + "if-none-match", + "if-range", + "if-unmodified-since", + "max-forwards", + "origin", + "pragma", + "proxy-authorization", + "range", + "referer", + "te", + "user-agent", + "upgrade", + "via", + "warning", + // common_non_standard + "upgrade-insecure-requests", + "upgrade-insecure-requests", + "x-requested-with", + "dnt", + "x-forwarded-for", + "x-forwarded-host", + "x-forwarded-proto", + "front-end-https", + "x-http-method-override", + "x-att-deviceid", + "x-wap-profile", + "proxy-connection", + "x-uidh", + "x-csrf-token", + "x-request-id", + "x-correlation-id", + "save-data", + // standard_response_headers + "accept-patch", + "accept-ranges", + "access-control-allow-credentials", + "access-control-allow-headers", + "access-control-allow-methods", + "access-control-allow-origin", + "access-control-expose-headers", + "access-control-max-age", + "age", + "allow", + "alt-svc", + "cache-control", + 
"connection", + "content-disposition", + "content-encoding", + "content-language", + "content-length", + "content-location", + "content-md5", + "content-range", + "content-type", + "date", + "delta-base", + "etag", + "expires", + "im", + "last-modified", + "link", + "location", + "p3p", + "permanent", + "pragma", + "proxy-authenticate", + "public-key-pins", + "retry-after", + "server", + "set-cookie", + "strict-transport-security", + "tk", + "trailer", + "transfer-encoding", + "upgrade", + "vary", + "via", + "warning", + "www-authenticate", + "x-frame-options", + // common_non_standard_response + "content-security-policy", + "refresh", + "status", + "timing-allow-origin", + "x-content-duration", + "x-content-security-policy", + "x-content-type-options", + "x-correlation-id", + "x-powered-by", + "x-request-id", + "x-ua-compatible", + "x-webkit-csp", + "x-xss-protection", +]; + #[bench] fn header_name_easy(b: &mut Bencher) { let name = b"Content-type"; @@ -138,6 +260,14 @@ fn header_name_easy(b: &mut Bencher) { }); } +#[bench] +fn header_name_custom(b: &mut Bencher) { + let name = b"Foo-Bar-Baz-Blah"; + b.iter(|| { + HeaderName::from_bytes(&name[..]).unwrap(); + }); +} + #[bench] fn header_name_bad(b: &mut Bencher) { let name = b"bad header name"; @@ -155,3 +285,12 @@ fn header_name_various(b: &mut Bencher) { } }); } + +#[bench] +fn header_name_from_static(b: &mut Bencher) { + b.iter(|| { + for name in ALL_KNOWN_HEADERS { + HeaderName::from_static(name); + } + }); +} diff --git a/src/byte_str.rs b/src/byte_str.rs index 04e3e15e..e83ff75d 100644 --- a/src/byte_str.rs +++ b/src/byte_str.rs @@ -18,7 +18,7 @@ impl ByteStr { } #[inline] - pub fn from_static(val: &'static str) -> ByteStr { + pub const fn from_static(val: &'static str) -> ByteStr { ByteStr { // Invariant: val is a str so contains vaid UTF-8. 
bytes: Bytes::from_static(val.as_bytes()), diff --git a/src/header/name.rs b/src/header/name.rs index eb17a423..f8872257 100644 --- a/src/header/name.rs +++ b/src/header/name.rs @@ -64,7 +64,7 @@ macro_rules! standard_headers { ( $( $(#[$docs:meta])* - ($konst:ident, $upcase:ident, $name:expr); + ($konst:ident, $upcase:ident, $name_bytes:literal); )+ ) => { #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] @@ -85,52 +85,60 @@ macro_rules! standard_headers { #[inline] fn as_str(&self) -> &'static str { match *self { + // Safety: test_parse_standard_headers ensures these &[u8]s are &str-safe. $( - StandardHeader::$konst => $name, + StandardHeader::$konst => unsafe { std::str::from_utf8_unchecked( $name_bytes ) }, )+ } } + + const fn from_bytes(name_bytes: &[u8]) -> Option { + match name_bytes { + $( + $name_bytes => Some(StandardHeader::$konst), + )+ + _ => None, + } + } } #[cfg(test)] - const TEST_HEADERS: &'static [(StandardHeader, &'static str)] = &[ + const TEST_HEADERS: &'static [(StandardHeader, &'static [u8])] = &[ $( - (StandardHeader::$konst, $name), + (StandardHeader::$konst, $name_bytes), )+ ]; #[test] fn test_parse_standard_headers() { - for &(std, name) in TEST_HEADERS { + for &(std, name_bytes) in TEST_HEADERS { // Test lower case - assert_eq!(HeaderName::from_bytes(name.as_bytes()).unwrap(), HeaderName::from(std)); + assert_eq!(HeaderName::from_bytes(name_bytes).unwrap(), HeaderName::from(std)); // Test upper case - let upper = name.to_uppercase().to_string(); + let upper = std::str::from_utf8(name_bytes).expect("byte string constants are all utf-8").to_uppercase(); assert_eq!(HeaderName::from_bytes(upper.as_bytes()).unwrap(), HeaderName::from(std)); } } #[test] fn test_standard_headers_into_bytes() { - for &(std, name) in TEST_HEADERS { + for &(std, name_bytes) in TEST_HEADERS { + let name = std::str::from_utf8(name_bytes).unwrap(); let std = HeaderName::from(std); // Test lower case - let name_bytes = name.as_bytes(); let bytes: Bytes = 
HeaderName::from_bytes(name_bytes).unwrap().inner.into(); - assert_eq!(bytes, name_bytes); + assert_eq!(bytes, name); assert_eq!(HeaderName::from_bytes(name_bytes).unwrap(), std); // Test upper case - let upper = name.to_uppercase().to_string(); + let upper = name.to_uppercase(); let bytes: Bytes = HeaderName::from_bytes(upper.as_bytes()).unwrap().inner.into(); - assert_eq!(bytes, name.as_bytes()); + assert_eq!(bytes, name_bytes); assert_eq!(HeaderName::from_bytes(upper.as_bytes()).unwrap(), std); - - } } @@ -154,7 +162,7 @@ standard_headers! { /// where the request is done: when fetching a CSS stylesheet a different /// value is set for the request than when fetching an image, video or a /// script. - (Accept, ACCEPT, "accept"); + (Accept, ACCEPT, b"accept"); /// Advertises which character set the client is able to understand. /// @@ -169,7 +177,7 @@ standard_headers! { /// theoretically send back a 406 (Not Acceptable) error code. But, for a /// better user experience, this is rarely done and the more common way is /// to ignore the Accept-Charset header in this case. - (AcceptCharset, ACCEPT_CHARSET, "accept-charset"); + (AcceptCharset, ACCEPT_CHARSET, b"accept-charset"); /// Advertises which content encoding the client is able to understand. /// @@ -197,7 +205,7 @@ standard_headers! { /// forbidden, by an identity;q=0 or a *;q=0 without another explicitly set /// value for identity, the server must never send back a 406 Not Acceptable /// error. - (AcceptEncoding, ACCEPT_ENCODING, "accept-encoding"); + (AcceptEncoding, ACCEPT_ENCODING, b"accept-encoding"); /// Advertises which languages the client is able to understand. /// @@ -222,7 +230,7 @@ standard_headers! { /// send back a 406 (Not Acceptable) error code. But, for a better user /// experience, this is rarely done and more common way is to ignore the /// Accept-Language header in this case. 
- (AcceptLanguage, ACCEPT_LANGUAGE, "accept-language"); + (AcceptLanguage, ACCEPT_LANGUAGE, b"accept-language"); /// Marker used by the server to advertise partial request support. /// @@ -232,7 +240,7 @@ standard_headers! { /// /// In presence of an Accept-Ranges header, the browser may try to resume an /// interrupted download, rather than to start it from the start again. - (AcceptRanges, ACCEPT_RANGES, "accept-ranges"); + (AcceptRanges, ACCEPT_RANGES, b"accept-ranges"); /// Preflight response indicating if the response to the request can be /// exposed to the page. @@ -257,7 +265,7 @@ standard_headers! { /// be set on both sides (the Access-Control-Allow-Credentials header and in /// the XHR or Fetch request) in order for the CORS request with credentials /// to succeed. - (AccessControlAllowCredentials, ACCESS_CONTROL_ALLOW_CREDENTIALS, "access-control-allow-credentials"); + (AccessControlAllowCredentials, ACCESS_CONTROL_ALLOW_CREDENTIALS, b"access-control-allow-credentials"); /// Preflight response indicating permitted HTTP headers. /// @@ -273,33 +281,33 @@ standard_headers! { /// /// This header is required if the request has an /// Access-Control-Request-Headers header. - (AccessControlAllowHeaders, ACCESS_CONTROL_ALLOW_HEADERS, "access-control-allow-headers"); + (AccessControlAllowHeaders, ACCESS_CONTROL_ALLOW_HEADERS, b"access-control-allow-headers"); /// Preflight header response indicating permitted access methods. /// /// The Access-Control-Allow-Methods response header specifies the method or /// methods allowed when accessing the resource in response to a preflight /// request. - (AccessControlAllowMethods, ACCESS_CONTROL_ALLOW_METHODS, "access-control-allow-methods"); + (AccessControlAllowMethods, ACCESS_CONTROL_ALLOW_METHODS, b"access-control-allow-methods"); /// Indicates whether the response can be shared with resources with the /// given origin. 
- (AccessControlAllowOrigin, ACCESS_CONTROL_ALLOW_ORIGIN, "access-control-allow-origin"); + (AccessControlAllowOrigin, ACCESS_CONTROL_ALLOW_ORIGIN, b"access-control-allow-origin"); /// Indicates which headers can be exposed as part of the response by /// listing their names. - (AccessControlExposeHeaders, ACCESS_CONTROL_EXPOSE_HEADERS, "access-control-expose-headers"); + (AccessControlExposeHeaders, ACCESS_CONTROL_EXPOSE_HEADERS, b"access-control-expose-headers"); /// Indicates how long the results of a preflight request can be cached. - (AccessControlMaxAge, ACCESS_CONTROL_MAX_AGE, "access-control-max-age"); + (AccessControlMaxAge, ACCESS_CONTROL_MAX_AGE, b"access-control-max-age"); /// Informs the server which HTTP headers will be used when an actual /// request is made. - (AccessControlRequestHeaders, ACCESS_CONTROL_REQUEST_HEADERS, "access-control-request-headers"); + (AccessControlRequestHeaders, ACCESS_CONTROL_REQUEST_HEADERS, b"access-control-request-headers"); /// Informs the server know which HTTP method will be used when the actual /// request is made. - (AccessControlRequestMethod, ACCESS_CONTROL_REQUEST_METHOD, "access-control-request-method"); + (AccessControlRequestMethod, ACCESS_CONTROL_REQUEST_METHOD, b"access-control-request-method"); /// Indicates the time in seconds the object has been in a proxy cache. /// @@ -307,7 +315,7 @@ standard_headers! { /// probably just fetched from the origin server; otherwise It is usually /// calculated as a difference between the proxy's current date and the Date /// general header included in the HTTP response. - (Age, AGE, "age"); + (Age, AGE, b"age"); /// Lists the set of methods support by a resource. /// @@ -316,16 +324,16 @@ standard_headers! { /// empty Allow header indicates that the resource allows no request /// methods, which might occur temporarily for a given resource, for /// example. 
- (Allow, ALLOW, "allow"); + (Allow, ALLOW, b"allow"); /// Advertises the availability of alternate services to clients. - (AltSvc, ALT_SVC, "alt-svc"); + (AltSvc, ALT_SVC, b"alt-svc"); /// Contains the credentials to authenticate a user agent with a server. /// /// Usually this header is included after the server has responded with a /// 401 Unauthorized status and the WWW-Authenticate header. - (Authorization, AUTHORIZATION, "authorization"); + (Authorization, AUTHORIZATION, b"authorization"); /// Specifies directives for caching mechanisms in both requests and /// responses. @@ -333,7 +341,7 @@ standard_headers! { /// Caching directives are unidirectional, meaning that a given directive in /// a request is not implying that the same directive is to be given in the /// response. - (CacheControl, CACHE_CONTROL, "cache-control"); + (CacheControl, CACHE_CONTROL, b"cache-control"); /// Controls whether or not the network connection stays open after the /// current transaction finishes. @@ -348,7 +356,7 @@ standard_headers! { /// to consume them and not to forward them further. Standard hop-by-hop /// headers can be listed too (it is often the case of Keep-Alive, but this /// is not mandatory. - (Connection, CONNECTION, "connection"); + (Connection, CONNECTION, b"connection"); /// Indicates if the content is expected to be displayed inline. /// @@ -368,7 +376,7 @@ standard_headers! { /// to HTTP forms and POST requests. Only the value form-data, as well as /// the optional directive name and filename, can be used in the HTTP /// context. - (ContentDisposition, CONTENT_DISPOSITION, "content-disposition"); + (ContentDisposition, CONTENT_DISPOSITION, b"content-disposition"); /// Used to compress the media-type. /// @@ -380,7 +388,7 @@ standard_headers! { /// use this field, but some types of resources, like jpeg images, are /// already compressed. Sometimes using additional compression doesn't /// reduce payload size and can even make the payload longer. 
- (ContentEncoding, CONTENT_ENCODING, "content-encoding"); + (ContentEncoding, CONTENT_ENCODING, b"content-encoding"); /// Used to describe the languages intended for the audience. /// @@ -395,13 +403,13 @@ standard_headers! { /// intended for all language audiences. Multiple language tags are also /// possible, as well as applying the Content-Language header to various /// media types and not only to textual documents. - (ContentLanguage, CONTENT_LANGUAGE, "content-language"); + (ContentLanguage, CONTENT_LANGUAGE, b"content-language"); /// Indicates the size of the entity-body. /// /// The header value must be a decimal indicating the number of octets sent /// to the recipient. - (ContentLength, CONTENT_LENGTH, "content-length"); + (ContentLength, CONTENT_LENGTH, b"content-length"); /// Indicates an alternate location for the returned data. /// @@ -414,10 +422,10 @@ standard_headers! { /// without the need of further content negotiation. Location is a header /// associated with the response, while Content-Location is associated with /// the entity returned. - (ContentLocation, CONTENT_LOCATION, "content-location"); + (ContentLocation, CONTENT_LOCATION, b"content-location"); /// Indicates where in a full body message a partial message belongs. - (ContentRange, CONTENT_RANGE, "content-range"); + (ContentRange, CONTENT_RANGE, b"content-range"); /// Allows controlling resources the user agent is allowed to load for a /// given page. @@ -425,7 +433,7 @@ standard_headers! { /// With a few exceptions, policies mostly involve specifying server origins /// and script endpoints. This helps guard against cross-site scripting /// attacks (XSS). - (ContentSecurityPolicy, CONTENT_SECURITY_POLICY, "content-security-policy"); + (ContentSecurityPolicy, CONTENT_SECURITY_POLICY, b"content-security-policy"); /// Allows experimenting with policies by monitoring their effects. /// @@ -433,7 +441,7 @@ standard_headers! 
{ /// developers to experiment with policies by monitoring (but not enforcing) /// their effects. These violation reports consist of JSON documents sent /// via an HTTP POST request to the specified URI. - (ContentSecurityPolicyReportOnly, CONTENT_SECURITY_POLICY_REPORT_ONLY, "content-security-policy-report-only"); + (ContentSecurityPolicyReportOnly, CONTENT_SECURITY_POLICY_REPORT_ONLY, b"content-security-policy-report-only"); /// Used to indicate the media type of the resource. /// @@ -445,23 +453,23 @@ standard_headers! { /// /// In requests, (such as POST or PUT), the client tells the server what /// type of data is actually sent. - (ContentType, CONTENT_TYPE, "content-type"); + (ContentType, CONTENT_TYPE, b"content-type"); /// Contains stored HTTP cookies previously sent by the server with the /// Set-Cookie header. /// /// The Cookie header might be omitted entirely, if the privacy setting of /// the browser are set to block them, for example. - (Cookie, COOKIE, "cookie"); + (Cookie, COOKIE, b"cookie"); /// Indicates the client's tracking preference. /// /// This header lets users indicate whether they would prefer privacy rather /// than personalized content. - (Dnt, DNT, "dnt"); + (Dnt, DNT, b"dnt"); /// Contains the date and time at which the message was originated. - (Date, DATE, "date"); + (Date, DATE, b"date"); /// Identifier for a specific version of a resource. /// @@ -477,7 +485,7 @@ standard_headers! { /// to quickly determine whether two representations of a resource are the /// same, but they might also be set to persist indefinitely by a tracking /// server. - (Etag, ETAG, "etag"); + (Etag, ETAG, b"etag"); /// Indicates expectations that need to be fulfilled by the server in order /// to properly handle the request. @@ -496,7 +504,7 @@ standard_headers! { /// /// No common browsers send the Expect header, but some other clients such /// as cURL do so by default. 
- (Expect, EXPECT, "expect"); + (Expect, EXPECT, b"expect"); /// Contains the date/time after which the response is considered stale. /// @@ -505,7 +513,7 @@ standard_headers! { /// /// If there is a Cache-Control header with the "max-age" or "s-max-age" /// directive in the response, the Expires header is ignored. - (Expires, EXPIRES, "expires"); + (Expires, EXPIRES, b"expires"); /// Contains information from the client-facing side of proxy servers that /// is altered or lost when a proxy is involved in the path of the request. @@ -517,7 +525,7 @@ standard_headers! { /// location-dependent content and by design it exposes privacy sensitive /// information, such as the IP address of the client. Therefore the user's /// privacy must be kept in mind when deploying this header. - (Forwarded, FORWARDED, "forwarded"); + (Forwarded, FORWARDED, b"forwarded"); /// Contains an Internet email address for a human user who controls the /// requesting user agent. @@ -526,7 +534,7 @@ standard_headers! { /// header should be sent, so you can be contacted if problems occur on /// servers, such as if the robot is sending excessive, unwanted, or invalid /// requests. - (From, FROM, "from"); + (From, FROM, b"from"); /// Specifies the domain name of the server and (optionally) the TCP port /// number on which the server is listening. @@ -537,7 +545,7 @@ standard_headers! { /// A Host header field must be sent in all HTTP/1.1 request messages. A 400 /// (Bad Request) status code will be sent to any HTTP/1.1 request message /// that lacks a Host header field or contains more than one. - (Host, HOST, "host"); + (Host, HOST, b"host"); /// Makes a request conditional based on the E-Tag. /// @@ -562,7 +570,7 @@ standard_headers! { /// that has been done since the original resource was fetched. If the /// request cannot be fulfilled, the 412 (Precondition Failed) response is /// returned. 
- (IfMatch, IF_MATCH, "if-match"); + (IfMatch, IF_MATCH, b"if-match"); /// Makes a request conditional based on the modification date. /// @@ -579,7 +587,7 @@ standard_headers! { /// /// The most common use case is to update a cached entity that has no /// associated ETag. - (IfModifiedSince, IF_MODIFIED_SINCE, "if-modified-since"); + (IfModifiedSince, IF_MODIFIED_SINCE, b"if-modified-since"); /// Makes a request conditional based on the E-Tag. /// @@ -615,7 +623,7 @@ standard_headers! { /// guaranteeing that another upload didn't happen before, losing the data /// of the previous put; this problems is the variation of the lost update /// problem. - (IfNoneMatch, IF_NONE_MATCH, "if-none-match"); + (IfNoneMatch, IF_NONE_MATCH, b"if-none-match"); /// Makes a request conditional based on range. /// @@ -631,7 +639,7 @@ standard_headers! { /// The most common use case is to resume a download, to guarantee that the /// stored resource has not been modified since the last fragment has been /// received. - (IfRange, IF_RANGE, "if-range"); + (IfRange, IF_RANGE, b"if-range"); /// Makes the request conditional based on the last modification date. /// @@ -652,14 +660,14 @@ standard_headers! { /// * In conjunction with a range request with a If-Range header, it can be /// used to ensure that the new fragment requested comes from an unmodified /// document. - (IfUnmodifiedSince, IF_UNMODIFIED_SINCE, "if-unmodified-since"); + (IfUnmodifiedSince, IF_UNMODIFIED_SINCE, b"if-unmodified-since"); /// Content-Types that are acceptable for the response. - (LastModified, LAST_MODIFIED, "last-modified"); + (LastModified, LAST_MODIFIED, b"last-modified"); /// Allows the server to point an interested client to another resource /// containing metadata about the requested resource. - (Link, LINK, "link"); + (Link, LINK, b"link"); /// Indicates the URL to redirect a page to. /// @@ -690,11 +698,11 @@ standard_headers! 
{ /// when content negotiation happened, without the need of further content /// negotiation. Location is a header associated with the response, while /// Content-Location is associated with the entity returned. - (Location, LOCATION, "location"); + (Location, LOCATION, b"location"); /// Indicates the max number of intermediaries the request should be sent /// through. - (MaxForwards, MAX_FORWARDS, "max-forwards"); + (MaxForwards, MAX_FORWARDS, b"max-forwards"); /// Indicates where a fetch originates from. /// @@ -702,7 +710,7 @@ standard_headers! { /// sent with CORS requests, as well as with POST requests. It is similar to /// the Referer header, but, unlike this header, it doesn't disclose the /// whole path. - (Origin, ORIGIN, "origin"); + (Origin, ORIGIN, b"origin"); /// HTTP/1.0 header usually used for backwards compatibility. /// @@ -710,7 +718,7 @@ standard_headers! { /// that may have various effects along the request-response chain. It is /// used for backwards compatibility with HTTP/1.0 caches where the /// Cache-Control HTTP/1.1 header is not yet present. - (Pragma, PRAGMA, "pragma"); + (Pragma, PRAGMA, b"pragma"); /// Defines the authentication method that should be used to gain access to /// a proxy. @@ -728,14 +736,14 @@ standard_headers! { /// /// The `proxy-authenticate` header is sent along with a `407 Proxy /// Authentication Required`. - (ProxyAuthenticate, PROXY_AUTHENTICATE, "proxy-authenticate"); + (ProxyAuthenticate, PROXY_AUTHENTICATE, b"proxy-authenticate"); /// Contains the credentials to authenticate a user agent to a proxy server. /// /// This header is usually included after the server has responded with a /// 407 Proxy Authentication Required status and the Proxy-Authenticate /// header. - (ProxyAuthorization, PROXY_AUTHORIZATION, "proxy-authorization"); + (ProxyAuthorization, PROXY_AUTHORIZATION, b"proxy-authorization"); /// Associates a specific cryptographic public key with a certain server. 
/// @@ -743,14 +751,14 @@ standard_headers! { /// or several keys are pinned and none of them are used by the server, the /// browser will not accept the response as legitimate, and will not display /// it. - (PublicKeyPins, PUBLIC_KEY_PINS, "public-key-pins"); + (PublicKeyPins, PUBLIC_KEY_PINS, b"public-key-pins"); /// Sends reports of pinning violation to the report-uri specified in the /// header. /// /// Unlike `Public-Key-Pins`, this header still allows browsers to connect /// to the server if the pinning is violated. - (PublicKeyPinsReportOnly, PUBLIC_KEY_PINS_REPORT_ONLY, "public-key-pins-report-only"); + (PublicKeyPinsReportOnly, PUBLIC_KEY_PINS_REPORT_ONLY, b"public-key-pins-report-only"); /// Indicates the part of a document that the server should return. /// @@ -760,7 +768,7 @@ standard_headers! { /// the ranges are invalid, the server returns the 416 Range Not Satisfiable /// error. The server can also ignore the Range header and return the whole /// document with a 200 status code. - (Range, RANGE, "range"); + (Range, RANGE, b"range"); /// Contains the address of the previous web page from which a link to the /// currently requested page was followed. @@ -768,15 +776,15 @@ standard_headers! { /// The Referer header allows servers to identify where people are visiting /// them from and may use that data for analytics, logging, or optimized /// caching, for example. - (Referer, REFERER, "referer"); + (Referer, REFERER, b"referer"); /// Governs which referrer information should be included with requests /// made. - (ReferrerPolicy, REFERRER_POLICY, "referrer-policy"); + (ReferrerPolicy, REFERRER_POLICY, b"referrer-policy"); /// Informs the web browser that the current page or frame should be /// refreshed. - (Refresh, REFRESH, "refresh"); + (Refresh, REFRESH, b"refresh"); /// The Retry-After response HTTP header indicates how long the user agent /// should wait before making a follow-up request. 
There are two main cases @@ -788,20 +796,20 @@ standard_headers! { /// * When sent with a redirect response, such as 301 (Moved Permanently), /// it indicates the minimum time that the user agent is asked to wait /// before issuing the redirected request. - (RetryAfter, RETRY_AFTER, "retry-after"); + (RetryAfter, RETRY_AFTER, b"retry-after"); /// The |Sec-WebSocket-Accept| header field is used in the WebSocket /// opening handshake. It is sent from the server to the client to /// confirm that the server is willing to initiate the WebSocket /// connection. - (SecWebSocketAccept, SEC_WEBSOCKET_ACCEPT, "sec-websocket-accept"); + (SecWebSocketAccept, SEC_WEBSOCKET_ACCEPT, b"sec-websocket-accept"); /// The |Sec-WebSocket-Extensions| header field is used in the WebSocket /// opening handshake. It is initially sent from the client to the /// server, and then subsequently sent from the server to the client, to /// agree on a set of protocol-level extensions to use for the duration /// of the connection. - (SecWebSocketExtensions, SEC_WEBSOCKET_EXTENSIONS, "sec-websocket-extensions"); + (SecWebSocketExtensions, SEC_WEBSOCKET_EXTENSIONS, b"sec-websocket-extensions"); /// The |Sec-WebSocket-Key| header field is used in the WebSocket opening /// handshake. It is sent from the client to the server to provide part @@ -810,14 +818,14 @@ standard_headers! { /// does not accept connections from non-WebSocket clients (e.g., HTTP /// clients) that are being abused to send data to unsuspecting WebSocket /// servers. - (SecWebSocketKey, SEC_WEBSOCKET_KEY, "sec-websocket-key"); + (SecWebSocketKey, SEC_WEBSOCKET_KEY, b"sec-websocket-key"); /// The |Sec-WebSocket-Protocol| header field is used in the WebSocket /// opening handshake. It is sent from the client to the server and back /// from the server to the client to confirm the subprotocol of the /// connection. This enables scripts to both select a subprotocol and be /// sure that the server agreed to serve that subprotocol. 
- (SecWebSocketProtocol, SEC_WEBSOCKET_PROTOCOL, "sec-websocket-protocol"); + (SecWebSocketProtocol, SEC_WEBSOCKET_PROTOCOL, b"sec-websocket-protocol"); /// The |Sec-WebSocket-Version| header field is used in the WebSocket /// opening handshake. It is sent from the client to the server to @@ -825,7 +833,7 @@ standard_headers! { /// servers to correctly interpret the opening handshake and subsequent /// data being sent from the data, and close the connection if the server /// cannot interpret that data in a safe manner. - (SecWebSocketVersion, SEC_WEBSOCKET_VERSION, "sec-websocket-version"); + (SecWebSocketVersion, SEC_WEBSOCKET_VERSION, b"sec-websocket-version"); /// Contains information about the software used by the origin server to /// handle the request. @@ -834,13 +842,13 @@ standard_headers! { /// potentially reveal internal implementation details that might make it /// (slightly) easier for attackers to find and exploit known security /// holes. - (Server, SERVER, "server"); + (Server, SERVER, b"server"); /// Used to send cookies from the server to the user agent. - (SetCookie, SET_COOKIE, "set-cookie"); + (SetCookie, SET_COOKIE, b"set-cookie"); /// Tells the client to communicate with HTTPS instead of using HTTP. - (StrictTransportSecurity, STRICT_TRANSPORT_SECURITY, "strict-transport-security"); + (StrictTransportSecurity, STRICT_TRANSPORT_SECURITY, b"strict-transport-security"); /// Informs the server of transfer encodings willing to be accepted as part /// of the response. @@ -850,11 +858,11 @@ standard_headers! { /// recipients and you that don't have to specify "chunked" using the TE /// header. However, it is useful for setting if the client is accepting /// trailer fields in a chunked transfer coding using the "trailers" value. - (Te, TE, "te"); + (Te, TE, b"te"); /// Allows the sender to include additional fields at the end of chunked /// messages. 
- (Trailer, TRAILER, "trailer"); + (Trailer, TRAILER, b"trailer"); /// Specifies the form of encoding used to safely transfer the entity to the /// client. @@ -868,18 +876,18 @@ standard_headers! { /// When present on a response to a `HEAD` request that has no body, it /// indicates the value that would have applied to the corresponding `GET` /// message. - (TransferEncoding, TRANSFER_ENCODING, "transfer-encoding"); + (TransferEncoding, TRANSFER_ENCODING, b"transfer-encoding"); /// Contains a string that allows identifying the requesting client's /// software. - (UserAgent, USER_AGENT, "user-agent"); + (UserAgent, USER_AGENT, b"user-agent"); /// Used as part of the exchange to upgrade the protocol. - (Upgrade, UPGRADE, "upgrade"); + (Upgrade, UPGRADE, b"upgrade"); /// Sends a signal to the server expressing the client’s preference for an /// encrypted and authenticated response. - (UpgradeInsecureRequests, UPGRADE_INSECURE_REQUESTS, "upgrade-insecure-requests"); + (UpgradeInsecureRequests, UPGRADE_INSECURE_REQUESTS, b"upgrade-insecure-requests"); /// Determines how to match future requests with cached responses. /// @@ -891,7 +899,7 @@ standard_headers! { /// /// The `vary` header should be set on a 304 Not Modified response exactly /// like it would have been set on an equivalent 200 OK response. - (Vary, VARY, "vary"); + (Vary, VARY, b"vary"); /// Added by proxies to track routing. /// @@ -900,7 +908,7 @@ standard_headers! { /// It is used for tracking message forwards, avoiding request loops, and /// identifying the protocol capabilities of senders along the /// request/response chain. - (Via, VIA, "via"); + (Via, VIA, b"via"); /// General HTTP header contains information about possible problems with /// the status of the message. @@ -908,11 +916,11 @@ standard_headers! { /// More than one `warning` header may appear in a response. 
Warning header /// fields can in general be applied to any message, however some warn-codes /// are specific to caches and can only be applied to response messages. - (Warning, WARNING, "warning"); + (Warning, WARNING, b"warning"); /// Defines the authentication method that should be used to gain access to /// a resource. - (WwwAuthenticate, WWW_AUTHENTICATE, "www-authenticate"); + (WwwAuthenticate, WWW_AUTHENTICATE, b"www-authenticate"); /// Marker used by the server to indicate that the MIME types advertised in /// the `content-type` headers should not be changed and be followed. @@ -927,7 +935,7 @@ standard_headers! { /// less aggressive. /// /// Site security testers usually expect this header to be set. - (XContentTypeOptions, X_CONTENT_TYPE_OPTIONS, "x-content-type-options"); + (XContentTypeOptions, X_CONTENT_TYPE_OPTIONS, b"x-content-type-options"); /// Controls DNS prefetching. /// @@ -940,7 +948,7 @@ standard_headers! { /// This prefetching is performed in the background, so that the DNS is /// likely to have been resolved by the time the referenced items are /// needed. This reduces latency when the user clicks a link. - (XDnsPrefetchControl, X_DNS_PREFETCH_CONTROL, "x-dns-prefetch-control"); + (XDnsPrefetchControl, X_DNS_PREFETCH_CONTROL, b"x-dns-prefetch-control"); /// Indicates whether or not a browser should be allowed to render a page in /// a frame. @@ -950,7 +958,7 @@ standard_headers! { /// /// The added security is only provided if the user accessing the document /// is using a browser supporting `x-frame-options`. - (XFrameOptions, X_FRAME_OPTIONS, "x-frame-options"); + (XFrameOptions, X_FRAME_OPTIONS, b"x-frame-options"); /// Stop pages from loading when an XSS attack is detected. /// @@ -961,7 +969,7 @@ standard_headers! { /// implement a strong Content-Security-Policy that disables the use of /// inline JavaScript ('unsafe-inline'), they can still provide protections /// for users of older web browsers that don't yet support CSP. 
- (XXssProtection, X_XSS_PROTECTION, "x-xss-protection"); + (XXssProtection, X_XSS_PROTECTION, b"x-xss-protection"); } /// Valid header name characters @@ -1039,602 +1047,30 @@ const HEADER_CHARS_H2: [u8; 256] = [ 0, 0, 0, 0, 0, 0 // 25x ]; -#[cfg(any(not(debug_assertions), not(target_arch = "wasm32")))] -macro_rules! eq { - (($($cmp:expr,)*) $v:ident[$n:expr] ==) => { - $($cmp) && * - }; - (($($cmp:expr,)*) $v:ident[$n:expr] == $a:tt $($rest:tt)*) => { - eq!(($($cmp,)* $v[$n] == $a,) $v[$n+1] == $($rest)*) - }; - ($v:ident == $($rest:tt)+) => { - eq!(() $v[0] == $($rest)+) - }; - ($v:ident[$n:expr] == $($rest:tt)+) => { - eq!(() $v[$n] == $($rest)+) - }; -} - -#[cfg(any(not(debug_assertions), not(target_arch = "wasm32")))] -/// This version is best under optimized mode, however in a wasm debug compile, -/// the `eq` macro expands to 1 + 1 + 1 + 1... and wasm explodes when this chain gets too long -/// See https://github.com/DenisKolodin/yew/issues/478 -fn parse_hdr<'a>( - data: &'a [u8], - b: &'a mut [u8; 64], - table: &[u8; 256], -) -> Result<HdrName<'a>, InvalidHeaderName> { - use self::StandardHeader::*; - - let len = data.len(); - - let validate = |buf: &'a [u8], len: usize| { - let buf = &buf[..len]; - if buf.iter().any(|&b| b == 0) { - Err(InvalidHeaderName::new()) - } else { - Ok(HdrName::custom(buf, true)) - } - }; - - - macro_rules! 
to_lower { - ($d:ident, $src:ident, 1) => { $d[0] = table[$src[0] as usize]; }; - ($d:ident, $src:ident, 2) => { to_lower!($d, $src, 1); $d[1] = table[$src[1] as usize]; }; - ($d:ident, $src:ident, 3) => { to_lower!($d, $src, 2); $d[2] = table[$src[2] as usize]; }; - ($d:ident, $src:ident, 4) => { to_lower!($d, $src, 3); $d[3] = table[$src[3] as usize]; }; - ($d:ident, $src:ident, 5) => { to_lower!($d, $src, 4); $d[4] = table[$src[4] as usize]; }; - ($d:ident, $src:ident, 6) => { to_lower!($d, $src, 5); $d[5] = table[$src[5] as usize]; }; - ($d:ident, $src:ident, 7) => { to_lower!($d, $src, 6); $d[6] = table[$src[6] as usize]; }; - ($d:ident, $src:ident, 8) => { to_lower!($d, $src, 7); $d[7] = table[$src[7] as usize]; }; - ($d:ident, $src:ident, 9) => { to_lower!($d, $src, 8); $d[8] = table[$src[8] as usize]; }; - ($d:ident, $src:ident, 10) => { to_lower!($d, $src, 9); $d[9] = table[$src[9] as usize]; }; - ($d:ident, $src:ident, 11) => { to_lower!($d, $src, 10); $d[10] = table[$src[10] as usize]; }; - ($d:ident, $src:ident, 12) => { to_lower!($d, $src, 11); $d[11] = table[$src[11] as usize]; }; - ($d:ident, $src:ident, 13) => { to_lower!($d, $src, 12); $d[12] = table[$src[12] as usize]; }; - ($d:ident, $src:ident, 14) => { to_lower!($d, $src, 13); $d[13] = table[$src[13] as usize]; }; - ($d:ident, $src:ident, 15) => { to_lower!($d, $src, 14); $d[14] = table[$src[14] as usize]; }; - ($d:ident, $src:ident, 16) => { to_lower!($d, $src, 15); $d[15] = table[$src[15] as usize]; }; - ($d:ident, $src:ident, 17) => { to_lower!($d, $src, 16); $d[16] = table[$src[16] as usize]; }; - ($d:ident, $src:ident, 18) => { to_lower!($d, $src, 17); $d[17] = table[$src[17] as usize]; }; - ($d:ident, $src:ident, 19) => { to_lower!($d, $src, 18); $d[18] = table[$src[18] as usize]; }; - ($d:ident, $src:ident, 20) => { to_lower!($d, $src, 19); $d[19] = table[$src[19] as usize]; }; - ($d:ident, $src:ident, 21) => { to_lower!($d, $src, 20); $d[20] = table[$src[20] as usize]; }; - ($d:ident, 
$src:ident, 22) => { to_lower!($d, $src, 21); $d[21] = table[$src[21] as usize]; }; - ($d:ident, $src:ident, 23) => { to_lower!($d, $src, 22); $d[22] = table[$src[22] as usize]; }; - ($d:ident, $src:ident, 24) => { to_lower!($d, $src, 23); $d[23] = table[$src[23] as usize]; }; - ($d:ident, $src:ident, 25) => { to_lower!($d, $src, 24); $d[24] = table[$src[24] as usize]; }; - ($d:ident, $src:ident, 26) => { to_lower!($d, $src, 25); $d[25] = table[$src[25] as usize]; }; - ($d:ident, $src:ident, 27) => { to_lower!($d, $src, 26); $d[26] = table[$src[26] as usize]; }; - ($d:ident, $src:ident, 28) => { to_lower!($d, $src, 27); $d[27] = table[$src[27] as usize]; }; - ($d:ident, $src:ident, 29) => { to_lower!($d, $src, 28); $d[28] = table[$src[28] as usize]; }; - ($d:ident, $src:ident, 30) => { to_lower!($d, $src, 29); $d[29] = table[$src[29] as usize]; }; - ($d:ident, $src:ident, 31) => { to_lower!($d, $src, 30); $d[30] = table[$src[30] as usize]; }; - ($d:ident, $src:ident, 32) => { to_lower!($d, $src, 31); $d[31] = table[$src[31] as usize]; }; - ($d:ident, $src:ident, 33) => { to_lower!($d, $src, 32); $d[32] = table[$src[32] as usize]; }; - ($d:ident, $src:ident, 34) => { to_lower!($d, $src, 33); $d[33] = table[$src[33] as usize]; }; - ($d:ident, $src:ident, 35) => { to_lower!($d, $src, 34); $d[34] = table[$src[34] as usize]; }; - } - - match len { - 0 => Err(InvalidHeaderName::new()), - 2 => { - to_lower!(b, data, 2); - - if eq!(b == b't' b'e') { - Ok(Te.into()) - } else { - validate(b, len) - } - } - 3 => { - to_lower!(b, data, 3); - - if eq!(b == b'a' b'g' b'e') { - Ok(Age.into()) - } else if eq!(b == b'v' b'i' b'a') { - Ok(Via.into()) - } else if eq!(b == b'd' b'n' b't') { - Ok(Dnt.into()) - } else { - validate(b, len) - } - } - 4 => { - to_lower!(b, data, 4); - - if eq!(b == b'd' b'a' b't' b'e') { - Ok(Date.into()) - } else if eq!(b == b'e' b't' b'a' b'g') { - Ok(Etag.into()) - } else if eq!(b == b'f' b'r' b'o' b'm') { - Ok(From.into()) - } else if eq!(b == b'h' 
b'o' b's' b't') { - Ok(Host.into()) - } else if eq!(b == b'l' b'i' b'n' b'k') { - Ok(Link.into()) - } else if eq!(b == b'v' b'a' b'r' b'y') { - Ok(Vary.into()) - } else { - validate(b, len) - } - } - 5 => { - to_lower!(b, data, 5); - - if eq!(b == b'a' b'l' b'l' b'o' b'w') { - Ok(Allow.into()) - } else if eq!(b == b'r' b'a' b'n' b'g' b'e') { - Ok(Range.into()) - } else { - validate(b, len) - } - } - 6 => { - to_lower!(b, data, 6); - - if eq!(b == b'a' b'c' b'c' b'e' b'p' b't') { - return Ok(Accept.into()); - } else if eq!(b == b'c' b'o' b'o' b'k' b'i' b'e') { - return Ok(Cookie.into()); - } else if eq!(b == b'e' b'x' b'p' b'e' b'c' b't') { - return Ok(Expect.into()); - } else if eq!(b == b'o' b'r' b'i' b'g' b'i' b'n') { - return Ok(Origin.into()); - } else if eq!(b == b'p' b'r' b'a' b'g' b'm' b'a') { - return Ok(Pragma.into()); - } else if b[0] == b's' { - if eq!(b[1] == b'e' b'r' b'v' b'e' b'r') { - return Ok(Server.into()); - } - } - - validate(b, len) - } - 7 => { - to_lower!(b, data, 7); - - if eq!(b == b'a' b'l' b't' b'-' b's' b'v' b'c') { - Ok(AltSvc.into()) - } else if eq!(b == b'e' b'x' b'p' b'i' b'r' b'e' b's') { - Ok(Expires.into()) - } else if eq!(b == b'r' b'e' b'f' b'e' b'r' b'e' b'r') { - Ok(Referer.into()) - } else if eq!(b == b'r' b'e' b'f' b'r' b'e' b's' b'h') { - Ok(Refresh.into()) - } else if eq!(b == b't' b'r' b'a' b'i' b'l' b'e' b'r') { - Ok(Trailer.into()) - } else if eq!(b == b'u' b'p' b'g' b'r' b'a' b'd' b'e') { - Ok(Upgrade.into()) - } else if eq!(b == b'w' b'a' b'r' b'n' b'i' b'n' b'g') { - Ok(Warning.into()) - } else { - validate(b, len) - } - } - 8 => { - to_lower!(b, data, 8); - - if eq!(b == b'i' b'f' b'-') { - if eq!(b[3] == b'm' b'a' b't' b'c' b'h') { - return Ok(IfMatch.into()); - } else if eq!(b[3] == b'r' b'a' b'n' b'g' b'e') { - return Ok(IfRange.into()); - } - } else if eq!(b == b'l' b'o' b'c' b'a' b't' b'i' b'o' b'n') { - return Ok(Location.into()); - } - - validate(b, len) - } - 9 => { - to_lower!(b, data, 9); - - if eq!(b == 
b'f' b'o' b'r' b'w' b'a' b'r' b'd' b'e' b'd') { - Ok(Forwarded.into()) - } else { - validate(b, len) - } - } - 10 => { - to_lower!(b, data, 10); - - if eq!(b == b'c' b'o' b'n' b'n' b'e' b'c' b't' b'i' b'o' b'n') { - Ok(Connection.into()) - } else if eq!(b == b's' b'e' b't' b'-' b'c' b'o' b'o' b'k' b'i' b'e') { - Ok(SetCookie.into()) - } else if eq!(b == b'u' b's' b'e' b'r' b'-' b'a' b'g' b'e' b'n' b't') { - Ok(UserAgent.into()) - } else { - validate(b, len) - } - } - 11 => { - to_lower!(b, data, 11); - - if eq!(b == b'r' b'e' b't' b'r' b'y' b'-' b'a' b'f' b't' b'e' b'r') { - Ok(RetryAfter.into()) - } else { - validate(b, len) - } - } - 12 => { - to_lower!(b, data, 12); - - if eq!(b == b'c' b'o' b'n' b't' b'e' b'n' b't' b'-' b't' b'y' b'p' b'e') { - Ok(ContentType.into()) - } else if eq!(b == b'm' b'a' b'x' b'-' b'f' b'o' b'r' b'w' b'a' b'r' b'd' b's') { - Ok(MaxForwards.into()) - } else { - validate(b, len) - } - } - 13 => { - to_lower!(b, data, 13); - - if b[0] == b'a' { - if eq!(b[1] == b'c' b'c' b'e' b'p' b't' b'-' b'r' b'a' b'n' b'g' b'e' b's') { - return Ok(AcceptRanges.into()); - } else if eq!(b[1] == b'u' b't' b'h' b'o' b'r' b'i' b'z' b'a' b't' b'i' b'o' b'n') { - return Ok(Authorization.into()); - } - } else if b[0] == b'c' { - if eq!(b[1] == b'a' b'c' b'h' b'e' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l') { - return Ok(CacheControl.into()); - } else if eq!(b[1] == b'o' b'n' b't' b'e' b'n' b't' b'-' b'r' b'a' b'n' b'g' b'e' ) - { - return Ok(ContentRange.into()); - } - } else if eq!(b == b'i' b'f' b'-' b'n' b'o' b'n' b'e' b'-' b'm' b'a' b't' b'c' b'h') { - return Ok(IfNoneMatch.into()); - } else if eq!(b == b'l' b'a' b's' b't' b'-' b'm' b'o' b'd' b'i' b'f' b'i' b'e' b'd') { - return Ok(LastModified.into()); - } - - validate(b, len) - } - 14 => { - to_lower!(b, data, 14); - - if eq!(b == b'a' b'c' b'c' b'e' b'p' b't' b'-' b'c' b'h' b'a' b'r' b's' b'e' b't') { - Ok(AcceptCharset.into()) - } else if eq!(b == b'c' b'o' b'n' b't' b'e' b'n' b't' b'-' b'l' b'e' b'n' 
b'g' b't' b'h') - { - Ok(ContentLength.into()) - } else { - validate(b, len) - } - } - 15 => { - to_lower!(b, data, 15); - - if eq!(b == b'a' b'c' b'c' b'e' b'p' b't' b'-') { // accept- - if eq!(b[7] == b'e' b'n' b'c' b'o' b'd' b'i' b'n' b'g') { - return Ok(AcceptEncoding.into()) - } else if eq!(b[7] == b'l' b'a' b'n' b'g' b'u' b'a' b'g' b'e') { - return Ok(AcceptLanguage.into()) - } - } else if eq!(b == b'p' b'u' b'b' b'l' b'i' b'c' b'-' b'k' b'e' b'y' b'-' b'p' b'i' b'n' b's') { - return Ok(PublicKeyPins.into()) - } else if eq!(b == b'x' b'-' b'f' b'r' b'a' b'm' b'e' b'-' b'o' b'p' b't' b'i' b'o' b'n' b's') { - return Ok(XFrameOptions.into()) - } - else if eq!(b == b'r' b'e' b'f' b'e' b'r' b'r' b'e' b'r' b'-' b'p' b'o' b'l' b'i' b'c' b'y') { - return Ok(ReferrerPolicy.into()) - } - - validate(b, len) - } - 16 => { - to_lower!(b, data, 16); - - if eq!(b == b'c' b'o' b'n' b't' b'e' b'n' b't' b'-') { - if eq!(b[8] == b'l' b'a' b'n' b'g' b'u' b'a' b'g' b'e') { - return Ok(ContentLanguage.into()) - } else if eq!(b[8] == b'l' b'o' b'c' b'a' b't' b'i' b'o' b'n') { - return Ok(ContentLocation.into()) - } else if eq!(b[8] == b'e' b'n' b'c' b'o' b'd' b'i' b'n' b'g') { - return Ok(ContentEncoding.into()) - } - } else if eq!(b == b'w' b'w' b'w' b'-' b'a' b'u' b't' b'h' b'e' b'n' b't' b'i' b'c' b'a' b't' b'e') { - return Ok(WwwAuthenticate.into()) - } else if eq!(b == b'x' b'-' b'x' b's' b's' b'-' b'p' b'r' b'o' b't' b'e' b'c' b't' b'i' b'o' b'n') { - return Ok(XXssProtection.into()) - } - - validate(b, len) - } - 17 => { - to_lower!(b, data, 17); - - if eq!(b == b't' b'r' b'a' b'n' b's' b'f' b'e' b'r' b'-' b'e' b'n' b'c' b'o' b'd' b'i' b'n' b'g') { - Ok(TransferEncoding.into()) - } else if eq!(b == b'i' b'f' b'-' b'm' b'o' b'd' b'i' b'f' b'i' b'e' b'd' b'-' b's' b'i' b'n' b'c' b'e') { - Ok(IfModifiedSince.into()) - } else if eq!(b == b's' b'e' b'c' b'-' b'w' b'e' b'b' b's' b'o' b'c' b'k' b'e' b't' b'-' b'k' b'e' b'y') { - Ok(SecWebSocketKey.into()) - } else { - validate(b, 
len) - } - } - 18 => { - to_lower!(b, data, 18); - - if eq!(b == b'p' b'r' b'o' b'x' b'y' b'-' b'a' b'u' b't' b'h' b'e' b'n' b't' b'i' b'c' b'a' b't' b'e') { - Ok(ProxyAuthenticate.into()) - } else { - validate(b, len) - } - } - 19 => { - to_lower!(b, data, 19); - - if eq!(b == b'c' b'o' b'n' b't' b'e' b'n' b't' b'-' b'd' b'i' b's' b'p' b'o' b's' b'i' b't' b'i' b'o' b'n') { - Ok(ContentDisposition.into()) - } else if eq!(b == b'i' b'f' b'-' b'u' b'n' b'm' b'o' b'd' b'i' b'f' b'i' b'e' b'd' b'-' b's' b'i' b'n' b'c' b'e') { - Ok(IfUnmodifiedSince.into()) - } else if eq!(b == b'p' b'r' b'o' b'x' b'y' b'-' b'a' b'u' b't' b'h' b'o' b'r' b'i' b'z' b'a' b't' b'i' b'o' b'n') { - Ok(ProxyAuthorization.into()) - } else { - validate(b, len) - } - } - 20 => { - to_lower!(b, data, 20); - - if eq!(b == b's' b'e' b'c' b'-' b'w' b'e' b'b' b's' b'o' b'c' b'k' b'e' b't' b'-' b'a' b'c' b'c' b'e' b'p' b't') { - Ok(SecWebSocketAccept.into()) - } else { - validate(b, len) - } - } - 21 => { - to_lower!(b, data, 21); - - if eq!(b == b's' b'e' b'c' b'-' b'w' b'e' b'b' b's' b'o' b'c' b'k' b'e' b't' b'-' b'v' b'e' b'r' b's' b'i' b'o' b'n') { - Ok(SecWebSocketVersion.into()) - } else { - validate(b, len) - } - } - 22 => { - to_lower!(b, data, 22); - - if eq!(b == b'a' b'c' b'c' b'e' b's' b's' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l' b'-' b'm' b'a' b'x' b'-' b'a' b'g' b'e') { - Ok(AccessControlMaxAge.into()) - } else if eq!(b == b'x' b'-' b'c' b'o' b'n' b't' b'e' b'n' b't' b'-' b't' b'y' b'p' b'e' b'-' b'o' b'p' b't' b'i' b'o' b'n' b's') { - Ok(XContentTypeOptions.into()) - } else if eq!(b == b'x' b'-' b'd' b'n' b's' b'-' b'p' b'r' b'e' b'f' b'e' b't' b'c' b'h' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l') { - Ok(XDnsPrefetchControl.into()) - } else if eq!(b == b's' b'e' b'c' b'-' b'w' b'e' b'b' b's' b'o' b'c' b'k' b'e' b't' b'-' b'p' b'r' b'o' b't' b'o' b'c' b'o' b'l') { - Ok(SecWebSocketProtocol.into()) - } else { - validate(b, len) - } - } - 23 => { - to_lower!(b, data, 23); - - if eq!(b == b'c' 
b'o' b'n' b't' b'e' b'n' b't' b'-' b's' b'e' b'c' b'u' b'r' b'i' b't' b'y' b'-' b'p' b'o' b'l' b'i' b'c' b'y') { - Ok(ContentSecurityPolicy.into()) - } else { - validate(b, len) - } - } - 24 => { - to_lower!(b, data, 24); - - if eq!(b == b's' b'e' b'c' b'-' b'w' b'e' b'b' b's' b'o' b'c' b'k' b'e' b't' b'-' b'e' b'x' b't' b'e' b'n' b's' b'i' b'o' b'n' b's') { - Ok(SecWebSocketExtensions.into()) - } else { - validate(b, len) - } - } - 25 => { - to_lower!(b, data, 25); - - if eq!(b == b's' b't' b'r' b'i' b'c' b't' b'-' b't' b'r' b'a' b'n' b's' b'p' b'o' b'r' b't' b'-' b's' b'e' b'c' b'u' b'r' b'i' b't' b'y') { - Ok(StrictTransportSecurity.into()) - } else if eq!(b == b'u' b'p' b'g' b'r' b'a' b'd' b'e' b'-' b'i' b'n' b's' b'e' b'c' b'u' b'r' b'e' b'-' b'r' b'e' b'q' b'u' b'e' b's' b't' b's') { - Ok(UpgradeInsecureRequests.into()) - } else { - validate(b, len) - } - } - 27 => { - to_lower!(b, data, 27); - - if eq!(b == b'a' b'c' b'c' b'e' b's' b's' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l' b'-' b'a' b'l' b'l' b'o' b'w' b'-' b'o' b'r' b'i' b'g' b'i' b'n') { - Ok(AccessControlAllowOrigin.into()) - } else if eq!(b == b'p' b'u' b'b' b'l' b'i' b'c' b'-' b'k' b'e' b'y' b'-' b'p' b'i' b'n' b's' b'-' b'r' b'e' b'p' b'o' b'r' b't' b'-' b'o' b'n' b'l' b'y') { - Ok(PublicKeyPinsReportOnly.into()) - } else { - validate(b, len) - } - } - 28 => { - to_lower!(b, data, 28); - - if eq!(b == b'a' b'c' b'c' b'e' b's' b's' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l' b'-' b'a' b'l' b'l' b'o' b'w' b'-') { - if eq!(b[21] == b'h' b'e' b'a' b'd' b'e' b'r' b's') { - return Ok(AccessControlAllowHeaders.into()) - } else if eq!(b[21] == b'm' b'e' b't' b'h' b'o' b'd' b's') { - return Ok(AccessControlAllowMethods.into()) - } - } - - validate(b, len) - } - 29 => { - to_lower!(b, data, 29); - - if eq!(b == b'a' b'c' b'c' b'e' b's' b's' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l' b'-') { - if eq!(b[15] == b'e' b'x' b'p' b'o' b's' b'e' b'-' b'h' b'e' b'a' b'd' b'e' b'r' b's') { - return 
Ok(AccessControlExposeHeaders.into()) - } else if eq!(b[15] == b'r' b'e' b'q' b'u' b'e' b's' b't' b'-' b'm' b'e' b't' b'h' b'o' b'd') { - return Ok(AccessControlRequestMethod.into()) - } - } - - validate(b, len) - } - 30 => { - to_lower!(b, data, 30); - - if eq!(b == b'a' b'c' b'c' b'e' b's' b's' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l' b'-' b'r' b'e' b'q' b'u' b'e' b's' b't' b'-' b'h' b'e' b'a' b'd' b'e' b'r' b's') { - Ok(AccessControlRequestHeaders.into()) - } else { - validate(b, len) - } - } - 32 => { - to_lower!(b, data, 32); - - if eq!(b == b'a' b'c' b'c' b'e' b's' b's' b'-' b'c' b'o' b'n' b't' b'r' b'o' b'l' b'-' b'a' b'l' b'l' b'o' b'w' b'-' b'c' b'r' b'e' b'd' b'e' b'n' b't' b'i' b'a' b'l' b's') { - Ok(AccessControlAllowCredentials.into()) - } else { - validate(b, len) - } - } - 35 => { - to_lower!(b, data, 35); - - if eq!(b == b'c' b'o' b'n' b't' b'e' b'n' b't' b'-' b's' b'e' b'c' b'u' b'r' b'i' b't' b'y' b'-' b'p' b'o' b'l' b'i' b'c' b'y' b'-' b'r' b'e' b'p' b'o' b'r' b't' b'-' b'o' b'n' b'l' b'y') { - Ok(ContentSecurityPolicyReportOnly.into()) - } else { - validate(b, len) - } - } - len if len < 64 => { - for i in 0..len { - b[i] = table[data[i] as usize]; - } - validate(b, len) - } - len if len <= super::MAX_HEADER_NAME_LEN => { - Ok(HdrName::custom(data, false)) - } - _ => Err(InvalidHeaderName::new()), - } -} - -#[cfg(all(debug_assertions, target_arch = "wasm32"))] -/// This version works best in debug mode in wasm fn parse_hdr<'a>( data: &'a [u8], b: &'a mut [u8; 64], table: &[u8; 256], ) -> Result<HdrName<'a>, InvalidHeaderName> { - use self::StandardHeader::*; - - let len = data.len(); - - let validate = |buf: &'a [u8], len: usize| { - let buf = &buf[..len]; - if buf.iter().any(|&b| b == 0) { - Err(InvalidHeaderName::new()) - } else { - Ok(HdrName::custom(buf, true)) - } - }; - - assert!( - len < super::MAX_HEADER_NAME_LEN, - "header name too long -- max length is {}", - super::MAX_HEADER_NAME_LEN - ); - - match len { + match data.len() { 0 =>
Err(InvalidHeaderName::new()), - len if len > 64 => Ok(HdrName::custom(data, false)), - len => { + len @ 1..=64 => { // Read from data into the buffer - transforming using `table` as we go data.iter().zip(b.iter_mut()).for_each(|(index, out)| *out = table[*index as usize]); - match &b[0..len] { - b"te" => Ok(Te.into()), - b"age" => Ok(Age.into()), - b"via" => Ok(Via.into()), - b"dnt" => Ok(Dnt.into()), - b"date" => Ok(Date.into()), - b"etag" => Ok(Etag.into()), - b"from" => Ok(From.into()), - b"host" => Ok(Host.into()), - b"link" => Ok(Link.into()), - b"vary" => Ok(Vary.into()), - b"allow" => Ok(Allow.into()), - b"range" => Ok(Range.into()), - b"accept" => Ok(Accept.into()), - b"cookie" => Ok(Cookie.into()), - b"expect" => Ok(Expect.into()), - b"origin" => Ok(Origin.into()), - b"pragma" => Ok(Pragma.into()), - b"server" => Ok(Server.into()), - b"alt-svc" => Ok(AltSvc.into()), - b"expires" => Ok(Expires.into()), - b"referer" => Ok(Referer.into()), - b"refresh" => Ok(Refresh.into()), - b"trailer" => Ok(Trailer.into()), - b"upgrade" => Ok(Upgrade.into()), - b"warning" => Ok(Warning.into()), - b"if-match" => Ok(IfMatch.into()), - b"if-range" => Ok(IfRange.into()), - b"location" => Ok(Location.into()), - b"forwarded" => Ok(Forwarded.into()), - b"connection" => Ok(Connection.into()), - b"set-cookie" => Ok(SetCookie.into()), - b"user-agent" => Ok(UserAgent.into()), - b"retry-after" => Ok(RetryAfter.into()), - b"content-type" => Ok(ContentType.into()), - b"max-forwards" => Ok(MaxForwards.into()), - b"accept-ranges" => Ok(AcceptRanges.into()), - b"authorization" => Ok(Authorization.into()), - b"cache-control" => Ok(CacheControl.into()), - b"content-range" => Ok(ContentRange.into()), - b"if-none-match" => Ok(IfNoneMatch.into()), - b"last-modified" => Ok(LastModified.into()), - b"accept-charset" => Ok(AcceptCharset.into()), - b"content-length" => Ok(ContentLength.into()), - b"accept-encoding" => Ok(AcceptEncoding.into()), - b"accept-language" => Ok(AcceptLanguage.into()), - 
b"public-key-pins" => Ok(PublicKeyPins.into()), - b"x-frame-options" => Ok(XFrameOptions.into()), - b"referrer-policy" => Ok(ReferrerPolicy.into()), - b"content-language" => Ok(ContentLanguage.into()), - b"content-location" => Ok(ContentLocation.into()), - b"content-encoding" => Ok(ContentEncoding.into()), - b"www-authenticate" => Ok(WwwAuthenticate.into()), - b"x-xss-protection" => Ok(XXssProtection.into()), - b"transfer-encoding" => Ok(TransferEncoding.into()), - b"if-modified-since" => Ok(IfModifiedSince.into()), - b"sec-websocket-key" => Ok(SecWebSocketKey.into()), - b"proxy-authenticate" => Ok(ProxyAuthenticate.into()), - b"content-disposition" => Ok(ContentDisposition.into()), - b"if-unmodified-since" => Ok(IfUnmodifiedSince.into()), - b"proxy-authorization" => Ok(ProxyAuthorization.into()), - b"sec-websocket-accept" => Ok(SecWebSocketAccept.into()), - b"sec-websocket-version" => Ok(SecWebSocketVersion.into()), - b"access-control-max-age" => Ok(AccessControlMaxAge.into()), - b"x-content-type-options" => Ok(XContentTypeOptions.into()), - b"x-dns-prefetch-control" => Ok(XDnsPrefetchControl.into()), - b"sec-websocket-protocol" => Ok(SecWebSocketProtocol.into()), - b"content-security-policy" => Ok(ContentSecurityPolicy.into()), - b"sec-websocket-extensions" => Ok(SecWebSocketExtensions.into()), - b"strict-transport-security" => Ok(StrictTransportSecurity.into()), - b"upgrade-insecure-requests" => Ok(UpgradeInsecureRequests.into()), - b"access-control-allow-origin" => Ok(AccessControlAllowOrigin.into()), - b"public-key-pins-report-only" => Ok(PublicKeyPinsReportOnly.into()), - b"access-control-allow-headers" => Ok(AccessControlAllowHeaders.into()), - b"access-control-allow-methods" => Ok(AccessControlAllowMethods.into()), - b"access-control-expose-headers" => Ok(AccessControlExposeHeaders.into()), - b"access-control-request-method" => Ok(AccessControlRequestMethod.into()), - b"access-control-request-headers" => Ok(AccessControlRequestHeaders.into()), - 
b"access-control-allow-credentials" => Ok(AccessControlAllowCredentials.into()), - b"content-security-policy-report-only" => { - Ok(ContentSecurityPolicyReportOnly.into()) + let name = &b[0..len]; + match StandardHeader::from_bytes(name) { + Some(sh) => Ok(sh.into()), + None => { + if name.contains(&0) { + Err(InvalidHeaderName::new()) + } else { + Ok(HdrName::custom(name, true)) + } } - other => validate(other, len), } } + 65..=super::MAX_HEADER_NAME_LEN => Ok(HdrName::custom(data, false)), + _ => Err(InvalidHeaderName::new()), } } @@ -1727,12 +1163,34 @@ impl HeaderName { /// Converts a static string to a HTTP header name. /// - /// This function panics when the static string is a invalid header. - /// /// This function requires the static string to only contain lowercase /// characters, numerals and symbols, as per the HTTP/2.0 specification /// and header names internal representation within this library. /// + /// # Panics + /// + /// This function panics when the static string is an invalid header.
+ /// + /// Until [Allow panicking in constants](https://github.com/rust-lang/rfcs/pull/2345) + /// makes its way into stable, the panic message at compile-time is + /// going to look cryptic, but should at least point at your header value: + /// + /// ```text + /// error: any use of this value will cause an error + /// --> http/src/header/name.rs:1241:13 + /// | + /// 1241 | ([] as [u8; 0])[0]; // Invalid header name + /// | ^^^^^^^^^^^^^^^^^^ + /// | | + /// | index out of bounds: the length is 0 but the index is 0 + /// | inside `http::HeaderName::from_static` at http/src/header/name.rs:1241:13 + /// | inside `INVALID_NAME` at src/main.rs:3:34 + /// | + /// ::: src/main.rs:3:1 + /// | + /// 3 | const INVALID_NAME: HeaderName = HeaderName::from_static("Capitalized"); + /// | ------------------------------------------------------------------------ + /// ``` /// /// # Examples /// @@ -1760,33 +1218,31 @@ impl HeaderName { /// let a = HeaderName::from_static("foobar"); /// let b = HeaderName::from_static("FOOBAR"); // This line panics! /// ``` - #[allow(deprecated)] - pub fn from_static(src: &'static str) -> HeaderName { - let bytes = src.as_bytes(); - #[allow(deprecated)] - let mut buf = unsafe { mem::uninitialized() }; - match parse_hdr(bytes, &mut buf, &HEADER_CHARS_H2) { - Ok(hdr_name) => match hdr_name.inner { - Repr::Standard(std) => std.into(), - Repr::Custom(MaybeLower { buf: _, lower: true }) => { - let val = ByteStr::from_static(src); - Custom(val).into() - }, - Repr::Custom(MaybeLower { buf: _, lower: false }) => { - // With lower false, the string is left unchecked by - // parse_hdr and must be validated manually.
- for &b in bytes.iter() { - if HEADER_CHARS_H2[b as usize] == 0 { - panic!("invalid header name") - } - } + #[allow(unconditional_panic)] // required for the panic circumvention + pub const fn from_static(src: &'static str) -> HeaderName { + let name_bytes = src.as_bytes(); + if let Some(standard) = StandardHeader::from_bytes(name_bytes) { + return HeaderName{ + inner: Repr::Standard(standard), + }; + } - let val = ByteStr::from_static(src); - Custom(val).into() + if name_bytes.len() == 0 || name_bytes.len() > super::MAX_HEADER_NAME_LEN || { + let mut i = 0; + loop { + if i >= name_bytes.len() { + break false; + } else if HEADER_CHARS_H2[name_bytes[i] as usize] == 0 { + break true; } - }, + i += 1; + } + } { + ([] as [u8; 0])[0]; // Invalid header name + } - Err(_) => panic!("invalid header name") + HeaderName { + inner: Repr::Custom(Custom(ByteStr::from_static(src))) } } @@ -2169,24 +1625,36 @@ mod tests { } } + const ONE_TOO_LONG: &[u8] = &[b'a'; super::super::MAX_HEADER_NAME_LEN+1]; + #[test] fn test_invalid_name_lengths() { assert!( HeaderName::from_bytes(&[]).is_err(), "zero-length header name is an error", ); - let mut long = vec![b'a'; super::super::MAX_HEADER_NAME_LEN]; + + let long = &ONE_TOO_LONG[0..super::super::MAX_HEADER_NAME_LEN]; + + let long_str = std::str::from_utf8(long).unwrap(); + assert_eq!(HeaderName::from_static(long_str), long_str); // shouldn't panic! + assert!( - HeaderName::from_bytes(long.as_slice()).is_ok(), + HeaderName::from_bytes(long).is_ok(), "max header name length is ok", ); - long.push(b'a'); assert!( - HeaderName::from_bytes(long.as_slice()).is_err(), + HeaderName::from_bytes(ONE_TOO_LONG).is_err(), "longer than max header name length is an error", ); } + #[test] + #[should_panic] + fn test_static_invalid_name_lengths() { + let _ = HeaderName::from_static(unsafe { std::str::from_utf8_unchecked(ONE_TOO_LONG) }); + } + #[test] fn test_from_hdr_name() { use self::StandardHeader::Vary;