From 0a11090053d73af94de3c1a96d125c4980a16626 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Mon, 10 May 2021 21:56:27 +0100 Subject: [PATCH 01/15] faster parsing when not possible to overflow --- library/core/src/num/mod.rs | 85 ++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 35 deletions(-) diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index dca8ffa4e2c89..5fd697fd4285f 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -970,12 +970,14 @@ pub enum FpCategory { #[doc(hidden)] trait FromStrRadixHelper: PartialOrd + Copy { - fn min_value() -> Self; - fn max_value() -> Self; + const MIN: Self; fn from_u32(u: u32) -> Self; fn checked_mul(&self, other: u32) -> Option; fn checked_sub(&self, other: u32) -> Option; fn checked_add(&self, other: u32) -> Option; + unsafe fn unchecked_mul(&self, other: u32) -> Self; + unsafe fn unchecked_sub(&self, other: u32) -> Self; + unsafe fn unchecked_add(&self, other: u32) -> Self; } macro_rules! from_str_radix_int_impl { @@ -993,10 +995,7 @@ from_str_radix_int_impl! { isize i8 i16 i32 i64 i128 usize u8 u16 u32 u64 u128 } macro_rules! doit { ($($t:ty)*) => ($(impl FromStrRadixHelper for $t { - #[inline] - fn min_value() -> Self { Self::MIN } - #[inline] - fn max_value() -> Self { Self::MAX } + const MIN: Self = Self::MIN; #[inline] fn from_u32(u: u32) -> Self { u as Self } #[inline] @@ -1011,6 +1010,27 @@ macro_rules! doit { fn checked_add(&self, other: u32) -> Option { Self::checked_add(*self, other as Self) } + #[inline] + unsafe fn unchecked_mul(&self, other: u32) -> Self { + // SAFETY: Conditions of `Self::unchecked_mul` must be upheld by the caller. + unsafe { + Self::unchecked_mul(*self, other as Self) + } + } + #[inline] + unsafe fn unchecked_sub(&self, other: u32) -> Self { + // SAFETY: Conditions of `Self::unchecked_sub` must be upheld by the caller. + unsafe { + Self::unchecked_sub(*self, other as Self) + } + } + #[inline] + unsafe fn unchecked_add(&self, other: u32) -> Self { + // SAFETY: Conditions of `Self::unchecked_add` must be upheld by the caller. + unsafe { + Self::unchecked_add(*self, other as Self) + } + } })*) } doit! { i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize } @@ -1029,7 +1049,7 @@ fn from_str_radix(src: &str, radix: u32) -> Result T::min_value(); + let is_signed_ty = T::from_u32(0) > T::MIN; // all valid digits are ascii, so we will just iterate over the utf8 bytes // and cast them to chars. .to_digit() will safely return None for anything @@ -1047,37 +1067,32 @@ fn from_str_radix(src: &str, radix: u32) -> Result x, - None => return Err(PIE { kind: InvalidDigit }), - }; - result = match result.checked_mul(radix) { - Some(result) => result, - None => return Err(PIE { kind: PosOverflow }), - }; - result = match result.checked_add(x) { - Some(result) => result, - None => return Err(PIE { kind: PosOverflow }), - }; + + if radix <= 16 && digits.len() <= mem::size_of::() * 2 - is_signed_ty as usize { + // SAFETY: Consider the highest radix of 16: + // `u8::MAX` is `ff` (2 characters), `u16::MAX` is `ffff` (4 characters) + // We can be sure that any src len of 2 would fit in a u8 so we don't need + // to check for overflow. + unsafe { + let unchecked_additive_op = + if is_positive { T::unchecked_add } else { T::unchecked_sub }; + + for &c in digits { + result = result.unchecked_mul(radix); + let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?; + result = unchecked_additive_op(&result, x); + } } } else { - // The number is negative + let additive_op = if is_positive { T::checked_add } else { T::checked_sub }; + let overflow_err = || PIE { kind: if is_positive { PosOverflow } else { NegOverflow } }; + for &c in digits { - let x = match (c as char).to_digit(radix) { - Some(x) => x, - None => return Err(PIE { kind: InvalidDigit }), - }; - result = match result.checked_mul(radix) { - Some(result) => result, - None => return Err(PIE { kind: NegOverflow }), - }; - result = match result.checked_sub(x) { - Some(result) => result, - None => return Err(PIE { kind: NegOverflow }), - }; + let mul = result.checked_mul(radix); + let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?; + // multiply done early for performance reasons. + result = mul.ok_or_else(overflow_err)?; + result = additive_op(&result, x).ok_or_else(overflow_err)?; } } Ok(result) From 13d85ea8809910a685c52acf32f4d37632905eda Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Tue, 18 May 2021 08:51:20 +0100 Subject: [PATCH 02/15] add likely and clearer comments --- library/core/src/num/mod.rs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index 5fd697fd4285f..1941933483925 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -1068,8 +1068,13 @@ fn from_str_radix(src: &str, radix: u32) -> Result() * 2 - is_signed_ty as usize { - // SAFETY: Consider the highest radix of 16: + if intrinsics::likely( + radix <= 16 && digits.len() <= mem::size_of::() * 2 - is_signed_ty as usize, + ) { + // SAFETY: We can take this fast path when `radix.pow(digits.len()) - 1 <= T::MAX` + // but the condition above is a faster (conservative) approximation of this. + // + // Consider the highest radix of 16: // `u8::MAX` is `ff` (2 characters), `u16::MAX` is `ffff` (4 characters) // We can be sure that any src len of 2 would fit in a u8 so we don't need // to check for overflow. @@ -1088,9 +1093,14 @@ fn from_str_radix(src: &str, radix: u32) -> Result Date: Tue, 18 May 2021 18:22:04 +0100 Subject: [PATCH 03/15] Update library/core/src/num/mod.rs Co-authored-by: LingMan --- library/core/src/num/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index 1941933483925..a833f4025c29b 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -1094,8 +1094,8 @@ fn from_str_radix(src: &str, radix: u32) -> Result Date: Tue, 18 May 2021 18:22:21 +0100 Subject: [PATCH 04/15] Update library/core/src/num/mod.rs Co-authored-by: LingMan --- library/core/src/num/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index a833f4025c29b..9cd278e752c6e 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -1097,8 +1097,8 @@ fn from_str_radix(src: &str, radix: u32) -> Result