From 8a2671a2889fcf176c38844e1197ed545fe1d59d Mon Sep 17 00:00:00 2001 From: Mikhail Zabaluev Date: Tue, 13 Aug 2024 08:21:16 +0300 Subject: [PATCH] Revert to original loop for const pow exponents Give LLVM the for original, optimizable loop in pow and wrapped_pow functions in the case when the exponent is statically known. --- core/src/num/int_macros.rs | 135 +++++++++++++++--------------------- core/src/num/uint_macros.rs | 135 +++++++++++++++--------------------- 2 files changed, 110 insertions(+), 160 deletions(-) diff --git a/core/src/num/int_macros.rs b/core/src/num/int_macros.rs index be0e6a2a03b70..d8ef36f21ac63 100644 --- a/core/src/num/int_macros.rs +++ b/core/src/num/int_macros.rs @@ -2174,54 +2174,41 @@ macro_rules! int_impl { #[inline] #[rustc_allow_const_fn_unstable(is_val_statically_known)] pub const fn wrapping_pow(self, mut exp: u32) -> Self { + if exp == 0 { + return 1; + } let mut base = self; + let mut acc: Self = 1; if intrinsics::is_val_statically_known(exp) { - // Unroll multiplications for small exponent values. - // This gives the optimizer a way to efficiently inline call sites - // for the most common use cases with constant exponents. - // Currently, LLVM is unable to unroll the loop below. - match exp { - 0 => return 1, - 1 => return base, - 2 => return base.wrapping_mul(base), - 3 => { - let squared = base.wrapping_mul(base); - return squared.wrapping_mul(base); - } - 4 => { - let squared = base.wrapping_mul(base); - return squared.wrapping_mul(squared); + while exp > 1 { + if (exp & 1) == 1 { + acc = acc.wrapping_mul(base); } - 5 => { - let squared = base.wrapping_mul(base); - return squared.wrapping_mul(squared).wrapping_mul(base); - } - 6 => { - let cubed = base.wrapping_mul(base).wrapping_mul(base); - return cubed.wrapping_mul(cubed); - } - _ => {} + exp /= 2; + base = base.wrapping_mul(base); } - } else { - if exp == 0 { - return 1; - } - } - debug_assert!(exp != 0); - let mut acc: Self = 1; - - loop { - if (exp & 1) == 1 { - acc = acc.wrapping_mul(base); - // since exp!=0, finally the exp must be 1. - if exp == 1 { - return acc; + // since exp!=0, finally the exp must be 1. + // Deal with the final bit of the exponent separately, since + // squaring the base afterwards is not necessary. + acc.wrapping_mul(base) + } else { + // This is faster than the above when the exponent is not known + // at compile time. We can't use the same code for the constant + // exponent case because LLVM is currently unable to unroll + // this loop. + loop { + if (exp & 1) == 1 { + acc = acc.wrapping_mul(base); + // since exp!=0, finally the exp must be 1. + if exp == 1 { + return acc; + } } + exp /= 2; + base = base.wrapping_mul(base); } - exp /= 2; - base = base.wrapping_mul(base); } } @@ -2753,54 +2740,42 @@ macro_rules! int_impl { #[rustc_inherit_overflow_checks] #[rustc_allow_const_fn_unstable(is_val_statically_known)] pub const fn pow(self, mut exp: u32) -> Self { + if exp == 0 { + return 1; + } let mut base = self; + let mut acc = 1; if intrinsics::is_val_statically_known(exp) { - // Unroll multiplications for small exponent values. - // This gives the optimizer a way to efficiently inline call sites - // for the most common use cases with constant exponents. - // Currently, LLVM is unable to unroll the loop below. - match exp { - 0 => return 1, - 1 => return base, - 2 => return base * base, - 3 => { - let squared = base * base; - return squared * base; - } - 4 => { - let squared = base * base; - return squared * squared; + while exp > 1 { + if (exp & 1) == 1 { + acc = acc * base; } - 5 => { - let squared = base * base; - return squared * squared * base; - } - 6 => { - let cubed = base * base * base; - return cubed * cubed; - } - _ => {} + exp /= 2; + base = base * base; } - } else { - if exp == 0 { - return 1; - } - } - debug_assert!(exp != 0); - let mut acc = 1; - - loop { - if (exp & 1) == 1 { - acc = acc * base; - // since exp!=0, finally the exp must be 1. - if exp == 1 { - return acc; + // since exp!=0, finally the exp must be 1. + // Deal with the final bit of the exponent separately, since + // squaring the base afterwards is not necessary and may cause a + // needless overflow. + acc * base + } else { + // This is faster than the above when the exponent is not known + // at compile time. We can't use the same code for the constant + // exponent case because LLVM is currently unable to unroll + // this loop. + loop { + if (exp & 1) == 1 { + acc = acc * base; + // since exp!=0, finally the exp must be 1. + if exp == 1 { + return acc; + } } + exp /= 2; + base = base * base; } - exp /= 2; - base = base * base; } } diff --git a/core/src/num/uint_macros.rs b/core/src/num/uint_macros.rs index 24352593fca3d..5b3ef78d39a3d 100644 --- a/core/src/num/uint_macros.rs +++ b/core/src/num/uint_macros.rs @@ -2050,54 +2050,41 @@ macro_rules! uint_impl { #[inline] #[rustc_allow_const_fn_unstable(is_val_statically_known)] pub const fn wrapping_pow(self, mut exp: u32) -> Self { + if exp == 0 { + return 1; + } let mut base = self; + let mut acc: Self = 1; if intrinsics::is_val_statically_known(exp) { - // Unroll multiplications for small exponent values. - // This gives the optimizer a way to efficiently inline call sites - // for the most common use cases with constant exponents. - // Currently, LLVM is unable to unroll the loop below. - match exp { - 0 => return 1, - 1 => return base, - 2 => return base.wrapping_mul(base), - 3 => { - let squared = base.wrapping_mul(base); - return squared.wrapping_mul(base); - } - 4 => { - let squared = base.wrapping_mul(base); - return squared.wrapping_mul(squared); + while exp > 1 { + if (exp & 1) == 1 { + acc = acc.wrapping_mul(base); } - 5 => { - let squared = base.wrapping_mul(base); - return squared.wrapping_mul(squared).wrapping_mul(base); - } - 6 => { - let cubed = base.wrapping_mul(base).wrapping_mul(base); - return cubed.wrapping_mul(cubed); - } - _ => {} + exp /= 2; + base = base.wrapping_mul(base); } - } else { - if exp == 0 { - return 1; - } - } - debug_assert!(exp != 0); - let mut acc: Self = 1; - - loop { - if (exp & 1) == 1 { - acc = acc.wrapping_mul(base); - // since exp!=0, finally the exp must be 1. - if exp == 1 { - return acc; + // since exp!=0, finally the exp must be 1. + // Deal with the final bit of the exponent separately, since + // squaring the base afterwards is not necessary. + acc.wrapping_mul(base) + } else { + // This is faster than the above when the exponent is not known + // at compile time. We can't use the same code for the constant + // exponent case because LLVM is currently unable to unroll + // this loop. + loop { + if (exp & 1) == 1 { + acc = acc.wrapping_mul(base); + // since exp!=0, finally the exp must be 1. + if exp == 1 { + return acc; + } } + exp /= 2; + base = base.wrapping_mul(base); } - exp /= 2; - base = base.wrapping_mul(base); } } @@ -2578,54 +2565,42 @@ macro_rules! uint_impl { #[rustc_inherit_overflow_checks] #[rustc_allow_const_fn_unstable(is_val_statically_known)] pub const fn pow(self, mut exp: u32) -> Self { + if exp == 0 { + return 1; + } let mut base = self; + let mut acc = 1; if intrinsics::is_val_statically_known(exp) { - // Unroll multiplications for small exponent values. - // This gives the optimizer a way to efficiently inline call sites - // for the most common use cases with constant exponents. - // Currently, LLVM is unable to unroll the loop below. - match exp { - 0 => return 1, - 1 => return base, - 2 => return base * base, - 3 => { - let squared = base * base; - return squared * base; - } - 4 => { - let squared = base * base; - return squared * squared; + while exp > 1 { + if (exp & 1) == 1 { + acc = acc * base; } - 5 => { - let squared = base * base; - return squared * squared * base; - } - 6 => { - let cubed = base * base * base; - return cubed * cubed; - } - _ => {} + exp /= 2; + base = base * base; } - } else { - if exp == 0 { - return 1; - } - } - debug_assert!(exp != 0); - let mut acc = 1; - - loop { - if (exp & 1) == 1 { - acc = acc * base; - // since exp!=0, finally the exp must be 1. - if exp == 1 { - return acc; + // since exp!=0, finally the exp must be 1. + // Deal with the final bit of the exponent separately, since + // squaring the base afterwards is not necessary and may cause a + // needless overflow. + acc * base + } else { + // This is faster than the above when the exponent is not known + // at compile time. We can't use the same code for the constant + // exponent case because LLVM is currently unable to unroll + // this loop. + loop { + if (exp & 1) == 1 { + acc = acc * base; + // since exp!=0, finally the exp must be 1. + if exp == 1 { + return acc; + } } + exp /= 2; + base = base * base; } - exp /= 2; - base = base * base; } }