diff --git a/core/src/slice/sort/select.rs b/core/src/slice/sort/select.rs
index 2b3e14755302c..28ca9dcad39a7 100644
--- a/core/src/slice/sort/select.rs
+++ b/core/src/slice/sort/select.rs
@@ -42,14 +42,12 @@ where
         let min_idx = min_index(v, &mut is_less).unwrap();
         v.swap(min_idx, index);
     } else {
-        #[cfg(not(feature = "optimize_for_size"))]
-        {
-            partition_at_index_loop(v, index, None, &mut is_less);
-        }
-
-        #[cfg(feature = "optimize_for_size")]
-        {
-            median_of_medians(v, &mut is_less, index);
+        cfg_if! {
+            if #[cfg(feature = "optimize_for_size")] {
+                median_of_medians(v, &mut is_less, index);
+            } else {
+                partition_at_index_loop(v, index, None, &mut is_less);
+            }
         }
     }

@@ -178,14 +176,12 @@ fn median_of_medians<T, F: FnMut(&T, &T) -> bool>(mut v: &mut [T], is_less: &mut
     loop {
         if v.len() <= INSERTION_SORT_THRESHOLD {
             if v.len() >= 2 {
-                #[cfg(not(feature = "optimize_for_size"))]
-                {
-                    insertion_sort_shift_left(v, 1, is_less);
-                }
-
-                #[cfg(feature = "optimize_for_size")]
-                {
-                    bubble_sort(v, is_less);
+                cfg_if! {
+                    if #[cfg(feature = "optimize_for_size")] {
+                        bubble_sort(v, is_less);
+                    } else {
+                        insertion_sort_shift_left(v, 1, is_less);
+                    }
                 }
             }

diff --git a/core/src/slice/sort/stable/mod.rs b/core/src/slice/sort/stable/mod.rs
index a61a95a225455..3472401c4dcf8 100644
--- a/core/src/slice/sort/stable/mod.rs
+++ b/core/src/slice/sort/stable/mod.rs
@@ -39,40 +39,38 @@ pub fn sort<T, F: FnMut(&T, &T) -> bool, BufT: BufGuard<T>>(v: &mut [T], is_less
         return;
     }

-    #[cfg(not(feature = "optimize_for_size"))]
-    {
-        // More advanced sorting methods than insertion sort are faster if called in
-        // a hot loop for small inputs, but for general-purpose code the small
-        // binary size of insertion sort is more important. The instruction cache in
-        // modern processors is very valuable, and for a single sort call in general
-        // purpose code any gains from an advanced method are cancelled by i-cache
-        // misses during the sort, and thrashing the i-cache for surrounding code.
-        const MAX_LEN_ALWAYS_INSERTION_SORT: usize = 20;
-        if intrinsics::likely(len <= MAX_LEN_ALWAYS_INSERTION_SORT) {
-            insertion_sort_shift_left(v, 1, is_less);
-            return;
-        }
-
-        driftsort_main::<T, F, BufT>(v, is_less);
-    }
-
-    #[cfg(feature = "optimize_for_size")]
-    {
-        let alloc_len = len / 2;
-
-        // For small inputs 4KiB of stack storage suffices, which allows us to avoid
-        // calling the (de-)allocator. Benchmarks showed this was quite beneficial.
-        let mut stack_buf = AlignedStorage::<T, 4096>::new();
-        let stack_scratch = stack_buf.as_uninit_slice_mut();
-        let mut heap_buf;
-        let scratch = if stack_scratch.len() >= alloc_len {
-            stack_scratch
+    cfg_if! {
+        if #[cfg(feature = "optimize_for_size")] {
+            let alloc_len = len / 2;
+
+            // For small inputs 4KiB of stack storage suffices, which allows us to avoid
+            // calling the (de-)allocator. Benchmarks showed this was quite beneficial.
+            let mut stack_buf = AlignedStorage::<T, 4096>::new();
+            let stack_scratch = stack_buf.as_uninit_slice_mut();
+            let mut heap_buf;
+            let scratch = if stack_scratch.len() >= alloc_len {
+                stack_scratch
+            } else {
+                heap_buf = BufT::with_capacity(alloc_len);
+                heap_buf.as_uninit_slice_mut()
+            };
+
+            tiny::mergesort(v, scratch, is_less);
         } else {
-            heap_buf = BufT::with_capacity(alloc_len);
-            heap_buf.as_uninit_slice_mut()
-        };
-
-        tiny::mergesort(v, scratch, is_less);
+            // More advanced sorting methods than insertion sort are faster if called in
+            // a hot loop for small inputs, but for general-purpose code the small
+            // binary size of insertion sort is more important. The instruction cache in
+            // modern processors is very valuable, and for a single sort call in general
+            // purpose code any gains from an advanced method are cancelled by i-cache
+            // misses during the sort, and thrashing the i-cache for surrounding code.
+            const MAX_LEN_ALWAYS_INSERTION_SORT: usize = 20;
+            if intrinsics::likely(len <= MAX_LEN_ALWAYS_INSERTION_SORT) {
+                insertion_sort_shift_left(v, 1, is_less);
+                return;
+            }
+
+            driftsort_main::<T, F, BufT>(v, is_less);
+        }
     }
 }

diff --git a/core/src/slice/sort/unstable/mod.rs b/core/src/slice/sort/unstable/mod.rs
index faac97eab02b8..130be21ee3fe8 100644
--- a/core/src/slice/sort/unstable/mod.rs
+++ b/core/src/slice/sort/unstable/mod.rs
@@ -30,28 +30,26 @@ pub fn sort<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F) {
         return;
     }

-    #[cfg(not(feature = "optimize_for_size"))]
-    {
-        // More advanced sorting methods than insertion sort are faster if called in
-        // a hot loop for small inputs, but for general-purpose code the small
-        // binary size of insertion sort is more important. The instruction cache in
-        // modern processors is very valuable, and for a single sort call in general
-        // purpose code any gains from an advanced method are cancelled by i-cache
-        // misses during the sort, and thrashing the i-cache for surrounding code.
-        const MAX_LEN_ALWAYS_INSERTION_SORT: usize = 20;
-        if intrinsics::likely(len <= MAX_LEN_ALWAYS_INSERTION_SORT) {
-            insertion_sort_shift_left(v, 1, is_less);
-            return;
-        }
-
-        ipnsort(v, is_less);
-    }
+    cfg_if! {
+        if #[cfg(feature = "optimize_for_size")] {
+            // SAFETY: We checked that `len >= 2`.
+            unsafe {
+                heapsort::heapsort(v, is_less);
+            }
+        } else {
+            // More advanced sorting methods than insertion sort are faster if called in
+            // a hot loop for small inputs, but for general-purpose code the small
+            // binary size of insertion sort is more important. The instruction cache in
+            // modern processors is very valuable, and for a single sort call in general
+            // purpose code any gains from an advanced method are cancelled by i-cache
+            // misses during the sort, and thrashing the i-cache for surrounding code.
+            const MAX_LEN_ALWAYS_INSERTION_SORT: usize = 20;
+            if intrinsics::likely(len <= MAX_LEN_ALWAYS_INSERTION_SORT) {
+                insertion_sort_shift_left(v, 1, is_less);
+                return;
+            }

-    #[cfg(feature = "optimize_for_size")]
-    {
-        // SAFETY: We checked that `len >= 2`.
-        unsafe {
-            heapsort::heapsort(v, is_less);
+            ipnsort(v, is_less);
         }
     }
 }
diff --git a/core/src/slice/sort/unstable/quicksort.rs b/core/src/slice/sort/unstable/quicksort.rs
index 83751d99cc261..9c59ccdb70005 100644
--- a/core/src/slice/sort/unstable/quicksort.rs
+++ b/core/src/slice/sort/unstable/quicksort.rs
@@ -141,15 +141,12 @@ const fn inst_partition<T, F: FnMut(&T, &T) -> bool>() -> fn(&mut [T], &T, &mut
     if mem::size_of::<T>() <= MAX_BRANCHLESS_PARTITION_SIZE {
         // Specialize for types that are relatively cheap to copy, where branchless optimizations
         // have large leverage e.g. `u64` and `String`.
-
-        #[cfg(not(feature = "optimize_for_size"))]
-        {
-            partition_lomuto_branchless_cyclic::<T, F>
-        }
-
-        #[cfg(feature = "optimize_for_size")]
-        {
-            partition_lomuto_branchless_simple::<T, F>
+        cfg_if! {
+            if #[cfg(feature = "optimize_for_size")] {
+                partition_lomuto_branchless_simple::<T, F>
+            } else {
+                partition_lomuto_branchless_cyclic::<T, F>
+            }
         }
     } else {
         partition_hoare_branchy_cyclic::<T, F>
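
Reviewer note: for anyone who has not used `cfg_if!` before, the macro keeps both implementations in the source but lets exactly one survive `cfg` expansion, so the branch that is compiled out costs nothing in the artifact. It replaces the previous pairs of `#[cfg(...)]`/`#[cfg(not(...))]` blocks that had to be kept in sync by hand. Below is a minimal standalone sketch of the pattern, assuming the external `cfg-if` crate (core vendors its own copy of the macro) and a Cargo feature named `optimize_for_size` declared by the example crate:

```rust
// Standalone sketch only: `cfg_if` here comes from the external `cfg-if`
// crate, standing in for the macro core defines internally, and the
// `optimize_for_size` feature is assumed to be declared in Cargo.toml.
use cfg_if::cfg_if;

pub fn sort_u32(v: &mut [u32]) {
    cfg_if! {
        if #[cfg(feature = "optimize_for_size")] {
            // Size-optimized branch: a tiny O(n^2) insertion sort. Only this
            // branch is compiled when the feature is enabled.
            for i in 1..v.len() {
                let mut j = i;
                while j > 0 && v[j] < v[j - 1] {
                    v.swap(j, j - 1);
                    j -= 1;
                }
            }
        } else {
            // Speed-optimized branch: defer to the standard unstable sort.
            v.sort_unstable();
        }
    }
}
```

Compared with the removed pattern, a single `if`/`else` chain cannot silently end up compiling zero or both branches when one attribute of a pair is edited and its `not(...)` twin is forgotten.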
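The `optimize_for_size` arm of the stable sort also relies on an idiom worth spelling out: a scratch buffer that lives on the stack when it is large enough and falls back to a heap allocation otherwise, with `heap_buf` declared before the `if` so the borrow taken in the `else` arm can outlive it. A simplified sketch under stated assumptions: a hypothetical 64-slot stack array stands in for core's `AlignedStorage::<T, 4096>` (which sizes the buffer in bytes, not slots), and `Vec::spare_capacity_mut` stands in for `BufT::as_uninit_slice_mut`:

```rust
use std::mem::MaybeUninit;

// Sketch of the "stack first, heap fallback" scratch allocation used by the
// size-optimized stable sort. The 64-slot capacity is illustrative only.
fn with_scratch_len<T>(alloc_len: usize) -> usize {
    const STACK_SLOTS: usize = 64;
    let mut stack_buf = [const { MaybeUninit::<T>::uninit() }; STACK_SLOTS];
    // Declared but not yet initialized: assigning it inside the `else` arm
    // lets the returned borrow escape the `if` expression.
    let mut heap_buf;
    let scratch: &mut [MaybeUninit<T>] = if stack_buf.len() >= alloc_len {
        &mut stack_buf
    } else {
        heap_buf = Vec::with_capacity(alloc_len);
        // Stand-in for `BufT::as_uninit_slice_mut`: the Vec's uninitialized
        // spare capacity, viewed as `&mut [MaybeUninit<T>]`.
        heap_buf.spare_capacity_mut()
    };
    scratch.len()
}
```

As the in-tree comment notes, taking the stack path for small inputs avoids touching the allocator entirely, which benchmarks showed to be quite beneficial.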