From f46fcfed64476c113e4caf7ffcd87c32761b60cd Mon Sep 17 00:00:00 2001 From: Lukas Bergdoll Date: Mon, 26 Aug 2024 11:17:38 +0200 Subject: [PATCH] Reduce code duplication by moving partition_lomuto_branchless_simple into quicksort module --- core/src/slice/sort/select.rs | 78 +---------------------- core/src/slice/sort/unstable/mod.rs | 1 - core/src/slice/sort/unstable/quicksort.rs | 39 +++++++++++- 3 files changed, 39 insertions(+), 79 deletions(-) diff --git a/core/src/slice/sort/select.rs b/core/src/slice/sort/select.rs index b60e33b3a2f0d..2b3e14755302c 100644 --- a/core/src/slice/sort/select.rs +++ b/core/src/slice/sort/select.rs @@ -6,13 +6,11 @@ //! for pivot selection. Using this as a fallback ensures O(n) worst case running time with //! better performance than one would get using heapsort as fallback. -use crate::intrinsics; use crate::mem::{self, SizedTypeProperties}; #[cfg(not(feature = "optimize_for_size"))] use crate::slice::sort::shared::pivot::choose_pivot; #[cfg(not(feature = "optimize_for_size"))] use crate::slice::sort::shared::smallsort::insertion_sort_shift_left; -#[cfg(not(feature = "optimize_for_size"))] use crate::slice::sort::unstable::quicksort::partition; /// Reorders the slice such that the element at `index` is at its final sorted position. @@ -252,15 +250,7 @@ fn median_of_ninthers bool>(v: &mut [T], is_less: &mut F) median_of_medians(&mut v[lo..lo + frac], is_less, pivot); - #[cfg(not(feature = "optimize_for_size"))] - { - partition(v, lo + pivot, is_less) - } - - #[cfg(feature = "optimize_for_size")] - { - partition_size_opt(v, lo + pivot, is_less) - } + partition(v, lo + pivot, is_less) } /// Moves around the 9 elements at the indices a..i, such that @@ -351,69 +341,3 @@ fn bubble_sort bool>(v: &mut [T], is_less: &mut F) { n -= 1; } } - -#[cfg(feature = "optimize_for_size")] -fn partition_size_opt(v: &mut [T], pivot: usize, is_less: &mut F) -> usize -where - F: FnMut(&T, &T) -> bool, -{ - let len = v.len(); - - // Allows for panic-free code-gen by proving this property to the compiler. - if len == 0 { - return 0; - } - - if pivot >= len { - intrinsics::abort(); - } - - // SAFETY: We checked that `pivot` is in-bounds. - unsafe { - // Place the pivot at the beginning of slice. - v.swap_unchecked(0, pivot); - } - let (pivot, v_without_pivot) = v.split_at_mut(1); - - // Assuming that Rust generates noalias LLVM IR we can be sure that a partition function - // signature of the form `(v: &mut [T], pivot: &T)` guarantees that pivot and v can't alias. - // Having this guarantee is crucial for optimizations. It's possible to copy the pivot value - // into a stack value, but this creates issues for types with interior mutability mandating - // a drop guard. - let pivot = &mut pivot[0]; - - let num_lt = partition_lomuto_branchless_simple(v_without_pivot, pivot, is_less); - - if num_lt >= len { - intrinsics::abort(); - } - - // SAFETY: We checked that `num_lt` is in-bounds. - unsafe { - // Place the pivot between the two partitions. - v.swap_unchecked(0, num_lt); - } - - num_lt -} - -#[cfg(feature = "optimize_for_size")] -fn partition_lomuto_branchless_simple bool>( - v: &mut [T], - pivot: &T, - is_less: &mut F, -) -> usize { - let mut left = 0; - - for right in 0..v.len() { - // SAFETY: `left` can at max be incremented by 1 each loop iteration, which implies that - // left <= right and that both are in-bounds. - unsafe { - let right_is_lt = is_less(v.get_unchecked(right), pivot); - v.swap_unchecked(left, right); - left += right_is_lt as usize; - } - } - - left -} diff --git a/core/src/slice/sort/unstable/mod.rs b/core/src/slice/sort/unstable/mod.rs index 43b5cc79fc7f3..faac97eab02b8 100644 --- a/core/src/slice/sort/unstable/mod.rs +++ b/core/src/slice/sort/unstable/mod.rs @@ -8,7 +8,6 @@ use crate::slice::sort::shared::find_existing_run; use crate::slice::sort::shared::smallsort::insertion_sort_shift_left; pub(crate) mod heapsort; -#[cfg(not(feature = "optimize_for_size"))] pub(crate) mod quicksort; /// Unstable sort called ipnsort by Lukas Bergdoll and Orson Peters. diff --git a/core/src/slice/sort/unstable/quicksort.rs b/core/src/slice/sort/unstable/quicksort.rs index e55ebe67e1306..83751d99cc261 100644 --- a/core/src/slice/sort/unstable/quicksort.rs +++ b/core/src/slice/sort/unstable/quicksort.rs @@ -1,7 +1,9 @@ //! This module contains an unstable quicksort and two partition implementations. use crate::mem::{self, ManuallyDrop}; +#[cfg(not(feature = "optimize_for_size"))] use crate::slice::sort::shared::pivot::choose_pivot; +#[cfg(not(feature = "optimize_for_size"))] use crate::slice::sort::shared::smallsort::UnstableSmallSortTypeImpl; use crate::{intrinsics, ptr}; @@ -11,6 +13,7 @@ use crate::{intrinsics, ptr}; /// /// `limit` is the number of allowed imbalanced partitions before switching to `heapsort`. If zero, /// this function will immediately switch to heapsort. +#[cfg(not(feature = "optimize_for_size"))] pub(crate) fn quicksort<'a, T, F>( mut v: &'a mut [T], mut ancestor_pivot: Option<&'a T>, @@ -138,7 +141,16 @@ const fn inst_partition bool>() -> fn(&mut [T], &T, &mut if mem::size_of::() <= MAX_BRANCHLESS_PARTITION_SIZE { // Specialize for types that are relatively cheap to copy, where branchless optimizations // have large leverage e.g. `u64` and `String`. - partition_lomuto_branchless_cyclic:: + + #[cfg(not(feature = "optimize_for_size"))] + { + partition_lomuto_branchless_cyclic:: + } + + #[cfg(feature = "optimize_for_size")] + { + partition_lomuto_branchless_simple:: + } } else { partition_hoare_branchy_cyclic:: } @@ -224,6 +236,7 @@ where } } +#[cfg(not(feature = "optimize_for_size"))] struct PartitionState { // The current element that is being looked at, scans left to right through slice. right: *mut T, @@ -234,6 +247,7 @@ struct PartitionState { gap: GapGuardRaw, } +#[cfg(not(feature = "optimize_for_size"))] fn partition_lomuto_branchless_cyclic(v: &mut [T], pivot: &T, is_less: &mut F) -> usize where F: FnMut(&T, &T) -> bool, @@ -325,6 +339,27 @@ where } } +#[cfg(feature = "optimize_for_size")] +fn partition_lomuto_branchless_simple bool>( + v: &mut [T], + pivot: &T, + is_less: &mut F, +) -> usize { + let mut left = 0; + + for right in 0..v.len() { + // SAFETY: `left` can at max be incremented by 1 each loop iteration, which implies that + // left <= right and that both are in-bounds. + unsafe { + let right_is_lt = is_less(v.get_unchecked(right), pivot); + v.swap_unchecked(left, right); + left += right_is_lt as usize; + } + } + + left +} + struct GapGuard { pos: *mut T, value: ManuallyDrop, @@ -342,11 +377,13 @@ impl Drop for GapGuard { /// Ideally this wouldn't be needed and we could just use the regular GapGuard. /// See comment in [`partition_lomuto_branchless_cyclic`]. +#[cfg(not(feature = "optimize_for_size"))] struct GapGuardRaw { pos: *mut T, value: *mut T, } +#[cfg(not(feature = "optimize_for_size"))] impl Drop for GapGuardRaw { fn drop(&mut self) { // SAFETY: `self` MUST be constructed in a way that makes copying the gap value into