diff --git a/NEWS.md b/NEWS.md index edcd43a9c7..8391f5bbbd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,11 @@ # DataFrames.jl changes on main since last release notes +## Bug fixes + +* fix bug in how `issorted` handles custom orderings and improve performance + of sorting when complex custom orderings are passed + ([#2746](/~https://github.com/JuliaData/DataFrames.jl/pull/2746)) + ## Performance improvements * `SubDataFrame` creation is now more efficient if row selector is diff --git a/src/abstractdataframe/sort.jl b/src/abstractdataframe/sort.jl index d62856402f..20edb51e12 100755 --- a/src/abstractdataframe/sort.jl +++ b/src/abstractdataframe/sort.jl @@ -93,14 +93,15 @@ ordering(col::ColumnIndex, lt::Function, by::Function, rev::Bool, order::Orderin # DFPerm: defines a permutation on a particular DataFrame, using # a single ordering (O<:Ordering) or a list of column orderings -# (O<:AbstractVector{Ordering}), one per DataFrame column +# (NTuple of Ordering), one per DataFrame column # # If a user only specifies a few columns, the DataFrame # contained in the DFPerm only contains those columns, and # the permutation induced by this ordering is used to # sort the original (presumably larger) DataFrame -struct DFPerm{O<:Union{Ordering, AbstractVector}, T<:Tuple} <: Ordering +struct DFPerm{O<:Union{Ordering, Tuple{Vararg{Ordering}}}, + T<:Tuple{Vararg{AbstractVector}}} <: Ordering ord::O cols::T end @@ -109,24 +110,50 @@ function DFPerm(ords::AbstractVector{O}, cols::T) where {O<:Ordering, T<:Tuple} if length(ords) != length(cols) error("DFPerm: number of column orderings does not equal the number of columns") end - DFPerm{typeof(ords), T}(ords, cols) + DFPerm(Tuple(ords), cols) end DFPerm(o::Union{Ordering, AbstractVector}, df::AbstractDataFrame) = DFPerm(o, ntuple(i -> df[!, i], ncol(df))) -# get ordering function for the i-th column used for ordering -col_ordering(o::DFPerm{O}, i::Int) where {O<:Ordering} = o.ord -col_ordering(o::DFPerm{V}, i::Int) where {V<:AbstractVector} = o.ord[i] +@inline col_ordering(o::Ordering) = o +@inline ord_tail(o::Ordering) = o +@inline col_ordering(o::Tuple{Vararg{Ordering}}) = @inbounds o[1] +@inline ord_tail(o::Tuple{Vararg{Ordering}}) = Base.tail(o) + +Sort.lt(o::DFPerm{<:Any, Tuple{}}, a, b) = false + +function Sort.lt(o::DFPerm{<:Any, <:Tuple}, a, b) + ord = o.ord + cols = o.cols + # if there are too many columns fall back to type unstable mode to avoid high compilation cost + # it is expected that in practice users sort data frames on only few columns + length(cols) > 16 && return unstable_lt(ord, cols, a, b) -function Sort.lt(o::DFPerm, a, b) - @inbounds for i in 1:length(o.cols) - ord = col_ordering(o, i) - col = o.cols[i] + @inbounds begin + ord1 = col_ordering(ord) + col = first(cols) va = col[a] vb = col[b] - lt(ord, va, vb) && return true - lt(ord, vb, va) && return false + lt(ord1, va, vb) && return true + lt(ord1, vb, va) && return false + end + return Sort.lt(DFPerm(ord_tail(ord), Base.tail(cols)), a, b) +end + +# get ordering function for the i-th column used for ordering +col_ordering(o::Ordering, i::Int) where {O<:Ordering} = o +col_ordering(o::Tuple{Vararg{Ordering}}, i::Int) = @inbounds o[i] + +function unstable_lt(ord::Union{Ordering, Tuple{Vararg{Ordering}}}, + cols::Tuple{Vararg{AbstractVector}}, a, b) + for i in 1:length(cols) + ordi = col_ordering(ord, i) + @inbounds coli = cols[i] + @inbounds va = coli[a] + @inbounds vb = coli[b] + lt(ordi, va, vb) && return true + lt(ordi, vb, va) && return false end false # a and b are equal end @@ -325,7 +352,7 @@ function Base.issorted(df::AbstractDataFrame, cols=[]; end if cols isa ColumnIndex return issorted(df[!, cols], lt=lt, by=by, rev=rev, order=order) - elseif length(cols) == 1 + elseif cols isa AbstractVector{<:ColumnIndex} && length(cols) == 1 return issorted(df[!, cols[1]], lt=lt, by=by, rev=rev, order=order) else return issorted(1:nrow(df), ordering(df, cols, lt, by, rev, order)) diff --git a/src/other/precompile.jl b/src/other/precompile.jl index fae084bca0..56a863e93a 100644 --- a/src/other/precompile.jl +++ b/src/other/precompile.jl @@ -138,7 +138,6 @@ function precompile(all=false) Base.precompile(Tuple{typeof(transform!),DataFrame,Any}) Base.precompile(Tuple{typeof(transform),DataFrame,Any}) else - Base.precompile(Tuple{typeof(_sortperm),DataFrame,Base.Sort.MergeSortAlg,DFPerm{Vector{Ordering}, Tuple{Vector{Int}, Vector{Int}, Vector{Int}}}}) Base.precompile(Tuple{typeof(flatten),DataFrame,All{Tuple{}}}) Base.precompile(Tuple{Reduce{typeof(Base.add_sum), Nothing, typeof(/)},Vector{Int},GroupedDataFrame{DataFrame}}) Base.precompile(Tuple{typeof(view),DataFrame,Function,All{Tuple{}}}) @@ -212,7 +211,6 @@ function precompile(all=false) Base.precompile(Tuple{typeof(_innerjoin_sorted),OnCol{Tuple{Vector{Union{Missing, String}}, Vector{Union{Missing, String}}}, 2},OnCol{Tuple{Vector{String}, Vector{String}}, 2}}) Base.precompile(Tuple{typeof(do_call),ComposedFunction{typeof(prod), typeof(skipmissing)},Vector{Int},Vector{Int},Vector{Int},GroupedDataFrame{DataFrame},Tuple{Vector{DataFrame}},Int}) Base.precompile(Tuple{typeof(_innerjoin_unsorted),Vector{String},Vector{Union{Missing, String}}}) - Base.precompile(Tuple{typeof(sort!),DataFrame,Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, Tuple{Vector{Any}, Vector{Any}}}}) Base.precompile(Tuple{typeof(_transformation_helper),SubDataFrame{DataFrame, SubIndex{Index, Vector{Int}, Vector{Int}}, UnitRange{Int}},Vector{Int},Base.RefValue{Any}}) Base.precompile(Tuple{typeof(row_group_slots),Tuple{Vector{Union{Missing, Float64}}, Vector{Union{Missing, Float64}}},Val{false},Vector{Int},Bool,Bool}) Base.precompile(Tuple{Aggregate{typeof(length), Nothing},Vector{Any},GroupedDataFrame{DataFrame}}) @@ -334,7 +332,6 @@ function precompile(all=false) Base.precompile(Tuple{typeof(_combine_tables_with_first!),SubDataFrame{DataFrame, SubIndex{Index, UnitRange{Int}, UnitRange{Int}}, UnitRange{Int}},Tuple{},Vector{Int},Int,Int,Function,GroupedDataFrame{DataFrame},Tuple{Vector{Bool}},Tuple{},FirstMultiCol}) Base.precompile(Tuple{typeof(_innerjoin_sorted),Vector{UInt32},Vector{Union{Missing, UInt32}}}) Base.precompile(Tuple{typeof(_innerjoin_unsorted),OnCol{Tuple{Vector{Union{Missing, Symbol}}, Vector{Union{Missing, Symbol}}}, 2},OnCol{Tuple{Vector{Union{Missing, Symbol}}, Vector{Union{Missing, Symbol}}}, 2}}) - Base.precompile(Tuple{typeof(sort!),DataFrame,SortingAlgorithms.TimSortAlg,DFPerm{ForwardOrdering, Tuple{Vector{Int}, Vector{Int}, Vector{Union{Missing, Int}}}}}) Base.precompile(Tuple{Reduce{typeof(Base.mul_prod), Nothing, Nothing},Vector{Union{Int, Int8}},GroupedDataFrame{DataFrame}}) Base.precompile(Tuple{typeof(row_group_slots),Tuple{PooledVector{Int, UInt32, Vector{UInt32}}, PooledVector{String, UInt32, Vector{UInt32}}},Val{false},Vector{Int},Bool,Bool}) Base.precompile(Tuple{typeof(row_group_slots),Tuple{Vector{Union{Missing, String}}, PooledVector{String, UInt32, Vector{UInt32}}},Val{false},Vector{Int},Bool,Bool}) @@ -415,7 +412,6 @@ function precompile(all=false) Base.precompile(Tuple{typeof(groupreduce!),Vector{Any},Function,Function,Function,Function,Bool,Vector{Union{Missing, Real}},GroupedDataFrame{DataFrame}}) Base.precompile(Tuple{typeof(do_call),typeof(maximum),Vector{Int},Vector{Int},Vector{Int},GroupedDataFrame{DataFrame},Tuple{Vector{Union{Missing, Number}}},Int}) Base.precompile(Tuple{typeof(do_call),ComposedFunction{typeof(prod), typeof(skipmissing)},Vector{Int},Vector{Int},Vector{Int},GroupedDataFrame{DataFrame},Tuple{Vector{UnitRange{Int}}},Int}) - Base.precompile(Tuple{typeof(sort!),DataFrame,SortingAlgorithms.TimSortAlg,DFPerm{ForwardOrdering, Tuple{Vector{Int}, Vector{Union{Missing, Int}}, Vector{Union{Missing, Int}}}}}) Base.precompile(Tuple{typeof(row_group_slots),Tuple{Vector{Union{Missing, Float64}}},Tuple{IntegerRefpool{Union{Missing, Int}}},Tuple{IntegerRefarray{Vector{Union{Missing, Float64}}}},Val{false},Vector{Int},Bool,Bool}) Base.precompile(Tuple{typeof(getindex),SubDataFrame{DataFrame, Index, Base.OneTo{Int}},Colon,All{Tuple{}}}) Base.precompile(Tuple{Aggregate{typeof(var), Nothing},Vector{Bool},GroupedDataFrame{DataFrame}}) @@ -530,12 +526,10 @@ function precompile(all=false) end end Base.precompile(Tuple{typeof(_semijoin_sorted),OnCol{Tuple{Vector{Union{Missing, UInt32}}, Vector{Union{Missing, UInt32}}}, 2},OnCol{Tuple{Vector{UInt32}, Vector{UInt32}}, 2},BitVector}) - Base.precompile(Tuple{typeof(_sortperm),DataFrame,Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, Tuple{Vector{Int}, Vector{Float64}}}}) Base.precompile(Tuple{typeof(map2refs),PooledVector{Union{Missing, Int}, UInt32, Vector{UInt32}},Dict{Int, UInt32}}) Base.precompile(Tuple{Core.kwftype(typeof(outerjoin)),NamedTuple{(:on,), Tuple{Vector{Pair{String, String}}}},typeof(outerjoin),DataFrame,DataFrame,DataFrame}) Base.precompile(Tuple{typeof(row_group_slots),Tuple{PooledVector{String, UInt32, Vector{UInt32}}, Vector{Union{Missing, String}}},Val{false},Vector{Int},Bool,Bool}) Base.precompile(Tuple{typeof(do_call),ComposedFunction{typeof(minimum), typeof(skipmissing)},Vector{Int},Vector{Int},Vector{Int},GroupedDataFrame{DataFrame},Tuple{Vector{Union{Missing, Number}}},Int}) - Base.precompile(Tuple{typeof(sort!),DataFrame,SortingAlgorithms.TimSortAlg,DFPerm{ForwardOrdering, Tuple{Vector{Int}, Vector{Union{Missing, Int}}, Vector{Int}}}}) Base.precompile(Tuple{Core.kwftype(typeof(Type)),NamedTuple{(:x1,), Tuple{Vector{String}}},Type{DataFrame}}) Base.precompile(Tuple{typeof(stack),DataFrame,InvertedIndex{Symbol}}) Base.precompile(Tuple{Core.kwftype(typeof(outerjoin)),NamedTuple{(:on,), Tuple{String}},typeof(outerjoin),DataFrame,DataFrame,DataFrame}) @@ -599,7 +593,6 @@ function precompile(all=false) Base.precompile(Tuple{typeof(_innerjoin_sorted),OnCol{Tuple{PooledVector{Union{Missing, Int}, UInt32, Vector{UInt32}}, PooledVector{Union{Missing, Int}, UInt32, Vector{UInt32}}}, 2},OnCol{Tuple{Vector{Int}, Vector{Int}}, 2}}) Base.precompile(Tuple{typeof(do_call),typeof(minimum),Vector{Int},Vector{Int},Vector{Int},GroupedDataFrame{DataFrame},Tuple{Vector{Union{Missing, DataFrame}}},Int}) Base.precompile(Tuple{typeof(view),SubDataFrame{DataFrame, Index, Base.OneTo{Int}},UnitRange{Int},All{Tuple{}}}) - Base.precompile(Tuple{typeof(_sortperm),DataFrame,Base.Sort.MergeSortAlg,DFPerm{ReverseOrdering{ForwardOrdering}, Tuple{Vector{Int}, Vector{Int}, Vector{Int}}}}) Base.precompile(Tuple{typeof(_innerjoin_unsorted_int),Vector{Int},Vector{Int}}) Base.precompile(Tuple{typeof(transform),DataFrame,Any,Any,Vararg{Any, N} where N}) Base.precompile(Tuple{typeof(push!),DataFrame,Tuple{Int, String}}) @@ -640,10 +633,8 @@ function precompile(all=false) Base.precompile(Tuple{typeof(row_group_slots),Tuple{Vector{Union{Missing, String}}, Vector{Union{Missing, String}}, PooledVector{Union{Missing, String}, UInt32, Vector{UInt32}}},Val{false},Vector{Int},Bool,Bool}) Base.precompile(Tuple{typeof(do_call),typeof(prod),Vector{Int},Vector{Int},Vector{Int},GroupedDataFrame{DataFrame},Tuple{Vector{Any}},Int}) Base.precompile(Tuple{typeof(rename),DataFrame,Pair{BigInt, Symbol}}) - Base.precompile(Tuple{typeof(_sortperm),SubDataFrame{DataFrame, Index, Vector{Int}},Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, Tuple{SubArray{Union{Missing, String}, 1, Vector{Union{Missing, String}}, Tuple{Vector{Int}}, false}, SubArray{Union{Missing, String}, 1, PooledVector{Union{Missing, String}, UInt32, Vector{UInt32}}, Tuple{Vector{Int}}, false}, SubArray{Union{Missing, String}, 1, Vector{Union{Missing, String}}, Tuple{Vector{Int}}, false}}}}) Base.precompile(Tuple{Aggregate{typeof(length), Nothing},Vector{Real},GroupedDataFrame{DataFrame}}) Base.precompile(Tuple{typeof(getindex),DataFrame,UnitRange{Int},Regex}) - Base.precompile(Tuple{typeof(_sortperm),SubDataFrame{DataFrame, SubIndex{Index, UnitRange{Int}, UnitRange{Int}}, UnitRange{Int}},Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, Tuple{SubArray{Float64, 1, Vector{Float64}, Tuple{UnitRange{Int}}, true}, SubArray{Float64, 1, Vector{Float64}, Tuple{UnitRange{Int}}, true}}}}) Base.precompile(Tuple{Reduce{typeof(max), Nothing, Nothing},Vector{Bool},GroupedDataFrame{DataFrame}}) Base.precompile(Tuple{Aggregate{typeof(length), Nothing},Vector{BigFloat},GroupedDataFrame{DataFrame}}) Base.precompile(Tuple{typeof(setindex!),DataFrame,Dict{String, Int},Int,Vector{String}}) @@ -718,7 +709,6 @@ function precompile(all=false) end end Base.precompile(Tuple{typeof(unstack),DataFrame,Symbol,Symbol,Symbol}) - Base.precompile(Tuple{typeof(sort!),DataFrame,SortingAlgorithms.TimSortAlg,DFPerm{ForwardOrdering, Tuple{Vector{Int}, Vector{Int}}}}) Base.precompile(Tuple{Type{DataFrame},Tables.MatrixTable{Matrix{Any}}}) Base.precompile(Tuple{typeof(show),GroupedDataFrame{DataFrame}}) Base.precompile(Tuple{typeof(==),GroupKey{GroupedDataFrame{DataFrame}},DataFrameRow{DataFrame, Index}}) @@ -730,7 +720,6 @@ function precompile(all=false) Base.precompile(Tuple{typeof(row_group_slots),Tuple{Vector{Int}},Tuple{IntegerRefpool{Int}},Tuple{IntegerRefarray{Vector{Int}}},Val{false},Vector{Int},Bool,Bool}) Base.precompile(Tuple{typeof(do_call),ComposedFunction{typeof(minimum), typeof(skipmissing)},Vector{Int},Vector{Int},Vector{Int},GroupedDataFrame{DataFrame},Tuple{Vector{Union{Missing, UnitRange{Int}}}},Int}) Base.precompile(Tuple{typeof(_semijoin_unsorted),OnCol{Tuple{Vector{Int}, Vector{Int}}, 2},OnCol{Tuple{PooledVector{Union{Missing, Int}, UInt32, Vector{UInt32}}, PooledVector{Union{Missing, Int}, UInt32, Vector{UInt32}}}, 2},BitVector,Bool}) - Base.precompile(Tuple{typeof(_sortperm),SubDataFrame{DataFrame, Index, Vector{Int}},Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, Tuple{SubArray{Union{Missing, String}, 1, PooledVector{Union{Missing, String}, UInt32, Vector{UInt32}}, Tuple{Vector{Int}}, false}, SubArray{String, 1, Vector{String}, Tuple{Vector{Int}}, false}}}}) Base.precompile(Tuple{Core.kwftype(typeof(isapprox)),NamedTuple{(:atol,), Tuple{Float64}},typeof(isapprox),DataFrame,DataFrame}) Base.precompile(Tuple{typeof(select),SubDataFrame{DataFrame, SubIndex{Index, Vector{Int}, Vector{Int}}, UnitRange{Int}},Any,Any,Vararg{Any, N} where N}) Base.precompile(Tuple{typeof(row_group_slots),Tuple{SubArray{Int, 1, Vector{Int}, Tuple{UnitRange{Int}}, true}},Tuple{IntegerRefpool{Int}},Tuple{IntegerRefarray{SubArray{Int, 1, Vector{Int}, Tuple{UnitRange{Int}}, true}}},Val{false},Vector{Int},Bool,Bool}) @@ -743,7 +732,6 @@ function precompile(all=false) Base.precompile(Tuple{typeof(show),IOBuffer,MIME{Symbol("text/csv")},GroupedDataFrame{DataFrame}}) Base.precompile(Tuple{typeof(row_group_slots),Tuple{SubArray{Union{Missing, Int}, 1, Vector{Union{Missing, Int}}, Tuple{Base.OneTo{Int}}, true}},Tuple{IntegerRefpool{Union{Missing, Int}}},Tuple{IntegerRefarray{SubArray{Union{Missing, Int}, 1, Vector{Union{Missing, Int}}, Tuple{Base.OneTo{Int}}, true}}},Val{false},Vector{Int},Bool,Bool}) Base.precompile(Tuple{typeof(push!),DataFrame,Tuple{Int, String, Int}}) - Base.precompile(Tuple{typeof(_sortperm),DataFrame,Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, NTuple{4, Vector{Float64}}}}) Base.precompile(Tuple{typeof(flatten),DataFrame,String}) Base.precompile(Tuple{typeof(row_group_slots),Tuple{Vector{Bool}},Tuple{IntegerRefpool{Int}},Tuple{IntegerRefarray{Vector{Bool}}},Val{false},Vector{Int},Bool,Bool}) let fbody = try __lookup_kwbody__(which(stack, (DataFrame,Vector{Symbol},Vector{Symbol},))) catch missing end @@ -811,9 +799,7 @@ function precompile(all=false) end end Base.precompile(Tuple{typeof(row_group_slots),Tuple{Vector{Int}, Vector{Missing}},Val{false},Vector{Int},Bool,Bool}) - Base.precompile(Tuple{typeof(_sortperm),DataFrame,Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, Tuple{Vector{Int}, Vector{Int}, Vector{Int}}}}) Base.precompile(Tuple{typeof(do_call),typeof(prod),Vector{Int},Vector{Int},Vector{Int},GroupedDataFrame{DataFrame},Tuple{Vector{String}},Int}) - Base.precompile(Tuple{typeof(_sortperm),DataFrame,Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, Tuple{Vector{String}, Vector{Int}}}}) Base.precompile(Tuple{Core.kwftype(typeof(append!)),NamedTuple{(:cols,), Tuple{Symbol}},typeof(append!),DataFrame,Dict{Symbol, AbstractVector{T} where T}}) Base.precompile(Tuple{typeof(_semijoin_unsorted),OnCol{Tuple{Vector{Int}, Vector{Int}, Vector{Int}}, 3},OnCol{Tuple{Vector{Int}, Vector{Int}, Vector{Int}}, 3},BitVector,Bool}) Base.precompile(Tuple{typeof(row_group_slots),Tuple{Vector{String}},Val{false},Vector{Int},Bool,Bool}) @@ -850,13 +836,11 @@ function precompile(all=false) Base.precompile(Tuple{Reduce{typeof(Base.add_sum), Nothing, Nothing},Vector{BigFloat},GroupedDataFrame{DataFrame}}) Base.precompile(Tuple{Core.kwftype(typeof(show)),NamedTuple{(:allrows, :allcols, :rowlabel, :summary, :eltypes, :truncate), Tuple{Bool, Bool, Symbol, Bool, Bool, Int}},typeof(show),Base.PipeEndpoint,DataFrame}) Base.precompile(Tuple{typeof(_transformation_helper),SubDataFrame{DataFrame, SubIndex{Index, UnitRange{Int}, UnitRange{Int}}, UnitRange{Int}},UnitRange{Int},Base.RefValue{Any}}) - Base.precompile(Tuple{typeof(_sortperm),SubDataFrame{DataFrame, Index, Vector{Int}},Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, Tuple{SubArray{Union{Missing, Symbol}, 1, Vector{Union{Missing, Symbol}}, Tuple{Vector{Int}}, false}, SubArray{Int, 1, Vector{Int}, Tuple{Vector{Int}}, false}}}}) Base.precompile(Tuple{typeof(row_group_slots),Tuple{Vector{Float64}, Vector{Float64}},Tuple{IntegerRefpool{Int}, IntegerRefpool{Int}},Tuple{IntegerRefarray{Vector{Float64}}, IntegerRefarray{Vector{Float64}}},Val{false},Vector{Int},Bool,Bool}) Base.precompile(Tuple{typeof(push!),DataFrame,Dict{Any, Int}}) Base.precompile(Tuple{typeof(view),DataFrame,UnitRange{Int},Between{Int, Int}}) Base.precompile(Tuple{typeof(getindex),DataFrame,BitVector,UnitRange{Int}}) Base.precompile(Tuple{typeof(_combine_tables_with_first!),NamedTuple{(:x1,), Tuple{Vector{Float64}}},Tuple{Vector{Float64}},Vector{Int},Int,Int,Function,GroupedDataFrame{DataFrame},Nothing,Tuple{Symbol},FirstSingleCol}) - Base.precompile(Tuple{typeof(_sortperm),DataFrame,Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, Tuple{Vector{Union{Missing, Int}}, Vector{Union{Missing, Int}}}}}) Base.precompile(Tuple{typeof(setindex!),DataFrameRow{DataFrame, Index},Matrix{Int},InvertedIndex{Int}}) Base.precompile(Tuple{typeof(funname),ComposedFunction{ComposedFunction{typeof(sum), typeof(skipmissing)}, typeof(Base.div12)}}) Base.precompile(Tuple{typeof(_semijoin_unsorted),OnCol{Tuple{Vector{String}, Vector{String}}, 2},OnCol{Tuple{Vector{String}, Vector{String}}, 2},BitVector,Bool}) @@ -1218,7 +1202,6 @@ function precompile(all=false) Base.precompile(Tuple{typeof(_innerjoin_sorted),OnCol{Tuple{Vector{Missing}, Vector{Missing}, Vector{Missing}}, 3},OnCol{Tuple{Vector{UInt32}, Vector{UInt32}, Vector{UInt32}}, 3}}) Base.precompile(Tuple{Core.kwftype(typeof(manipulate)),NamedTuple{(:copycols, :keeprows, :renamecols), Tuple{Bool, Bool, Bool}},typeof(manipulate),DataFrame,Cols{Tuple{Symbol, Int}}}) Base.precompile(Tuple{Reduce{typeof(Base.mul_prod), Nothing, Nothing},Vector{Union{Missing, UInt8}},GroupedDataFrame{DataFrame}}) - Base.precompile(Tuple{typeof(sort!),DataFrame,SortingAlgorithms.TimSortAlg,DFPerm{ForwardOrdering, Tuple{Vector{Int}, Vector{Int}, Vector{Int}}}}) Base.precompile(Tuple{typeof(_semijoin_sorted),Vector{Int},Vector{Int},BitVector}) Base.precompile(Tuple{Type{SubIndex},Index,Cols{Tuple{Colon}}}) Base.precompile(Tuple{typeof(do_call),typeof(prod),Vector{Int},Vector{Int},Vector{Int},GroupedDataFrame{DataFrame},Tuple{Vector{Union{Missing, DataFrame}}},Int}) @@ -1344,10 +1327,8 @@ function precompile(all=false) end Base.precompile(Tuple{typeof(delete!),DataFrame,InvertedIndex{InvertedIndices.TupleVector{Tuple{Int, Int}}}}) Base.precompile(Tuple{Core.kwftype(typeof(innerjoin)),NamedTuple{(:on,), Tuple{Vector{Pair{Symbol, Symbol}}}},typeof(innerjoin),DataFrame,DataFrame}) - Base.precompile(Tuple{typeof(_sortperm),DataFrame,Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, Tuple{Vector{Float64}, Vector{Float64}}}}) Base.precompile(Tuple{typeof(_semijoin_sorted),Vector{String},Vector{Union{Missing, String}},BitVector}) Base.precompile(Tuple{typeof(_semijoin_sorted),Vector{Union{Missing, Int}},PooledVector{Union{Missing, Int}, UInt32, Vector{UInt32}},BitVector}) - Base.precompile(Tuple{typeof(_sortperm),SubDataFrame{DataFrame, Index, Vector{Int}},Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, Tuple{SubArray{Union{Missing, String}, 1, PooledVector{Union{Missing, String}, UInt32, Vector{UInt32}}, Tuple{Vector{Int}}, false}, SubArray{Union{Missing, String}, 1, Vector{Union{Missing, String}}, Tuple{Vector{Int}}, false}, SubArray{Union{Missing, String}, 1, PooledVector{Union{Missing, String}, UInt32, Vector{UInt32}}, Tuple{Vector{Int}}, false}}}}) Base.precompile(Tuple{typeof(row_group_slots),Tuple{Vector{Union{Missing, Float64}}, Vector{Union{Missing, Float64}}},Tuple{IntegerRefpool{Union{Missing, Int}}, IntegerRefpool{Union{Missing, Int}}},Tuple{IntegerRefarray{Vector{Union{Missing, Float64}}}, IntegerRefarray{Vector{Union{Missing, Float64}}}},Val{false},Vector{Int},Bool,Bool}) Base.precompile(Tuple{typeof(do_call),ComposedFunction{typeof(sum), typeof(skipmissing)},Vector{Int},Vector{Int},Vector{Int},GroupedDataFrame{DataFrame},Tuple{Vector{Matrix{Float64}}},Int}) Base.precompile(Tuple{typeof(genkeymap),GroupedDataFrame{DataFrame},Tuple{Vector{Int}}}) @@ -1359,7 +1340,6 @@ function precompile(all=false) Base.precompile(Tuple{Core.kwftype(typeof(innerjoin)),NamedTuple{(:on,), Tuple{Symbol}},typeof(innerjoin),SubDataFrame{DataFrame, Index, Vector{Int}},DataFrame}) Base.precompile(Tuple{typeof(do_call),typeof(var),Vector{Int},Vector{Int},Vector{Int},GroupedDataFrame{DataFrame},Tuple{Vector{Union{Missing, DataFrame}}},Int}) Base.precompile(Tuple{typeof(_gen_colnames),Any,Type{AsTable}}) - Base.precompile(Tuple{typeof(_sortperm),DataFrame,Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, Tuple{Vector{Int}, Vector{Union{Missing, Int}}}}}) Base.precompile(Tuple{typeof(isequal),OnColRow{Tuple{Vector{String}, Vector{Int}, Vector{Float64}, Vector{String}}},OnColRow{Tuple{Vector{String}, Vector{Int}, Vector{Float64}, Vector{String}}}}) Base.precompile(Tuple{typeof(row_group_slots),Tuple{Vector{Union{Missing, String}}},Val{true}}) Base.precompile(Tuple{typeof(groupreduce!),Vector{BigFloat},Function,Function,Nothing,Function,Bool,Vector{BigInt},GroupedDataFrame{DataFrame}}) @@ -1412,7 +1392,6 @@ function precompile(all=false) Base.precompile(Tuple{Core.kwftype(typeof(select!)),NamedTuple{(:renamecols,), Tuple{Bool}},typeof(select!),GroupedDataFrame{DataFrame},Union{Regex, AbstractString, Function, Signed, Symbol, Unsigned, Pair, AbstractVector{T} where T, Type, All, Between, Cols, InvertedIndex},Union{Regex, AbstractString, Function, Signed, Symbol, Unsigned, Pair, AbstractVector{T} where T, Type, All, Between, Cols, InvertedIndex},Vararg{Union{Regex, AbstractString, Function, Signed, Symbol, Unsigned, Pair, AbstractVector{T} where T, Type, All, Between, Cols, InvertedIndex}, N} where N}) Base.precompile(Tuple{typeof(row_group_slots),Tuple{Vector{Union{Missing, String}}, Vector{String}},Val{false},Vector{Int},Bool,Bool}) Base.precompile(Tuple{typeof(row_group_slots),Tuple{Vector{Int}, Vector{Union{Missing, Int}}},Val{false},Vector{Int},Bool,Bool}) - Base.precompile(Tuple{typeof(_sortperm),DataFrame,Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, NTuple{4, Vector{Int}}}}) Base.precompile(Tuple{typeof(row_group_slots),Tuple{SubArray{Int, 1, Vector{Int}, Tuple{Base.OneTo{Int}}, true}},Tuple{IntegerRefpool{Int}},Tuple{IntegerRefarray{SubArray{Int, 1, Vector{Int}, Tuple{Base.OneTo{Int}}, true}}},Val{false},Vector{Int},Bool,Bool}) Base.precompile(Tuple{Core.kwftype(typeof(outerjoin)),NamedTuple{(:on,), Tuple{Vector{Pair{Symbol, String}}}},typeof(outerjoin),DataFrame,DataFrame}) Base.precompile(Tuple{typeof(_semijoin_sorted),OnCol{Tuple{Vector{UInt32}, Vector{UInt32}, Vector{UInt32}}, 3},OnCol{Tuple{Vector{Union{Missing, UInt32}}, Vector{Union{Missing, UInt32}}, Vector{Union{Missing, UInt32}}}, 3},BitVector}) @@ -1454,10 +1433,8 @@ function precompile(all=false) Base.precompile(Tuple{typeof(do_call),typeof(minimum),Vector{Int},Vector{Int},Vector{Int},GroupedDataFrame{DataFrame},Tuple{Vector{Number}},Int}) Base.precompile(Tuple{typeof(getindex),DataFrame,Colon,Cols{Tuple{Int, Int, Symbol}}}) Base.precompile(Tuple{typeof(do_call),typeof(sum),Vector{Int},Vector{Int},Vector{Int},GroupedDataFrame{DataFrame},Tuple{Vector{DataFrame}},Int}) - Base.precompile(Tuple{typeof(_sortperm),DataFrame,Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, Tuple{Vector{Float64}, Vector{Float64}, Vector{Float64}, Vector{Int}, Vector{Float64}, Vector{Float64}, Vector{Float64}, Vector{Float64}, Vector{Float64}}}}) Base.precompile(Tuple{typeof(getindex),Index,Vector{Union{Missing, BigInt}}}) Base.precompile(Tuple{typeof(_semijoin_unsorted),OnCol{Tuple{Vector{String}, Vector{String}, Vector{String}}, 3},OnCol{Tuple{Vector{Union{Missing, String}}, Vector{Union{Missing, String}}, Vector{Union{Missing, String}}}, 3},BitVector,Bool}) - Base.precompile(Tuple{typeof(_sortperm),SubDataFrame{DataFrame, Index, Vector{Int}},Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, Tuple{SubArray{Union{Missing, String}, 1, PooledVector{Union{Missing, String}, UInt32, Vector{UInt32}}, Tuple{Vector{Int}}, false}, SubArray{Union{Missing, String}, 1, Vector{Union{Missing, String}}, Tuple{Vector{Int}}, false}, SubArray{Union{Missing, String}, 1, Vector{Union{Missing, String}}, Tuple{Vector{Int}}, false}}}}) Base.precompile(Tuple{typeof(do_call),typeof(mean),Vector{Int},Vector{Int},Vector{Int},GroupedDataFrame{DataFrame},Tuple{Vector{UnitRange{Int}}},Int}) Base.precompile(Tuple{typeof(insertcols!),DataFrame,Int,Pair{Symbol, Vector{Union{Missing, Int}}}}) Base.precompile(Tuple{Reduce{typeof(Base.add_sum), Nothing, typeof(/)},Vector{Rational{Int}},GroupedDataFrame{DataFrame}}) @@ -1467,7 +1444,6 @@ function precompile(all=false) end end Base.precompile(Tuple{typeof(row_group_slots),Tuple{Vector{Union{Missing, Int}}, Vector{Union{Missing, Float64}}},Val{false},Vector{Int},Bool,Bool}) - Base.precompile(Tuple{typeof(_sortperm),SubDataFrame{DataFrame, Index, Vector{Int}},Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, Tuple{SubArray{Union{Missing, String}, 1, Vector{Union{Missing, String}}, Tuple{Vector{Int}}, false}, SubArray{String, 1, PooledVector{String, UInt32, Vector{UInt32}}, Tuple{Vector{Int}}, false}}}}) Base.precompile(Tuple{typeof(_innerjoin_unsorted),OnCol{Tuple{Vector{String}, Vector{String}}, 2},OnCol{Tuple{Vector{String}, Vector{String}}, 2}}) Base.precompile(Tuple{typeof(push!),DataFrame,Matrix{Int}}) Base.precompile(Tuple{typeof(_semijoin_sorted),OnCol{Tuple{Vector{Int}, Vector{Int}, Vector{Int}}, 3},OnCol{Tuple{PooledVector{Int, UInt32, Vector{UInt32}}, PooledVector{Int, UInt32, Vector{UInt32}}, PooledVector{Int, UInt32, Vector{UInt32}}}, 3},BitVector}) @@ -1556,13 +1532,11 @@ function precompile(all=false) Base.precompile(Tuple{Reduce{typeof(max), Nothing, Nothing},Vector{BigFloat},GroupedDataFrame{DataFrame}}) Base.precompile(Tuple{typeof(==),DataFrame,SubDataFrame{DataFrame, SubIndex{Index, UnitRange{Int}, UnitRange{Int}}, UnitRange{Int}}}) Base.precompile(Tuple{typeof(show),IOBuffer,MIME{Symbol("text/latex")},SubDataFrame{DataFrame, SubIndex{Index, UnitRange{Int}, UnitRange{Int}}, Base.OneTo{Int}}}) - Base.precompile(Tuple{typeof(_sortperm),DataFrame,Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, Tuple{Vector{Float64}, Vector{Int}}}}) Base.precompile(Tuple{typeof(do_call),ComposedFunction{typeof(minimum), typeof(skipmissing)},Vector{Int},Vector{Int},Vector{Int},GroupedDataFrame{DataFrame},Tuple{Vector{Union{Missing, DataFrame}}},Int}) Base.precompile(Tuple{typeof(transform),Union{Function, Type},DataFrame}) Base.precompile(Tuple{typeof(do_call),typeof(var),Vector{Int},Vector{Int},Vector{Int},GroupedDataFrame{DataFrame},Tuple{Vector{Any}},Int}) Base.precompile(Tuple{Reduce{typeof(Base.add_sum), Nothing, Nothing},Vector{Union{Missing, Float64}},GroupedDataFrame{DataFrame}}) Base.precompile(Tuple{Core.kwftype(typeof(rightjoin)),NamedTuple{(:on,), Tuple{String}},typeof(rightjoin),DataFrame,DataFrame}) - Base.precompile(Tuple{typeof(_sortperm),SubDataFrame{DataFrame, Index, Vector{Int}},Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, Tuple{SubArray{Union{Missing, String}, 1, PooledVector{Union{Missing, String}, UInt32, Vector{UInt32}}, Tuple{Vector{Int}}, false}, SubArray{Union{Missing, String}, 1, PooledVector{Union{Missing, String}, UInt32, Vector{UInt32}}, Tuple{Vector{Int}}, false}, SubArray{Union{Missing, String}, 1, Vector{Union{Missing, String}}, Tuple{Vector{Int}}, false}}}}) Base.precompile(Tuple{typeof(_semijoin_unsorted),OnCol{Tuple{Vector{Int}, Vector{Int}}, 2},OnCol{Tuple{Vector{Union{Missing, Int}}, Vector{Union{Missing, Int}}}, 2},BitVector,Bool}) Base.precompile(Tuple{typeof(view),SubDataFrame{DataFrame, Index, Base.OneTo{Int}},InvertedIndex{Int},Between{Int, Int}}) Base.precompile(Tuple{typeof(subset),GroupedDataFrame{DataFrame},Any}) @@ -1585,6 +1559,22 @@ function precompile(all=false) Base.precompile(Tuple{typeof(_innerjoin_sorted),OnCol{Tuple{Vector{Union{Missing, Int}}, Vector{Union{Missing, Int}}, Vector{Union{Missing, Int}}}, 3},OnCol{Tuple{PooledVector{Int, UInt32, Vector{UInt32}}, PooledVector{Int, UInt32, Vector{UInt32}}, PooledVector{Int, UInt32, Vector{UInt32}}}, 3}}) Base.precompile(Tuple{Reduce{typeof(max), Nothing, Nothing},Vector{Int},GroupedDataFrame{DataFrame}}) Base.precompile(Tuple{Type{OnCol},Vector{String},Vararg{AbstractVector{T} where T, N} where N}) + Base.precompile(Tuple{typeof(_sortperm),DataFrame,SortingAlgorithms.TimSortAlg,DFPerm{Tuple{ForwardOrdering, ReverseOrdering{ForwardOrdering}, ForwardOrdering}, Tuple{Vector{Int}, Vector{Int}, Vector{Int}}}}) + Base.precompile(Tuple{typeof(_sortperm),DataFrame,SortingAlgorithms.TimSortAlg,DFPerm{Tuple{ForwardOrdering, ReverseOrdering{ForwardOrdering}, ReverseOrdering{ForwardOrdering}}, Tuple{Vector{Int}, Vector{Int}, Vector{Int}}}}) + Base.precompile(Tuple{typeof(_sortperm),SubDataFrame{DataFrame, SubIndex{Index, UnitRange{Int}, UnitRange{Int}}, UnitRange{Int}},Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, Tuple{SubArray{Float64, 1, Vector{Float64}, Tuple{UnitRange{Int}}, true}, SubArray{Float64, 1, Vector{Float64}, Tuple{UnitRange{Int}}, true}}}}) + Base.precompile(Tuple{typeof(_sortperm),DataFrame,SortingAlgorithms.TimSortAlg,DFPerm{Tuple{ReverseOrdering{ForwardOrdering}, ReverseOrdering{ForwardOrdering}, ForwardOrdering}, Tuple{Vector{Int}, Vector{Int}, Vector{Int}}}}) + Base.precompile(Tuple{typeof(_sortperm),DataFrame,Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, Tuple{Vector{Float64}, Vector{Int}}}}) + Base.precompile(Tuple{typeof(_sortperm),DataFrame,SortingAlgorithms.TimSortAlg,DFPerm{Tuple{ReverseOrdering{ForwardOrdering}, ForwardOrdering, ReverseOrdering{ForwardOrdering}}, Tuple{Vector{Int}, Vector{Int}, Vector{Int}}}}) + Base.precompile(Tuple{typeof(_sortperm),DataFrame,SortingAlgorithms.TimSortAlg,DFPerm{ForwardOrdering, Tuple{Vector{Int}, Vector{Int}}}}) + Base.precompile(Tuple{typeof(_sortperm),DataFrame,SortingAlgorithms.TimSortAlg,DFPerm{ForwardOrdering, Tuple{Vector{Int}, Vector{Int}, Vector{Int}}}}) + Base.precompile(Tuple{typeof(_sortperm),DataFrame,SortingAlgorithms.TimSortAlg,DFPerm{Tuple{ForwardOrdering, ForwardOrdering, ReverseOrdering{ForwardOrdering}}, Tuple{Vector{Int}, Vector{Int}, Vector{Int}}}}) + Base.precompile(Tuple{typeof(_sortperm),DataFrame,SortingAlgorithms.TimSortAlg,DFPerm{Tuple{ForwardOrdering, ReverseOrdering{ForwardOrdering}}, Tuple{Vector{Int}, Vector{Int}}}}) + Base.precompile(Tuple{typeof(_sortperm),DataFrame,Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, NTuple{4, Vector{Float64}}}}) + Base.precompile(Tuple{typeof(_sortperm),DataFrame,SortingAlgorithms.TimSortAlg,DFPerm{ReverseOrdering{ForwardOrdering}, Tuple{Vector{Int}, Vector{Int}, Vector{Int}}}}) + Base.precompile(Tuple{typeof(_sortperm),DataFrame,Base.Sort.MergeSortAlg,DFPerm{ForwardOrdering, Tuple{Vector{Int}, Vector{Int}}}}) + Base.precompile(Tuple{typeof(_sortperm),DataFrame,SortingAlgorithms.TimSortAlg,DFPerm{ReverseOrdering{ForwardOrdering}, Tuple{Vector{Int}, Vector{Int}}}}) + Base.precompile(Tuple{typeof(_sortperm),DataFrame,SortingAlgorithms.TimSortAlg,DFPerm{Tuple{ReverseOrdering{ForwardOrdering}, ForwardOrdering, ForwardOrdering}, Tuple{Vector{Int}, Vector{Int}, Vector{Int}}}}) + Base.precompile(Tuple{typeof(_sortperm),DataFrame,SortingAlgorithms.TimSortAlg,DFPerm{Tuple{ReverseOrdering{ForwardOrdering}, ForwardOrdering}, Tuple{Vector{Int}, Vector{Int}}}}) for v in ([1, 2], [2, 1], [2, 2, 1], Int32[1, 2], Int32[2, 1], Int32[2, 2, 1]), op in (identity, x -> string.(x), x -> PooledArrays.PooledArray(string.(x))), diff --git a/test/sort.jl b/test/sort.jl index 43b29ef7a1..73c927c58f 100644 --- a/test/sort.jl +++ b/test/sort.jl @@ -160,4 +160,52 @@ end @test sort(view(df, 1:2, 1:2), view=true) == sort(view(df, 1:2, 1:2)) end +@testset "hard tests of different sorting orders" begin + Random.seed!(1234) + df = DataFrame(rand([0, 1], 10^5, 3), :auto) + @test sortperm(df, :x1) == sortperm(df.x1) + @test sortperm(df, [:x1, :x2]) == sortperm(tuple.(df.x1, df.x2)) + @test sortperm(df, [:x1, :x2, :x3]) == sortperm(tuple.(df.x1, df.x2, df.x3)) + @test sortperm(df, :x1, rev=true) == sortperm(df.x1, rev=true) + @test sortperm(df, [:x1, :x2], rev=true) == + sortperm(tuple.(df.x1, df.x2), rev=true) + @test sortperm(df, [:x1, :x2, :x3], rev=true) == + sortperm(tuple.(df.x1, df.x2, df.x3), rev=true) + + @test issorted(sort(df, :x1), :x1) + @test issorted(sort(df, [:x1, :x2]), [:x1, :x2]) + @test issorted(sort(df, [:x1, :x2, :x3]), [:x1, :x2, :x3]) + @test issorted(sort(df, :x1, rev=true), :x1, rev=true) + @test issorted(sort(df, [:x1, :x2], rev=true), [:x1, :x2], rev=true) + @test issorted(sort(df, [:x1, :x2, :x3], rev=true), [:x1, :x2, :x3], rev=true) + + for r1 in (true, false) + @test sortperm(df, order(:x1, rev=r1)) == sortperm((1 - 2*r1) * df.x1) + @test issorted(sort(df, order(:x1, rev=r1)), order(:x1, rev=r1)) + for r2 in (true, false) + @test sortperm(df, [order(:x1, rev=r1), order(:x2, rev=r2)]) == + sortperm(tuple.((1 - 2*r1) * df.x1, (1 - 2*r2) * df.x2)) + @test issorted(sort(df, [order(:x1, rev=r1), order(:x2, rev=r2)]), + [order(:x1, rev=r1), order(:x2, rev=r2)]) + for r3 in (true, false) + @test sortperm(df, [order(:x1, rev=r1), order(:x2, rev=r2), order(:x3, rev=r3)]) == + sortperm(tuple.((1 - 2*r1) * df.x1, (1 - 2*r2) * df.x2, (1 - 2*r3) * df.x3)) + @test issorted(sort(df, [order(:x1, rev=r1), order(:x2, rev=r2), order(:x3, rev=r3)]), + [order(:x1, rev=r1), order(:x2, rev=r2), order(:x3, rev=r3)]) + end + end + end + + for i in 2:20 + df = DataFrame(ones(10, i), :auto) + df[!, end] = randperm(10) + @test sortperm(df) == sortperm(df[!, end]) + @test sortperm(df, [1:i-1; order(i, rev=true)]) == sortperm(df[!, end], rev=true) + df[!, 1] = randperm(10) + @test sortperm(df) == sortperm(df[!, 1]) + @test sortperm(df, [order(1, rev=true); 2:i-1; order(i, rev=true)]) == + sortperm(df[!, 1], rev=true) + end +end + end # module