Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow passing empty sets of columns to ByRow and filter #2476

Merged
merged 7 commits into from
Oct 12, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@
which if set to `true` makes them retun a `SubDataFrame` view into the passed
data frame.
* add `only` method for `AbstractDataFrame` ([#2449](/~https://github.com/JuliaData/DataFrames.jl/pull/2449))
* passig empty sets of columns in `filter`/`filter!` and in `select`/`transform`/`combine`
bkamins marked this conversation as resolved.
Show resolved Hide resolved
with `ByRow` is now accepted ([#2476](/~https://github.com/JuliaData/DataFrames.jl/pull/2476))

## Deprecated

Expand Down
28 changes: 17 additions & 11 deletions src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -994,9 +994,10 @@ end
@inline function Base.filter((cols, f)::Pair, df::AbstractDataFrame; view::Bool=false)
int_cols = index(df)[cols] # it will be AbstractVector{Int} or Int
if length(int_cols) == 0
throw(ArgumentError("At least one column must be passed to filter on"))
rowidxs = [f() for _ in axes(df, 1)]
else
rowidxs = _filter_helper(f, (df[!, i] for i in int_cols)...)
end
rowidxs = _filter_helper(f, (df[!, i] for i in int_cols)...)
return view ? Base.view(df, rowidxs, :) : df[rowidxs, :]
end

Expand All @@ -1006,9 +1007,10 @@ end
AbstractVector{<:Symbol}}},
df::AbstractDataFrame; view::Bool=false)
if length(cols) == 0
throw(ArgumentError("At least one column must be passed to filter on"))
rowidxs = [f() for _ in axes(df, 1)]
else
rowidxs = _filter_helper(f, (df[!, i] for i in cols)...)
end
rowidxs = _filter_helper(f, (df[!, i] for i in cols)...)
return view ? Base.view(df, rowidxs, :) : df[rowidxs, :]
end

Expand All @@ -1018,9 +1020,10 @@ _filter_helper(f, cols...)::BitVector = ((x...) -> f(x...)::Bool).(cols...)
view::Bool=false)
df_tmp = select(df, cols.cols, copycols=false)
if ncol(df_tmp) == 0
throw(ArgumentError("At least one column must be passed to filter on"))
rowidxs = [f(NamedTuple()) for _ in axes(df, 1)]
else
rowidxs = _filter_helper_astable(f, Tables.namedtupleiterator(df_tmp))
end
rowidxs = _filter_helper_astable(f, Tables.namedtupleiterator(df_tmp))
return view ? Base.view(df, rowidxs, :) : df[rowidxs, :]
end

Expand Down Expand Up @@ -1101,7 +1104,7 @@ julia> filter!(AsTable(:) => nt -> nt.x == 1 || nt.y == "b", df)
│ 3 │ 1 │ b │
```
"""
Base.filter!(f, df::AbstractDataFrame) = _filter!_helper(df, f, eachrow(df))
Base.filter!(f, df::AbstractDataFrame) = delete!(df, findall(!f, eachrow(df)))
Base.filter!((col, f)::Pair{<:ColumnIndex}, df::AbstractDataFrame) =
_filter!_helper(df, f, df[!, col])
Base.filter!((cols, f)::Pair{<:AbstractVector{Symbol}}, df::AbstractDataFrame) =
Expand All @@ -1115,17 +1118,20 @@ Base.filter!((cols, f)::Pair{<:AbstractVector{Int}}, df::AbstractDataFrame) =

function _filter!_helper(df::AbstractDataFrame, f, cols...)
if length(cols) == 0
throw(ArgumentError("At least one column must be passed to filter on"))
rowidxs = findall(x -> !f(), axes(df, 1))
else
rowidxs = findall(((x...) -> !(f(x...)::Bool)).(cols...))
end
return delete!(df, findall(((x...) -> !(f(x...)::Bool)).(cols...)))
return delete!(df, rowidxs)
end

function Base.filter!((cols, f)::Pair{<:AsTable}, df::AbstractDataFrame)
dff = select(df, cols.cols, copycols=false)
if ncol(dff) == 0
throw(ArgumentError("At least one column must be passed to filter on"))
return delete!(df, findall(x -> !f(NamedTuple()), axes(df, 1)))
else
return _filter!_helper_astable(df, Tables.namedtupleiterator(dff), f)
end
return _filter!_helper_astable(df, Tables.namedtupleiterator(dff), f)
end

_filter!_helper_astable(df::AbstractDataFrame, nti::Tables.NamedTupleIterator, f) =
Expand Down
14 changes: 11 additions & 3 deletions src/groupeddataframe/splitapplycombine.jl
Original file line number Diff line number Diff line change
Expand Up @@ -711,7 +711,11 @@ end
function do_call(f::Any, idx::AbstractVector{<:Integer},
starts::AbstractVector{<:Integer}, ends::AbstractVector{<:Integer},
gd::GroupedDataFrame, incols::Tuple{}, i::Integer)
f()
if f isa ByRow
return [f.fun() for _ in 1:(ends[i] - starts[i] + 1)]
else
return f()
end
end

function do_call(f::Any, idx::AbstractVector{<:Integer},
Expand Down Expand Up @@ -753,8 +757,12 @@ end
function do_call(f::Any, idx::AbstractVector{<:Integer},
starts::AbstractVector{<:Integer}, ends::AbstractVector{<:Integer},
gd::GroupedDataFrame, incols::NamedTuple, i::Integer)
idx = idx[starts[i]:ends[i]]
return f(map(c -> view(c, idx), incols))
if f isa ByRow && isempty(incols)
return [f.fun(NamedTuple()) for _ in 1:(ends[i] - starts[i] + 1)]
else
idx = idx[starts[i]:ends[i]]
return f(map(c -> view(c, idx), incols))
end
end

function do_call(f::Any, idx::AbstractVector{<:Integer},
Expand Down
42 changes: 38 additions & 4 deletions test/data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -453,11 +453,45 @@ end
@test filter(AsTable("x") => testfun, df) == DataFrame(x=[3, 2], y=["b", "a"])
filter!(AsTable("x") => testfun, df)
@test df == DataFrame(x=[3, 2], y=["b", "a"])
end

@testset "empty arg to filter and filter!" begin
df = DataFrame(x = [3, 1, 2, 1], y = ["b", "c", "a", "b"])

@test filter([] => () -> true, df) == df
@test filter(AsTable(r"z") => x -> true, df) == df
@test filter!([] => () -> true, copy(df)) == df
@test filter!(AsTable(r"z") => x -> true, copy(df)) == df

flipflop0 = let
state = false
() -> (state = !state)
end

flipflop1 = let
state = false
x -> (state = !state)
end

@test_throws ArgumentError filter([] => () -> true, df)
@test_throws ArgumentError filter(AsTable(r"z") => () -> true, df)
@test_throws ArgumentError filter!([] => () -> true, df)
@test_throws ArgumentError filter!(AsTable(r"z") => () -> true, df)
@test filter([] => flipflop0, df) == df[[1,3], :]
@test filter(Int[] => flipflop0, df) == df[[1,3], :]
@test filter(String[] => flipflop0, df) == df[[1,3], :]
@test filter(Symbol[] => flipflop0, df) == df[[1,3], :]
@test filter(r"z" => flipflop0, df) == df[[1,3], :]
@test filter(Not(All()) => flipflop0, df) == df[[1,3], :]
@test filter(AsTable(r"z") => flipflop1, df) == df[[1,3], :]
@test filter(AsTable([]) => flipflop1, df) == df[[1,3], :]
@test filter!([] => flipflop0, copy(df)) == df[[1,3], :]
@test filter!(Int[] => flipflop0, copy(df)) == df[[1,3], :]
@test filter!(String[] => flipflop0, copy(df)) == df[[1,3], :]
@test filter!(Symbol[] => flipflop0, copy(df)) == df[[1,3], :]
@test filter!(r"z" => flipflop0, copy(df)) == df[[1,3], :]
@test filter!(Not(All()) => flipflop0, copy(df)) == df[[1,3], :]
@test filter!(AsTable(r"z") => flipflop1, copy(df)) == df[[1,3], :]
@test filter!(AsTable([]) => flipflop1, copy(df)) == df[[1,3], :]

@test_throws MethodError filter([] => flipflop1, df)
@test_throws MethodError filter(AsTable([]) => flipflop0, df)
end

@testset "names with cols" begin
Expand Down
41 changes: 41 additions & 0 deletions test/grouping.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2874,4 +2874,45 @@ end
@test df == DataFrame(a=1:3, b=4:6, c=7:9, d=10:12, a_b=5:2:9, a_b_etc=22:4:30)
end

@testset "empty ByRow" begin
inc0 = let
state = 0
() -> (state += 1)
end

inc1 = let
state = 0
x -> (state += 1)
end

df = DataFrame(a=[1,1,1,2,2,3,4,4,5,5,5,5], b=1:12)
gdf = groupby_checked(df, :a)

@test select(gdf, [] => ByRow(inc0) => :bin) ==
DataFrame(a=df.a, bin=1:12)
@test combine(gdf, [] => ByRow(inc0) => :bin) ==
DataFrame(a=df.a, bin=13:24)
@test select(gdf, AsTable([]) => ByRow(inc1) => :bin) ==
DataFrame(a=df.a, bin=1:12)
@test combine(gdf, AsTable([]) => ByRow(inc1) => :bin) ==
DataFrame(a=df.a, bin=13:24)
@test combine(gdf[Not(2)], [] => ByRow(inc0) => :bin) ==
DataFrame(a=df.a[Not(4:5)], bin=25:34)
@test combine(gdf[Not(2)], AsTable([]) => ByRow(inc1) => :bin) ==
DataFrame(a=df.a[Not(4:5)], bin=25:34)

# note that type inference in a comprehension does not always work
bkamins marked this conversation as resolved.
Show resolved Hide resolved
@test isequal_coltyped(combine(gdf[[]], [] => ByRow(inc0) => :bin),
DataFrame(a=Int[], bin=Any[]))
@test isequal_coltyped(combine(gdf[[]], [] => ByRow(rand) => :bin),
DataFrame(a=Int[], bin=Float64[]))
@test isequal_coltyped(combine(gdf[[]], AsTable([]) => ByRow(inc1) => :bin),
DataFrame(a=Int[], bin=Any[]))
@test isequal_coltyped(combine(gdf[[]], AsTable([]) => ByRow(x -> rand()) => :bin),
DataFrame(a=Int[], bin=Float64[]))

@test_throws MethodError select(gdf, [] => ByRow(inc1) => :bin)
@test_throws MethodError select(gdf, AsTable([]) => ByRow(inc0) => :bin)
end

end # module