JuliaData · bkamins · Oct 12, 2020 · Oct 9, 2020 · Oct 9, 2020 · Oct 11, 2020
diff --git a/NEWS.md b/NEWS.md
@@ -74,6 +74,8 @@
   which if set to `true` makes them retun a `SubDataFrame` view into the passed
   data frame.
 * add `only` method for `AbstractDataFrame` ([#2449](/~https://github.com/JuliaData/DataFrames.jl/pull/2449))
+* passing empty sets of columns in `filter`/`filter!` and in `select`/`transform`/`combine`
+  with `ByRow` is now accepted ([#2476](/~https://github.com/JuliaData/DataFrames.jl/pull/2476))
 
 ## Deprecated
 

diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl
@@ -994,9 +994,10 @@ end
 @inline function Base.filter((cols, f)::Pair, df::AbstractDataFrame; view::Bool=false)
     int_cols = index(df)[cols] # it will be AbstractVector{Int} or Int
     if length(int_cols) == 0
-        throw(ArgumentError("At least one column must be passed to filter on"))
+        rowidxs = [f() for _ in axes(df, 1)]
+    else
+        rowidxs = _filter_helper(f, (df[!, i] for i in int_cols)...)
     end
-    rowidxs = _filter_helper(f, (df[!, i] for i in int_cols)...)
     return view ? Base.view(df, rowidxs, :) : df[rowidxs, :]
 end
 
@@ -1006,9 +1007,10 @@ end
                                                      AbstractVector{<:Symbol}}},
                              df::AbstractDataFrame; view::Bool=false)
     if length(cols) == 0
-        throw(ArgumentError("At least one column must be passed to filter on"))
+        rowidxs = [f() for _ in axes(df, 1)]
+    else
+        rowidxs = _filter_helper(f, (df[!, i] for i in cols)...)
     end
-    rowidxs = _filter_helper(f, (df[!, i] for i in cols)...)
     return view ? Base.view(df, rowidxs, :) : df[rowidxs, :]
 end
 
@@ -1018,9 +1020,10 @@ _filter_helper(f, cols...)::BitVector = ((x...) -> f(x...)::Bool).(cols...)
                              view::Bool=false)
     df_tmp = select(df, cols.cols, copycols=false)
     if ncol(df_tmp) == 0
-        throw(ArgumentError("At least one column must be passed to filter on"))
+        rowidxs = [f(NamedTuple()) for _ in axes(df, 1)]
+    else
+        rowidxs = _filter_helper_astable(f, Tables.namedtupleiterator(df_tmp))
     end
-    rowidxs = _filter_helper_astable(f, Tables.namedtupleiterator(df_tmp))
     return view ? Base.view(df, rowidxs, :) : df[rowidxs, :]
 end
 
@@ -1101,7 +1104,7 @@ julia> filter!(AsTable(:) => nt -> nt.x == 1 || nt.y == "b", df)
 │ 3   │ 1     │ b      │
 ```
 """
-Base.filter!(f, df::AbstractDataFrame) = _filter!_helper(df, f, eachrow(df))
+Base.filter!(f, df::AbstractDataFrame) = delete!(df, findall(!f, eachrow(df)))
 Base.filter!((col, f)::Pair{<:ColumnIndex}, df::AbstractDataFrame) =
     _filter!_helper(df, f, df[!, col])
 Base.filter!((cols, f)::Pair{<:AbstractVector{Symbol}}, df::AbstractDataFrame) =
@@ -1115,17 +1118,20 @@ Base.filter!((cols, f)::Pair{<:AbstractVector{Int}}, df::AbstractDataFrame) =
 
 function _filter!_helper(df::AbstractDataFrame, f, cols...)
     if length(cols) == 0
-        throw(ArgumentError("At least one column must be passed to filter on"))
+        rowidxs = findall(x -> !f(), axes(df, 1))
+    else
+        rowidxs = findall(((x...) -> !(f(x...)::Bool)).(cols...))
     end
-    return delete!(df, findall(((x...) -> !(f(x...)::Bool)).(cols...)))
+    return delete!(df, rowidxs)
 end
 
 function Base.filter!((cols, f)::Pair{<:AsTable}, df::AbstractDataFrame)
     dff = select(df, cols.cols, copycols=false)
     if ncol(dff) == 0
-        throw(ArgumentError("At least one column must be passed to filter on"))
+        return delete!(df, findall(x -> !f(NamedTuple()), axes(df, 1)))
+    else
+        return _filter!_helper_astable(df, Tables.namedtupleiterator(dff), f)
     end
-    return _filter!_helper_astable(df, Tables.namedtupleiterator(dff), f)
 end
 
 _filter!_helper_astable(df::AbstractDataFrame, nti::Tables.NamedTupleIterator, f) =

diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl
@@ -67,10 +67,11 @@ normalize_selection(idx::AbstractIndex, sel::Pair{<:ColumnIndex, <:AbstractStrin
     normalize_selection(idx, first(sel) => Symbol(last(sel)), renamecols::Bool)
 
 function normalize_selection(idx::AbstractIndex,
-                             sel::Pair{<:Any,<:Pair{<:Base.Callable,
-                                                    <:Union{Symbol, AbstractString, DataType,
-                                                            AbstractVector{Symbol},
-                                                            AbstractVector{<:AbstractString}}}},
+                             @nospecialize(sel::Pair{<:Any,
+                                                     <:Pair{<:Base.Callable,
+                                                            <:Union{Symbol, AbstractString, DataType,
+                                                                    AbstractVector{Symbol},
+                                                                    AbstractVector{<:AbstractString}}}}),
                              renamecols::Bool)
     lls = last(last(sel))
     if lls isa DataType
@@ -170,31 +171,29 @@ function normalize_selection(idx::AbstractIndex,
     return (wanttable ? AsTable(c) : c) => fun => newcol
 end
 
-function _transformation_helper(df::AbstractDataFrame,
-                                col_idx::Union{Nothing, Int, AbstractVector{Int}, AsTable},
-                                @nospecialize(fun))
-    if col_idx === nothing
-        return fun(df)
-    elseif col_idx isa Int
-        return fun(df[!, col_idx])
-    elseif col_idx isa AsTable
-        tbl = Tables.columntable(select(df, col_idx.cols, copycols=false))
-        if isempty(tbl) && fun isa ByRow
-            return [fun.fun(NamedTuple()) for _ in 1:nrow(df)]
-        else
-            return fun(tbl)
-        end
+_transformation_helper(df::AbstractDataFrame, col_idx::Nothing, fun) = fun(df)
+_transformation_helper(df::AbstractDataFrame, col_idx::Int, fun) = fun(df[!, col_idx])
+
+_empty_astable_helper(fun, len) = [fun(NamedTuple()) for _ in 1:len]
+
+function _transformation_helper(df::AbstractDataFrame, col_idx::AsTable, fun)
+    tbl = Tables.columntable(select(df, col_idx.cols, copycols=false))
+    if isempty(tbl) && fun isa ByRow
+        return _empty_astable_helper(fun.fun, nrow(df))
     else
-        # it should be fast enough here as we do not expect to do it millions of times
-        @assert col_idx isa AbstractVector{Int}
-        if isempty(col_idx) && fun isa ByRow
-            return [fun.fun() for _ in 1:nrow(df)]
-        else
-            cdf = eachcol(df)
-            return fun(map(c -> cdf[c], col_idx)...)
-        end
+        return fun(tbl)
+    end
+end
+
+_empty_selector_helper(fun, len) = [fun() for _ in 1:len]
+
+function _transformation_helper(df::AbstractDataFrame, col_idx::AbstractVector{Int}, fun)
+    if isempty(col_idx) && fun isa ByRow
+        return _empty_selector_helper(fun.fun, nrow(df))
+    else
+        cdf = eachcol(df)
+        return fun(map(c -> cdf[c], col_idx)...)
     end
-    throw(ErrorException("unreachable reached"))
 end
 
 function _gen_colnames(@nospecialize(res), newname::Union{AbstractVector{Symbol},
@@ -426,6 +425,8 @@ function select_transform!(@nospecialize(nc::Union{Base.Callable, Pair{<:Union{I
     end
 end
 
+@nospecialize
+
 SELECT_ARG_RULES =
     """
     Arguments passed as `args...` can be:
@@ -1101,3 +1102,5 @@ function manipulate(dfv::SubDataFrame, args...; copycols::Bool, keeprows::Bool,
         return view(dfv, :, isempty(newinds) ? [] : All(newinds...))
     end
 end
+
+@specialize
diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl
@@ -711,7 +711,11 @@ end
 function do_call(f::Any, idx::AbstractVector{<:Integer},
                  starts::AbstractVector{<:Integer}, ends::AbstractVector{<:Integer},
                  gd::GroupedDataFrame, incols::Tuple{}, i::Integer)
-    f()
+    if f isa ByRow
+        return [f.fun() for _ in 1:(ends[i] - starts[i] + 1)]
+    else
+        return f()
+    end
 end
 
 function do_call(f::Any, idx::AbstractVector{<:Integer},
@@ -753,8 +757,12 @@ end
 function do_call(f::Any, idx::AbstractVector{<:Integer},
                  starts::AbstractVector{<:Integer}, ends::AbstractVector{<:Integer},
                  gd::GroupedDataFrame, incols::NamedTuple, i::Integer)
-    idx = idx[starts[i]:ends[i]]
-    return f(map(c -> view(c, idx), incols))
+    if f isa ByRow && isempty(incols)
+        return [f.fun(NamedTuple()) for _ in 1:(ends[i] - starts[i] + 1)]
+    else
+        idx = idx[starts[i]:ends[i]]
+        return f(map(c -> view(c, idx), incols))
+    end
 end
 
 function do_call(f::Any, idx::AbstractVector{<:Integer},

diff --git a/test/data.jl b/test/data.jl
@@ -453,11 +453,45 @@ end
     @test filter(AsTable("x") => testfun, df) == DataFrame(x=[3, 2], y=["b", "a"])
     filter!(AsTable("x") => testfun, df)
     @test df == DataFrame(x=[3, 2], y=["b", "a"])
+end
+
+@testset "empty arg to filter and filter!" begin
+    df = DataFrame(x = [3, 1, 2, 1], y = ["b", "c", "a", "b"])
+
+    @test filter([] => () -> true, df) == df
+    @test filter(AsTable(r"z") => x -> true, df) == df
+    @test filter!([] => () -> true, copy(df)) == df
+    @test filter!(AsTable(r"z") => x -> true, copy(df)) == df
+
+    flipflop0 = let
+        state = false
+        () -> (state = !state)
+    end
+
+    flipflop1 = let
+        state = false
+        x -> (state = !state)
+    end
 
-    @test_throws ArgumentError filter([] => () -> true, df)
-    @test_throws ArgumentError filter(AsTable(r"z") => () -> true, df)
-    @test_throws ArgumentError filter!([] => () -> true, df)
-    @test_throws ArgumentError filter!(AsTable(r"z") => () -> true, df)
+    @test filter([] => flipflop0, df) == df[[1,3], :]
+    @test filter(Int[] => flipflop0, df) == df[[1,3], :]
+    @test filter(String[] => flipflop0, df) == df[[1,3], :]
+    @test filter(Symbol[] => flipflop0, df) == df[[1,3], :]
+    @test filter(r"z" => flipflop0, df) == df[[1,3], :]
+    @test filter(Not(All()) => flipflop0, df) == df[[1,3], :]
+    @test filter(AsTable(r"z") => flipflop1, df) == df[[1,3], :]
+    @test filter(AsTable([]) => flipflop1, df) == df[[1,3], :]
+    @test filter!([] => flipflop0, copy(df)) == df[[1,3], :]
+    @test filter!(Int[] => flipflop0, copy(df)) == df[[1,3], :]
+    @test filter!(String[] => flipflop0, copy(df)) == df[[1,3], :]
+    @test filter!(Symbol[] => flipflop0, copy(df)) == df[[1,3], :]
+    @test filter!(r"z" => flipflop0, copy(df)) == df[[1,3], :]
+    @test filter!(Not(All()) => flipflop0, copy(df)) == df[[1,3], :]
+    @test filter!(AsTable(r"z") => flipflop1, copy(df)) == df[[1,3], :]
+    @test filter!(AsTable([]) => flipflop1, copy(df)) == df[[1,3], :]
+
+    @test_throws MethodError filter([] => flipflop1, df)
+    @test_throws MethodError filter(AsTable([]) => flipflop0, df)
 end
 
 @testset "names with cols" begin

diff --git a/test/grouping.jl b/test/grouping.jl
@@ -2874,4 +2874,45 @@ end
     @test df == DataFrame(a=1:3, b=4:6, c=7:9, d=10:12, a_b=5:2:9, a_b_etc=22:4:30)
 end
 
+@testset "empty ByRow" begin
+    inc0 = let
+        state = 0
+        () -> (state += 1)
+    end
+
+    inc1 = let
+        state = 0
+        x -> (state += 1)
+    end
+
+    df = DataFrame(a=[1,1,1,2,2,3,4,4,5,5,5,5], b=1:12)
+    gdf = groupby_checked(df, :a)
+
+    @test select(gdf, [] => ByRow(inc0) => :bin) ==
+          DataFrame(a=df.a, bin=1:12)
+    @test combine(gdf, [] => ByRow(inc0) => :bin) ==
+          DataFrame(a=df.a, bin=13:24)
+    @test select(gdf, AsTable([]) => ByRow(inc1) => :bin) ==
+          DataFrame(a=df.a, bin=1:12)
+    @test combine(gdf, AsTable([]) => ByRow(inc1) => :bin) ==
+          DataFrame(a=df.a, bin=13:24)
+    @test combine(gdf[Not(2)], [] => ByRow(inc0) => :bin) ==
+          DataFrame(a=df.a[Not(4:5)], bin=25:34)
+    @test combine(gdf[Not(2)], AsTable([]) => ByRow(inc1) => :bin) ==
+          DataFrame(a=df.a[Not(4:5)], bin=25:34)
+
+    # note that type inference in a comprehension does not always work
+    @test isequal_coltyped(combine(gdf[[]], [] => ByRow(inc0) => :bin),
+                           DataFrame(a=Int[], bin=Any[]))
+    @test isequal_coltyped(combine(gdf[[]], [] => ByRow(rand) => :bin),
+                           DataFrame(a=Int[], bin=Float64[]))
+    @test isequal_coltyped(combine(gdf[[]], AsTable([]) => ByRow(inc1) => :bin),
+                           DataFrame(a=Int[], bin=Any[]))
+    @test isequal_coltyped(combine(gdf[[]], AsTable([]) => ByRow(x -> rand()) => :bin),
+                           DataFrame(a=Int[], bin=Float64[]))
+
+    @test_throws MethodError select(gdf, [] => ByRow(inc1) => :bin)
+    @test_throws MethodError select(gdf, AsTable([]) => ByRow(inc0) => :bin)
+end
+
 end # module