Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BREAKING] improve naming of ComposedFunction in aggregation #2274

Merged
merged 10 commits into from
Jun 8, 2020
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/other/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,16 @@ function funname(f)
n = nameof(f)
String(n)[1] == '#' ? :function : n
end

# Compatibility shim: make `ComposedFunction` name the type of objects created by `∘`.
if isdefined(Base, :ComposedFunction)
bkamins marked this conversation as resolved.
Show resolved Hide resolved
bkamins marked this conversation as resolved.
Show resolved Hide resolved
# Base already provides the type, so reuse it directly.
using Base: ComposedFunction
else
# Base has no such binding: recover the type from a sample composition.
const ComposedFunction = let h = identity ∘ convert
# Sanity checks: the composed object must expose its two components
# as fields `f` (applied last) and `g` (applied first).
@assert h.f === identity
@assert h.g === convert
# Look the type up by name in its defining module rather than using `typeof(h)`;
# presumably this yields the unparameterized type, so that every composition
# (not only `identity ∘ convert`) is an instance of it — TODO confirm.
getfield(parentmodule(typeof(h)), nameof(typeof(h)))
end
# Verify that the recovered type really covers a composed function.
@assert identity ∘ convert isa ComposedFunction
end

"""
    funname(c::ComposedFunction)

Return the name used for a composed function: the names of its two components
(each obtained through `funname`) joined with an underscore, outer function first.
For example `sum ∘ skipmissing` gives `:sum_skipmissing`.
"""
function funname(c::ComposedFunction)
    # `Base.ComposedFunction` (Julia 1.6 and later) stores its components in the
    # `outer`/`inner` fields, whereas the object produced by `∘` on earlier
    # releases exposes them as `f`/`g`. Support both layouts so that the method
    # works whichever branch defined `ComposedFunction`.
    if :outer in fieldnames(typeof(c))
        return Symbol(funname(c.outer), :_, funname(c.inner))
    end
    return Symbol(funname(c.f), :_, funname(c.g))
end
29 changes: 15 additions & 14 deletions test/grouping.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2222,29 +2222,30 @@ end
df = DataFrame(g=[1,1,1,2,2,2], x=Any[1,1,1,1.5,1.5,1.5])
gdf = groupby_checked(df, :g)
@test combine(gdf, :x => sum) == DataFrame(g=1:2, x_sum=[3.0, 4.5])
@test combine(gdf, :x => sum∘skipmissing) == DataFrame(g=1:2, x_function=[3.0, 4.5])

@test combine(gdf, :x => sum∘skipmissing) == DataFrame(g=1:2, x_sum_skipmissing=[3.0, 4.5])
@test combine(gdf, :x => mean∘skipmissing) == DataFrame(g=1:2, x_mean_skipmissing=[1.0, 1.5])
@test combine(gdf, :x => var∘skipmissing) == DataFrame(g=1:2, x_var_skipmissing=[0.0, 0.0])
@test combine(gdf, :x => mean) == DataFrame(g=1:2, x_mean=[1.0, 1.5])
@test combine(gdf, :x => mean∘skipmissing) == DataFrame(g=1:2, x_function=[1.0, 1.5])
@test combine(gdf, :x => var) == DataFrame(g=1:2, x_var=[0.0, 0.0])
@test combine(gdf, :x => var∘skipmissing) == DataFrame(g=1:2, x_function=[0.0, 0.0])

df = DataFrame(g=[1,1,1,2,2,2], x=Any[1,1,1,1,1,missing])
gdf = groupby_checked(df, :g)
@test combine(gdf, :x => sum∘skipmissing) == DataFrame(g=1:2, x_sum_skipmissing=[3, 2])
@test combine(gdf, :x => mean∘skipmissing) == DataFrame(g=1:2, x_mean_skipmissing=[1.0, 1.0])
@test combine(gdf, :x => var∘skipmissing) == DataFrame(g=1:2, x_var_skipmissing=[0.0, 0.0])
@test combine(gdf, :x => sum) ≅ DataFrame(g=1:2, x_sum=[3, missing])
@test combine(gdf, :x => sum∘skipmissing) == DataFrame(g=1:2, x_function=[3, 2])
@test combine(gdf, :x => mean) ≅ DataFrame(g=1:2, x_mean=[1.0, missing])
@test combine(gdf, :x => mean∘skipmissing) == DataFrame(g=1:2, x_function=[1.0, 1.0])
@test combine(gdf, :x => var) ≅ DataFrame(g=1:2, x_var=[0.0, missing])
@test combine(gdf, :x => var∘skipmissing) == DataFrame(g=1:2, x_function=[0.0, 0.0])

df = DataFrame(g=[1,1,1,2,2,2], x=Union{Real, Missing}[1,1,1,1,1,missing])
gdf = groupby_checked(df, :g)
@test combine(gdf, :x => sum∘skipmissing) == DataFrame(g=1:2, x_sum_skipmissing=[3, 2])
@test combine(gdf, :x => mean∘skipmissing) == DataFrame(g=1:2, x_mean_skipmissing=[1.0, 1.0])
@test combine(gdf, :x => var∘skipmissing) == DataFrame(g=1:2, x_var_skipmissing=[0.0, 0.0])
@test combine(gdf, :x => sum) ≅ DataFrame(g=1:2, x_sum=[3, missing])
@test combine(gdf, :x => sum∘skipmissing) == DataFrame(g=1:2, x_function=[3, 2])
@test combine(gdf, :x => mean) ≅ DataFrame(g=1:2, x_mean=[1.0, missing])
@test combine(gdf, :x => mean∘skipmissing) == DataFrame(g=1:2, x_function=[1.0, 1.0])
@test combine(gdf, :x => var) ≅ DataFrame(g=1:2, x_var=[0.0, missing])
@test combine(gdf, :x => var∘skipmissing) == DataFrame(g=1:2, x_function=[0.0, 0.0])

Random.seed!(1)
df = DataFrame(g = rand(1:2, 1000), x1 = rand(Int, 1000))
Expand Down Expand Up @@ -2283,7 +2284,7 @@ end
gdf = groupby_checked(df, :g)
@test combine(gdf, :x => sum)[1, 2] isa Missing
@test eltype(combine(gdf, :x => sum)[!, 2]) === Missing
@test combine(gdf, :x => sum∘skipmissing) == DataFrame(g=1, x_function=0)
@test combine(gdf, :x => sum∘skipmissing) == DataFrame(g=1, x_sum_skipmissing=0)
@test eltype(combine(gdf, :x => sum∘skipmissing)[!, 2]) === Int
df = DataFrame(g=[1,1,1,1,1,1], x=convert(Vector{Union{Int, Missing}}, fill(missing, 6)))
gdf = groupby_checked(df, :g)
Expand All @@ -2308,12 +2309,12 @@ end
combine(gdf, :x => (x -> sum(x)) => :a, :x => (x -> prod(x)) => :b)
df = DataFrame(g=[1, 1], x=[missing, "a"])
gdf = groupby_checked(df, :g)
@test combine(gdf, :x => sum∘skipmissing => :a, :x => prod∘skipmissing => :b) ==
combine(gdf, :x => (x -> sum(skipmissing(x))) => :a, :x => (x -> prod(skipmissing(x))) => :b)
@test Matrix(combine(gdf, :x => sum∘skipmissing => :a, :x => prod∘skipmissing => :b)) ==
bkamins marked this conversation as resolved.
Show resolved Hide resolved
Matrix(combine(gdf, :x => (x -> sum(skipmissing(x))) => :a, :x => (x -> prod(skipmissing(x))) => :b))
df = DataFrame(g=[1, 1], x=Any[missing, "a"])
gdf = groupby_checked(df, :g)
@test combine(gdf, :x => sum∘skipmissing => :a, :x => prod∘skipmissing => :b) ==
combine(gdf, :x => (x -> sum(skipmissing(x))) => :a, :x => (x -> prod(skipmissing(x))) => :b)
@test Matrix(combine(gdf, :x => sum∘skipmissing => :a, :x => prod∘skipmissing => :b)) ==
Matrix(combine(gdf, :x => (x -> sum(skipmissing(x))) => :a, :x => (x -> prod(skipmissing(x))) => :b))

df = DataFrame(g=[1, 2], x=Any[nothing, "a"])
gdf = groupby_checked(df, :g)
Expand Down
15 changes: 8 additions & 7 deletions test/select.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1106,12 +1106,13 @@ end
DataFrame(a_b_c_sum=map(sum, eachrow(df)))
@test transform(df, AsTable(:) => sum) ==
DataFrame(a=1:3, b=4:6, c=7:9, a_b_c_sum=map(sum, eachrow(df)))

@test select(df, AsTable(:) => sum ∘ sum) ==
repeat(DataFrame(a_b_c_function=45), nrow(df))
repeat(DataFrame(a_b_c_sum_sum=45), nrow(df))
@test combine(df, AsTable(:) => sum ∘ sum) ==
DataFrame(a_b_c_function=45)
DataFrame(a_b_c_sum_sum=45)
@test transform(df, AsTable(:) => sum ∘ sum) ==
DataFrame(a=1:3, b=4:6, c=7:9, a_b_c_function=45)
DataFrame(a=1:3, b=4:6, c=7:9, a_b_c_sum_sum=45)

@test select(df, AsTable(:) => ByRow(x -> [x])) ==
DataFrame(a_b_c_function=[[(a = 1, b = 4, c = 7)],
Expand Down Expand Up @@ -1175,9 +1176,9 @@ end
@test df2[:, 1] !== df.x

@test combine(df, :x => sum, :y => collect ∘ extrema) ==
DataFrame(x_sum=[6, 6], y_function = [4, 6])
DataFrame(x_sum=[6, 6], y_collect_extrema = [4, 6])
@test combine(df, :y => collect ∘ extrema, :x => sum) ==
DataFrame(y_function = [4, 6], x_sum=[6, 6])
DataFrame(y_collect_extrema = [4, 6], x_sum=[6, 6])
@test combine(df, :x => sum, :y => x -> []) ==
DataFrame(x_sum=[], y_function = [])
@test combine(df, :y => x -> [], :x => sum) ==
Expand All @@ -1195,9 +1196,9 @@ end
@test df2[:, 1] !== dfv.x

@test combine(dfv, :x => sum, :y => collect ∘ extrema) ==
DataFrame(x_sum=[3, 3], y_function = [4, 5])
DataFrame(x_sum=[3, 3], y_collect_extrema = [4, 5])
@test combine(dfv, :y => collect ∘ extrema, :x => sum) ==
DataFrame(y_function = [4, 5], x_sum=[3, 3])
DataFrame(y_collect_extrema = [4, 5], x_sum=[3, 3])
end

@testset "select and transform AbstractDataFrame" begin
Expand Down
6 changes: 6 additions & 0 deletions test/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,10 @@ end
@test_throws MethodError repeat!(view(df, 1:2, :), inner = 2, outer = 3)
end

# A chain of composed functions is named by joining the component names with `_`.
@testset "funname" begin
    composed = sum ∘ skipmissing ∘ Base.div12
    expected = :sum_skipmissing_div12
    @test DataFrames.funname(composed) == expected
end

end # module