From e3b3da1d0384c5d663ecc560a8e24c7944d41a7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 30 Mar 2021 09:29:16 +0200 Subject: [PATCH 01/22] split _combine_prepare --- src/groupeddataframe/splitapplycombine.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl index d1f81b932c..1f7df2c3b1 100644 --- a/src/groupeddataframe/splitapplycombine.jl +++ b/src/groupeddataframe/splitapplycombine.jl @@ -37,6 +37,14 @@ function _combine_prepare(gd::GroupedDataFrame, push!(cs_vec, p) end end + return _combine_prepare_norm(gd, cs_vec, keepkeys, ungroup, copycols, + keeprows, renamecols) +end + +function _combine_prepare_norm(gd::GroupedDataFrame, + cs_vec::Vector{Any}, + keepkeys::Bool, ungroup::Bool, copycols::Bool, + keeprows::Bool, renamecols::Bool) if any(x -> x isa Pair && first(x) isa Tuple, cs_vec) x = cs_vec[findfirst(x -> first(x) isa Tuple, cs_vec)] # an explicit error is thrown as this was allowed in the past From bb90d773cc6c7310da152425f7011a753e568e6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 30 Mar 2021 11:23:04 +0200 Subject: [PATCH 02/22] add despecialization to split-apply-combine --- src/groupeddataframe/complextransforms.jl | 19 +++++++----- src/groupeddataframe/splitapplycombine.jl | 38 +++++++++++++---------- 2 files changed, 32 insertions(+), 25 deletions(-) diff --git a/src/groupeddataframe/complextransforms.jl b/src/groupeddataframe/complextransforms.jl index a90e722cee..72b2928685 100644 --- a/src/groupeddataframe/complextransforms.jl +++ b/src/groupeddataframe/complextransforms.jl @@ -12,16 +12,19 @@ function _combine_multicol(firstres, fun::Base.Callable, gd::GroupedDataFrame, idx_agg = Vector{Int}(undef, length(gd)) fillfirst!(nothing, idx_agg, 1:length(gd.groups), gd) else - idx_agg = nothing + idx_agg = NOTHING_IDX_AGG end - return _combine_with_first(wrap(firstres), fun, gd, incols, + return _combine_with_first(Ref{Any}(wrap(firstres)), Ref{Any}(fun), gd, incols, Val(firstmulticol), idx_agg) end -function _combine_with_first(first::Union{NamedTuple, DataFrameRow, AbstractDataFrame}, - f::Base.Callable, gd::GroupedDataFrame, +function _combine_with_first(first::Ref{Any}, + f::Ref{Any}, gd::GroupedDataFrame, incols::Union{Nothing, AbstractVector, Tuple, NamedTuple}, - firstmulticol::Val, idx_agg::Union{Nothing, AbstractVector{<:Integer}}) + firstmulticol::Val, idx_agg::Vector{Int}) + @assert only(first) isa Union{NamedTuple, DataFrameRow, AbstractDataFrame} + @assert only(f) isa Base.Callable + first = only(first) extrude = false if first isa AbstractDataFrame @@ -45,7 +48,7 @@ function _combine_with_first(first::Union{NamedTuple, DataFrameRow, AbstractData throw(ArgumentError("mixing single values and vectors in a named tuple is not allowed")) end end - idx = isnothing(idx_agg) ? Vector{Int}(undef, n) : idx_agg + idx = idx_agg === NOTHING_IDX_AGG ? Vector{Int}(undef, n) : idx_agg local initialcols let eltys=eltys, n=n # Workaround for julia#15276 initialcols = ntuple(i -> Tables.allocatecolumn(eltys[i], n), _ncol(first)) @@ -54,11 +57,11 @@ function _combine_with_first(first::Union{NamedTuple, DataFrameRow, AbstractData if !extrude && first isa Union{AbstractDataFrame, NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}} outcols, finalcolnames = _combine_tables_with_first!(first, initialcols, idx, 1, 1, - f, gd, incols, targetcolnames, + only(f), gd, incols, targetcolnames, firstmulticol) else outcols, finalcolnames = _combine_rows_with_first!(first, initialcols, - f, gd, incols, targetcolnames, + only(f), gd, incols, targetcolnames, firstmulticol) end return idx, outcols, collect(Symbol, finalcolnames) diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl index 1f7df2c3b1..bd48c0b485 100644 --- a/src/groupeddataframe/splitapplycombine.jl +++ b/src/groupeddataframe/splitapplycombine.jl @@ -4,6 +4,10 @@ # in combine are considered to produce multiple columns in the resulting data frame const MULTI_COLS_TYPE = Union{AbstractDataFrame, NamedTuple, DataFrameRow, AbstractMatrix} +# use a constant Vector{Int} as a sentinel to signal that idx_agg has not been computed yet +# to avoid excessive specialization +const NOTHING_IDX_AGG = Int[] + function gen_groups(idx::Vector{Int}) groups = zeros(Int, length(idx)) groups[1] = 1 @@ -175,7 +179,7 @@ function fillfirst!(condf, outcol::AbstractVector, incol::AbstractVector, outcol end -function _agg2idx_map_helper(idx::AbstractVector, idx_agg::AbstractVector) +function _agg2idx_map_helper(idx::Vector{Int}, idx_agg::Vector{Int}) agg2idx_map = fill(-1, length(idx)) aggj = 1 @inbounds for (j, idxj) in enumerate(idx) @@ -202,7 +206,7 @@ function _combine_process_agg(@nospecialize(cs_i::Pair{Int, <:Pair{<:Function, S gd::GroupedDataFrame, seen_cols::Dict{Symbol, Tuple{Bool, Int}}, trans_res::Vector{TransformationResult}, - idx_agg::AbstractVector{Int}) + idx_agg::Vector{Int}) @assert isagg(cs_i, gd) @assert !optional_i out_col_name = last(last(cs_i)) @@ -271,16 +275,16 @@ function _combine_process_callable(@nospecialize(cs_i::Base.Callable), gd::GroupedDataFrame, seen_cols::Dict{Symbol, Tuple{Bool, Int}}, trans_res::Vector{TransformationResult}, - idx_agg::Ref{Union{Nothing, Vector{Int}}}) + idx_agg::Ref{Vector{Int}}) firstres = length(gd) > 0 ? cs_i(gd[1]) : cs_i(similar(parentdf, 0)) idx, outcols, nms = _combine_multicol(firstres, cs_i, gd, nothing) if !(firstres isa Union{AbstractVecOrMat, AbstractDataFrame, NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}}) lock(gd.lazy_lock) do - # if idx_agg was not computed yet it is nothing + # if idx_agg was not computed yet it is NOTHING_IDX_AGG # in this case if we are not passed a vector compute it. - if isnothing(idx_agg[]) + if idx_agg[] === NOTHING_IDX_AGG idx_agg[] = Vector{Int}(undef, length(gd)) fillfirst!(nothing, idx_agg[], 1:length(gd.groups), gd) end @@ -317,7 +321,7 @@ function _combine_process_pair_symbol(optional_i::Bool, gd::GroupedDataFrame, seen_cols::Dict{Symbol, Tuple{Bool, Int}}, trans_res::Vector{TransformationResult}, - idx_agg::Ref{Union{Nothing, Vector{Int}}}, + idx_agg::Ref{Vector{Int}}, out_col_name::Symbol, firstmulticol::Bool, firstres::Any, @@ -329,7 +333,7 @@ function _combine_process_pair_symbol(optional_i::Bool, # if idx_agg was not computed yet it is nothing # in this case if we are not passed a vector compute it. lock(gd.lazy_lock) do - if !(firstres isa AbstractVector) && isnothing(idx_agg[]) + if !(firstres isa AbstractVector) && idx_agg[] === NOTHING_IDX_AGG idx_agg[] = Vector{Int}(undef, length(gd)) fillfirst!(nothing, idx_agg[], 1:length(gd.groups), gd) end @@ -340,10 +344,10 @@ function _combine_process_pair_symbol(optional_i::Bool, # the last argument passed to _combine_with_first informs it about precomputed # idx. Currently we do it only for single-row return values otherwise we pass - # nothing to signal that idx has to be computed in _combine_with_first - idx, outcols, _ = _combine_with_first(wrap(firstres), fun, gd, incols, + # NOTHING_IDX_AGG to signal that idx has to be computed in _combine_with_first + idx, outcols, _ = _combine_with_first(Ref{Any}(wrap(firstres)), Ref{Any}(fun), gd, incols, Val(firstmulticol), - firstres isa AbstractVector ? nothing : idx_agg[]) + firstres isa AbstractVector ? NOTHING_IDX_AGG : idx_agg[]) @assert length(outcols) == 1 outcol = outcols[1] @@ -370,14 +374,14 @@ function _combine_process_pair_astable(optional_i::Bool, gd::GroupedDataFrame, seen_cols::Dict{Symbol, Tuple{Bool, Int}}, trans_res::Vector{TransformationResult}, - idx_agg::Ref{Union{Nothing, Vector{Int}}}, + idx_agg::Ref{Vector{Int}}, out_col_name::Union{Type{AsTable}, AbstractVector{Symbol}}, firstmulticol::Bool, firstres::Any, @nospecialize(fun::Base.Callable), incols::Union{Tuple, NamedTuple}) if firstres isa AbstractVector - idx, outcol_vec, _ = _combine_with_first(wrap(firstres), fun, gd, incols, + idx, outcol_vec, _ = _combine_with_first(Ref{Any}(wrap(firstres)), Ref{Any}(fun), gd, incols, Val(firstmulticol), nothing) @assert length(outcol_vec) == 1 res = outcol_vec[1] @@ -407,7 +411,7 @@ function _combine_process_pair_astable(optional_i::Bool, lock(gd.lazy_lock) do # if idx_agg was not computed yet it is nothing # in this case if we are not passed a vector compute it. - if isnothing(idx_agg[]) + if idx_agg[] === NOTHING_IDX_AGG idx_agg[] = Vector{Int}(undef, length(gd)) fillfirst!(nothing, idx_agg[], 1:length(gd.groups), gd) end @@ -457,7 +461,7 @@ function _combine_process_pair(@nospecialize(cs_i::Pair), gd::GroupedDataFrame, seen_cols::Dict{Symbol, Tuple{Bool, Int}}, trans_res::Vector{TransformationResult}, - idx_agg::Ref{Union{Nothing, Vector{Int}}}) + idx_agg::Ref{Vector{Int}}) source_cols, (fun, out_col_name) = cs_i if source_cols isa Int @@ -527,7 +531,7 @@ function _combine(gd::GroupedDataFrame, idx_keeprows = nothing end - idx_agg = Ref{Union{Nothing, Vector{Int}}}(nothing) + idx_agg = Ref(NOTHING_IDX_AGG) if length(gd) > 0 && any(x -> isagg(x, gd), cs_norm) # Compute indices of representative rows only once for all AbstractAggregates idx_agg[] = Vector{Int}(undef, length(gd)) @@ -599,11 +603,11 @@ function _combine(gd::GroupedDataFrame, end isempty(trans_res) && return Int[], DataFrame() - # idx_agg === nothing then we have only functions that + # idx_agg[] === NOTHING_IDX_AGG then we have only functions that # returned multiple rows and idx_loc = 1 idx_loc = findfirst(x -> x.col_idx !== idx_agg[], trans_res) if !keeprows && isnothing(idx_loc) - @assert !isnothing(idx_agg[]) + @assert idx_agg[] !== NOTHING_IDX_AGG idx = idx_agg[] else idx = keeprows ? idx_keeprows : trans_res[idx_loc].col_idx From 9754a5504619d0e81f6a3350a25c77402b2b8cc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 30 Mar 2021 14:45:40 +0200 Subject: [PATCH 03/22] fix missing nothing replacement --- src/groupeddataframe/splitapplycombine.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl index bd48c0b485..8fe2dfaab7 100644 --- a/src/groupeddataframe/splitapplycombine.jl +++ b/src/groupeddataframe/splitapplycombine.jl @@ -382,7 +382,7 @@ function _combine_process_pair_astable(optional_i::Bool, incols::Union{Tuple, NamedTuple}) if firstres isa AbstractVector idx, outcol_vec, _ = _combine_with_first(Ref{Any}(wrap(firstres)), Ref{Any}(fun), gd, incols, - Val(firstmulticol), nothing) + Val(firstmulticol), NOTHING_IDX_AGG) @assert length(outcol_vec) == 1 res = outcol_vec[1] @assert length(res) > 0 From 5635ee603b55e7a59e8e1ebbd991098f40cd2f8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 30 Mar 2021 16:28:06 +0200 Subject: [PATCH 04/22] some more Ref{Any} cases --- src/groupeddataframe/splitapplycombine.jl | 47 ++++++++++++++++------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl index 8fe2dfaab7..9484fab5a7 100644 --- a/src/groupeddataframe/splitapplycombine.jl +++ b/src/groupeddataframe/splitapplycombine.jl @@ -23,10 +23,15 @@ function gen_groups(idx::Vector{Int}) end function _combine_prepare(gd::GroupedDataFrame, - @nospecialize(cs::Union{Pair, Base.Callable, - ColumnIndex, MultiColumnIndex}...); + wcs::Ref{Any}; keepkeys::Bool, ungroup::Bool, copycols::Bool, keeprows::Bool, renamecols::Bool) + cs = only(wcs) + for cei in cs + if !(cei isa Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}) + throw(ArgumentError("Unrecognized transformation specification $cei")) + end + end if !ungroup && !keepkeys throw(ArgumentError("keepkeys=false when ungroup=false is not allowed")) end @@ -200,13 +205,15 @@ struct TransformationResult end # the transformation is an aggregation for which we have the fast path -function _combine_process_agg(@nospecialize(cs_i::Pair{Int, <:Pair{<:Function, Symbol}}), +function _combine_process_agg(wcs_i::Ref{Any}, optional_i::Bool, parentdf::AbstractDataFrame, gd::GroupedDataFrame, seen_cols::Dict{Symbol, Tuple{Bool, Int}}, trans_res::Vector{TransformationResult}, idx_agg::Vector{Int}) + cs_i = only(wcs_i) + @assert csi isa Pair{Int, <:Pair{<:Function, Symbol}} @assert isagg(cs_i, gd) @assert !optional_i out_col_name = last(last(cs_i)) @@ -269,13 +276,15 @@ function _combine_process_noop(cs_i::Pair{<:Union{Int, AbstractVector{Int}}, Pai end # perform a transformation taking SubDataFrame as an input -function _combine_process_callable(@nospecialize(cs_i::Base.Callable), +function _combine_process_callable(wcs_i::Ref{Any}, optional_i::Bool, parentdf::AbstractDataFrame, gd::GroupedDataFrame, seen_cols::Dict{Symbol, Tuple{Bool, Int}}, trans_res::Vector{TransformationResult}, idx_agg::Ref{Vector{Int}}) + cs_i = only(wcs_i) + @assert cs_i isa Base.Callable firstres = length(gd) > 0 ? cs_i(gd[1]) : cs_i(similar(parentdf, 0)) idx, outcols, nms = _combine_multicol(firstres, cs_i, gd, nothing) @@ -325,8 +334,11 @@ function _combine_process_pair_symbol(optional_i::Bool, out_col_name::Symbol, firstmulticol::Bool, firstres::Any, - @nospecialize(fun::Base.Callable), + wfun::Ref{Any}, incols::Union{Tuple, NamedTuple}) + fun = only(wfun) + @assert fun isa Base.Callable + if firstmulticol throw(ArgumentError("a single value or vector result is required (got $(typeof(firstres)))")) end @@ -378,8 +390,10 @@ function _combine_process_pair_astable(optional_i::Bool, out_col_name::Union{Type{AsTable}, AbstractVector{Symbol}}, firstmulticol::Bool, firstres::Any, - @nospecialize(fun::Base.Callable), + wfun::Ref{Any}, incols::Union{Tuple, NamedTuple}) + fun = only(wfun) + @assert fun isa Base.Callable if firstres isa AbstractVector idx, outcol_vec, _ = _combine_with_first(Ref{Any}(wrap(firstres)), Ref{Any}(fun), gd, incols, Val(firstmulticol), NOTHING_IDX_AGG) @@ -455,13 +469,16 @@ end # perform a transformation specified using the Pair notation # cs_i is a Pair that has many possible forms so this function is used to dispatch # to an appropriate more specialized function -function _combine_process_pair(@nospecialize(cs_i::Pair), +function _combine_process_pair(wcs_i::Ref{Any}, optional_i::Bool, parentdf::AbstractDataFrame, gd::GroupedDataFrame, seen_cols::Dict{Symbol, Tuple{Bool, Int}}, trans_res::Vector{TransformationResult}, idx_agg::Ref{Vector{Int}}) + cs_i = only(wcs_i) + @assert cs_i isa Pair + source_cols, (fun, out_col_name) = cs_i if source_cols isa Int @@ -482,11 +499,13 @@ function _combine_process_pair(@nospecialize(cs_i::Pair), if out_col_name isa Symbol return _combine_process_pair_symbol(optional_i, gd, seen_cols, trans_res, idx_agg, - out_col_name, firstmulticol, firstres, fun, incols) + out_col_name, firstmulticol, firstres, + Ref{Any}(fun), incols) end if out_col_name == AsTable || out_col_name isa AbstractVector{Symbol} return _combine_process_pair_astable(optional_i, gd, seen_cols, trans_res, idx_agg, - out_col_name, firstmulticol, firstres, fun, incols) + out_col_name, firstmulticol, firstres, + Ref{Any}(fun), incols) end throw(ArgumentError("unsupported target column name specifier $out_col_name")) end @@ -561,16 +580,16 @@ function _combine(gd::GroupedDataFrame, optional_i = optional_transform[i] tasks[i] = @spawn if length(gd) > 0 && isagg(cs_i, gd) - _combine_process_agg(cs_i, optional_i, parentdf, gd, seen_cols, trans_res, idx_agg[]) + _combine_process_agg(Ref{Any}(cs_i), optional_i, parentdf, gd, seen_cols, trans_res, idx_agg[]) elseif keeprows && cs_i isa Pair && first(last(cs_i)) === identity && !(first(cs_i) isa AsTable) && (last(last(cs_i)) isa Symbol) # this is a fast path used when we pass a column or rename a column in select or transform _combine_process_noop(cs_i, optional_i, parentdf, seen_cols, trans_res, idx_keeprows, copycols) elseif cs_i isa Base.Callable - _combine_process_callable(cs_i, optional_i, parentdf, gd, seen_cols, trans_res, idx_agg) + _combine_process_callable(Ref{Any}(cs_i), optional_i, parentdf, gd, seen_cols, trans_res, idx_agg) else @assert cs_i isa Pair - _combine_process_pair(cs_i, optional_i, parentdf, gd, seen_cols, trans_res, idx_agg) + _combine_process_pair(Ref{Any}(cs_i), optional_i, parentdf, gd, seen_cols, trans_res, idx_agg) end end # Workaround JuliaLang/julia#38931: @@ -684,7 +703,7 @@ combine(f::Pair, gd::GroupedDataFrame; combine(gd::GroupedDataFrame, cs::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...; keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true) = - _combine_prepare(gd, cs..., keepkeys=keepkeys, ungroup=ungroup, + _combine_prepare(gd, Ref{Any}(cs), keepkeys=keepkeys, ungroup=ungroup, copycols=true, keeprows=false, renamecols=renamecols) function select(f::Base.Callable, gd::GroupedDataFrame; copycols::Bool=true, @@ -698,7 +717,7 @@ end select(gd::GroupedDataFrame, args...; copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true) = - _combine_prepare(gd, args..., copycols=copycols, keepkeys=keepkeys, + _combine_prepare(gd, Ref{Any}(args), copycols=copycols, keepkeys=keepkeys, ungroup=ungroup, keeprows=true, renamecols=renamecols) function transform(f::Base.Callable, gd::GroupedDataFrame; copycols::Bool=true, From 0d266b396495bdcc5325cd90ce54defc077195f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 30 Mar 2021 16:38:07 +0200 Subject: [PATCH 05/22] fix typo --- src/groupeddataframe/splitapplycombine.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl index 9484fab5a7..45fbc6411b 100644 --- a/src/groupeddataframe/splitapplycombine.jl +++ b/src/groupeddataframe/splitapplycombine.jl @@ -213,7 +213,7 @@ function _combine_process_agg(wcs_i::Ref{Any}, trans_res::Vector{TransformationResult}, idx_agg::Vector{Int}) cs_i = only(wcs_i) - @assert csi isa Pair{Int, <:Pair{<:Function, Symbol}} + @assert cs_i isa Pair{Int, <:Pair{<:Function, Symbol}} @assert isagg(cs_i, gd) @assert !optional_i out_col_name = last(last(cs_i)) From fbf900f8fbf143904ef011d40aec3d24d127fd65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 30 Mar 2021 17:37:34 +0200 Subject: [PATCH 06/22] make combine, select and transform signatures consistent --- src/groupeddataframe/splitapplycombine.jl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl index 45fbc6411b..87aced7d88 100644 --- a/src/groupeddataframe/splitapplycombine.jl +++ b/src/groupeddataframe/splitapplycombine.jl @@ -28,9 +28,7 @@ function _combine_prepare(gd::GroupedDataFrame, keeprows::Bool, renamecols::Bool) cs = only(wcs) for cei in cs - if !(cei isa Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}) - throw(ArgumentError("Unrecognized transformation specification $cei")) - end + @assert cei isa Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}) end if !ungroup && !keepkeys throw(ArgumentError("keepkeys=false when ungroup=false is not allowed")) @@ -715,8 +713,8 @@ function select(f::Base.Callable, gd::GroupedDataFrame; copycols::Bool=true, end -select(gd::GroupedDataFrame, args...; copycols::Bool=true, keepkeys::Bool=true, - ungroup::Bool=true, renamecols::Bool=true) = +select(gd::GroupedDataFrame, args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...; + copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true) = _combine_prepare(gd, Ref{Any}(args), copycols=copycols, keepkeys=keepkeys, ungroup=ungroup, keeprows=true, renamecols=renamecols) @@ -728,8 +726,8 @@ function transform(f::Base.Callable, gd::GroupedDataFrame; copycols::Bool=true, return transform(gd, f, copycols=copycols, keepkeys=keepkeys, ungroup=ungroup) end -function transform(gd::GroupedDataFrame, args...; copycols::Bool=true, - keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true) +function transform(gd::GroupedDataFrame, args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...; + copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true) res = select(gd, :, args..., copycols=copycols, keepkeys=keepkeys, ungroup=ungroup, renamecols=renamecols) # res can be a GroupedDataFrame based on DataFrame or a DataFrame, @@ -745,7 +743,8 @@ function select!(f::Base.Callable, gd::GroupedDataFrame; ungroup::Bool=true, ren return select!(gd, f, ungroup=ungroup) end -function select!(gd::GroupedDataFrame{DataFrame}, args...; +function select!(gd::GroupedDataFrame{DataFrame}, + args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...; ungroup::Bool=true, renamecols::Bool=true) newdf = select(gd, args..., copycols=false, renamecols=renamecols) df = parent(gd) @@ -760,7 +759,8 @@ function transform!(f::Base.Callable, gd::GroupedDataFrame; ungroup::Bool=true, return transform!(gd, f, ungroup=ungroup) end -function transform!(gd::GroupedDataFrame{DataFrame}, args...; +function transform!(gd::GroupedDataFrame{DataFrame}, + args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...; ungroup::Bool=true, renamecols::Bool=true) newdf = select(gd, :, args..., copycols=false, renamecols=renamecols) df = parent(gd) From 782e147ba8f100b379987d52ecb5f6271fff3866 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 30 Mar 2021 18:05:16 +0200 Subject: [PATCH 07/22] fix typo --- src/groupeddataframe/splitapplycombine.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl index 87aced7d88..3d123b8772 100644 --- a/src/groupeddataframe/splitapplycombine.jl +++ b/src/groupeddataframe/splitapplycombine.jl @@ -28,7 +28,7 @@ function _combine_prepare(gd::GroupedDataFrame, keeprows::Bool, renamecols::Bool) cs = only(wcs) for cei in cs - @assert cei isa Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}) + @assert cei isa Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex} end if !ungroup && !keepkeys throw(ArgumentError("keepkeys=false when ungroup=false is not allowed")) From 3a18c42523b9edd15934a2346bee14eb3df5a3de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 31 Mar 2021 00:32:48 +0200 Subject: [PATCH 08/22] update subset --- src/abstractdataframe/subset.jl | 43 +++++++++++++---------- src/groupeddataframe/splitapplycombine.jl | 4 +++ 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/src/abstractdataframe/subset.jl b/src/abstractdataframe/subset.jl index 6f0cdfca31..3025138824 100644 --- a/src/abstractdataframe/subset.jl +++ b/src/abstractdataframe/subset.jl @@ -1,11 +1,3 @@ -# subset allows a transformation specification without a target column name or a column - -_process_subset_pair(i::Int, a::ColumnIndex) = a => Symbol(:x, i) -_process_subset_pair(i::Int, @nospecialize(a::Pair{<:Any, <:Base.Callable})) = - first(a) => last(a) => Symbol(:x, i) -_process_subset_pair(i::Int, a) = - throw(ArgumentError("condition specifier $a is not supported by `subset`")) - _and() = throw(ArgumentError("at least one condition must be passed")) _and(x::Bool) = x _and(x::Bool, y::Bool...) = x && _and(y...) @@ -39,12 +31,25 @@ function _and_missing(x::Any...) "but only true, false, or missing are allowed")) end +@nospecialize # Note that _get_subset_conditions will have a large compilation time # if more than 32 conditions are passed as `args`. function _get_subset_conditions(df::Union{AbstractDataFrame, GroupedDataFrame}, - @nospecialize(args), skipmissing::Bool) - conditions = Any[_process_subset_pair(i, a) for (i, a) in enumerate(args)] + wargs::Ref{Any}, skipmissing::Bool) + args = only(wargs) + conditions = Any[] + + # subset allows a transformation specification without a target column name or a column + for (i, a) in enumerate(args) + if a isa ColumnIndex + push!(conditions, a => Symbol(:x, i)) + elseif a isa Pair{<:Any, <:Base.Callable} + push!(conditions, first(a) => last(a) => Symbol(:x, i)) + else + throw(ArgumentError("condition specifier $a is not supported by `subset`")) + end + end isempty(conditions) && throw(ArgumentError("at least one condition must be passed")) @@ -153,16 +158,16 @@ julia> subset(groupby(df, :y), :v => x -> x .> minimum(x)) 2 │ 4 false false missing 12 ``` """ -function subset(df::AbstractDataFrame, @nospecialize(args...); +function subset(df::AbstractDataFrame, args...; skipmissing::Bool=false, view::Bool=false) - row_selector = _get_subset_conditions(df, args, skipmissing) + row_selector = _get_subset_conditions(df, Ref{Any}(args), skipmissing) return view ? Base.view(df, row_selector, :) : df[row_selector, :] end -function subset(gdf::GroupedDataFrame, @nospecialize(args...); +function subset(gdf::GroupedDataFrame, args...; skipmissing::Bool=false, view::Bool=false, ungroup::Bool=true) - row_selector = _get_subset_conditions(gdf, args, skipmissing) + row_selector = _get_subset_conditions(gdf, Ref{Any}(args), skipmissing) df = parent(gdf) res = view ? Base.view(df, row_selector, :) : df[row_selector, :] # TODO: in some cases it might be faster to groupby gdf.groups[row_selector] @@ -268,16 +273,18 @@ julia> df 2 │ 4 false false missing 12 ``` """ -function subset!(df::AbstractDataFrame, @nospecialize(args...); skipmissing::Bool=false) - row_selector = _get_subset_conditions(df, args, skipmissing) +function subset!(df::AbstractDataFrame, args...; skipmissing::Bool=false) + row_selector = _get_subset_conditions(df, Ref{Any}(args), skipmissing) return delete!(df, findall(!, row_selector)) end -function subset!(gdf::GroupedDataFrame, @nospecialize(args...); skipmissing::Bool=false, +function subset!(gdf::GroupedDataFrame, args...; skipmissing::Bool=false, ungroup::Bool=true) - row_selector = _get_subset_conditions(gdf, args, skipmissing) + row_selector = _get_subset_conditions(gdf, Ref{Any}(args), skipmissing) df = parent(gdf) res = delete!(df, findall(!, row_selector)) # TODO: in some cases it might be faster to groupby gdf.groups[row_selector] return ungroup ? res : groupby(res, groupcols(gdf)) end + +@specialize diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl index 3d123b8772..996c82cd0d 100644 --- a/src/groupeddataframe/splitapplycombine.jl +++ b/src/groupeddataframe/splitapplycombine.jl @@ -684,6 +684,8 @@ function _combine(gd::GroupedDataFrame, return idx, DataFrame(outcols, nms, copycols=false) end +@nospecialize + function combine(f::Base.Callable, gd::GroupedDataFrame; keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true) if f isa Colon @@ -768,3 +770,5 @@ function transform!(gd::GroupedDataFrame{DataFrame}, _replace_columns!(df, newdf) return ungroup ? df : gd end + +@specialize From a286ed8c9cbed82cbeb631ecb04bd7b3c47f6023 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 31 Mar 2021 01:18:18 +0200 Subject: [PATCH 09/22] update data frame selection --- src/abstractdataframe/selection.jl | 89 +++++++++++++++++------------- 1 file changed, 50 insertions(+), 39 deletions(-) diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl index cd0b1934be..c0a07f6532 100644 --- a/src/abstractdataframe/selection.jl +++ b/src/abstractdataframe/selection.jl @@ -174,6 +174,8 @@ end # add a method to funname defined in other/utils.jl funname(row::ByRow) = funname(row.fun) +@nospecialize + normalize_selection(idx::AbstractIndex, sel, renamecols::Bool) = try idx[sel] @@ -326,12 +328,15 @@ function normalize_selection(idx::AbstractIndex, return (wanttable ? AsTable(c) : c) => fun => newcol end -_transformation_helper(df::AbstractDataFrame, col_idx::Nothing, fun) = fun(df) -_transformation_helper(df::AbstractDataFrame, col_idx::Int, fun) = fun(df[!, col_idx]) +_transformation_helper(df::AbstractDataFrame, col_idx::Nothing, wfun::Ref{Any}) = + only(wfun)(df) +_transformation_helper(df::AbstractDataFrame, col_idx::Int, wfun::Ref{Any}) = + only(wfun)(df[!, col_idx]) _empty_astable_helper(fun, len) = [fun(NamedTuple()) for _ in 1:len] -function _transformation_helper(df::AbstractDataFrame, col_idx::AsTable, fun) +function _transformation_helper(df::AbstractDataFrame, col_idx::AsTable, wfun::Ref{Any}) + fun = only(wfun) tbl = Tables.columntable(select(df, col_idx.cols, copycols=false)) if isempty(tbl) && fun isa ByRow return _empty_astable_helper(fun.fun, nrow(df)) @@ -342,7 +347,8 @@ end _empty_selector_helper(fun, len) = [fun() for _ in 1:len] -function _transformation_helper(df::AbstractDataFrame, col_idx::AbstractVector{Int}, fun) +function _transformation_helper(df::AbstractDataFrame, col_idx::AbstractVector{Int}, wfun::Ref{Any}) + fun = only(wfun) if isempty(col_idx) && fun isa ByRow return _empty_selector_helper(fun.fun, nrow(df)) else @@ -351,8 +357,8 @@ function _transformation_helper(df::AbstractDataFrame, col_idx::AbstractVector{I end end -function _gen_colnames(@nospecialize(res), newname::Union{AbstractVector{Symbol}, - Type{AsTable}, Nothing}) +function _gen_colnames(res, newname::Union{AbstractVector{Symbol}, + Type{AsTable}, Nothing}) if res isa AbstractMatrix colnames = gennames(size(res, 2)) else @@ -412,7 +418,8 @@ end function _fix_existing_columns_for_vector(newdf::DataFrame, df::AbstractDataFrame, allow_resizing_newdf::Ref{Bool}, lr::Int, - @nospecialize(fun)) + wfun::Ref{Any}) + fun = only(wfun) # allow shortening to 0 rows if allow_resizing_newdf[] && nrow(newdf) == 1 newdfcols = _columns(newdf) @@ -434,8 +441,9 @@ end function _add_col_check_copy(newdf::DataFrame, df::AbstractDataFrame, col_idx::Union{Nothing, Int, AbstractVector{Int}, AsTable}, - copycols::Bool, @nospecialize(fun), + copycols::Bool, wfun::Ref{Any}, newname::Symbol, v::AbstractVector) + fun = only(wfun) cdf = eachcol(df) vpar = parent(v) parent_cols = col_idx isa AsTable ? col_idx.cols : something(col_idx, 1:ncol(df)) @@ -448,24 +456,24 @@ end function _add_multicol_res(res::AbstractDataFrame, newdf::DataFrame, df::AbstractDataFrame, colnames::AbstractVector{Symbol}, - allow_resizing_newdf::Ref{Bool}, @nospecialize(fun), + allow_resizing_newdf::Ref{Bool}, wfun::Ref{Any}, col_idx::Union{Nothing, Int, AbstractVector{Int}, AsTable}, copycols::Bool, newname::Union{Nothing, Type{AsTable}, AbstractVector{Symbol}}) lr = nrow(res) - _fix_existing_columns_for_vector(newdf, df, allow_resizing_newdf, lr, fun) + _fix_existing_columns_for_vector(newdf, df, allow_resizing_newdf, lr, wfun) @assert length(colnames) == ncol(res) for (newname, v) in zip(colnames, eachcol(res)) - _add_col_check_copy(newdf, df, col_idx, copycols, fun, newname, v) + _add_col_check_copy(newdf, df, col_idx, copycols, wfun, newname, v) end end function _add_multicol_res(res::AbstractMatrix, newdf::DataFrame, df::AbstractDataFrame, colnames::AbstractVector{Symbol}, - allow_resizing_newdf::Ref{Bool}, @nospecialize(fun), + allow_resizing_newdf::Ref{Bool}, wfun::Ref{Any}, col_idx::Union{Nothing, Int, AbstractVector{Int}, AsTable}, copycols::Bool, newname::Union{Nothing, Type{AsTable}, AbstractVector{Symbol}}) lr = size(res, 1) - _fix_existing_columns_for_vector(newdf, df, allow_resizing_newdf, lr, fun) + _fix_existing_columns_for_vector(newdf, df, allow_resizing_newdf, lr, wfun) @assert length(colnames) == size(res, 2) for (i, newname) in enumerate(colnames) newdf[!, newname] = res[:, i] @@ -475,20 +483,20 @@ end function _add_multicol_res(res::NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}, newdf::DataFrame, df::AbstractDataFrame, colnames::AbstractVector{Symbol}, - allow_resizing_newdf::Ref{Bool}, @nospecialize(fun), + allow_resizing_newdf::Ref{Bool}, wfun::Ref{Any}, col_idx::Union{Nothing, Int, AbstractVector{Int}, AsTable}, copycols::Bool, newname::Union{Nothing, Type{AsTable}, AbstractVector{Symbol}}) lr = length(res[1]) - _fix_existing_columns_for_vector(newdf, df, allow_resizing_newdf, lr, fun) + _fix_existing_columns_for_vector(newdf, df, allow_resizing_newdf, lr, wfun) @assert length(colnames) == length(res) for (newname, v) in zip(colnames, res) - _add_col_check_copy(newdf, df, col_idx, copycols, fun, newname, v) + _add_col_check_copy(newdf, df, col_idx, copycols, wfun, newname, v) end end function _add_multicol_res(res::NamedTuple, newdf::DataFrame, df::AbstractDataFrame, colnames::AbstractVector{Symbol}, - allow_resizing_newdf::Ref{Bool}, @nospecialize(fun), + allow_resizing_newdf::Ref{Bool}, wfun::Ref{Any}, col_idx::Union{Nothing, Int, AbstractVector{Int}, AsTable}, copycols::Bool, newname::Union{Nothing, Type{AsTable}, AbstractVector{Symbol}}) if any(v -> v isa AbstractVector, res) @@ -500,32 +508,33 @@ end function _add_multicol_res(res::DataFrameRow, newdf::DataFrame, df::AbstractDataFrame, colnames::AbstractVector{Symbol}, - allow_resizing_newdf::Ref{Bool}, @nospecialize(fun), + allow_resizing_newdf::Ref{Bool}, wfun::Ref{Any}, col_idx::Union{Nothing, Int, AbstractVector{Int}, AsTable}, copycols::Bool, newname::Union{Nothing, Type{AsTable}, AbstractVector{Symbol}}) _insert_row_multicolumn(newdf, df, allow_resizing_newdf, colnames, res) end -function select_transform!(@nospecialize(nc::Union{Base.Callable, Pair{<:Union{Int, AbstractVector{Int}, AsTable}, - <:Pair{<:Base.Callable, - <:Union{Symbol, - AbstractVector{Symbol}, - DataType}}}}), - df::AbstractDataFrame, newdf::DataFrame, +function select_transform!(wnc::Ref{Any}, df::AbstractDataFrame, newdf::DataFrame, transformed_cols::Set{Symbol}, copycols::Bool, allow_resizing_newdf::Ref{Bool}) + nc = only(wnc) + @assert nc isa Union{Base.Callable, + Pair{<:Union{Int, AbstractVector{Int}, AsTable}, + <:Pair{<:Base.Callable, <:Union{Symbol, AbstractVector{Symbol}, DataType}}}} if nc isa Base.Callable col_idx, fun, newname = nothing, nc, nothing else col_idx, (fun, newname) = nc end + wfun = Ref{Any}(fun) + if newname isa DataType newname === AsTable || throw(ArgumentError("Only DataType supported as target is AsTable")) end # It is allowed to request a tranformation operation into a newname column # only once. This is ensured by the logic related to transformed_cols dictionaly # in _manipulate, therefore in select_transform! such a duplicate should not happen - res = _transformation_helper(df, col_idx, fun) + res = _transformation_helper(df, col_idx, Ref{Any}(fun)) if newname === AsTable || newname isa AbstractVector{Symbol} res = _expand_to_table(res) @@ -546,7 +555,7 @@ function select_transform!(@nospecialize(nc::Union{Base.Callable, Pair{<:Union{I union!(transformed_cols, colnames) @assert startlen + length(colnames) == length(transformed_cols) end - _add_multicol_res(res, newdf, df, colnames, allow_resizing_newdf, fun, + _add_multicol_res(res, newdf, df, colnames, allow_resizing_newdf, wfun, col_idx, copycols, newname) elseif res isa AbstractVector if newname === nothing @@ -558,8 +567,8 @@ function select_transform!(@nospecialize(nc::Union{Base.Callable, Pair{<:Union{I push!(transformed_cols, newname) end lr = length(res) - _fix_existing_columns_for_vector(newdf, df, allow_resizing_newdf, lr, fun) - _add_col_check_copy(newdf, df, col_idx, copycols, fun, newname, res) + _fix_existing_columns_for_vector(newdf, df, allow_resizing_newdf, lr, wfun) + _add_col_check_copy(newdf, df, col_idx, copycols, wfun, newname, res) else if newname === nothing newname = :x1 @@ -609,7 +618,7 @@ See [`select`](@ref) for examples. ``` """ -select!(df::DataFrame, @nospecialize(args...); renamecols::Bool=true) = +select!(df::DataFrame, args...; renamecols::Bool=true) = _replace_columns!(df, select(df, args..., copycols=false, renamecols=renamecols)) function select!(arg::Base.Callable, df::AbstractDataFrame; renamecols::Bool=true) @@ -639,7 +648,7 @@ $TRANSFORMATION_COMMON_RULES See [`select`](@ref) for examples. """ -transform!(df::DataFrame, @nospecialize(args...); renamecols::Bool=true) = +transform!(df::DataFrame, args...; renamecols::Bool=true) = select!(df, :, args..., renamecols=renamecols) function transform!(arg::Base.Callable, df::AbstractDataFrame; renamecols::Bool=true) @@ -853,7 +862,7 @@ julia> select(gd, :, AsTable(Not(:a)) => sum, renamecols=false) ``` """ -select(df::AbstractDataFrame, @nospecialize(args...); copycols::Bool=true, renamecols::Bool=true) = +select(df::AbstractDataFrame, args...; copycols::Bool=true, renamecols::Bool=true) = manipulate(df, args..., copycols=copycols, keeprows=true, renamecols=renamecols) function select(arg::Base.Callable, df::AbstractDataFrame; renamecols::Bool=true) @@ -919,7 +928,7 @@ ERROR: ArgumentError: column :x in returned data frame is not equal to grouping See [`select`](@ref) for more examples. """ -transform(df::AbstractDataFrame, @nospecialize(args...); copycols::Bool=true, renamecols::Bool=true) = +transform(df::AbstractDataFrame, args...; copycols::Bool=true, renamecols::Bool=true) = select(df, :, args..., copycols=copycols, renamecols=renamecols) function transform(arg::Base.Callable, df::AbstractDataFrame; renamecols::Bool=true) @@ -1172,7 +1181,7 @@ julia> combine(gd, :, AsTable(Not(:a)) => sum, renamecols=false) 8 │ 4 1 8 9 ``` """ -combine(df::AbstractDataFrame, @nospecialize(args...); renamecols::Bool=true) = +combine(df::AbstractDataFrame, args...; renamecols::Bool=true) = manipulate(df, args..., copycols=true, keeprows=false, renamecols=renamecols) function combine(arg::Base.Callable, df::AbstractDataFrame; renamecols::Bool=true) @@ -1206,7 +1215,7 @@ manipulate(df::DataFrame, c::ColumnIndex; copycols::Bool, keeprows::Bool, renamecols::Bool) = manipulate(df, [c], copycols=copycols, keeprows=keeprows, renamecols=renamecols) -function manipulate(df::DataFrame, @nospecialize(cs...); copycols::Bool, keeprows::Bool, renamecols::Bool) +function manipulate(df::DataFrame, cs...; copycols::Bool, keeprows::Bool, renamecols::Bool) cs_vec = [] for v in cs if v isa AbstractVecOrMat{<:Pair} @@ -1215,11 +1224,11 @@ function manipulate(df::DataFrame, @nospecialize(cs...); copycols::Bool, keeprow push!(cs_vec, v) end end - return _manipulate(df, [normalize_selection(index(df), c, renamecols) for c in cs_vec], + return _manipulate(df, Any[normalize_selection(index(df), c, renamecols) for c in cs_vec], copycols, keeprows) end -function _manipulate(df::AbstractDataFrame, @nospecialize(normalized_cs), copycols::Bool, keeprows::Bool) +function _manipulate(df::AbstractDataFrame, normalized_cs::Vector{Any}, copycols::Bool, keeprows::Bool) @assert !(df isa SubDataFrame && copycols==false) newdf = DataFrame() # the role of transformed_cols is the following @@ -1287,7 +1296,7 @@ function _manipulate(df::AbstractDataFrame, @nospecialize(normalized_cs), copyco end end else - select_transform!(nc, df, newdf, transformed_cols, copycols, + select_transform!(Ref{Any}(nc), df, newdf, transformed_cols, copycols, allow_resizing_newdf) end end @@ -1308,7 +1317,7 @@ function manipulate(dfv::SubDataFrame, args::MultiColumnIndex; end end -function manipulate(dfv::SubDataFrame, @nospecialize(args...); copycols::Bool, keeprows::Bool, +function manipulate(dfv::SubDataFrame, args...; copycols::Bool, keeprows::Bool, renamecols::Bool) if copycols cs_vec = [] @@ -1319,7 +1328,7 @@ function manipulate(dfv::SubDataFrame, @nospecialize(args...); copycols::Bool, k push!(cs_vec, v) end end - return _manipulate(dfv, [normalize_selection(index(dfv), c, renamecols) for c in cs_vec], + return _manipulate(dfv, Any[normalize_selection(index(dfv), c, renamecols) for c in cs_vec], true, keeprows) else # we do not support transformations here @@ -1348,3 +1357,5 @@ function manipulate(dfv::SubDataFrame, @nospecialize(args...); copycols::Bool, k return view(dfv, :, Cols(newinds...)) end end + +@specialize From 3fe9afbc702c412daba71e181cfb20a4472a6e58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 31 Mar 2021 14:38:56 +0200 Subject: [PATCH 10/22] keep simple selections fast --- src/abstractdataframe/selection.jl | 66 +++++++++++++++--------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl index c0a07f6532..eac61e38c1 100644 --- a/src/abstractdataframe/selection.jl +++ b/src/abstractdataframe/selection.jl @@ -1196,25 +1196,6 @@ combine(f::Pair, gd::AbstractDataFrame; renamecols::Bool=true) = "You can pass a `Pair` as the second argument of the transformation. If you want the return " * "value to be processed as having multiple columns add `=> AsTable` suffix to the pair.")) -manipulate(df::DataFrame, args::AbstractVector{Int}; copycols::Bool, keeprows::Bool, - renamecols::Bool) = - DataFrame(_columns(df)[args], Index(_names(df)[args]), copycols=copycols) - -function manipulate(df::DataFrame, c::MultiColumnIndex; copycols::Bool, keeprows::Bool, - renamecols::Bool) - if c isa AbstractVector{<:Pair} - return manipulate(df, c..., copycols=copycols, keeprows=keeprows, - renamecols=renamecols) - else - return manipulate(df, index(df)[c], copycols=copycols, keeprows=keeprows, - renamecols=renamecols) - end -end - -manipulate(df::DataFrame, c::ColumnIndex; copycols::Bool, keeprows::Bool, - renamecols::Bool) = - manipulate(df, [c], copycols=copycols, keeprows=keeprows, renamecols=renamecols) - function manipulate(df::DataFrame, cs...; copycols::Bool, keeprows::Bool, renamecols::Bool) cs_vec = [] for v in cs @@ -1303,20 +1284,6 @@ function _manipulate(df::AbstractDataFrame, normalized_cs::Vector{Any}, copycols return newdf end -manipulate(dfv::SubDataFrame, ind::ColumnIndex; copycols::Bool, keeprows::Bool, - renamecols::Bool) = - manipulate(dfv, [ind], copycols=copycols, keeprows=keeprows, renamecols=renamecols) - -function manipulate(dfv::SubDataFrame, args::MultiColumnIndex; - copycols::Bool, keeprows::Bool, renamecols::Bool) - if args isa AbstractVector{<:Pair} - return manipulate(dfv, args..., copycols=copycols, keeprows=keeprows, - renamecols=renamecols) - else - return copycols ? dfv[:, args] : view(dfv, :, args) - end -end - function manipulate(dfv::SubDataFrame, args...; copycols::Bool, keeprows::Bool, renamecols::Bool) if copycols @@ -1359,3 +1326,36 @@ function manipulate(dfv::SubDataFrame, args...; copycols::Bool, keeprows::Bool, end @specialize + +manipulate(df::DataFrame, args::AbstractVector{Int}; copycols::Bool, keeprows::Bool, + renamecols::Bool) = + DataFrame(_columns(df)[args], Index(_names(df)[args]), copycols=copycols) + +function manipulate(df::DataFrame, c::MultiColumnIndex; copycols::Bool, keeprows::Bool, + renamecols::Bool) + if c isa AbstractVector{<:Pair} + return manipulate(df, c..., copycols=copycols, keeprows=keeprows, + renamecols=renamecols) + else + return manipulate(df, index(df)[c], copycols=copycols, keeprows=keeprows, + renamecols=renamecols) + end +end + +function manipulate(dfv::SubDataFrame, args::MultiColumnIndex; + copycols::Bool, keeprows::Bool, renamecols::Bool) + if args isa AbstractVector{<:Pair} + return manipulate(dfv, args..., copycols=copycols, keeprows=keeprows, + renamecols=renamecols) + else + return copycols ? dfv[:, args] : view(dfv, :, args) + end +end + +manipulate(df::DataFrame, c::ColumnIndex; copycols::Bool, keeprows::Bool, + renamecols::Bool) = + manipulate(df, Int[index(df)[c]], copycols=copycols, keeprows=keeprows, renamecols=renamecols) + +manipulate(dfv::SubDataFrame, c::ColumnIndex; copycols::Bool, keeprows::Bool, + renamecols::Bool) = + manipulate(dfv, Int[index(df)[c]], copycols=copycols, keeprows=keeprows, renamecols=renamecols) From 83dfae5a7aff295d4959d56eef4590ab0e606ab6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 31 Mar 2021 21:56:20 +0200 Subject: [PATCH 11/22] fix copy-paste error --- src/abstractdataframe/selection.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl index eac61e38c1..9573dfa56c 100644 --- a/src/abstractdataframe/selection.jl +++ b/src/abstractdataframe/selection.jl @@ -1358,4 +1358,4 @@ manipulate(df::DataFrame, c::ColumnIndex; copycols::Bool, keeprows::Bool, manipulate(dfv::SubDataFrame, c::ColumnIndex; copycols::Bool, keeprows::Bool, renamecols::Bool) = - manipulate(dfv, Int[index(df)[c]], copycols=copycols, keeprows=keeprows, renamecols=renamecols) + manipulate(dfv, Int[index(dfv)[c]], copycols=copycols, keeprows=keeprows, renamecols=renamecols) From fac78ffa04182916156ad521776ea4defa24b72c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Thu, 1 Apr 2021 00:46:59 +0200 Subject: [PATCH 12/22] do not use broadcasting in ByRow --- src/abstractdataframe/selection.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl index 9573dfa56c..5f2de5e088 100644 --- a/src/abstractdataframe/selection.jl +++ b/src/abstractdataframe/selection.jl @@ -168,8 +168,8 @@ struct ByRow{T} <: Function fun::T end -(f::ByRow)(cols::AbstractVector...) = f.fun.(cols...) -(f::ByRow)(table::NamedTuple) = f.fun.(Tables.namedtupleiterator(table)) +(f::ByRow)(cols::AbstractVector...) = map(f.fun, cols...) +(f::ByRow)(table::NamedTuple) = [f.fun(nt) for nt in Tables.namedtupleiterator(table)] # add a method to funname defined in other/utils.jl funname(row::ByRow) = funname(row.fun) From 937ad7e68b3b2fd8aee9db173a1466b2af4157b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Thu, 1 Apr 2021 15:30:55 +0200 Subject: [PATCH 13/22] nospecialize selectively --- src/abstractdataframe/selection.jl | 60 +++++++++++------------ src/abstractdataframe/subset.jl | 12 ++--- src/groupeddataframe/complextransforms.jl | 19 ++++--- src/groupeddataframe/splitapplycombine.jl | 57 +++++++++++---------- 4 files changed, 72 insertions(+), 76 deletions(-) diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl index 5f2de5e088..0cf19c2791 100644 --- a/src/abstractdataframe/selection.jl +++ b/src/abstractdataframe/selection.jl @@ -174,9 +174,7 @@ end # add a method to funname defined in other/utils.jl funname(row::ByRow) = funname(row.fun) -@nospecialize - -normalize_selection(idx::AbstractIndex, sel, renamecols::Bool) = +normalize_selection(idx::AbstractIndex, @nospecialize(sel), renamecols::Bool) = try idx[sel] catch e @@ -187,7 +185,7 @@ normalize_selection(idx::AbstractIndex, sel, renamecols::Bool) = end end -normalize_selection(idx::AbstractIndex, sel::Base.Callable, renamecols::Bool) = sel +normalize_selection(idx::AbstractIndex, @nospecialize(sel::Base.Callable), renamecols::Bool) = sel normalize_selection(idx::AbstractIndex, sel::Colon, renamecols::Bool) = idx[:] normalize_selection(idx::AbstractIndex, sel::Pair{typeof(nrow), Symbol}, @@ -215,20 +213,20 @@ normalize_selection(idx::AbstractIndex, sel::Pair{<:ColumnIndex, <:AbstractStrin normalize_selection(idx, first(sel) => Symbol(last(sel)), renamecols::Bool) function normalize_selection(idx::AbstractIndex, - sel::Pair{<:ColumnIndex, - <:Pair{<:Base.Callable, - <:Union{Symbol, AbstractString}}}, + @nospecialize(sel::Pair{<:ColumnIndex, + <:Pair{<:Base.Callable, + <:Union{Symbol, AbstractString}}}), renamecols::Bool) src, (fun, dst) = sel return idx[src] => fun => Symbol(dst) end function normalize_selection(idx::AbstractIndex, - sel::Pair{<:Any, - <:Pair{<:Base.Callable, - <:Union{Symbol, AbstractString, DataType, - AbstractVector{Symbol}, - AbstractVector{<:AbstractString}}}}, + @nospecialize(sel::Pair{<:Any, + <:Pair{<:Base.Callable, + <:Union{Symbol, AbstractString, DataType, + AbstractVector{Symbol}, + AbstractVector{<:AbstractString}}}}), renamecols::Bool) lls = last(last(sel)) if lls isa DataType @@ -270,7 +268,7 @@ function normalize_selection(idx::AbstractIndex, end function normalize_selection(idx::AbstractIndex, - sel::Pair{<:ColumnIndex, <:Base.Callable}, renamecols::Bool) + @nospecialize(sel::Pair{<:ColumnIndex, <:Base.Callable}), renamecols::Bool) c = idx[first(sel)] fun = last(sel) if renamecols @@ -282,7 +280,7 @@ function normalize_selection(idx::AbstractIndex, end function normalize_selection(idx::AbstractIndex, - sel::Pair{<:Any, <:Base.Callable}, renamecols::Bool) + @nospecialize(sel::Pair{<:Any, <:Base.Callable}), renamecols::Bool) if first(sel) isa AsTable rawc = first(sel).cols wanttable = true @@ -401,7 +399,7 @@ end function _insert_row_multicolumn(newdf::DataFrame, df::AbstractDataFrame, allow_resizing_newdf::Ref{Bool}, colnames::AbstractVector{Symbol}, - res::Union{NamedTuple, DataFrameRow}) + @nospecialize(res::Union{NamedTuple, DataFrameRow})) if ncol(newdf) == 0 # if allow_resizing_newdf[] is false we know this is select or transform rows = allow_resizing_newdf[] ? 1 : nrow(df) @@ -480,7 +478,7 @@ function _add_multicol_res(res::AbstractMatrix, newdf::DataFrame, df::AbstractDa end end -function _add_multicol_res(res::NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}, +function _add_multicol_res(@nospecialize(res::NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}), newdf::DataFrame, df::AbstractDataFrame, colnames::AbstractVector{Symbol}, allow_resizing_newdf::Ref{Bool}, wfun::Ref{Any}, @@ -494,7 +492,7 @@ function _add_multicol_res(res::NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector} end end -function _add_multicol_res(res::NamedTuple, newdf::DataFrame, df::AbstractDataFrame, +function _add_multicol_res(@nospecialize(res::NamedTuple), newdf::DataFrame, df::AbstractDataFrame, colnames::AbstractVector{Symbol}, allow_resizing_newdf::Ref{Bool}, wfun::Ref{Any}, col_idx::Union{Nothing, Int, AbstractVector{Int}, AsTable}, @@ -618,10 +616,10 @@ See [`select`](@ref) for examples. ``` """ -select!(df::DataFrame, args...; renamecols::Bool=true) = +select!(df::DataFrame, @nospecialize(args...); renamecols::Bool=true) = _replace_columns!(df, select(df, args..., copycols=false, renamecols=renamecols)) -function select!(arg::Base.Callable, df::AbstractDataFrame; renamecols::Bool=true) +function select!(@nospecialize(arg::Base.Callable), df::AbstractDataFrame; renamecols::Bool=true) if arg isa Colon throw(ArgumentError("First argument must be a transformation if the second argument is a data frame")) end @@ -648,10 +646,10 @@ $TRANSFORMATION_COMMON_RULES See [`select`](@ref) for examples. """ -transform!(df::DataFrame, args...; renamecols::Bool=true) = +transform!(df::DataFrame, @nospecialize(args...); renamecols::Bool=true) = select!(df, :, args..., renamecols=renamecols) -function transform!(arg::Base.Callable, df::AbstractDataFrame; renamecols::Bool=true) +function transform!(@nospecialize(arg::Base.Callable), df::AbstractDataFrame; renamecols::Bool=true) if arg isa Colon throw(ArgumentError("First argument must be a transformation if the second argument is a data frame")) end @@ -862,10 +860,10 @@ julia> select(gd, :, AsTable(Not(:a)) => sum, renamecols=false) ``` """ -select(df::AbstractDataFrame, args...; copycols::Bool=true, renamecols::Bool=true) = +select(df::AbstractDataFrame, @nospecialize(args...); copycols::Bool=true, renamecols::Bool=true) = manipulate(df, args..., copycols=copycols, keeprows=true, renamecols=renamecols) -function select(arg::Base.Callable, df::AbstractDataFrame; renamecols::Bool=true) +function select(@nospecialize(arg::Base.Callable), df::AbstractDataFrame; renamecols::Bool=true) if arg isa Colon throw(ArgumentError("First argument must be a transformation if the second argument is a data frame")) end @@ -928,10 +926,10 @@ ERROR: ArgumentError: column :x in returned data frame is not equal to grouping See [`select`](@ref) for more examples. """ -transform(df::AbstractDataFrame, args...; copycols::Bool=true, renamecols::Bool=true) = +transform(df::AbstractDataFrame, @nospecialize(args...); copycols::Bool=true, renamecols::Bool=true) = select(df, :, args..., copycols=copycols, renamecols=renamecols) -function transform(arg::Base.Callable, df::AbstractDataFrame; renamecols::Bool=true) +function transform(@nospecialize(arg::Base.Callable), df::AbstractDataFrame; renamecols::Bool=true) if arg isa Colon throw(ArgumentError("First argument to must be a transformation if the second argument is a data frame")) end @@ -1181,22 +1179,22 @@ julia> combine(gd, :, AsTable(Not(:a)) => sum, renamecols=false) 8 │ 4 1 8 9 ``` """ -combine(df::AbstractDataFrame, args...; renamecols::Bool=true) = +combine(df::AbstractDataFrame, @nospecialize(args...); renamecols::Bool=true) = manipulate(df, args..., copycols=true, keeprows=false, renamecols=renamecols) -function combine(arg::Base.Callable, df::AbstractDataFrame; renamecols::Bool=true) +function combine(@nospecialize(arg::Base.Callable), df::AbstractDataFrame; renamecols::Bool=true) if arg isa Colon throw(ArgumentError("First argument to select! must be a transformation if the second argument is a data frame")) end return combine(df, arg) end -combine(f::Pair, gd::AbstractDataFrame; renamecols::Bool=true) = +combine(@nospecialize(f::Pair), gd::AbstractDataFrame; renamecols::Bool=true) = throw(ArgumentError("First argument must be a transformation if the second argument is a data frame. " * "You can pass a `Pair` as the second argument of the transformation. If you want the return " * "value to be processed as having multiple columns add `=> AsTable` suffix to the pair.")) -function manipulate(df::DataFrame, cs...; copycols::Bool, keeprows::Bool, renamecols::Bool) +function manipulate(df::DataFrame, @nospecialize(cs...); copycols::Bool, keeprows::Bool, renamecols::Bool) cs_vec = [] for v in cs if v isa AbstractVecOrMat{<:Pair} @@ -1284,7 +1282,7 @@ function _manipulate(df::AbstractDataFrame, normalized_cs::Vector{Any}, copycols return newdf end -function manipulate(dfv::SubDataFrame, args...; copycols::Bool, keeprows::Bool, +function manipulate(dfv::SubDataFrame, @nospecialize(args...); copycols::Bool, keeprows::Bool, renamecols::Bool) if copycols cs_vec = [] @@ -1325,8 +1323,6 @@ function manipulate(dfv::SubDataFrame, args...; copycols::Bool, keeprows::Bool, end end -@specialize - manipulate(df::DataFrame, args::AbstractVector{Int}; copycols::Bool, keeprows::Bool, renamecols::Bool) = DataFrame(_columns(df)[args], Index(_names(df)[args]), copycols=copycols) diff --git a/src/abstractdataframe/subset.jl b/src/abstractdataframe/subset.jl index 3025138824..5d15345c46 100644 --- a/src/abstractdataframe/subset.jl +++ b/src/abstractdataframe/subset.jl @@ -31,8 +31,6 @@ function _and_missing(x::Any...) "but only true, false, or missing are allowed")) end -@nospecialize - # Note that _get_subset_conditions will have a large compilation time # if more than 32 conditions are passed as `args`. function _get_subset_conditions(df::Union{AbstractDataFrame, GroupedDataFrame}, @@ -158,13 +156,13 @@ julia> subset(groupby(df, :y), :v => x -> x .> minimum(x)) 2 │ 4 false false missing 12 ``` """ -function subset(df::AbstractDataFrame, args...; +function subset(df::AbstractDataFrame, @nospecialize(args...); skipmissing::Bool=false, view::Bool=false) row_selector = _get_subset_conditions(df, Ref{Any}(args), skipmissing) return view ? Base.view(df, row_selector, :) : df[row_selector, :] end -function subset(gdf::GroupedDataFrame, args...; +function subset(gdf::GroupedDataFrame, @nospecialize(args...); skipmissing::Bool=false, view::Bool=false, ungroup::Bool=true) row_selector = _get_subset_conditions(gdf, Ref{Any}(args), skipmissing) @@ -273,12 +271,12 @@ julia> df 2 │ 4 false false missing 12 ``` """ -function subset!(df::AbstractDataFrame, args...; skipmissing::Bool=false) +function subset!(df::AbstractDataFrame, @nospecialize(args...); skipmissing::Bool=false) row_selector = _get_subset_conditions(df, Ref{Any}(args), skipmissing) return delete!(df, findall(!, row_selector)) end -function subset!(gdf::GroupedDataFrame, args...; skipmissing::Bool=false, +function subset!(gdf::GroupedDataFrame, @nospecialize(args...); skipmissing::Bool=false, ungroup::Bool=true) row_selector = _get_subset_conditions(gdf, Ref{Any}(args), skipmissing) df = parent(gdf) @@ -286,5 +284,3 @@ function subset!(gdf::GroupedDataFrame, args...; skipmissing::Bool=false, # TODO: in some cases it might be faster to groupby gdf.groups[row_selector] return ungroup ? res : groupby(res, groupcols(gdf)) end - -@specialize diff --git a/src/groupeddataframe/complextransforms.jl b/src/groupeddataframe/complextransforms.jl index 72b2928685..0c20dc9edb 100644 --- a/src/groupeddataframe/complextransforms.jl +++ b/src/groupeddataframe/complextransforms.jl @@ -4,8 +4,11 @@ _nrow(x::NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}) = _ncol(df::AbstractDataFrame) = ncol(df) _ncol(x::Union{NamedTuple, DataFrameRow}) = length(x) -function _combine_multicol(firstres, fun::Base.Callable, gd::GroupedDataFrame, - incols::Union{Nothing, AbstractVector, Tuple, NamedTuple}) +function _combine_multicol(wfirstres::Ref{Any}, wfun::Ref{Any}, gd::GroupedDataFrame, + wincols::Ref{Any}) + firstres = only(wfirstres) + @assert only(wfun) isa Base.Callable + @assert only(wincols) isa Union{Nothing, AbstractVector, Tuple, NamedTuple} firstmulticol = firstres isa MULTI_COLS_TYPE if !(firstres isa Union{AbstractVecOrMat, AbstractDataFrame, NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}}) @@ -14,17 +17,19 @@ function _combine_multicol(firstres, fun::Base.Callable, gd::GroupedDataFrame, else idx_agg = NOTHING_IDX_AGG end - return _combine_with_first(Ref{Any}(wrap(firstres)), Ref{Any}(fun), gd, incols, + return _combine_with_first(Ref{Any}(wrap(firstres)), wfun, gd, wincols, Val(firstmulticol), idx_agg) end -function _combine_with_first(first::Ref{Any}, +function _combine_with_first(wfirst::Ref{Any}, f::Ref{Any}, gd::GroupedDataFrame, - incols::Union{Nothing, AbstractVector, Tuple, NamedTuple}, + wincols::Ref{Any}, firstmulticol::Val, idx_agg::Vector{Int}) - @assert only(first) isa Union{NamedTuple, DataFrameRow, AbstractDataFrame} @assert only(f) isa Base.Callable - first = only(first) + incols = only(wincols) + @assert incols isa Union{Nothing, AbstractVector, Tuple, NamedTuple} + first = only(wfirst) + @assert first isa Union{NamedTuple, DataFrameRow, AbstractDataFrame} extrude = false if first isa AbstractDataFrame diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl index 996c82cd0d..d56e1e07d8 100644 --- a/src/groupeddataframe/splitapplycombine.jl +++ b/src/groupeddataframe/splitapplycombine.jl @@ -284,7 +284,7 @@ function _combine_process_callable(wcs_i::Ref{Any}, cs_i = only(wcs_i) @assert cs_i isa Base.Callable firstres = length(gd) > 0 ? cs_i(gd[1]) : cs_i(similar(parentdf, 0)) - idx, outcols, nms = _combine_multicol(firstres, cs_i, gd, nothing) + idx, outcols, nms = _combine_multicol(Ref{Any}(firstres), wcs_i, gd, Ref{Any}(nothing)) if !(firstres isa Union{AbstractVecOrMat, AbstractDataFrame, NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}}) @@ -331,11 +331,12 @@ function _combine_process_pair_symbol(optional_i::Bool, idx_agg::Ref{Vector{Int}}, out_col_name::Symbol, firstmulticol::Bool, - firstres::Any, + wfirstres::Ref{Any}, wfun::Ref{Any}, - incols::Union{Tuple, NamedTuple}) - fun = only(wfun) - @assert fun isa Base.Callable + wincols::Ref{Any}) + firstres = only(wfirstres) + @assert only(wfun) isa Base.Callable + @assert only(wincols) isa Union{Tuple, NamedTuple} if firstmulticol throw(ArgumentError("a single value or vector result is required (got $(typeof(firstres)))")) @@ -355,7 +356,7 @@ function _combine_process_pair_symbol(optional_i::Bool, # the last argument passed to _combine_with_first informs it about precomputed # idx. Currently we do it only for single-row return values otherwise we pass # NOTHING_IDX_AGG to signal that idx has to be computed in _combine_with_first - idx, outcols, _ = _combine_with_first(Ref{Any}(wrap(firstres)), Ref{Any}(fun), gd, incols, + idx, outcols, _ = _combine_with_first(Ref{Any}(wrap(firstres)), wfun, gd, wincols, Val(firstmulticol), firstres isa AbstractVector ? NOTHING_IDX_AGG : idx_agg[]) @assert length(outcols) == 1 @@ -387,13 +388,15 @@ function _combine_process_pair_astable(optional_i::Bool, idx_agg::Ref{Vector{Int}}, out_col_name::Union{Type{AsTable}, AbstractVector{Symbol}}, firstmulticol::Bool, - firstres::Any, + wfirstres::Ref{Any}, wfun::Ref{Any}, - incols::Union{Tuple, NamedTuple}) + wincols::Ref{Any}) + firstres = only(wfirstres) fun = only(wfun) @assert fun isa Base.Callable + @assert only(wincols) isa Union{Tuple, NamedTuple} if firstres isa AbstractVector - idx, outcol_vec, _ = _combine_with_first(Ref{Any}(wrap(firstres)), Ref{Any}(fun), gd, incols, + idx, outcol_vec, _ = _combine_with_first(Ref{Any}(wrap(firstres)), wfun, gd, wincols, Val(firstmulticol), NOTHING_IDX_AGG) @assert length(outcol_vec) == 1 res = outcol_vec[1] @@ -416,7 +419,7 @@ function _combine_process_pair_astable(optional_i::Bool, oldfun = fun fun = (x...) -> Tables.columntable(oldfun(x...)) end - idx, outcols, nms = _combine_multicol(firstres, fun, gd, incols) + idx, outcols, nms = _combine_multicol(Ref{Any}(firstres), Ref{Any}(fun), gd, wincols) if !(firstres isa Union{AbstractVecOrMat, AbstractDataFrame, NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}}) @@ -497,13 +500,13 @@ function _combine_process_pair(wcs_i::Ref{Any}, if out_col_name isa Symbol return _combine_process_pair_symbol(optional_i, gd, seen_cols, trans_res, idx_agg, - out_col_name, firstmulticol, firstres, - Ref{Any}(fun), incols) + out_col_name, firstmulticol, Ref{Any}(firstres), + Ref{Any}(fun), Ref{Any}(incols)) end if out_col_name == AsTable || out_col_name isa AbstractVector{Symbol} return _combine_process_pair_astable(optional_i, gd, seen_cols, trans_res, idx_agg, - out_col_name, firstmulticol, firstres, - Ref{Any}(fun), incols) + out_col_name, firstmulticol, Ref{Any}(firstres), + Ref{Any}(fun), Ref{Any}(incols)) end throw(ArgumentError("unsupported target column name specifier $out_col_name")) end @@ -684,9 +687,7 @@ function _combine(gd::GroupedDataFrame, return idx, DataFrame(outcols, nms, copycols=false) end -@nospecialize - -function combine(f::Base.Callable, gd::GroupedDataFrame; +function combine(@nospecialize(f::Base.Callable), gd::GroupedDataFrame; keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true) if f isa Colon throw(ArgumentError("First argument must be a transformation if the second argument is a GroupedDataFrame")) @@ -694,19 +695,19 @@ function combine(f::Base.Callable, gd::GroupedDataFrame; return combine(gd, f, keepkeys=keepkeys, ungroup=ungroup, renamecols=renamecols) end -combine(f::Pair, gd::GroupedDataFrame; +combine(@nospecialize(f::Pair), gd::GroupedDataFrame; keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true) = throw(ArgumentError("First argument must be a transformation if the second argument is a GroupedDataFrame. " * "You can pass a `Pair` as the second argument of the transformation. If you want the return " * "value to be processed as having multiple columns add `=> AsTable` suffix to the pair.")) combine(gd::GroupedDataFrame, - cs::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...; + @nospecialize(cs::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...); keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true) = _combine_prepare(gd, Ref{Any}(cs), keepkeys=keepkeys, ungroup=ungroup, copycols=true, keeprows=false, renamecols=renamecols) -function select(f::Base.Callable, gd::GroupedDataFrame; copycols::Bool=true, +function select(@nospecialize(f::Base.Callable), gd::GroupedDataFrame; copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true) if f isa Colon throw(ArgumentError("First argument must be a transformation if the second argument is a grouped data frame")) @@ -715,12 +716,12 @@ function select(f::Base.Callable, gd::GroupedDataFrame; copycols::Bool=true, end -select(gd::GroupedDataFrame, args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...; +select(gd::GroupedDataFrame, @nospecialize(args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...); copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true) = _combine_prepare(gd, Ref{Any}(args), copycols=copycols, keepkeys=keepkeys, ungroup=ungroup, keeprows=true, renamecols=renamecols) -function transform(f::Base.Callable, gd::GroupedDataFrame; copycols::Bool=true, +function transform(@nospecialize(f::Base.Callable), gd::GroupedDataFrame; copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true) if f isa Colon throw(ArgumentError("First argument must be a transformation if the second argument is a grouped data frame")) @@ -728,7 +729,7 @@ function transform(f::Base.Callable, gd::GroupedDataFrame; copycols::Bool=true, return transform(gd, f, copycols=copycols, keepkeys=keepkeys, ungroup=ungroup) end -function transform(gd::GroupedDataFrame, args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...; +function transform(gd::GroupedDataFrame, @nospecialize(args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...); copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true) res = select(gd, :, args..., copycols=copycols, keepkeys=keepkeys, ungroup=ungroup, renamecols=renamecols) @@ -738,7 +739,7 @@ function transform(gd::GroupedDataFrame, args::Union{Pair, Base.Callable, Column return res end -function select!(f::Base.Callable, gd::GroupedDataFrame; ungroup::Bool=true, renamecols::Bool=true) +function select!(@nospecialize(f::Base.Callable), gd::GroupedDataFrame; ungroup::Bool=true, renamecols::Bool=true) if f isa Colon throw(ArgumentError("First argument must be a transformation if the second argument is a grouped data frame")) end @@ -746,7 +747,7 @@ function select!(f::Base.Callable, gd::GroupedDataFrame; ungroup::Bool=true, ren end function select!(gd::GroupedDataFrame{DataFrame}, - args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...; + @nospecialize(args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...); ungroup::Bool=true, renamecols::Bool=true) newdf = select(gd, args..., copycols=false, renamecols=renamecols) df = parent(gd) @@ -754,7 +755,7 @@ function select!(gd::GroupedDataFrame{DataFrame}, return ungroup ? df : gd end -function transform!(f::Base.Callable, gd::GroupedDataFrame; ungroup::Bool=true, renamecols::Bool=true) +function transform!(@nospecialize(f::Base.Callable), gd::GroupedDataFrame; ungroup::Bool=true, renamecols::Bool=true) if f isa Colon throw(ArgumentError("First argument must be a transformation if the second argument is a grouped data frame")) end @@ -762,7 +763,7 @@ function transform!(f::Base.Callable, gd::GroupedDataFrame; ungroup::Bool=true, end function transform!(gd::GroupedDataFrame{DataFrame}, - args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...; + @nospecialize(args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...); ungroup::Bool=true, renamecols::Bool=true) newdf = select(gd, :, args..., copycols=false, renamecols=renamecols) df = parent(gd) @@ -770,5 +771,3 @@ function transform!(gd::GroupedDataFrame{DataFrame}, _replace_columns!(df, newdf) return ungroup ? df : gd end - -@specialize From 67366c5bc5438cb4c6350e68dbab99b477f5907d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 2 Apr 2021 11:58:52 +0200 Subject: [PATCH 14/22] Apply suggestions from code review Co-authored-by: Milan Bouchet-Valat --- src/abstractdataframe/selection.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl index 0cf19c2791..7e3d1c1c15 100644 --- a/src/abstractdataframe/selection.jl +++ b/src/abstractdataframe/selection.jl @@ -1339,7 +1339,7 @@ function manipulate(df::DataFrame, c::MultiColumnIndex; copycols::Bool, keeprows end function manipulate(dfv::SubDataFrame, args::MultiColumnIndex; - copycols::Bool, keeprows::Bool, renamecols::Bool) + copycols::Bool, keeprows::Bool, renamecols::Bool) if args isa AbstractVector{<:Pair} return manipulate(dfv, args..., copycols=copycols, keeprows=keeprows, renamecols=renamecols) From 5ccb53887227a6c0316d7d68c4115c6f233b8ec8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 2 Apr 2021 16:52:43 +0200 Subject: [PATCH 15/22] unwrap Ref{Any} in function signature --- src/abstractdataframe/selection.jl | 23 +++++++++-------------- src/abstractdataframe/subset.jl | 11 ++++++++--- src/groupeddataframe/complextransforms.jl | 9 +++------ src/groupeddataframe/splitapplycombine.jl | 15 +++++---------- 4 files changed, 25 insertions(+), 33 deletions(-) diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl index 0cf19c2791..d1ecbe29fa 100644 --- a/src/abstractdataframe/selection.jl +++ b/src/abstractdataframe/selection.jl @@ -326,15 +326,14 @@ function normalize_selection(idx::AbstractIndex, return (wanttable ? AsTable(c) : c) => fun => newcol end -_transformation_helper(df::AbstractDataFrame, col_idx::Nothing, wfun::Ref{Any}) = - only(wfun)(df) -_transformation_helper(df::AbstractDataFrame, col_idx::Int, wfun::Ref{Any}) = - only(wfun)(df[!, col_idx]) +_transformation_helper(df::AbstractDataFrame, col_idx::Nothing, (fun,)::Ref{Any}) = + fun(df) +_transformation_helper(df::AbstractDataFrame, col_idx::Int, (fun,)::Ref{Any}) = + fun(df[!, col_idx]) _empty_astable_helper(fun, len) = [fun(NamedTuple()) for _ in 1:len] -function _transformation_helper(df::AbstractDataFrame, col_idx::AsTable, wfun::Ref{Any}) - fun = only(wfun) +function _transformation_helper(df::AbstractDataFrame, col_idx::AsTable, (fun,)::Ref{Any}) tbl = Tables.columntable(select(df, col_idx.cols, copycols=false)) if isempty(tbl) && fun isa ByRow return _empty_astable_helper(fun.fun, nrow(df)) @@ -345,8 +344,7 @@ end _empty_selector_helper(fun, len) = [fun() for _ in 1:len] -function _transformation_helper(df::AbstractDataFrame, col_idx::AbstractVector{Int}, wfun::Ref{Any}) - fun = only(wfun) +function _transformation_helper(df::AbstractDataFrame, col_idx::AbstractVector{Int}, (fun,)::Ref{Any}) if isempty(col_idx) && fun isa ByRow return _empty_selector_helper(fun.fun, nrow(df)) else @@ -416,8 +414,7 @@ end function _fix_existing_columns_for_vector(newdf::DataFrame, df::AbstractDataFrame, allow_resizing_newdf::Ref{Bool}, lr::Int, - wfun::Ref{Any}) - fun = only(wfun) + (fun,)::Ref{Any}) # allow shortening to 0 rows if allow_resizing_newdf[] && nrow(newdf) == 1 newdfcols = _columns(newdf) @@ -439,9 +436,8 @@ end function _add_col_check_copy(newdf::DataFrame, df::AbstractDataFrame, col_idx::Union{Nothing, Int, AbstractVector{Int}, AsTable}, - copycols::Bool, wfun::Ref{Any}, + copycols::Bool, (fun,)::Ref{Any}, newname::Symbol, v::AbstractVector) - fun = only(wfun) cdf = eachcol(df) vpar = parent(v) parent_cols = col_idx isa AsTable ? col_idx.cols : something(col_idx, 1:ncol(df)) @@ -512,10 +508,9 @@ function _add_multicol_res(res::DataFrameRow, newdf::DataFrame, df::AbstractData _insert_row_multicolumn(newdf, df, allow_resizing_newdf, colnames, res) end -function select_transform!(wnc::Ref{Any}, df::AbstractDataFrame, newdf::DataFrame, +function select_transform!((nc,)::Ref{Any}, df::AbstractDataFrame, newdf::DataFrame, transformed_cols::Set{Symbol}, copycols::Bool, allow_resizing_newdf::Ref{Bool}) - nc = only(wnc) @assert nc isa Union{Base.Callable, Pair{<:Union{Int, AbstractVector{Int}, AsTable}, <:Pair{<:Base.Callable, <:Union{Symbol, AbstractVector{Symbol}, DataType}}}} diff --git a/src/abstractdataframe/subset.jl b/src/abstractdataframe/subset.jl index 5d15345c46..93ec471a0c 100644 --- a/src/abstractdataframe/subset.jl +++ b/src/abstractdataframe/subset.jl @@ -34,9 +34,14 @@ end # Note that _get_subset_conditions will have a large compilation time # if more than 32 conditions are passed as `args`. function _get_subset_conditions(df::Union{AbstractDataFrame, GroupedDataFrame}, - wargs::Ref{Any}, skipmissing::Bool) - args = only(wargs) - conditions = Any[] + (args,)::Ref{Any}, skipmissing::Bool) + conditions = Any[if a isa ColumnIndex + a => Symbol(:x, i)) + elseif a isa Pair{<:Any, <:Base.Callable} + first(a) => last(a) => Symbol(:x, i) + else + throw(ArgumentError("condition specifier $a is not supported by `subset`")) + end for (i, a) in enumerate(args)] # subset allows a transformation specification without a target column name or a column for (i, a) in enumerate(args) diff --git a/src/groupeddataframe/complextransforms.jl b/src/groupeddataframe/complextransforms.jl index 0c20dc9edb..924837ea01 100644 --- a/src/groupeddataframe/complextransforms.jl +++ b/src/groupeddataframe/complextransforms.jl @@ -4,9 +4,8 @@ _nrow(x::NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}) = _ncol(df::AbstractDataFrame) = ncol(df) _ncol(x::Union{NamedTuple, DataFrameRow}) = length(x) -function _combine_multicol(wfirstres::Ref{Any}, wfun::Ref{Any}, gd::GroupedDataFrame, +function _combine_multicol((firstres,)::Ref{Any}, wfun::Ref{Any}, gd::GroupedDataFrame, wincols::Ref{Any}) - firstres = only(wfirstres) @assert only(wfun) isa Base.Callable @assert only(wincols) isa Union{Nothing, AbstractVector, Tuple, NamedTuple} firstmulticol = firstres isa MULTI_COLS_TYPE @@ -21,14 +20,12 @@ function _combine_multicol(wfirstres::Ref{Any}, wfun::Ref{Any}, gd::GroupedDataF Val(firstmulticol), idx_agg) end -function _combine_with_first(wfirst::Ref{Any}, +function _combine_with_first((first,)::Ref{Any}, f::Ref{Any}, gd::GroupedDataFrame, - wincols::Ref{Any}, + (incols,)::Ref{Any}, firstmulticol::Val, idx_agg::Vector{Int}) @assert only(f) isa Base.Callable - incols = only(wincols) @assert incols isa Union{Nothing, AbstractVector, Tuple, NamedTuple} - first = only(wfirst) @assert first isa Union{NamedTuple, DataFrameRow, AbstractDataFrame} extrude = false diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl index d56e1e07d8..84c2cfdcab 100644 --- a/src/groupeddataframe/splitapplycombine.jl +++ b/src/groupeddataframe/splitapplycombine.jl @@ -23,10 +23,9 @@ function gen_groups(idx::Vector{Int}) end function _combine_prepare(gd::GroupedDataFrame, - wcs::Ref{Any}; + (cs,)::Ref{Any}; keepkeys::Bool, ungroup::Bool, copycols::Bool, keeprows::Bool, renamecols::Bool) - cs = only(wcs) for cei in cs @assert cei isa Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex} end @@ -203,14 +202,13 @@ struct TransformationResult end # the transformation is an aggregation for which we have the fast path -function _combine_process_agg(wcs_i::Ref{Any}, +function _combine_process_agg((cs_i,)::Ref{Any}, optional_i::Bool, parentdf::AbstractDataFrame, gd::GroupedDataFrame, seen_cols::Dict{Symbol, Tuple{Bool, Int}}, trans_res::Vector{TransformationResult}, idx_agg::Vector{Int}) - cs_i = only(wcs_i) @assert cs_i isa Pair{Int, <:Pair{<:Function, Symbol}} @assert isagg(cs_i, gd) @assert !optional_i @@ -331,10 +329,9 @@ function _combine_process_pair_symbol(optional_i::Bool, idx_agg::Ref{Vector{Int}}, out_col_name::Symbol, firstmulticol::Bool, - wfirstres::Ref{Any}, + (firstres,)::Ref{Any}, wfun::Ref{Any}, wincols::Ref{Any}) - firstres = only(wfirstres) @assert only(wfun) isa Base.Callable @assert only(wincols) isa Union{Tuple, NamedTuple} @@ -388,10 +385,9 @@ function _combine_process_pair_astable(optional_i::Bool, idx_agg::Ref{Vector{Int}}, out_col_name::Union{Type{AsTable}, AbstractVector{Symbol}}, firstmulticol::Bool, - wfirstres::Ref{Any}, + (firstres,)::Ref{Any}, wfun::Ref{Any}, wincols::Ref{Any}) - firstres = only(wfirstres) fun = only(wfun) @assert fun isa Base.Callable @assert only(wincols) isa Union{Tuple, NamedTuple} @@ -470,14 +466,13 @@ end # perform a transformation specified using the Pair notation # cs_i is a Pair that has many possible forms so this function is used to dispatch # to an appropriate more specialized function -function _combine_process_pair(wcs_i::Ref{Any}, +function _combine_process_pair((cs_i,)::Ref{Any}, optional_i::Bool, parentdf::AbstractDataFrame, gd::GroupedDataFrame, seen_cols::Dict{Symbol, Tuple{Bool, Int}}, trans_res::Vector{TransformationResult}, idx_agg::Ref{Vector{Int}}) - cs_i = only(wcs_i) @assert cs_i isa Pair source_cols, (fun, out_col_name) = cs_i From 0a34b84d1aaa40543fcb5366c81b70966008f8e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 2 Apr 2021 17:19:39 +0200 Subject: [PATCH 16/22] fix typo --- src/abstractdataframe/subset.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/abstractdataframe/subset.jl b/src/abstractdataframe/subset.jl index 93ec471a0c..3201628aa4 100644 --- a/src/abstractdataframe/subset.jl +++ b/src/abstractdataframe/subset.jl @@ -36,7 +36,7 @@ end function _get_subset_conditions(df::Union{AbstractDataFrame, GroupedDataFrame}, (args,)::Ref{Any}, skipmissing::Bool) conditions = Any[if a isa ColumnIndex - a => Symbol(:x, i)) + a => Symbol(:x, i) elseif a isa Pair{<:Any, <:Base.Callable} first(a) => last(a) => Symbol(:x, i) else From 5aa0bd98939b37f933f867fca83ca4d0b1c4ec1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 2 Apr 2021 19:03:50 +0200 Subject: [PATCH 17/22] remove unneeded code --- src/abstractdataframe/subset.jl | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/abstractdataframe/subset.jl b/src/abstractdataframe/subset.jl index 3201628aa4..f50cf7636f 100644 --- a/src/abstractdataframe/subset.jl +++ b/src/abstractdataframe/subset.jl @@ -35,6 +35,7 @@ end # if more than 32 conditions are passed as `args`. function _get_subset_conditions(df::Union{AbstractDataFrame, GroupedDataFrame}, (args,)::Ref{Any}, skipmissing::Bool) + # subset allows a transformation specification without a target column name or a column conditions = Any[if a isa ColumnIndex a => Symbol(:x, i) elseif a isa Pair{<:Any, <:Base.Callable} @@ -42,18 +43,6 @@ function _get_subset_conditions(df::Union{AbstractDataFrame, GroupedDataFrame}, else throw(ArgumentError("condition specifier $a is not supported by `subset`")) end for (i, a) in enumerate(args)] - - # subset allows a transformation specification without a target column name or a column - for (i, a) in enumerate(args) - if a isa ColumnIndex - push!(conditions, a => Symbol(:x, i)) - elseif a isa Pair{<:Any, <:Base.Callable} - push!(conditions, first(a) => last(a) => Symbol(:x, i)) - else - throw(ArgumentError("condition specifier $a is not supported by `subset`")) - end - end - isempty(conditions) && throw(ArgumentError("at least one condition must be passed")) if df isa AbstractDataFrame From 198cd8f18788d78a7865ddfde9856a42f5d94e2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 2 Apr 2021 22:56:50 +0200 Subject: [PATCH 18/22] inline expand_to_table --- src/abstractdataframe/selection.jl | 40 ++++++++++++++---------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl index 6b260244b9..88a4ae4238 100644 --- a/src/abstractdataframe/selection.jl +++ b/src/abstractdataframe/selection.jl @@ -374,27 +374,6 @@ function _gen_colnames(res, newname::Union{AbstractVector{Symbol}, return colnames isa Vector{Symbol} ? colnames : collect(Symbol, colnames) end -_expand_to_table(res) = Tables.columntable(res) -_expand_to_table(res::Union{AbstractDataFrame, NamedTuple, DataFrameRow, AbstractMatrix}) = res - -function _expand_to_table(res::AbstractVector) - isempty(res) && return Tables.columntable(res) - kp1 = keys(res[1]) - prepend = all(x -> x isa Integer, kp1) - if !(prepend || all(x -> x isa Symbol, kp1) || all(x -> x isa AbstractString, kp1)) - throw(ArgumentError("keys of the returned elements must be " * - "`Symbol`s, strings or integers")) - end - if any(x -> !isequal(keys(x), kp1), res) - throw(ArgumentError("keys of the returned elements must be identical")) - end - newres = DataFrame() - for n in kp1 - newres[!, prepend ? Symbol("x", n) : Symbol(n)] = [x[n] for x in res] - end - return newres -end - function _insert_row_multicolumn(newdf::DataFrame, df::AbstractDataFrame, allow_resizing_newdf::Ref{Bool}, colnames::AbstractVector{Symbol}, @nospecialize(res::Union{NamedTuple, DataFrameRow})) @@ -530,7 +509,24 @@ function select_transform!((nc,)::Ref{Any}, df::AbstractDataFrame, newdf::DataFr res = _transformation_helper(df, col_idx, Ref{Any}(fun)) if newname === AsTable || newname isa AbstractVector{Symbol} - res = _expand_to_table(res) + if res isa AbstractVector && !isempty(res) + kp1 = keys(res[1]) + prepend = all(x -> x isa Integer, kp1) + if !(prepend || all(x -> x isa Symbol, kp1) || all(x -> x isa AbstractString, kp1)) + throw(ArgumentError("keys of the returned elements must be " * + "`Symbol`s, strings or integers")) + end + if any(x -> !isequal(keys(x), kp1), res) + throw(ArgumentError("keys of the returned elements must be identical")) + end + newres = DataFrame() + for n in kp1 + newres[!, prepend ? Symbol("x", n) : Symbol(n)] = [x[n] for x in res] + end + res = newres + elseif !(res isa Union{AbstractDataFrame, NamedTuple, DataFrameRow, AbstractMatrix}) + res = Tables.columntable(res) + end end if res isa Union{AbstractDataFrame, NamedTuple, DataFrameRow, AbstractMatrix} From 195feff2fdf8c831bd831affce6daccced844ae4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 2 Apr 2021 23:55:45 +0200 Subject: [PATCH 19/22] remove unused method --- src/other/precompile.jl | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/other/precompile.jl b/src/other/precompile.jl index 27b2609edf..02c7912955 100644 --- a/src/other/precompile.jl +++ b/src/other/precompile.jl @@ -424,7 +424,6 @@ function precompile(all=false) Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1,),Tuple{SubArray{SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false},0,Array{SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false},1},Tuple{Int},true}}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Int,1}},Val{false},Array{Int,1}}) Base.precompile(Tuple{typeof(getindex),DataFrame,Colon,Cols{Tuple{Symbol,Symbol}}}) Base.precompile(Tuple{typeof(DataFrames._combine_process_pair_astable),Bool,GroupedDataFrame{DataFrame},Dict{Symbol,Tuple{Bool,Int}},Array{DataFrames.TransformationResult,1},Nothing,Type{AsTable},Bool,NamedTuple{(:p, :q),Tuple{SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false},SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false}}},Union{Function, Type},Tuple{Array{Int,1},Array{Int,1}}}) - Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{DataFrameRow{DataFrame,DataFrames.Index},1}}) Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1,),Tuple{Int}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Int,1}},Val{false},Array{Int,1}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:id, :x1, :x2),Tuple{Array{Int,1},Array{Int,1},Array{Int,1}}},Type{DataFrame}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.manipulate)),NamedTuple{(:copycols, :keeprows, :renamecols),Tuple{Bool,Bool,Bool}},typeof(DataFrames.manipulate),SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}},UnitRange{Int}},Array{typeof(nrow),1}}) @@ -688,7 +687,6 @@ function precompile(all=false) Base.precompile(fbody, (Symbol,Symbol,Bool,Type{T} where T,typeof(stack),DataFrame,Array{Int,1},InvertedIndex{Array{Int,1}},)) end end - Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{Tuple{Int,String},1}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.select)),NamedTuple{(:copycols, :renamecols),Tuple{Bool,Bool}},typeof(select),DataFrame,Function,Pair{Symbol,typeof(+)},Vararg{Any,N} where N}) Base.precompile(Tuple{typeof(DataFrames._combine_multicol),Int,Function,GroupedDataFrame{DataFrame},Nothing}) Base.precompile(Tuple{typeof(DataFrames._combine_process_pair_symbol),Bool,GroupedDataFrame{DataFrame},Dict{Symbol,Tuple{Bool,Int}},Array{DataFrames.TransformationResult,1},Nothing,Symbol,Bool,Irrational{:π},Union{Function, Type},Tuple{Array{Union{Irrational{:π}, Missing},1}}}) @@ -943,7 +941,6 @@ function precompile(all=false) Base.precompile(fbody, (Bool,Bool,Bool,typeof(DataFrames.manipulate),DataFrame,Any,Vararg{Any,N} where N,)) end end - Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{NamedTuple{(:s, :t),Tuple{Int,Int}},1}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames._show)),NamedTuple{(:rowid,),Tuple{Int}},typeof(DataFrames._show),Base.TTY,MIME{Symbol("text/html")},DataFrame}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.stack)),NamedTuple{(:variable_eltype,),Tuple{UnionAll}},typeof(stack),DataFrame,Array{Symbol,1}}) Base.precompile(Tuple{typeof(DataFrames._combine_rows_with_first!),NamedTuple{(:x1,),Tuple{Missing}},Tuple{Array{Missing,1}},Int,Int,Function,GroupedDataFrame{DataFrame},Tuple{Array{Union{Missing, UnitRange{Int}},1}},Tuple{Symbol},Val{false}}) @@ -1138,7 +1135,6 @@ function precompile(all=false) Base.precompile(Tuple{typeof(DataFrames._combine_multicol),NamedTuple{(:c_identity,),Tuple{SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false}}},Function,GroupedDataFrame{DataFrame},Nothing}) Base.precompile(Tuple{typeof(DataFrames._combine_tables_with_first!),NamedTuple{(:x1,),Tuple{Array{Int,1}}},Tuple{Array{Int,1}},Array{Int,1},Int,Int,Function,GroupedDataFrame{DataFrame},NTuple{4,Array{Int,1}},Tuple{Symbol},Val{false}}) Base.precompile(Tuple{typeof(getindex),DataFrame,Colon,All{Tuple{String,String}}}) - Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{Array{Float64,2},1}}) Base.precompile(Tuple{typeof(iterate),Array{Pair{AsTable,Pair{ByRow{typeof(identity)},Symbol}},1}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:a, :c, :d),Tuple{Array{Union{Missing, String},1},Array{Union{Missing, String},1},Array{Union{Missing, Int},1}}},Type{DataFrame}}) Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1,),Tuple{Int}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Irrational{:π},1}},Val{false},Array{Int,1}}) @@ -1172,7 +1168,6 @@ function precompile(all=false) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.manipulate)),NamedTuple{(:copycols, :keeprows, :renamecols),Tuple{Bool,Bool,Bool}},typeof(DataFrames.manipulate),SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}},UnitRange{Int}},InvertedIndex{Regex}}) Base.precompile(Tuple{typeof(sort),DataFrame,InvertedIndex{Array{Any,1}}}) Base.precompile(Tuple{typeof(DataFrames._copyto_helper!),SubArray{Float64,1,Array{Float64,1},Tuple{Base.OneTo{Int}},true},Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Tuple{Base.OneTo{Int},Base.OneTo{Int}},typeof(identity),Tuple{Base.Broadcast.Extruded{Array{Int,1},Tuple{Bool},Tuple{Int}}}},Int}) - Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{Array{Float64,3},1}}) Base.precompile(Tuple{typeof(DataFrames._combine_process_pair_symbol),Bool,GroupedDataFrame{DataFrame},Dict{Symbol,Tuple{Bool,Int}},Array{DataFrames.TransformationResult,1},Nothing,Symbol,Bool,String,Union{Function, Type},Tuple{Array{Union{Missing, String},1}}}) Base.precompile(Tuple{typeof(DataFrames.copyto_widen!),Array{Int,1},Array{Real,1}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:id, :sid, :SID),Tuple{UnitRange{Int},Array{String,1},Array{Union{Missing, String},1}}},Type{DataFrame}}) @@ -1380,7 +1375,6 @@ function precompile(all=false) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:id, :fid, :id_1),Tuple{Array{Int,1},Array{Int,1},Array{Union{Missing, Int},1}}},Type{DataFrame}}) Base.precompile(Tuple{typeof(view),SubDataFrame{DataFrame,DataFrames.Index,Base.OneTo{Int}},InvertedIndex{Int},Between{Int,Int}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:p, :q),Tuple{SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false},SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false}}},Type{DataFrame}}) - Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{NamedTuple,1}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:a, :b),Tuple{Array{Union{Missing, Int},1},UnitRange{Int}}},Type{DataFrame}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:a, :b, :copycols),Tuple{Int,Array{Any,1},Bool}},Type{DataFrame}}) Base.precompile(Tuple{typeof(unique),SubDataFrame{DataFrame,DataFrames.Index,Base.OneTo{Int}},Bool}) @@ -1767,7 +1761,6 @@ function precompile(all=false) Base.precompile(Tuple{typeof(DataFrames._combine_process_pair_astable),Bool,GroupedDataFrame{DataFrame},Dict{Symbol,Tuple{Bool,Int}},Array{DataFrames.TransformationResult,1},Nothing,Type{AsTable},Bool,Dict{Symbol,UnitRange{Int}},Union{Function, Type},Tuple{Array{Int,1}}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.outerjoin)),NamedTuple{(:on, :renamecols, :indicator),Tuple{Array{Any,1},Pair{String,String},Symbol}},typeof(outerjoin),DataFrame,DataFrame}) Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1,),Tuple{Array{Tuple{Int},1}}},Function,GroupedDataFrame{DataFrame},NamedTuple{(:x,),Tuple{Array{Int,1}}},Val{false},Nothing}) - Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{Tuple{Int,Int},1}}) Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1,),Tuple{Int}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Union{Missing, UnitRange{Int}},1}},Val{false},Array{Int,1}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:x, :y),Tuple{Array{Int,1},Array{Any,1}}},Type{DataFrame}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:a, :b, :c, :x1, :x2),Tuple{UnitRange{Int},UnitRange{Int},UnitRange{Int},Array{Int,1},Array{Int,1}}},Type{DataFrame}}) @@ -2041,7 +2034,6 @@ function precompile(all=false) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.select)),NamedTuple{(:copycols, :renamecols),Tuple{Bool,Bool}},typeof(select),GroupedDataFrame{DataFrame},Function,Pair{Symbol,typeof(+)},Vararg{Any,N} where N}) Base.precompile(Tuple{typeof(getindex),DataFrame,Colon,Between{Int,Int}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:Fish, :_MASS_, :_COLOR_),Tuple{Array{String,1},Array{String,1},Array{String,1}}},Type{DataFrame}}) - Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{Tuple{Float64,Float64},1}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.innerjoin)),NamedTuple{(:on,),Tuple{Symbol}},typeof(innerjoin),SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}},Array{Int,1}},DataFrame}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:g, :x_mean_skipmissing),Tuple{UnitRange{Int},Array{Float64,1}}},Type{DataFrame}}) Base.precompile(Tuple{typeof(DataFrames._combine_process_pair_astable),Bool,GroupedDataFrame{DataFrame},Dict{Symbol,Tuple{Bool,Int}},Array{DataFrames.TransformationResult,1},Nothing,Array{Symbol,1},Bool,Array{NamedTuple{(:a, :b, :c),Tuple{Int,Int,Int}},1},Union{Function, Type},Tuple{Array{Int,1}}}) @@ -2296,7 +2288,6 @@ function precompile(all=false) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.manipulate)),NamedTuple{(:copycols, :keeprows, :renamecols),Tuple{Bool,Bool,Bool}},typeof(DataFrames.manipulate),SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,Array{Int,1},Array{Int,1}},UnitRange{Int}},Regex}) Base.precompile(Tuple{DataFrames.Reduce{typeof(Base.mul_prod),Nothing,Nothing},Array{Float64,1},GroupedDataFrame{DataFrame}}) Base.precompile(Tuple{typeof(DataFrames.do_call),ByRow{typeof(sin)},Array{Int,1},UnitRange{Int},UnitRange{Int},GroupedDataFrame{DataFrame},Tuple{Array{Float64,1}},Int}) - Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{DataFrame,1}}) Base.precompile(Tuple{typeof(DataFrames._combine_process_pair_symbol),Bool,GroupedDataFrame{DataFrame},Dict{Symbol,Tuple{Bool,Int}},Array{DataFrames.TransformationResult,1},Nothing,Symbol,Bool,Missing,Union{Function, Type},Tuple{Array{Union{Missing, Bool},1}}}) Base.precompile(Tuple{typeof(completecases),DataFrame,InvertedIndex{Array{Any,1}}}) Base.precompile(Tuple{typeof(map),Function,DataFrameRow{DataFrame,DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}}}}) @@ -2335,7 +2326,6 @@ function precompile(all=false) end Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:g, :x),Tuple{Array{Int,1},Array{DataFrame,1}}},Type{DataFrame}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:a, :b, :c, :x1),Tuple{UnitRange{Int},UnitRange{Int},UnitRange{Int},String}},Type{DataFrame}}) - Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{NamedTuple{names,Tuple{Int,Int}} where names,1}}) Base.precompile(Tuple{typeof(DataFrames.do_call),typeof(minimum),Array{Int,1},Array{Int,1},Array{Int,1},GroupedDataFrame{DataFrame},Tuple{Array{DataFrame,1}},Int}) Base.precompile(Tuple{typeof(DataFrames._combine_tables_with_first!),NamedTuple{(:x1,),Tuple{SubArray{Int,1,Array{Int,2},Tuple{Base.Slice{Base.OneTo{Int}},Int},true}}},Tuple{Array{Int,1}},Array{Int,1},Int,Int,Function,GroupedDataFrame{DataFrame},Tuple{Array{Int,1}},Tuple{Symbol},Val{true}}) Base.precompile(Tuple{typeof(DataFrames._sortperm),SubDataFrame{DataFrame,DataFrames.Index,Array{Int,1}},Base.Sort.MergeSortAlg,DataFrames.DFPerm{Base.Order.ForwardOrdering,Tuple{SubArray{Union{Missing, String},1,Array{Union{Missing, String},1},Tuple{Array{Int,1}},false},SubArray{String,1,Array{String,1},Tuple{Array{Int,1}},false}}}}) @@ -2362,7 +2352,6 @@ function precompile(all=false) Base.precompile(Tuple{typeof(DataFrames._combine_process_pair_symbol),Bool,GroupedDataFrame{DataFrame},Dict{Symbol,Tuple{Bool,Int}},Array{DataFrames.TransformationResult,1},Nothing,Symbol,Bool,Int,Union{Function, Type},Tuple{Array{Union{Missing, Rational{Int}},1}}}) Base.precompile(Tuple{typeof(view),SubDataFrame{DataFrame,DataFrames.Index,Base.OneTo{Int}},UnitRange{Int},Between{Int,Int}}) Base.precompile(Tuple{typeof(DataFrames._add_multicol_res),NamedTuple{(:y, :x),Tuple{SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false},SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false}}},DataFrame,SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,Array{Int,1},Array{Int,1}},Array{Int,1}},Array{Symbol,1},Base.RefValue{Bool},Any,AsTable,Bool,Type{AsTable}}) - Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{NamedTuple{(:a, :b),Tuple{Int,String}},1}}) Base.precompile(Tuple{typeof(DataFrames.row_group_slots),Tuple{PooledArrays.PooledArray{String,UInt8,1,Array{UInt8,1}},PooledArrays.PooledArray{Union{Missing, String},UInt8,1,Array{UInt8,1}}},Val{false},Array{Int,1},Bool,Bool}) Base.precompile(Tuple{typeof(view),DataFrameRow{DataFrame,DataFrames.Index},UnitRange{Int}}) Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1,),Tuple{Float64}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Union{Missing, Real},1}},Val{false},Array{Int,1}}) @@ -2414,7 +2403,6 @@ function precompile(all=false) end end Base.precompile(Tuple{typeof(getindex),DataFrame,Colon,All{Tuple{Int,Int,String}}}) - Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{Array{Any,1},1}}) Base.precompile(Tuple{typeof(Tables.schema),SubDataFrame{DataFrame,DataFrames.Index,Base.OneTo{Int}}}) Base.precompile(Tuple{typeof(DataFrames.groupreduce),Function,Function,Nothing,Nothing,Bool,Array{Real,1},GroupedDataFrame{DataFrame}}) Base.precompile(Tuple{typeof(show),Base.GenericIOBuffer{Array{UInt8,1}},MIME{Symbol("text/html")},DataFrame}) @@ -2927,7 +2915,6 @@ function precompile(all=false) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.innerjoin)),NamedTuple{(:on,),Tuple{Pair{Symbol,Symbol}}},typeof(innerjoin),DataFrame,DataFrame}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:A, :B),Tuple{Array{Int,1},Array{Any,1}}},Type{DataFrame}}) Base.precompile(Tuple{typeof(view),DataFrame,BitArray{1},Symbol}) - Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{NamedTuple{(:a, :b),Tuple{Int,Int}},1}}) Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Pair{Symbol,Pair{typeof(sum),Symbol}}}) Base.precompile(Tuple{DataFrames.Reduce{typeof(max),Nothing,Nothing},Array{Union{Irrational{:π}, Missing},1},GroupedDataFrame{DataFrame}}) Base.precompile(Tuple{typeof(push!),DataFrame,Dict{Symbol,String}}) @@ -3099,7 +3086,6 @@ function precompile(all=false) Base.precompile(Tuple{Type{DataFrame},Array{Array{String,1},1},Array{Symbol,1}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:Key1, :Key2, :Value),Tuple{Array{Union{Missing, String},1},PooledArrays.PooledArray{String,UInt8,1,Array{UInt8,1}},UnitRange{Int}}},Type{DataFrame}}) Base.precompile(Tuple{ByRow{typeof(minmax)},SubArray{Float64,1,Array{Float64,1},Tuple{Array{Int,1}},false},Vararg{SubArray{Float64,1,Array{Float64,1},Tuple{Array{Int,1}},false},N} where N}) - Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{Array{Int,1},1}}) Base.precompile(Tuple{typeof(Base.Broadcast.materialize),Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(columnindex),Tuple{Base.RefValue{SubDataFrame{DataFrame,DataFrames.Index,UnitRange{Int}}},Array{Symbol,1}}}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:_left,),Tuple{Int}},Type{DataFrame}}) Base.precompile(Tuple{ByRow{typeof(/)},SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false},Vararg{SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false},N} where N}) @@ -3119,7 +3105,6 @@ function precompile(all=false) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:z, :nrow, :z2),Tuple{Int,Int,Int}},Type{DataFrame}}) Base.precompile(Tuple{typeof(DataFrames._combine_multicol),NamedTuple{(:a, :b),Tuple{Int,String}},Function,GroupedDataFrame{DataFrame},Nothing}) Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Colon,Pair{Symbol,Pair{ByRow{typeof(sin)},Symbol}},Vararg{Pair{Symbol,Pair{ByRow{typeof(sin)},Symbol}},N} where N}) - Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{NamedTuple{(:a, :b, :c),Tuple{Int,Int,Int}},1}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:g, :x),Tuple{Array{Int,1},Array{Bool,1}}},Type{DataFrame}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.manipulate)),NamedTuple{(:copycols, :keeprows, :renamecols),Tuple{Bool,Bool,Bool}},typeof(DataFrames.manipulate),DataFrame,Pair{Symbol,Array{Symbol,1}},Function}) Base.precompile(Tuple{typeof(Base.Broadcast.materialize),Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Nothing,typeof(+),Tuple{DataFrame,Base.ReshapedArray{Int,2,Base.OneTo{Int},Tuple{}}}}}) From 66f7404acf209fd48ae00cd110014d552b34ef51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sat, 3 Apr 2021 14:53:21 +0200 Subject: [PATCH 20/22] revert @nospecialize --- src/abstractdataframe/selection.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl index 88a4ae4238..7402c6308c 100644 --- a/src/abstractdataframe/selection.jl +++ b/src/abstractdataframe/selection.jl @@ -353,8 +353,8 @@ function _transformation_helper(df::AbstractDataFrame, col_idx::AbstractVector{I end end -function _gen_colnames(res, newname::Union{AbstractVector{Symbol}, - Type{AsTable}, Nothing}) +function _gen_colnames(@nospecialize(res), newname::Union{AbstractVector{Symbol}, + Type{AsTable}, Nothing}) if res isa AbstractMatrix colnames = gennames(size(res, 2)) else From 3149a0ef50879cf0a38d4c787e584bea7557face Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sat, 3 Apr 2021 14:55:26 +0200 Subject: [PATCH 21/22] Apply suggestions from code review Co-authored-by: Milan Bouchet-Valat --- src/groupeddataframe/complextransforms.jl | 8 ++++---- src/groupeddataframe/splitapplycombine.jl | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/groupeddataframe/complextransforms.jl b/src/groupeddataframe/complextransforms.jl index 924837ea01..ae6151f4f4 100644 --- a/src/groupeddataframe/complextransforms.jl +++ b/src/groupeddataframe/complextransforms.jl @@ -21,10 +21,10 @@ function _combine_multicol((firstres,)::Ref{Any}, wfun::Ref{Any}, gd::GroupedDat end function _combine_with_first((first,)::Ref{Any}, - f::Ref{Any}, gd::GroupedDataFrame, + (f,)::Ref{Any}, gd::GroupedDataFrame, (incols,)::Ref{Any}, firstmulticol::Val, idx_agg::Vector{Int}) - @assert only(f) isa Base.Callable + @assert f isa Base.Callable @assert incols isa Union{Nothing, AbstractVector, Tuple, NamedTuple} @assert first isa Union{NamedTuple, DataFrameRow, AbstractDataFrame} extrude = false @@ -59,11 +59,11 @@ function _combine_with_first((first,)::Ref{Any}, if !extrude && first isa Union{AbstractDataFrame, NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}} outcols, finalcolnames = _combine_tables_with_first!(first, initialcols, idx, 1, 1, - only(f), gd, incols, targetcolnames, + f, gd, incols, targetcolnames, firstmulticol) else outcols, finalcolnames = _combine_rows_with_first!(first, initialcols, - only(f), gd, incols, targetcolnames, + f, gd, incols, targetcolnames, firstmulticol) end return idx, outcols, collect(Symbol, finalcolnames) diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl index 84c2cfdcab..eb382076c1 100644 --- a/src/groupeddataframe/splitapplycombine.jl +++ b/src/groupeddataframe/splitapplycombine.jl @@ -5,7 +5,7 @@ const MULTI_COLS_TYPE = Union{AbstractDataFrame, NamedTuple, DataFrameRow, AbstractMatrix} # use a constant Vector{Int} as a sentinel to signal that idx_agg has not been computed yet -# to avoid excessive specialization +# we do not use nothing to avoid excessive specialization const NOTHING_IDX_AGG = Int[] function gen_groups(idx::Vector{Int}) From 5020a07e62e7e5af942a7ee8d6d242af0e90178b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sat, 3 Apr 2021 14:55:56 +0200 Subject: [PATCH 22/22] Update src/groupeddataframe/splitapplycombine.jl Co-authored-by: Milan Bouchet-Valat --- src/groupeddataframe/splitapplycombine.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl index eb382076c1..ad4f61cff5 100644 --- a/src/groupeddataframe/splitapplycombine.jl +++ b/src/groupeddataframe/splitapplycombine.jl @@ -393,7 +393,7 @@ function _combine_process_pair_astable(optional_i::Bool, @assert only(wincols) isa Union{Tuple, NamedTuple} if firstres isa AbstractVector idx, outcol_vec, _ = _combine_with_first(Ref{Any}(wrap(firstres)), wfun, gd, wincols, - Val(firstmulticol), NOTHING_IDX_AGG) + Val(firstmulticol), NOTHING_IDX_AGG) @assert length(outcol_vec) == 1 res = outcol_vec[1] @assert length(res) > 0