From e3b3da1d0384c5d663ecc560a8e24c7944d41a7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Tue, 30 Mar 2021 09:29:16 +0200
Subject: [PATCH 01/22] split _combine_prepare

---
 src/groupeddataframe/splitapplycombine.jl | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl
index d1f81b932c..1f7df2c3b1 100644
--- a/src/groupeddataframe/splitapplycombine.jl
+++ b/src/groupeddataframe/splitapplycombine.jl
@@ -37,6 +37,14 @@ function _combine_prepare(gd::GroupedDataFrame,
             push!(cs_vec, p)
         end
     end
+    return _combine_prepare_norm(gd, cs_vec, keepkeys, ungroup, copycols,
+                                 keeprows, renamecols)
+end
+
+function _combine_prepare_norm(gd::GroupedDataFrame,
+                               cs_vec::Vector{Any},
+                               keepkeys::Bool, ungroup::Bool, copycols::Bool,
+                               keeprows::Bool, renamecols::Bool)
     if any(x -> x isa Pair && first(x) isa Tuple, cs_vec)
         x = cs_vec[findfirst(x -> first(x) isa Tuple, cs_vec)]
         # an explicit error is thrown as this was allowed in the past

From bb90d773cc6c7310da152425f7011a753e568e6d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Tue, 30 Mar 2021 11:23:04 +0200
Subject: [PATCH 02/22] add despecialization to split-apply-combine

---
 src/groupeddataframe/complextransforms.jl | 19 +++++++-----
 src/groupeddataframe/splitapplycombine.jl | 38 +++++++++++++----------
 2 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/src/groupeddataframe/complextransforms.jl b/src/groupeddataframe/complextransforms.jl
index a90e722cee..72b2928685 100644
--- a/src/groupeddataframe/complextransforms.jl
+++ b/src/groupeddataframe/complextransforms.jl
@@ -12,16 +12,19 @@ function _combine_multicol(firstres, fun::Base.Callable, gd::GroupedDataFrame,
         idx_agg = Vector{Int}(undef, length(gd))
         fillfirst!(nothing, idx_agg, 1:length(gd.groups), gd)
     else
-        idx_agg = nothing
+        idx_agg = NOTHING_IDX_AGG
     end
-    return _combine_with_first(wrap(firstres), fun, gd, incols,
+    return _combine_with_first(Ref{Any}(wrap(firstres)), Ref{Any}(fun), gd, incols,
                                Val(firstmulticol), idx_agg)
 end
 
-function _combine_with_first(first::Union{NamedTuple, DataFrameRow, AbstractDataFrame},
-                             f::Base.Callable, gd::GroupedDataFrame,
+function _combine_with_first(first::Ref{Any},
+                             f::Ref{Any}, gd::GroupedDataFrame,
                              incols::Union{Nothing, AbstractVector, Tuple, NamedTuple},
-                             firstmulticol::Val, idx_agg::Union{Nothing, AbstractVector{<:Integer}})
+                             firstmulticol::Val, idx_agg::Vector{Int})
+    @assert only(first) isa Union{NamedTuple, DataFrameRow, AbstractDataFrame}
+    @assert only(f) isa Base.Callable
+    first = only(first)
     extrude = false
 
     if first isa AbstractDataFrame
@@ -45,7 +48,7 @@ function _combine_with_first(first::Union{NamedTuple, DataFrameRow, AbstractData
             throw(ArgumentError("mixing single values and vectors in a named tuple is not allowed"))
         end
     end
-    idx = isnothing(idx_agg) ? Vector{Int}(undef, n) : idx_agg
+    idx = idx_agg === NOTHING_IDX_AGG ? Vector{Int}(undef, n) : idx_agg
     local initialcols
     let eltys=eltys, n=n # Workaround for julia#15276
         initialcols = ntuple(i -> Tables.allocatecolumn(eltys[i], n), _ncol(first))
@@ -54,11 +57,11 @@ function _combine_with_first(first::Union{NamedTuple, DataFrameRow, AbstractData
     if !extrude && first isa Union{AbstractDataFrame,
                                    NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}}
         outcols, finalcolnames = _combine_tables_with_first!(first, initialcols, idx, 1, 1,
-                                                             f, gd, incols, targetcolnames,
+                                                             only(f), gd, incols, targetcolnames,
                                                              firstmulticol)
     else
         outcols, finalcolnames = _combine_rows_with_first!(first, initialcols,
-                                                           f, gd, incols, targetcolnames,
+                                                           only(f), gd, incols, targetcolnames,
                                                            firstmulticol)
     end
     return idx, outcols, collect(Symbol, finalcolnames)
diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl
index 1f7df2c3b1..bd48c0b485 100644
--- a/src/groupeddataframe/splitapplycombine.jl
+++ b/src/groupeddataframe/splitapplycombine.jl
@@ -4,6 +4,10 @@
 # in combine are considered to produce multiple columns in the resulting data frame
 const MULTI_COLS_TYPE = Union{AbstractDataFrame, NamedTuple, DataFrameRow, AbstractMatrix}
 
+# to avoid excessive specialization use a constant Vector{Int} as a sentinel (compared by
+# identity with `===`) to signal that idx_agg has not been computed yet
+const NOTHING_IDX_AGG = Int[]
+
 function gen_groups(idx::Vector{Int})
     groups = zeros(Int, length(idx))
     groups[1] = 1
@@ -175,7 +179,7 @@ function fillfirst!(condf, outcol::AbstractVector, incol::AbstractVector,
     outcol
 end
 
-function _agg2idx_map_helper(idx::AbstractVector, idx_agg::AbstractVector)
+function _agg2idx_map_helper(idx::Vector{Int}, idx_agg::Vector{Int})
     agg2idx_map = fill(-1, length(idx))
     aggj = 1
     @inbounds for (j, idxj) in enumerate(idx)
@@ -202,7 +206,7 @@ function _combine_process_agg(@nospecialize(cs_i::Pair{Int, <:Pair{<:Function, S
                               gd::GroupedDataFrame,
                               seen_cols::Dict{Symbol, Tuple{Bool, Int}},
                               trans_res::Vector{TransformationResult},
-                              idx_agg::AbstractVector{Int})
+                              idx_agg::Vector{Int})
     @assert isagg(cs_i, gd)
     @assert !optional_i
     out_col_name = last(last(cs_i))
@@ -271,16 +275,16 @@ function _combine_process_callable(@nospecialize(cs_i::Base.Callable),
                                    gd::GroupedDataFrame,
                                    seen_cols::Dict{Symbol, Tuple{Bool, Int}},
                                    trans_res::Vector{TransformationResult},
-                                   idx_agg::Ref{Union{Nothing, Vector{Int}}})
+                                   idx_agg::Ref{Vector{Int}})
     firstres = length(gd) > 0 ? cs_i(gd[1]) : cs_i(similar(parentdf, 0))
     idx, outcols, nms = _combine_multicol(firstres, cs_i, gd, nothing)
 
     if !(firstres isa Union{AbstractVecOrMat, AbstractDataFrame,
                             NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}})
         lock(gd.lazy_lock) do
-            # if idx_agg was not computed yet it is nothing
+            # if idx_agg was not computed yet it is NOTHING_IDX_AGG
             # in this case if we are not passed a vector compute it.
-            if isnothing(idx_agg[])
+            if idx_agg[] === NOTHING_IDX_AGG
                 idx_agg[] = Vector{Int}(undef, length(gd))
                 fillfirst!(nothing, idx_agg[], 1:length(gd.groups), gd)
             end
@@ -317,7 +321,7 @@ function _combine_process_pair_symbol(optional_i::Bool,
                                       gd::GroupedDataFrame,
                                       seen_cols::Dict{Symbol, Tuple{Bool, Int}},
                                       trans_res::Vector{TransformationResult},
-                                      idx_agg::Ref{Union{Nothing, Vector{Int}}},
+                                      idx_agg::Ref{Vector{Int}},
                                       out_col_name::Symbol,
                                       firstmulticol::Bool,
                                       firstres::Any,
@@ -329,7 +333,7 @@ function _combine_process_pair_symbol(optional_i::Bool,
     # if idx_agg was not computed yet it is nothing
     # in this case if we are not passed a vector compute it.
     lock(gd.lazy_lock) do
-        if !(firstres isa AbstractVector) && isnothing(idx_agg[])
+        if !(firstres isa AbstractVector) && idx_agg[] === NOTHING_IDX_AGG
             idx_agg[] = Vector{Int}(undef, length(gd))
             fillfirst!(nothing, idx_agg[], 1:length(gd.groups), gd)
         end
@@ -340,10 +344,10 @@ function _combine_process_pair_symbol(optional_i::Bool,
 
     # the last argument passed to _combine_with_first informs it about precomputed
     # idx. Currently we do it only for single-row return values otherwise we pass
-    # nothing to signal that idx has to be computed in _combine_with_first
-    idx, outcols, _ = _combine_with_first(wrap(firstres), fun, gd, incols,
+    # NOTHING_IDX_AGG to signal that idx has to be computed in _combine_with_first
+    idx, outcols, _ = _combine_with_first(Ref{Any}(wrap(firstres)), Ref{Any}(fun), gd, incols,
                                           Val(firstmulticol),
-                                          firstres isa AbstractVector ? nothing : idx_agg[])
+                                          firstres isa AbstractVector ? NOTHING_IDX_AGG : idx_agg[])
     @assert length(outcols) == 1
     outcol = outcols[1]
 
@@ -370,14 +374,14 @@ function _combine_process_pair_astable(optional_i::Bool,
                                        gd::GroupedDataFrame,
                                        seen_cols::Dict{Symbol, Tuple{Bool, Int}},
                                        trans_res::Vector{TransformationResult},
-                                       idx_agg::Ref{Union{Nothing, Vector{Int}}},
+                                       idx_agg::Ref{Vector{Int}},
                                        out_col_name::Union{Type{AsTable}, AbstractVector{Symbol}},
                                        firstmulticol::Bool,
                                        firstres::Any,
                                        @nospecialize(fun::Base.Callable),
                                        incols::Union{Tuple, NamedTuple})
     if firstres isa AbstractVector
-        idx, outcol_vec, _ = _combine_with_first(wrap(firstres), fun, gd, incols,
+        idx, outcol_vec, _ = _combine_with_first(Ref{Any}(wrap(firstres)), Ref{Any}(fun), gd, incols,
                                               Val(firstmulticol), nothing)
         @assert length(outcol_vec) == 1
         res = outcol_vec[1]
@@ -407,7 +411,7 @@ function _combine_process_pair_astable(optional_i::Bool,
             lock(gd.lazy_lock) do
                 # if idx_agg was not computed yet it is nothing
                 # in this case if we are not passed a vector compute it.
-                if isnothing(idx_agg[])
+                if idx_agg[] === NOTHING_IDX_AGG
                     idx_agg[] = Vector{Int}(undef, length(gd))
                     fillfirst!(nothing, idx_agg[], 1:length(gd.groups), gd)
                 end
@@ -457,7 +461,7 @@ function _combine_process_pair(@nospecialize(cs_i::Pair),
                                gd::GroupedDataFrame,
                                seen_cols::Dict{Symbol, Tuple{Bool, Int}},
                                trans_res::Vector{TransformationResult},
-                               idx_agg::Ref{Union{Nothing, Vector{Int}}})
+                               idx_agg::Ref{Vector{Int}})
     source_cols, (fun, out_col_name) = cs_i
 
     if source_cols isa Int
@@ -527,7 +531,7 @@ function _combine(gd::GroupedDataFrame,
         idx_keeprows = nothing
     end
 
-    idx_agg = Ref{Union{Nothing, Vector{Int}}}(nothing)
+    idx_agg = Ref(NOTHING_IDX_AGG)
     if length(gd) > 0 && any(x -> isagg(x, gd), cs_norm)
         # Compute indices of representative rows only once for all AbstractAggregates
         idx_agg[] = Vector{Int}(undef, length(gd))
@@ -599,11 +603,11 @@ function _combine(gd::GroupedDataFrame,
     end
 
     isempty(trans_res) && return Int[], DataFrame()
-    # idx_agg === nothing then we have only functions that
+    # if idx_agg[] === NOTHING_IDX_AGG then we have only functions that
     # returned multiple rows and idx_loc = 1
     idx_loc = findfirst(x -> x.col_idx !== idx_agg[], trans_res)
     if !keeprows && isnothing(idx_loc)
-        @assert !isnothing(idx_agg[])
+        @assert idx_agg[] !== NOTHING_IDX_AGG
         idx = idx_agg[]
     else
         idx = keeprows ? idx_keeprows : trans_res[idx_loc].col_idx

From 9754a5504619d0e81f6a3350a25c77402b2b8cc2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Tue, 30 Mar 2021 14:45:40 +0200
Subject: [PATCH 03/22] fix missing nothing replacement

---
 src/groupeddataframe/splitapplycombine.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl
index bd48c0b485..8fe2dfaab7 100644
--- a/src/groupeddataframe/splitapplycombine.jl
+++ b/src/groupeddataframe/splitapplycombine.jl
@@ -382,7 +382,7 @@ function _combine_process_pair_astable(optional_i::Bool,
                                        incols::Union{Tuple, NamedTuple})
     if firstres isa AbstractVector
         idx, outcol_vec, _ = _combine_with_first(Ref{Any}(wrap(firstres)), Ref{Any}(fun), gd, incols,
-                                              Val(firstmulticol), nothing)
+                                              Val(firstmulticol), NOTHING_IDX_AGG)
         @assert length(outcol_vec) == 1
         res = outcol_vec[1]
         @assert length(res) > 0

From 5635ee603b55e7a59e8e1ebbd991098f40cd2f8e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Tue, 30 Mar 2021 16:28:06 +0200
Subject: [PATCH 04/22] some more Ref{Any} cases

---
 src/groupeddataframe/splitapplycombine.jl | 47 ++++++++++++++++-------
 1 file changed, 33 insertions(+), 14 deletions(-)

diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl
index 8fe2dfaab7..9484fab5a7 100644
--- a/src/groupeddataframe/splitapplycombine.jl
+++ b/src/groupeddataframe/splitapplycombine.jl
@@ -23,10 +23,15 @@ function gen_groups(idx::Vector{Int})
 end
 
 function _combine_prepare(gd::GroupedDataFrame,
-                          @nospecialize(cs::Union{Pair, Base.Callable,
-                                        ColumnIndex, MultiColumnIndex}...);
+                          wcs::Ref{Any};
                           keepkeys::Bool, ungroup::Bool, copycols::Bool,
                           keeprows::Bool, renamecols::Bool)
+    cs = only(wcs)
+    for cei in cs
+        if !(cei isa Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex})
+            throw(ArgumentError("Unrecognized transformation specification $cei"))
+        end
+    end
     if !ungroup && !keepkeys
         throw(ArgumentError("keepkeys=false when ungroup=false is not allowed"))
     end
@@ -200,13 +205,15 @@ struct TransformationResult
 end
 
 # the transformation is an aggregation for which we have the fast path
-function _combine_process_agg(@nospecialize(cs_i::Pair{Int, <:Pair{<:Function, Symbol}}),
+function _combine_process_agg(wcs_i::Ref{Any},
                               optional_i::Bool,
                               parentdf::AbstractDataFrame,
                               gd::GroupedDataFrame,
                               seen_cols::Dict{Symbol, Tuple{Bool, Int}},
                               trans_res::Vector{TransformationResult},
                               idx_agg::Vector{Int})
+    cs_i = only(wcs_i)
+    @assert csi isa Pair{Int, <:Pair{<:Function, Symbol}}
     @assert isagg(cs_i, gd)
     @assert !optional_i
     out_col_name = last(last(cs_i))
@@ -269,13 +276,15 @@ function _combine_process_noop(cs_i::Pair{<:Union{Int, AbstractVector{Int}}, Pai
 end
 
 # perform a transformation taking SubDataFrame as an input
-function _combine_process_callable(@nospecialize(cs_i::Base.Callable),
+function _combine_process_callable(wcs_i::Ref{Any},
                                    optional_i::Bool,
                                    parentdf::AbstractDataFrame,
                                    gd::GroupedDataFrame,
                                    seen_cols::Dict{Symbol, Tuple{Bool, Int}},
                                    trans_res::Vector{TransformationResult},
                                    idx_agg::Ref{Vector{Int}})
+    cs_i = only(wcs_i)
+    @assert cs_i isa Base.Callable
     firstres = length(gd) > 0 ? cs_i(gd[1]) : cs_i(similar(parentdf, 0))
     idx, outcols, nms = _combine_multicol(firstres, cs_i, gd, nothing)
 
@@ -325,8 +334,11 @@ function _combine_process_pair_symbol(optional_i::Bool,
                                       out_col_name::Symbol,
                                       firstmulticol::Bool,
                                       firstres::Any,
-                                      @nospecialize(fun::Base.Callable),
+                                      wfun::Ref{Any},
                                       incols::Union{Tuple, NamedTuple})
+    fun = only(wfun)
+    @assert fun isa Base.Callable
+
     if firstmulticol
         throw(ArgumentError("a single value or vector result is required (got $(typeof(firstres)))"))
     end
@@ -378,8 +390,10 @@ function _combine_process_pair_astable(optional_i::Bool,
                                        out_col_name::Union{Type{AsTable}, AbstractVector{Symbol}},
                                        firstmulticol::Bool,
                                        firstres::Any,
-                                       @nospecialize(fun::Base.Callable),
+                                       wfun::Ref{Any},
                                        incols::Union{Tuple, NamedTuple})
+    fun = only(wfun)
+    @assert fun isa Base.Callable
     if firstres isa AbstractVector
         idx, outcol_vec, _ = _combine_with_first(Ref{Any}(wrap(firstres)), Ref{Any}(fun), gd, incols,
                                               Val(firstmulticol), NOTHING_IDX_AGG)
@@ -455,13 +469,16 @@ end
 # perform a transformation specified using the Pair notation
 # cs_i is a Pair that has many possible forms so this function is used to dispatch
 # to an appropriate more specialized function
-function _combine_process_pair(@nospecialize(cs_i::Pair),
+function _combine_process_pair(wcs_i::Ref{Any},
                                optional_i::Bool,
                                parentdf::AbstractDataFrame,
                                gd::GroupedDataFrame,
                                seen_cols::Dict{Symbol, Tuple{Bool, Int}},
                                trans_res::Vector{TransformationResult},
                                idx_agg::Ref{Vector{Int}})
+    cs_i = only(wcs_i)
+    @assert cs_i isa Pair
+
     source_cols, (fun, out_col_name) = cs_i
 
     if source_cols isa Int
@@ -482,11 +499,13 @@ function _combine_process_pair(@nospecialize(cs_i::Pair),
 
     if out_col_name isa Symbol
         return _combine_process_pair_symbol(optional_i, gd, seen_cols, trans_res, idx_agg,
-                                            out_col_name, firstmulticol, firstres, fun, incols)
+                                            out_col_name, firstmulticol, firstres,
+                                            Ref{Any}(fun), incols)
     end
     if out_col_name == AsTable || out_col_name isa AbstractVector{Symbol}
         return _combine_process_pair_astable(optional_i, gd, seen_cols, trans_res, idx_agg,
-                                             out_col_name, firstmulticol, firstres, fun, incols)
+                                             out_col_name, firstmulticol, firstres,
+                                             Ref{Any}(fun), incols)
     end
     throw(ArgumentError("unsupported target column name specifier $out_col_name"))
 end
@@ -561,16 +580,16 @@ function _combine(gd::GroupedDataFrame,
         optional_i = optional_transform[i]
 
         tasks[i] = @spawn if length(gd) > 0 && isagg(cs_i, gd)
-            _combine_process_agg(cs_i, optional_i, parentdf, gd, seen_cols, trans_res, idx_agg[])
+            _combine_process_agg(Ref{Any}(cs_i), optional_i, parentdf, gd, seen_cols, trans_res, idx_agg[])
         elseif keeprows && cs_i isa Pair && first(last(cs_i)) === identity &&
                !(first(cs_i) isa AsTable) && (last(last(cs_i)) isa Symbol)
             # this is a fast path used when we pass a column or rename a column in select or transform
             _combine_process_noop(cs_i, optional_i, parentdf, seen_cols, trans_res, idx_keeprows, copycols)
         elseif cs_i isa Base.Callable
-            _combine_process_callable(cs_i, optional_i, parentdf, gd, seen_cols, trans_res, idx_agg)
+            _combine_process_callable(Ref{Any}(cs_i), optional_i, parentdf, gd, seen_cols, trans_res, idx_agg)
         else
             @assert cs_i isa Pair
-            _combine_process_pair(cs_i, optional_i, parentdf, gd, seen_cols, trans_res, idx_agg)
+            _combine_process_pair(Ref{Any}(cs_i), optional_i, parentdf, gd, seen_cols, trans_res, idx_agg)
         end
     end
     # Workaround JuliaLang/julia#38931:
@@ -684,7 +703,7 @@ combine(f::Pair, gd::GroupedDataFrame;
 combine(gd::GroupedDataFrame,
         cs::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...;
         keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true) =
-    _combine_prepare(gd, cs..., keepkeys=keepkeys, ungroup=ungroup,
+    _combine_prepare(gd, Ref{Any}(cs), keepkeys=keepkeys, ungroup=ungroup,
                      copycols=true, keeprows=false, renamecols=renamecols)
 
 function select(f::Base.Callable, gd::GroupedDataFrame; copycols::Bool=true,
@@ -698,7 +717,7 @@ end
 
 select(gd::GroupedDataFrame, args...; copycols::Bool=true, keepkeys::Bool=true,
        ungroup::Bool=true, renamecols::Bool=true) =
-    _combine_prepare(gd, args..., copycols=copycols, keepkeys=keepkeys,
+    _combine_prepare(gd, Ref{Any}(args), copycols=copycols, keepkeys=keepkeys,
                      ungroup=ungroup, keeprows=true, renamecols=renamecols)
 
 function transform(f::Base.Callable, gd::GroupedDataFrame; copycols::Bool=true,

From 0d266b396495bdcc5325cd90ce54defc077195f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Tue, 30 Mar 2021 16:38:07 +0200
Subject: [PATCH 05/22] fix typo

---
 src/groupeddataframe/splitapplycombine.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl
index 9484fab5a7..45fbc6411b 100644
--- a/src/groupeddataframe/splitapplycombine.jl
+++ b/src/groupeddataframe/splitapplycombine.jl
@@ -213,7 +213,7 @@ function _combine_process_agg(wcs_i::Ref{Any},
                               trans_res::Vector{TransformationResult},
                               idx_agg::Vector{Int})
     cs_i = only(wcs_i)
-    @assert csi isa Pair{Int, <:Pair{<:Function, Symbol}}
+    @assert cs_i isa Pair{Int, <:Pair{<:Function, Symbol}}
     @assert isagg(cs_i, gd)
     @assert !optional_i
     out_col_name = last(last(cs_i))

From fbf900f8fbf143904ef011d40aec3d24d127fd65 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Tue, 30 Mar 2021 17:37:34 +0200
Subject: [PATCH 06/22] make combine, select and transform signatures
 consistent

---
 src/groupeddataframe/splitapplycombine.jl | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl
index 45fbc6411b..87aced7d88 100644
--- a/src/groupeddataframe/splitapplycombine.jl
+++ b/src/groupeddataframe/splitapplycombine.jl
@@ -28,9 +28,7 @@ function _combine_prepare(gd::GroupedDataFrame,
                           keeprows::Bool, renamecols::Bool)
     cs = only(wcs)
     for cei in cs
-        if !(cei isa Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex})
-            throw(ArgumentError("Unrecognized transformation specification $cei"))
-        end
+        @assert cei isa Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex})
     end
     if !ungroup && !keepkeys
         throw(ArgumentError("keepkeys=false when ungroup=false is not allowed"))
@@ -715,8 +713,8 @@ function select(f::Base.Callable, gd::GroupedDataFrame; copycols::Bool=true,
 end
 
 
-select(gd::GroupedDataFrame, args...; copycols::Bool=true, keepkeys::Bool=true,
-       ungroup::Bool=true, renamecols::Bool=true) =
+select(gd::GroupedDataFrame, args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...;
+       copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true) =
     _combine_prepare(gd, Ref{Any}(args), copycols=copycols, keepkeys=keepkeys,
                      ungroup=ungroup, keeprows=true, renamecols=renamecols)
 
@@ -728,8 +726,8 @@ function transform(f::Base.Callable, gd::GroupedDataFrame; copycols::Bool=true,
     return transform(gd, f, copycols=copycols, keepkeys=keepkeys, ungroup=ungroup)
 end
 
-function transform(gd::GroupedDataFrame, args...; copycols::Bool=true,
-                   keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true)
+function transform(gd::GroupedDataFrame, args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...;
+                   copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true)
     res = select(gd, :, args..., copycols=copycols, keepkeys=keepkeys,
                  ungroup=ungroup, renamecols=renamecols)
     # res can be a GroupedDataFrame based on DataFrame or a DataFrame,
@@ -745,7 +743,8 @@ function select!(f::Base.Callable, gd::GroupedDataFrame; ungroup::Bool=true, ren
     return select!(gd, f, ungroup=ungroup)
 end
 
-function select!(gd::GroupedDataFrame{DataFrame}, args...;
+function select!(gd::GroupedDataFrame{DataFrame},
+                 args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...;
                  ungroup::Bool=true, renamecols::Bool=true)
     newdf = select(gd, args..., copycols=false, renamecols=renamecols)
     df = parent(gd)
@@ -760,7 +759,8 @@ function transform!(f::Base.Callable, gd::GroupedDataFrame; ungroup::Bool=true,
     return transform!(gd, f, ungroup=ungroup)
 end
 
-function transform!(gd::GroupedDataFrame{DataFrame}, args...;
+function transform!(gd::GroupedDataFrame{DataFrame},
+                    args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...;
                     ungroup::Bool=true, renamecols::Bool=true)
     newdf = select(gd, :, args..., copycols=false, renamecols=renamecols)
     df = parent(gd)

From 782e147ba8f100b379987d52ecb5f6271fff3866 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Tue, 30 Mar 2021 18:05:16 +0200
Subject: [PATCH 07/22] fix typo

---
 src/groupeddataframe/splitapplycombine.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl
index 87aced7d88..3d123b8772 100644
--- a/src/groupeddataframe/splitapplycombine.jl
+++ b/src/groupeddataframe/splitapplycombine.jl
@@ -28,7 +28,7 @@ function _combine_prepare(gd::GroupedDataFrame,
                           keeprows::Bool, renamecols::Bool)
     cs = only(wcs)
     for cei in cs
-        @assert cei isa Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex})
+        @assert cei isa Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}
     end
     if !ungroup && !keepkeys
         throw(ArgumentError("keepkeys=false when ungroup=false is not allowed"))

From 3a18c42523b9edd15934a2346bee14eb3df5a3de Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Wed, 31 Mar 2021 00:32:48 +0200
Subject: [PATCH 08/22] update subset

---
 src/abstractdataframe/subset.jl           | 43 +++++++++++++----------
 src/groupeddataframe/splitapplycombine.jl |  4 +++
 2 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/src/abstractdataframe/subset.jl b/src/abstractdataframe/subset.jl
index 6f0cdfca31..3025138824 100644
--- a/src/abstractdataframe/subset.jl
+++ b/src/abstractdataframe/subset.jl
@@ -1,11 +1,3 @@
-# subset allows a transformation specification without a target column name or a column
-
-_process_subset_pair(i::Int, a::ColumnIndex) = a => Symbol(:x, i)
-_process_subset_pair(i::Int, @nospecialize(a::Pair{<:Any, <:Base.Callable})) =
-    first(a) => last(a) => Symbol(:x, i)
-_process_subset_pair(i::Int, a) =
-    throw(ArgumentError("condition specifier $a is not supported by `subset`"))
-
 _and() = throw(ArgumentError("at least one condition must be passed"))
 _and(x::Bool) = x
 _and(x::Bool, y::Bool...) = x && _and(y...)
@@ -39,12 +31,25 @@ function _and_missing(x::Any...)
                         "but only true, false, or missing are allowed"))
 end
 
+@nospecialize
 
 # Note that _get_subset_conditions will have a large compilation time
 # if more than 32 conditions are passed as `args`.
 function _get_subset_conditions(df::Union{AbstractDataFrame, GroupedDataFrame},
-                                @nospecialize(args), skipmissing::Bool)
-    conditions = Any[_process_subset_pair(i, a) for (i, a) in enumerate(args)]
+                                wargs::Ref{Any}, skipmissing::Bool)
+    args = only(wargs)
+    conditions = Any[]
+
+    # subset allows a transformation specification without a target column name, or just a column
+    for (i, a) in enumerate(args)
+        if a isa ColumnIndex
+            push!(conditions, a => Symbol(:x, i))
+        elseif a isa Pair{<:Any, <:Base.Callable}
+            push!(conditions, first(a) => last(a) => Symbol(:x, i))
+        else
+            throw(ArgumentError("condition specifier $a is not supported by `subset`"))
+        end
+    end
 
     isempty(conditions) && throw(ArgumentError("at least one condition must be passed"))
 
@@ -153,16 +158,16 @@ julia> subset(groupby(df, :y), :v => x -> x .> minimum(x))
    2 │     4  false  false  missing     12
 ```
 """
-function subset(df::AbstractDataFrame, @nospecialize(args...);
+function subset(df::AbstractDataFrame, args...;
                 skipmissing::Bool=false, view::Bool=false)
-    row_selector = _get_subset_conditions(df, args, skipmissing)
+    row_selector = _get_subset_conditions(df, Ref{Any}(args), skipmissing)
     return view ? Base.view(df, row_selector, :) : df[row_selector, :]
 end
 
-function subset(gdf::GroupedDataFrame, @nospecialize(args...);
+function subset(gdf::GroupedDataFrame, args...;
                 skipmissing::Bool=false, view::Bool=false,
                         ungroup::Bool=true)
-    row_selector = _get_subset_conditions(gdf, args, skipmissing)
+    row_selector = _get_subset_conditions(gdf, Ref{Any}(args), skipmissing)
     df = parent(gdf)
     res = view ? Base.view(df, row_selector, :) : df[row_selector, :]
     # TODO: in some cases it might be faster to groupby gdf.groups[row_selector]
@@ -268,16 +273,18 @@ julia> df
    2 │     4  false  false  missing     12
 ```
 """
-function subset!(df::AbstractDataFrame, @nospecialize(args...); skipmissing::Bool=false)
-    row_selector = _get_subset_conditions(df, args, skipmissing)
+function subset!(df::AbstractDataFrame, args...; skipmissing::Bool=false)
+    row_selector = _get_subset_conditions(df, Ref{Any}(args), skipmissing)
     return delete!(df, findall(!, row_selector))
 end
 
-function subset!(gdf::GroupedDataFrame, @nospecialize(args...); skipmissing::Bool=false,
+function subset!(gdf::GroupedDataFrame, args...; skipmissing::Bool=false,
                  ungroup::Bool=true)
-    row_selector = _get_subset_conditions(gdf, args, skipmissing)
+    row_selector = _get_subset_conditions(gdf, Ref{Any}(args), skipmissing)
     df = parent(gdf)
     res = delete!(df, findall(!, row_selector))
     # TODO: in some cases it might be faster to groupby gdf.groups[row_selector]
     return ungroup ? res : groupby(res, groupcols(gdf))
 end
+
+@specialize
diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl
index 3d123b8772..996c82cd0d 100644
--- a/src/groupeddataframe/splitapplycombine.jl
+++ b/src/groupeddataframe/splitapplycombine.jl
@@ -684,6 +684,8 @@ function _combine(gd::GroupedDataFrame,
     return idx, DataFrame(outcols, nms, copycols=false)
 end
 
+@nospecialize
+
 function combine(f::Base.Callable, gd::GroupedDataFrame;
                  keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true)
     if f isa Colon
@@ -768,3 +770,5 @@ function transform!(gd::GroupedDataFrame{DataFrame},
     _replace_columns!(df, newdf)
     return ungroup ? df : gd
 end
+
+@specialize

From a286ed8c9cbed82cbeb631ecb04bd7b3c47f6023 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Wed, 31 Mar 2021 01:18:18 +0200
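Subject: [PATCH 09/22] update data frame selection

Despecialize src/abstractdataframe/selection.jl: put a bare @nospecialize
before normalize_selection and a matching @specialize after the final
manipulate(dfv::SubDataFrame, args...) method, drop the per-argument
@nospecialize(...) annotations in between, pass transformation functions
to the internal helpers wrapped in Ref{Any} (unwrapped with only), and
type the normalized selections given to _manipulate as Vector{Any}.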

---
 src/abstractdataframe/selection.jl | 89 +++++++++++++++++-------------
 1 file changed, 50 insertions(+), 39 deletions(-)

diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl
index cd0b1934be..c0a07f6532 100644
--- a/src/abstractdataframe/selection.jl
+++ b/src/abstractdataframe/selection.jl
@@ -174,6 +174,8 @@ end
 # add a method to funname defined in other/utils.jl
 funname(row::ByRow) = funname(row.fun)
 
+@nospecialize
+
 normalize_selection(idx::AbstractIndex, sel, renamecols::Bool) =
     try
         idx[sel]
@@ -326,12 +328,15 @@ function normalize_selection(idx::AbstractIndex,
     return (wanttable ? AsTable(c) : c) => fun => newcol
 end
 
-_transformation_helper(df::AbstractDataFrame, col_idx::Nothing, fun) = fun(df)
-_transformation_helper(df::AbstractDataFrame, col_idx::Int, fun) = fun(df[!, col_idx])
+_transformation_helper(df::AbstractDataFrame, col_idx::Nothing, wfun::Ref{Any}) =
+    only(wfun)(df)
+_transformation_helper(df::AbstractDataFrame, col_idx::Int, wfun::Ref{Any}) =
+    only(wfun)(df[!, col_idx])
 
 _empty_astable_helper(fun, len) = [fun(NamedTuple()) for _ in 1:len]
 
-function _transformation_helper(df::AbstractDataFrame, col_idx::AsTable, fun)
+function _transformation_helper(df::AbstractDataFrame, col_idx::AsTable, wfun::Ref{Any})
+    fun = only(wfun)
     tbl = Tables.columntable(select(df, col_idx.cols, copycols=false))
     if isempty(tbl) && fun isa ByRow
         return _empty_astable_helper(fun.fun, nrow(df))
@@ -342,7 +347,8 @@ end
 
 _empty_selector_helper(fun, len) = [fun() for _ in 1:len]
 
-function _transformation_helper(df::AbstractDataFrame, col_idx::AbstractVector{Int}, fun)
+function _transformation_helper(df::AbstractDataFrame, col_idx::AbstractVector{Int}, wfun::Ref{Any})
+    fun = only(wfun)
     if isempty(col_idx) && fun isa ByRow
         return _empty_selector_helper(fun.fun, nrow(df))
     else
@@ -351,8 +357,8 @@ function _transformation_helper(df::AbstractDataFrame, col_idx::AbstractVector{I
     end
 end
 
-function _gen_colnames(@nospecialize(res), newname::Union{AbstractVector{Symbol},
-                                                          Type{AsTable}, Nothing})
+function _gen_colnames(res, newname::Union{AbstractVector{Symbol},
+                                           Type{AsTable}, Nothing})
     if res isa AbstractMatrix
         colnames = gennames(size(res, 2))
     else
@@ -412,7 +418,8 @@ end
 
 function _fix_existing_columns_for_vector(newdf::DataFrame, df::AbstractDataFrame,
                                           allow_resizing_newdf::Ref{Bool}, lr::Int,
-                                          @nospecialize(fun))
+                                          wfun::Ref{Any})
+    fun = only(wfun)
     # allow shortening to 0 rows
     if allow_resizing_newdf[] && nrow(newdf) == 1
         newdfcols = _columns(newdf)
@@ -434,8 +441,9 @@ end
 
 function _add_col_check_copy(newdf::DataFrame, df::AbstractDataFrame,
                              col_idx::Union{Nothing, Int, AbstractVector{Int}, AsTable},
-                             copycols::Bool, @nospecialize(fun),
+                             copycols::Bool, wfun::Ref{Any},
                              newname::Symbol, v::AbstractVector)
+    fun = only(wfun)
     cdf = eachcol(df)
     vpar = parent(v)
     parent_cols = col_idx isa AsTable ? col_idx.cols : something(col_idx, 1:ncol(df))
@@ -448,24 +456,24 @@ end
 
 function _add_multicol_res(res::AbstractDataFrame, newdf::DataFrame, df::AbstractDataFrame,
                            colnames::AbstractVector{Symbol},
-                           allow_resizing_newdf::Ref{Bool}, @nospecialize(fun),
+                           allow_resizing_newdf::Ref{Bool}, wfun::Ref{Any},
                            col_idx::Union{Nothing, Int, AbstractVector{Int}, AsTable},
                            copycols::Bool, newname::Union{Nothing, Type{AsTable}, AbstractVector{Symbol}})
     lr = nrow(res)
-    _fix_existing_columns_for_vector(newdf, df, allow_resizing_newdf, lr, fun)
+    _fix_existing_columns_for_vector(newdf, df, allow_resizing_newdf, lr, wfun)
     @assert length(colnames) == ncol(res)
     for (newname, v) in zip(colnames, eachcol(res))
-        _add_col_check_copy(newdf, df, col_idx, copycols, fun, newname, v)
+        _add_col_check_copy(newdf, df, col_idx, copycols, wfun, newname, v)
     end
 end
 
 function _add_multicol_res(res::AbstractMatrix, newdf::DataFrame, df::AbstractDataFrame,
                            colnames::AbstractVector{Symbol},
-                           allow_resizing_newdf::Ref{Bool}, @nospecialize(fun),
+                           allow_resizing_newdf::Ref{Bool}, wfun::Ref{Any},
                            col_idx::Union{Nothing, Int, AbstractVector{Int}, AsTable},
                            copycols::Bool, newname::Union{Nothing, Type{AsTable}, AbstractVector{Symbol}})
     lr = size(res, 1)
-    _fix_existing_columns_for_vector(newdf, df, allow_resizing_newdf, lr, fun)
+    _fix_existing_columns_for_vector(newdf, df, allow_resizing_newdf, lr, wfun)
     @assert length(colnames) == size(res, 2)
     for (i, newname) in enumerate(colnames)
         newdf[!, newname] = res[:, i]
@@ -475,20 +483,20 @@ end
 function _add_multicol_res(res::NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}},
                            newdf::DataFrame, df::AbstractDataFrame,
                            colnames::AbstractVector{Symbol},
-                           allow_resizing_newdf::Ref{Bool}, @nospecialize(fun),
+                           allow_resizing_newdf::Ref{Bool}, wfun::Ref{Any},
                            col_idx::Union{Nothing, Int, AbstractVector{Int}, AsTable},
                            copycols::Bool, newname::Union{Nothing, Type{AsTable}, AbstractVector{Symbol}})
     lr = length(res[1])
-    _fix_existing_columns_for_vector(newdf, df, allow_resizing_newdf, lr, fun)
+    _fix_existing_columns_for_vector(newdf, df, allow_resizing_newdf, lr, wfun)
     @assert length(colnames) == length(res)
     for (newname, v) in zip(colnames, res)
-        _add_col_check_copy(newdf, df, col_idx, copycols, fun, newname, v)
+        _add_col_check_copy(newdf, df, col_idx, copycols, wfun, newname, v)
     end
 end
 
 function _add_multicol_res(res::NamedTuple, newdf::DataFrame, df::AbstractDataFrame,
                            colnames::AbstractVector{Symbol},
-                           allow_resizing_newdf::Ref{Bool}, @nospecialize(fun),
+                           allow_resizing_newdf::Ref{Bool}, wfun::Ref{Any},
                            col_idx::Union{Nothing, Int, AbstractVector{Int}, AsTable},
                            copycols::Bool, newname::Union{Nothing, Type{AsTable}, AbstractVector{Symbol}})
     if any(v -> v isa AbstractVector, res)
@@ -500,32 +508,33 @@ end
 
 function _add_multicol_res(res::DataFrameRow, newdf::DataFrame, df::AbstractDataFrame,
                            colnames::AbstractVector{Symbol},
-                           allow_resizing_newdf::Ref{Bool}, @nospecialize(fun),
+                           allow_resizing_newdf::Ref{Bool}, wfun::Ref{Any},
                            col_idx::Union{Nothing, Int, AbstractVector{Int}, AsTable},
                            copycols::Bool, newname::Union{Nothing, Type{AsTable}, AbstractVector{Symbol}})
     _insert_row_multicolumn(newdf, df, allow_resizing_newdf, colnames, res)
 end
 
-function select_transform!(@nospecialize(nc::Union{Base.Callable, Pair{<:Union{Int, AbstractVector{Int}, AsTable},
-                                                                       <:Pair{<:Base.Callable,
-                                                                              <:Union{Symbol,
-                                                                                      AbstractVector{Symbol},
-                                                                                      DataType}}}}),
-                           df::AbstractDataFrame, newdf::DataFrame,
+function select_transform!(wnc::Ref{Any}, df::AbstractDataFrame, newdf::DataFrame,
                            transformed_cols::Set{Symbol}, copycols::Bool,
                            allow_resizing_newdf::Ref{Bool})
+    nc = only(wnc)
+    @assert nc isa Union{Base.Callable,
+                         Pair{<:Union{Int, AbstractVector{Int}, AsTable},
+                              <:Pair{<:Base.Callable, <:Union{Symbol, AbstractVector{Symbol}, DataType}}}}
     if nc isa Base.Callable
         col_idx, fun, newname = nothing, nc, nothing
     else
         col_idx, (fun, newname) = nc
     end
+    wfun = Ref{Any}(fun)
+
     if newname isa DataType
         newname === AsTable || throw(ArgumentError("Only DataType supported as target is AsTable"))
     end
     # It is allowed to request a tranformation operation into a newname column
     # only once. This is ensured by the logic related to transformed_cols dictionaly
     # in _manipulate, therefore in select_transform! such a duplicate should not happen
-    res = _transformation_helper(df, col_idx, fun)
+    res = _transformation_helper(df, col_idx, Ref{Any}(fun))
 
     if newname === AsTable || newname isa AbstractVector{Symbol}
         res = _expand_to_table(res)
@@ -546,7 +555,7 @@ function select_transform!(@nospecialize(nc::Union{Base.Callable, Pair{<:Union{I
             union!(transformed_cols, colnames)
             @assert startlen + length(colnames) == length(transformed_cols)
         end
-        _add_multicol_res(res, newdf, df, colnames, allow_resizing_newdf, fun,
+        _add_multicol_res(res, newdf, df, colnames, allow_resizing_newdf, wfun,
                           col_idx, copycols, newname)
     elseif res isa AbstractVector
         if newname === nothing
@@ -558,8 +567,8 @@ function select_transform!(@nospecialize(nc::Union{Base.Callable, Pair{<:Union{I
             push!(transformed_cols, newname)
         end
         lr = length(res)
-        _fix_existing_columns_for_vector(newdf, df, allow_resizing_newdf, lr, fun)
-        _add_col_check_copy(newdf, df, col_idx, copycols, fun, newname, res)
+        _fix_existing_columns_for_vector(newdf, df, allow_resizing_newdf, lr, wfun)
+        _add_col_check_copy(newdf, df, col_idx, copycols, wfun, newname, res)
     else
         if newname === nothing
             newname = :x1
@@ -609,7 +618,7 @@ See [`select`](@ref) for examples.
 ```
 
 """
-select!(df::DataFrame, @nospecialize(args...); renamecols::Bool=true) =
+select!(df::DataFrame, args...; renamecols::Bool=true) =
     _replace_columns!(df, select(df, args..., copycols=false, renamecols=renamecols))
 
 function select!(arg::Base.Callable, df::AbstractDataFrame; renamecols::Bool=true)
@@ -639,7 +648,7 @@ $TRANSFORMATION_COMMON_RULES
 
 See [`select`](@ref) for examples.
 """
-transform!(df::DataFrame, @nospecialize(args...); renamecols::Bool=true) =
+transform!(df::DataFrame, args...; renamecols::Bool=true) =
     select!(df, :, args..., renamecols=renamecols)
 
 function transform!(arg::Base.Callable, df::AbstractDataFrame; renamecols::Bool=true)
@@ -853,7 +862,7 @@ julia> select(gd, :, AsTable(Not(:a)) => sum, renamecols=false)
 ```
 
 """
-select(df::AbstractDataFrame, @nospecialize(args...); copycols::Bool=true, renamecols::Bool=true) =
+select(df::AbstractDataFrame, args...; copycols::Bool=true, renamecols::Bool=true) =
     manipulate(df, args..., copycols=copycols, keeprows=true, renamecols=renamecols)
 
 function select(arg::Base.Callable, df::AbstractDataFrame; renamecols::Bool=true)
@@ -919,7 +928,7 @@ ERROR: ArgumentError: column :x in returned data frame is not equal to grouping
 
 See [`select`](@ref) for more examples.
 """
-transform(df::AbstractDataFrame, @nospecialize(args...); copycols::Bool=true, renamecols::Bool=true) =
+transform(df::AbstractDataFrame, args...; copycols::Bool=true, renamecols::Bool=true) =
     select(df, :, args..., copycols=copycols, renamecols=renamecols)
 
 function transform(arg::Base.Callable, df::AbstractDataFrame; renamecols::Bool=true)
@@ -1172,7 +1181,7 @@ julia> combine(gd, :, AsTable(Not(:a)) => sum, renamecols=false)
    8 │     4      1      8      9
 ```
 """
-combine(df::AbstractDataFrame, @nospecialize(args...); renamecols::Bool=true) =
+combine(df::AbstractDataFrame, args...; renamecols::Bool=true) =
     manipulate(df, args..., copycols=true, keeprows=false, renamecols=renamecols)
 
 function combine(arg::Base.Callable, df::AbstractDataFrame; renamecols::Bool=true)
@@ -1206,7 +1215,7 @@ manipulate(df::DataFrame, c::ColumnIndex; copycols::Bool, keeprows::Bool,
            renamecols::Bool) =
     manipulate(df, [c], copycols=copycols, keeprows=keeprows, renamecols=renamecols)
 
-function manipulate(df::DataFrame, @nospecialize(cs...); copycols::Bool, keeprows::Bool, renamecols::Bool)
+function manipulate(df::DataFrame, cs...; copycols::Bool, keeprows::Bool, renamecols::Bool)
     cs_vec = []
     for v in cs
         if v isa AbstractVecOrMat{<:Pair}
@@ -1215,11 +1224,11 @@ function manipulate(df::DataFrame, @nospecialize(cs...); copycols::Bool, keeprow
             push!(cs_vec, v)
         end
     end
-    return _manipulate(df, [normalize_selection(index(df), c, renamecols) for c in cs_vec],
+    return _manipulate(df, Any[normalize_selection(index(df), c, renamecols) for c in cs_vec],
                     copycols, keeprows)
 end
 
-function _manipulate(df::AbstractDataFrame, @nospecialize(normalized_cs), copycols::Bool, keeprows::Bool)
+function _manipulate(df::AbstractDataFrame, normalized_cs::Vector{Any}, copycols::Bool, keeprows::Bool)
     @assert !(df isa SubDataFrame && copycols==false)
     newdf = DataFrame()
     # the role of transformed_cols is the following
@@ -1287,7 +1296,7 @@ function _manipulate(df::AbstractDataFrame, @nospecialize(normalized_cs), copyco
                 end
             end
         else
-            select_transform!(nc, df, newdf, transformed_cols, copycols,
+            select_transform!(Ref{Any}(nc), df, newdf, transformed_cols, copycols,
                               allow_resizing_newdf)
         end
     end
@@ -1308,7 +1317,7 @@ function manipulate(dfv::SubDataFrame, args::MultiColumnIndex;
     end
 end
 
-function manipulate(dfv::SubDataFrame, @nospecialize(args...); copycols::Bool, keeprows::Bool,
+function manipulate(dfv::SubDataFrame, args...; copycols::Bool, keeprows::Bool,
                     renamecols::Bool)
     if copycols
         cs_vec = []
@@ -1319,7 +1328,7 @@ function manipulate(dfv::SubDataFrame, @nospecialize(args...); copycols::Bool, k
                 push!(cs_vec, v)
             end
         end
-        return _manipulate(dfv, [normalize_selection(index(dfv), c, renamecols) for c in cs_vec],
+        return _manipulate(dfv, Any[normalize_selection(index(dfv), c, renamecols) for c in cs_vec],
                            true, keeprows)
     else
         # we do not support transformations here
@@ -1348,3 +1357,5 @@ function manipulate(dfv::SubDataFrame, @nospecialize(args...); copycols::Bool, k
         return view(dfv, :, Cols(newinds...))
     end
 end
+
+@specialize

From 3fe9afbc702c412daba71e181cfb20a4472a6e58 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Wed, 31 Mar 2021 14:38:56 +0200
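Subject: [PATCH 10/22] keep simple selections fast

Move the manipulate methods that take an AbstractVector{Int}, a
MultiColumnIndex or a single ColumnIndex below @specialize, so they are
no longer inside the @nospecialize region of selection.jl. The
ColumnIndex methods now resolve the column first and forward
Int[index(...)[c]] instead of [c].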

---
 src/abstractdataframe/selection.jl | 66 +++++++++++++++---------------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl
index c0a07f6532..eac61e38c1 100644
--- a/src/abstractdataframe/selection.jl
+++ b/src/abstractdataframe/selection.jl
@@ -1196,25 +1196,6 @@ combine(f::Pair, gd::AbstractDataFrame; renamecols::Bool=true) =
                         "You can pass a `Pair` as the second argument of the transformation. If you want the return " *
                         "value to be processed as having multiple columns add `=> AsTable` suffix to the pair."))
 
-manipulate(df::DataFrame, args::AbstractVector{Int}; copycols::Bool, keeprows::Bool,
-           renamecols::Bool) =
-    DataFrame(_columns(df)[args], Index(_names(df)[args]), copycols=copycols)
-
-function manipulate(df::DataFrame, c::MultiColumnIndex; copycols::Bool, keeprows::Bool,
-                    renamecols::Bool)
-    if c isa AbstractVector{<:Pair}
-        return manipulate(df, c..., copycols=copycols, keeprows=keeprows,
-                          renamecols=renamecols)
-    else
-        return manipulate(df, index(df)[c], copycols=copycols, keeprows=keeprows,
-                          renamecols=renamecols)
-    end
-end
-
-manipulate(df::DataFrame, c::ColumnIndex; copycols::Bool, keeprows::Bool,
-           renamecols::Bool) =
-    manipulate(df, [c], copycols=copycols, keeprows=keeprows, renamecols=renamecols)
-
 function manipulate(df::DataFrame, cs...; copycols::Bool, keeprows::Bool, renamecols::Bool)
     cs_vec = []
     for v in cs
@@ -1303,20 +1284,6 @@ function _manipulate(df::AbstractDataFrame, normalized_cs::Vector{Any}, copycols
     return newdf
 end
 
-manipulate(dfv::SubDataFrame, ind::ColumnIndex; copycols::Bool, keeprows::Bool,
-           renamecols::Bool) =
-    manipulate(dfv, [ind], copycols=copycols, keeprows=keeprows, renamecols=renamecols)
-
-function manipulate(dfv::SubDataFrame, args::MultiColumnIndex;
-                 copycols::Bool, keeprows::Bool, renamecols::Bool)
-    if args isa AbstractVector{<:Pair}
-        return manipulate(dfv, args..., copycols=copycols, keeprows=keeprows,
-                          renamecols=renamecols)
-    else
-        return copycols ? dfv[:, args] : view(dfv, :, args)
-    end
-end
-
 function manipulate(dfv::SubDataFrame, args...; copycols::Bool, keeprows::Bool,
                     renamecols::Bool)
     if copycols
@@ -1359,3 +1326,36 @@ function manipulate(dfv::SubDataFrame, args...; copycols::Bool, keeprows::Bool,
 end
 
 @specialize
+
+manipulate(df::DataFrame, args::AbstractVector{Int}; copycols::Bool, keeprows::Bool,
+           renamecols::Bool) =
+    DataFrame(_columns(df)[args], Index(_names(df)[args]), copycols=copycols)
+
+function manipulate(df::DataFrame, c::MultiColumnIndex; copycols::Bool, keeprows::Bool,
+                    renamecols::Bool)
+    if c isa AbstractVector{<:Pair}
+        return manipulate(df, c..., copycols=copycols, keeprows=keeprows,
+                          renamecols=renamecols)
+    else
+        return manipulate(df, index(df)[c], copycols=copycols, keeprows=keeprows,
+                          renamecols=renamecols)
+    end
+end
+
+function manipulate(dfv::SubDataFrame, args::MultiColumnIndex;
+                 copycols::Bool, keeprows::Bool, renamecols::Bool)
+    if args isa AbstractVector{<:Pair}
+        return manipulate(dfv, args..., copycols=copycols, keeprows=keeprows,
+                          renamecols=renamecols)
+    else
+        return copycols ? dfv[:, args] : view(dfv, :, args)
+    end
+end
+
+manipulate(df::DataFrame, c::ColumnIndex; copycols::Bool, keeprows::Bool,
+           renamecols::Bool) =
+    manipulate(df, Int[index(df)[c]], copycols=copycols, keeprows=keeprows, renamecols=renamecols)
+
+manipulate(dfv::SubDataFrame, c::ColumnIndex; copycols::Bool, keeprows::Bool,
+           renamecols::Bool) =
+    manipulate(dfv, Int[index(df)[c]], copycols=copycols, keeprows=keeprows, renamecols=renamecols)

From 83dfae5a7aff295d4959d56eef4590ab0e606ab6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Wed, 31 Mar 2021 21:56:20 +0200
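Subject: [PATCH 11/22] fix copy-paste error

The manipulate(dfv::SubDataFrame, c::ColumnIndex) method looked the
column up with index(df), but its data frame argument is named dfv and
df is not a local name there. Use index(dfv).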

---
 src/abstractdataframe/selection.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl
index eac61e38c1..9573dfa56c 100644
--- a/src/abstractdataframe/selection.jl
+++ b/src/abstractdataframe/selection.jl
@@ -1358,4 +1358,4 @@ manipulate(df::DataFrame, c::ColumnIndex; copycols::Bool, keeprows::Bool,
 
 manipulate(dfv::SubDataFrame, c::ColumnIndex; copycols::Bool, keeprows::Bool,
            renamecols::Bool) =
-    manipulate(dfv, Int[index(df)[c]], copycols=copycols, keeprows=keeprows, renamecols=renamecols)
+    manipulate(dfv, Int[index(dfv)[c]], copycols=copycols, keeprows=keeprows, renamecols=renamecols)

From fac78ffa04182916156ad521776ea4defa24b72c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Thu, 1 Apr 2021 00:46:59 +0200
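Subject: [PATCH 12/22] do not use broadcasting in ByRow

Replace the dotted calls f.fun.(...) in the two ByRow call methods: use
map(f.fun, cols...) for AbstractVector arguments and an array
comprehension over Tables.namedtupleiterator(table) for a NamedTuple.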

---
 src/abstractdataframe/selection.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl
index 9573dfa56c..5f2de5e088 100644
--- a/src/abstractdataframe/selection.jl
+++ b/src/abstractdataframe/selection.jl
@@ -168,8 +168,8 @@ struct ByRow{T} <: Function
     fun::T
 end
 
-(f::ByRow)(cols::AbstractVector...) = f.fun.(cols...)
-(f::ByRow)(table::NamedTuple) = f.fun.(Tables.namedtupleiterator(table))
+(f::ByRow)(cols::AbstractVector...) = map(f.fun, cols...)
+(f::ByRow)(table::NamedTuple) = [f.fun(nt) for nt in Tables.namedtupleiterator(table)]
 
 # add a method to funname defined in other/utils.jl
 funname(row::ByRow) = funname(row.fun)

From 937ad7e68b3b2fd8aee9db173a1466b2af4157b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Thu, 1 Apr 2021 15:30:55 +0200
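Subject: [PATCH 13/22] nospecialize selectively

Remove the bare @nospecialize/@specialize markers from selection.jl and
the bare @nospecialize from subset.jl, and annotate individual arguments
with @nospecialize(...) instead.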

---
 src/abstractdataframe/selection.jl        | 60 +++++++++++------------
 src/abstractdataframe/subset.jl           | 12 ++---
 src/groupeddataframe/complextransforms.jl | 19 ++++---
 src/groupeddataframe/splitapplycombine.jl | 57 +++++++++++----------
 4 files changed, 72 insertions(+), 76 deletions(-)

diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl
index 5f2de5e088..0cf19c2791 100644
--- a/src/abstractdataframe/selection.jl
+++ b/src/abstractdataframe/selection.jl
@@ -174,9 +174,7 @@ end
 # add a method to funname defined in other/utils.jl
 funname(row::ByRow) = funname(row.fun)
 
-@nospecialize
-
-normalize_selection(idx::AbstractIndex, sel, renamecols::Bool) =
+normalize_selection(idx::AbstractIndex, @nospecialize(sel), renamecols::Bool) =
     try
         idx[sel]
     catch e
@@ -187,7 +185,7 @@ normalize_selection(idx::AbstractIndex, sel, renamecols::Bool) =
         end
     end
 
-normalize_selection(idx::AbstractIndex, sel::Base.Callable, renamecols::Bool) = sel
+normalize_selection(idx::AbstractIndex, @nospecialize(sel::Base.Callable), renamecols::Bool) = sel
 normalize_selection(idx::AbstractIndex, sel::Colon, renamecols::Bool) = idx[:]
 
 normalize_selection(idx::AbstractIndex, sel::Pair{typeof(nrow), Symbol},
@@ -215,20 +213,20 @@ normalize_selection(idx::AbstractIndex, sel::Pair{<:ColumnIndex, <:AbstractStrin
     normalize_selection(idx, first(sel) => Symbol(last(sel)), renamecols::Bool)
 
 function normalize_selection(idx::AbstractIndex,
-                             sel::Pair{<:ColumnIndex,
-                                       <:Pair{<:Base.Callable,
-                                              <:Union{Symbol, AbstractString}}},
+                             @nospecialize(sel::Pair{<:ColumnIndex,
+                                                     <:Pair{<:Base.Callable,
+                                                            <:Union{Symbol, AbstractString}}}),
                              renamecols::Bool)
     src, (fun, dst) = sel
     return idx[src] => fun => Symbol(dst)
 end
 
 function normalize_selection(idx::AbstractIndex,
-                             sel::Pair{<:Any,
-                                       <:Pair{<:Base.Callable,
-                                              <:Union{Symbol, AbstractString, DataType,
-                                                      AbstractVector{Symbol},
-                                                      AbstractVector{<:AbstractString}}}},
+                             @nospecialize(sel::Pair{<:Any,
+                                                     <:Pair{<:Base.Callable,
+                                                            <:Union{Symbol, AbstractString, DataType,
+                                                                    AbstractVector{Symbol},
+                                                                    AbstractVector{<:AbstractString}}}}),
                              renamecols::Bool)
     lls = last(last(sel))
     if lls isa DataType
@@ -270,7 +268,7 @@ function normalize_selection(idx::AbstractIndex,
 end
 
 function normalize_selection(idx::AbstractIndex,
-                             sel::Pair{<:ColumnIndex, <:Base.Callable}, renamecols::Bool)
+                             @nospecialize(sel::Pair{<:ColumnIndex, <:Base.Callable}), renamecols::Bool)
     c = idx[first(sel)]
     fun = last(sel)
     if renamecols
@@ -282,7 +280,7 @@ function normalize_selection(idx::AbstractIndex,
 end
 
 function normalize_selection(idx::AbstractIndex,
-                             sel::Pair{<:Any, <:Base.Callable}, renamecols::Bool)
+                             @nospecialize(sel::Pair{<:Any, <:Base.Callable}), renamecols::Bool)
     if first(sel) isa AsTable
         rawc = first(sel).cols
         wanttable = true
@@ -401,7 +399,7 @@ end
 
 function _insert_row_multicolumn(newdf::DataFrame, df::AbstractDataFrame,
                                  allow_resizing_newdf::Ref{Bool}, colnames::AbstractVector{Symbol},
-                                 res::Union{NamedTuple, DataFrameRow})
+                                 @nospecialize(res::Union{NamedTuple, DataFrameRow}))
     if ncol(newdf) == 0
         # if allow_resizing_newdf[] is false we know this is select or transform
         rows = allow_resizing_newdf[] ? 1 : nrow(df)
@@ -480,7 +478,7 @@ function _add_multicol_res(res::AbstractMatrix, newdf::DataFrame, df::AbstractDa
     end
 end
 
-function _add_multicol_res(res::NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}},
+function _add_multicol_res(@nospecialize(res::NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}),
                            newdf::DataFrame, df::AbstractDataFrame,
                            colnames::AbstractVector{Symbol},
                            allow_resizing_newdf::Ref{Bool}, wfun::Ref{Any},
@@ -494,7 +492,7 @@ function _add_multicol_res(res::NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}
     end
 end
 
-function _add_multicol_res(res::NamedTuple, newdf::DataFrame, df::AbstractDataFrame,
+function _add_multicol_res(@nospecialize(res::NamedTuple), newdf::DataFrame, df::AbstractDataFrame,
                            colnames::AbstractVector{Symbol},
                            allow_resizing_newdf::Ref{Bool}, wfun::Ref{Any},
                            col_idx::Union{Nothing, Int, AbstractVector{Int}, AsTable},
@@ -618,10 +616,10 @@ See [`select`](@ref) for examples.
 ```
 
 """
-select!(df::DataFrame, args...; renamecols::Bool=true) =
+select!(df::DataFrame, @nospecialize(args...); renamecols::Bool=true) =
     _replace_columns!(df, select(df, args..., copycols=false, renamecols=renamecols))
 
-function select!(arg::Base.Callable, df::AbstractDataFrame; renamecols::Bool=true)
+function select!(@nospecialize(arg::Base.Callable), df::AbstractDataFrame; renamecols::Bool=true)
     if arg isa Colon
         throw(ArgumentError("First argument must be a transformation if the second argument is a data frame"))
     end
@@ -648,10 +646,10 @@ $TRANSFORMATION_COMMON_RULES
 
 See [`select`](@ref) for examples.
 """
-transform!(df::DataFrame, args...; renamecols::Bool=true) =
+transform!(df::DataFrame, @nospecialize(args...); renamecols::Bool=true) =
     select!(df, :, args..., renamecols=renamecols)
 
-function transform!(arg::Base.Callable, df::AbstractDataFrame; renamecols::Bool=true)
+function transform!(@nospecialize(arg::Base.Callable), df::AbstractDataFrame; renamecols::Bool=true)
     if arg isa Colon
         throw(ArgumentError("First argument must be a transformation if the second argument is a data frame"))
     end
@@ -862,10 +860,10 @@ julia> select(gd, :, AsTable(Not(:a)) => sum, renamecols=false)
 ```
 
 """
-select(df::AbstractDataFrame, args...; copycols::Bool=true, renamecols::Bool=true) =
+select(df::AbstractDataFrame, @nospecialize(args...); copycols::Bool=true, renamecols::Bool=true) =
     manipulate(df, args..., copycols=copycols, keeprows=true, renamecols=renamecols)
 
-function select(arg::Base.Callable, df::AbstractDataFrame; renamecols::Bool=true)
+function select(@nospecialize(arg::Base.Callable), df::AbstractDataFrame; renamecols::Bool=true)
     if arg isa Colon
         throw(ArgumentError("First argument must be a transformation if the second argument is a data frame"))
     end
@@ -928,10 +926,10 @@ ERROR: ArgumentError: column :x in returned data frame is not equal to grouping
 
 See [`select`](@ref) for more examples.
 """
-transform(df::AbstractDataFrame, args...; copycols::Bool=true, renamecols::Bool=true) =
+transform(df::AbstractDataFrame, @nospecialize(args...); copycols::Bool=true, renamecols::Bool=true) =
     select(df, :, args..., copycols=copycols, renamecols=renamecols)
 
-function transform(arg::Base.Callable, df::AbstractDataFrame; renamecols::Bool=true)
+function transform(@nospecialize(arg::Base.Callable), df::AbstractDataFrame; renamecols::Bool=true)
     if arg isa Colon
         throw(ArgumentError("First argument to must be a transformation if the second argument is a data frame"))
     end
@@ -1181,22 +1179,22 @@ julia> combine(gd, :, AsTable(Not(:a)) => sum, renamecols=false)
    8 │     4      1      8      9
 ```
 """
-combine(df::AbstractDataFrame, args...; renamecols::Bool=true) =
+combine(df::AbstractDataFrame, @nospecialize(args...); renamecols::Bool=true) =
     manipulate(df, args..., copycols=true, keeprows=false, renamecols=renamecols)
 
-function combine(arg::Base.Callable, df::AbstractDataFrame; renamecols::Bool=true)
+function combine(@nospecialize(arg::Base.Callable), df::AbstractDataFrame; renamecols::Bool=true)
     if arg isa Colon
         throw(ArgumentError("First argument to select! must be a transformation if the second argument is a data frame"))
     end
     return combine(df, arg)
 end
 
-combine(f::Pair, gd::AbstractDataFrame; renamecols::Bool=true) =
+combine(@nospecialize(f::Pair), gd::AbstractDataFrame; renamecols::Bool=true) =
     throw(ArgumentError("First argument must be a transformation if the second argument is a data frame. " *
                         "You can pass a `Pair` as the second argument of the transformation. If you want the return " *
                         "value to be processed as having multiple columns add `=> AsTable` suffix to the pair."))
 
-function manipulate(df::DataFrame, cs...; copycols::Bool, keeprows::Bool, renamecols::Bool)
+function manipulate(df::DataFrame, @nospecialize(cs...); copycols::Bool, keeprows::Bool, renamecols::Bool)
     cs_vec = []
     for v in cs
         if v isa AbstractVecOrMat{<:Pair}
@@ -1284,7 +1282,7 @@ function _manipulate(df::AbstractDataFrame, normalized_cs::Vector{Any}, copycols
     return newdf
 end
 
-function manipulate(dfv::SubDataFrame, args...; copycols::Bool, keeprows::Bool,
+function manipulate(dfv::SubDataFrame, @nospecialize(args...); copycols::Bool, keeprows::Bool,
                     renamecols::Bool)
     if copycols
         cs_vec = []
@@ -1325,8 +1323,6 @@ function manipulate(dfv::SubDataFrame, args...; copycols::Bool, keeprows::Bool,
     end
 end
 
-@specialize
-
 manipulate(df::DataFrame, args::AbstractVector{Int}; copycols::Bool, keeprows::Bool,
            renamecols::Bool) =
     DataFrame(_columns(df)[args], Index(_names(df)[args]), copycols=copycols)
diff --git a/src/abstractdataframe/subset.jl b/src/abstractdataframe/subset.jl
index 3025138824..5d15345c46 100644
--- a/src/abstractdataframe/subset.jl
+++ b/src/abstractdataframe/subset.jl
@@ -31,8 +31,6 @@ function _and_missing(x::Any...)
                         "but only true, false, or missing are allowed"))
 end
 
-@nospecialize
-
 # Note that _get_subset_conditions will have a large compilation time
 # if more than 32 conditions are passed as `args`.
 function _get_subset_conditions(df::Union{AbstractDataFrame, GroupedDataFrame},
@@ -158,13 +156,13 @@ julia> subset(groupby(df, :y), :v => x -> x .> minimum(x))
    2 │     4  false  false  missing     12
 ```
 """
-function subset(df::AbstractDataFrame, args...;
+function subset(df::AbstractDataFrame, @nospecialize(args...);
                 skipmissing::Bool=false, view::Bool=false)
     row_selector = _get_subset_conditions(df, Ref{Any}(args), skipmissing)
     return view ? Base.view(df, row_selector, :) : df[row_selector, :]
 end
 
-function subset(gdf::GroupedDataFrame, args...;
+function subset(gdf::GroupedDataFrame, @nospecialize(args...);
                 skipmissing::Bool=false, view::Bool=false,
                         ungroup::Bool=true)
     row_selector = _get_subset_conditions(gdf, Ref{Any}(args), skipmissing)
@@ -273,12 +271,12 @@ julia> df
    2 │     4  false  false  missing     12
 ```
 """
-function subset!(df::AbstractDataFrame, args...; skipmissing::Bool=false)
+function subset!(df::AbstractDataFrame, @nospecialize(args...); skipmissing::Bool=false)
     row_selector = _get_subset_conditions(df, Ref{Any}(args), skipmissing)
     return delete!(df, findall(!, row_selector))
 end
 
-function subset!(gdf::GroupedDataFrame, args...; skipmissing::Bool=false,
+function subset!(gdf::GroupedDataFrame, @nospecialize(args...); skipmissing::Bool=false,
                  ungroup::Bool=true)
     row_selector = _get_subset_conditions(gdf, Ref{Any}(args), skipmissing)
     df = parent(gdf)
@@ -286,5 +284,3 @@ function subset!(gdf::GroupedDataFrame, args...; skipmissing::Bool=false,
     # TODO: in some cases it might be faster to groupby gdf.groups[row_selector]
     return ungroup ? res : groupby(res, groupcols(gdf))
 end
-
-@specialize
diff --git a/src/groupeddataframe/complextransforms.jl b/src/groupeddataframe/complextransforms.jl
index 72b2928685..0c20dc9edb 100644
--- a/src/groupeddataframe/complextransforms.jl
+++ b/src/groupeddataframe/complextransforms.jl
@@ -4,8 +4,11 @@ _nrow(x::NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}) =
 _ncol(df::AbstractDataFrame) = ncol(df)
 _ncol(x::Union{NamedTuple, DataFrameRow}) = length(x)
 
-function _combine_multicol(firstres, fun::Base.Callable, gd::GroupedDataFrame,
-                           incols::Union{Nothing, AbstractVector, Tuple, NamedTuple})
+function _combine_multicol(wfirstres::Ref{Any}, wfun::Ref{Any}, gd::GroupedDataFrame,
+                           wincols::Ref{Any})
+    firstres = only(wfirstres)
+    @assert only(wfun) isa Base.Callable
+    @assert only(wincols) isa Union{Nothing, AbstractVector, Tuple, NamedTuple}
     firstmulticol = firstres isa MULTI_COLS_TYPE
     if !(firstres isa Union{AbstractVecOrMat, AbstractDataFrame,
                             NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}})
@@ -14,17 +17,19 @@ function _combine_multicol(firstres, fun::Base.Callable, gd::GroupedDataFrame,
     else
         idx_agg = NOTHING_IDX_AGG
     end
-    return _combine_with_first(Ref{Any}(wrap(firstres)), Ref{Any}(fun), gd, incols,
+    return _combine_with_first(Ref{Any}(wrap(firstres)), wfun, gd, wincols,
                                Val(firstmulticol), idx_agg)
 end
 
-function _combine_with_first(first::Ref{Any},
+function _combine_with_first(wfirst::Ref{Any},
                              f::Ref{Any}, gd::GroupedDataFrame,
-                             incols::Union{Nothing, AbstractVector, Tuple, NamedTuple},
+                             wincols::Ref{Any},
                              firstmulticol::Val, idx_agg::Vector{Int})
-    @assert only(first) isa Union{NamedTuple, DataFrameRow, AbstractDataFrame}
     @assert only(f) isa Base.Callable
-    first = only(first)
+    incols = only(wincols)
+    @assert incols isa Union{Nothing, AbstractVector, Tuple, NamedTuple}
+    first = only(wfirst)
+    @assert first isa Union{NamedTuple, DataFrameRow, AbstractDataFrame}
     extrude = false
 
     if first isa AbstractDataFrame
diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl
index 996c82cd0d..d56e1e07d8 100644
--- a/src/groupeddataframe/splitapplycombine.jl
+++ b/src/groupeddataframe/splitapplycombine.jl
@@ -284,7 +284,7 @@ function _combine_process_callable(wcs_i::Ref{Any},
     cs_i = only(wcs_i)
     @assert cs_i isa Base.Callable
     firstres = length(gd) > 0 ? cs_i(gd[1]) : cs_i(similar(parentdf, 0))
-    idx, outcols, nms = _combine_multicol(firstres, cs_i, gd, nothing)
+    idx, outcols, nms = _combine_multicol(Ref{Any}(firstres), wcs_i, gd, Ref{Any}(nothing))
 
     if !(firstres isa Union{AbstractVecOrMat, AbstractDataFrame,
                             NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}})
@@ -331,11 +331,12 @@ function _combine_process_pair_symbol(optional_i::Bool,
                                       idx_agg::Ref{Vector{Int}},
                                       out_col_name::Symbol,
                                       firstmulticol::Bool,
-                                      firstres::Any,
+                                      wfirstres::Ref{Any},
                                       wfun::Ref{Any},
-                                      incols::Union{Tuple, NamedTuple})
-    fun = only(wfun)
-    @assert fun isa Base.Callable
+                                      wincols::Ref{Any})
+    firstres = only(wfirstres)
+    @assert only(wfun) isa Base.Callable
+    @assert only(wincols) isa Union{Tuple, NamedTuple}
 
     if firstmulticol
         throw(ArgumentError("a single value or vector result is required (got $(typeof(firstres)))"))
@@ -355,7 +356,7 @@ function _combine_process_pair_symbol(optional_i::Bool,
     # the last argument passed to _combine_with_first informs it about precomputed
     # idx. Currently we do it only for single-row return values otherwise we pass
     # NOTHING_IDX_AGG to signal that idx has to be computed in _combine_with_first
-    idx, outcols, _ = _combine_with_first(Ref{Any}(wrap(firstres)), Ref{Any}(fun), gd, incols,
+    idx, outcols, _ = _combine_with_first(Ref{Any}(wrap(firstres)), wfun, gd, wincols,
                                           Val(firstmulticol),
                                           firstres isa AbstractVector ? NOTHING_IDX_AGG : idx_agg[])
     @assert length(outcols) == 1
@@ -387,13 +388,15 @@ function _combine_process_pair_astable(optional_i::Bool,
                                        idx_agg::Ref{Vector{Int}},
                                        out_col_name::Union{Type{AsTable}, AbstractVector{Symbol}},
                                        firstmulticol::Bool,
-                                       firstres::Any,
+                                       wfirstres::Ref{Any},
                                        wfun::Ref{Any},
-                                       incols::Union{Tuple, NamedTuple})
+                                       wincols::Ref{Any})
+    firstres = only(wfirstres)
     fun = only(wfun)
     @assert fun isa Base.Callable
+    @assert only(wincols) isa Union{Tuple, NamedTuple}
     if firstres isa AbstractVector
-        idx, outcol_vec, _ = _combine_with_first(Ref{Any}(wrap(firstres)), Ref{Any}(fun), gd, incols,
+        idx, outcol_vec, _ = _combine_with_first(Ref{Any}(wrap(firstres)), wfun, gd, wincols,
                                               Val(firstmulticol), NOTHING_IDX_AGG)
         @assert length(outcol_vec) == 1
         res = outcol_vec[1]
@@ -416,7 +419,7 @@ function _combine_process_pair_astable(optional_i::Bool,
             oldfun = fun
             fun = (x...) -> Tables.columntable(oldfun(x...))
         end
-        idx, outcols, nms = _combine_multicol(firstres, fun, gd, incols)
+        idx, outcols, nms = _combine_multicol(Ref{Any}(firstres), Ref{Any}(fun), gd, wincols)
 
         if !(firstres isa Union{AbstractVecOrMat, AbstractDataFrame,
             NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}})
@@ -497,13 +500,13 @@ function _combine_process_pair(wcs_i::Ref{Any},
 
     if out_col_name isa Symbol
         return _combine_process_pair_symbol(optional_i, gd, seen_cols, trans_res, idx_agg,
-                                            out_col_name, firstmulticol, firstres,
-                                            Ref{Any}(fun), incols)
+                                            out_col_name, firstmulticol, Ref{Any}(firstres),
+                                            Ref{Any}(fun), Ref{Any}(incols))
     end
     if out_col_name == AsTable || out_col_name isa AbstractVector{Symbol}
         return _combine_process_pair_astable(optional_i, gd, seen_cols, trans_res, idx_agg,
-                                             out_col_name, firstmulticol, firstres,
-                                             Ref{Any}(fun), incols)
+                                             out_col_name, firstmulticol, Ref{Any}(firstres),
+                                             Ref{Any}(fun), Ref{Any}(incols))
     end
     throw(ArgumentError("unsupported target column name specifier $out_col_name"))
 end
@@ -684,9 +687,7 @@ function _combine(gd::GroupedDataFrame,
     return idx, DataFrame(outcols, nms, copycols=false)
 end
 
-@nospecialize
-
-function combine(f::Base.Callable, gd::GroupedDataFrame;
+function combine(@nospecialize(f::Base.Callable), gd::GroupedDataFrame;
                  keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true)
     if f isa Colon
         throw(ArgumentError("First argument must be a transformation if the second argument is a GroupedDataFrame"))
@@ -694,19 +695,19 @@ function combine(f::Base.Callable, gd::GroupedDataFrame;
     return combine(gd, f, keepkeys=keepkeys, ungroup=ungroup, renamecols=renamecols)
 end
 
-combine(f::Pair, gd::GroupedDataFrame;
+combine(@nospecialize(f::Pair), gd::GroupedDataFrame;
         keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true) =
     throw(ArgumentError("First argument must be a transformation if the second argument is a GroupedDataFrame. " *
                         "You can pass a `Pair` as the second argument of the transformation. If you want the return " *
                         "value to be processed as having multiple columns add `=> AsTable` suffix to the pair."))
 
 combine(gd::GroupedDataFrame,
-        cs::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...;
+        @nospecialize(cs::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...);
         keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true) =
     _combine_prepare(gd, Ref{Any}(cs), keepkeys=keepkeys, ungroup=ungroup,
                      copycols=true, keeprows=false, renamecols=renamecols)
 
-function select(f::Base.Callable, gd::GroupedDataFrame; copycols::Bool=true,
+function select(@nospecialize(f::Base.Callable), gd::GroupedDataFrame; copycols::Bool=true,
                 keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true)
     if f isa Colon
         throw(ArgumentError("First argument must be a transformation if the second argument is a grouped data frame"))
@@ -715,12 +716,12 @@ function select(f::Base.Callable, gd::GroupedDataFrame; copycols::Bool=true,
 end
 
 
-select(gd::GroupedDataFrame, args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...;
+select(gd::GroupedDataFrame, @nospecialize(args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...);
        copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true) =
     _combine_prepare(gd, Ref{Any}(args), copycols=copycols, keepkeys=keepkeys,
                      ungroup=ungroup, keeprows=true, renamecols=renamecols)
 
-function transform(f::Base.Callable, gd::GroupedDataFrame; copycols::Bool=true,
+function transform(@nospecialize(f::Base.Callable), gd::GroupedDataFrame; copycols::Bool=true,
                 keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true)
     if f isa Colon
         throw(ArgumentError("First argument must be a transformation if the second argument is a grouped data frame"))
@@ -728,7 +729,7 @@ function transform(f::Base.Callable, gd::GroupedDataFrame; copycols::Bool=true,
     return transform(gd, f, copycols=copycols, keepkeys=keepkeys, ungroup=ungroup)
 end
 
-function transform(gd::GroupedDataFrame, args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...;
+function transform(gd::GroupedDataFrame, @nospecialize(args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...);
                    copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true)
     res = select(gd, :, args..., copycols=copycols, keepkeys=keepkeys,
                  ungroup=ungroup, renamecols=renamecols)
@@ -738,7 +739,7 @@ function transform(gd::GroupedDataFrame, args::Union{Pair, Base.Callable, Column
     return res
 end
 
-function select!(f::Base.Callable, gd::GroupedDataFrame; ungroup::Bool=true, renamecols::Bool=true)
+function select!(@nospecialize(f::Base.Callable), gd::GroupedDataFrame; ungroup::Bool=true, renamecols::Bool=true)
     if f isa Colon
         throw(ArgumentError("First argument must be a transformation if the second argument is a grouped data frame"))
     end
@@ -746,7 +747,7 @@ function select!(f::Base.Callable, gd::GroupedDataFrame; ungroup::Bool=true, ren
 end
 
 function select!(gd::GroupedDataFrame{DataFrame},
-                 args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...;
+                 @nospecialize(args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...);
                  ungroup::Bool=true, renamecols::Bool=true)
     newdf = select(gd, args..., copycols=false, renamecols=renamecols)
     df = parent(gd)
@@ -754,7 +755,7 @@ function select!(gd::GroupedDataFrame{DataFrame},
     return ungroup ? df : gd
 end
 
-function transform!(f::Base.Callable, gd::GroupedDataFrame; ungroup::Bool=true, renamecols::Bool=true)
+function transform!(@nospecialize(f::Base.Callable), gd::GroupedDataFrame; ungroup::Bool=true, renamecols::Bool=true)
     if f isa Colon
         throw(ArgumentError("First argument must be a transformation if the second argument is a grouped data frame"))
     end
@@ -762,7 +763,7 @@ function transform!(f::Base.Callable, gd::GroupedDataFrame; ungroup::Bool=true,
 end
 
 function transform!(gd::GroupedDataFrame{DataFrame},
-                    args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...;
+                    @nospecialize(args::Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}...);
                     ungroup::Bool=true, renamecols::Bool=true)
     newdf = select(gd, :, args..., copycols=false, renamecols=renamecols)
     df = parent(gd)
@@ -770,5 +771,3 @@ function transform!(gd::GroupedDataFrame{DataFrame},
     _replace_columns!(df, newdf)
     return ungroup ? df : gd
 end
-
-@specialize

From 67366c5bc5438cb4c6350e68dbab99b477f5907d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Fri, 2 Apr 2021 11:58:52 +0200
Subject: [PATCH 14/22] Apply suggestions from code review

Co-authored-by: Milan Bouchet-Valat <nalimilan@club.fr>
---
 src/abstractdataframe/selection.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl
index 0cf19c2791..7e3d1c1c15 100644
--- a/src/abstractdataframe/selection.jl
+++ b/src/abstractdataframe/selection.jl
@@ -1339,7 +1339,7 @@ function manipulate(df::DataFrame, c::MultiColumnIndex; copycols::Bool, keeprows
 end
 
 function manipulate(dfv::SubDataFrame, args::MultiColumnIndex;
-                 copycols::Bool, keeprows::Bool, renamecols::Bool)
+                    copycols::Bool, keeprows::Bool, renamecols::Bool)
     if args isa AbstractVector{<:Pair}
         return manipulate(dfv, args..., copycols=copycols, keeprows=keeprows,
                           renamecols=renamecols)

From 5ccb53887227a6c0316d7d68c4115c6f233b8ec8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Fri, 2 Apr 2021 16:52:43 +0200
Subject: [PATCH 15/22] unwrap Ref{Any} in function signature

---
 src/abstractdataframe/selection.jl        | 23 +++++++++--------------
 src/abstractdataframe/subset.jl           | 11 ++++++++---
 src/groupeddataframe/complextransforms.jl |  9 +++------
 src/groupeddataframe/splitapplycombine.jl | 15 +++++----------
 4 files changed, 25 insertions(+), 33 deletions(-)

diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl
index 0cf19c2791..d1ecbe29fa 100644
--- a/src/abstractdataframe/selection.jl
+++ b/src/abstractdataframe/selection.jl
@@ -326,15 +326,14 @@ function normalize_selection(idx::AbstractIndex,
     return (wanttable ? AsTable(c) : c) => fun => newcol
 end
 
-_transformation_helper(df::AbstractDataFrame, col_idx::Nothing, wfun::Ref{Any}) =
-    only(wfun)(df)
-_transformation_helper(df::AbstractDataFrame, col_idx::Int, wfun::Ref{Any}) =
-    only(wfun)(df[!, col_idx])
+_transformation_helper(df::AbstractDataFrame, col_idx::Nothing, (fun,)::Ref{Any}) =
+    fun(df)
+_transformation_helper(df::AbstractDataFrame, col_idx::Int, (fun,)::Ref{Any}) =
+    fun(df[!, col_idx])
 
 _empty_astable_helper(fun, len) = [fun(NamedTuple()) for _ in 1:len]
 
-function _transformation_helper(df::AbstractDataFrame, col_idx::AsTable, wfun::Ref{Any})
-    fun = only(wfun)
+function _transformation_helper(df::AbstractDataFrame, col_idx::AsTable, (fun,)::Ref{Any})
     tbl = Tables.columntable(select(df, col_idx.cols, copycols=false))
     if isempty(tbl) && fun isa ByRow
         return _empty_astable_helper(fun.fun, nrow(df))
@@ -345,8 +344,7 @@ end
 
 _empty_selector_helper(fun, len) = [fun() for _ in 1:len]
 
-function _transformation_helper(df::AbstractDataFrame, col_idx::AbstractVector{Int}, wfun::Ref{Any})
-    fun = only(wfun)
+function _transformation_helper(df::AbstractDataFrame, col_idx::AbstractVector{Int}, (fun,)::Ref{Any})
     if isempty(col_idx) && fun isa ByRow
         return _empty_selector_helper(fun.fun, nrow(df))
     else
@@ -416,8 +414,7 @@ end
 
 function _fix_existing_columns_for_vector(newdf::DataFrame, df::AbstractDataFrame,
                                           allow_resizing_newdf::Ref{Bool}, lr::Int,
-                                          wfun::Ref{Any})
-    fun = only(wfun)
+                                          (fun,)::Ref{Any})
     # allow shortening to 0 rows
     if allow_resizing_newdf[] && nrow(newdf) == 1
         newdfcols = _columns(newdf)
@@ -439,9 +436,8 @@ end
 
 function _add_col_check_copy(newdf::DataFrame, df::AbstractDataFrame,
                              col_idx::Union{Nothing, Int, AbstractVector{Int}, AsTable},
-                             copycols::Bool, wfun::Ref{Any},
+                             copycols::Bool, (fun,)::Ref{Any},
                              newname::Symbol, v::AbstractVector)
-    fun = only(wfun)
     cdf = eachcol(df)
     vpar = parent(v)
     parent_cols = col_idx isa AsTable ? col_idx.cols : something(col_idx, 1:ncol(df))
@@ -512,10 +508,9 @@ function _add_multicol_res(res::DataFrameRow, newdf::DataFrame, df::AbstractData
     _insert_row_multicolumn(newdf, df, allow_resizing_newdf, colnames, res)
 end
 
-function select_transform!(wnc::Ref{Any}, df::AbstractDataFrame, newdf::DataFrame,
+function select_transform!((nc,)::Ref{Any}, df::AbstractDataFrame, newdf::DataFrame,
                            transformed_cols::Set{Symbol}, copycols::Bool,
                            allow_resizing_newdf::Ref{Bool})
-    nc = only(wnc)
     @assert nc isa Union{Base.Callable,
                          Pair{<:Union{Int, AbstractVector{Int}, AsTable},
                               <:Pair{<:Base.Callable, <:Union{Symbol, AbstractVector{Symbol}, DataType}}}}
diff --git a/src/abstractdataframe/subset.jl b/src/abstractdataframe/subset.jl
index 5d15345c46..93ec471a0c 100644
--- a/src/abstractdataframe/subset.jl
+++ b/src/abstractdataframe/subset.jl
@@ -34,9 +34,14 @@ end
 # Note that _get_subset_conditions will have a large compilation time
 # if more than 32 conditions are passed as `args`.
 function _get_subset_conditions(df::Union{AbstractDataFrame, GroupedDataFrame},
-                                wargs::Ref{Any}, skipmissing::Bool)
-    args = only(wargs)
-    conditions = Any[]
+                                (args,)::Ref{Any}, skipmissing::Bool)
+    conditions = Any[if a isa ColumnIndex
+                         a => Symbol(:x, i))
+                     elseif a isa Pair{<:Any, <:Base.Callable}
+                         first(a) => last(a) => Symbol(:x, i)
+                     else
+                         throw(ArgumentError("condition specifier $a is not supported by `subset`"))
+                     end for (i, a) in enumerate(args)]
 
     # subset allows a transformation specification without a target column name or a column
     for (i, a) in enumerate(args)
diff --git a/src/groupeddataframe/complextransforms.jl b/src/groupeddataframe/complextransforms.jl
index 0c20dc9edb..924837ea01 100644
--- a/src/groupeddataframe/complextransforms.jl
+++ b/src/groupeddataframe/complextransforms.jl
@@ -4,9 +4,8 @@ _nrow(x::NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}) =
 _ncol(df::AbstractDataFrame) = ncol(df)
 _ncol(x::Union{NamedTuple, DataFrameRow}) = length(x)
 
-function _combine_multicol(wfirstres::Ref{Any}, wfun::Ref{Any}, gd::GroupedDataFrame,
+function _combine_multicol((firstres,)::Ref{Any}, wfun::Ref{Any}, gd::GroupedDataFrame,
                            wincols::Ref{Any})
-    firstres = only(wfirstres)
     @assert only(wfun) isa Base.Callable
     @assert only(wincols) isa Union{Nothing, AbstractVector, Tuple, NamedTuple}
     firstmulticol = firstres isa MULTI_COLS_TYPE
@@ -21,14 +20,12 @@ function _combine_multicol(wfirstres::Ref{Any}, wfun::Ref{Any}, gd::GroupedDataF
                                Val(firstmulticol), idx_agg)
 end
 
-function _combine_with_first(wfirst::Ref{Any},
+function _combine_with_first((first,)::Ref{Any},
                              f::Ref{Any}, gd::GroupedDataFrame,
-                             wincols::Ref{Any},
+                             (incols,)::Ref{Any},
                              firstmulticol::Val, idx_agg::Vector{Int})
     @assert only(f) isa Base.Callable
-    incols = only(wincols)
     @assert incols isa Union{Nothing, AbstractVector, Tuple, NamedTuple}
-    first = only(wfirst)
     @assert first isa Union{NamedTuple, DataFrameRow, AbstractDataFrame}
     extrude = false
 
diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl
index d56e1e07d8..84c2cfdcab 100644
--- a/src/groupeddataframe/splitapplycombine.jl
+++ b/src/groupeddataframe/splitapplycombine.jl
@@ -23,10 +23,9 @@ function gen_groups(idx::Vector{Int})
 end
 
 function _combine_prepare(gd::GroupedDataFrame,
-                          wcs::Ref{Any};
+                          (cs,)::Ref{Any};
                           keepkeys::Bool, ungroup::Bool, copycols::Bool,
                           keeprows::Bool, renamecols::Bool)
-    cs = only(wcs)
     for cei in cs
         @assert cei isa Union{Pair, Base.Callable, ColumnIndex, MultiColumnIndex}
     end
@@ -203,14 +202,13 @@ struct TransformationResult
 end
 
 # the transformation is an aggregation for which we have the fast path
-function _combine_process_agg(wcs_i::Ref{Any},
+function _combine_process_agg((cs_i,)::Ref{Any},
                               optional_i::Bool,
                               parentdf::AbstractDataFrame,
                               gd::GroupedDataFrame,
                               seen_cols::Dict{Symbol, Tuple{Bool, Int}},
                               trans_res::Vector{TransformationResult},
                               idx_agg::Vector{Int})
-    cs_i = only(wcs_i)
     @assert cs_i isa Pair{Int, <:Pair{<:Function, Symbol}}
     @assert isagg(cs_i, gd)
     @assert !optional_i
@@ -331,10 +329,9 @@ function _combine_process_pair_symbol(optional_i::Bool,
                                       idx_agg::Ref{Vector{Int}},
                                       out_col_name::Symbol,
                                       firstmulticol::Bool,
-                                      wfirstres::Ref{Any},
+                                      (firstres,)::Ref{Any},
                                       wfun::Ref{Any},
                                       wincols::Ref{Any})
-    firstres = only(wfirstres)
     @assert only(wfun) isa Base.Callable
     @assert only(wincols) isa Union{Tuple, NamedTuple}
 
@@ -388,10 +385,9 @@ function _combine_process_pair_astable(optional_i::Bool,
                                        idx_agg::Ref{Vector{Int}},
                                        out_col_name::Union{Type{AsTable}, AbstractVector{Symbol}},
                                        firstmulticol::Bool,
-                                       wfirstres::Ref{Any},
+                                       (firstres,)::Ref{Any},
                                        wfun::Ref{Any},
                                        wincols::Ref{Any})
-    firstres = only(wfirstres)
     fun = only(wfun)
     @assert fun isa Base.Callable
     @assert only(wincols) isa Union{Tuple, NamedTuple}
@@ -470,14 +466,13 @@ end
 # perform a transformation specified using the Pair notation
 # cs_i is a Pair that has many possible forms so this function is used to dispatch
 # to an appropriate more specialized function
-function _combine_process_pair(wcs_i::Ref{Any},
+function _combine_process_pair((cs_i,)::Ref{Any},
                                optional_i::Bool,
                                parentdf::AbstractDataFrame,
                                gd::GroupedDataFrame,
                                seen_cols::Dict{Symbol, Tuple{Bool, Int}},
                                trans_res::Vector{TransformationResult},
                                idx_agg::Ref{Vector{Int}})
-    cs_i = only(wcs_i)
     @assert cs_i isa Pair
 
     source_cols, (fun, out_col_name) = cs_i

From 0a34b84d1aaa40543fcb5366c81b70966008f8e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Fri, 2 Apr 2021 17:19:39 +0200
Subject: [PATCH 16/22] fix typo

---
 src/abstractdataframe/subset.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/abstractdataframe/subset.jl b/src/abstractdataframe/subset.jl
index 93ec471a0c..3201628aa4 100644
--- a/src/abstractdataframe/subset.jl
+++ b/src/abstractdataframe/subset.jl
@@ -36,7 +36,7 @@ end
 function _get_subset_conditions(df::Union{AbstractDataFrame, GroupedDataFrame},
                                 (args,)::Ref{Any}, skipmissing::Bool)
     conditions = Any[if a isa ColumnIndex
-                         a => Symbol(:x, i))
+                         a => Symbol(:x, i)
                      elseif a isa Pair{<:Any, <:Base.Callable}
                          first(a) => last(a) => Symbol(:x, i)
                      else

From 5aa0bd98939b37f933f867fca83ca4d0b1c4ec1e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Fri, 2 Apr 2021 19:03:50 +0200
Subject: [PATCH 17/22] remove unneeded code

---
 src/abstractdataframe/subset.jl | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/src/abstractdataframe/subset.jl b/src/abstractdataframe/subset.jl
index 3201628aa4..f50cf7636f 100644
--- a/src/abstractdataframe/subset.jl
+++ b/src/abstractdataframe/subset.jl
@@ -35,6 +35,7 @@ end
 # if more than 32 conditions are passed as `args`.
 function _get_subset_conditions(df::Union{AbstractDataFrame, GroupedDataFrame},
                                 (args,)::Ref{Any}, skipmissing::Bool)
+    # subset allows a transformation specification without a target column name or a column
     conditions = Any[if a isa ColumnIndex
                          a => Symbol(:x, i)
                      elseif a isa Pair{<:Any, <:Base.Callable}
@@ -42,18 +43,6 @@ function _get_subset_conditions(df::Union{AbstractDataFrame, GroupedDataFrame},
                      else
                          throw(ArgumentError("condition specifier $a is not supported by `subset`"))
                      end for (i, a) in enumerate(args)]
-
-    # subset allows a transformation specification without a target column name or a column
-    for (i, a) in enumerate(args)
-        if a isa ColumnIndex
-            push!(conditions, a => Symbol(:x, i))
-        elseif a isa Pair{<:Any, <:Base.Callable}
-            push!(conditions, first(a) => last(a) => Symbol(:x, i))
-        else
-            throw(ArgumentError("condition specifier $a is not supported by `subset`"))
-        end
-    end
-
     isempty(conditions) && throw(ArgumentError("at least one condition must be passed"))
 
     if df isa AbstractDataFrame

From 198cd8f18788d78a7865ddfde9856a42f5d94e2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Fri, 2 Apr 2021 22:56:50 +0200
Subject: [PATCH 18/22] inline expand_to_table

---
 src/abstractdataframe/selection.jl | 40 ++++++++++++++----------------
 1 file changed, 18 insertions(+), 22 deletions(-)

diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl
index 6b260244b9..88a4ae4238 100644
--- a/src/abstractdataframe/selection.jl
+++ b/src/abstractdataframe/selection.jl
@@ -374,27 +374,6 @@ function _gen_colnames(res, newname::Union{AbstractVector{Symbol},
     return colnames isa Vector{Symbol} ? colnames : collect(Symbol, colnames)
 end
 
-_expand_to_table(res) = Tables.columntable(res)
-_expand_to_table(res::Union{AbstractDataFrame, NamedTuple, DataFrameRow, AbstractMatrix}) = res
-
-function _expand_to_table(res::AbstractVector)
-    isempty(res) && return Tables.columntable(res)
-    kp1 = keys(res[1])
-    prepend = all(x -> x isa Integer, kp1)
-    if !(prepend || all(x -> x isa Symbol, kp1) || all(x -> x isa AbstractString, kp1))
-        throw(ArgumentError("keys of the returned elements must be " *
-                            "`Symbol`s, strings or integers"))
-    end
-    if any(x -> !isequal(keys(x), kp1), res)
-        throw(ArgumentError("keys of the returned elements must be identical"))
-    end
-    newres = DataFrame()
-    for n in kp1
-        newres[!, prepend ? Symbol("x", n) : Symbol(n)] = [x[n] for x in res]
-    end
-    return newres
-end
-
 function _insert_row_multicolumn(newdf::DataFrame, df::AbstractDataFrame,
                                  allow_resizing_newdf::Ref{Bool}, colnames::AbstractVector{Symbol},
                                  @nospecialize(res::Union{NamedTuple, DataFrameRow}))
@@ -530,7 +509,24 @@ function select_transform!((nc,)::Ref{Any}, df::AbstractDataFrame, newdf::DataFr
     res = _transformation_helper(df, col_idx, Ref{Any}(fun))
 
     if newname === AsTable || newname isa AbstractVector{Symbol}
-        res = _expand_to_table(res)
+        if res isa AbstractVector && !isempty(res)
+            kp1 = keys(res[1])
+            prepend = all(x -> x isa Integer, kp1)
+            if !(prepend || all(x -> x isa Symbol, kp1) || all(x -> x isa AbstractString, kp1))
+                throw(ArgumentError("keys of the returned elements must be " *
+                                    "`Symbol`s, strings or integers"))
+            end
+            if any(x -> !isequal(keys(x), kp1), res)
+                throw(ArgumentError("keys of the returned elements must be identical"))
+            end
+            newres = DataFrame()
+            for n in kp1
+                newres[!, prepend ? Symbol("x", n) : Symbol(n)] = [x[n] for x in res]
+            end
+            res = newres
+        elseif !(res isa Union{AbstractDataFrame, NamedTuple, DataFrameRow, AbstractMatrix})
+            res = Tables.columntable(res)
+        end
     end
 
     if res isa Union{AbstractDataFrame, NamedTuple, DataFrameRow, AbstractMatrix}

From 195feff2fdf8c831bd831affce6daccced844ae4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Fri, 2 Apr 2021 23:55:45 +0200
Subject: [PATCH 19/22] remove unused method

---
 src/other/precompile.jl | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/src/other/precompile.jl b/src/other/precompile.jl
index 27b2609edf..02c7912955 100644
--- a/src/other/precompile.jl
+++ b/src/other/precompile.jl
@@ -424,7 +424,6 @@ function precompile(all=false)
         Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1,),Tuple{SubArray{SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false},0,Array{SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false},1},Tuple{Int},true}}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Int,1}},Val{false},Array{Int,1}})
         Base.precompile(Tuple{typeof(getindex),DataFrame,Colon,Cols{Tuple{Symbol,Symbol}}})
         Base.precompile(Tuple{typeof(DataFrames._combine_process_pair_astable),Bool,GroupedDataFrame{DataFrame},Dict{Symbol,Tuple{Bool,Int}},Array{DataFrames.TransformationResult,1},Nothing,Type{AsTable},Bool,NamedTuple{(:p, :q),Tuple{SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false},SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false}}},Union{Function, Type},Tuple{Array{Int,1},Array{Int,1}}})
-        Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{DataFrameRow{DataFrame,DataFrames.Index},1}})
         Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1,),Tuple{Int}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Int,1}},Val{false},Array{Int,1}})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:id, :x1, :x2),Tuple{Array{Int,1},Array{Int,1},Array{Int,1}}},Type{DataFrame}})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.manipulate)),NamedTuple{(:copycols, :keeprows, :renamecols),Tuple{Bool,Bool,Bool}},typeof(DataFrames.manipulate),SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}},UnitRange{Int}},Array{typeof(nrow),1}})
@@ -688,7 +687,6 @@ function precompile(all=false)
                     Base.precompile(fbody, (Symbol,Symbol,Bool,Type{T} where T,typeof(stack),DataFrame,Array{Int,1},InvertedIndex{Array{Int,1}},))
                 end
             end
-        Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{Tuple{Int,String},1}})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.select)),NamedTuple{(:copycols, :renamecols),Tuple{Bool,Bool}},typeof(select),DataFrame,Function,Pair{Symbol,typeof(+)},Vararg{Any,N} where N})
         Base.precompile(Tuple{typeof(DataFrames._combine_multicol),Int,Function,GroupedDataFrame{DataFrame},Nothing})
         Base.precompile(Tuple{typeof(DataFrames._combine_process_pair_symbol),Bool,GroupedDataFrame{DataFrame},Dict{Symbol,Tuple{Bool,Int}},Array{DataFrames.TransformationResult,1},Nothing,Symbol,Bool,Irrational{:π},Union{Function, Type},Tuple{Array{Union{Irrational{:π}, Missing},1}}})
@@ -943,7 +941,6 @@ function precompile(all=false)
                     Base.precompile(fbody, (Bool,Bool,Bool,typeof(DataFrames.manipulate),DataFrame,Any,Vararg{Any,N} where N,))
                 end
             end
-        Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{NamedTuple{(:s, :t),Tuple{Int,Int}},1}})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames._show)),NamedTuple{(:rowid,),Tuple{Int}},typeof(DataFrames._show),Base.TTY,MIME{Symbol("text/html")},DataFrame})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.stack)),NamedTuple{(:variable_eltype,),Tuple{UnionAll}},typeof(stack),DataFrame,Array{Symbol,1}})
         Base.precompile(Tuple{typeof(DataFrames._combine_rows_with_first!),NamedTuple{(:x1,),Tuple{Missing}},Tuple{Array{Missing,1}},Int,Int,Function,GroupedDataFrame{DataFrame},Tuple{Array{Union{Missing, UnitRange{Int}},1}},Tuple{Symbol},Val{false}})
@@ -1138,7 +1135,6 @@ function precompile(all=false)
         Base.precompile(Tuple{typeof(DataFrames._combine_multicol),NamedTuple{(:c_identity,),Tuple{SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false}}},Function,GroupedDataFrame{DataFrame},Nothing})
         Base.precompile(Tuple{typeof(DataFrames._combine_tables_with_first!),NamedTuple{(:x1,),Tuple{Array{Int,1}}},Tuple{Array{Int,1}},Array{Int,1},Int,Int,Function,GroupedDataFrame{DataFrame},NTuple{4,Array{Int,1}},Tuple{Symbol},Val{false}})
         Base.precompile(Tuple{typeof(getindex),DataFrame,Colon,All{Tuple{String,String}}})
-        Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{Array{Float64,2},1}})
         Base.precompile(Tuple{typeof(iterate),Array{Pair{AsTable,Pair{ByRow{typeof(identity)},Symbol}},1}})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:a, :c, :d),Tuple{Array{Union{Missing, String},1},Array{Union{Missing, String},1},Array{Union{Missing, Int},1}}},Type{DataFrame}})
         Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1,),Tuple{Int}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Irrational{:π},1}},Val{false},Array{Int,1}})
@@ -1172,7 +1168,6 @@ function precompile(all=false)
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.manipulate)),NamedTuple{(:copycols, :keeprows, :renamecols),Tuple{Bool,Bool,Bool}},typeof(DataFrames.manipulate),SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}},UnitRange{Int}},InvertedIndex{Regex}})
         Base.precompile(Tuple{typeof(sort),DataFrame,InvertedIndex{Array{Any,1}}})
         Base.precompile(Tuple{typeof(DataFrames._copyto_helper!),SubArray{Float64,1,Array{Float64,1},Tuple{Base.OneTo{Int}},true},Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Tuple{Base.OneTo{Int},Base.OneTo{Int}},typeof(identity),Tuple{Base.Broadcast.Extruded{Array{Int,1},Tuple{Bool},Tuple{Int}}}},Int})
-        Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{Array{Float64,3},1}})
         Base.precompile(Tuple{typeof(DataFrames._combine_process_pair_symbol),Bool,GroupedDataFrame{DataFrame},Dict{Symbol,Tuple{Bool,Int}},Array{DataFrames.TransformationResult,1},Nothing,Symbol,Bool,String,Union{Function, Type},Tuple{Array{Union{Missing, String},1}}})
         Base.precompile(Tuple{typeof(DataFrames.copyto_widen!),Array{Int,1},Array{Real,1}})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:id, :sid, :SID),Tuple{UnitRange{Int},Array{String,1},Array{Union{Missing, String},1}}},Type{DataFrame}})
@@ -1380,7 +1375,6 @@ function precompile(all=false)
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:id, :fid, :id_1),Tuple{Array{Int,1},Array{Int,1},Array{Union{Missing, Int},1}}},Type{DataFrame}})
         Base.precompile(Tuple{typeof(view),SubDataFrame{DataFrame,DataFrames.Index,Base.OneTo{Int}},InvertedIndex{Int},Between{Int,Int}})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:p, :q),Tuple{SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false},SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false}}},Type{DataFrame}})
-        Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{NamedTuple,1}})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:a, :b),Tuple{Array{Union{Missing, Int},1},UnitRange{Int}}},Type{DataFrame}})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:a, :b, :copycols),Tuple{Int,Array{Any,1},Bool}},Type{DataFrame}})
         Base.precompile(Tuple{typeof(unique),SubDataFrame{DataFrame,DataFrames.Index,Base.OneTo{Int}},Bool})
@@ -1767,7 +1761,6 @@ function precompile(all=false)
         Base.precompile(Tuple{typeof(DataFrames._combine_process_pair_astable),Bool,GroupedDataFrame{DataFrame},Dict{Symbol,Tuple{Bool,Int}},Array{DataFrames.TransformationResult,1},Nothing,Type{AsTable},Bool,Dict{Symbol,UnitRange{Int}},Union{Function, Type},Tuple{Array{Int,1}}})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.outerjoin)),NamedTuple{(:on, :renamecols, :indicator),Tuple{Array{Any,1},Pair{String,String},Symbol}},typeof(outerjoin),DataFrame,DataFrame})
         Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1,),Tuple{Array{Tuple{Int},1}}},Function,GroupedDataFrame{DataFrame},NamedTuple{(:x,),Tuple{Array{Int,1}}},Val{false},Nothing})
-        Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{Tuple{Int,Int},1}})
         Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1,),Tuple{Int}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Union{Missing, UnitRange{Int}},1}},Val{false},Array{Int,1}})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:x, :y),Tuple{Array{Int,1},Array{Any,1}}},Type{DataFrame}})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:a, :b, :c, :x1, :x2),Tuple{UnitRange{Int},UnitRange{Int},UnitRange{Int},Array{Int,1},Array{Int,1}}},Type{DataFrame}})
@@ -2041,7 +2034,6 @@ function precompile(all=false)
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.select)),NamedTuple{(:copycols, :renamecols),Tuple{Bool,Bool}},typeof(select),GroupedDataFrame{DataFrame},Function,Pair{Symbol,typeof(+)},Vararg{Any,N} where N})
         Base.precompile(Tuple{typeof(getindex),DataFrame,Colon,Between{Int,Int}})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:Fish, :_MASS_, :_COLOR_),Tuple{Array{String,1},Array{String,1},Array{String,1}}},Type{DataFrame}})
-        Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{Tuple{Float64,Float64},1}})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.innerjoin)),NamedTuple{(:on,),Tuple{Symbol}},typeof(innerjoin),SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}},Array{Int,1}},DataFrame})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:g, :x_mean_skipmissing),Tuple{UnitRange{Int},Array{Float64,1}}},Type{DataFrame}})
         Base.precompile(Tuple{typeof(DataFrames._combine_process_pair_astable),Bool,GroupedDataFrame{DataFrame},Dict{Symbol,Tuple{Bool,Int}},Array{DataFrames.TransformationResult,1},Nothing,Array{Symbol,1},Bool,Array{NamedTuple{(:a, :b, :c),Tuple{Int,Int,Int}},1},Union{Function, Type},Tuple{Array{Int,1}}})
@@ -2296,7 +2288,6 @@ function precompile(all=false)
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.manipulate)),NamedTuple{(:copycols, :keeprows, :renamecols),Tuple{Bool,Bool,Bool}},typeof(DataFrames.manipulate),SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,Array{Int,1},Array{Int,1}},UnitRange{Int}},Regex})
         Base.precompile(Tuple{DataFrames.Reduce{typeof(Base.mul_prod),Nothing,Nothing},Array{Float64,1},GroupedDataFrame{DataFrame}})
         Base.precompile(Tuple{typeof(DataFrames.do_call),ByRow{typeof(sin)},Array{Int,1},UnitRange{Int},UnitRange{Int},GroupedDataFrame{DataFrame},Tuple{Array{Float64,1}},Int})
-        Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{DataFrame,1}})
         Base.precompile(Tuple{typeof(DataFrames._combine_process_pair_symbol),Bool,GroupedDataFrame{DataFrame},Dict{Symbol,Tuple{Bool,Int}},Array{DataFrames.TransformationResult,1},Nothing,Symbol,Bool,Missing,Union{Function, Type},Tuple{Array{Union{Missing, Bool},1}}})
         Base.precompile(Tuple{typeof(completecases),DataFrame,InvertedIndex{Array{Any,1}}})
         Base.precompile(Tuple{typeof(map),Function,DataFrameRow{DataFrame,DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}}}})
@@ -2335,7 +2326,6 @@ function precompile(all=false)
             end
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:g, :x),Tuple{Array{Int,1},Array{DataFrame,1}}},Type{DataFrame}})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:a, :b, :c, :x1),Tuple{UnitRange{Int},UnitRange{Int},UnitRange{Int},String}},Type{DataFrame}})
-        Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{NamedTuple{names,Tuple{Int,Int}} where names,1}})
         Base.precompile(Tuple{typeof(DataFrames.do_call),typeof(minimum),Array{Int,1},Array{Int,1},Array{Int,1},GroupedDataFrame{DataFrame},Tuple{Array{DataFrame,1}},Int})
         Base.precompile(Tuple{typeof(DataFrames._combine_tables_with_first!),NamedTuple{(:x1,),Tuple{SubArray{Int,1,Array{Int,2},Tuple{Base.Slice{Base.OneTo{Int}},Int},true}}},Tuple{Array{Int,1}},Array{Int,1},Int,Int,Function,GroupedDataFrame{DataFrame},Tuple{Array{Int,1}},Tuple{Symbol},Val{true}})
         Base.precompile(Tuple{typeof(DataFrames._sortperm),SubDataFrame{DataFrame,DataFrames.Index,Array{Int,1}},Base.Sort.MergeSortAlg,DataFrames.DFPerm{Base.Order.ForwardOrdering,Tuple{SubArray{Union{Missing, String},1,Array{Union{Missing, String},1},Tuple{Array{Int,1}},false},SubArray{String,1,Array{String,1},Tuple{Array{Int,1}},false}}}})
@@ -2362,7 +2352,6 @@ function precompile(all=false)
         Base.precompile(Tuple{typeof(DataFrames._combine_process_pair_symbol),Bool,GroupedDataFrame{DataFrame},Dict{Symbol,Tuple{Bool,Int}},Array{DataFrames.TransformationResult,1},Nothing,Symbol,Bool,Int,Union{Function, Type},Tuple{Array{Union{Missing, Rational{Int}},1}}})
         Base.precompile(Tuple{typeof(view),SubDataFrame{DataFrame,DataFrames.Index,Base.OneTo{Int}},UnitRange{Int},Between{Int,Int}})
         Base.precompile(Tuple{typeof(DataFrames._add_multicol_res),NamedTuple{(:y, :x),Tuple{SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false},SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false}}},DataFrame,SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,Array{Int,1},Array{Int,1}},Array{Int,1}},Array{Symbol,1},Base.RefValue{Bool},Any,AsTable,Bool,Type{AsTable}})
-        Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{NamedTuple{(:a, :b),Tuple{Int,String}},1}})
         Base.precompile(Tuple{typeof(DataFrames.row_group_slots),Tuple{PooledArrays.PooledArray{String,UInt8,1,Array{UInt8,1}},PooledArrays.PooledArray{Union{Missing, String},UInt8,1,Array{UInt8,1}}},Val{false},Array{Int,1},Bool,Bool})
         Base.precompile(Tuple{typeof(view),DataFrameRow{DataFrame,DataFrames.Index},UnitRange{Int}})
         Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1,),Tuple{Float64}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Union{Missing, Real},1}},Val{false},Array{Int,1}})
@@ -2414,7 +2403,6 @@ function precompile(all=false)
                 end
             end
         Base.precompile(Tuple{typeof(getindex),DataFrame,Colon,All{Tuple{Int,Int,String}}})
-        Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{Array{Any,1},1}})
         Base.precompile(Tuple{typeof(Tables.schema),SubDataFrame{DataFrame,DataFrames.Index,Base.OneTo{Int}}})
         Base.precompile(Tuple{typeof(DataFrames.groupreduce),Function,Function,Nothing,Nothing,Bool,Array{Real,1},GroupedDataFrame{DataFrame}})
         Base.precompile(Tuple{typeof(show),Base.GenericIOBuffer{Array{UInt8,1}},MIME{Symbol("text/html")},DataFrame})
@@ -2927,7 +2915,6 @@ function precompile(all=false)
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.innerjoin)),NamedTuple{(:on,),Tuple{Pair{Symbol,Symbol}}},typeof(innerjoin),DataFrame,DataFrame})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:A, :B),Tuple{Array{Int,1},Array{Any,1}}},Type{DataFrame}})
         Base.precompile(Tuple{typeof(view),DataFrame,BitArray{1},Symbol})
-        Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{NamedTuple{(:a, :b),Tuple{Int,Int}},1}})
         Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Pair{Symbol,Pair{typeof(sum),Symbol}}})
         Base.precompile(Tuple{DataFrames.Reduce{typeof(max),Nothing,Nothing},Array{Union{Irrational{:π}, Missing},1},GroupedDataFrame{DataFrame}})
         Base.precompile(Tuple{typeof(push!),DataFrame,Dict{Symbol,String}})
@@ -3099,7 +3086,6 @@ function precompile(all=false)
         Base.precompile(Tuple{Type{DataFrame},Array{Array{String,1},1},Array{Symbol,1}})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:Key1, :Key2, :Value),Tuple{Array{Union{Missing, String},1},PooledArrays.PooledArray{String,UInt8,1,Array{UInt8,1}},UnitRange{Int}}},Type{DataFrame}})
         Base.precompile(Tuple{ByRow{typeof(minmax)},SubArray{Float64,1,Array{Float64,1},Tuple{Array{Int,1}},false},Vararg{SubArray{Float64,1,Array{Float64,1},Tuple{Array{Int,1}},false},N} where N})
-        Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{Array{Int,1},1}})
         Base.precompile(Tuple{typeof(Base.Broadcast.materialize),Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(columnindex),Tuple{Base.RefValue{SubDataFrame{DataFrame,DataFrames.Index,UnitRange{Int}}},Array{Symbol,1}}}})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:_left,),Tuple{Int}},Type{DataFrame}})
         Base.precompile(Tuple{ByRow{typeof(/)},SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false},Vararg{SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false},N} where N})
@@ -3119,7 +3105,6 @@ function precompile(all=false)
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:z, :nrow, :z2),Tuple{Int,Int,Int}},Type{DataFrame}})
         Base.precompile(Tuple{typeof(DataFrames._combine_multicol),NamedTuple{(:a, :b),Tuple{Int,String}},Function,GroupedDataFrame{DataFrame},Nothing})
         Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Colon,Pair{Symbol,Pair{ByRow{typeof(sin)},Symbol}},Vararg{Pair{Symbol,Pair{ByRow{typeof(sin)},Symbol}},N} where N})
-        Base.precompile(Tuple{typeof(DataFrames._expand_to_table),Array{NamedTuple{(:a, :b, :c),Tuple{Int,Int,Int}},1}})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:g, :x),Tuple{Array{Int,1},Array{Bool,1}}},Type{DataFrame}})
         Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.manipulate)),NamedTuple{(:copycols, :keeprows, :renamecols),Tuple{Bool,Bool,Bool}},typeof(DataFrames.manipulate),DataFrame,Pair{Symbol,Array{Symbol,1}},Function})
         Base.precompile(Tuple{typeof(Base.Broadcast.materialize),Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Nothing,typeof(+),Tuple{DataFrame,Base.ReshapedArray{Int,2,Base.OneTo{Int},Tuple{}}}}})

From 66f7404acf209fd48ae00cd110014d552b34ef51 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Sat, 3 Apr 2021 14:53:21 +0200
Subject: [PATCH 20/22] revert @nospecialize

---
 src/abstractdataframe/selection.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl
index 88a4ae4238..7402c6308c 100644
--- a/src/abstractdataframe/selection.jl
+++ b/src/abstractdataframe/selection.jl
@@ -353,8 +353,8 @@ function _transformation_helper(df::AbstractDataFrame, col_idx::AbstractVector{I
     end
 end
 
-function _gen_colnames(res, newname::Union{AbstractVector{Symbol},
-                                           Type{AsTable}, Nothing})
+function _gen_colnames(@nospecialize(res), newname::Union{AbstractVector{Symbol},
+                                                          Type{AsTable}, Nothing})
     if res isa AbstractMatrix
         colnames = gennames(size(res, 2))
     else

From 3149a0ef50879cf0a38d4c787e584bea7557face Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Sat, 3 Apr 2021 14:55:26 +0200
Subject: [PATCH 21/22] Apply suggestions from code review

Co-authored-by: Milan Bouchet-Valat <nalimilan@club.fr>
---
 src/groupeddataframe/complextransforms.jl | 8 ++++----
 src/groupeddataframe/splitapplycombine.jl | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/groupeddataframe/complextransforms.jl b/src/groupeddataframe/complextransforms.jl
index 924837ea01..ae6151f4f4 100644
--- a/src/groupeddataframe/complextransforms.jl
+++ b/src/groupeddataframe/complextransforms.jl
@@ -21,10 +21,10 @@ function _combine_multicol((firstres,)::Ref{Any}, wfun::Ref{Any}, gd::GroupedDat
 end
 
 function _combine_with_first((first,)::Ref{Any},
-                             f::Ref{Any}, gd::GroupedDataFrame,
+                             (f,)::Ref{Any}, gd::GroupedDataFrame,
                              (incols,)::Ref{Any},
                              firstmulticol::Val, idx_agg::Vector{Int})
-    @assert only(f) isa Base.Callable
+    @assert f isa Base.Callable
     @assert incols isa Union{Nothing, AbstractVector, Tuple, NamedTuple}
     @assert first isa Union{NamedTuple, DataFrameRow, AbstractDataFrame}
     extrude = false
@@ -59,11 +59,11 @@ function _combine_with_first((first,)::Ref{Any},
     if !extrude && first isa Union{AbstractDataFrame,
                                    NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}}
         outcols, finalcolnames = _combine_tables_with_first!(first, initialcols, idx, 1, 1,
-                                                             only(f), gd, incols, targetcolnames,
+                                                             f, gd, incols, targetcolnames,
                                                              firstmulticol)
     else
         outcols, finalcolnames = _combine_rows_with_first!(first, initialcols,
-                                                           only(f), gd, incols, targetcolnames,
+                                                           f, gd, incols, targetcolnames,
                                                            firstmulticol)
     end
     return idx, outcols, collect(Symbol, finalcolnames)
diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl
index 84c2cfdcab..eb382076c1 100644
--- a/src/groupeddataframe/splitapplycombine.jl
+++ b/src/groupeddataframe/splitapplycombine.jl
@@ -5,7 +5,7 @@
 const MULTI_COLS_TYPE = Union{AbstractDataFrame, NamedTuple, DataFrameRow, AbstractMatrix}
 
 # use a constant Vector{Int} as a sentinel to signal that idx_agg has not been computed yet
-# to avoid excessive specialization
+# (`nothing` is not used as the sentinel in order to avoid excessive specialization)
 const NOTHING_IDX_AGG = Int[]
 
 function gen_groups(idx::Vector{Int})

From 5020a07e62e7e5af942a7ee8d6d242af0e90178b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Sat, 3 Apr 2021 14:55:56 +0200
Subject: [PATCH 22/22] Update src/groupeddataframe/splitapplycombine.jl

Co-authored-by: Milan Bouchet-Valat <nalimilan@club.fr>
---
 src/groupeddataframe/splitapplycombine.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl
index eb382076c1..ad4f61cff5 100644
--- a/src/groupeddataframe/splitapplycombine.jl
+++ b/src/groupeddataframe/splitapplycombine.jl
@@ -393,7 +393,7 @@ function _combine_process_pair_astable(optional_i::Bool,
     @assert only(wincols) isa Union{Tuple, NamedTuple}
     if firstres isa AbstractVector
         idx, outcol_vec, _ = _combine_with_first(Ref{Any}(wrap(firstres)), wfun, gd, wincols,
-                                              Val(firstmulticol), NOTHING_IDX_AGG)
+                                                 Val(firstmulticol), NOTHING_IDX_AGG)
         @assert length(outcol_vec) == 1
         res = outcol_vec[1]
         @assert length(res) > 0