JuliaData · bkamins · Sep 26, 2022 · Sep 25, 2022 · Sep 25, 2022 · Sep 25, 2022
diff --git a/NEWS.md b/NEWS.md
@@ -88,6 +88,13 @@
   ([#3081](https://github.com/JuliaData/DataFrames.jl/pull/3081))
 * Make `subset` preserves group ordering when `ungroup=false` like `subset!` already does
   ([#3094](https://github.com/JuliaData/DataFrames.jl/pull/3094))
+* Fix incorrect behavior of `GroupedDataFrame` indexing in corner cases
+  ([#3179](https://github.com/JuliaData/DataFrames.jl/pull/3179))
+* Fix incorrect behavior of `insertcols!` when no columns to add are passed
+  ([#3179](https://github.com/JuliaData/DataFrames.jl/pull/3179))
+* Fix incorrect behavior of `minimum` and `maximum` aggregates
+  when processing `GroupedDataFrame` with `combine` in corner cases
+  ([#3179](https://github.com/JuliaData/DataFrames.jl/pull/3179))
 
 ## Performance
 

diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl
@@ -3095,7 +3095,7 @@ julia> insertcols!(df, :b, :d => 7:9, after=true)
    3 │ c         9      4      5      3
 ```
 """
-function insertcols!(df::AbstractDataFrame, col::ColumnIndex, name_cols::Pair{Symbol, <:Any}...;
+function insertcols!(df::AbstractDataFrame, col::ColumnIndex, name_cols::Pair{Symbol}...;
                      after::Bool=false, makeunique::Bool=false, copycols::Bool=true)
     if !is_column_insertion_allowed(df)
         throw(ArgumentError("insertcols! is only supported for DataFrame, or for " *
@@ -3220,31 +3220,47 @@ function insertcols!(df::AbstractDataFrame, col::ColumnIndex, name_cols::Pair{Sy
     return df
 end
 
-insertcols!(df::AbstractDataFrame, col::ColumnIndex, name_cols::Pair{<:AbstractString, <:Any}...;
+insertcols!(df::AbstractDataFrame, col::ColumnIndex, name_cols::Pair{<:AbstractString}...;
             after::Bool=false, makeunique::Bool=false, copycols::Bool=true) =
     insertcols!(df, col, (Symbol(n) => v for (n, v) in name_cols)...,
                 after=after, makeunique=makeunique, copycols=copycols)
 
-insertcols!(df::AbstractDataFrame, name_cols::Pair{Symbol, <:Any}...;
+insertcols!(df::AbstractDataFrame, name_cols::Pair{Symbol}...;
             after::Bool=false, makeunique::Bool=false, copycols::Bool=true) =
     insertcols!(df, ncol(df)+1, name_cols..., after=after,
                 makeunique=makeunique, copycols=copycols)
 
-insertcols!(df::AbstractDataFrame, name_cols::Pair{<:AbstractString, <:Any}...;
+insertcols!(df::AbstractDataFrame, name_cols::Pair{<:AbstractString}...;
             after::Bool=false, makeunique::Bool=false, copycols::Bool=true) =
     insertcols!(df, (Symbol(n) => v for (n, v) in name_cols)...,
                 after=after, makeunique=makeunique, copycols=copycols)
 
-function insertcols!(df::AbstractDataFrame, col::Int=ncol(df)+1; makeunique::Bool=false, name_cols...)
-    if !(0 < col <= ncol(df) + 1)
-        throw(ArgumentError("attempt to insert a column to a data frame with " *
-                            "$(ncol(df)) columns at index $col"))
+function insertcols!(df::AbstractDataFrame, col::ColumnIndex; after::Bool=false,
+                     makeunique::Bool=false, copycols::Bool=true)
+    if col isa SymbolOrString
+        col_ind = Int(columnindex(df, col))
+        if col_ind == 0
+            throw(ArgumentError("column $col does not exist in data frame"))
+        end
+    else
+        col_ind = Int(col)
     end
-    if !isempty(name_cols)
-        # an explicit error is thrown as keyword argument was supported in the past
-        throw(ArgumentError("inserting columns using a keyword argument is not supported, " *
-                            "pass a Pair as a positional argument instead"))
+
+    if after
+        col_ind += 1
     end
+
+    if !(0 < col_ind <= ncol(df) + 1)
+        throw(ArgumentError("attempt to insert a column to a data frame with " *
+                            "$(ncol(df)) columns at index $col_ind"))
+    end
+
+    _drop_all_nonnote_metadata!(parent(df))
+    return df
+end
+
+function insertcols!(df::AbstractDataFrame; after::Bool=false,
+                     makeunique::Bool=false, copycols::Bool=true)
     _drop_all_nonnote_metadata!(parent(df))
     return df
 end
diff --git a/src/abstractdataframe/sort.jl b/src/abstractdataframe/sort.jl
@@ -414,6 +414,10 @@ function Base.issorted(df::AbstractDataFrame, cols=All();
     end
 end
 
+Base.issorted(::AbstractDataFrame, ::Base.Order.Ordering) =
+    throw(ArgumentError("second positional argument of `issorted` on " *
+                        "a data frame must be a column selector"))
+
 """
     sort(df::AbstractDataFrame, cols=All();
          alg::Union{Algorithm, Nothing}=nothing,

diff --git a/src/groupeddataframe/fastaggregates.jl b/src/groupeddataframe/fastaggregates.jl
@@ -17,20 +17,12 @@ check_aggregate(f::typeof(prod), ::AbstractVector{<:Union{Missing, Number}}) =
     Reduce(Base.mul_prod)
 check_aggregate(f::typeof(prod∘skipmissing), ::AbstractVector{<:Union{Missing, Number}}) =
     Reduce(Base.mul_prod, !ismissing)
-check_aggregate(f::typeof(maximum),
-                ::AbstractVector{<:Union{Missing, MULTI_COLS_TYPE, AbstractVector}}) = f
 check_aggregate(f::typeof(maximum), v::AbstractVector{<:Union{Missing, Real}}) =
     eltype(v) === Any ? f : Reduce(max)
-check_aggregate(f::typeof(maximum∘skipmissing),
-                ::AbstractVector{<:Union{Missing, MULTI_COLS_TYPE, AbstractVector}}) = f
 check_aggregate(f::typeof(maximum∘skipmissing), v::AbstractVector{<:Union{Missing, Real}}) =
     eltype(v) === Any ? f : Reduce(max, !ismissing, nothing, true)
-check_aggregate(f::typeof(minimum),
-                ::AbstractVector{<:Union{Missing, MULTI_COLS_TYPE, AbstractVector}}) = f
 check_aggregate(f::typeof(minimum), v::AbstractVector{<:Union{Missing, Real}}) =
     eltype(v) === Any ? f : Reduce(min)
-check_aggregate(f::typeof(minimum∘skipmissing),
-                ::AbstractVector{<:Union{Missing, MULTI_COLS_TYPE, AbstractVector}}) = f
 check_aggregate(f::typeof(minimum∘skipmissing), v::AbstractVector{<:Union{Missing, Real}}) =
     eltype(v) === Any ? f : Reduce(min, !ismissing, nothing, true)
 check_aggregate(f::typeof(mean), ::AbstractVector{<:Union{Missing, Number}}) =

diff --git a/src/groupeddataframe/groupeddataframe.jl b/src/groupeddataframe/groupeddataframe.jl
@@ -748,14 +748,13 @@ Base.IndexStyle(::Type{<:GroupKeys}) = IndexLinear()
     return GroupKey(parent(gk), i)
 end
 
-
 #
 # Non-standard indexing
 #
 
 # Non-standard indexing relies on converting to integer indices first
 # The full version (to_indices) is required rather than to_index even though
-# GroupedDataFrame behaves as a 1D array due to the behavior of Colon and Not.
+# GroupedDataFrame behaves as a 1D array due to the behavior of Not.
 # Note that this behavior would be the default if it was <:AbstractArray
 function Base.getindex(gd::GroupedDataFrame, idx...)
     length(idx) == 1 || throw(ArgumentError("GroupedDataFrame requires a single index"))
@@ -767,6 +766,10 @@ const GroupKeyTypes = Union{GroupKey, Tuple, NamedTuple, AbstractDict{Symbol}, A
 # All allowed scalar index types
 const GroupIndexTypes = Union{Integer, GroupKeyTypes}
 
+# GroupedDataFrame is not a multidimensional array, so it does not support cartesian indexing
+Base.to_indices(gd::GroupedDataFrame, (idx,)::Tuple{CartesianIndex}) =
+    throw(ArgumentError("Invalid index: $idx of type $(typeof(idx))"))
+
 # Find integer index for dictionary keys
 function Base.to_index(gd::GroupedDataFrame, key::GroupKey)
     gd === parent(key) && return getfield(key, :idx)
@@ -864,13 +867,29 @@ end
 # ambiguity in dispatch
 function Base.to_indices(gd::GroupedDataFrame,
                          (idx,)::Tuple{Not{<:Union{BitArray{1}, Vector{Bool}}}})
-    (findall(!, idx.skip),)
+    if length(idx.skip) != length(gd)
+        throw(BoundsError("attempt to index $(length(gd))-group GroupedDataFrame " *
+                          "with $(length(idx.skip))-element boolean vector"))
+    end
+    return (findall(!, idx.skip),)
 end
 function Base.to_indices(gd::GroupedDataFrame,
                          (idx,)::Tuple{Not{<:AbstractVector{Bool}}})
-    (findall(!, idx.skip),)
+    if length(idx.skip) != length(gd)
+        throw(BoundsError("attempt to index $(length(gd))-group GroupedDataFrame " *
+                          "with $(length(idx.skip))-element boolean vector"))
+    end
+    return (findall(!, idx.skip),)
 end
 
+@inline Base.to_indices(gd::GroupedDataFrame, I::Tuple{Not{<:InvertedIndices.NIdx{1}}}) =
+    throw(ArgumentError("attempt to index GroupedDataFrame with $(typeof(I))"))
+
+@inline Base.to_indices(gd::GroupedDataFrame, I::Tuple{Not{<:InvertedIndices.NIdx}}) =
+    throw(ArgumentError("attempt to index GroupedDataFrame with $(typeof(I))"))
+
+@inline Base.to_indices(gd::GroupedDataFrame, I::Tuple{Not{<:Union{Array{Bool}, BitArray}}}) =
+    throw(ArgumentError("attempt to index GroupedDataFrame with $(typeof(I))"))
 
 #
 # Dictionary interface

diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl
@@ -14,6 +14,8 @@ Base.Broadcast.BroadcastStyle(::Type{<:AbstractDataFrame}) =
 
 Base.Broadcast.BroadcastStyle(::DataFrameStyle, ::Base.Broadcast.BroadcastStyle) =
     DataFrameStyle()
+Base.Broadcast.BroadcastStyle(::DataFrameStyle, ::Base.Broadcast.Unknown) =
+    DataFrameStyle()
 Base.Broadcast.BroadcastStyle(::Base.Broadcast.BroadcastStyle, ::DataFrameStyle) =
     DataFrameStyle()
 Base.Broadcast.BroadcastStyle(::DataFrameStyle, ::DataFrameStyle) = DataFrameStyle()
@@ -225,6 +227,8 @@ function Base.Broadcast.broadcast_unalias(dest::AbstractDataFrame, src)
     return src
 end
 
+Base.Broadcast.broadcast_unalias(::Nothing, src::AbstractDataFrame) = src
+
 function Base.Broadcast.broadcast_unalias(dest, src::AbstractDataFrame)
     wascopied = false
     for (i, col) in enumerate(eachcol(src))
@@ -371,6 +375,10 @@ end
 Base.Broadcast.broadcast_unalias(dest::DataFrameRow, src) =
     Base.Broadcast.broadcast_unalias(parent(dest), src)
 
+# this is currently impossible but is added to avoid potential dispatch ambiguity in the future
+Base.Broadcast.broadcast_unalias(dest::DataFrameRow, src::AbstractDataFrame) =
+    Base.Broadcast.broadcast_unalias(parent(dest), src)
+
 function Base.copyto!(dfr::DataFrameRow, bc::Base.Broadcast.Broadcasted)
     bc′ = Base.Broadcast.preprocess(dfr, bc)
     for I in eachindex(bc′)

diff --git a/src/subdataframe/subdataframe.jl b/src/subdataframe/subdataframe.jl
@@ -73,6 +73,10 @@ struct SubDataFrame{D<:AbstractDataFrame, S<:AbstractIndex, T<:AbstractVector{In
     rows::T # maps from subdf row indexes to parent row indexes
 end
 
+# this method should never be called by DataFrames.jl code, but is added for safety
+SubDataFrame(parent::SubDataFrame, colindex::AbstractIndex, rows::AbstractVector{Int}) =
+    throw(ArgumentError("Creation of a SubDataFrame from a SubDataFrame is not allowed"))
+
 Base.@propagate_inbounds function SubDataFrame(parent::DataFrame, rows::AbstractVector{Int}, cols)
     @boundscheck if !checkindex(Bool, axes(parent, 1), rows)
         throw(BoundsError(parent, (rows, cols)))

diff --git a/test/dataframe.jl b/test/dataframe.jl
@@ -263,7 +263,7 @@ end
     dfc = copy(df)
     @test insertcols!(df, 2) == dfc
     @test_throws ArgumentError insertcols!(df, 10)
-    @test_throws ArgumentError insertcols!(df, 2, a=1, b=2)
+    @test_throws MethodError insertcols!(df, 2, a=1, b=2)
 
     df = DataFrame()
     @test insertcols!(df, 1, :x=>[1]) == DataFrame(x=[1])
@@ -361,9 +361,17 @@ end
     @test df2[!, 1] === x
 end
 
-@testset "unsupported insertcols!" begin
+@testset "insertcols! with no cols" begin
     df = DataFrame(x=1:2)
-    @test_throws ArgumentError insertcols!(df, 2, y=2:3)
+    @test_throws ArgumentError insertcols!(df, 0)
+    @test insertcols!(df, 2) == DataFrame(x=1:2)
+    @test insertcols!(df, :x) == DataFrame(x=1:2)
+    @test insertcols!(df, "x") == DataFrame(x=1:2)
+    @test insertcols!(df, "x", after=true, makeunique=true, copycols=true) == DataFrame(x=1:2)
+    @test insertcols!(df, 0, after=true) == DataFrame(x=1:2)
+    @test_throws ArgumentError insertcols!(df, 2, after=true)
+    @test insertcols!(df) == DataFrame(x=1:2)
+    @test insertcols!(df, after=true, makeunique=true, copycols=true) == DataFrame(x=1:2)
 end
 
 @testset "insertcols! after" begin

diff --git a/test/grouping.jl b/test/grouping.jl
@@ -4276,4 +4276,40 @@ end
     end
 end
 
+@testset "maximum and minimum on missing" begin
+    df = DataFrame(id=[1,1,2,2], x=fill(missing, 4))
+    gdf = groupby_checked(df, :id)
+    @test combine(gdf, :x => maximum => :x) ≅ DataFrame(id=1:2, x=fill(missing, 2))
+    @test combine(gdf, :x => minimum => :x) ≅ DataFrame(id=1:2, x=fill(missing, 2))
+    @test_throws ArgumentError combine(gdf, :x => maximum∘skipmissing)
+    @test_throws ArgumentError combine(gdf, :x => minimum∘skipmissing)
+end
+
+@testset "corner cases of indexing" begin
+    df = DataFrame(id=1:4)
+    gdf = groupby_checked(df, :id)
+    @test_throws ArgumentError gdf[CartesianIndex(1)]
+    @test_throws ArgumentError gdf[CartesianIndex(1, 1)]
+    @test_throws ArgumentError gdf[[CartesianIndex(1)]]
+    @test_throws ArgumentError gdf[[CartesianIndex(1, 1)]]
+    @test_throws ArgumentError gdf[Any[CartesianIndex(1)]]
+    @test_throws ArgumentError gdf[Any[CartesianIndex(1, 1)]]
+
+    @test_throws ArgumentError gdf[Not(CartesianIndex(1))]
+    @test_throws ArgumentError gdf[Not(CartesianIndex(1, 1))]
+    @test_throws ArgumentError gdf[Not([CartesianIndex(1)])]
+    @test_throws ArgumentError gdf[Not([CartesianIndex(1, 1)])]
+    @test_throws ArgumentError gdf[Not(Any[CartesianIndex(1)])]
+    @test_throws ArgumentError gdf[Not(Any[CartesianIndex(1, 1)])]
+
+    @test_throws BoundsError gdf[[true]]
+    @test_throws BoundsError gdf[Not([true])]
+    @test_throws BoundsError gdf[trues(1)]
+    @test_throws BoundsError gdf[Not(trues(1))]
+    @test_throws BoundsError gdf[view([true], 1:1)]
+    @test_throws BoundsError gdf[Not(view([true], 1:1))]
+    @test_throws BoundsError gdf[[true true true true]]
+    @test_throws ArgumentError gdf[Not([true true true true])]
+end
+
 end # module
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -14,6 +14,20 @@ else
     @show Threads.nthreads()
 end
 
+ambiguities_vec = Test.detect_ambiguities(DataFrames, recursive=true)
+if !isempty(ambiguities_vec)
+    @error "Method ambiguities:"
+    display(ambiguities_vec)
+    throw(AssertionError("method dispatch ambiguities found"))
+end
+
+unbound_args_vec = Test.detect_unbound_args(DataFrames, recursive=true)
+if !isempty(unbound_args_vec)
+    @error "Unbound type parameters:"
+    display(unbound_args_vec)
+    throw(AssertionError("unbound type parameters found"))
+end
+
 my_tests = ["utils.jl",
             "cat.jl",
             "data.jl",

diff --git a/test/sort.jl b/test/sort.jl
@@ -322,6 +322,7 @@ end
         @test issorted(df, rev=fill(false, ncol(df)))
         @test issorted(df, order=Base.Forward)
         @test issorted(df, order=fill(Base.Forward, ncol(df)))
+        @test_throws ArgumentError issorted(df, Base.Order.Forward)
 
         @test issorted(df, :x, by=identity)
         @test issorted(df, :x, by=[identity])