Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid method dispatch ambiguities in DataFrames.jl #3179

Merged
merged 9 commits into from
Sep 26, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,13 @@
([#3081](https://github.com/JuliaData/DataFrames.jl/pull/3081))
* Make `subset` preserves group ordering when `ungroup=false` like `subset!` already does
([#3094](https://github.com/JuliaData/DataFrames.jl/pull/3094))
* Fix incorrect behavior of `GroupDataFrame` indexing in corner cases
([#3179](https://github.com/JuliaData/DataFrames.jl/pull/3179))
* Fix incorrect behavior of `insertcols!` when no columns to add are passed
([#3179](https://github.com/JuliaData/DataFrames.jl/pull/3179))
bkamins marked this conversation as resolved.
Show resolved Hide resolved
* Fix incorrect behavior of `minimum` and `maximum` aggregates
when processing `GroupedDataFrame` with `combine` in corner cases
([#3179](https://github.com/JuliaData/DataFrames.jl/pull/3179))

## Performance

Expand Down
40 changes: 28 additions & 12 deletions src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3095,7 +3095,7 @@ julia> insertcols!(df, :b, :d => 7:9, after=true)
3 │ c 9 4 5 3
```
"""
function insertcols!(df::AbstractDataFrame, col::ColumnIndex, name_cols::Pair{Symbol, <:Any}...;
function insertcols!(df::AbstractDataFrame, col::ColumnIndex, name_cols::Pair{Symbol}...;
after::Bool=false, makeunique::Bool=false, copycols::Bool=true)
if !is_column_insertion_allowed(df)
throw(ArgumentError("insertcols! is only supported for DataFrame, or for " *
Expand Down Expand Up @@ -3220,31 +3220,47 @@ function insertcols!(df::AbstractDataFrame, col::ColumnIndex, name_cols::Pair{Sy
return df
end

insertcols!(df::AbstractDataFrame, col::ColumnIndex, name_cols::Pair{<:AbstractString, <:Any}...;
insertcols!(df::AbstractDataFrame, col::ColumnIndex, name_cols::Pair{<:AbstractString}...;
after::Bool=false, makeunique::Bool=false, copycols::Bool=true) =
insertcols!(df, col, (Symbol(n) => v for (n, v) in name_cols)...,
after=after, makeunique=makeunique, copycols=copycols)

insertcols!(df::AbstractDataFrame, name_cols::Pair{Symbol, <:Any}...;
insertcols!(df::AbstractDataFrame, name_cols::Pair{Symbol}...;
after::Bool=false, makeunique::Bool=false, copycols::Bool=true) =
insertcols!(df, ncol(df)+1, name_cols..., after=after,
makeunique=makeunique, copycols=copycols)

insertcols!(df::AbstractDataFrame, name_cols::Pair{<:AbstractString, <:Any}...;
insertcols!(df::AbstractDataFrame, name_cols::Pair{<:AbstractString}...;
after::Bool=false, makeunique::Bool=false, copycols::Bool=true) =
insertcols!(df, (Symbol(n) => v for (n, v) in name_cols)...,
after=after, makeunique=makeunique, copycols=copycols)

function insertcols!(df::AbstractDataFrame, col::Int=ncol(df)+1; makeunique::Bool=false, name_cols...)
if !(0 < col <= ncol(df) + 1)
throw(ArgumentError("attempt to insert a column to a data frame with " *
"$(ncol(df)) columns at index $col"))
function insertcols!(df::AbstractDataFrame, col::ColumnIndex; after::Bool=false,
bkamins marked this conversation as resolved.
Show resolved Hide resolved
makeunique::Bool=false, copycols::Bool=true)
if col isa SymbolOrString
col_ind = Int(columnindex(df, col))
if col_ind == 0
throw(ArgumentError("column $col does not exist in data frame"))
end
else
col_ind = Int(col)
end
if !isempty(name_cols)
# an explicit error is thrown as keyword argument was supported in the past
throw(ArgumentError("inserting columns using a keyword argument is not supported, " *
"pass a Pair as a positional argument instead"))

if after
col_ind += 1
end

if !(0 < col_ind <= ncol(df) + 1)
throw(ArgumentError("attempt to insert a column to a data frame with " *
"$(ncol(df)) columns at index $col_ind"))
end

_drop_all_nonnote_metadata!(parent(df))
return df
end

function insertcols!(df::AbstractDataFrame; after::Bool=false,
makeunique::Bool=false, copycols::Bool=true)
_drop_all_nonnote_metadata!(parent(df))
return df
end
4 changes: 4 additions & 0 deletions src/abstractdataframe/sort.jl
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,10 @@ function Base.issorted(df::AbstractDataFrame, cols=All();
end
end

Base.issorted(::AbstractDataFrame, ::Base.Order.Ordering) =
throw(ArgumentError("second positional argument of `issorted` on " *
"a data frame must be a column selector"))

"""
sort(df::AbstractDataFrame, cols=All();
alg::Union{Algorithm, Nothing}=nothing,
Expand Down
8 changes: 0 additions & 8 deletions src/groupeddataframe/fastaggregates.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,12 @@ check_aggregate(f::typeof(prod), ::AbstractVector{<:Union{Missing, Number}}) =
Reduce(Base.mul_prod)
check_aggregate(f::typeof(prod∘skipmissing), ::AbstractVector{<:Union{Missing, Number}}) =
Reduce(Base.mul_prod, !ismissing)
nalimilan marked this conversation as resolved.
Show resolved Hide resolved
check_aggregate(f::typeof(maximum),
::AbstractVector{<:Union{Missing, MULTI_COLS_TYPE, AbstractVector}}) = f
check_aggregate(f::typeof(maximum), v::AbstractVector{<:Union{Missing, Real}}) =
eltype(v) === Any ? f : Reduce(max)
check_aggregate(f::typeof(maximum∘skipmissing),
::AbstractVector{<:Union{Missing, MULTI_COLS_TYPE, AbstractVector}}) = f
check_aggregate(f::typeof(maximum∘skipmissing), v::AbstractVector{<:Union{Missing, Real}}) =
eltype(v) === Any ? f : Reduce(max, !ismissing, nothing, true)
check_aggregate(f::typeof(minimum),
::AbstractVector{<:Union{Missing, MULTI_COLS_TYPE, AbstractVector}}) = f
check_aggregate(f::typeof(minimum), v::AbstractVector{<:Union{Missing, Real}}) =
eltype(v) === Any ? f : Reduce(min)
check_aggregate(f::typeof(minimum∘skipmissing),
::AbstractVector{<:Union{Missing, MULTI_COLS_TYPE, AbstractVector}}) = f
check_aggregate(f::typeof(minimum∘skipmissing), v::AbstractVector{<:Union{Missing, Real}}) =
eltype(v) === Any ? f : Reduce(min, !ismissing, nothing, true)
check_aggregate(f::typeof(mean), ::AbstractVector{<:Union{Missing, Number}}) =
Expand Down
27 changes: 23 additions & 4 deletions src/groupeddataframe/groupeddataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -748,14 +748,13 @@ Base.IndexStyle(::Type{<:GroupKeys}) = IndexLinear()
return GroupKey(parent(gk), i)
end


#
# Non-standard indexing
#

# Non-standard indexing relies on converting to integer indices first
# The full version (to_indices) is required rather than to_index even though
# GroupedDataFrame behaves as a 1D array due to the behavior of Colon and Not.
# GroupedDataFrame behaves as a 1D array due to the behavior of Not.
# Note that this behavior would be the default if it was <:AbstractArray
function Base.getindex(gd::GroupedDataFrame, idx...)
length(idx) == 1 || throw(ArgumentError("GroupedDataFrame requires a single index"))
Expand All @@ -767,6 +766,10 @@ const GroupKeyTypes = Union{GroupKey, Tuple, NamedTuple, AbstractDict{Symbol}, A
# All allowed scalar index types
const GroupIndexTypes = Union{Integer, GroupKeyTypes}

# GroupedDataFrame is not a multidimensional array, so it does not support cartesian indexing
bkamins marked this conversation as resolved.
Show resolved Hide resolved
Base.to_indices(gd::GroupedDataFrame, (idx,)::Tuple{CartesianIndex}) =
throw(ArgumentError("Invalid index: $idx of type $(typeof(idx))"))

# Find integer index for dictionary keys
function Base.to_index(gd::GroupedDataFrame, key::GroupKey)
gd === parent(key) && return getfield(key, :idx)
Expand Down Expand Up @@ -864,13 +867,29 @@ end
# ambiguity in dispatch
function Base.to_indices(gd::GroupedDataFrame,
(idx,)::Tuple{Not{<:Union{BitArray{1}, Vector{Bool}}}})
(findall(!, idx.skip),)
if length(idx.skip) != length(gd)
bkamins marked this conversation as resolved.
Show resolved Hide resolved
throw(BoundsError("attempt to index $(length(gd))-group GroupedDataFrame " *
"with $(length(idx.skip))-element boolean vector"))
bkamins marked this conversation as resolved.
Show resolved Hide resolved
end
return (findall(!, idx.skip),)
end
function Base.to_indices(gd::GroupedDataFrame,
(idx,)::Tuple{Not{<:AbstractVector{Bool}}})
(findall(!, idx.skip),)
if length(idx.skip) != length(gd)
throw(BoundsError("attempt to index $(length(gd))-group GroupedDataFrame " *
"with $(length(idx.skip))-element boolean vector"))
bkamins marked this conversation as resolved.
Show resolved Hide resolved
end
return (findall(!, idx.skip),)
end

bkamins marked this conversation as resolved.
Show resolved Hide resolved
@inline Base.to_indices(gd::GroupedDataFrame, I::Tuple{Not{<:InvertedIndices.NIdx{1}}}) =
bkamins marked this conversation as resolved.
Show resolved Hide resolved
throw(ArgumentError("attempt to index GroupedDataFrame with $typeof(I)"))

@inline Base.to_indices(gd::GroupedDataFrame, I::Tuple{Not{<:InvertedIndices.NIdx}}) =
throw(ArgumentError("attempt to index GroupedDataFrame with $typeof(I)"))

@inline Base.to_indices(gd::GroupedDataFrame, I::Tuple{Not{<:Union{Array{Bool}, BitArray}}}) =
throw(ArgumentError("attempt to index GroupedDataFrame with $typeof(I)"))
bkamins marked this conversation as resolved.
Show resolved Hide resolved

#
# Dictionary interface
Expand Down
8 changes: 8 additions & 0 deletions src/other/broadcasting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ Base.Broadcast.BroadcastStyle(::Type{<:AbstractDataFrame}) =

Base.Broadcast.BroadcastStyle(::DataFrameStyle, ::Base.Broadcast.BroadcastStyle) =
DataFrameStyle()
Base.Broadcast.BroadcastStyle(::DataFrameStyle, ::Base.Broadcast.Unknown) =
bkamins marked this conversation as resolved.
Show resolved Hide resolved
DataFrameStyle()
Base.Broadcast.BroadcastStyle(::Base.Broadcast.BroadcastStyle, ::DataFrameStyle) =
DataFrameStyle()
Base.Broadcast.BroadcastStyle(::DataFrameStyle, ::DataFrameStyle) = DataFrameStyle()
Expand Down Expand Up @@ -225,6 +227,8 @@ function Base.Broadcast.broadcast_unalias(dest::AbstractDataFrame, src)
return src
end

Base.Broadcast.broadcast_unalias(::Nothing, src::AbstractDataFrame) = src
bkamins marked this conversation as resolved.
Show resolved Hide resolved

function Base.Broadcast.broadcast_unalias(dest, src::AbstractDataFrame)
wascopied = false
for (i, col) in enumerate(eachcol(src))
Expand Down Expand Up @@ -371,6 +375,10 @@ end
Base.Broadcast.broadcast_unalias(dest::DataFrameRow, src) =
Base.Broadcast.broadcast_unalias(parent(dest), src)

# this is currently impossible but is added to avoid potential dispatch ambiguity in the future
Base.Broadcast.broadcast_unalias(dest::DataFrameRow, src::AbstractDataFrame) =
Base.Broadcast.broadcast_unalias(parent(dest), src)

function Base.copyto!(dfr::DataFrameRow, bc::Base.Broadcast.Broadcasted)
bc′ = Base.Broadcast.preprocess(dfr, bc)
for I in eachindex(bc′)
Expand Down
4 changes: 4 additions & 0 deletions src/subdataframe/subdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ struct SubDataFrame{D<:AbstractDataFrame, S<:AbstractIndex, T<:AbstractVector{In
rows::T # maps from subdf row indexes to parent row indexes
end

# this method should be never called by DataFrames.jl code, but is added for safety
SubDataFrame(parent::SubDataFrame, colindex::AbstractIndex, rows::AbstractVector{Int}) =
throw(ArgumentError("Creation of a SubDataFrame from a SubDataFrame is not allowed"))

Base.@propagate_inbounds function SubDataFrame(parent::DataFrame, rows::AbstractVector{Int}, cols)
@boundscheck if !checkindex(Bool, axes(parent, 1), rows)
throw(BoundsError(parent, (rows, cols)))
Expand Down
14 changes: 11 additions & 3 deletions test/dataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ end
dfc = copy(df)
@test insertcols!(df, 2) == dfc
@test_throws ArgumentError insertcols!(df, 10)
@test_throws ArgumentError insertcols!(df, 2, a=1, b=2)
@test_throws MethodError insertcols!(df, 2, a=1, b=2)

df = DataFrame()
@test insertcols!(df, 1, :x=>[1]) == DataFrame(x=[1])
Expand Down Expand Up @@ -361,9 +361,17 @@ end
@test df2[!, 1] === x
end

@testset "unsupported insertcols!" begin
@testset "insertcols! with no cols" begin
df = DataFrame(x=1:2)
@test_throws ArgumentError insertcols!(df, 2, y=2:3)
@test_throws ArgumentError insertcols!(df, 0)
@test insertcols!(df, 2) == DataFrame(x=1:2)
bkamins marked this conversation as resolved.
Show resolved Hide resolved
@test insertcols!(df, :x) == DataFrame(x=1:2)
@test insertcols!(df, "x") == DataFrame(x=1:2)
@test insertcols!(df, "x", after=true, makeunique=true, copycols=true) == DataFrame(x=1:2)
@test insertcols!(df, 0, after=true) == DataFrame(x=1:2)
@test_throws ArgumentError insertcols!(df, 2, after=true)
@test insertcols!(df) == DataFrame(x=1:2)
@test insertcols!(df, after=true, makeunique=true, copycols=true) == DataFrame(x=1:2)
end

@testset "insertcols! after" begin
Expand Down
36 changes: 36 additions & 0 deletions test/grouping.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4276,4 +4276,40 @@ end
end
end

@testset "maximum and minimum on missing" begin
df = DataFrame(id=[1,1,2,2], x=fill(missing, 4))
gdf = groupby_checked(df, :id)
@test combine(gdf, :x => maximum => :x) ≅ DataFrame(id=1:2, x=fill(missing, 2))
@test combine(gdf, :x => minimum => :x) ≅ DataFrame(id=1:2, x=fill(missing, 2))
@test_throws ArgumentError combine(gdf, :x => maximum∘skipmissing)
@test_throws ArgumentError combine(gdf, :x => minimum∘skipmissing)
end

@testset "corner cases of indexing" begin
df = DataFrame(id=1:4)
gdf = groupby_checked(df, :id)
@test_throws ArgumentError gdf[CartesianIndex(1)]
@test_throws ArgumentError gdf[CartesianIndex(1, 1)]
@test_throws ArgumentError gdf[[CartesianIndex(1)]]
@test_throws ArgumentError gdf[[CartesianIndex(1, 1)]]
@test_throws ArgumentError gdf[Any[CartesianIndex(1)]]
@test_throws ArgumentError gdf[Any[CartesianIndex(1, 1)]]

@test_throws ArgumentError gdf[Not(CartesianIndex(1))]
@test_throws ArgumentError gdf[Not(CartesianIndex(1, 1))]
@test_throws ArgumentError gdf[Not([CartesianIndex(1)])]
@test_throws ArgumentError gdf[Not([CartesianIndex(1, 1)])]
@test_throws ArgumentError gdf[Not(Any[CartesianIndex(1)])]
@test_throws ArgumentError gdf[Not(Any[CartesianIndex(1, 1)])]

@test_throws BoundsError gdf[[true]]
@test_throws BoundsError gdf[Not([true])]
@test_throws BoundsError gdf[trues(1)]
@test_throws BoundsError gdf[Not(trues(1))]
@test_throws BoundsError gdf[view([true], 1:1)]
@test_throws BoundsError gdf[Not(view([true], 1:1))]
@test_throws BoundsError gdf[[true true true true]]
@test_throws ArgumentError gdf[Not([true true true true])]
end

end # module
14 changes: 14 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,20 @@ else
@show Threads.nthreads()
end

ambiguities_vec = Test.detect_ambiguities(DataFrames, recursive=true)
if !isempty(ambiguities_vec)
@error "Method ambiguities:"
display(ambiguities_vec)
throw(AssertionError("method dispatch ambiguities found"))
end

unbound_args_vec = Test.detect_unbound_args(DataFrames, recursive=true)
if !isempty(unbound_args_vec)
@error "Unbound type parameters:"
display(unbound_args_vec)
throw(AssertionError("unbound type parameters found"))
end

my_tests = ["utils.jl",
"cat.jl",
"data.jl",
Expand Down
1 change: 1 addition & 0 deletions test/sort.jl
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ end
@test issorted(df, rev=fill(false, ncol(df)))
@test issorted(df, order=Base.Forward)
@test issorted(df, order=fill(Base.Forward, ncol(df)))
@test_throws ArgumentError issorted(df, Base.Order.Forward)

@test issorted(df, :x, by=identity)
@test issorted(df, :x, by=[identity])
Expand Down