Skip to content

Commit

Permalink
add support for Not with multiple positional indices (#3302)
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins authored Apr 4, 2023
1 parent c72a02d commit a36bbbf
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 7 deletions.
9 changes: 9 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
# DataFrames.jl v1.6 Release Notes

## New functionalities

* `Not` now accepts multiple positional arguments, which are
treated as if they were wrapped in `Cols`; in addition, it no longer throws an error
when a vector containing duplicate indices is passed when doing column selection
([#3302](https://github.com/JuliaData/DataFrames.jl/pull/3302))

# DataFrames.jl v1.5 Release Notes

## New functionalities
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ CategoricalArrays = "0.10.0"
Compat = "4.2"
DataAPI = "1.14.0"
InlineStrings = "1.3.0"
InvertedIndices = "1"
InvertedIndices = "1.3"
IteratorInterfaceExtensions = "0.1.1, 1"
Missings = "0.4.2, 1"
PooledArrays = "1.4.2"
Expand Down
2 changes: 2 additions & 0 deletions docs/src/lib/indexing.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ The following values are a valid column index:
* a `Not` expression (see
[InvertedIndices.jl](https://github.com/JuliaData/InvertedIndices.jl));
`Not(idx)` selects all indices not in the passed `idx`;
when passed as a column selector, `Not(idx...)` is equivalent to
`Not(Cols(idx...))`.
* a `Cols` expression (see
[DataAPI.jl](https://github.com/JuliaData/DataAPI.jl)); `Cols(idxs...)`
selects the union of the selections in `idxs`; in particular `Cols()`
Expand Down
9 changes: 9 additions & 0 deletions src/other/index.jl
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,15 @@ end
# Indexing with `:` selects every column: return the range of all positions,
# from 1 up to the number of entries in the index.
@inline function Base.getindex(x::AbstractIndex, ::Colon)
    return Base.OneTo(length(x))
end
# Generic `Not` selector: resolve the wrapped selector to column positions and
# return every position of the index that is not among them.
@inline function Base.getindex(x::AbstractIndex, notidx::Not)
    excluded = getindex(x, notidx.skip)
    return setdiff(1:length(x), excluded)
end

# `Not` wrapping a vector selector: return the positions of the index that are
# not selected by the wrapped vector.
@inline function Base.getindex(x::AbstractIndex, notidx::Not{<:AbstractVector})
    selector = notidx.skip
    excluded = if eltype(selector) === Bool
        # A Bool vector is used as given: `unique` would collapse it and
        # change which positions it selects.
        getindex(x, selector)
    else
        # Any other vector is deduplicated before lookup, so repeated entries
        # are passed to the inner lookup only once.
        getindex(x, unique(selector))
    end
    return setdiff(1:length(x), excluded)
end

# `Not(a, b, ...)` with several positional arguments: the arguments are wrapped
# in `Cols`, resolved to column positions, and the remaining positions of the
# index are returned.
@inline function Base.getindex(x::AbstractIndex,
                               notidx::Not{InvertedIndices.NotMultiIndex})
    excluded = getindex(x, Cols(notidx.skip.indices...))
    return setdiff(1:length(x), excluded)
end
# `Between(first, last)` selector: look up both endpoints in the index and
# return the range spanning from the first result to the last.
@inline function Base.getindex(x::AbstractIndex, idx::Between)
    lo = x[idx.first]
    hi = x[idx.last]
    return lo:hi
end
# `All()` without arguments selects every column; `All(args...)` with arguments
# is rejected with an error pointing the caller to `Cols(args...)`.
@inline function Base.getindex(x::AbstractIndex, idx::All)
    if !isempty(idx.cols)
        throw(ArgumentError("All(args...) is not supported: use Cols(args...) instead"))
    end
    return 1:length(x)
end
Expand Down
18 changes: 15 additions & 3 deletions test/index.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,13 @@ using DataFrames: Index, SubIndex, fuzzymatch
@test_throws ArgumentError i[Not(:x)]
@test_throws ArgumentError i[Not("x")]
@test_throws BoundsError i[Not(1:3)]
@test_throws ArgumentError i[Not([1, 1])]
@test_throws ArgumentError i[Not([:A, :A])]
@test_throws ArgumentError i[Not(["A", "A"])]

@test i[Not([1, 1])] == [2]
@test i[Not([:A, :A])] == [2]
@test i[Not(["A", "A"])] == [2]
@test isempty(i[Not([true, true])])
@test i[Not([false, false])] == 1:2
@test i[Not([true, false])] == [2]

@test i[1:1] == 1:1

Expand Down Expand Up @@ -115,6 +119,9 @@ end
si7 = SubIndex(i, Not(1:2))
si8 = SubIndex(i, ["C", "D", "E"])
si9 = SubIndex(i, Not(Not(["C", "D", "E"])))
si10 = SubIndex(i, Not(1, 2))
si11 = SubIndex(i, Not(:A, :B))
si12 = SubIndex(i, Not(2, "A"))

@test copy(si1) == i
@test copy(si2) == Index([:C, :D, :E])
Expand All @@ -125,6 +132,9 @@ end
@test copy(si7) == Index([:C, :D, :E])
@test copy(si8) == Index([:C, :D, :E])
@test copy(si9) == Index([:C, :D, :E])
@test copy(si10) == Index([:C, :D, :E])
@test copy(si11) == Index([:C, :D, :E])
@test copy(si12) == Index([:C, :D, :E])

@test_throws ArgumentError SubIndex(i, 1)
@test_throws ArgumentError SubIndex(i, :A)
Expand Down Expand Up @@ -327,6 +337,8 @@ end
push!(i, :x131)
push!(i, :y13)
push!(i, :yy13)
@test i[Not(2, 4, 5)] == [1, 3]
@test i[Not(2, :y13, "yy13")] == [1, 3]
@test i[Not(Not(r"x1."))] == [2, 3]
@test isempty(i[Not(Not(r"xx"))])
@test i[Not(Not(r""))] == 1:5
Expand Down
17 changes: 17 additions & 0 deletions test/indexing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,23 @@ using Test, DataFrames, Unicode, Random
@test dfx[!, 1] === df[!, names(dfx)[1]]
end

@test df[!, Not(1, 2)] == DataFrame(c=7:9)
@test df[!, Not(1, 1, 2)] == DataFrame(c=7:9)
@test df[!, Not([1, 1, 2])] == DataFrame(c=7:9)
@test df[!, Not(:b, 1)] == DataFrame(c=7:9)
@test df[!, Not(:b, :b, 1)] == DataFrame(c=7:9)
@test df[!, Not("c", :a)] == DataFrame(b=4:6)
@test df[!, Not("c", "c", :a)] == DataFrame(b=4:6)
@test df[!, Not(:c, :c, :a)] == DataFrame(b=4:6)
@test df[!, Not([:c, :c, :a])] == DataFrame(b=4:6)
@test df[!, Not("c", "c", "a")] == DataFrame(b=4:6)
@test df[!, Not(["c", "c", "a"])] == DataFrame(b=4:6)
@test df[!, Not(:b, "c", :a)] == DataFrame()
@test df[!, Not([1, 2], :b)] == DataFrame(c=7:9)
@test df[!, Not([:c, :a], :b)] == DataFrame()
@test df[!, Not([1, 2], 2)] == DataFrame(c=7:9)
@test df[!, Not([1, 2], [1, 2])] == DataFrame(c=7:9)

@test df[1, 1] == 1
@test df[1, 1:2] isa DataFrameRow
@test df[1, r"[ab]"] isa DataFrameRow
Expand Down
9 changes: 6 additions & 3 deletions test/select.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,11 @@ Random.seed!(1234)
df = DataFrame(a=1, b=2, c=3, d=4, e=5)
@test_throws BoundsError select!(df, Not(0))
@test_throws BoundsError select!(df, Not(6))
@test_throws ArgumentError select!(df, Not([1, 1]))
@test_throws ArgumentError select!(df, Not(:f))
@test_throws BoundsError select!(df, Not([true, false]))

@test select!(copy(df), Not([1, 1])) == df[!, 2:end]

d = copy(df)
select!(d, Not([:a, :e, :c]))
@test d == DataFrame(b=2, d=4)
Expand Down Expand Up @@ -63,10 +64,11 @@ end
df = DataFrame(a=1, b=2, c=3, d=4, e=5)
@test_throws BoundsError select(df, Not(0))
@test_throws BoundsError select(df, Not(6))
@test_throws ArgumentError select(df, Not([1, 1]))
@test_throws ArgumentError select(df, Not(:f))
@test_throws BoundsError select(df, Not([true, false]))

@test select(df, Not([1, 1])) == df[!, 2:end]

df2 = copy(df)
d = select(df, Not([:a, :e, :c]))
@test d == df[:, [:b, :d]]
Expand Down Expand Up @@ -151,10 +153,11 @@ end
df = view(DataFrame(a=1, b=2, c=3, d=4, e=5), :, :)
@test_throws BoundsError select(df, Not(0))
@test_throws BoundsError select(df, Not(6))
@test_throws ArgumentError select(df, Not([1, 1]))
@test_throws ArgumentError select(df, Not(:f))
@test_throws BoundsError select(df, Not([true, false]))

@test select(df, Not([1, 1])) == df[!, 2:end]

df2 = copy(df)
d = select(df, Not([:a, :e, :c]))
@test d isa DataFrame
Expand Down

0 comments on commit a36bbbf

Please sign in to comment.