Skip to content

Commit

Permalink
add nrow, ncol, and Tables.subset for eachcol and eachrow (#3311)
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins authored Apr 8, 2023
1 parent a36bbbf commit e52b9a3
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 10 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

## New functionalities

* `DataFrameRows` and `DataFrameColumns` now support
`nrow`, `ncol`, and `Tables.subset`
([#3311](https://github.com/JuliaData/DataFrames.jl/pull/3311))
* `Not` allows passing multiple positional arguments that are
treated as if they were wrapped in `Cols` and does not throw an error
when a vector of duplicate indices is passed when doing column selection
Expand Down
6 changes: 6 additions & 0 deletions src/abstractdataframe/iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ julia> eachrow(view(df, [4, 3], [2, 1]))
"""
function Base.eachrow(df::AbstractDataFrame)
    # Wrap `df` in a `DataFrameRows` object.
    return DataFrameRows(df)
end

# `nrow` and `ncol` of a `DataFrameRows` object are forwarded to the object
# returned by `parent(itr)`.
function nrow(itr::DataFrameRows)
    return nrow(parent(itr))
end

function ncol(itr::DataFrameRows)
    return ncol(parent(itr))
end

# `DataFrameRows` declares linear indexing.
function Base.IndexStyle(::Type{<:DataFrameRows})
    return Base.IndexLinear()
end

# One-element size tuple holding the first dimension of the parent.
function Base.size(itr::DataFrameRows)
    nrows = size(parent(itr), 1)
    return (nrows,)
end

Expand Down Expand Up @@ -174,6 +177,9 @@ julia> sum.(eachcol(df))
"""
function Base.eachcol(df::AbstractDataFrame)
    # Wrap `df` in a `DataFrameColumns` object.
    return DataFrameColumns(df)
end

# `nrow` and `ncol` of a `DataFrameColumns` object are forwarded to the object
# returned by `parent(itr)`.
function nrow(itr::DataFrameColumns)
    return nrow(parent(itr))
end

function ncol(itr::DataFrameColumns)
    return ncol(parent(itr))
end

# `DataFrameColumns` iterates with a known one-dimensional shape.
function Base.IteratorSize(::Type{<:DataFrameColumns})
    return Base.HasShape{1}()
end

# One-element size tuple holding the second dimension of the parent.
function Base.size(itr::DataFrameColumns)
    ncols = size(parent(itr), 2)
    return (ncols,)
end

Expand Down
10 changes: 10 additions & 0 deletions src/other/tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,13 @@ TableTraits.isiterabletable(x::AbstractDataFrame) = true
return res
end
end

# `Tables.subset` for `DataFrameRows`: the request is forwarded to the parent
# of `itr` with the same `inds` and `viewhint`. When the parent's result is an
# `AbstractDataFrame` it is wrapped again with `eachrow`; any other result is
# returned unchanged.
@inline function Tables.subset(
    itr::DataFrameRows, inds; viewhint::Union{Bool, Nothing}=nothing
)
    subsetted = Tables.subset(parent(itr), inds; viewhint=viewhint)
    if subsetted isa AbstractDataFrame
        return eachrow(subsetted)
    else
        return subsetted
    end
end

# `Tables.subset` for `DataFrameColumns`: the request is forwarded to the
# parent of `itr` with the same `inds` and `viewhint`. When the parent's result
# is an `AbstractDataFrame` it is wrapped again with `eachcol`; any other
# result is returned unchanged.
@inline function Tables.subset(
    itr::DataFrameColumns, inds; viewhint::Union{Bool, Nothing}=nothing
)
    subsetted = Tables.subset(parent(itr), inds; viewhint=viewhint)
    if subsetted isa AbstractDataFrame
        return eachcol(subsetted)
    else
        return subsetted
    end
end
4 changes: 4 additions & 0 deletions test/iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ using Test, DataFrames
@testset "eachrow and eachcol" begin
df = DataFrame(A=Vector{Union{Int, Missing}}(1:2), B=Vector{Union{Int, Missing}}(2:3))

@test nrow(eachrow(df)) == nrow(df)
@test ncol(eachrow(df)) == ncol(df)
@test size(eachrow(df)) == (size(df, 1),)
@test parent(eachrow(df)) === df
@test names(eachrow(df)) == names(df)
Expand All @@ -22,6 +24,8 @@ using Test, DataFrames
@test collect(pairs(row)) isa Vector{Pair{Symbol, Int}}
end

@test nrow(eachcol(df)) == nrow(df)
@test ncol(eachcol(df)) == ncol(df)
@test Base.IteratorSize(eachcol(df)) == Base.HasShape{1}()
@test parent(eachcol(df)) === df
@test names(eachcol(df)) == names(df)
Expand Down
85 changes: 75 additions & 10 deletions test/tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -324,8 +324,9 @@ end
end

@testset "Tables.subset" begin
df = DataFrame(a=1:3, b=4:6)
dfref = DataFrame(a=1:3, b=4:6)

df = dfref
res = @inferred Tables.subset(df, :)
@test res isa DataFrame
@test res == DataFrame(a=1:3, b=4:6)
Expand Down Expand Up @@ -356,15 +357,79 @@ end
@test res isa SubDataFrame
@test res == DataFrame(a=[1, 3], b=[4, 6])

res = @inferred Tables.subset(df, 2)
@test res isa DataFrameRow
@test res == DataFrame(a=2, b=5)[1, :]
res = Tables.subset(df, 2, viewhint=false)
@test res isa NamedTuple{(:a, :b), Tuple{Int, Int}}
@test res == (a=2, b=5)
res = Tables.subset(df, 2, viewhint=true)
@test res isa DataFrameRow
@test res == DataFrame(a=2, b=5)[1, :]
df = eachcol(dfref)
res = @inferred Tables.subset(df, :)
@test res isa DataFrames.DataFrameColumns
@test res == eachcol(DataFrame(a=1:3, b=4:6))
res = Tables.subset(df, :, viewhint=false)
@test res isa DataFrames.DataFrameColumns
@test res == eachcol(DataFrame(a=1:3, b=4:6))
res = Tables.subset(df, :, viewhint=true)
@test res isa DataFrames.DataFrameColumns
@test res == eachcol(DataFrame(a=1:3, b=4:6))

res = @inferred Tables.subset(df, [3, 1])
@test res isa DataFrames.DataFrameColumns
@test res == eachcol(DataFrame(a=[3, 1], b=[6, 4]))
res = Tables.subset(df, [3, 1], viewhint=false)
@test res isa DataFrames.DataFrameColumns
@test res == eachcol(DataFrame(a=[3, 1], b=[6, 4]))
res = Tables.subset(df, [3, 1], viewhint=true)
@test res isa DataFrames.DataFrameColumns
@test res == eachcol(DataFrame(a=[3, 1], b=[6, 4]))

res = @inferred Tables.subset(df, [true, false, true])
@test res isa DataFrames.DataFrameColumns
@test res == eachcol(DataFrame(a=[1, 3], b=[4, 6]))
res = Tables.subset(df, [1, 3], viewhint=false)
@test res isa DataFrames.DataFrameColumns
@test res == eachcol(DataFrame(a=[1, 3], b=[4, 6]))
res = Tables.subset(df, [1, 3], viewhint=true)
@test res isa DataFrames.DataFrameColumns
@test res == eachcol(DataFrame(a=[1, 3], b=[4, 6]))

df = eachrow(dfref)
res = @inferred Tables.subset(df, :)
@test res isa DataFrames.DataFrameRows
@test res == eachrow(DataFrame(a=1:3, b=4:6))
res = Tables.subset(df, :, viewhint=false)
@test res isa DataFrames.DataFrameRows
@test res == eachrow(DataFrame(a=1:3, b=4:6))
res = Tables.subset(df, :, viewhint=true)
@test res isa DataFrames.DataFrameRows
@test res == eachrow(DataFrame(a=1:3, b=4:6))

res = @inferred Tables.subset(df, [3, 1])
@test res isa DataFrames.DataFrameRows
@test res == eachrow(DataFrame(a=[3, 1], b=[6, 4]))
res = Tables.subset(df, [3, 1], viewhint=false)
@test res isa DataFrames.DataFrameRows
@test res == eachrow(DataFrame(a=[3, 1], b=[6, 4]))
res = Tables.subset(df, [3, 1], viewhint=true)
@test res isa DataFrames.DataFrameRows
@test res == eachrow(DataFrame(a=[3, 1], b=[6, 4]))

res = @inferred Tables.subset(df, [true, false, true])
@test res isa DataFrames.DataFrameRows
@test res == eachrow(DataFrame(a=[1, 3], b=[4, 6]))
res = Tables.subset(df, [1, 3], viewhint=false)
@test res isa DataFrames.DataFrameRows
@test res == eachrow(DataFrame(a=[1, 3], b=[4, 6]))
res = Tables.subset(df, [1, 3], viewhint=true)
@test res isa DataFrames.DataFrameRows
@test res == eachrow(DataFrame(a=[1, 3], b=[4, 6]))

for df in (dfref, eachcol(dfref), eachrow(dfref))
res = @inferred Tables.subset(df, 2)
@test res isa DataFrameRow
@test res == DataFrame(a=2, b=5)[1, :]
res = Tables.subset(df, 2, viewhint=false)
@test res isa NamedTuple{(:a, :b), Tuple{Int, Int}}
@test res == (a=2, b=5)
res = Tables.subset(df, 2, viewhint=true)
@test res isa DataFrameRow
@test res == DataFrame(a=2, b=5)[1, :]
end
end

end # module

0 comments on commit e52b9a3

Please sign in to comment.