From 71f96c7a3c1bb685799f9d79e0c8416b63752c3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 7 Apr 2023 15:08:24 +0200 Subject: [PATCH] add nrow, ncol, and Tables.subset for eachcol and eachrow --- NEWS.md | 3 ++ src/abstractdataframe/iteration.jl | 6 +++ src/other/tables.jl | 10 ++++ test/iteration.jl | 4 ++ test/tables.jl | 85 ++++++++++++++++++++++++++---- 5 files changed, 98 insertions(+), 10 deletions(-) diff --git a/NEWS.md b/NEWS.md index d37e939d85..b76a1db7e2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,9 @@ ## New functionalities +* `DataFrameRows` and `DataFrameColumns` now support + `nrow`, `ncol`, and `Tables.subset` + ([#3311](https://github.com/JuliaData/DataFrames.jl/pull/3311)) * `Not` allows passing multiple positional arguments that are treated as if they were wrapped in `Cols` and does not throw an error when a vector of duplicate indices is passed when doing column selection diff --git a/src/abstractdataframe/iteration.jl b/src/abstractdataframe/iteration.jl index d20285e4dc..a47b4b2a6a 100644 --- a/src/abstractdataframe/iteration.jl +++ b/src/abstractdataframe/iteration.jl @@ -74,6 +74,9 @@ julia> eachrow(view(df, [4, 3], [2, 1])) """ Base.eachrow(df::AbstractDataFrame) = DataFrameRows(df) +nrow(itr::DataFrameRows) = nrow(parent(itr)) +ncol(itr::DataFrameRows) = ncol(parent(itr)) + Base.IndexStyle(::Type{<:DataFrameRows}) = Base.IndexLinear() Base.size(itr::DataFrameRows) = (size(parent(itr), 1), ) @@ -174,6 +177,9 @@ julia> sum.(eachcol(df)) """ Base.eachcol(df::AbstractDataFrame) = DataFrameColumns(df) +nrow(itr::DataFrameColumns) = nrow(parent(itr)) +ncol(itr::DataFrameColumns) = ncol(parent(itr)) + Base.IteratorSize(::Type{<:DataFrameColumns}) = Base.HasShape{1}() Base.size(itr::DataFrameColumns) = (size(parent(itr), 2),) diff --git a/src/other/tables.jl b/src/other/tables.jl index d31dbe7b16..5d412a0710 100644 --- a/src/other/tables.jl +++ b/src/other/tables.jl @@ -114,3 +114,13 @@ TableTraits.isiterabletable(x::AbstractDataFrame) = true return res end end + +@inline function Tables.subset(itr::DataFrameRows, inds; viewhint::Union{Bool, Nothing}=nothing) + res = Tables.subset(parent(itr), inds, viewhint=viewhint) + return res isa AbstractDataFrame ? eachrow(res) : res +end + +@inline function Tables.subset(itr::DataFrameColumns, inds; viewhint::Union{Bool, Nothing}=nothing) + res = Tables.subset(parent(itr), inds, viewhint=viewhint) + return res isa AbstractDataFrame ? eachcol(res) : res +end diff --git a/test/iteration.jl b/test/iteration.jl index 362035e74d..2202b33d99 100644 --- a/test/iteration.jl +++ b/test/iteration.jl @@ -6,6 +6,8 @@ using Test, DataFrames @testset "eachrow and eachcol" begin df = DataFrame(A=Vector{Union{Int, Missing}}(1:2), B=Vector{Union{Int, Missing}}(2:3)) + @test nrow(eachrow(df)) == nrow(df) + @test ncol(eachrow(df)) == ncol(df) @test size(eachrow(df)) == (size(df, 1),) @test parent(eachrow(df)) === df @test names(eachrow(df)) == names(df) @@ -22,6 +24,8 @@ using Test, DataFrames @test collect(pairs(row)) isa Vector{Pair{Symbol, Int}} end + @test nrow(eachcol(df)) == nrow(df) + @test ncol(eachcol(df)) == ncol(df) @test Base.IteratorSize(eachcol(df)) == Base.HasShape{1}() @test parent(eachcol(df)) === df @test names(eachcol(df)) == names(df) diff --git a/test/tables.jl b/test/tables.jl index b93ea2d178..c73d9a8d80 100644 --- a/test/tables.jl +++ b/test/tables.jl @@ -324,8 +324,9 @@ end end @testset "Tables.subset" begin - df = DataFrame(a=1:3, b=4:6) + dfref = DataFrame(a=1:3, b=4:6) + df = dfref res = @inferred Tables.subset(df, :) @test res isa DataFrame @test res == DataFrame(a=1:3, b=4:6) @@ -356,15 +357,79 @@ end @test res isa SubDataFrame @test res == DataFrame(a=[1, 3], b=[4, 6]) - res = @inferred Tables.subset(df, 2) - @test res isa DataFrameRow - @test res == DataFrame(a=2, b=5)[1, :] - res = Tables.subset(df, 2, viewhint=false) - @test res isa NamedTuple{(:a, :b), Tuple{Int, Int}} - @test res == (a=2, b=5) - res = Tables.subset(df, 2, viewhint=true) - @test res isa DataFrameRow - @test res == DataFrame(a=2, b=5)[1, :] + df = eachcol(dfref) + res = @inferred Tables.subset(df, :) + @test res isa DataFrames.DataFrameColumns + @test res == eachcol(DataFrame(a=1:3, b=4:6)) + res = Tables.subset(df, :, viewhint=false) + @test res isa DataFrames.DataFrameColumns + @test res == eachcol(DataFrame(a=1:3, b=4:6)) + res = Tables.subset(df, :, viewhint=true) + @test res isa DataFrames.DataFrameColumns + @test res == eachcol(DataFrame(a=1:3, b=4:6)) + + res = @inferred Tables.subset(df, [3, 1]) + @test res isa DataFrames.DataFrameColumns + @test res == eachcol(DataFrame(a=[3, 1], b=[6, 4])) + res = Tables.subset(df, [3, 1], viewhint=false) + @test res isa DataFrames.DataFrameColumns + @test res == eachcol(DataFrame(a=[3, 1], b=[6, 4])) + res = Tables.subset(df, [3, 1], viewhint=true) + @test res isa DataFrames.DataFrameColumns + @test res == eachcol(DataFrame(a=[3, 1], b=[6, 4])) + + res = @inferred Tables.subset(df, [true, false, true]) + @test res isa DataFrames.DataFrameColumns + @test res == eachcol(DataFrame(a=[1, 3], b=[4, 6])) + res = Tables.subset(df, [1, 3], viewhint=false) + @test res isa DataFrames.DataFrameColumns + @test res == eachcol(DataFrame(a=[1, 3], b=[4, 6])) + res = Tables.subset(df, [1, 3], viewhint=true) + @test res isa DataFrames.DataFrameColumns + @test res == eachcol(DataFrame(a=[1, 3], b=[4, 6])) + + df = eachrow(dfref) + res = @inferred Tables.subset(df, :) + @test res isa DataFrames.DataFrameRows + @test res == eachrow(DataFrame(a=1:3, b=4:6)) + res = Tables.subset(df, :, viewhint=false) + @test res isa DataFrames.DataFrameRows + @test res == eachrow(DataFrame(a=1:3, b=4:6)) + res = Tables.subset(df, :, viewhint=true) + @test res isa DataFrames.DataFrameRows + @test res == eachrow(DataFrame(a=1:3, b=4:6)) + + res = @inferred Tables.subset(df, [3, 1]) + @test res isa DataFrames.DataFrameRows + @test res == eachrow(DataFrame(a=[3, 1], b=[6, 4])) + res = Tables.subset(df, [3, 1], viewhint=false) + @test res isa DataFrames.DataFrameRows + @test res == eachrow(DataFrame(a=[3, 1], b=[6, 4])) + res = Tables.subset(df, [3, 1], viewhint=true) + @test res isa DataFrames.DataFrameRows + @test res == eachrow(DataFrame(a=[3, 1], b=[6, 4])) + + res = @inferred Tables.subset(df, [true, false, true]) + @test res isa DataFrames.DataFrameRows + @test res == eachrow(DataFrame(a=[1, 3], b=[4, 6])) + res = Tables.subset(df, [1, 3], viewhint=false) + @test res isa DataFrames.DataFrameRows + @test res == eachrow(DataFrame(a=[1, 3], b=[4, 6])) + res = Tables.subset(df, [1, 3], viewhint=true) + @test res isa DataFrames.DataFrameRows + @test res == eachrow(DataFrame(a=[1, 3], b=[4, 6])) + + for df in (dfref, eachcol(dfref), eachrow(dfref)) + res = @inferred Tables.subset(df, 2) + @test res isa DataFrameRow + @test res == DataFrame(a=2, b=5)[1, :] + res = Tables.subset(df, 2, viewhint=false) + @test res isa NamedTuple{(:a, :b), Tuple{Int, Int}} + @test res == (a=2, b=5) + res = Tables.subset(df, 2, viewhint=true) + @test res isa DataFrameRow + @test res == DataFrame(a=2, b=5)[1, :] + end end end # module