From aaa9b92285e29f848fe9a02c5fade1c1b14cb097 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 6 May 2019 23:36:46 +0200 Subject: [PATCH 01/30] broadcasting in DataFrames --- docs/src/lib/indexing.md | 16 ++++++++++++++++ src/DataFrames.jl | 2 ++ test/runtests.jl | 1 + 3 files changed, 19 insertions(+) diff --git a/docs/src/lib/indexing.md b/docs/src/lib/indexing.md index d73b2dadaf..514c33aa4b 100644 --- a/docs/src/lib/indexing.md +++ b/docs/src/lib/indexing.md @@ -79,3 +79,19 @@ For performance reasons, accessing, via `getindex` or `view`, a single `row` and ## `setindex!` Under construction + +## Broadcasting + +`AbstractDataFrame` is converted to a matrix in broadcasting. +`DataFrameRow`, `GroupedDataFrame`, `DataFrameRows` and `DataFrameColumns` are `collect`ed in broadcasting. + +Additionally it is possible to assign a value to `AbstractDataFrame` and `DataFrameRow` using the `.=` operator. +In such an operation `AbstractDataFrame` is two dimensional and `DataFrameRow` as a single dimensional. + +If column indexing using symbols is performed the order of columns in the operation is specified by the order of symbols. + +It is allowed to perform `df[col] .= value` even if `col` is not present in the `DataFrame` and it is a `Symbol`. +In such a case a new column will be created. +A data frame with zero columns is always considered to have zero rows. +The assignment broadcasting is not supported for `df.col` style. +Similar rules apply to `df[cols] .= value` if any of `Symbol`s in `cols` is not present in `df`. diff --git a/src/DataFrames.jl b/src/DataFrames.jl index e5dacf0546..7366d974f1 100644 --- a/src/DataFrames.jl +++ b/src/DataFrames.jl @@ -83,6 +83,8 @@ include("dataframerow/dataframerow.jl") include("groupeddataframe/grouping.jl") include("dataframerow/utils.jl") +include("other/broadcasting.jl") + include("abstractdataframe/iteration.jl") include("abstractdataframe/join.jl") include("abstractdataframe/reshape.jl") diff --git a/test/runtests.jl b/test/runtests.jl index a4ccd5ad99..2926fb1994 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -27,6 +27,7 @@ my_tests = ["utils.jl", "tables.jl", "tabletraits.jl", "indexing.jl", + "broadcasting.jl", "deprecated.jl"] println("Running tests:") From 1477b5ba61d5b500bb5dae7cc11fccfb4ef5d612 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 6 May 2019 23:46:49 +0200 Subject: [PATCH 02/30] add two new files to the repo --- src/other/broadcasting.jl | 94 ++++++++++++ test/broadcasting.jl | 291 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 385 insertions(+) create mode 100644 src/other/broadcasting.jl create mode 100644 test/broadcasting.jl diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl new file mode 100644 index 0000000000..7161162fd1 --- /dev/null +++ b/src/other/broadcasting.jl @@ -0,0 +1,94 @@ +struct DummyColumnValue end + +Base.broadcastable(df::AbstractDataFrame) = Matrix(df) + +Base.maybeview(df::AbstractDataFrame, idxs...) = view(df, idxs...) + +function Base.maybeview(df::AbstractDataFrame, idxs) + if idxs isa Symbol + if !haskey(df, idxs) + if !(df isa DataFrame) + throw(ArgumentError("column $idxs not found")) + end + df[idxs] = Vector{DummyColumnValue}(undef, nrow(df)) + return view(df, [idxs]) + end + end + if idxs isa AbstractVector{Symbol} + for idx in idxs + if !haskey(df, idx) + if !(df isa DataFrame) + throw(ArgumentError("column $idxs not found")) + end + df[idx] = Vector{DummyColumnValue}(undef, nrow(df)) + end + end + end + view(df, idxs) +end + +function Base.copyto!(df::SubDataFrame, bc) + m = Base.materialize(bc) + if ncol(df) == 1 && eltype(df[1]) === DummyColumnValue + if maximum(ndims.(Base.Broadcast.flatten(bc).args)) > 1 + throw(DimensionMismatch("cannot broadcast array to have fewer dimensions")) + end + end + for col in axes(df, 2) + if eltype(df[col]) === DummyColumnValue + colname = names(df)[col] + T = reduce(promote_type, typeof.(view(m, :, col))) + parent(df)[colname] = Tables.allocatecolumn(T, nrow(df)) + end + df[col] .= view(m, :, col) + end + df +end + +function Base.copyto!(df::SubDataFrame, bc::Base.Broadcast.Broadcasted{<:Base.Broadcast.AbstractArrayStyle{0}}) + if bc.f === identity && bc.args isa Tuple{Any} && Base.Broadcast.isflat(bc) + for col in axes(df, 2) + if eltype(df[col]) === DummyColumnValue + colname = names(df)[col] + parent(df)[colname] = Tables.allocatecolumn(typeof(bc.args[1][]), nrow(df)) + end + fill!(df[col], bc.args[1][]) + end + df + else + copyto!(dfr, convert(Broadcasted{Nothing}, bc)) + end +end + +function Base.copyto!(dfr::DataFrameRow, x) + m = Base.materialize(x) + foreach(col -> dfr[col] = m[col], 1:length(dfr)) + dfr +end + +Base.copyto!(dfr::DataFrameRow, bc::Base.Broadcast.Broadcasted{<:Base.Broadcast.AbstractArrayStyle{0}}) = + if bc.f === identity && bc.args isa Tuple{Any} && Base.Broadcast.isflat(bc) + foreach(col -> dfr[col] = bc.args[1][], 1:length(dfr)) + dfr + else + copyto!(dfr, convert(Broadcasted{Nothing}, bc)) + end + +function Base.materialize!(dest::SubDataFrame, bc::Base.Broadcast.Broadcasted{Style}) where {Style} + try + copyto!(dest, Base.Broadcast.instantiate(Base.Broadcast.Broadcasted{Style}(bc.f, bc.args, axes(dest)))) + catch + df = parent(dest) + deletecols!(df, findall(col -> eltype(col) == DummyColumnValue, eachcol(df))) + rethrow() + end +end +function Base.materialize!(dest::SubDataFrame, x) + try + copyto!(dest, Base.Broadcast.instantiate(Base.Broadcast.Broadcasted(identity, (x,), axes(dest)))) + catch + df = parent(dest) + deletecols!(df, findall(col -> eltype(col) == DummyColumnValue, eachcol(df))) + rethrow() + end +end diff --git a/test/broadcasting.jl b/test/broadcasting.jl new file mode 100644 index 0000000000..03d33c460d --- /dev/null +++ b/test/broadcasting.jl @@ -0,0 +1,291 @@ +module TestBroadcasting + +using Test, DataFrames + +refdf = DataFrame(ones(3, 5)) + +@testset "data frame and data frame row in broadcasting" begin + df = copy(refdf) + dfv = view(df, 1:2, 1:4) + dfr = df[1, 1:2] + + @test df .+ 1 == fill(2.0, 3, 5) + @test all(df .== ones(3, 5)) + @test [i for i in 3:5, j in 1:5] == @. 2 * df + (1:3) + @test df .- ones(3, 5) == zeros(3, 5) + @test_throws DimensionMismatch df .- ones(3, 4) + @test_throws DimensionMismatch df .- ones(4) + @test_throws DimensionMismatch df .- ones(3, 4, 1) + + @test dfv .+ 1 == fill(2.0, 2, 4) + @test all(dfv .== ones(2, 4)) + @test [i for i in 3:4, j in 1:4] == @. 2 * dfv + (1:2) + @test dfv .- ones(2, 4) == zeros(2, 4) + @test_throws DimensionMismatch dfv .- ones(3, 4) + @test_throws DimensionMismatch dfv .- ones(4) + @test_throws DimensionMismatch dfv .- ones(3, 4, 1) + + @test dfr .+ 1 == fill(2.0, 2) + @test all(dfr .== ones(2)) + @test [3, 4] == @. 2 * dfr + (1:2) + @test dfr .- ones(2) == zeros(2) + @test_throws DimensionMismatch dfr .- ones(3, 4) + @test_throws DimensionMismatch dfr .- ones(4) + @test_throws DimensionMismatch dfr .- ones(3, 4, 1) +end + +@testset "normal data frame and data frame row in broadcasted assignment - one column" begin + df = copy(refdf) + df[1] .+= 1 + @test df.x1 == [2, 2, 2] + @test all(df[2:end] .== ones(size(df[2:end])...)) + + dfv = @view df[1:2, 2:end] + dfv[1] .+= 1 + @test dfv.x2 == [2, 2] + @test all(dfv[2:end] .== ones(size(dfv[2:end])...)) + @test all(df[1:2, 1:2] .== 2) + + dfr = df[1, 3:end] + dfr[end-1:end] .= 10 + @test all(dfr .== [1, 10, 10]) + @test df.x3[1] == 1 && df.x4[1] == 10 && df.x5[1] == 10 + + df = copy(refdf) + df[1] .+= [1, 1, 1] + @test df.x1 == [2, 2, 2] + @test all(df[2:end] .== ones(size(df[2:end])...)) + + dfv = @view df[1:2, 2:end] + dfv[1] .+= [1, 1] + @test dfv.x2 == [2, 2] + @test all(dfv[2:end] .== ones(size(dfv[2:end])...)) + @test all(df[1:2, 1:2] .== 2) + + dfr = df[1, 3:end] + dfr[end-1:end] .= [10, 10] + @test all(dfr .== [1, 10, 10]) + @test df.x3[1] == 1 && df.x4[1] == 10 && df.x5[1] == 10 + + df = copy(refdf) + dfv = @view df[1:2, 2:end] + dfr = df[1, 3:end] + @test_throws DimensionMismatch df[1] .= rand(3, 1) + @test_throws DimensionMismatch dfv[1] .= rand(2, 1) + @test_throws DimensionMismatch dfr[end-1:end] .= rand(3, 1) + + df = copy(refdf) + df[:x1] .+= 1 + @test df.x1 == [2, 2, 2] + @test all(df[2:end] .== ones(size(df[2:end])...)) + + dfv = @view df[1:2, 2:end] + dfv[:x2] .+= 1 + @test dfv.x2 == [2, 2] + @test all(dfv[2:end] .== ones(size(dfv[2:end])...)) + @test all(df[1:2, 1:2] .== 2) + + dfr = df[1, 3:end] + dfr[[:x4, :x5]] .= 10 + @test all(dfr .== [1, 10, 10]) + @test df.x3[1] == 1 && df.x4[1] == 10 && df.x5[1] == 10 + + df = copy(refdf) + df[:x1] .+= [1, 1, 1] + @test df.x1 == [2, 2, 2] + @test all(df[2:end] .== ones(size(df[2:end])...)) + + dfv = @view df[1:2, 2:end] + dfv[:x2] .+= [1, 1] + @test dfv.x2 == [2, 2] + @test all(dfv[2:end] .== ones(size(dfv[2:end])...)) + @test all(df[1:2, 1:2] .== 2) + + dfr = df[1, 3:end] + dfr[[:x4, :x5]] .= [10, 10] + @test all(dfr .== [1, 10, 10]) + @test df.x3[1] == 1 && df.x4[1] == 10 && df.x5[1] == 10 + + df = copy(refdf) + dfv = @view df[1:2, 2:end] + dfr = df[1, 3:end] + @test_throws DimensionMismatch df[:x1] .= rand(3, 1) + @test_throws DimensionMismatch dfv[:x2] .= rand(2, 1) + @test_throws DimensionMismatch dfr[[:x4, :x5]] .= rand(3, 1) +end + +@testset "normal data frame and data frame row in broadcasted assignment - two columns" begin + df = copy(refdf) + df[[1,2]] .+= 1 + @test df.x1 == [2, 2, 2] + @test df.x2 == [2, 2, 2] + @test all(df[3:end] .== ones(size(df[3:end])...)) + + dfv = @view df[1:2, 3:end] + dfv[[1,2]] .+= 1 + @test dfv.x3 == [2, 2] + @test dfv.x4 == [2, 2] + @test all(dfv[3:end] .== ones(size(dfv[3:end])...)) + @test all(df[1:2, 1:2] .== 2) + + df = copy(refdf) + df[[1,2]] .+= [1 1 + 1 1 + 1 1] + @test df.x1 == [2, 2, 2] + @test df.x2 == [2, 2, 2] + @test all(df[3:end] .== ones(size(df[3:end])...)) + + dfv = @view df[1:2, 3:end] + dfv[[1,2]] .+= [1 1 + 1 1] + @test dfv.x3 == [2, 2] + @test dfv.x4 == [2, 2] + @test all(dfv[3:end] .== ones(size(dfv[3:end])...)) + @test all(df[1:2, 1:2] .== 2) + + df = copy(refdf) + dfv = @view df[1:2, 2:end] + dfr = df[1, 3:end] + @test_throws DimensionMismatch df[[1,2]] .= rand(3, 10) + @test_throws DimensionMismatch dfv[[1,2]] .= rand(2, 10) + + df = copy(refdf) + df[[:x1,:x2]] .+= 1 + @test df.x1 == [2, 2, 2] + @test df.x2 == [2, 2, 2] + @test all(df[3:end] .== ones(size(df[3:end])...)) + + dfv = @view df[1:2, 3:end] + dfv[[:x3,:x4]] .+= 1 + @test dfv.x3 == [2, 2] + @test dfv.x4 == [2, 2] + @test all(dfv[3:end] .== ones(size(dfv[3:end])...)) + @test all(df[1:2, 1:2] .== 2) + + df = copy(refdf) + df[[:x1,:x2]] .+= [1 1 + 1 1 + 1 1] + @test df.x1 == [2, 2, 2] + @test df.x2 == [2, 2, 2] + @test all(df[3:end] .== ones(size(df[3:end])...)) + + dfv = @view df[1:2, 3:end] + dfv[[:x3,:x4]] .+= [1 1 + 1 1] + @test dfv.x3 == [2, 2] + @test dfv.x4 == [2, 2] + @test all(dfv[3:end] .== ones(size(dfv[3:end])...)) + @test all(df[1:2, 1:2] .== 2) + + df = copy(refdf) + dfv = @view df[1:2, 2:end] + dfr = df[1, 3:end] + @test_throws DimensionMismatch df[[:x1,:x2]] .= rand(3, 10) + @test_throws DimensionMismatch dfv[[:x3,:x4]] .= rand(2, 10) +end + +@testset "assignment to a whole data frame and data frame row" begin + df = copy(refdf) + df .= 10 + @test all(df .== 10) + dfv = view(df, 1:2, 1:4) + dfv .= 100 + @test (all(df[1:2, 1:4] .== 100)) + @test (all(df[3, 1:4] .== 10)) + dfr = df[1, 1:2] + dfr .= 1000 + @test (all(df[1, 1:2] .== 1000)) + @test (all(df[3, :] .!= 1000)) +end + +@testset "extending data frame in broadcasted assignment - one column" begin + df = copy(refdf) + df[:a] .= 1 + @test all(df .== 1) + @test names(df)[end] == :a + df[:b] .= [1, 1, 1] + @test all(df .== 1) + @test names(df)[end] == :b + cdf = copy(df) + @test_throws DimensionMismatch df[:c] .= ones(3, 1) + @test df == cdf + @test_throws DimensionMismatch df[:x] .= ones(4) + @test df == cdf + @test_throws BoundsError df[10] .= ones(3) + @test df == cdf + + dfv = @view df[1:2, 2:end] + @test_throws BoundsError dfv[10] .= ones(3) + @test_throws ArgumentError dfv[:z] .= ones(3) + @test df == cdf + dfr = df[1, 3:end] + @test_throws BoundsError dfr[10] .= ones(3) + @test_throws ArgumentError dfr[:z] .= ones(3) + @test df == cdf +end + +@testset "extending data frame in broadcasted assignment - two columns" begin + df = copy(refdf) + df[[:x1, :a]] .= 10 + @test all(df.x1 .== 10) && all(df.a .== 10) + @test names(df)[end] == :a + df[[:b, :c]] .= [5 5 + 5 5 + 5 5] + @test all(df[end-1:end] .== 5) + @test names(df)[end-1:end] == [:b, :c] + cdf = copy(df) + @test_throws DimensionMismatch df[[:x1, :x]] .= ones(3, 10) + @test df == cdf + @test_throws DimensionMismatch df[[:x2, :x]] .= ones(4) + @test df == cdf + @test_throws BoundsError df[[1, 10]] .= ones(3) + @test df == cdf + + dfv = @view df[1:2, 2:end] + @test_throws BoundsError dfv[[10, 11]] .= ones(3) + @test_throws ArgumentError dfv[[:x1, :z]] .= ones(3) + @test df == cdf + dfr = df[1, 3:end] + @test_throws BoundsError dfr[[1,11]] .= ones(10) + @test_throws ArgumentError dfr[[:x1,:z]] .= ones(10) + @test df == cdf +end + +@testset "empty data frame corner case" begin + df = DataFrame() + @test_throws BoundsError df[1] .= 1 + @test_throws ArgumentError df[:a] .= [1] + @test_throws ArgumentError df[[:a,:b]] .= [1] + @test df == DataFrame() + df .= 1 + @test df == DataFrame() + df .= [1] + @test df == DataFrame() + df .= ones(1,1) + @test df == DataFrame() + @test_throws DimensionMismatch df .= ones(1,2) + @test_throws DimensionMismatch df .= ones(1,1,1) + + df[:a] .= 1 + @test nrow(df) == 0 + @test names(df) == [:a] + @test eltypes(df) == [Int] + df = DataFrame() + df[[:a, :b]] .= 1 + @test nrow(df) == 0 + @test names(df) == [:a, :b] + @test eltypes(df) == [Int, Int] + df[:c] .= 1.0 + @test nrow(df) == 0 + @test names(df) == [:a, :b, :c] + @test eltypes(df) == [Int, Int, Float64] + df[[:c,:d]] .= 1.0 + @test nrow(df) == 0 + @test names(df) == [:a, :b, :c, :d] + @test eltypes(df) == [Int, Int, Float64, Float64] +end + +end # module From 43d0e2eef0da7fd41c9a881c640634aa5e37ce42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 7 May 2019 00:17:47 +0200 Subject: [PATCH 03/30] fix copyto! --- src/other/broadcasting.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index 7161162fd1..21e0cb06da 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -27,7 +27,7 @@ function Base.maybeview(df::AbstractDataFrame, idxs) view(df, idxs) end -function Base.copyto!(df::SubDataFrame, bc) +function Base.copyto!(df::AbstractDataFrame, bc) m = Base.materialize(bc) if ncol(df) == 1 && eltype(df[1]) === DummyColumnValue if maximum(ndims.(Base.Broadcast.flatten(bc).args)) > 1 @@ -45,7 +45,7 @@ function Base.copyto!(df::SubDataFrame, bc) df end -function Base.copyto!(df::SubDataFrame, bc::Base.Broadcast.Broadcasted{<:Base.Broadcast.AbstractArrayStyle{0}}) +function Base.copyto!(df::AbstractDataFrame, bc::Base.Broadcast.Broadcasted{<:Base.Broadcast.AbstractArrayStyle{0}}) if bc.f === identity && bc.args isa Tuple{Any} && Base.Broadcast.isflat(bc) for col in axes(df, 2) if eltype(df[col]) === DummyColumnValue From 428e21999c01c7e70744b17ba0eae533d281a3c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 7 May 2019 00:31:33 +0200 Subject: [PATCH 04/30] try to fix Julia 1.0 error --- src/other/broadcasting.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index 21e0cb06da..fb9adf0ae4 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -1,6 +1,6 @@ struct DummyColumnValue end -Base.broadcastable(df::AbstractDataFrame) = Matrix(df) +Base.Broadcast.broadcastable(df::AbstractDataFrame) = Matrix(df) Base.maybeview(df::AbstractDataFrame, idxs...) = view(df, idxs...) @@ -74,7 +74,7 @@ Base.copyto!(dfr::DataFrameRow, bc::Base.Broadcast.Broadcasted{<:Base.Broadcast. copyto!(dfr, convert(Broadcasted{Nothing}, bc)) end -function Base.materialize!(dest::SubDataFrame, bc::Base.Broadcast.Broadcasted{Style}) where {Style} +function Base.Broadcast.materialize!(dest::SubDataFrame, bc::Base.Broadcast.Broadcasted{Style}) where {Style} try copyto!(dest, Base.Broadcast.instantiate(Base.Broadcast.Broadcasted{Style}(bc.f, bc.args, axes(dest)))) catch @@ -83,7 +83,7 @@ function Base.materialize!(dest::SubDataFrame, bc::Base.Broadcast.Broadcasted{St rethrow() end end -function Base.materialize!(dest::SubDataFrame, x) +function Base.Broadcast.materialize!(dest::SubDataFrame, x) try copyto!(dest, Base.Broadcast.instantiate(Base.Broadcast.Broadcasted(identity, (x,), axes(dest)))) catch From 554ea3b6485bbee0cddc9c3f129a7757b1f6eb3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 7 May 2019 09:05:24 +0200 Subject: [PATCH 05/30] small fixes --- docs/src/lib/indexing.md | 2 +- src/other/broadcasting.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/lib/indexing.md b/docs/src/lib/indexing.md index 514c33aa4b..04cb961984 100644 --- a/docs/src/lib/indexing.md +++ b/docs/src/lib/indexing.md @@ -93,5 +93,5 @@ If column indexing using symbols is performed the order of columns in the operat It is allowed to perform `df[col] .= value` even if `col` is not present in the `DataFrame` and it is a `Symbol`. In such a case a new column will be created. A data frame with zero columns is always considered to have zero rows. -The assignment broadcasting is not supported for `df.col` style. +The assignment broadcasting is not supported for `df.col` style if `col` is not present in `df`. Similar rules apply to `df[cols] .= value` if any of `Symbol`s in `cols` is not present in `df`. diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index fb9adf0ae4..5d8b8f39b6 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -56,7 +56,7 @@ function Base.copyto!(df::AbstractDataFrame, bc::Base.Broadcast.Broadcasted{<:Ba end df else - copyto!(dfr, convert(Broadcasted{Nothing}, bc)) + copyto!(df, convert(Broadcasted{Nothing}, bc)) end end From bf2a5f140219e6a94f202ca9e556f065014d579d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 10 May 2019 15:22:35 +0200 Subject: [PATCH 06/30] new design after the review --- docs/src/lib/indexing.md | 6 +-- src/other/broadcasting.jl | 89 +++++++++++++-------------------------- test/broadcasting.jl | 52 +++-------------------- 3 files changed, 37 insertions(+), 110 deletions(-) diff --git a/docs/src/lib/indexing.md b/docs/src/lib/indexing.md index 04cb961984..fe000a50e4 100644 --- a/docs/src/lib/indexing.md +++ b/docs/src/lib/indexing.md @@ -90,8 +90,6 @@ In such an operation `AbstractDataFrame` is two dimensional and `DataFrameRow` a If column indexing using symbols is performed the order of columns in the operation is specified by the order of symbols. -It is allowed to perform `df[col] .= value` even if `col` is not present in the `DataFrame` and it is a `Symbol`. -In such a case a new column will be created. -A data frame with zero columns is always considered to have zero rows. +It is allowed to perform `df[col] .= value` even if `col` is not present in the `DataFrame` and it is a `Symbol` +under the condition that `df` is not empty. In such a case a new column will be created. The assignment broadcasting is not supported for `df.col` style if `col` is not present in `df`. -Similar rules apply to `df[cols] .= value` if any of `Symbol`s in `cols` is not present in `df`. diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index 5d8b8f39b6..024be7c05a 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -1,46 +1,45 @@ -struct DummyColumnValue end +struct LazyNewColDataFrame + df::DataFrame + col::Symbol +end + +# we allow LazyNewColDataFrame only for data frames with some columns +Base.axes(x::LazyNewColDataFrame) = axes(x.df[1]) Base.Broadcast.broadcastable(df::AbstractDataFrame) = Matrix(df) Base.maybeview(df::AbstractDataFrame, idxs...) = view(df, idxs...) function Base.maybeview(df::AbstractDataFrame, idxs) + if ncol(df) == 0 + throw(ArgumentError("Broadcasting into a data frame with no columns is not allowed")) + end if idxs isa Symbol if !haskey(df, idxs) if !(df isa DataFrame) throw(ArgumentError("column $idxs not found")) end - df[idxs] = Vector{DummyColumnValue}(undef, nrow(df)) - return view(df, [idxs]) - end - end - if idxs isa AbstractVector{Symbol} - for idx in idxs - if !haskey(df, idx) - if !(df isa DataFrame) - throw(ArgumentError("column $idxs not found")) - end - df[idx] = Vector{DummyColumnValue}(undef, nrow(df)) - end + return LazyNewColDataFrame(df, idxs) end end view(df, idxs) end -function Base.copyto!(df::AbstractDataFrame, bc) - m = Base.materialize(bc) - if ncol(df) == 1 && eltype(df[1]) === DummyColumnValue - if maximum(ndims.(Base.Broadcast.flatten(bc).args)) > 1 - throw(DimensionMismatch("cannot broadcast array to have fewer dimensions")) - end +function Base.copyto!(lazydf::LazyNewColDataFrame, bc) + if isempty(lazydf.df) + throw(ArgumentError("Broadcasting creation of a column of an empty data frame is not allowed")) end - for col in axes(df, 2) - if eltype(df[col]) === DummyColumnValue - colname = names(df)[col] - T = reduce(promote_type, typeof.(view(m, :, col))) - parent(df)[colname] = Tables.allocatecolumn(T, nrow(df)) - end - df[col] .= view(m, :, col) + T = mapreduce(i -> typeof(bc[i]), promote_type, eachindex(bc); init=Union{}) + col = Tables.allocatecolumn(T, nrow(lazydf.df)) + copyto!(col, bc) + lazydf.df[lazydf.col] = col +end + +function Base.copyto!(df::AbstractDataFrame, bc) + for I in eachindex(bc) + # a safeguard against linear index + row, col = Tuple(I) + df[row, col] = bc[I] end df end @@ -48,10 +47,6 @@ end function Base.copyto!(df::AbstractDataFrame, bc::Base.Broadcast.Broadcasted{<:Base.Broadcast.AbstractArrayStyle{0}}) if bc.f === identity && bc.args isa Tuple{Any} && Base.Broadcast.isflat(bc) for col in axes(df, 2) - if eltype(df[col]) === DummyColumnValue - colname = names(df)[col] - parent(df)[colname] = Tables.allocatecolumn(typeof(bc.args[1][]), nrow(df)) - end fill!(df[col], bc.args[1][]) end df @@ -60,35 +55,9 @@ function Base.copyto!(df::AbstractDataFrame, bc::Base.Broadcast.Broadcasted{<:Ba end end -function Base.copyto!(dfr::DataFrameRow, x) - m = Base.materialize(x) - foreach(col -> dfr[col] = m[col], 1:length(dfr)) - dfr -end - -Base.copyto!(dfr::DataFrameRow, bc::Base.Broadcast.Broadcasted{<:Base.Broadcast.AbstractArrayStyle{0}}) = - if bc.f === identity && bc.args isa Tuple{Any} && Base.Broadcast.isflat(bc) - foreach(col -> dfr[col] = bc.args[1][], 1:length(dfr)) - dfr - else - copyto!(dfr, convert(Broadcasted{Nothing}, bc)) - end - -function Base.Broadcast.materialize!(dest::SubDataFrame, bc::Base.Broadcast.Broadcasted{Style}) where {Style} - try - copyto!(dest, Base.Broadcast.instantiate(Base.Broadcast.Broadcasted{Style}(bc.f, bc.args, axes(dest)))) - catch - df = parent(dest) - deletecols!(df, findall(col -> eltype(col) == DummyColumnValue, eachcol(df))) - rethrow() - end -end -function Base.Broadcast.materialize!(dest::SubDataFrame, x) - try - copyto!(dest, Base.Broadcast.instantiate(Base.Broadcast.Broadcasted(identity, (x,), axes(dest)))) - catch - df = parent(dest) - deletecols!(df, findall(col -> eltype(col) == DummyColumnValue, eachcol(df))) - rethrow() +function Base.copyto!(dfr::DataFrameRow, bc) + for I in eachindex(bc) + dfr[I] = bc[I] end + dfr end diff --git a/test/broadcasting.jl b/test/broadcasting.jl index 03d33c460d..267ccdd166 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -226,37 +226,9 @@ end @test df == cdf end -@testset "extending data frame in broadcasted assignment - two columns" begin - df = copy(refdf) - df[[:x1, :a]] .= 10 - @test all(df.x1 .== 10) && all(df.a .== 10) - @test names(df)[end] == :a - df[[:b, :c]] .= [5 5 - 5 5 - 5 5] - @test all(df[end-1:end] .== 5) - @test names(df)[end-1:end] == [:b, :c] - cdf = copy(df) - @test_throws DimensionMismatch df[[:x1, :x]] .= ones(3, 10) - @test df == cdf - @test_throws DimensionMismatch df[[:x2, :x]] .= ones(4) - @test df == cdf - @test_throws BoundsError df[[1, 10]] .= ones(3) - @test df == cdf - - dfv = @view df[1:2, 2:end] - @test_throws BoundsError dfv[[10, 11]] .= ones(3) - @test_throws ArgumentError dfv[[:x1, :z]] .= ones(3) - @test df == cdf - dfr = df[1, 3:end] - @test_throws BoundsError dfr[[1,11]] .= ones(10) - @test_throws ArgumentError dfr[[:x1,:z]] .= ones(10) - @test df == cdf -end - @testset "empty data frame corner case" begin df = DataFrame() - @test_throws BoundsError df[1] .= 1 + @test_throws ArgumentError df[1] .= 1 @test_throws ArgumentError df[:a] .= [1] @test_throws ArgumentError df[[:a,:b]] .= [1] @test df == DataFrame() @@ -269,23 +241,11 @@ end @test_throws DimensionMismatch df .= ones(1,2) @test_throws DimensionMismatch df .= ones(1,1,1) - df[:a] .= 1 - @test nrow(df) == 0 - @test names(df) == [:a] - @test eltypes(df) == [Int] - df = DataFrame() - df[[:a, :b]] .= 1 - @test nrow(df) == 0 - @test names(df) == [:a, :b] - @test eltypes(df) == [Int, Int] - df[:c] .= 1.0 - @test nrow(df) == 0 - @test names(df) == [:a, :b, :c] - @test eltypes(df) == [Int, Int, Float64] - df[[:c,:d]] .= 1.0 - @test nrow(df) == 0 - @test names(df) == [:a, :b, :c, :d] - @test eltypes(df) == [Int, Int, Float64, Float64] + @test_throws ArgumentError df[:a] .= 1 + @test_throws ArgumentError df[[:a, :b]] .= 1 + + df = DataFrame(a=[]) + @test_throws ArgumentError df[:b] .= 1 end end # module From 2f27a3d99178e86d667acba87b4cc711e7242943 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sat, 11 May 2019 17:53:49 +0200 Subject: [PATCH 07/30] Update src/other/broadcasting.jl Co-Authored-By: Milan Bouchet-Valat --- src/other/broadcasting.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index 024be7c05a..a102fe8b9a 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -3,7 +3,7 @@ struct LazyNewColDataFrame col::Symbol end -# we allow LazyNewColDataFrame only for data frames with some columns +# we allow LazyNewColDataFrame only for data frames with at least one column Base.axes(x::LazyNewColDataFrame) = axes(x.df[1]) Base.Broadcast.broadcastable(df::AbstractDataFrame) = Matrix(df) From 9efdb299678941514122a0d3bde755ee74073983 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sat, 11 May 2019 18:07:29 +0200 Subject: [PATCH 08/30] Update src/other/broadcasting.jl Co-Authored-By: Milan Bouchet-Valat --- src/other/broadcasting.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index a102fe8b9a..22b86c1753 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -27,7 +27,7 @@ end function Base.copyto!(lazydf::LazyNewColDataFrame, bc) if isempty(lazydf.df) - throw(ArgumentError("Broadcasting creation of a column of an empty data frame is not allowed")) + throw(ArgumentError("creating a column via broadcasting is not allowed on empty data frames")) end T = mapreduce(i -> typeof(bc[i]), promote_type, eachindex(bc); init=Union{}) col = Tables.allocatecolumn(T, nrow(lazydf.df)) From d7ccdb382a50a6881e21f254512214ef62719492 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 12 May 2019 00:24:21 +0200 Subject: [PATCH 09/30] corrections after code review --- src/other/broadcasting.jl | 24 +++++---- test/broadcasting.jl | 100 +++++++++++++------------------------- 2 files changed, 49 insertions(+), 75 deletions(-) diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index 22b86c1753..2993c04bf4 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -6,8 +6,6 @@ end # we allow LazyNewColDataFrame only for data frames with at least one column Base.axes(x::LazyNewColDataFrame) = axes(x.df[1]) -Base.Broadcast.broadcastable(df::AbstractDataFrame) = Matrix(df) - Base.maybeview(df::AbstractDataFrame, idxs...) = view(df, idxs...) function Base.maybeview(df::AbstractDataFrame, idxs) @@ -17,7 +15,8 @@ function Base.maybeview(df::AbstractDataFrame, idxs) if idxs isa Symbol if !haskey(df, idxs) if !(df isa DataFrame) - throw(ArgumentError("column $idxs not found")) + # this will throw an appropriate error message + df[idxs] end return LazyNewColDataFrame(df, idxs) end @@ -25,7 +24,7 @@ function Base.maybeview(df::AbstractDataFrame, idxs) view(df, idxs) end -function Base.copyto!(lazydf::LazyNewColDataFrame, bc) +function Base.copyto!(lazydf::LazyNewColDataFrame, bc::Base.Broadcast.Broadcasted) if isempty(lazydf.df) throw(ArgumentError("creating a column via broadcasting is not allowed on empty data frames")) end @@ -35,16 +34,21 @@ function Base.copyto!(lazydf::LazyNewColDataFrame, bc) lazydf.df[lazydf.col] = col end -function Base.copyto!(df::AbstractDataFrame, bc) - for I in eachindex(bc) - # a safeguard against linear index - row, col = Tuple(I) - df[row, col] = bc[I] +function _copyto_heper!(dfcol::AbstractVector, bc::Base.Broadcast.Broadcasted, col::Int) + for row in axes(dfcol, 1) + dfcol[row] = bc[CartesianIndex(row, col)] + end +end + +function Base.copyto!(df::AbstractDataFrame, bc::Base.Broadcast.Broadcasted) + for col in axes(df, 2) + _copyto_heper!(df[col], bc, col) end df end function Base.copyto!(df::AbstractDataFrame, bc::Base.Broadcast.Broadcasted{<:Base.Broadcast.AbstractArrayStyle{0}}) + # special case of fast approach when bc is providing an untransformed scalar if bc.f === identity && bc.args isa Tuple{Any} && Base.Broadcast.isflat(bc) for col in axes(df, 2) fill!(df[col], bc.args[1][]) @@ -55,7 +59,7 @@ function Base.copyto!(df::AbstractDataFrame, bc::Base.Broadcast.Broadcasted{<:Ba end end -function Base.copyto!(dfr::DataFrameRow, bc) +function Base.copyto!(dfr::DataFrameRow, bc::Base.Broadcast.Broadcasted) for I in eachindex(bc) dfr[I] = bc[I] end diff --git a/test/broadcasting.jl b/test/broadcasting.jl index 267ccdd166..d67ecb44a2 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -4,67 +4,37 @@ using Test, DataFrames refdf = DataFrame(ones(3, 5)) -@testset "data frame and data frame row in broadcasting" begin - df = copy(refdf) - dfv = view(df, 1:2, 1:4) - dfr = df[1, 1:2] - - @test df .+ 1 == fill(2.0, 3, 5) - @test all(df .== ones(3, 5)) - @test [i for i in 3:5, j in 1:5] == @. 2 * df + (1:3) - @test df .- ones(3, 5) == zeros(3, 5) - @test_throws DimensionMismatch df .- ones(3, 4) - @test_throws DimensionMismatch df .- ones(4) - @test_throws DimensionMismatch df .- ones(3, 4, 1) - - @test dfv .+ 1 == fill(2.0, 2, 4) - @test all(dfv .== ones(2, 4)) - @test [i for i in 3:4, j in 1:4] == @. 2 * dfv + (1:2) - @test dfv .- ones(2, 4) == zeros(2, 4) - @test_throws DimensionMismatch dfv .- ones(3, 4) - @test_throws DimensionMismatch dfv .- ones(4) - @test_throws DimensionMismatch dfv .- ones(3, 4, 1) - - @test dfr .+ 1 == fill(2.0, 2) - @test all(dfr .== ones(2)) - @test [3, 4] == @. 2 * dfr + (1:2) - @test dfr .- ones(2) == zeros(2) - @test_throws DimensionMismatch dfr .- ones(3, 4) - @test_throws DimensionMismatch dfr .- ones(4) - @test_throws DimensionMismatch dfr .- ones(3, 4, 1) -end - @testset "normal data frame and data frame row in broadcasted assignment - one column" begin df = copy(refdf) df[1] .+= 1 @test df.x1 == [2, 2, 2] - @test all(df[2:end] .== ones(size(df[2:end])...)) + @test all(Matrix(df[2:end]) .== ones(size(df[2:end])...)) dfv = @view df[1:2, 2:end] dfv[1] .+= 1 @test dfv.x2 == [2, 2] - @test all(dfv[2:end] .== ones(size(dfv[2:end])...)) - @test all(df[1:2, 1:2] .== 2) + @test all(Matrix(dfv[2:end]) .== ones(size(dfv[2:end])...)) + @test all(Matrix(df[1:2, 1:2]) .== 2) dfr = df[1, 3:end] dfr[end-1:end] .= 10 - @test all(dfr .== [1, 10, 10]) + @test all(Vector(dfr) .== [1, 10, 10]) @test df.x3[1] == 1 && df.x4[1] == 10 && df.x5[1] == 10 df = copy(refdf) df[1] .+= [1, 1, 1] @test df.x1 == [2, 2, 2] - @test all(df[2:end] .== ones(size(df[2:end])...)) + @test all(Matrix(df[2:end]) .== ones(size(df[2:end])...)) dfv = @view df[1:2, 2:end] dfv[1] .+= [1, 1] @test dfv.x2 == [2, 2] - @test all(dfv[2:end] .== ones(size(dfv[2:end])...)) - @test all(df[1:2, 1:2] .== 2) + @test all(Matrix(dfv[2:end]) .== ones(size(dfv[2:end])...)) + @test all(Matrix(df[1:2, 1:2]) .== 2) dfr = df[1, 3:end] dfr[end-1:end] .= [10, 10] - @test all(dfr .== [1, 10, 10]) + @test all(Vector(dfr) .== [1, 10, 10]) @test df.x3[1] == 1 && df.x4[1] == 10 && df.x5[1] == 10 df = copy(refdf) @@ -77,33 +47,33 @@ end df = copy(refdf) df[:x1] .+= 1 @test df.x1 == [2, 2, 2] - @test all(df[2:end] .== ones(size(df[2:end])...)) + @test all(Matrix(df[2:end]) .== ones(size(df[2:end])...)) dfv = @view df[1:2, 2:end] dfv[:x2] .+= 1 @test dfv.x2 == [2, 2] - @test all(dfv[2:end] .== ones(size(dfv[2:end])...)) - @test all(df[1:2, 1:2] .== 2) + @test all(Matrix(dfv[2:end]) .== ones(size(dfv[2:end])...)) + @test all(Matrix(df[1:2, 1:2]) .== 2) dfr = df[1, 3:end] dfr[[:x4, :x5]] .= 10 - @test all(dfr .== [1, 10, 10]) + @test all(Vector(dfr) .== [1, 10, 10]) @test df.x3[1] == 1 && df.x4[1] == 10 && df.x5[1] == 10 df = copy(refdf) df[:x1] .+= [1, 1, 1] @test df.x1 == [2, 2, 2] - @test all(df[2:end] .== ones(size(df[2:end])...)) + @test all(Matrix(df[2:end]) .== ones(size(df[2:end])...)) dfv = @view df[1:2, 2:end] dfv[:x2] .+= [1, 1] @test dfv.x2 == [2, 2] - @test all(dfv[2:end] .== ones(size(dfv[2:end])...)) - @test all(df[1:2, 1:2] .== 2) + @test all(Matrix(dfv[2:end]) .== ones(size(dfv[2:end])...)) + @test all(Matrix(df[1:2, 1:2]) .== 2) dfr = df[1, 3:end] dfr[[:x4, :x5]] .= [10, 10] - @test all(dfr .== [1, 10, 10]) + @test all(Vector(dfr) .== [1, 10, 10]) @test df.x3[1] == 1 && df.x4[1] == 10 && df.x5[1] == 10 df = copy(refdf) @@ -119,14 +89,14 @@ end df[[1,2]] .+= 1 @test df.x1 == [2, 2, 2] @test df.x2 == [2, 2, 2] - @test all(df[3:end] .== ones(size(df[3:end])...)) + @test all(Matrix(df[3:end]) .== ones(size(df[3:end])...)) dfv = @view df[1:2, 3:end] dfv[[1,2]] .+= 1 @test dfv.x3 == [2, 2] @test dfv.x4 == [2, 2] - @test all(dfv[3:end] .== ones(size(dfv[3:end])...)) - @test all(df[1:2, 1:2] .== 2) + @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) + @test all(Matrix(df[1:2, 1:2]) .== 2) df = copy(refdf) df[[1,2]] .+= [1 1 @@ -134,15 +104,15 @@ end 1 1] @test df.x1 == [2, 2, 2] @test df.x2 == [2, 2, 2] - @test all(df[3:end] .== ones(size(df[3:end])...)) + @test all(Matrix(df[3:end]) .== ones(size(df[3:end])...)) dfv = @view df[1:2, 3:end] dfv[[1,2]] .+= [1 1 1 1] @test dfv.x3 == [2, 2] @test dfv.x4 == [2, 2] - @test all(dfv[3:end] .== ones(size(dfv[3:end])...)) - @test all(df[1:2, 1:2] .== 2) + @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) + @test all(Matrix(df[1:2, 1:2]) .== 2) df = copy(refdf) dfv = @view df[1:2, 2:end] @@ -154,14 +124,14 @@ end df[[:x1,:x2]] .+= 1 @test df.x1 == [2, 2, 2] @test df.x2 == [2, 2, 2] - @test all(df[3:end] .== ones(size(df[3:end])...)) + @test all(Matrix(df[3:end]) .== ones(size(df[3:end])...)) dfv = @view df[1:2, 3:end] dfv[[:x3,:x4]] .+= 1 @test dfv.x3 == [2, 2] @test dfv.x4 == [2, 2] - @test all(dfv[3:end] .== ones(size(dfv[3:end])...)) - @test all(df[1:2, 1:2] .== 2) + @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) + @test all(Matrix(df[1:2, 1:2]) .== 2) df = copy(refdf) df[[:x1,:x2]] .+= [1 1 @@ -169,15 +139,15 @@ end 1 1] @test df.x1 == [2, 2, 2] @test df.x2 == [2, 2, 2] - @test all(df[3:end] .== ones(size(df[3:end])...)) + @test all(Matrix(df[3:end]) .== ones(size(df[3:end])...)) dfv = @view df[1:2, 3:end] dfv[[:x3,:x4]] .+= [1 1 1 1] @test dfv.x3 == [2, 2] @test dfv.x4 == [2, 2] - @test all(dfv[3:end] .== ones(size(dfv[3:end])...)) - @test all(df[1:2, 1:2] .== 2) + @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) + @test all(Matrix(df[1:2, 1:2]) .== 2) df = copy(refdf) dfv = @view df[1:2, 2:end] @@ -189,24 +159,24 @@ end @testset "assignment to a whole data frame and data frame row" begin df = copy(refdf) df .= 10 - @test all(df .== 10) + @test all(Matrix(df) .== 10) dfv = view(df, 1:2, 1:4) dfv .= 100 - @test (all(df[1:2, 1:4] .== 100)) - @test (all(df[3, 1:4] .== 10)) + @test (all(Matrix(df[1:2, 1:4]) .== 100)) + @test (all(Vector(df[3, 1:4]) .== 10)) dfr = df[1, 1:2] dfr .= 1000 - @test (all(df[1, 1:2] .== 1000)) - @test (all(df[3, :] .!= 1000)) + @test (all(Vector(df[1, 1:2]) .== 1000)) + @test (all(Vector(df[3, :]) .!= 1000)) end @testset "extending data frame in broadcasted assignment - one column" begin df = copy(refdf) df[:a] .= 1 - @test all(df .== 1) + @test all(Matrix(df) .== 1) @test names(df)[end] == :a df[:b] .= [1, 1, 1] - @test all(df .== 1) + @test all(Matrix(df) .== 1) @test names(df)[end] == :b cdf = copy(df) @test_throws DimensionMismatch df[:c] .= ones(3, 1) From 8fd53ba8bf3c0829676d4659be45c948fa7fa6a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 12 May 2019 00:50:31 +0200 Subject: [PATCH 10/30] fix tests using removed RHS broadcasting --- test/broadcasting.jl | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/test/broadcasting.jl b/test/broadcasting.jl index d67ecb44a2..6ba499f2b1 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -86,29 +86,29 @@ end @testset "normal data frame and data frame row in broadcasted assignment - two columns" begin df = copy(refdf) - df[[1,2]] .+= 1 + df[[1,2]] .= Matrix(df[[1,2]]) .+ 1 @test df.x1 == [2, 2, 2] @test df.x2 == [2, 2, 2] @test all(Matrix(df[3:end]) .== ones(size(df[3:end])...)) dfv = @view df[1:2, 3:end] - dfv[[1,2]] .+= 1 + dfv[[1,2]] .= Matrix(dfv[[1,2]]) .+ 1 @test dfv.x3 == [2, 2] @test dfv.x4 == [2, 2] @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) @test all(Matrix(df[1:2, 1:2]) .== 2) df = copy(refdf) - df[[1,2]] .+= [1 1 - 1 1 - 1 1] + df[[1,2]] .= Matrix(df[[1,2]]) .+ [1 1 + 1 1 + 1 1] @test df.x1 == [2, 2, 2] @test df.x2 == [2, 2, 2] @test all(Matrix(df[3:end]) .== ones(size(df[3:end])...)) dfv = @view df[1:2, 3:end] - dfv[[1,2]] .+= [1 1 - 1 1] + dfv[[1,2]] .= Matrix(dfv[[1,2]]) .+ [1 1 + 1 1] @test dfv.x3 == [2, 2] @test dfv.x4 == [2, 2] @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) @@ -121,29 +121,29 @@ end @test_throws DimensionMismatch dfv[[1,2]] .= rand(2, 10) df = copy(refdf) - df[[:x1,:x2]] .+= 1 + df[[:x1,:x2]] .= Matrix(df[[:x1,:x2]]) .+ 1 @test df.x1 == [2, 2, 2] @test df.x2 == [2, 2, 2] @test all(Matrix(df[3:end]) .== ones(size(df[3:end])...)) dfv = @view df[1:2, 3:end] - dfv[[:x3,:x4]] .+= 1 + dfv[[:x3,:x4]] .= Matrix(dfv[[:x3,:x4]]) .+ 1 @test dfv.x3 == [2, 2] @test dfv.x4 == [2, 2] @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) @test all(Matrix(df[1:2, 1:2]) .== 2) df = copy(refdf) - df[[:x1,:x2]] .+= [1 1 - 1 1 - 1 1] + df[[:x1,:x2]] .= Matrix(df[[:x1,:x2]]) .+ [1 1 + 1 1 + 1 1] @test df.x1 == [2, 2, 2] @test df.x2 == [2, 2, 2] @test all(Matrix(df[3:end]) .== ones(size(df[3:end])...)) dfv = @view df[1:2, 3:end] - dfv[[:x3,:x4]] .+= [1 1 - 1 1] + dfv[[:x3,:x4]] .= Matrix(dfv[[:x3,:x4]]) .+ [1 1 + 1 1] @test dfv.x3 == [2, 2] @test dfv.x4 == [2, 2] @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) From 9216a1b5a268ef3d1c86d0bc25ba19f9c3cfad73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 14 May 2019 23:45:14 +0200 Subject: [PATCH 11/30] Apply suggestions from code review Co-Authored-By: Milan Bouchet-Valat --- src/other/broadcasting.jl | 4 ++-- test/broadcasting.jl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index 2993c04bf4..5b85c08071 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -34,8 +34,8 @@ function Base.copyto!(lazydf::LazyNewColDataFrame, bc::Base.Broadcast.Broadcaste lazydf.df[lazydf.col] = col end -function _copyto_heper!(dfcol::AbstractVector, bc::Base.Broadcast.Broadcasted, col::Int) - for row in axes(dfcol, 1) +function _copyto_helper!(dfcol::AbstractVector, bc::Base.Broadcast.Broadcasted, col::Int) + @inbounds for row in axes(dfcol, 1) dfcol[row] = bc[CartesianIndex(row, col)] end end diff --git a/test/broadcasting.jl b/test/broadcasting.jl index 6ba499f2b1..43d4a61f41 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -8,7 +8,7 @@ refdf = DataFrame(ones(3, 5)) df = copy(refdf) df[1] .+= 1 @test df.x1 == [2, 2, 2] - @test all(Matrix(df[2:end]) .== ones(size(df[2:end])...)) + @test df[2:end] == refdf[2:end] dfv = @view df[1:2, 2:end] dfv[1] .+= 1 From 1ab4bfd1a77cc970d6116782edabc304408f09c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 15 May 2019 00:03:28 +0200 Subject: [PATCH 12/30] changes after the code review --- docs/src/lib/indexing.md | 5 +- src/other/broadcasting.jl | 9 ++- test/broadcasting.jl | 147 +++++++++++++++++++++++++++++++++++++- 3 files changed, 153 insertions(+), 8 deletions(-) diff --git a/docs/src/lib/indexing.md b/docs/src/lib/indexing.md index fe000a50e4..0b51993f8a 100644 --- a/docs/src/lib/indexing.md +++ b/docs/src/lib/indexing.md @@ -82,10 +82,7 @@ Under construction ## Broadcasting -`AbstractDataFrame` is converted to a matrix in broadcasting. -`DataFrameRow`, `GroupedDataFrame`, `DataFrameRows` and `DataFrameColumns` are `collect`ed in broadcasting. - -Additionally it is possible to assign a value to `AbstractDataFrame` and `DataFrameRow` using the `.=` operator. +It is possible to assign a value to `AbstractDataFrame` and `DataFrameRow` using the `.=` operator. In such an operation `AbstractDataFrame` is two dimensional and `DataFrameRow` as a single dimensional. If column indexing using symbols is performed the order of columns in the operation is specified by the order of symbols. diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index 5b85c08071..0a64b5f455 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -35,14 +35,19 @@ function Base.copyto!(lazydf::LazyNewColDataFrame, bc::Base.Broadcast.Broadcaste end function _copyto_helper!(dfcol::AbstractVector, bc::Base.Broadcast.Broadcasted, col::Int) - @inbounds for row in axes(dfcol, 1) + if axes(dfcol, 1) != axes(bc, 1) + # this should never happen unless data frame is corrupted (has unequal column lengths) + throw(ArgumentError("Dimension mismatch in broadcasting. " * + "The updated data frame is invalid and should not be used")) + end + @inbounds for row in eachindex(dfcol) dfcol[row] = bc[CartesianIndex(row, col)] end end function Base.copyto!(df::AbstractDataFrame, bc::Base.Broadcast.Broadcasted) for col in axes(df, 2) - _copyto_heper!(df[col], bc, col) + _copyto_helper!(df[col], bc, col) end df end diff --git a/test/broadcasting.jl b/test/broadcasting.jl index 43d4a61f41..4575ebf99a 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -21,6 +21,17 @@ refdf = DataFrame(ones(3, 5)) @test all(Vector(dfr) .== [1, 10, 10]) @test df.x3[1] == 1 && df.x4[1] == 10 && df.x5[1] == 10 + df = copy(refdf) + df[:, 1] .+= 1 + @test df.x1 == [2, 2, 2] + @test df[2:end] == refdf[2:end] + + dfv = @view df[1:2, 2:end] + dfv[:, 1] .+= 1 + @test dfv.x2 == [2, 2] + @test all(Matrix(dfv[2:end]) .== ones(size(dfv[2:end])...)) + @test all(Matrix(df[1:2, 1:2]) .== 2) + df = copy(refdf) df[1] .+= [1, 1, 1] @test df.x1 == [2, 2, 2] @@ -37,12 +48,25 @@ refdf = DataFrame(ones(3, 5)) @test all(Vector(dfr) .== [1, 10, 10]) @test df.x3[1] == 1 && df.x4[1] == 10 && df.x5[1] == 10 + df = copy(refdf) + df[:, 1] .+= [1, 1, 1] + @test df.x1 == [2, 2, 2] + @test all(Matrix(df[2:end]) .== ones(size(df[2:end])...)) + + dfv = @view df[1:2, 2:end] + dfv[:, 1] .+= [1, 1] + @test dfv.x2 == [2, 2] + @test all(Matrix(dfv[2:end]) .== ones(size(dfv[2:end])...)) + @test all(Matrix(df[1:2, 1:2]) .== 2) + df = copy(refdf) dfv = @view df[1:2, 2:end] dfr = df[1, 3:end] @test_throws DimensionMismatch df[1] .= rand(3, 1) @test_throws DimensionMismatch dfv[1] .= rand(2, 1) @test_throws DimensionMismatch dfr[end-1:end] .= rand(3, 1) + @test_throws DimensionMismatch df[:, 1] .= rand(3, 1) + @test_throws DimensionMismatch dfv[:, 1] .= rand(2, 1) df = copy(refdf) df[:x1] .+= 1 @@ -60,6 +84,17 @@ refdf = DataFrame(ones(3, 5)) @test all(Vector(dfr) .== [1, 10, 10]) @test df.x3[1] == 1 && df.x4[1] == 10 && df.x5[1] == 10 + df = copy(refdf) + df[:, :x1] .+= 1 + @test df.x1 == [2, 2, 2] + @test all(Matrix(df[2:end]) .== ones(size(df[2:end])...)) + + dfv = @view df[1:2, 2:end] + dfv[:, :x2] .+= 1 + @test dfv.x2 == [2, 2] + @test all(Matrix(dfv[2:end]) .== ones(size(dfv[2:end])...)) + @test all(Matrix(df[1:2, 1:2]) .== 2) + df = copy(refdf) df[:x1] .+= [1, 1, 1] @test df.x1 == [2, 2, 2] @@ -76,12 +111,25 @@ refdf = DataFrame(ones(3, 5)) @test all(Vector(dfr) .== [1, 10, 10]) @test df.x3[1] == 1 && df.x4[1] == 10 && df.x5[1] == 10 + df = copy(refdf) + df[:, :x1] .+= [1, 1, 1] + @test df.x1 == [2, 2, 2] + @test all(Matrix(df[2:end]) .== ones(size(df[2:end])...)) + + dfv = @view df[1:2, 2:end] + dfv[:, :x2] .+= [1, 1] + @test dfv.x2 == [2, 2] + @test all(Matrix(dfv[2:end]) .== ones(size(dfv[2:end])...)) + @test all(Matrix(df[1:2, 1:2]) .== 2) + df = copy(refdf) dfv = @view df[1:2, 2:end] dfr = df[1, 3:end] @test_throws DimensionMismatch df[:x1] .= rand(3, 1) @test_throws DimensionMismatch dfv[:x2] .= rand(2, 1) @test_throws DimensionMismatch dfr[[:x4, :x5]] .= rand(3, 1) + @test_throws DimensionMismatch df[:, :x1] .= rand(3, 1) + @test_throws DimensionMismatch dfv[:, :x2] .= rand(2, 1) end @testset "normal data frame and data frame row in broadcasted assignment - two columns" begin @@ -98,6 +146,19 @@ end @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) @test all(Matrix(df[1:2, 1:2]) .== 2) + df = copy(refdf) + df[:, [1,2]] .= Matrix(df[[1,2]]) .+ 1 + @test df.x1 == [2, 2, 2] + @test df.x2 == [2, 2, 2] + @test all(Matrix(df[3:end]) .== ones(size(df[3:end])...)) + + dfv = @view df[1:2, 3:end] + dfv[:, [1,2]] .= Matrix(dfv[[1,2]]) .+ 1 + @test dfv.x3 == [2, 2] + @test dfv.x4 == [2, 2] + @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) + @test all(Matrix(df[1:2, 1:2]) .== 2) + df = copy(refdf) df[[1,2]] .= Matrix(df[[1,2]]) .+ [1 1 1 1 @@ -114,11 +175,28 @@ end @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) @test all(Matrix(df[1:2, 1:2]) .== 2) + df = copy(refdf) + df[:, [1,2]] .= Matrix(df[[1,2]]) .+ [1 1 + 1 1 + 1 1] + @test df.x1 == [2, 2, 2] + @test df.x2 == [2, 2, 2] + @test all(Matrix(df[3:end]) .== ones(size(df[3:end])...)) + + dfv = @view df[1:2, 3:end] + dfv[:, [1,2]] .= Matrix(dfv[[1,2]]) .+ [1 1 + 1 1] + @test dfv.x3 == [2, 2] + @test dfv.x4 == [2, 2] + @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) + @test all(Matrix(df[1:2, 1:2]) .== 2) + df = copy(refdf) dfv = @view df[1:2, 2:end] - dfr = df[1, 3:end] @test_throws DimensionMismatch df[[1,2]] .= rand(3, 10) @test_throws DimensionMismatch dfv[[1,2]] .= rand(2, 10) + @test_throws DimensionMismatch df[:, [1,2]] .= rand(3, 10) + @test_throws DimensionMismatch dfv[:, [1,2]] .= rand(2, 10) df = copy(refdf) df[[:x1,:x2]] .= Matrix(df[[:x1,:x2]]) .+ 1 @@ -133,6 +211,19 @@ end @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) @test all(Matrix(df[1:2, 1:2]) .== 2) + df = copy(refdf) + df[:, [:x1,:x2]] .= Matrix(df[[:x1,:x2]]) .+ 1 + @test df.x1 == [2, 2, 2] + @test df.x2 == [2, 2, 2] + @test all(Matrix(df[3:end]) .== ones(size(df[3:end])...)) + + dfv = @view df[1:2, 3:end] + dfv[[:, :x3,:x4]] .= Matrix(dfv[[:x3,:x4]]) .+ 1 + @test dfv.x3 == [2, 2] + @test dfv.x4 == [2, 2] + @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) + @test all(Matrix(df[1:2, 1:2]) .== 2) + df = copy(refdf) df[[:x1,:x2]] .= Matrix(df[[:x1,:x2]]) .+ [1 1 1 1 @@ -149,11 +240,28 @@ end @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) @test all(Matrix(df[1:2, 1:2]) .== 2) + df = copy(refdf) + df[:, [:x1,:x2]] .= Matrix(df[[:x1,:x2]]) .+ [1 1 + 1 1 + 1 1] + @test df.x1 == [2, 2, 2] + @test df.x2 == [2, 2, 2] + @test all(Matrix(df[3:end]) .== ones(size(df[3:end])...)) + + dfv = @view df[1:2, 3:end] + dfv[:, [:x3,:x4]] .= Matrix(dfv[[:x3,:x4]]) .+ [1 1 + 1 1] + @test dfv.x3 == [2, 2] + @test dfv.x4 == [2, 2] + @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) + @test all(Matrix(df[1:2, 1:2]) .== 2) + df = copy(refdf) dfv = @view df[1:2, 2:end] - dfr = df[1, 3:end] @test_throws DimensionMismatch df[[:x1,:x2]] .= rand(3, 10) @test_throws DimensionMismatch dfv[[:x3,:x4]] .= rand(2, 10) + @test_throws DimensionMismatch df[:, [:x1,:x2]] .= rand(3, 10) + @test_throws DimensionMismatch dfv[:, [:x3,:x4]] .= rand(2, 10) end @testset "assignment to a whole data frame and data frame row" begin @@ -168,6 +276,26 @@ end dfr .= 1000 @test (all(Vector(df[1, 1:2]) .== 1000)) @test (all(Vector(df[3, :]) .!= 1000)) + + df = copy(refdf) + df[:] .= 10 + @test all(Matrix(df) .== 10) + dfv = view(df, 1:2, 1:4) + dfv[:] .= 100 + @test (all(Matrix(df[1:2, 1:4]) .== 100)) + @test (all(Vector(df[3, 1:4]) .== 10)) + dfr = df[1, 1:2] + dfr[:] .= 1000 + @test (all(Vector(df[1, 1:2]) .== 1000)) + @test (all(Vector(df[3, :]) .!= 1000)) + + df = copy(refdf) + df[:,:] .= 10 + @test all(Matrix(df) .== 10) + dfv = view(df, 1:2, 1:4) + dfv[:, :] .= 100 + @test (all(Matrix(df[1:2, 1:4]) .== 100)) + @test (all(Vector(df[3, 1:4]) .== 10)) end @testset "extending data frame in broadcasted assignment - one column" begin @@ -218,4 +346,19 @@ end @test_throws ArgumentError df[:b] .= 1 end +@testset "test categorical values" + df = copy(refdf) + v = categorical([1,2,3]) + df[:c1] .= v + @test df.c1 == v + @test df.c1 !== v + @test df.c1 isa CategoricalVector + @test levels(df.c1) == levels(v) + @test levels(df.c1) !== levels(v) + df[:c2] .= v[1] + @test df.c2 == [1,1,1] + @test df.c2 isa CategoricalVector + @test levels(df.c2) != levels(v) +end + end # module From ed85d261b0618c776459fb157178f7b60064d6df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 15 May 2019 00:30:04 +0200 Subject: [PATCH 13/30] error fixes --- src/other/broadcasting.jl | 2 +- test/broadcasting.jl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index 0a64b5f455..f9227ab042 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -35,7 +35,7 @@ function Base.copyto!(lazydf::LazyNewColDataFrame, bc::Base.Broadcast.Broadcaste end function _copyto_helper!(dfcol::AbstractVector, bc::Base.Broadcast.Broadcasted, col::Int) - if axes(dfcol, 1) != axes(bc, 1) + if axes(dfcol, 1) != axes(bc)[1] # this should never happen unless data frame is corrupted (has unequal column lengths) throw(ArgumentError("Dimension mismatch in broadcasting. " * "The updated data frame is invalid and should not be used")) diff --git a/test/broadcasting.jl b/test/broadcasting.jl index 4575ebf99a..1d006d2c71 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -218,7 +218,7 @@ end @test all(Matrix(df[3:end]) .== ones(size(df[3:end])...)) dfv = @view df[1:2, 3:end] - dfv[[:, :x3,:x4]] .= Matrix(dfv[[:x3,:x4]]) .+ 1 + dfv[:, [:x3,:x4]] .= Matrix(dfv[[:x3,:x4]]) .+ 1 @test dfv.x3 == [2, 2] @test dfv.x4 == [2, 2] @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) @@ -346,7 +346,7 @@ end @test_throws ArgumentError df[:b] .= 1 end -@testset "test categorical values" +@testset "test categorical values" begin df = copy(refdf) v = categorical([1,2,3]) df[:c1] .= v From a80527be0900690b4095afab8ae039876938216b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 19 May 2019 01:00:14 +0200 Subject: [PATCH 14/30] clean up tests --- docs/src/lib/indexing.md | 5 ++++ test/broadcasting.jl | 60 ++++++++++++++++++++-------------------- 2 files changed, 35 insertions(+), 30 deletions(-) diff --git a/docs/src/lib/indexing.md b/docs/src/lib/indexing.md index 0b51993f8a..6ae7a6c714 100644 --- a/docs/src/lib/indexing.md +++ b/docs/src/lib/indexing.md @@ -85,6 +85,11 @@ Under construction It is possible to assign a value to `AbstractDataFrame` and `DataFrameRow` using the `.=` operator. In such an operation `AbstractDataFrame` is two dimensional and `DataFrameRow` as a single dimensional. +!!! note + + The rule above means that, analogously to single dimensional objects in Base, + `DataFrameRow` is considered to be column oriented. + If column indexing using symbols is performed the order of columns in the operation is specified by the order of symbols. It is allowed to perform `df[col] .= value` even if `col` is not present in the `DataFrame` and it is a `Symbol` diff --git a/test/broadcasting.jl b/test/broadcasting.jl index 1d006d2c71..72f62068d0 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -13,7 +13,7 @@ refdf = DataFrame(ones(3, 5)) dfv = @view df[1:2, 2:end] dfv[1] .+= 1 @test dfv.x2 == [2, 2] - @test all(Matrix(dfv[2:end]) .== ones(size(dfv[2:end])...)) + @test dfv[2:end] == refdf[1:2, 3:end] @test all(Matrix(df[1:2, 1:2]) .== 2) dfr = df[1, 3:end] @@ -29,18 +29,18 @@ refdf = DataFrame(ones(3, 5)) dfv = @view df[1:2, 2:end] dfv[:, 1] .+= 1 @test dfv.x2 == [2, 2] - @test all(Matrix(dfv[2:end]) .== ones(size(dfv[2:end])...)) + @test dfv[2:end] == refdf[1:2, 3:end] @test all(Matrix(df[1:2, 1:2]) .== 2) df = copy(refdf) df[1] .+= [1, 1, 1] @test df.x1 == [2, 2, 2] - @test all(Matrix(df[2:end]) .== ones(size(df[2:end])...)) + @test df[2:end] == refdf[2:end] dfv = @view df[1:2, 2:end] dfv[1] .+= [1, 1] @test dfv.x2 == [2, 2] - @test all(Matrix(dfv[2:end]) .== ones(size(dfv[2:end])...)) + @test dfv[2:end] == refdf[1:2, 3:end] @test all(Matrix(df[1:2, 1:2]) .== 2) dfr = df[1, 3:end] @@ -51,12 +51,12 @@ refdf = DataFrame(ones(3, 5)) df = copy(refdf) df[:, 1] .+= [1, 1, 1] @test df.x1 == [2, 2, 2] - @test all(Matrix(df[2:end]) .== ones(size(df[2:end])...)) + @test df[2:end] == refdf[2:end] dfv = @view df[1:2, 2:end] dfv[:, 1] .+= [1, 1] @test dfv.x2 == [2, 2] - @test all(Matrix(dfv[2:end]) .== ones(size(dfv[2:end])...)) + @test dfv[2:end] == refdf[1:2, 3:end] @test all(Matrix(df[1:2, 1:2]) .== 2) df = copy(refdf) @@ -71,12 +71,12 @@ refdf = DataFrame(ones(3, 5)) df = copy(refdf) df[:x1] .+= 1 @test df.x1 == [2, 2, 2] - @test all(Matrix(df[2:end]) .== ones(size(df[2:end])...)) + @test df[2:end] == refdf[2:end] dfv = @view df[1:2, 2:end] dfv[:x2] .+= 1 @test dfv.x2 == [2, 2] - @test all(Matrix(dfv[2:end]) .== ones(size(dfv[2:end])...)) + @test dfv[2:end] == refdf[1:2, 3:end] @test all(Matrix(df[1:2, 1:2]) .== 2) dfr = df[1, 3:end] @@ -87,23 +87,23 @@ refdf = DataFrame(ones(3, 5)) df = copy(refdf) df[:, :x1] .+= 1 @test df.x1 == [2, 2, 2] - @test all(Matrix(df[2:end]) .== ones(size(df[2:end])...)) + @test df[2:end] == refdf[2:end] dfv = @view df[1:2, 2:end] dfv[:, :x2] .+= 1 @test dfv.x2 == [2, 2] - @test all(Matrix(dfv[2:end]) .== ones(size(dfv[2:end])...)) + @test dfv[2:end] == refdf[1:2, 3:end] @test all(Matrix(df[1:2, 1:2]) .== 2) df = copy(refdf) df[:x1] .+= [1, 1, 1] @test df.x1 == [2, 2, 2] - @test all(Matrix(df[2:end]) .== ones(size(df[2:end])...)) + @test df[2:end] == refdf[2:end] dfv = @view df[1:2, 2:end] dfv[:x2] .+= [1, 1] @test dfv.x2 == [2, 2] - @test all(Matrix(dfv[2:end]) .== ones(size(dfv[2:end])...)) + @test dfv[2:end] == refdf[1:2, 3:end] @test all(Matrix(df[1:2, 1:2]) .== 2) dfr = df[1, 3:end] @@ -114,12 +114,12 @@ refdf = DataFrame(ones(3, 5)) df = copy(refdf) df[:, :x1] .+= [1, 1, 1] @test df.x1 == [2, 2, 2] - @test all(Matrix(df[2:end]) .== ones(size(df[2:end])...)) + @test df[2:end] == refdf[2:end] dfv = @view df[1:2, 2:end] dfv[:, :x2] .+= [1, 1] @test dfv.x2 == [2, 2] - @test all(Matrix(dfv[2:end]) .== ones(size(dfv[2:end])...)) + @test dfv[2:end] == refdf[1:2, 3:end] @test all(Matrix(df[1:2, 1:2]) .== 2) df = copy(refdf) @@ -137,26 +137,26 @@ end df[[1,2]] .= Matrix(df[[1,2]]) .+ 1 @test df.x1 == [2, 2, 2] @test df.x2 == [2, 2, 2] - @test all(Matrix(df[3:end]) .== ones(size(df[3:end])...)) + @test df[3:end] == refdf[3:end] dfv = @view df[1:2, 3:end] dfv[[1,2]] .= Matrix(dfv[[1,2]]) .+ 1 @test dfv.x3 == [2, 2] @test dfv.x4 == [2, 2] - @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) + @test dfv[3:end] == refdf[1:2, 5:end] @test all(Matrix(df[1:2, 1:2]) .== 2) df = copy(refdf) df[:, [1,2]] .= Matrix(df[[1,2]]) .+ 1 @test df.x1 == [2, 2, 2] @test df.x2 == [2, 2, 2] - @test all(Matrix(df[3:end]) .== ones(size(df[3:end])...)) + @test df[3:end] == refdf[3:end] dfv = @view df[1:2, 3:end] dfv[:, [1,2]] .= Matrix(dfv[[1,2]]) .+ 1 @test dfv.x3 == [2, 2] @test dfv.x4 == [2, 2] - @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) + @test dfv[3:end] == refdf[1:2, 5:end] @test all(Matrix(df[1:2, 1:2]) .== 2) df = copy(refdf) @@ -165,14 +165,14 @@ end 1 1] @test df.x1 == [2, 2, 2] @test df.x2 == [2, 2, 2] - @test all(Matrix(df[3:end]) .== ones(size(df[3:end])...)) + @test df[3:end] == refdf[3:end] dfv = @view df[1:2, 3:end] dfv[[1,2]] .= Matrix(dfv[[1,2]]) .+ [1 1 1 1] @test dfv.x3 == [2, 2] @test dfv.x4 == [2, 2] - @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) + @test dfv[3:end] == refdf[1:2, 5:end] @test all(Matrix(df[1:2, 1:2]) .== 2) df = copy(refdf) @@ -181,14 +181,14 @@ end 1 1] @test df.x1 == [2, 2, 2] @test df.x2 == [2, 2, 2] - @test all(Matrix(df[3:end]) .== ones(size(df[3:end])...)) + @test df[3:end] == refdf[3:end] dfv = @view df[1:2, 3:end] dfv[:, [1,2]] .= Matrix(dfv[[1,2]]) .+ [1 1 1 1] @test dfv.x3 == [2, 2] @test dfv.x4 == [2, 2] - @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) + @test dfv[3:end] == refdf[1:2, 5:end] @test all(Matrix(df[1:2, 1:2]) .== 2) df = copy(refdf) @@ -202,26 +202,26 @@ end df[[:x1,:x2]] .= Matrix(df[[:x1,:x2]]) .+ 1 @test df.x1 == [2, 2, 2] @test df.x2 == [2, 2, 2] - @test all(Matrix(df[3:end]) .== ones(size(df[3:end])...)) + @test df[3:end] == refdf[3:end] dfv = @view df[1:2, 3:end] dfv[[:x3,:x4]] .= Matrix(dfv[[:x3,:x4]]) .+ 1 @test dfv.x3 == [2, 2] @test dfv.x4 == [2, 2] - @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) + @test dfv[3:end] == refdf[1:2, 5:end] @test all(Matrix(df[1:2, 1:2]) .== 2) df = copy(refdf) df[:, [:x1,:x2]] .= Matrix(df[[:x1,:x2]]) .+ 1 @test df.x1 == [2, 2, 2] @test df.x2 == [2, 2, 2] - @test all(Matrix(df[3:end]) .== ones(size(df[3:end])...)) + @test df[3:end] == refdf[3:end] dfv = @view df[1:2, 3:end] dfv[:, [:x3,:x4]] .= Matrix(dfv[[:x3,:x4]]) .+ 1 @test dfv.x3 == [2, 2] @test dfv.x4 == [2, 2] - @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) + @test dfv[3:end] == refdf[1:2, 5:end] @test all(Matrix(df[1:2, 1:2]) .== 2) df = copy(refdf) @@ -230,14 +230,14 @@ end 1 1] @test df.x1 == [2, 2, 2] @test df.x2 == [2, 2, 2] - @test all(Matrix(df[3:end]) .== ones(size(df[3:end])...)) + @test df[3:end] == refdf[3:end] dfv = @view df[1:2, 3:end] dfv[[:x3,:x4]] .= Matrix(dfv[[:x3,:x4]]) .+ [1 1 1 1] @test dfv.x3 == [2, 2] @test dfv.x4 == [2, 2] - @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) + @test dfv[3:end] == refdf[1:2, 5:end] @test all(Matrix(df[1:2, 1:2]) .== 2) df = copy(refdf) @@ -246,14 +246,14 @@ end 1 1] @test df.x1 == [2, 2, 2] @test df.x2 == [2, 2, 2] - @test all(Matrix(df[3:end]) .== ones(size(df[3:end])...)) + @test df[3:end] == refdf[3:end] dfv = @view df[1:2, 3:end] dfv[:, [:x3,:x4]] .= Matrix(dfv[[:x3,:x4]]) .+ [1 1 1 1] @test dfv.x3 == [2, 2] @test dfv.x4 == [2, 2] - @test all(Matrix(dfv[3:end]) .== ones(size(dfv[3:end])...)) + @test dfv[3:end] == refdf[1:2, 5:end] @test all(Matrix(df[1:2, 1:2]) .== 2) df = copy(refdf) From 1023e3c0b82e00d8fef52dc8885e8216c80c3825 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 20 May 2019 20:36:38 +0200 Subject: [PATCH 15/30] Update test/broadcasting.jl Co-Authored-By: Milan Bouchet-Valat --- test/broadcasting.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/broadcasting.jl b/test/broadcasting.jl index 72f62068d0..e32c5d76a5 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -254,7 +254,7 @@ end @test dfv.x3 == [2, 2] @test dfv.x4 == [2, 2] @test dfv[3:end] == refdf[1:2, 5:end] - @test all(Matrix(df[1:2, 1:2]) .== 2) + @test Matrix(df[1:2, 1:2]) == fill(2, (2, 2)) df = copy(refdf) dfv = @view df[1:2, 2:end] From 3f8c8ed2de9f98ed403a3804409e23c7484fe5b1 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Wed, 29 May 2019 10:47:14 +0200 Subject: [PATCH 16/30] Reword --- docs/src/lib/indexing.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/src/lib/indexing.md b/docs/src/lib/indexing.md index 6ae7a6c714..ba63243e53 100644 --- a/docs/src/lib/indexing.md +++ b/docs/src/lib/indexing.md @@ -82,16 +82,17 @@ Under construction ## Broadcasting -It is possible to assign a value to `AbstractDataFrame` and `DataFrameRow` using the `.=` operator. -In such an operation `AbstractDataFrame` is two dimensional and `DataFrameRow` as a single dimensional. +It is possible to assign a value to `AbstractDataFrame` and `DataFrameRow` objects using the `.=` operator. +In such an operation `AbstractDataFrame` is considered as two-dimensional and `DataFrameRow` as single-dimensional. !!! note - The rule above means that, analogously to single dimensional objects in Base, - `DataFrameRow` is considered to be column oriented. + The rule above means that, similar to single-dimensional objects in Base (e.g. vectors), + `DataFrameRow` is considered to be column-oriented. -If column indexing using symbols is performed the order of columns in the operation is specified by the order of symbols. +If column indexing using `Symbol` names is performed the order of columns in the operation is specified +by the order of names. -It is allowed to perform `df[col] .= value` even if `col` is not present in the `DataFrame` and it is a `Symbol` -under the condition that `df` is not empty. In such a case a new column will be created. -The assignment broadcasting is not supported for `df.col` style if `col` is not present in `df`. +`df[col] .= value` is allowed when `col` is a `Symbol` even if `col` is not present in the `DataFrame` +under the condition that `df` is not empty: a new column will be created. +On the contrary, `df.col .= value` is not allowed if `col` is not present in `df`. From c9a49603197eb50966929194597ee8b26379187f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 29 May 2019 12:43:16 +0200 Subject: [PATCH 17/30] Apply suggestions from code review Co-Authored-By: Milan Bouchet-Valat --- test/broadcasting.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/broadcasting.jl b/test/broadcasting.jl index e32c5d76a5..3c7e9dedd0 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -18,7 +18,7 @@ refdf = DataFrame(ones(3, 5)) dfr = df[1, 3:end] dfr[end-1:end] .= 10 - @test all(Vector(dfr) .== [1, 10, 10]) + @test Vector(dfr) == [1, 10, 10] @test df.x3[1] == 1 && df.x4[1] == 10 && df.x5[1] == 10 df = copy(refdf) From 974466f363921934d969171d150fa5d1affc0578 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 29 May 2019 13:42:14 +0200 Subject: [PATCH 18/30] improve tests --- test/broadcasting.jl | 371 +++++++++++++++++++++++++++++-------------- 1 file changed, 254 insertions(+), 117 deletions(-) diff --git a/test/broadcasting.jl b/test/broadcasting.jl index 72f62068d0..f04659e0c1 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -2,62 +2,108 @@ module TestBroadcasting using Test, DataFrames -refdf = DataFrame(ones(3, 5)) +refdf = DataFrame(DataFrame(reshape(1.5:15.5, (3,5)))) @testset "normal data frame and data frame row in broadcasted assignment - one column" begin df = copy(refdf) df[1] .+= 1 - @test df.x1 == [2, 2, 2] + @test df.x1 == [2.5, 3.5, 4.5] @test df[2:end] == refdf[2:end] dfv = @view df[1:2, 2:end] dfv[1] .+= 1 - @test dfv.x2 == [2, 2] + @test dfv.x2 == [5.5, 6.5] @test dfv[2:end] == refdf[1:2, 3:end] - @test all(Matrix(df[1:2, 1:2]) .== 2) + @test Matrix(df) == [2.5 5.5 7.5 10.5 13.5 + 3.5 6.5 8.5 11.5 14.5 + 4.5 6.5 9.5 12.5 15.5] dfr = df[1, 3:end] dfr[end-1:end] .= 10 - @test all(Vector(dfr) .== [1, 10, 10]) - @test df.x3[1] == 1 && df.x4[1] == 10 && df.x5[1] == 10 + @test Vector(dfr) == [7.5, 10.0, 10.0] + @test Matrix(df) == [2.5 5.5 7.5 10.0 10.0 + 3.5 6.5 8.5 11.5 14.5 + 4.5 6.5 9.5 12.5 15.5] df = copy(refdf) df[:, 1] .+= 1 - @test df.x1 == [2, 2, 2] + @test df.x1 == [2.5, 3.5, 4.5] @test df[2:end] == refdf[2:end] dfv = @view df[1:2, 2:end] dfv[:, 1] .+= 1 - @test dfv.x2 == [2, 2] + @test dfv.x2 == [5.5, 6.5] @test dfv[2:end] == refdf[1:2, 3:end] - @test all(Matrix(df[1:2, 1:2]) .== 2) + @test Matrix(df) == [2.5 5.5 7.5 10.5 13.5 + 3.5 6.5 8.5 11.5 14.5 + 4.5 6.5 9.5 12.5 15.5] df = copy(refdf) - df[1] .+= [1, 1, 1] - @test df.x1 == [2, 2, 2] + df[1] .+= [1, 2, 3] + @test df.x1 == [2.5, 4.5, 6.5] @test df[2:end] == refdf[2:end] dfv = @view df[1:2, 2:end] - dfv[1] .+= [1, 1] - @test dfv.x2 == [2, 2] + dfv[1] .+= [1, 2] + @test dfv.x2 == [5.5, 7.5] @test dfv[2:end] == refdf[1:2, 3:end] - @test all(Matrix(df[1:2, 1:2]) .== 2) + @test Matrix(df) == [2.5 5.5 7.5 10.5 13.5 + 4.5 7.5 8.5 11.5 14.5 + 6.5 6.5 9.5 12.5 15.5] dfr = df[1, 3:end] - dfr[end-1:end] .= [10, 10] - @test all(Vector(dfr) .== [1, 10, 10]) - @test df.x3[1] == 1 && df.x4[1] == 10 && df.x5[1] == 10 + dfr[end-1:end] .= [10, 11] + @test Vector(dfr) == [7.5, 10.0, 11.0] + @test Matrix(df) == [2.5 5.5 7.5 10.0 11.0 + 4.5 7.5 8.5 11.5 14.5 + 6.5 6.5 9.5 12.5 15.5] df = copy(refdf) - df[:, 1] .+= [1, 1, 1] - @test df.x1 == [2, 2, 2] + df[:, 1] .+= [1, 2, 3] + @test df.x1 == [2.5, 4.5, 6.5] @test df[2:end] == refdf[2:end] dfv = @view df[1:2, 2:end] - dfv[:, 1] .+= [1, 1] - @test dfv.x2 == [2, 2] + dfv[:, 1] .+= [1, 2] + @test dfv.x2 == [5.5, 7.5] @test dfv[2:end] == refdf[1:2, 3:end] - @test all(Matrix(df[1:2, 1:2]) .== 2) + @test Matrix(df) == [2.5 5.5 7.5 10.5 13.5 + 4.5 7.5 8.5 11.5 14.5 + 6.5 6.5 9.5 12.5 15.5] + + # test a more complex broadcasting pattern + df = copy(refdf) + df[1] .+= [0, 1, 2] .+ 1 + @test df.x1 == [2.5, 4.5, 6.5] + @test df[2:end] == refdf[2:end] + + dfv = @view df[1:2, 2:end] + dfv[1] .+= [0, 1] .+ 1 + @test dfv.x2 == [5.5, 7.5] + @test dfv[2:end] == refdf[1:2, 3:end] + @test Matrix(df) == [2.5 5.5 7.5 10.5 13.5 + 4.5 7.5 8.5 11.5 14.5 + 6.5 6.5 9.5 12.5 15.5] + + dfr = df[1, 3:end] + dfr[end-1:end] .= [9, 10] .+ 1 + @test Vector(dfr) == [7.5, 10.0, 11.0] + @test Matrix(df) == [2.5 5.5 7.5 10.0 11.0 + 4.5 7.5 8.5 11.5 14.5 + 6.5 6.5 9.5 12.5 15.5] + + df = copy(refdf) + df[:, 1] .+= [0, 1, 2] .+ 1 + @test df.x1 == [2.5, 4.5, 6.5] + @test df[2:end] == refdf[2:end] + + dfv = @view df[1:2, 2:end] + dfv[:, 1] .+= [0, 1] .+ 1 + @test dfv.x2 == [5.5, 7.5] + @test dfv[2:end] == refdf[1:2, 3:end] + @test Matrix(df) == [2.5 5.5 7.5 10.5 13.5 + 4.5 7.5 8.5 11.5 14.5 + 6.5 6.5 9.5 12.5 15.5] df = copy(refdf) dfv = @view df[1:2, 2:end] @@ -67,60 +113,77 @@ refdf = DataFrame(ones(3, 5)) @test_throws DimensionMismatch dfr[end-1:end] .= rand(3, 1) @test_throws DimensionMismatch df[:, 1] .= rand(3, 1) @test_throws DimensionMismatch dfv[:, 1] .= rand(2, 1) + @test_throws DimensionMismatch df[1] .= reshape(rand(3), :, 1) + @test_throws DimensionMismatch dfv[1] .= reshape(rand(2), :, 1) + @test_throws DimensionMismatch dfr[end-1:end] .= reshape(rand(3), :, 1) + @test_throws DimensionMismatch df[:, 1] .= reshape(rand(3), :, 1) + @test_throws DimensionMismatch dfv[:, 1] .= reshape(rand(2), :, 1) df = copy(refdf) df[:x1] .+= 1 - @test df.x1 == [2, 2, 2] + @test df.x1 == [2.5, 3.5, 4.5] @test df[2:end] == refdf[2:end] dfv = @view df[1:2, 2:end] dfv[:x2] .+= 1 - @test dfv.x2 == [2, 2] + @test dfv.x2 == [5.5, 6.5] @test dfv[2:end] == refdf[1:2, 3:end] - @test all(Matrix(df[1:2, 1:2]) .== 2) + @test Matrix(df) == [2.5 5.5 7.5 10.5 13.5 + 3.5 6.5 8.5 11.5 14.5 + 4.5 6.5 9.5 12.5 15.5] dfr = df[1, 3:end] dfr[[:x4, :x5]] .= 10 - @test all(Vector(dfr) .== [1, 10, 10]) - @test df.x3[1] == 1 && df.x4[1] == 10 && df.x5[1] == 10 + @test Vector(dfr) == [7.5, 10.0, 10.0] + @test Matrix(df) == [2.5 5.5 7.5 10.0 10.0 + 3.5 6.5 8.5 11.5 14.5 + 4.5 6.5 9.5 12.5 15.5] df = copy(refdf) df[:, :x1] .+= 1 - @test df.x1 == [2, 2, 2] + @test df.x1 == [2.5, 3.5, 4.5] @test df[2:end] == refdf[2:end] dfv = @view df[1:2, 2:end] dfv[:, :x2] .+= 1 - @test dfv.x2 == [2, 2] + @test dfv.x2 == [5.5, 6.5] @test dfv[2:end] == refdf[1:2, 3:end] - @test all(Matrix(df[1:2, 1:2]) .== 2) + @test Matrix(df) == [2.5 5.5 7.5 10.5 13.5 + 3.5 6.5 8.5 11.5 14.5 + 4.5 6.5 9.5 12.5 15.5] df = copy(refdf) - df[:x1] .+= [1, 1, 1] - @test df.x1 == [2, 2, 2] + df[:x1] .+= [1, 2, 3] + @test df.x1 == [2.5, 4.5, 6.5] @test df[2:end] == refdf[2:end] dfv = @view df[1:2, 2:end] - dfv[:x2] .+= [1, 1] - @test dfv.x2 == [2, 2] + dfv[:x2] .+= [1, 2] + @test dfv.x2 == [5.5, 7.5] @test dfv[2:end] == refdf[1:2, 3:end] - @test all(Matrix(df[1:2, 1:2]) .== 2) + @test Matrix(df) == [2.5 5.5 7.5 10.5 13.5 + 4.5 7.5 8.5 11.5 14.5 + 6.5 6.5 9.5 12.5 15.5] dfr = df[1, 3:end] - dfr[[:x4, :x5]] .= [10, 10] - @test all(Vector(dfr) .== [1, 10, 10]) - @test df.x3[1] == 1 && df.x4[1] == 10 && df.x5[1] == 10 + dfr[[:x4, :x5]] .= [10, 11] + @test Vector(dfr) == [7.5, 10.0, 11.0] + @test Matrix(df) == [2.5 5.5 7.5 10.0 11.0 + 4.5 7.5 8.5 11.5 14.5 + 6.5 6.5 9.5 12.5 15.5] df = copy(refdf) - df[:, :x1] .+= [1, 1, 1] - @test df.x1 == [2, 2, 2] + df[:, :x1] .+= [1, 2, 3] + @test df.x1 == [2.5, 4.5, 6.5] @test df[2:end] == refdf[2:end] dfv = @view df[1:2, 2:end] - dfv[:, :x2] .+= [1, 1] - @test dfv.x2 == [2, 2] + dfv[:, :x2] .+= [1, 2] + @test dfv.x2 == [5.5, 7.5] @test dfv[2:end] == refdf[1:2, 3:end] - @test all(Matrix(df[1:2, 1:2]) .== 2) + @test Matrix(df) == [2.5 5.5 7.5 10.5 13.5 + 4.5 7.5 8.5 11.5 14.5 + 6.5 6.5 9.5 12.5 15.5] df = copy(refdf) dfv = @view df[1:2, 2:end] @@ -130,66 +193,79 @@ refdf = DataFrame(ones(3, 5)) @test_throws DimensionMismatch dfr[[:x4, :x5]] .= rand(3, 1) @test_throws DimensionMismatch df[:, :x1] .= rand(3, 1) @test_throws DimensionMismatch dfv[:, :x2] .= rand(2, 1) + @test_throws DimensionMismatch df[1] .= reshape(rand(3), :, 1) + @test_throws DimensionMismatch dfv[1] .= reshape(rand(2), :, 1) + @test_throws DimensionMismatch dfr[end-1:end] .= reshape(rand(3), :, 1) + @test_throws DimensionMismatch df[:, 1] .= reshape(rand(3), :, 1) + @test_throws DimensionMismatch dfv[:, 1] .= reshape(rand(2), :, 1) end -@testset "normal data frame and data frame row in broadcasted assignment - two columns" begin +@testset "normal data frame and data frame view in broadcasted assignment - two columns" begin df = copy(refdf) df[[1,2]] .= Matrix(df[[1,2]]) .+ 1 - @test df.x1 == [2, 2, 2] - @test df.x2 == [2, 2, 2] + @test df.x1 == [2.5, 3.5, 4.5] + @test df.x2 == [5.5, 6.5, 7.5] @test df[3:end] == refdf[3:end] dfv = @view df[1:2, 3:end] dfv[[1,2]] .= Matrix(dfv[[1,2]]) .+ 1 - @test dfv.x3 == [2, 2] - @test dfv.x4 == [2, 2] + @test dfv.x3 == [8.5, 9.5] + @test dfv.x4 == [11.5, 12.5] @test dfv[3:end] == refdf[1:2, 5:end] - @test all(Matrix(df[1:2, 1:2]) .== 2) + @test Matrix(df) == [2.5 5.5 8.5 11.5 13.5 + 3.5 6.5 9.5 12.5 14.5 + 4.5 7.5 9.5 12.5 15.5] df = copy(refdf) df[:, [1,2]] .= Matrix(df[[1,2]]) .+ 1 - @test df.x1 == [2, 2, 2] - @test df.x2 == [2, 2, 2] + @test df.x1 == [2.5, 3.5, 4.5] + @test df.x2 == [5.5, 6.5, 7.5] @test df[3:end] == refdf[3:end] dfv = @view df[1:2, 3:end] dfv[:, [1,2]] .= Matrix(dfv[[1,2]]) .+ 1 - @test dfv.x3 == [2, 2] - @test dfv.x4 == [2, 2] + @test dfv.x3 == [8.5, 9.5] + @test dfv.x4 == [11.5, 12.5] @test dfv[3:end] == refdf[1:2, 5:end] - @test all(Matrix(df[1:2, 1:2]) .== 2) + @test Matrix(df) == [2.5 5.5 8.5 11.5 13.5 + 3.5 6.5 9.5 12.5 14.5 + 4.5 7.5 9.5 12.5 15.5] df = copy(refdf) - df[[1,2]] .= Matrix(df[[1,2]]) .+ [1 1 - 1 1 - 1 1] - @test df.x1 == [2, 2, 2] - @test df.x2 == [2, 2, 2] + df[[1,2]] .= Matrix(df[[1,2]]) .+ [1 4 + 2 5 + 3 6] + @test df.x1 == [2.5, 4.5, 6.5] + @test df.x2 == [8.5, 10.5, 12.5] @test df[3:end] == refdf[3:end] dfv = @view df[1:2, 3:end] - dfv[[1,2]] .= Matrix(dfv[[1,2]]) .+ [1 1 - 1 1] - @test dfv.x3 == [2, 2] - @test dfv.x4 == [2, 2] + dfv[[1,2]] .= Matrix(dfv[[1,2]]) .+ [1 3 + 2 4] + @test dfv.x3 == [8.5, 10.5] + @test dfv.x4 == [13.5, 15.5] @test dfv[3:end] == refdf[1:2, 5:end] - @test all(Matrix(df[1:2, 1:2]) .== 2) + @test Matrix(df) == [2.5 8.5 8.5 13.5 13.5 + 4.5 10.5 10.5 15.5 14.5 + 6.5 12.5 9.5 12.5 15.5] df = copy(refdf) - df[:, [1,2]] .= Matrix(df[[1,2]]) .+ [1 1 - 1 1 - 1 1] - @test df.x1 == [2, 2, 2] - @test df.x2 == [2, 2, 2] + df[:, [1,2]] .= Matrix(df[[1,2]]) .+ [1 4 + 2 5 + 3 6] + @test df.x1 == [2.5, 4.5, 6.5] + @test df.x2 == [8.5, 10.5, 12.5] @test df[3:end] == refdf[3:end] dfv = @view df[1:2, 3:end] - dfv[:, [1,2]] .= Matrix(dfv[[1,2]]) .+ [1 1 - 1 1] - @test dfv.x3 == [2, 2] - @test dfv.x4 == [2, 2] + dfv[:, [1,2]] .= Matrix(dfv[[1,2]]) .+ [1 3 + 2 4] + @test dfv.x3 == [8.5, 10.5] + @test dfv.x4 == [13.5, 15.5] @test dfv[3:end] == refdf[1:2, 5:end] - @test all(Matrix(df[1:2, 1:2]) .== 2) + @test Matrix(df) == [2.5 8.5 8.5 13.5 13.5 + 4.5 10.5 10.5 15.5 14.5 + 6.5 12.5 9.5 12.5 15.5] df = copy(refdf) dfv = @view df[1:2, 2:end] @@ -200,61 +276,69 @@ end df = copy(refdf) df[[:x1,:x2]] .= Matrix(df[[:x1,:x2]]) .+ 1 - @test df.x1 == [2, 2, 2] - @test df.x2 == [2, 2, 2] + @test df.x1 == [2.5, 3.5, 4.5] + @test df.x2 == [5.5, 6.5, 7.5] @test df[3:end] == refdf[3:end] dfv = @view df[1:2, 3:end] dfv[[:x3,:x4]] .= Matrix(dfv[[:x3,:x4]]) .+ 1 - @test dfv.x3 == [2, 2] - @test dfv.x4 == [2, 2] + @test dfv.x3 == [8.5, 9.5] + @test dfv.x4 == [11.5, 12.5] @test dfv[3:end] == refdf[1:2, 5:end] - @test all(Matrix(df[1:2, 1:2]) .== 2) + @test Matrix(df) == [2.5 5.5 8.5 11.5 13.5 + 3.5 6.5 9.5 12.5 14.5 + 4.5 7.5 9.5 12.5 15.5] df = copy(refdf) df[:, [:x1,:x2]] .= Matrix(df[[:x1,:x2]]) .+ 1 - @test df.x1 == [2, 2, 2] - @test df.x2 == [2, 2, 2] + @test df.x1 == [2.5, 3.5, 4.5] + @test df.x2 == [5.5, 6.5, 7.5] @test df[3:end] == refdf[3:end] dfv = @view df[1:2, 3:end] dfv[:, [:x3,:x4]] .= Matrix(dfv[[:x3,:x4]]) .+ 1 - @test dfv.x3 == [2, 2] - @test dfv.x4 == [2, 2] + @test dfv.x3 == [8.5, 9.5] + @test dfv.x4 == [11.5, 12.5] @test dfv[3:end] == refdf[1:2, 5:end] - @test all(Matrix(df[1:2, 1:2]) .== 2) + @test Matrix(df) == [2.5 5.5 8.5 11.5 13.5 + 3.5 6.5 9.5 12.5 14.5 + 4.5 7.5 9.5 12.5 15.5] df = copy(refdf) - df[[:x1,:x2]] .= Matrix(df[[:x1,:x2]]) .+ [1 1 - 1 1 - 1 1] - @test df.x1 == [2, 2, 2] - @test df.x2 == [2, 2, 2] + df[[:x1,:x2]] .= Matrix(df[[:x1,:x2]]) .+ [1 4 + 2 5 + 3 6] + @test df.x1 == [2.5, 4.5, 6.5] + @test df.x2 == [8.5, 10.5, 12.5] @test df[3:end] == refdf[3:end] dfv = @view df[1:2, 3:end] - dfv[[:x3,:x4]] .= Matrix(dfv[[:x3,:x4]]) .+ [1 1 - 1 1] - @test dfv.x3 == [2, 2] - @test dfv.x4 == [2, 2] + dfv[[:x3,:x4]] .= Matrix(dfv[[:x3,:x4]]) .+ [1 3 + 2 4] + @test dfv.x3 == [8.5, 10.5] + @test dfv.x4 == [13.5, 15.5] @test dfv[3:end] == refdf[1:2, 5:end] - @test all(Matrix(df[1:2, 1:2]) .== 2) + @test Matrix(df) == [2.5 8.5 8.5 13.5 13.5 + 4.5 10.5 10.5 15.5 14.5 + 6.5 12.5 9.5 12.5 15.5] df = copy(refdf) - df[:, [:x1,:x2]] .= Matrix(df[[:x1,:x2]]) .+ [1 1 - 1 1 - 1 1] - @test df.x1 == [2, 2, 2] - @test df.x2 == [2, 2, 2] + df[:, [:x1,:x2]] .= Matrix(df[[:x1,:x2]]) .+ [1 4 + 2 5 + 3 6] + @test df.x1 == [2.5, 4.5, 6.5] + @test df.x2 == [8.5, 10.5, 12.5] @test df[3:end] == refdf[3:end] dfv = @view df[1:2, 3:end] - dfv[:, [:x3,:x4]] .= Matrix(dfv[[:x3,:x4]]) .+ [1 1 - 1 1] - @test dfv.x3 == [2, 2] - @test dfv.x4 == [2, 2] + dfv[:, [:x3,:x4]] .= Matrix(dfv[[:x3,:x4]]) .+ [1 3 + 2 4] + @test dfv.x3 == [8.5, 10.5] + @test dfv.x4 == [13.5, 15.5] @test dfv[3:end] == refdf[1:2, 5:end] - @test all(Matrix(df[1:2, 1:2]) .== 2) + @test Matrix(df) == [2.5 8.5 8.5 13.5 13.5 + 4.5 10.5 10.5 15.5 14.5 + 6.5 12.5 9.5 12.5 15.5] df = copy(refdf) dfv = @view df[1:2, 2:end] @@ -262,6 +346,48 @@ end @test_throws DimensionMismatch dfv[[:x3,:x4]] .= rand(2, 10) @test_throws DimensionMismatch df[:, [:x1,:x2]] .= rand(3, 10) @test_throws DimensionMismatch dfv[:, [:x3,:x4]] .= rand(2, 10) + + df = copy(refdf) + df[[1,2]] .= [1 2 + 3 4 + 5 6] + @test Matrix(df) == [1.0 2.0 7.5 10.5 13.5 + 3.0 4.0 8.5 11.5 14.5 + 5.0 6.0 9.5 12.5 15.5] + + df = copy(refdf) + df[[1,2]] .= [1, 3, 5] + @test Matrix(df) == [1.0 1.0 7.5 10.5 13.5 + 3.0 3.0 8.5 11.5 14.5 + 5.0 5.0 9.5 12.5 15.5] + + df = copy(refdf) + df[[1,2]] .= 1 + @test Matrix(df) == [1.0 1.0 7.5 10.5 13.5 + 1.0 1.0 8.5 11.5 14.5 + 1.0 1.0 9.5 12.5 15.5] + + df = copy(refdf) + dfv = view(df, 2:3, 2:4) + dfv[[1,2]] .= [1 2 + 3 4] + @test Matrix(df) == [1.5 4.5 7.5 10.5 13.5 + 2.5 1.0 2.0 11.5 14.5 + 3.5 3.0 4.0 12.5 15.5] + + df = copy(refdf) + dfv = view(df, 2:3, 2:4) + dfv[[1,2]] .= [1, 3] + @test Matrix(df) == [1.5 4.5 7.5 10.5 13.5 + 2.5 1.0 1.0 11.5 14.5 + 3.5 3.0 3.0 12.5 15.5] + + df = copy(refdf) + dfv = view(df, 2:3, 2:4) + dfv[[1,2]] .= 1 + @test Matrix(df) == [1.5 4.5 7.5 10.5 13.5 + 2.5 1.0 1.0 11.5 14.5 + 3.5 1.0 1.0 12.5 15.5] end @testset "assignment to a whole data frame and data frame row" begin @@ -270,42 +396,53 @@ end @test all(Matrix(df) .== 10) dfv = view(df, 1:2, 1:4) dfv .= 100 - @test (all(Matrix(df[1:2, 1:4]) .== 100)) - @test (all(Vector(df[3, 1:4]) .== 10)) + @test Matrix(df) == [100.0 100.0 100.0 100.0 10.0 + 100.0 100.0 100.0 100.0 10.0 + 10.0 10.0 10.0 10.0 10.0] dfr = df[1, 1:2] dfr .= 1000 - @test (all(Vector(df[1, 1:2]) .== 1000)) - @test (all(Vector(df[3, :]) .!= 1000)) + @test Matrix(df) == [1000.0 1000.0 100.0 100.0 10.0 + 100.0 100.0 100.0 100.0 10.0 + 10.0 10.0 10.0 10.0 10.0] df = copy(refdf) df[:] .= 10 @test all(Matrix(df) .== 10) dfv = view(df, 1:2, 1:4) dfv[:] .= 100 - @test (all(Matrix(df[1:2, 1:4]) .== 100)) - @test (all(Vector(df[3, 1:4]) .== 10)) + @test Matrix(df) == [100.0 100.0 100.0 100.0 10.0 + 100.0 100.0 100.0 100.0 10.0 + 10.0 10.0 10.0 10.0 10.0] dfr = df[1, 1:2] dfr[:] .= 1000 - @test (all(Vector(df[1, 1:2]) .== 1000)) - @test (all(Vector(df[3, :]) .!= 1000)) + @test Matrix(df) == [1000.0 1000.0 100.0 100.0 10.0 + 100.0 100.0 100.0 100.0 10.0 + 10.0 10.0 10.0 10.0 10.0] df = copy(refdf) df[:,:] .= 10 @test all(Matrix(df) .== 10) dfv = view(df, 1:2, 1:4) dfv[:, :] .= 100 - @test (all(Matrix(df[1:2, 1:4]) .== 100)) - @test (all(Vector(df[3, 1:4]) .== 10)) + @test Matrix(df) == [100.0 100.0 100.0 100.0 10.0 + 100.0 100.0 100.0 100.0 10.0 + 10.0 10.0 10.0 10.0 10.0] end @testset "extending data frame in broadcasted assignment - one column" begin df = copy(refdf) df[:a] .= 1 - @test all(Matrix(df) .== 1) + @test Matrix(df) == [1.5 4.5 7.5 10.5 13.5 1.0 + 2.5 5.5 8.5 11.5 14.5 1.0 + 3.5 6.5 9.5 12.5 15.5 1.0] @test names(df)[end] == :a - df[:b] .= [1, 1, 1] - @test all(Matrix(df) .== 1) + @test df[1:end-1] == refdf + df[:b] .= [1, 2, 3] + @test Matrix(df) == [1.5 4.5 7.5 10.5 13.5 1.0 1.0 + 2.5 5.5 8.5 11.5 14.5 1.0 2.0 + 3.5 6.5 9.5 12.5 15.5 1.0 3.0] @test names(df)[end] == :b + @test df[1:end-2] == refdf cdf = copy(df) @test_throws DimensionMismatch df[:c] .= ones(3, 1) @test df == cdf From 06d86ce7287b685d7b3a6348578b7b7fab55e59c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 31 May 2019 11:45:48 +0200 Subject: [PATCH 19/30] Update test/broadcasting.jl Co-Authored-By: Milan Bouchet-Valat --- test/broadcasting.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/broadcasting.jl b/test/broadcasting.jl index f04659e0c1..f63e09353a 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -2,7 +2,7 @@ module TestBroadcasting using Test, DataFrames -refdf = DataFrame(DataFrame(reshape(1.5:15.5, (3,5)))) +refdf = DataFrame(reshape(1.5:15.5, (3,5))) @testset "normal data frame and data frame row in broadcasted assignment - one column" begin df = copy(refdf) From 1ce5d2f6b1184bcf33fde29b7a00c06d83739c52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 31 May 2019 11:50:48 +0200 Subject: [PATCH 20/30] tests of single column matrix in broadcasting --- test/broadcasting.jl | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/broadcasting.jl b/test/broadcasting.jl index f63e09353a..f44f43bfcd 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -361,6 +361,12 @@ end 3.0 3.0 8.5 11.5 14.5 5.0 5.0 9.5 12.5 15.5] + df = copy(refdf) + df[[1,2]] .= reshape([1, 3, 5], 3, 1) + @test Matrix(df) == [1.0 1.0 7.5 10.5 13.5 + 3.0 3.0 8.5 11.5 14.5 + 5.0 5.0 9.5 12.5 15.5] + df = copy(refdf) df[[1,2]] .= 1 @test Matrix(df) == [1.0 1.0 7.5 10.5 13.5 @@ -382,6 +388,13 @@ end 2.5 1.0 1.0 11.5 14.5 3.5 3.0 3.0 12.5 15.5] + df = copy(refdf) + dfv = view(df, 2:3, 2:4) + dfv[[1,2]] .= reshape([1, 3], 2, 1) + @test Matrix(df) == [1.5 4.5 7.5 10.5 13.5 + 2.5 1.0 1.0 11.5 14.5 + 3.5 3.0 3.0 12.5 15.5] + df = copy(refdf) dfv = view(df, 2:3, 2:4) dfv[[1,2]] .= 1 From 09af7d967a294e913b819901d867373bacee3d3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 31 May 2019 12:24:30 +0200 Subject: [PATCH 21/30] performance improvements --- src/other/broadcasting.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index f9227ab042..a10e35f275 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -4,7 +4,7 @@ struct LazyNewColDataFrame end # we allow LazyNewColDataFrame only for data frames with at least one column -Base.axes(x::LazyNewColDataFrame) = axes(x.df[1]) +Base.axes(x::LazyNewColDataFrame) = (Base.OneTo(nrow(x.df)),) Base.maybeview(df::AbstractDataFrame, idxs...) = view(df, idxs...) @@ -28,7 +28,12 @@ function Base.copyto!(lazydf::LazyNewColDataFrame, bc::Base.Broadcast.Broadcaste if isempty(lazydf.df) throw(ArgumentError("creating a column via broadcasting is not allowed on empty data frames")) end - T = mapreduce(i -> typeof(bc[i]), promote_type, eachindex(bc); init=Union{}) + if bc isa Base.Broadcast.Broadcasted{<:Base.Broadcast.AbstractArrayStyle{0}} && + bc.f === identity && bc.args isa Tuple{Any} && Base.Broadcast.isflat(bc) + T = typeof(bc.args[1][]) + else + T = mapreduce(i -> typeof(bc[i]), promote_type, eachindex(bc); init=Union{}) + end col = Tables.allocatecolumn(T, nrow(lazydf.df)) copyto!(col, bc) lazydf.df[lazydf.col] = col From e0db92e14f49d38cb337b48fa37991bea51f47ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 5 Jun 2019 20:39:04 +0200 Subject: [PATCH 22/30] improved copyto! --- src/other/broadcasting.jl | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index a10e35f275..2dd328d3e9 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -31,11 +31,18 @@ function Base.copyto!(lazydf::LazyNewColDataFrame, bc::Base.Broadcast.Broadcaste if bc isa Base.Broadcast.Broadcasted{<:Base.Broadcast.AbstractArrayStyle{0}} && bc.f === identity && bc.args isa Tuple{Any} && Base.Broadcast.isflat(bc) T = typeof(bc.args[1][]) + col = Tables.allocatecolumn(T, nrow(lazydf.df)) + copyto!(col, bc) else - T = mapreduce(i -> typeof(bc[i]), promote_type, eachindex(bc); init=Union{}) + tmpcol = Base.Broadcast.materialize(bc) + T = eltype(tmpcol) + if Missings.T(T) <: Union{CategoricalValue, CategoricalString} + col = Tables.allocatecolumn(T, nrow(lazydf.df)) + copyto!(col, tmpcol) + else + col = tmpcol + end end - col = Tables.allocatecolumn(T, nrow(lazydf.df)) - copyto!(col, bc) lazydf.df[lazydf.col] = col end From 491701222cbc7b1fabfbff65e8aa1bc4e17ba530 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 5 Jun 2019 21:51:23 +0200 Subject: [PATCH 23/30] add another fast branch --- src/other/broadcasting.jl | 3 +++ test/broadcasting.jl | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index 2dd328d3e9..1841d4296a 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -33,6 +33,9 @@ function Base.copyto!(lazydf::LazyNewColDataFrame, bc::Base.Broadcast.Broadcaste T = typeof(bc.args[1][]) col = Tables.allocatecolumn(T, nrow(lazydf.df)) copyto!(col, bc) + elseif bc isa Base.Broadcast.Broadcasted{<:Base.Broadcast.AbstractArrayStyle{1}} && + bc.f === identity && bc.args isa Tuple{CategoricalVector} && Base.Broadcast.isflat(bc) + col = copy(bc.args[1]) else tmpcol = Base.Broadcast.materialize(bc) T = eltype(tmpcol) diff --git a/test/broadcasting.jl b/test/broadcasting.jl index f44f43bfcd..667933fc71 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -509,6 +509,12 @@ end @test df.c2 == [1,1,1] @test df.c2 isa CategoricalVector @test levels(df.c2) != levels(v) + df[:c3] .= (x->x).(v) + @test df.c3 == v + @test df.c3 !== v + @test df.c3 isa CategoricalVector + @test levels(df.c3) == levels(v) + @test levels(df.c3) !== levels(v) end end # module From 9e9aaad89de2ce268771ccac5cce15a3c410a8b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Thu, 6 Jun 2019 18:59:25 +0200 Subject: [PATCH 24/30] some additional tests --- test/broadcasting.jl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/broadcasting.jl b/test/broadcasting.jl index 667933fc71..2b8ab5717f 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -515,6 +515,16 @@ end @test df.c3 isa CategoricalVector @test levels(df.c3) == levels(v) @test levels(df.c3) !== levels(v) + df[:c4] .= identity.(v) + @test df.c4 == v + @test df.c4 !== v + @test df.c4 isa CategoricalVector + @test levels(df.c4) == levels(v) + @test levels(df.c4) !== levels(v) + df[:c5] .= (x->v[1]).(v) + @test unique(df.c5) == [get(v[1]] + @test df.c5 isa CategoricalVector + @test length(levels(df.c5)) == 1 end end # module From 181c5fbd78f61fae2f755f43fb926d4c81431aa6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Thu, 6 Jun 2019 19:49:45 +0200 Subject: [PATCH 25/30] fix typo --- test/broadcasting.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/broadcasting.jl b/test/broadcasting.jl index 2b8ab5717f..799fc7ab68 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -522,7 +522,7 @@ end @test levels(df.c4) == levels(v) @test levels(df.c4) !== levels(v) df[:c5] .= (x->v[1]).(v) - @test unique(df.c5) == [get(v[1]] + @test unique(df.c5) == [get(v[1])] @test df.c5 isa CategoricalVector @test length(levels(df.c5)) == 1 end From 0c6843b6025bf58ba2d6859d5b71715191f3f569 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 7 Jun 2019 16:53:52 +0200 Subject: [PATCH 26/30] use new categoricalarrays broadcasting support --- Project.toml | 2 +- src/other/broadcasting.jl | 12 +------- test/broadcasting.jl | 60 +++++++++++++++++++++------------------ 3 files changed, 34 insertions(+), 40 deletions(-) diff --git a/Project.toml b/Project.toml index d135beb82f..355dbf4fea 100644 --- a/Project.toml +++ b/Project.toml @@ -32,7 +32,7 @@ test = ["DataStructures", "DataValues", "Dates", "LaTeXStrings", "Random", "Test [compat] julia = "1" Missings = ">= 0.2.3" -CategoricalArrays = ">= 0.5.2" +CategoricalArrays = ">= 0.5.3" StatsBase = ">= 0.11.0" Compat = ">= 0.59.0" Tables = ">= 0.1.15" diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index 1841d4296a..c17e3d50c1 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -33,18 +33,8 @@ function Base.copyto!(lazydf::LazyNewColDataFrame, bc::Base.Broadcast.Broadcaste T = typeof(bc.args[1][]) col = Tables.allocatecolumn(T, nrow(lazydf.df)) copyto!(col, bc) - elseif bc isa Base.Broadcast.Broadcasted{<:Base.Broadcast.AbstractArrayStyle{1}} && - bc.f === identity && bc.args isa Tuple{CategoricalVector} && Base.Broadcast.isflat(bc) - col = copy(bc.args[1]) else - tmpcol = Base.Broadcast.materialize(bc) - T = eltype(tmpcol) - if Missings.T(T) <: Union{CategoricalValue, CategoricalString} - col = Tables.allocatecolumn(T, nrow(lazydf.df)) - copyto!(col, tmpcol) - else - col = tmpcol - end + col = copy(bc) end lazydf.df[lazydf.col] = col end diff --git a/test/broadcasting.jl b/test/broadcasting.jl index 799fc7ab68..55e1c4d29b 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -497,34 +497,38 @@ end end @testset "test categorical values" begin - df = copy(refdf) - v = categorical([1,2,3]) - df[:c1] .= v - @test df.c1 == v - @test df.c1 !== v - @test df.c1 isa CategoricalVector - @test levels(df.c1) == levels(v) - @test levels(df.c1) !== levels(v) - df[:c2] .= v[1] - @test df.c2 == [1,1,1] - @test df.c2 isa CategoricalVector - @test levels(df.c2) != levels(v) - df[:c3] .= (x->x).(v) - @test df.c3 == v - @test df.c3 !== v - @test df.c3 isa CategoricalVector - @test levels(df.c3) == levels(v) - @test levels(df.c3) !== levels(v) - df[:c4] .= identity.(v) - @test df.c4 == v - @test df.c4 !== v - @test df.c4 isa CategoricalVector - @test levels(df.c4) == levels(v) - @test levels(df.c4) !== levels(v) - df[:c5] .= (x->v[1]).(v) - @test unique(df.c5) == [get(v[1])] - @test df.c5 isa CategoricalVector - @test length(levels(df.c5)) == 1 + for v in [categorical([1,2,3]), categorical([1,2,3, missing]), + categorical([missing, 1,2,3]), + categorical(["1","2","3"]), categorical(["1","2","3", missing]), + categorical([missing, "1","2","3"])] + df = copy(refdf) + df[:c1] .= v + @test df.c1 == v + @test df.c1 !== v + @test df.c1 isa CategoricalVector + @test levels(df.c1) == levels(v) + @test levels(df.c1) !== levels(v) + df[:c2] .= v[2] + @test df.c2 == get.v[2], v[2], v[2]] + @test df.c2 isa CategoricalVector + @test levels(df.c2) != levels(v) + df[:c3] .= (x->x).(v) + @test df.c3 == v + @test df.c3 !== v + @test df.c3 isa CategoricalVector + @test levels(df.c3) == levels(v) + @test levels(df.c3) !== levels(v) + df[:c4] .= identity.(v) + @test df.c4 == v + @test df.c4 !== v + @test df.c4 isa CategoricalVector + @test levels(df.c4) == levels(v) + @test levels(df.c4) !== levels(v) + df[:c5] .= (x->v[1]).(v) + @test unique(df.c5) == [get(v[1])] + @test df.c5 isa CategoricalVector + @test length(levels(df.c5)) == 1 + end end end # module From e3c5d578e6d0cea6cf6681d35696a033feb34ebb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 7 Jun 2019 17:24:10 +0200 Subject: [PATCH 27/30] minor code improvements after the review --- src/other/broadcasting.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index c17e3d50c1..abce85a7ea 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -31,10 +31,10 @@ function Base.copyto!(lazydf::LazyNewColDataFrame, bc::Base.Broadcast.Broadcaste if bc isa Base.Broadcast.Broadcasted{<:Base.Broadcast.AbstractArrayStyle{0}} && bc.f === identity && bc.args isa Tuple{Any} && Base.Broadcast.isflat(bc) T = typeof(bc.args[1][]) - col = Tables.allocatecolumn(T, nrow(lazydf.df)) + col = similar(T, nrow(lazydf.df)) copyto!(col, bc) else - col = copy(bc) + col = Base.Broadcast.materialize(bc) end lazydf.df[lazydf.col] = col end From 7cdee94642b90693a8ab279d240d310f172a6a6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 7 Jun 2019 20:56:46 +0200 Subject: [PATCH 28/30] fix similar --- src/other/broadcasting.jl | 2 +- test/broadcasting.jl | 22 ++++++++++++---------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index abce85a7ea..8c9c852408 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -31,7 +31,7 @@ function Base.copyto!(lazydf::LazyNewColDataFrame, bc::Base.Broadcast.Broadcaste if bc isa Base.Broadcast.Broadcasted{<:Base.Broadcast.AbstractArrayStyle{0}} && bc.f === identity && bc.args isa Tuple{Any} && Base.Broadcast.isflat(bc) T = typeof(bc.args[1][]) - col = similar(T, nrow(lazydf.df)) + col = similar(Vector{T}, nrow(lazydf.df)) copyto!(col, bc) else col = Base.Broadcast.materialize(bc) diff --git a/test/broadcasting.jl b/test/broadcasting.jl index 55e1c4d29b..c4ed867f2a 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -2,6 +2,8 @@ module TestBroadcasting using Test, DataFrames +const ≅ = isequal + refdf = DataFrame(reshape(1.5:15.5, (3,5))) @testset "normal data frame and data frame row in broadcasted assignment - one column" begin @@ -497,35 +499,35 @@ end end @testset "test categorical values" begin - for v in [categorical([1,2,3]), categorical([1,2,3, missing]), - categorical([missing, 1,2,3]), - categorical(["1","2","3"]), categorical(["1","2","3", missing]), - categorical([missing, "1","2","3"])] + for v in [categorical([1,2,3]), categorical([1,2, missing]), + categorical([missing, 1,2]), + categorical(["1","2","3"]), categorical(["1","2", missing]), + categorical([missing, "1","2"])] df = copy(refdf) df[:c1] .= v - @test df.c1 == v + @test df.c1 ≅ v @test df.c1 !== v @test df.c1 isa CategoricalVector @test levels(df.c1) == levels(v) @test levels(df.c1) !== levels(v) df[:c2] .= v[2] - @test df.c2 == get.v[2], v[2], v[2]] + @test df.c2 == get.([v[2], v[2], v[2]]) @test df.c2 isa CategoricalVector @test levels(df.c2) != levels(v) df[:c3] .= (x->x).(v) - @test df.c3 == v + @test df.c3 ≅ v @test df.c3 !== v @test df.c3 isa CategoricalVector @test levels(df.c3) == levels(v) @test levels(df.c3) !== levels(v) df[:c4] .= identity.(v) - @test df.c4 == v + @test df.c4 ≅ v @test df.c4 !== v @test df.c4 isa CategoricalVector @test levels(df.c4) == levels(v) @test levels(df.c4) !== levels(v) - df[:c5] .= (x->v[1]).(v) - @test unique(df.c5) == [get(v[1])] + df[:c5] .= (x->v[2]).(v) + @test unique(df.c5) == [get(v[2])] @test df.c5 isa CategoricalVector @test length(levels(df.c5)) == 1 end From 1c11e70d25aec23410c5a87ff58167db4116ce94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 7 Jun 2019 22:27:59 +0200 Subject: [PATCH 29/30] Update src/other/broadcasting.jl Co-Authored-By: Milan Bouchet-Valat --- src/other/broadcasting.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index 8c9c852408..920924cebf 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -13,7 +13,7 @@ function Base.maybeview(df::AbstractDataFrame, idxs) throw(ArgumentError("Broadcasting into a data frame with no columns is not allowed")) end if idxs isa Symbol - if !haskey(df, idxs) + if !haskey(index(df), idxs) if !(df isa DataFrame) # this will throw an appropriate error message df[idxs] From 43a383ad9756a10fdb68c0bdefe407e63f5b4449 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sat, 8 Jun 2019 00:00:33 +0200 Subject: [PATCH 30/30] up CatArrays version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 355dbf4fea..a538333dce 100644 --- a/Project.toml +++ b/Project.toml @@ -32,7 +32,7 @@ test = ["DataStructures", "DataValues", "Dates", "LaTeXStrings", "Random", "Test [compat] julia = "1" Missings = ">= 0.2.3" -CategoricalArrays = ">= 0.5.3" +CategoricalArrays = ">= 0.5.4" StatsBase = ">= 0.11.0" Compat = ">= 0.59.0" Tables = ">= 0.1.15"