From 333ba91133848502099888a17b8b6723d3516c5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Thu, 17 Mar 2022 09:08:41 +0100 Subject: [PATCH] make broadcasting assignment consistent with ! (#3022) --- NEWS.md | 7 +++ docs/src/lib/indexing.md | 6 +- src/other/broadcasting.jl | 18 ++---- test/broadcasting.jl | 112 ++++++++++++++++++++++++++++------ test/dataframe.jl | 6 +- test/deprecated.jl | 9 --- test/subdataframe_mutation.jl | 12 +++- 7 files changed, 122 insertions(+), 48 deletions(-) diff --git a/NEWS.md b/NEWS.md index 10515b5e26..bcd65f3ff3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -18,6 +18,13 @@ * `first` and `last` for `GroupedDataFrame` now support passing number of elements to get ([#3006](https://github.com/JuliaData/DataFrames.jl/issues/3006)) +## Previously announced breaking changes + +* On Julia 1.7 or newer broadcasting assignment + into an existing column of a data frame replaces it. Under Julia 1.6 + or older it is an in place operation. + ([#3022](https://github.com/JuliaData/DataFrames.jl/pull/3022) + # DataFrames.jl v1.3.2 Patch Release Notes ## Bug fixes diff --git a/docs/src/lib/indexing.md b/docs/src/lib/indexing.md index 205abccee4..326b3e6d40 100644 --- a/docs/src/lib/indexing.md +++ b/docs/src/lib/indexing.md @@ -69,10 +69,10 @@ In the descriptions below `df` represents a `DataFrame`, `sdf` is a `:` always expands to `axes(df, 1)` or `axes(sdf, 1)`. `df.col` works like `df[!, col]` and `sdf.col` works like `sdf[!, col]` in all -cases except that `df.col .= v` and `sdf.col .= v` perform in-place broadcasting +cases. An exception is that under Julia 1.6 or earlier +`df.col .= v` and `sdf.col .= v` performs in-place broadcasting if `col` is present in `df`/`sdf` and is a valid identifier (this inconsistency -is deprecated and in DataFrames.jl 1.4 release under Julia 1.7 and later both -syntaxes will be always consistent). +is not present under Julia 1.7 and later). ## `getindex` and `view` diff --git a/src/other/broadcasting.jl b/src/other/broadcasting.jl index 1b162ed858..7a1cc73233 100644 --- a/src/other/broadcasting.jl +++ b/src/other/broadcasting.jl @@ -129,21 +129,11 @@ end if isdefined(Base, :dotgetproperty) function Base.dotgetproperty(df::AbstractDataFrame, col::SymbolOrString) - if columnindex(df, col) == 0 - if !is_column_insertion_allowed(df) - throw(ArgumentError("creating new columns in a SubDataFrame that subsets " * - "columns of its parent data frame is disallowed")) - end - # TODO: double check that this is tested - return LazyNewColDataFrame(df, Symbol(col)) - else - # TODO: remove the deprecation in DataFrames.jl 1.4 release - Base.depwarn("In the 1.4 release of DataFrames.jl this operation will allocate a new column " * - "instead of performing an in-place assignment. " * - "To perform an in-place assignment use `df[:, col] .= ...` instead.", - :dotgetproperty) - return getproperty(df, col) + if columnindex(df, col) == 0 && !is_column_insertion_allowed(df) + throw(ArgumentError("creating new columns in a SubDataFrame that subsets " * + "columns of its parent data frame is disallowed")) end + return LazyNewColDataFrame(df, Symbol(col)) end end diff --git a/test/broadcasting.jl b/test/broadcasting.jl index 36eb027f17..426219e1aa 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -1463,12 +1463,41 @@ end df = copy(refdf) v1 = df[!, 1] - df.x1 .= 'd' - @test v1 == [100.0, 100.0, 100.0] - @test_throws MethodError df[:, 1] .= "d" - @test v1 == [100.0, 100.0, 100.0] - @test_throws DimensionMismatch df[:, 1] .= [1 2 3] - @test v1 == [100.0, 100.0, 100.0] + if isdefined(Base, :dotgetproperty) + df.x1 .= 'd' + @test df.x1 == ['d', 'd', 'd'] + @test eltype(df.x1) === Char + @test_throws MethodError df[:, 1] .= "d" + @test_throws DimensionMismatch df[:, 1] .= [1 2 3] + @test v1 == [1.5, 2.5, 3.5] + else + df.x1 .= 'd' + @test v1 == [100.0, 100.0, 100.0] + @test_throws MethodError df[:, 1] .= "d" + @test v1 == [100.0, 100.0, 100.0] + @test_throws DimensionMismatch df[:, 1] .= [1 2 3] + @test v1 == [100.0, 100.0, 100.0] + end + + if isdefined(Base, :dotgetproperty) + df = DataFrame(a=1:4, b=1, c=2) + df.a .= 'a':'d' + @test df == DataFrame(a='a':'d', b=1, c=2) + dfv = view(df, 2:3, 2:3) + x = df.b + dfv.b .= 0 + @test df.b == [1, 0, 0, 1] + @test x == [1, 1, 1, 1] + else + df = DataFrame(a=1:4, b=1, c=2) + df.a .= 'a':'d' + @test df == DataFrame(a=97:100, b=1, c=2) + dfv = view(df, 2:3, 2:3) + x = df.b + dfv.b .= 0 + @test df.b == [1, 0, 0, 1] + @test x === df.b + end df = copy(refdf) if isdefined(Base, :dotgetproperty) @@ -1608,12 +1637,22 @@ end df = view(copy(refdf), :, :) v1 = df[!, 1] - df.x1 .= 'd' - @test v1 == [100.0, 100.0, 100.0] - @test_throws MethodError df[:, 1] .= "d" - @test v1 == [100.0, 100.0, 100.0] - @test_throws DimensionMismatch df[:, 1] .= [1 2 3] - @test v1 == [100.0, 100.0, 100.0] + if isdefined(Base, :dotgetproperty) + df.x1 .= 'd' + @test df.x1 == ['d', 'd', 'd'] + @test eltype(df.x1) === Any + df[:, 1] .= "d" + @test df.x1 == ["d", "d", "d"] + @test_throws DimensionMismatch df[:, 1] .= [1 2 3] + @test v1 == [1.5, 2.5, 3.5] + else + df.x1 .= 'd' + @test v1 == [100.0, 100.0, 100.0] + @test_throws MethodError df[:, 1] .= "d" + @test v1 == [100.0, 100.0, 100.0] + @test_throws DimensionMismatch df[:, 1] .= [1 2 3] + @test v1 == [100.0, 100.0, 100.0] + end df = view(copy(refdf), :, :) if VERSION >= v"1.7" @@ -1870,17 +1909,52 @@ end end @testset "broadcasting of getproperty" begin + df = DataFrame(a=1:4) if isdefined(Base, :dotgetproperty) - df = DataFrame(a=1:4) df.b .= 1 + x = df.b df.c .= 4:-1:1 - # TODO: enable this in the future when the deprecation period is finished - # df.a .= 'a':'d' - # @test df.a isa Vector{Char} - # @test df == DataFrame(a='a':'d', b=1, c=4:-1:1) - # dfv = view(df, 2:3, 2:3) - # @test_throws ArgumentError dfv.b .= 0 + df.a .= 'a':'d' + @test df.a isa Vector{Char} + @test df == DataFrame(a='a':'d', b=1, c=4:-1:1) + + # in views also column replacement is performed + dfv = view(df, 2:3, 2:3) + dfv.b .= 0 + @test x == [1, 1, 1, 1] + @test df.b !== x + @test df == DataFrame(a='a':'d', b=[1, 0, 0, 1], c=4:-1:1) + dfv.c .= ["p", "q"] + @test df == DataFrame(a='a':'d', b=[1, 0, 0, 1], c=[4, "p", "q", 1]) + else + # Julia older than 1.7 + df[!, :b] .= 1 + x = df.b + df[!, :c] .= 4:-1:1 + df.a .= 'a':'d' + dfv = view(df, 2:3, 2:3) + dfv.b .= 0 + @test x == [1, 0, 0, 1] + @test df.b === x + @test df == DataFrame(a=97:100, b=[1, 0, 0, 1], c=4:-1:1) + @test_throws MethodError dfv.c .= ["p", "q"] + @test df == DataFrame(a=97:100, b=[1, 0, 0, 1], c=[4, 3, 2, 1]) end end +@testset "dotgetproperty on SubDataFrame" begin + df = DataFrame(a=1:3, b=4:6) + dfv = @view df[[3, 1], :] + if isdefined(Base, :dotgetproperty) + dfv.c .= [1, 2] + @test df ≅ DataFrame(a=1:3, b=4:6, c=[2, missing, 1]) + else + @test_throws ArgumentError dfv.c .= [1, 2] + end + + df = DataFrame(a=1:3, b=4:6) + dfv = @view df[[3, 1], 1:2] + @test_throws ArgumentError dfv.c .= [1, 2] +end + end # module diff --git a/test/dataframe.jl b/test/dataframe.jl index 382badf512..1f2a0fe553 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -1561,7 +1561,11 @@ end @test x == 1:10 df.y .= 1 @test df.y == [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - @test df.y === y + if isdefined(Base, :dotgetproperty) + @test y == 1.0:10.0 + else + @test df.y === y + end df.z = z @test df.z === z df[!, :zz] .= 1 diff --git a/test/deprecated.jl b/test/deprecated.jl index 09d71d9d90..6e2035564f 100644 --- a/test/deprecated.jl +++ b/test/deprecated.jl @@ -9,15 +9,6 @@ const ≅ = isequal @test_throws ArgumentError aggregate() end -@testset "deprecated broadcasting assignment" begin - df = DataFrame(a=1:4, b=1, c=2) - df.a .= 'a':'d' - @test df == DataFrame(a=97:100, b=1, c=2) - dfv = view(df, 2:3, 2:3) - dfv.b .= 0 - @test df.b == [1, 0, 0, 1] -end - @testset "indicator in joins" begin name = DataFrame(ID=[1, 2, 3], Name=["John Doe", "Jane Doe", "Joe Blogs"]) job = DataFrame(ID=[1, 2, 4], Job=["Lawyer", "Doctor", "Farmer"]) diff --git a/test/subdataframe_mutation.jl b/test/subdataframe_mutation.jl index 81360e20d0..94cc0ce078 100644 --- a/test/subdataframe_mutation.jl +++ b/test/subdataframe_mutation.jl @@ -1365,7 +1365,11 @@ end df = DataFrame(a=1:3) sdf = @view df[[3, 2], :] sdf.a .= 12.0 - @test eltype(sdf.a) === Int + if isdefined(Base, :dotgetproperty) + @test eltype(sdf.a) === Float64 + else + @test eltype(sdf.a) === Int + end @test df ≅ DataFrame(a=[1, 12, 12]) if VERSION >= v"1.7" @@ -1379,7 +1383,11 @@ end sdf = @view df[[3, 2], 1:1] @test_throws ArgumentError sdf.c = [5, 6] sdf.a .= 12.0 - @test eltype(sdf.a) === Int + if isdefined(Base, :dotgetproperty) + @test eltype(sdf.a) === Float64 + else + @test eltype(sdf.a) === Int + end @test df ≅ DataFrame(a=[1, 12, 12]) end