Skip to content

Commit

Permalink
make broadcasting assignment consistent with ! (#3022)
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins authored Mar 17, 2022
1 parent e7e4435 commit 333ba91
Show file tree
Hide file tree
Showing 7 changed files with 122 additions and 48 deletions.
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@
* `first` and `last` for `GroupedDataFrame` now support passing number of elements to get
([#3006](https://github.com/JuliaData/DataFrames.jl/issues/3006))

## Previously announced breaking changes

* On Julia 1.7 or newer broadcasting assignment
into an existing column of a data frame replaces it. Under Julia 1.6
or older it is an in place operation.
([#3022](https://github.com/JuliaData/DataFrames.jl/pull/3022)

# DataFrames.jl v1.3.2 Patch Release Notes

## Bug fixes
Expand Down
6 changes: 3 additions & 3 deletions docs/src/lib/indexing.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,10 @@ In the descriptions below `df` represents a `DataFrame`, `sdf` is a
`:` always expands to `axes(df, 1)` or `axes(sdf, 1)`.

`df.col` works like `df[!, col]` and `sdf.col` works like `sdf[!, col]` in all
cases except that `df.col .= v` and `sdf.col .= v` perform in-place broadcasting
cases. An exception is that under Julia 1.6 or earlier
`df.col .= v` and `sdf.col .= v` performs in-place broadcasting
if `col` is present in `df`/`sdf` and is a valid identifier (this inconsistency
is deprecated and in DataFrames.jl 1.4 release under Julia 1.7 and later both
syntaxes will be always consistent).
is not present under Julia 1.7 and later).

## `getindex` and `view`

Expand Down
18 changes: 4 additions & 14 deletions src/other/broadcasting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -129,21 +129,11 @@ end

if isdefined(Base, :dotgetproperty)
function Base.dotgetproperty(df::AbstractDataFrame, col::SymbolOrString)
if columnindex(df, col) == 0
if !is_column_insertion_allowed(df)
throw(ArgumentError("creating new columns in a SubDataFrame that subsets " *
"columns of its parent data frame is disallowed"))
end
# TODO: double check that this is tested
return LazyNewColDataFrame(df, Symbol(col))
else
# TODO: remove the deprecation in DataFrames.jl 1.4 release
Base.depwarn("In the 1.4 release of DataFrames.jl this operation will allocate a new column " *
"instead of performing an in-place assignment. " *
"To perform an in-place assignment use `df[:, col] .= ...` instead.",
:dotgetproperty)
return getproperty(df, col)
if columnindex(df, col) == 0 && !is_column_insertion_allowed(df)
throw(ArgumentError("creating new columns in a SubDataFrame that subsets " *
"columns of its parent data frame is disallowed"))
end
return LazyNewColDataFrame(df, Symbol(col))
end
end

Expand Down
112 changes: 93 additions & 19 deletions test/broadcasting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1463,12 +1463,41 @@ end

df = copy(refdf)
v1 = df[!, 1]
df.x1 .= 'd'
@test v1 == [100.0, 100.0, 100.0]
@test_throws MethodError df[:, 1] .= "d"
@test v1 == [100.0, 100.0, 100.0]
@test_throws DimensionMismatch df[:, 1] .= [1 2 3]
@test v1 == [100.0, 100.0, 100.0]
if isdefined(Base, :dotgetproperty)
df.x1 .= 'd'
@test df.x1 == ['d', 'd', 'd']
@test eltype(df.x1) === Char
@test_throws MethodError df[:, 1] .= "d"
@test_throws DimensionMismatch df[:, 1] .= [1 2 3]
@test v1 == [1.5, 2.5, 3.5]
else
df.x1 .= 'd'
@test v1 == [100.0, 100.0, 100.0]
@test_throws MethodError df[:, 1] .= "d"
@test v1 == [100.0, 100.0, 100.0]
@test_throws DimensionMismatch df[:, 1] .= [1 2 3]
@test v1 == [100.0, 100.0, 100.0]
end

if isdefined(Base, :dotgetproperty)
df = DataFrame(a=1:4, b=1, c=2)
df.a .= 'a':'d'
@test df == DataFrame(a='a':'d', b=1, c=2)
dfv = view(df, 2:3, 2:3)
x = df.b
dfv.b .= 0
@test df.b == [1, 0, 0, 1]
@test x == [1, 1, 1, 1]
else
df = DataFrame(a=1:4, b=1, c=2)
df.a .= 'a':'d'
@test df == DataFrame(a=97:100, b=1, c=2)
dfv = view(df, 2:3, 2:3)
x = df.b
dfv.b .= 0
@test df.b == [1, 0, 0, 1]
@test x === df.b
end

df = copy(refdf)
if isdefined(Base, :dotgetproperty)
Expand Down Expand Up @@ -1608,12 +1637,22 @@ end

df = view(copy(refdf), :, :)
v1 = df[!, 1]
df.x1 .= 'd'
@test v1 == [100.0, 100.0, 100.0]
@test_throws MethodError df[:, 1] .= "d"
@test v1 == [100.0, 100.0, 100.0]
@test_throws DimensionMismatch df[:, 1] .= [1 2 3]
@test v1 == [100.0, 100.0, 100.0]
if isdefined(Base, :dotgetproperty)
df.x1 .= 'd'
@test df.x1 == ['d', 'd', 'd']
@test eltype(df.x1) === Any
df[:, 1] .= "d"
@test df.x1 == ["d", "d", "d"]
@test_throws DimensionMismatch df[:, 1] .= [1 2 3]
@test v1 == [1.5, 2.5, 3.5]
else
df.x1 .= 'd'
@test v1 == [100.0, 100.0, 100.0]
@test_throws MethodError df[:, 1] .= "d"
@test v1 == [100.0, 100.0, 100.0]
@test_throws DimensionMismatch df[:, 1] .= [1 2 3]
@test v1 == [100.0, 100.0, 100.0]
end

df = view(copy(refdf), :, :)
if VERSION >= v"1.7"
Expand Down Expand Up @@ -1870,17 +1909,52 @@ end
end

@testset "broadcasting of getproperty" begin
df = DataFrame(a=1:4)
if isdefined(Base, :dotgetproperty)
df = DataFrame(a=1:4)
df.b .= 1
x = df.b
df.c .= 4:-1:1
# TODO: enable this in the future when the deprecation period is finished
# df.a .= 'a':'d'
# @test df.a isa Vector{Char}
# @test df == DataFrame(a='a':'d', b=1, c=4:-1:1)
# dfv = view(df, 2:3, 2:3)
# @test_throws ArgumentError dfv.b .= 0
df.a .= 'a':'d'
@test df.a isa Vector{Char}
@test df == DataFrame(a='a':'d', b=1, c=4:-1:1)

# in views also column replacement is performed
dfv = view(df, 2:3, 2:3)
dfv.b .= 0
@test x == [1, 1, 1, 1]
@test df.b !== x
@test df == DataFrame(a='a':'d', b=[1, 0, 0, 1], c=4:-1:1)
dfv.c .= ["p", "q"]
@test df == DataFrame(a='a':'d', b=[1, 0, 0, 1], c=[4, "p", "q", 1])
else
# Julia older than 1.7
df[!, :b] .= 1
x = df.b
df[!, :c] .= 4:-1:1
df.a .= 'a':'d'
dfv = view(df, 2:3, 2:3)
dfv.b .= 0
@test x == [1, 0, 0, 1]
@test df.b === x
@test df == DataFrame(a=97:100, b=[1, 0, 0, 1], c=4:-1:1)
@test_throws MethodError dfv.c .= ["p", "q"]
@test df == DataFrame(a=97:100, b=[1, 0, 0, 1], c=[4, 3, 2, 1])
end
end

@testset "dotgetproperty on SubDataFrame" begin
df = DataFrame(a=1:3, b=4:6)
dfv = @view df[[3, 1], :]
if isdefined(Base, :dotgetproperty)
dfv.c .= [1, 2]
@test df DataFrame(a=1:3, b=4:6, c=[2, missing, 1])
else
@test_throws ArgumentError dfv.c .= [1, 2]
end

df = DataFrame(a=1:3, b=4:6)
dfv = @view df[[3, 1], 1:2]
@test_throws ArgumentError dfv.c .= [1, 2]
end

end # module
6 changes: 5 additions & 1 deletion test/dataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1561,7 +1561,11 @@ end
@test x == 1:10
df.y .= 1
@test df.y == [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
@test df.y === y
if isdefined(Base, :dotgetproperty)
@test y == 1.0:10.0
else
@test df.y === y
end
df.z = z
@test df.z === z
df[!, :zz] .= 1
Expand Down
9 changes: 0 additions & 9 deletions test/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,6 @@ const ≅ = isequal
@test_throws ArgumentError aggregate()
end

@testset "deprecated broadcasting assignment" begin
df = DataFrame(a=1:4, b=1, c=2)
df.a .= 'a':'d'
@test df == DataFrame(a=97:100, b=1, c=2)
dfv = view(df, 2:3, 2:3)
dfv.b .= 0
@test df.b == [1, 0, 0, 1]
end

@testset "indicator in joins" begin
name = DataFrame(ID=[1, 2, 3], Name=["John Doe", "Jane Doe", "Joe Blogs"])
job = DataFrame(ID=[1, 2, 4], Job=["Lawyer", "Doctor", "Farmer"])
Expand Down
12 changes: 10 additions & 2 deletions test/subdataframe_mutation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1365,7 +1365,11 @@ end
df = DataFrame(a=1:3)
sdf = @view df[[3, 2], :]
sdf.a .= 12.0
@test eltype(sdf.a) === Int
if isdefined(Base, :dotgetproperty)
@test eltype(sdf.a) === Float64
else
@test eltype(sdf.a) === Int
end
@test df DataFrame(a=[1, 12, 12])

if VERSION >= v"1.7"
Expand All @@ -1379,7 +1383,11 @@ end
sdf = @view df[[3, 2], 1:1]
@test_throws ArgumentError sdf.c = [5, 6]
sdf.a .= 12.0
@test eltype(sdf.a) === Int
if isdefined(Base, :dotgetproperty)
@test eltype(sdf.a) === Float64
else
@test eltype(sdf.a) === Int
end
@test df DataFrame(a=[1, 12, 12])
end

Expand Down

0 comments on commit 333ba91

Please sign in to comment.