From 7aec87da8fec28013f54c19e23e4ba98350a80f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 25 Sep 2023 22:20:04 +0200 Subject: [PATCH] add cols kwarg to rename/rename! (#3380) --- NEWS.md | 9 +++++ src/abstractdataframe/abstractdataframe.jl | 42 +++++++++++++++------- src/other/index.jl | 2 -- test/dataframe.jl | 13 +++++++ test/index.jl | 6 +--- 5 files changed, 52 insertions(+), 20 deletions(-) diff --git a/NEWS.md b/NEWS.md index 13455915d..bc0606d74 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,12 @@ +# DataFrames.jl v1.7.0 Release Notes + +## New functionalities + +* `rename` and `rename!` now allow applying a function transforming + column names only to a subset of the columns specified by the `cols` + keyword argument + ([#3380](https://github.com/JuliaData/DataFrames.jl/pull/3380)) + # DataFrames.jl v1.6.1 Release Notes ## Bug fixes diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index a40627c6a..600601506 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -123,7 +123,7 @@ Compat.hasproperty(df::AbstractDataFrame, s::AbstractString) = haskey(index(df), rename!(df::AbstractDataFrame, (from => to)::Pair...) rename!(df::AbstractDataFrame, d::AbstractDict) rename!(df::AbstractDataFrame, d::AbstractVector{<:Pair}) - rename!(f::Function, df::AbstractDataFrame) + rename!(f::Function, df::AbstractDataFrame; cols=All()) Rename columns of `df` in-place. Each name is changed at most once. Permutation of names is allowed. @@ -132,8 +132,10 @@ Each name is changed at most once. Permutation of names is allowed. 
- `df` : the `AbstractDataFrame` - `d` : an `AbstractDict` or an `AbstractVector` of `Pair`s that maps the original names or column numbers to new names -- `f` : a function which for each column takes the old name as a `String` - and returns the new name that gets converted to a `Symbol` +- `f` : a function which for each column selected by the `cols` keyword argument + takes the old name as a `String` + and returns the new name that gets converted to a `Symbol`; the `cols` + column selector can be any value accepted as a column selector by the `names` function - `vals` : new column names as a vector of `Symbol`s or `AbstractString`s of the same length as the number of columns in `df` - `makeunique` : if `false` (the default), an error will be raised @@ -194,6 +196,14 @@ julia> rename!(uppercase, df) │ Int64 Int64 Int64 ─────┼───────────────────── 1 │ 1 2 3 + +julia> rename!(lowercase, df, cols=contains('A')) +1×3 DataFrame + Row │ a B a_1 + │ Int64 Int64 Int64 +─────┼───────────────────── + 1 │ 1 2 3 + ``` """ function rename!(df::AbstractDataFrame, vals::AbstractVector{Symbol}; @@ -252,12 +262,8 @@ end rename!(df::AbstractDataFrame, args::Pair...) = rename!(df, collect(args)) -function rename!(f::Function, df::AbstractDataFrame) - rename!(f, index(df)) - # renaming columns of SubDataFrame has to clean non-note metadata in its parent - _drop_all_nonnote_metadata!(parent(df)) - return df -end +rename!(f::Function, df::AbstractDataFrame; cols=All()) = + rename!(df, [n => Symbol(f(n)) for n in names(df, cols)]) """ rename(df::AbstractDataFrame, vals::AbstractVector{Symbol}; @@ -267,7 +273,7 @@ end rename(df::AbstractDataFrame, (from => to)::Pair...) rename(df::AbstractDataFrame, d::AbstractDict) rename(df::AbstractDataFrame, d::AbstractVector{<:Pair}) - rename(f::Function, df::AbstractDataFrame) + rename(f::Function, df::AbstractDataFrame; cols=All()) Create a new data frame that is a copy of `df` with changed column names. Each name is changed at most once. 
Permutation of names is allowed. @@ -277,8 +283,10 @@ Each name is changed at most once. Permutation of names is allowed. only allowed if it was created using `:` as a column selector. - `d` : an `AbstractDict` or an `AbstractVector` of `Pair`s that maps the original names or column numbers to new names -- `f` : a function which for each column takes the old name as a `String` - and returns the new name that gets converted to a `Symbol` +- `f` : a function which for each column selected by the `cols` keyword argument + takes the old name as a `String` + and returns the new name that gets converted to a `Symbol`; the `cols` + column selector can be any value accepted as a column selector by the `names` function - `vals` : new column names as a vector of `Symbol`s or `AbstractString`s of the same length as the number of columns in `df` - `makeunique` : if `false` (the default), an error will be raised @@ -350,6 +358,14 @@ julia> rename(uppercase, df) │ Int64 Int64 Int64 ─────┼───────────────────── 1 │ 1 2 3 + +julia> rename(uppercase, df, cols=contains('x')) +1×3 DataFrame + Row │ i X y + │ Int64 Int64 Int64 +─────┼───────────────────── + 1 │ 1 2 3 + ``` """ rename(df::AbstractDataFrame, vals::AbstractVector{Symbol}; @@ -357,7 +373,7 @@ rename(df::AbstractDataFrame, vals::AbstractVector{Symbol}; rename(df::AbstractDataFrame, vals::AbstractVector{<:AbstractString}; makeunique::Bool=false) = rename!(copy(df), vals, makeunique=makeunique) rename(df::AbstractDataFrame, args...) = rename!(copy(df), args...) 
-rename(f::Function, df::AbstractDataFrame) = rename!(f, copy(df)) +rename(f::Function, df::AbstractDataFrame; cols=All()) = rename!(f, copy(df); cols=cols) """ size(df::AbstractDataFrame[, dim]) diff --git a/src/other/index.jl b/src/other/index.jl index 51aa3a31c..ae9358d38 100644 --- a/src/other/index.jl +++ b/src/other/index.jl @@ -108,8 +108,6 @@ function rename!(x::Index, nms::AbstractVector{Pair{Symbol, Symbol}}) return x end -rename!(f::Function, x::Index) = rename!(x, [(n=>Symbol(f(string(n)))) for n in x.names]) - # we do not define keys on purpose; # use names to get keys as strings with copying # or _names to get keys as Symbols without copying diff --git a/test/dataframe.jl b/test/dataframe.jl index 971d7626d..fbc2ec0ca 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -1112,6 +1112,19 @@ end df = DataFrame(A=1) asview && (df=view(df, :, :)) @test rename(x -> 1, df) == DataFrame(Symbol("1") => 1) + + for cols in (:B, Not("A"), Cols(2), Char, contains('B')) + df = DataFrame(A=1:3, B='A':'C') + asview && (df = view(df, :, :)) + @test names(rename(lowercase, df, cols=cols)) == ["A", "b"] + @test names(df) == ["A", "B"] + rename!(lowercase, df, cols=cols) + @test names(df) == ["A", "b"] + end + df = DataFrame(A=1:3, B='A':'C') + asview && (df = view(df, :, :)) + @test names(rename(lowercase, df, cols=[:A, :B])) == ["a", "b"] + @test names(rename(lowercase, df, cols=Not(:))) == ["A", "B"] end sdf = view(DataFrame(ones(2, 3), :auto), 1:2, 1:3) diff --git a/test/index.jl b/test/index.jl index fc82540ea..263b90bbb 100644 --- a/test/index.jl +++ b/test/index.jl @@ -50,7 +50,7 @@ using DataFrames: Index, SubIndex, fuzzymatch @test_throws ArgumentError i[Not(:x)] @test_throws ArgumentError i[Not("x")] @test_throws BoundsError i[Not(1:3)] - + @test i[Not([1, 1])] == [2] @test i[Not([:A, :A])] == [2] @test i[Not(["A", "A"])] == [2] @@ -84,10 +84,6 @@ end @test rename!(copy(i), [:a => :A]) == Index([:A, :b]) @test rename!(copy(i), [:a => :a]) == Index([:a, 
:b]) @test rename!(copy(i), [:a => :b, :b => :a]) == Index([:b, :a]) - @test rename!(x -> Symbol(uppercase(string(x))), copy(i)) == Index([:A, :B]) - @test rename!(x -> Symbol(lowercase(string(x))), copy(i)) == Index([:a, :b]) - @test rename!(uppercase, copy(i)) == Index([:A, :B]) - @test rename!(lowercase, copy(i)) == Index([:a, :b]) @test delete!(i, :a) == Index([:b]) push!(i, :C)