From 1c91afd4afbbf4b3c2c4fa06e530c7018f3f3b3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 31 Aug 2020 00:15:34 +0200 Subject: [PATCH 1/3] add Type to names as cols --- src/abstractdataframe/abstractdataframe.jl | 8 ++++++-- test/dataframe.jl | 12 ++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index 66d47ac317..d07c8e221f 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -70,7 +70,8 @@ Return a freshly allocated `Vector{String}` of names of columns contained in `df If `cols` is passed then restrict returned column names to those matching the selector (this is useful in particular with regular expressions, `Not`, and `Between`). -`cols` can be any column selector ($COLUMNINDEX_STR; $MULTICOLUMNINDEX_STR). +`cols` can be any column selector ($COLUMNINDEX_STR; $MULTICOLUMNINDEX_STR) +or a `Type`, in which case columns whose `eltype` is a subtype of `cols` are returned. See also [`propertynames`](@ref) which returns a `Vector{Symbol}`. """ @@ -80,9 +81,12 @@ function Base.names(df::AbstractDataFrame, cols) nms = _names(index(df)) idx = index(df)[cols] idxs = idx isa Int ? (idx:idx) : idx - return [string(nms[i]) for i in idxs] + return [String(nms[i]) for i in idxs] end +Base.names(df::AbstractDataFrame, T::Type) = + [String(n) for (n, c) in pairs(eachcol(df)) if eltype(c) <: T] + # _names returns Vector{Symbol} without copying _names(df::AbstractDataFrame) = _names(index(df)) diff --git a/test/dataframe.jl b/test/dataframe.jl index 991ec412da..bc44aecc56 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -2049,4 +2049,16 @@ end @test_throws ArgumentError push!(df, "a") end +@testset "names for Type" begin + df = DataFrame(a1 = 1:3, a2 = [1, missing, 3], + b1 = 1.0:3.0, b2 = [1.0, missing, 3.0], + c1 = '1':'3', c2 = ['1', missing, '3']) + @test names(df, Int) == ["a1"] + @test names(df, Union{Missing, Int}) == ["a1", "a2"] + @test names(df, Real) == ["a1", "b1"] + @test names(df, Union{Missing, Real}) == ["a1", "a2", "b1", "b2"] + @test names(df, Any) == names(df) + @test names(df, Union{Char, Float64, Missing}) == ["b1", "b2", "c1", "c2"] +end + end # module From 53ae91c3c2efe237ffa6f3d9fa155e10704adc23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 31 Aug 2020 08:32:42 +0200 Subject: [PATCH 2/3] update NEWS.md --- NEWS.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index f2d0a60fba..58da7150a5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -38,13 +38,15 @@ * add `rownumber` to `DataFrameRow` ([#2356](https://github.com/JuliaData/DataFrames.jl/pull/2356)) * allow passing column name to specify the position where a new columns should be inserted in `insertcols!` ([#2365](https://github.com/JuliaData/DataFrames.jl/pull/2365)) -* allow `GroupedDataFrame`s to be indexed using a dictionary, which can use `Symbol` or string keys and +* allow `GroupedDataFrame`s to be indexed using a dictionary, which can use `Symbol` or string keys and are not dependent on the order of keys. ([#2281](https://github.com/JuliaData/DataFrames.jl/pull/2281)) * add `isapprox` method to check for approximate equality between two dataframes ([#2373](https://github.com/JuliaData/DataFrames.jl/pull/2373)) * add `columnindex` for `DataFrameRow` ([#2380](https://github.com/JuliaData/DataFrames.jl/pull/2380)) - +* `names` now accepts `Type` as a column selector + ([#2400](https://github.com/JuliaData/DataFrames.jl/pull/2400)) + ## Deprecated * `DataFrame!` is now deprecated ([#2338](https://github.com/JuliaData/DataFrames.jl/pull/2338)) From 58e5762c943485818974e8a37e0aaa7e7804e65f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 31 Aug 2020 12:10:50 +0200 Subject: [PATCH 3/3] update deprecation of categorical --- src/deprecated.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/deprecated.jl b/src/deprecated.jl index e4686b830c..7e954a979b 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -49,14 +49,14 @@ function CategoricalArrays.categorical(df::AbstractDataFrame, if cols === nothing cols = Union{AbstractString, Missing} Base.depwarn("`categorical(df)` is deprecated. " * - "Use `cols = names(df)[map(c -> eltype(c) <: $cols, eachcol(df))]; transform(df, cols .=> $categoricalstr .=> cols)` instead.", + "Use `cols = names(df, $cols); transform(df, cols .=> $categoricalstr .=> cols)` instead.", :categorical) else Base.depwarn("`categorical(df, T)` is deprecated. " * - "Use `cols = names(df)[map(c -> eltype(c) <: T, eachcol(df))]; transform(df, cols .=> $categoricalstr .=> cols)` instead.", + "Use `cols = names(df, T); transform(df, cols .=> $categoricalstr .=> cols)` instead.", :categorical) end - colsstr = names(df)[map(c -> eltype(c) <: cols, eachcol(df))] + colsstr = names(df, cols) return transform(df, colsstr .=> (x -> categorical(x, compress=compress)) .=> colsstr) end @@ -97,13 +97,13 @@ function categorical!(df::DataFrame, cols::Union{Type, Nothing}=nothing; if cols === nothing cols = Union{AbstractString, Missing} Base.depwarn("`categorical!(df)` is deprecated. " * - "Use `cols = names(df)[map(c -> eltype(c) <: $cols, eachcol(df))]; transform!(df, cols .=> $categoricalstr .=> cols)` instead.", + "Use `cols = names(df, $cols); transform!(df, cols .=> $categoricalstr .=> cols)` instead.", :categorical!) else Base.depwarn("`categorical!(df, T)` is deprecated. " * - "Use `cols = names(df)[map(c -> eltype(c) <: T, eachcol(df))]; transform!(df, cols .=> $categoricalstr .=> cols)` instead.", + "Use `cols = names(df, T); transform!(df, cols .=> $categoricalstr .=> cols)` instead.", :categorical!) end - colsstr = names(df)[map(c -> eltype(c) <: cols, eachcol(df))] + colsstr = names(df, cols) return transform!(df, colsstr .=> (x -> categorical(x, compress=compress)) .=> colsstr) -end \ No newline at end of file +end