From 6967260d455a9b0d310fb9c197dbf9d0991d5855 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Sat, 24 Nov 2018 20:03:20 +0100 Subject: [PATCH 1/4] Deprecate head and tail in favor of first and last Now that we have decided that data frames are collections of rows, there is no point in defining separate head and tail functions, which are a legacy from the R API. Do not define one-argument methods defaulting to n=6 since other first and last methods in Base default to n=1, which should return a single row (as a NamedTuple or DataFrameRow). --- docs/src/lib/functions.md | 2 - docs/src/man/getting_started.md | 6 +- docs/src/man/reshaping_and_pivoting.md | 43 ++++--- docs/src/man/sorting.md | 24 ++-- docs/src/man/split_apply_combine.md | 4 +- src/abstractdataframe/abstractdataframe.jl | 39 ++---- src/dataframe/dataframe.jl | 2 +- src/deprecated.jl | 5 + test/data.jl | 2 +- test/dataframe.jl | 8 +- test/subdataframe.jl | 136 ++++++++++----------- 11 files changed, 127 insertions(+), 144 deletions(-) diff --git a/docs/src/lib/functions.md b/docs/src/lib/functions.md index 30dfdc41c5..05dedd2ab1 100644 --- a/docs/src/lib/functions.md +++ b/docs/src/lib/functions.md @@ -40,7 +40,6 @@ eachcol eltypes filter filter! -head insertcols! mapcols names! @@ -51,7 +50,6 @@ repeat show sort sort! -tail unique! permutecols! ``` diff --git a/docs/src/man/getting_started.md b/docs/src/man/getting_started.md index fd45024194..2b792d8a7f 100644 --- a/docs/src/man/getting_started.md +++ b/docs/src/man/getting_started.md @@ -208,10 +208,10 @@ julia> df = DataFrame(A = 1:2:1000, B = repeat(1:10, inner=50), C = 1:500) Printing options can be adjusted by calling the `show` function manually: `show(df, allrows=true)` prints all rows even if they do not fit on screen and `show(df, allcols=true)` does the same for columns. -The `head` and `tail` functions can be used to look at the first and last rows of a data frame (respectively): +The `first` and `last` functions can be used to look at the first and last rows of a data frame (respectively): ```jldoctest dataframe -julia> head(df) +julia> first(df, 6) 6×3 DataFrame │ Row │ A │ B │ C │ │ │ Int64 │ Int64 │ Int64 │ @@ -223,7 +223,7 @@ julia> head(df) │ 5 │ 9 │ 1 │ 5 │ │ 6 │ 11 │ 1 │ 6 │ -julia> tail(df) +julia> last(df, 6) 6×3 DataFrame │ Row │ A │ B │ C │ │ │ Int64 │ Int64 │ Int64 │ diff --git a/docs/src/man/reshaping_and_pivoting.md b/docs/src/man/reshaping_and_pivoting.md index b69b5b586e..e5e2b398f8 100644 --- a/docs/src/man/reshaping_and_pivoting.md +++ b/docs/src/man/reshaping_and_pivoting.md @@ -7,7 +7,7 @@ julia> using DataFrames, CSV julia> iris = CSV.read(joinpath(dirname(pathof(DataFrames)), "../test/data/iris.csv")); -julia> head(iris) +julia> first(iris, 6) 6×5 DataFrame │ Row │ SepalLength │ SepalWidth │ PetalLength │ PetalWidth │ Species │ │ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Categorical…⍰ │ @@ -19,7 +19,7 @@ julia> head(iris) │ 5 │ 5.0 │ 3.6 │ 1.4 │ 0.2 │ setosa │ │ 6 │ 5.4 │ 3.9 │ 1.7 │ 0.4 │ setosa │ -julia> tail(iris) +julia> last(iris, 6) 6×5 DataFrame │ Row │ SepalLength │ SepalWidth │ PetalLength │ PetalWidth │ Species │ │ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Categorical…⍰ │ @@ -33,7 +33,7 @@ julia> tail(iris) julia> d = stack(iris, 1:4); -julia> head(d) +julia> first(d, 6) 6×3 DataFrame │ Row │ variable │ value │ Species │ │ │ Symbol │ Float64⍰ │ Categorical…⍰ │ @@ -45,7 +45,7 @@ julia> head(d) │ 5 │ SepalLength │ 5.0 │ setosa │ │ 6 │ SepalLength │ 5.4 │ setosa │ -julia> tail(d) +julia> last(d, 6) 6×3 DataFrame │ Row │ variable │ value │ Species │ │ │ Symbol │ Float64⍰ │ Categorical…⍰ │ @@ -63,7 +63,7 @@ The second optional argument to `stack` indicates the columns to be stacked. The ```jldoctest reshape julia> d = stack(iris, [:SepalLength, :SepalWidth, :PetalLength, :PetalWidth]); -julia> head(d) +julia> first(d, 6) 6×3 DataFrame │ Row │ variable │ value │ Species │ │ │ Symbol │ Float64⍰ │ Categorical…⍰ │ @@ -75,8 +75,7 @@ julia> head(d) │ 5 │ SepalLength │ 5.0 │ setosa │ │ 6 │ SepalLength │ 5.4 │ setosa │ - -julia> tail(d) +julia> last(d, 6) 6×3 DataFrame │ Row │ variable │ value │ Species │ │ │ Symbol │ Float64⍰ │ Categorical…⍰ │ @@ -99,7 +98,7 @@ A third optional argument to `stack` represents the id columns that are repeated ```jldoctest reshape julia> d = stack(iris, [:SepalLength, :SepalWidth], :Species); -julia> head(d) +julia> first(d, 6) 6×3 DataFrame │ Row │ variable │ value │ Species │ │ │ Symbol │ Float64⍰ │ Categorical…⍰ │ @@ -111,7 +110,7 @@ julia> head(d) │ 5 │ SepalLength │ 5.0 │ setosa │ │ 6 │ SepalLength │ 5.4 │ setosa │ -julia> tail(d) +julia> last(d, 6) 6×3 DataFrame │ Row │ variable │ value │ Species │ │ │ Symbol │ Float64⍰ │ Categorical…⍰ │ @@ -129,7 +128,7 @@ julia> tail(d) ```jldoctest reshape julia> d = melt(iris, :Species); -julia> head(d) +julia> first(d, 6) 6×3 DataFrame │ Row │ variable │ value │ Species │ │ │ Symbol │ Float64⍰ │ Categorical…⍰ │ @@ -141,7 +140,7 @@ julia> head(d) │ 5 │ SepalLength │ 5.0 │ setosa │ │ 6 │ SepalLength │ 5.4 │ setosa │ -julia> tail(d) +julia> last(d, 6) 6×3 DataFrame │ Row │ variable │ value │ Species │ │ │ Symbol │ Float64⍰ │ Categorical…⍰ │ @@ -162,7 +161,7 @@ julia> iris[:id] = 1:size(iris, 1) julia> longdf = melt(iris, [:Species, :id]); -julia> head(longdf) +julia> first(longdf, 6) 6×4 DataFrame │ Row │ variable │ value │ Species │ id │ │ │ Symbol │ Float64⍰ │ Categorical…⍰ │ Int64 │ @@ -174,7 +173,7 @@ julia> head(longdf) │ 5 │ SepalLength │ 5.0 │ setosa │ 5 │ │ 6 │ SepalLength │ 5.4 │ setosa │ 6 │ -julia> tail(longdf) +julia> last(longdf, 6) 6×4 DataFrame │ Row │ variable │ value │ Species │ id │ │ │ Symbol │ Float64⍰ │ Categorical…⍰ │ Int64 │ @@ -188,7 +187,7 @@ julia> tail(longdf) julia> widedf = unstack(longdf, :id, :variable, :value); -julia> head(widedf) +julia> first(widedf, 6) 6×5 DataFrame │ Row │ id │ PetalLength │ PetalWidth │ SepalLength │ SepalWidth │ │ │ Int64 │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ @@ -200,7 +199,7 @@ julia> head(widedf) │ 5 │ 5 │ 1.4 │ 0.2 │ 5.0 │ 3.6 │ │ 6 │ 6 │ 1.7 │ 0.4 │ 5.4 │ 3.9 │ -julia> tail(widedf) +julia> last(widedf, 6) 6×5 DataFrame │ Row │ id │ PetalLength │ PetalWidth │ SepalLength │ SepalWidth │ │ │ Int64 │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ @@ -218,7 +217,7 @@ If the remaining columns are unique, you can skip the id variable and use: ```jldoctest reshape julia> longdf = melt(iris, [:Species, :id]); -julia> head(longdf) +julia> first(longdf, 6) 6×4 DataFrame │ Row │ variable │ value │ Species │ id │ │ │ Symbol │ Float64⍰ │ Categorical…⍰ │ Int64 │ @@ -232,7 +231,7 @@ julia> head(longdf) julia> widedf = unstack(longdf, :variable, :value); -julia> head(widedf) +julia> first(widedf, 6) 6×6 DataFrame │ Row │ Species │ id │ PetalLength │ PetalWidth │ SepalLength │ SepalWidth │ │ │ Categorical…⍰ │ Int64 │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ @@ -250,7 +249,7 @@ julia> head(widedf) ```jldoctest reshape julia> d = stackdf(iris); -julia> head(d) +julia> first(d, 6) 6×4 DataFrame │ Row │ variable │ value │ Species │ id │ │ │ Symbol │ Float64⍰ │ Categorical…⍰ │ Int64 │ @@ -262,7 +261,7 @@ julia> head(d) │ 5 │ SepalLength │ 5.0 │ setosa │ 5 │ │ 6 │ SepalLength │ 5.4 │ setosa │ 6 │ -julia> tail(d) +julia> last(d, 6) 6×4 DataFrame │ Row │ variable │ value │ Species │ id │ │ │ Symbol │ Float64⍰ │ Categorical…⍰ │ Int64 │ @@ -291,7 +290,7 @@ None of these reshaping functions perform any aggregation. To do aggregation, us ```jldoctest reshape julia> d = melt(iris, :Species); -julia> head(d) +julia> first(d, 6) 6×3 DataFrame │ Row │ variable │ value │ Species │ │ │ Symbol │ Float64⍰ │ Categorical…⍰ │ @@ -305,7 +304,7 @@ julia> head(d) julia> x = by(d, [:variable, :Species], df -> DataFrame(vsum = mean(df[:value]))); -julia> head(x) +julia> first(x, 6) 6×3 DataFrame │ Row │ variable │ Species │ vsum │ @@ -318,7 +317,7 @@ julia> head(x) │ 5 │ SepalWidth │ versicolor │ 2.77 │ │ 6 │ SepalWidth │ virginica │ 2.974 │ -julia> head(unstack(x, :Species, :vsum)) +julia> first(unstack(x, :Species, :vsum), 6) 5×4 DataFrame │ Row │ variable │ setosa │ versicolor │ virginica │ │ │ Symbol │ Float64⍰ │ Float64⍰ │ Float64⍰ │ diff --git a/docs/src/man/sorting.md b/docs/src/man/sorting.md index bc16e60d0d..af920b35d5 100644 --- a/docs/src/man/sorting.md +++ b/docs/src/man/sorting.md @@ -9,7 +9,7 @@ julia> iris = CSV.read(joinpath(dirname(pathof(DataFrames)), "../test/data/iris. julia> sort!(iris); -julia> head(iris) +julia> first(iris, 6) 6×5 DataFrame │ Row │ SepalLength │ SepalWidth │ PetalLength │ PetalWidth │ Species │ │ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Categorical…⍰ │ @@ -21,7 +21,7 @@ julia> head(iris) │ 5 │ 4.5 │ 2.3 │ 1.3 │ 0.3 │ setosa │ │ 6 │ 4.6 │ 3.1 │ 1.5 │ 0.2 │ setosa │ -julia> tail(iris) +julia> last(iris, 6) 6×5 DataFrame │ Row │ SepalLength │ SepalWidth │ PetalLength │ PetalWidth │ Species │ │ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Categorical…⍰ │ @@ -39,7 +39,7 @@ In Sorting `DataFrame`s, you may want to sort different columns with different o ```jldoctest sort julia> sort!(iris, rev = true); -julia> head(iris) +julia> first(iris, 6) 6×5 DataFrame │ Row │ SepalLength │ SepalWidth │ PetalLength │ PetalWidth │ Species │ │ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Categorical…⍰ │ @@ -51,7 +51,7 @@ julia> head(iris) │ 5 │ 7.7 │ 2.6 │ 6.9 │ 2.3 │ virginica │ │ 6 │ 7.6 │ 3.0 │ 6.6 │ 2.1 │ virginica │ -julia> tail(iris) +julia> last(iris, 6) 6×5 DataFrame │ Row │ SepalLength │ SepalWidth │ PetalLength │ PetalWidth │ Species │ │ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Categorical…⍰ │ @@ -65,7 +65,7 @@ julia> tail(iris) julia> sort!(iris, (:SepalWidth, :SepalLength)); -julia> head(iris) +julia> first(iris, 6) 6×5 DataFrame │ Row │ SepalLength │ SepalWidth │ PetalLength │ PetalWidth │ Species │ │ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Categorical…⍰ │ @@ -77,7 +77,7 @@ julia> head(iris) │ 5 │ 4.5 │ 2.3 │ 1.3 │ 0.3 │ setosa │ │ 6 │ 5.0 │ 2.3 │ 3.3 │ 1.0 │ versicolor │ -julia> tail(iris) +julia> last(iris, 6) 6×5 DataFrame │ Row │ SepalLength │ SepalWidth │ PetalLength │ PetalWidth │ Species │ │ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Categorical…⍰ │ @@ -92,7 +92,7 @@ julia> tail(iris) julia> sort!(iris, (order(:Species, by = uppercase), order(:SepalLength, rev = true))); -julia> head(iris) +julia> first(iris, 6) 6×5 DataFrame │ Row │ SepalLength │ SepalWidth │ PetalLength │ PetalWidth │ Species │ │ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Categorical…⍰ │ @@ -104,7 +104,7 @@ julia> head(iris) │ 5 │ 5.5 │ 4.2 │ 1.4 │ 0.2 │ setosa │ │ 6 │ 5.4 │ 3.4 │ 1.7 │ 0.2 │ setosa │ -julia> tail(iris) +julia> last(iris, 6) 6×5 DataFrame │ Row │ SepalLength │ SepalWidth │ PetalLength │ PetalWidth │ Species │ │ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Categorical…⍰ │ @@ -127,7 +127,7 @@ The following two examples show two ways to sort the `iris` dataset with the sam julia> sort!(iris, (:Species, :SepalLength, :SepalWidth), rev = (true, false, false)); -julia> head(iris) +julia> first(iris, 6) 6×5 DataFrame │ Row │ SepalLength │ SepalWidth │ PetalLength │ PetalWidth │ Species │ │ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Categorical…⍰ │ @@ -139,7 +139,7 @@ julia> head(iris) │ 5 │ 5.8 │ 2.7 │ 5.1 │ 1.9 │ virginica │ │ 6 │ 5.8 │ 2.8 │ 5.1 │ 2.4 │ virginica │ -julia> tail(iris) +julia> last(iris, 6) 6×5 DataFrame │ Row │ SepalLength │ SepalWidth │ PetalLength │ PetalWidth │ Species │ │ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Categorical…⍰ │ @@ -153,7 +153,7 @@ julia> tail(iris) julia> sort!(iris, (order(:Species, rev = true), :SepalLength, :SepalWidth)); -julia> head(iris) +julia> first(iris, 6) 6×5 DataFrame │ Row │ SepalLength │ SepalWidth │ PetalLength │ PetalWidth │ Species │ │ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Categorical…⍰ │ @@ -165,7 +165,7 @@ julia> head(iris) │ 5 │ 5.8 │ 2.7 │ 5.1 │ 1.9 │ virginica │ │ 6 │ 5.8 │ 2.8 │ 5.1 │ 2.4 │ virginica │ -julia> tail(iris) +julia> last(iris, 6) 6×5 DataFrame │ Row │ SepalLength │ SepalWidth │ PetalLength │ PetalWidth │ Species │ │ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Categorical…⍰ │ diff --git a/docs/src/man/split_apply_combine.md b/docs/src/man/split_apply_combine.md index 58d322c79d..9cdf9e031e 100644 --- a/docs/src/man/split_apply_combine.md +++ b/docs/src/man/split_apply_combine.md @@ -11,7 +11,7 @@ julia> using DataFrames, CSV, Statistics julia> iris = CSV.read(joinpath(dirname(pathof(DataFrames)), "../test/data/iris.csv")); -julia> head(iris) +julia> first(iris, 6) 6×5 DataFrame │ Row │ SepalLength │ SepalWidth │ PetalLength │ PetalWidth │ Species │ │ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Categorical…⍰ │ @@ -23,7 +23,7 @@ julia> head(iris) │ 5 │ 5.0 │ 3.6 │ 1.4 │ 0.2 │ setosa │ │ 6 │ 5.4 │ 3.9 │ 1.7 │ 0.4 │ setosa │ -julia> tail(iris) +julia> last(iris, 6) 6×5 DataFrame │ Row │ SepalLength │ SepalWidth │ PetalLength │ PetalWidth │ Species │ │ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ Categorical…⍰ │ diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index b1677bc1b3..cae2d5a4c1 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -23,8 +23,8 @@ The following are normally implemented for AbstractDataFrames: * [`eltypes`](@ref) : `eltype` of each column * `length` : number of columns * `size` : (nrows, ncols) -* [`head`](@ref) : first `n` rows -* [`tail`](@ref) : last `n` rows +* [`first`](@ref) : first `n` rows +* [`last`](@ref) : last `n` rows * `convert` : convert to an array * [`completecases`](@ref) : boolean vector of complete cases (rows with no missings) * [`dropmissing`](@ref) : remove rows with missing values @@ -288,38 +288,19 @@ Base.isempty(df::AbstractDataFrame) = size(df, 1) == 0 || size(df, 2) == 0 ## ############################################################################## -head(df::AbstractDataFrame, r::Int) = df[1:min(r,nrow(df)), :] -head(df::AbstractDataFrame) = head(df, 6) -tail(df::AbstractDataFrame, r::Int) = df[max(1,nrow(df)-r+1):nrow(df), :] -tail(df::AbstractDataFrame) = tail(df, 6) - """ -Show the first or last part of an AbstractDataFrame + first(df::AbstractDataFrame, n::Integer) -```julia -head(df::AbstractDataFrame, r::Int = 6) -tail(df::AbstractDataFrame, r::Int = 6) -``` - -**Arguments** - -* `df` : the AbstractDataFrame -* `r` : the number of rows to show - -**Result** - -* `::AbstractDataFrame` : the first or last part of `df` - -**Examples** +Get a data frame with the `n` first rows of `df`. +""" +Base.first(df::AbstractDataFrame, n::Integer) = df[1:min(n,nrow(df)), :] -```julia -df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10)) -head(df) -tail(df) -``` +""" + last(df::AbstractDataFrame, n::Integer) +Get a data frame with the `n` last rows of `df`. """ -(head, tail) +Base.last(df::AbstractDataFrame, n::Integer) = df[max(1,nrow(df)-n+1):nrow(df), :] # get the structure of a df """ diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 4cff773383..52b56b20a4 100644 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -68,7 +68,7 @@ df2 = DataFrame(A = 1:10, B = v, C = rand(10)) dump(df1) dump(df2) describe(df2) -head(df1) +first(df1, 10) df1[:A] + df2[:C] df1[1:4, 1:2] df1[[:A,:C]] diff --git a/src/deprecated.jl b/src/deprecated.jl index 28826b43e2..19dcfe36c1 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -1756,3 +1756,8 @@ import Base: map import Base: length @deprecate length(df::AbstractDataFrame) size(df, 2) + +@deprecate head(df::AbstractDataFrame) first(df, 6) +@deprecate tail(df::AbstractDataFrame) last(df, 6) +@deprecate head(df::AbstractDataFrame, n::Integer) first(df, n) +@deprecate tail(df::AbstractDataFrame, n::Integer) last(df, n) \ No newline at end of file diff --git a/test/data.jl b/test/data.jl index b8b8bf842c..0066c7b3b8 100644 --- a/test/data.jl +++ b/test/data.jl @@ -32,7 +32,7 @@ module TestData @test size(df6[2:2,:], 1) == 1 @test size(df6[[1, 3], [1, 3]]) == (2, 2) @test size(df6[1:2, 1:2]) == (2, 2) - @test size(head(df6,2)) == (2, 3) + @test size(first(df6,2)) == (2, 3) # lots more to do #test_group("assign") diff --git a/test/dataframe.jl b/test/dataframe.jl index a685adb082..e685683fa5 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -665,10 +665,10 @@ module TestDataFrame @testset "description" begin df = DataFrame(A = 1:10) - @test head(df) == DataFrame(A = 1:6) - @test head(df, 1) == DataFrame(A = 1) - @test tail(df) == DataFrame(A = 5:10) - @test tail(df, 1) == DataFrame(A = 10) + @test first(df, 6) == DataFrame(A = 1:6) + @test first(df, 1) == DataFrame(A = 1) + @test last(df, 6) == DataFrame(A = 5:10) + @test last(df, 1) == DataFrame(A = 10) end @testset "misc" begin diff --git a/test/subdataframe.jl b/test/subdataframe.jl index 88dd4846c6..071ea2ffbe 100644 --- a/test/subdataframe.jl +++ b/test/subdataframe.jl @@ -11,42 +11,42 @@ module TestSubDataFrame @testset "view -- DataFrame" begin df = DataFrame(x = 1:10, y = 1.0:10.0) - @test view(df, 1, :) == head(df, 1) - @test view(df, UInt(1), :) == head(df, 1) - @test view(df, BigInt(1), :) == head(df, 1) - @test view(df, 1:2, :) == head(df, 2) - @test view(df, vcat(trues(2), falses(8)), :) == head(df, 2) - @test view(df, [1, 2], :) == head(df, 2) - @test view(df, 1, :x) == head(df[[:x]], 1) - @test view(df, 1:2, :x) == head(df[[:x]], 2) - @test view(df, vcat(trues(2), falses(8)), :x) == head(df[[:x]], 2) - @test view(df, [1, 2], :x) == head(df[[:x]], 2) - @test view(df, 1, 1) == head(df[[:x]], 1) - @test view(df, 1:2, 1) == head(df[[:x]], 2) - @test view(df, vcat(trues(2), falses(8)), 1) == head(df[[:x]], 2) - @test view(df, [1, 2], 1) == head(df[[:x]], 2) - @test view(df, 1, [:x, :y]) == head(df, 1) - @test view(df, 1:2, [:x, :y]) == head(df, 2) - @test view(df, vcat(trues(2), falses(8)), [:x, :y]) == head(df, 2) - @test view(df, [1, 2], [:x, :y]) == head(df, 2) - @test view(df, 1, [1, 2]) == head(df, 1) - @test view(df, 1:2, [1, 2]) == head(df, 2) - @test view(df, vcat(trues(2), falses(8)), [1, 2]) == head(df, 2) - @test view(df, [1, 2], [1, 2]) == head(df, 2) - @test view(df, 1, trues(2)) == head(df, 1) - @test view(df, 1:2, trues(2)) == head(df, 2) - @test view(df, vcat(trues(2), falses(8)), trues(2)) == head(df, 2) - @test view(df, [1, 2], trues(2)) == head(df, 2) - @test view(df, Integer[1, 2], :) == head(df, 2) - @test view(df, UInt[1, 2], :) == head(df, 2) - @test view(df, BigInt[1, 2], :) == head(df, 2) - @test view(df, Union{Int, Missing}[1, 2], :) == head(df, 2) - @test view(df, Union{Integer, Missing}[1, 2], :) == head(df, 2) - @test view(df, Union{UInt, Missing}[1, 2], :) == head(df, 2) - @test view(df, Union{BigInt, Missing}[1, 2], :) == head(df, 2) + @test view(df, 1, :) == first(df, 1) + @test view(df, UInt(1), :) == first(df, 1) + @test view(df, BigInt(1), :) == first(df, 1) + @test view(df, 1:2, :) == first(df, 2) + @test view(df, vcat(trues(2), falses(8)), :) == first(df, 2) + @test view(df, [1, 2], :) == first(df, 2) + @test view(df, 1, :x) == first(df[[:x]], 1) + @test view(df, 1:2, :x) == first(df[[:x]], 2) + @test view(df, vcat(trues(2), falses(8)), :x) == first(df[[:x]], 2) + @test view(df, [1, 2], :x) == first(df[[:x]], 2) + @test view(df, 1, 1) == first(df[[:x]], 1) + @test view(df, 1:2, 1) == first(df[[:x]], 2) + @test view(df, vcat(trues(2), falses(8)), 1) == first(df[[:x]], 2) + @test view(df, [1, 2], 1) == first(df[[:x]], 2) + @test view(df, 1, [:x, :y]) == first(df, 1) + @test view(df, 1:2, [:x, :y]) == first(df, 2) + @test view(df, vcat(trues(2), falses(8)), [:x, :y]) == first(df, 2) + @test view(df, [1, 2], [:x, :y]) == first(df, 2) + @test view(df, 1, [1, 2]) == first(df, 1) + @test view(df, 1:2, [1, 2]) == first(df, 2) + @test view(df, vcat(trues(2), falses(8)), [1, 2]) == first(df, 2) + @test view(df, [1, 2], [1, 2]) == first(df, 2) + @test view(df, 1, trues(2)) == first(df, 1) + @test view(df, 1:2, trues(2)) == first(df, 2) + @test view(df, vcat(trues(2), falses(8)), trues(2)) == first(df, 2) + @test view(df, [1, 2], trues(2)) == first(df, 2) + @test view(df, Integer[1, 2], :) == first(df, 2) + @test view(df, UInt[1, 2], :) == first(df, 2) + @test view(df, BigInt[1, 2], :) == first(df, 2) + @test view(df, Union{Int, Missing}[1, 2], :) == first(df, 2) + @test view(df, Union{Integer, Missing}[1, 2], :) == first(df, 2) + @test view(df, Union{UInt, Missing}[1, 2], :) == first(df, 2) + @test view(df, Union{BigInt, Missing}[1, 2], :) == first(df, 2) @test view(df, :) == df @test view(df, :, :) == df - @test view(df, 1, :) == head(df, 1) + @test view(df, 1, :) == first(df, 1) @test view(df, :, 1) == df[:, [1]] @test_throws ArgumentError view(df, [missing, 1]) @test_throws ArgumentError view(df, [missing, 1], :) @@ -54,42 +54,42 @@ module TestSubDataFrame @testset "view -- SubDataFrame" begin df = view(DataFrame(x = 1:10, y = 1.0:10.0), 1:10) - @test view(df, 1, :) == head(df, 1) - @test view(df, UInt(1), :) == head(df, 1) - @test view(df, BigInt(1), :) == head(df, 1) - @test view(df, 1:2, :) == head(df, 2) - @test view(df, vcat(trues(2), falses(8)), :) == head(df, 2) - @test view(df, [1, 2], :) == head(df, 2) - @test view(df, 1, :x) == head(df[[:x]], 1) - @test view(df, 1:2, :x) == head(df[[:x]], 2) - @test view(df, vcat(trues(2), falses(8)), :x) == head(df[[:x]], 2) - @test view(df, [1, 2], :x) == head(df[[:x]], 2) - @test view(df, 1, 1) == head(df[[:x]], 1) - @test view(df, 1:2, 1) == head(df[[:x]], 2) - @test view(df, vcat(trues(2), falses(8)), 1) == head(df[[:x]], 2) - @test view(df, [1, 2], 1) == head(df[[:x]], 2) - @test view(df, 1, [:x, :y]) == head(df, 1) - @test view(df, 1:2, [:x, :y]) == head(df, 2) - @test view(df, vcat(trues(2), falses(8)), [:x, :y]) == head(df, 2) - @test view(df, [1, 2], [:x, :y]) == head(df, 2) - @test view(df, 1, [1, 2]) == head(df, 1) - @test view(df, 1:2, [1, 2]) == head(df, 2) - @test view(df, vcat(trues(2), falses(8)), [1, 2]) == head(df, 2) - @test view(df, [1, 2], [1, 2]) == head(df, 2) - @test view(df, 1, trues(2)) == head(df, 1) - @test view(df, 1:2, trues(2)) == head(df, 2) - @test view(df, vcat(trues(2), falses(8)), trues(2)) == head(df, 2) - @test view(df, [1, 2], trues(2)) == head(df, 2) - @test view(df, Integer[1, 2], :) == head(df, 2) - @test view(df, UInt[1, 2], :) == head(df, 2) - @test view(df, BigInt[1, 2], :) == head(df, 2) - @test view(df, Union{Int, Missing}[1, 2], :) == head(df, 2) - @test view(df, Union{Integer, Missing}[1, 2], :) == head(df, 2) - @test view(df, Union{UInt, Missing}[1, 2], :) == head(df, 2) - @test view(df, Union{BigInt, Missing}[1, 2], :) == head(df, 2) + @test view(df, 1, :) == first(df, 1) + @test view(df, UInt(1), :) == first(df, 1) + @test view(df, BigInt(1), :) == first(df, 1) + @test view(df, 1:2, :) == first(df, 2) + @test view(df, vcat(trues(2), falses(8)), :) == first(df, 2) + @test view(df, [1, 2], :) == first(df, 2) + @test view(df, 1, :x) == first(df[[:x]], 1) + @test view(df, 1:2, :x) == first(df[[:x]], 2) + @test view(df, vcat(trues(2), falses(8)), :x) == first(df[[:x]], 2) + @test view(df, [1, 2], :x) == first(df[[:x]], 2) + @test view(df, 1, 1) == first(df[[:x]], 1) + @test view(df, 1:2, 1) == first(df[[:x]], 2) + @test view(df, vcat(trues(2), falses(8)), 1) == first(df[[:x]], 2) + @test view(df, [1, 2], 1) == first(df[[:x]], 2) + @test view(df, 1, [:x, :y]) == first(df, 1) + @test view(df, 1:2, [:x, :y]) == first(df, 2) + @test view(df, vcat(trues(2), falses(8)), [:x, :y]) == first(df, 2) + @test view(df, [1, 2], [:x, :y]) == first(df, 2) + @test view(df, 1, [1, 2]) == first(df, 1) + @test view(df, 1:2, [1, 2]) == first(df, 2) + @test view(df, vcat(trues(2), falses(8)), [1, 2]) == first(df, 2) + @test view(df, [1, 2], [1, 2]) == first(df, 2) + @test view(df, 1, trues(2)) == first(df, 1) + @test view(df, 1:2, trues(2)) == first(df, 2) + @test view(df, vcat(trues(2), falses(8)), trues(2)) == first(df, 2) + @test view(df, [1, 2], trues(2)) == first(df, 2) + @test view(df, Integer[1, 2], :) == first(df, 2) + @test view(df, UInt[1, 2], :) == first(df, 2) + @test view(df, BigInt[1, 2], :) == first(df, 2) + @test view(df, Union{Int, Missing}[1, 2], :) == first(df, 2) + @test view(df, Union{Integer, Missing}[1, 2], :) == first(df, 2) + @test view(df, Union{UInt, Missing}[1, 2], :) == first(df, 2) + @test view(df, Union{BigInt, Missing}[1, 2], :) == first(df, 2) @test view(df, :) == df @test view(df, :, :) == df - @test view(df, 1, :) == head(df, 1) + @test view(df, 1, :) == first(df, 1) @test view(df, :, 1) == df[:, [1]] @test_throws ArgumentError view(df, [missing, 1]) @test_throws ArgumentError view(df, [missing, 1], :) From 4436646688552c305d43144712ac7b75a60af598 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Sun, 25 Nov 2018 19:47:01 +0100 Subject: [PATCH 2/4] Remove exports --- src/DataFrames.jl | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/DataFrames.jl b/src/DataFrames.jl index b234e3479e..38966ec768 100644 --- a/src/DataFrames.jl +++ b/src/DataFrames.jl @@ -41,7 +41,6 @@ export AbstractDataFrame, eachrow, eltypes, groupby, - head, insertcols!, mapcols, melt, @@ -58,8 +57,6 @@ export AbstractDataFrame, stackdf, unique!, unstack, - head, - tail, permutecols! From cf55ae5eed2f9740d9a0a522f7229e2c55b0a0b7 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Sun, 25 Nov 2018 19:47:15 +0100 Subject: [PATCH 3/4] Define first and last with single argument --- src/abstractdataframe/abstractdataframe.jl | 14 ++++++++++++++ test/dataframe.jl | 6 ++++++ 2 files changed, 20 insertions(+) diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index cae2d5a4c1..537c6526db 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -288,6 +288,13 @@ Base.isempty(df::AbstractDataFrame) = size(df, 1) == 0 || size(df, 2) == 0 ## ############################################################################## +""" + first(df::AbstractDataFrame) + +Get the first row of `df` as a `DataFrameRow`. +""" +Base.first(df::AbstractDataFrame) = df[1, :] + """ first(df::AbstractDataFrame, n::Integer) @@ -295,6 +302,13 @@ Get a data frame with the `n` first rows of `df`. """ Base.first(df::AbstractDataFrame, n::Integer) = df[1:min(n,nrow(df)), :] +""" + last(df::AbstractDataFrame) + +Get the last row of `df` as a `DataFrameRow`. +""" +Base.last(df::AbstractDataFrame) = df[nrow(df), :] + """ last(df::AbstractDataFrame, n::Integer) diff --git a/test/dataframe.jl b/test/dataframe.jl index e685683fa5..b2fae4ac59 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -665,6 +665,12 @@ module TestDataFrame @testset "description" begin df = DataFrame(A = 1:10) + + @test first(df) == df[1, :] + @test last(df) == df[10, :] + @test_throws BoundsError first(DataFrame(x=[])) + @test_throws BoundsError last(DataFrame(x=[])) + @test first(df, 6) == DataFrame(A = 1:6) @test first(df, 1) == DataFrame(A = 1) @test last(df, 6) == DataFrame(A = 5:10) From 605f9efaac8ad7895c61ec0f61e567f0a7d66c2d Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Sun, 25 Nov 2018 21:02:21 +0100 Subject: [PATCH 4/4] Use end --- test/dataframe.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/dataframe.jl b/test/dataframe.jl index b2fae4ac59..ac46b8ad20 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -667,7 +667,7 @@ module TestDataFrame df = DataFrame(A = 1:10) @test first(df) == df[1, :] - @test last(df) == df[10, :] + @test last(df) == df[end, :] @test_throws BoundsError first(DataFrame(x=[])) @test_throws BoundsError last(DataFrame(x=[]))