Skip to content

Commit

Permalink
Add filter() and filter!() methods (#1330)
Browse files Browse the repository at this point in the history
Currently these cannot be made efficient as the column types are not encoded in DataFrameRow,
but this can be improved in the future with NamedTuples.
  • Loading branch information
nalimilan authored and ararslan committed Dec 26, 2017
1 parent f24817d commit 630660e
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 0 deletions.
2 changes: 2 additions & 0 deletions docs/src/lib/functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ dropmissing!
eachcol
eachrow
eltypes
filter
filter!
head
names
names!
Expand Down
23 changes: 23 additions & 0 deletions docs/src/man/subsets.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,26 @@ julia> df[1:3, [:B, :A]]
│ 3 │ 6 │ 3 │
```

Selecting a subset of rows matching a condition:

```jldoctest subsets
julia> filter(row -> row[:A] > 5, df)
5×2 DataFrames.DataFrame
│ Row │ A │ B │
├─────┼────┼────┤
│ 1 │ 6 │ 12 │
│ 2 │ 7 │ 14 │
│ 3 │ 8 │ 16 │
│ 4 │ 9 │ 18 │
│ 5 │ 10 │ 20 │
julia> filter(row -> (row[:A] > 5) && (row[:B] < 15), df)
2×2 DataFrames.DataFrame
│ Row │ A │ B │
├─────┼───┼────┤
│ 1 │ 6 │ 12 │
│ 2 │ 7 │ 14 │
```

`filter!` behaves the same way, but modifies the data frame in place.
57 changes: 57 additions & 0 deletions src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,63 @@ dropmissing!(df)
"""
function dropmissing!(df::AbstractDataFrame)
    # Indices of the rows for which `completecases` reports `false`
    incomplete = find(!, completecases(df))
    return deleterows!(df, incomplete)
end

"""
    filter(function, df::AbstractDataFrame)

Return a copy of data frame `df` keeping only the rows for which `function`
returns `true`. Each row is handed to `function` as a `DataFrameRow`, which is
its only argument. A return value that is not a `Bool` (for instance `missing`)
raises a `TypeError`.

# Examples

```
julia> df = DataFrame(x = [3, 1, 2, 1], y = ["b", "c", "a", "b"])
4×2 DataFrames.DataFrame
│ Row │ x │ y │
├─────┼───┼───┤
│ 1   │ 3 │ b │
│ 2   │ 1 │ c │
│ 3   │ 2 │ a │
│ 4   │ 1 │ b │

julia> filter(row -> row[:x] > 1, df)
2×2 DataFrames.DataFrame
│ Row │ x │ y │
├─────┼───┼───┤
│ 1   │ 3 │ b │
│ 2   │ 2 │ a │
```
"""
function Base.filter(f, df::AbstractDataFrame)
    # One Bool per row; the type assertion rejects any non-Bool predicate result
    keep = collect(f(row)::Bool for row in eachrow(df))
    return df[keep, :]
end

"""
    filter!(function, df::AbstractDataFrame)

Delete from data frame `df`, in place, the rows for which `function` returns
`false`, and return `df`. Each row is handed to `function` as a `DataFrameRow`,
which is its only argument.

# Examples

```
julia> df = DataFrame(x = [3, 1, 2, 1], y = ["b", "c", "a", "b"])
4×2 DataFrames.DataFrame
│ Row │ x │ y │
├─────┼───┼───┤
│ 1   │ 3 │ b │
│ 2   │ 1 │ c │
│ 3   │ 2 │ a │
│ 4   │ 1 │ b │

julia> filter!(row -> row[:x] > 1, df);

julia> df
2×2 DataFrames.DataFrame
│ Row │ x │ y │
├─────┼───┼───┤
│ 1   │ 3 │ b │
│ 2   │ 2 │ a │
```
"""
function Base.filter!(f, df::AbstractDataFrame)
    # Indices of the rows rejected by the predicate, i.e. where `!f` holds
    rejected = find(!f, eachrow(df))
    return deleterows!(df, rejected)
end

# Converting a data frame to a generic `Array` delegates to the `Matrix` conversion.
Base.convert(::Type{Array}, df::AbstractDataFrame) = convert(Matrix, df)
Expand Down
8 changes: 8 additions & 0 deletions test/data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -317,4 +317,12 @@ module TestData
#test unique!() with extra argument
unique!(df, [1, 3])
@test df == df1

# Test filter() and filter!(): only the rows with x > 1 must remain
df = DataFrame(x = [3, 1, 2, 1], y = ["b", "c", "a", "b"])
@test filter(r -> r[:x] > 1, df) == DataFrame(x = [3, 2], y = ["b", "a"])
# Chained comparison: filter!() must return the very same object (===) as `df`,
# and `df` must then equal (==) the expected filtered data frame
@test filter!(r -> r[:x] > 1, df) === df == DataFrame(x = [3, 2], y = ["b", "a"])
# Column x now holds a `missing`, so the predicate does not yield a Bool for
# that row; both functions are expected to throw a TypeError
df = DataFrame(x = [3, 1, 2, 1, missing], y = ["b", "c", "a", "b", "c"])
@test_throws TypeError filter(r -> r[:x] > 1, df)
@test_throws TypeError filter!(r -> r[:x] > 1, df)
end

0 comments on commit 630660e

Please sign in to comment.