Skip to content

Commit

Permalink
Merge pull request #806 from JuliaStats/convert
Browse files Browse the repository at this point in the history
Follow deprecation of DataArrays.array
  • Loading branch information
garborg committed May 22, 2015
2 parents 645bda3 + 323f709 commit 6289a16
Show file tree
Hide file tree
Showing 10 changed files with 90 additions and 30 deletions.
21 changes: 20 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,24 @@
# DataFrames v0.6.6 Release Notes

## Deprecations
* Deprecates `array(df, ...)` in favor of `convert(Array, df, ...)` ([#806])
* Deprecates `DataArray(df, T)` in favor of `convert(DataArray{T}, df)` ([#806])

# DataFrames v0.6.3 Release Notes

## Deprecations
* Removes `save` and `loaddf`, since the format was not compatible
across Julia and DataFrames versions ([#790]). Use `writetable` or
[JLD](https://github.com/timholy/HDF5.jl) to save DataFrames.

# DataFrames v0.6.1 Release Notes

## New features
* `writetable` supports `append` option ([#755])

## Changes
* Faster `read_rda` ([#754], [#759])

# DataFrames v0.6.0 Release Notes

Focus on performance improvements and rooting out bugs in corner cases.
Expand Down Expand Up @@ -218,7 +233,11 @@ Improved I/O and more-Julian idioms.
[#749]: https://github.com/JuliaStats/DataFrames.jl/issues/749
[#751]: https://github.com/JuliaStats/DataFrames.jl/issues/751
[#752]: https://github.com/JuliaStats/DataFrames.jl/issues/752
[#754]: https://github.com/JuliaStats/DataFrames.jl/issues/754
[#755]: https://github.com/JuliaStats/DataFrames.jl/issues/755
[#759]: https://github.com/JuliaStats/DataFrames.jl/issues/759
[#790]: https://github.com/JuliaStats/DataFrames.jl/issues/790
[#806]: https://github.com/JuliaStats/DataFrames.jl/issues/806

[JuliaLang/julia#4882]: https://github.com/JuliaLang/julia/issues/4882
[JuliaLang/julia#5897]: https://github.com/JuliaLang/julia/issues/5897
[JuliaLang/julia#5897]: https://github.com/JuliaLang/julia/issues/5897
4 changes: 2 additions & 2 deletions REQUIRE
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
julia 0.3.4-
DataArrays
DataArrays 0.2.15
StatsBase 0.3.9+
GZip
SortingAlgorithms
Reexport
Compat
Docile
Docile
4 changes: 2 additions & 2 deletions doc/source/getting_started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,11 @@ no ``NA`` values::
convert(Array, dv)

In addition to removing ``NA`` values and hoping they won't occur, you can
also replace any ``NA`` values using the ``array`` function, which takes a
also replace any ``NA`` values using the ``convert`` function, which takes a
replacement value as an argument::

dv = @data([NA, 3, 2, 5, 4])
mean(array(dv, 11))
mean(convert(Array, dv, 11))

Which strategy for dealing with ``NA`` values is most appropriate will
typically depend on the specific details of your data analysis pathway.
Expand Down
4 changes: 2 additions & 2 deletions docs/getting_started.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ dv[1] = 3
convert(Array, dv)
```

In addition to removing `NA` values and hoping they won't occur, you can also replace any `NA` values using the `array` function, which takes a replacement value as an argument:
Besides removing `NA` values or assuming they won't occur, you can also replace any `NA` values using the `convert` function, which takes the replacement value as an additional argument:

```julia
dv = @data([NA, 3, 2, 5, 4])
mean(array(dv, 11))
mean(convert(Array, dv, 11))
```

Which strategy for dealing with `NA` values is most appropriate will typically depend on the specific details of your data analysis pathway.
Expand Down
33 changes: 26 additions & 7 deletions src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ The following are normally implemented for AbstractDataFrames:
* `size(d)` : (nrows, ncols)
* `head(d, n = 5)` : first `n` rows
* `tail(d, n = 5)` : last `n` rows
* `array(d)` : convert to an array
* `convert(Array, d)` : convert to an array
* `DataArray(d)` : convert to a DataArray
* `complete_cases(d)` : indexes of complete cases (rows with no NA's)
* `complete_cases!(d)` : remove rows with NA's
Expand Down Expand Up @@ -503,9 +503,18 @@ complete_cases!(df)
"""
complete_cases!(df::AbstractDataFrame) = deleterows!(df, find(!complete_cases(df)))

function DataArrays.array(df::AbstractDataFrame)
n, p = size(df)
# Converting a data frame to a bare `Array` target is forwarded to the
# `Matrix` conversion method.
Base.convert(::Type{Array}, df::AbstractDataFrame) = convert(Matrix, df)
# Convert a data frame to a `Matrix` without an explicit element type.
# The element type is obtained by folding `typejoin` over the column element
# types reported by `eltypes(df)`; the typed `Matrix{...}` method does the rest.
function Base.convert(::Type{Matrix}, df::AbstractDataFrame)
    joined = reduce(typejoin, eltypes(df))
    return convert(Matrix{joined}, df)
end
# An `Array{T}` target is forwarded to the `Matrix{T}` conversion method,
# keeping the requested element type `T`.
Base.convert{T}(::Type{Array{T}}, df::AbstractDataFrame) = convert(Matrix{T}, df)
function Base.convert{T}(::Type{Matrix{T}}, df::AbstractDataFrame)
n, p = size(df)
res = Array(T, n, p)
idx = 1
for col in columns(df)
Expand All @@ -516,8 +525,17 @@ function DataArrays.array(df::AbstractDataFrame)
return res
end

function DataArrays.DataArray(df::AbstractDataFrame,
T::DataType = reduce(typejoin, eltypes(df)))
# Converting a data frame to a bare `DataArray` target is forwarded to the
# `DataMatrix` conversion method.
Base.convert(::Type{DataArray}, df::AbstractDataFrame) = convert(DataMatrix, df)
# Convert a data frame to a `DataMatrix` without an explicit element type.
# The element type is obtained by folding `typejoin` over the column element
# types reported by `eltypes(df)`; the typed `DataMatrix{...}` method does the rest.
function Base.convert(::Type{DataMatrix}, df::AbstractDataFrame)
    joined = reduce(typejoin, eltypes(df))
    return convert(DataMatrix{joined}, df)
end
# A `DataArray{T}` target is forwarded to the `DataMatrix{T}` conversion
# method, keeping the requested element type `T`.
Base.convert{T}(::Type{DataArray{T}}, df::AbstractDataFrame) = convert(DataMatrix{T}, df)
function Base.convert{T}(::Type{DataMatrix{T}}, df::AbstractDataFrame)
n, p = size(df)
res = DataArray(T, n, p)
idx = 1
Expand Down Expand Up @@ -561,10 +579,11 @@ function nonunique(df::AbstractDataFrame)
res = fill(false, nrow(df))
di = Dict()
for i in 1:nrow(df)
if haskey(di, array(df[i, :])) # Used to convert to Any type
arow = convert(Array, df[i, :]) # Used to convert to Any type
if haskey(di, arow)
res[i] = true
else
di[array(df[i, :])] = 1 # Used to convert to Any type
di[arow] = 1
end
end
res
Expand Down
2 changes: 1 addition & 1 deletion src/dataframerow/dataframerow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,4 @@ Base.next(r::DataFrameRow, s) = ((_names(r)[s], r[s]), s + 1)

Base.done(r::DataFrameRow, s) = s > length(r)

DataArrays.array(r::DataFrameRow) = DataArrays.array(r.df[r.row,:])
# Convert a data frame row to an array: index the parent data frame `r.df`
# at `r.row` across all columns, then convert that selection.
function Base.convert(::Type{Array}, r::DataFrameRow)
    return convert(Array, r.df[r.row, :])
end
8 changes: 8 additions & 0 deletions src/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,11 @@ import Base: keys, values, insert!
Base.@deprecate keys(df::AbstractDataFrame) names(df)
Base.@deprecate values(df::AbstractDataFrame) DataFrames.columns(df)
Base.@deprecate insert!(df::DataFrame, df2::AbstractDataFrame) merge!(df, df2)

import DataArrays: array, DataArray
# Deprecation shims: the old `array(...)` and `DataArray(...)` entry points
# for data frames and data frame rows forward to the equivalent `convert` call.
Base.@deprecate array(df::AbstractDataFrame) convert(Array, df)
Base.@deprecate array(r::DataFrameRow) convert(Array, r)
# The one-argument `DataArray(df)` form is only deprecated on Julia versions
# before 0.4.
# NOTE(review): presumably the one-argument call is handled differently on
# 0.4+ — confirm the reason for the version guard.
if VERSION < v"0.4.0-"
Base.@deprecate DataArray(df::AbstractDataFrame) convert(DataArray, df)
end
# The explicit element type moves from a positional argument into the target type.
Base.@deprecate DataArray(df::AbstractDataFrame, T::DataType) convert(DataArray{T}, df)
2 changes: 1 addition & 1 deletion src/other/index.jl
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ end

Base.getindex(x::Index, idx::Symbol) = x.lookup[idx]
Base.getindex(x::AbstractIndex, idx::Real) = @compat Int(idx)
Base.getindex(x::AbstractIndex, idx::AbstractDataVector{Bool}) = getindex(x, array(idx, false))
# Boolean data-vector index: convert it to a plain array, passing `false` as
# the replacement value for `NA` entries, then index with the result.
Base.getindex(x::AbstractIndex, idx::AbstractDataVector{Bool}) = getindex(x, convert(Array, idx, false))
Base.getindex{T}(x::AbstractIndex, idx::AbstractDataVector{T}) = getindex(x, dropna(idx))
Base.getindex(x::AbstractIndex, idx::AbstractVector{Bool}) = find(idx)
Base.getindex(x::AbstractIndex, idx::Range) = [idx;]
Expand Down
40 changes: 27 additions & 13 deletions test/conversions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,29 +5,43 @@ module TestConversions
df = DataFrame()
df[:A] = 1:5
df[:B] = [:A, :B, :C, :D, :E]
@test isa(array(df), Matrix{Any})
@test array(df) == array(DataArray(df))
# @test isa(array(df, Any), Matrix{Any})
@test isa(convert(Array, df), Matrix{Any})
@test convert(Array, df) == convert(Array, convert(DataArray, df))
@test isa(convert(Array{Any}, df), Matrix{Any})

df = DataFrame()
df[:A] = 1:5
df[:B] = 1.0:5.0
@test isa(array(df), Matrix{Real})
@test array(df) == array(DataArray(df))
# @test isa(array(df, Any), Matrix{Any})
# @test isa(array(df, Float64), Matrix{Float64})
@test isa(convert(Array, df), Matrix{Real})
@test convert(Array, df) == convert(Array, convert(DataArray, df))
@test isa(convert(Array{Any}, df), Matrix{Any})
@test isa(convert(Array{Float64}, df), Matrix{Float64})

df = DataFrame()
df[:A] = 1.0:5.0
df[:B] = 1.0:5.0
@test isa(array(df), Matrix{Float64})
@test array(df) == array(DataArray(df))
# @test isa(matrix(df, Any), Matrix{Any})
# @test isa(matrix(df, Int), Matrix{Int})
a = convert(Array, df)
aa = convert(Array{Any}, df)
ai = convert(Array{Int}, df)
@test isa(a, Matrix{Float64})
@test a == convert(Array, convert(DataArray, df))
@test a == convert(Matrix, df)
@test isa(aa, Matrix{Any})
@test aa == convert(Matrix{Any}, df)
@test isa(ai, Matrix{Int})
@test ai == convert(Matrix{Int}, df)

df[1,1] = NA
@test_throws ErrorException array(df)
@test isa(DataArray(df), DataMatrix{Float64})
@test_throws ErrorException convert(Array, df)
da = convert(DataArray, df)
daa = convert(DataArray{Any}, df)
dai = convert(DataArray{Int}, df)
@test isa(da, DataMatrix{Float64})
@test isequal(da, convert(DataMatrix, df))
@test isa(daa, DataMatrix{Any})
@test isequal(daa, convert(DataMatrix{Any}, df))
@test isa(dai, DataMatrix{Int})
@test isequal(dai, convert(DataMatrix{Int}, df))

a = [1.0,2.0]
b = [-0.1,3]
Expand Down
2 changes: 1 addition & 1 deletion test/iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ module TestIteration
@test isa(col, @compat Tuple{Symbol, AbstractDataVector})
end

@test isequal(map(x -> minimum(array(x)), eachrow(df)), Any[1,2])
@test isequal(map(x -> minimum(convert(Array, x)), eachrow(df)), Any[1,2])
@test isequal(map(minimum, eachcol(df)), DataFrame(A = [1], B = [2]))

row = DataFrameRow(df, 1)
Expand Down

0 comments on commit 6289a16

Please sign in to comment.