Merge pull request #806 from JuliaStats/convert

Follow deprecation of DataArrays.array
JuliaData · May 22, 2015 · 6289a16 · 6289a16
2 parents 645bda3 + 323f709
commit 6289a16
Show file tree

Hide file tree

Showing 10 changed files with 90 additions and 30 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,9 +1,24 @@
+# DataFrames v0.6.6 Release Notes
+
+## Deprecations
+* Deprecates `array(df, ...)` in favor of `convert(Array, df, ...)` ([#806])
+* Deprecates `DataArray(df, T)` in favor of `convert(DataArray{T}, df)` ([#806])
+
 # DataFrames v0.6.3 Release Notes
 
+## Deprecations
 * Removes `save` and `loaddf`, since the format was not compatible
   across Julia and DataFrames versions ([#790]). Use `writetable` or
   [JLD](https://github.com/timholy/HDF5.jl) to save DataFrames
 
+# DataFrames v0.6.1 Release Notes
+
+## New features
+* `writetable` supports `append` option ([#755])
+
+## Changes
+* Faster `read_rda` ([#754], [#759])
+
 # DataFrames v0.6.0 Release Notes
 
 Focus on performance improvements and rooting out bugs in corner cases.
@@ -218,7 +233,11 @@ Improved I/O and more-Julian idioms.
 [#749]: https://github.com/JuliaStats/DataFrames.jl/issues/749
 [#751]: https://github.com/JuliaStats/DataFrames.jl/issues/751
 [#752]: https://github.com/JuliaStats/DataFrames.jl/issues/752
+[#754]: https://github.com/JuliaStats/DataFrames.jl/issues/754
+[#755]: https://github.com/JuliaStats/DataFrames.jl/issues/755
+[#759]: https://github.com/JuliaStats/DataFrames.jl/issues/759
 [#790]: https://github.com/JuliaStats/DataFrames.jl/issues/790
+[#806]: https://github.com/JuliaStats/DataFrames.jl/issues/806
 
 [JuliaLang/julia#4882]: https://github.com/JuliaLang/julia/issues/4882
-[JuliaLang/julia#5897]: https://github.com/JuliaLang/julia/issues/5897
+[JuliaLang/julia#5897]: https://github.com/JuliaLang/julia/issues/5897
diff --git a/REQUIRE b/REQUIRE
@@ -1,8 +1,8 @@
 julia 0.3.4-
-DataArrays
+DataArrays 0.2.15
 StatsBase 0.3.9+
 GZip
 SortingAlgorithms
 Reexport
 Compat
-Docile
+Docile
diff --git a/doc/source/getting_started.rst b/doc/source/getting_started.rst
@@ -54,11 +54,11 @@ no ``NA`` values::
     convert(Array, dv)
 
 In addition to removing ``NA`` values and hoping they won't occur, you can
-also replace any ``NA`` values using the ``array`` function, which takes a
+also replace any ``NA`` values using the ``convert`` function, which takes a
 replacement value as an argument::
 
     dv = @data([NA, 3, 2, 5, 4])
-    mean(array(dv, 11))
+    mean(convert(Array, dv, 11))
 
 Which strategy for dealing with ``NA`` values is most appropriate will
 typically depend on the specific details of your data analysis pathway.

diff --git a/docs/getting_started.md b/docs/getting_started.md
@@ -52,11 +52,11 @@ dv[1] = 3
 convert(Array, dv)
 ```
 
-In addition to removing `NA` values and hoping they won't occur, you can also replace any `NA` values using the `array` function, which takes a replacement value as an argument:
+In addition to removing `NA` values and hoping they won't occur, you can also replace any `NA` values using the `convert` function, which takes a replacement value as an argument:
 
 ```julia
 dv = @data([NA, 3, 2, 5, 4])
-mean(array(dv, 11))
+mean(convert(Array, dv, 11))
 ```
 
 Which strategy for dealing with `NA` values is most appropriate will typically depend on the specific details of your data analysis pathway.

diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl
@@ -28,7 +28,7 @@ The following are normally implemented for AbstractDataFrames:
 * `size(d)` : (nrows, ncols)
 * `head(d, n = 5)` : first `n` rows
 * `tail(d, n = 5)` : last `n` rows
-* `array(d)` : convert to an array
+* `convert(Array, d)` : convert to an array
 * `DataArray(d)` : convert to a DataArray
 * `complete_cases(d)` : indexes of complete cases (rows with no NA's)
 * `complete_cases!(d)` : remove rows with NA's
@@ -503,9 +503,18 @@ complete_cases!(df)
 """
 complete_cases!(df::AbstractDataFrame) = deleterows!(df, find(!complete_cases(df)))
 
-function DataArrays.array(df::AbstractDataFrame)
-    n, p = size(df)
+function Base.convert(::Type{Array}, df::AbstractDataFrame)
+    convert(Matrix, df)
+end
+function Base.convert(::Type{Matrix}, df::AbstractDataFrame)
     T = reduce(typejoin, eltypes(df))
+    convert(Matrix{T}, df)
+end
+function Base.convert{T}(::Type{Array{T}}, df::AbstractDataFrame)
+    convert(Matrix{T}, df)
+end
+function Base.convert{T}(::Type{Matrix{T}}, df::AbstractDataFrame)
+    n, p = size(df)
     res = Array(T, n, p)
     idx = 1
     for col in columns(df)
@@ -516,8 +525,17 @@ function DataArrays.array(df::AbstractDataFrame)
     return res
 end
 
-function DataArrays.DataArray(df::AbstractDataFrame,
-                              T::DataType = reduce(typejoin, eltypes(df)))
+function Base.convert(::Type{DataArray}, df::AbstractDataFrame)
+    convert(DataMatrix, df)
+end
+function Base.convert(::Type{DataMatrix}, df::AbstractDataFrame)
+    T = reduce(typejoin, eltypes(df))
+    convert(DataMatrix{T}, df)
+end
+function Base.convert{T}(::Type{DataArray{T}}, df::AbstractDataFrame)
+    convert(DataMatrix{T}, df)
+end
+function Base.convert{T}(::Type{DataMatrix{T}}, df::AbstractDataFrame)
     n, p = size(df)
     res = DataArray(T, n, p)
     idx = 1
@@ -561,10 +579,11 @@ function nonunique(df::AbstractDataFrame)
     res = fill(false, nrow(df))
     di = Dict()
     for i in 1:nrow(df)
-        if haskey(di, array(df[i, :])) # Used to convert to Any type
+        arow = convert(Array, df[i, :]) # Used to convert to Any type
+        if haskey(di, arow)
             res[i] = true
         else
-            di[array(df[i, :])] = 1 # Used to convert to Any type
+            di[arow] = 1
         end
     end
     res

diff --git a/src/dataframerow/dataframerow.jl b/src/dataframerow/dataframerow.jl
@@ -35,4 +35,4 @@ Base.next(r::DataFrameRow, s) = ((_names(r)[s], r[s]), s + 1)
 
 Base.done(r::DataFrameRow, s) = s > length(r)
 
-DataArrays.array(r::DataFrameRow) = DataArrays.array(r.df[r.row,:])
+Base.convert(::Type{Array}, r::DataFrameRow) = convert(Array, r.df[r.row,:])
diff --git a/src/deprecated.jl b/src/deprecated.jl
@@ -8,3 +8,11 @@ import Base: keys, values, insert!
 Base.@deprecate keys(df::AbstractDataFrame) names(df)
 Base.@deprecate values(df::AbstractDataFrame) DataFrames.columns(df)
 Base.@deprecate insert!(df::DataFrame, df2::AbstractDataFrame) merge!(df, df2)
+
+import DataArrays: array, DataArray
+Base.@deprecate array(df::AbstractDataFrame) convert(Array, df)
+Base.@deprecate array(r::DataFrameRow) convert(Array, r)
+if VERSION < v"0.4.0-"
+    Base.@deprecate DataArray(df::AbstractDataFrame) convert(DataArray, df)
+end
+Base.@deprecate DataArray(df::AbstractDataFrame, T::DataType) convert(DataArray{T}, df)
diff --git a/src/other/index.jl b/src/other/index.jl
@@ -124,7 +124,7 @@ end
 
 Base.getindex(x::Index, idx::Symbol) = x.lookup[idx]
 Base.getindex(x::AbstractIndex, idx::Real) = @compat Int(idx)
-Base.getindex(x::AbstractIndex, idx::AbstractDataVector{Bool}) = getindex(x, array(idx, false))
+Base.getindex(x::AbstractIndex, idx::AbstractDataVector{Bool}) = getindex(x, convert(Array, idx, false))
 Base.getindex{T}(x::AbstractIndex, idx::AbstractDataVector{T}) = getindex(x, dropna(idx))
 Base.getindex(x::AbstractIndex, idx::AbstractVector{Bool}) = find(idx)
 Base.getindex(x::AbstractIndex, idx::Range) = [idx;]

diff --git a/test/conversions.jl b/test/conversions.jl
@@ -5,29 +5,43 @@ module TestConversions
     df = DataFrame()
     df[:A] = 1:5
     df[:B] = [:A, :B, :C, :D, :E]
-    @test isa(array(df), Matrix{Any})
-    @test array(df) == array(DataArray(df))
-    # @test isa(array(df, Any), Matrix{Any})
+    @test isa(convert(Array, df), Matrix{Any})
+    @test convert(Array, df) == convert(Array, convert(DataArray, df))
+    @test isa(convert(Array{Any}, df), Matrix{Any})
 
     df = DataFrame()
     df[:A] = 1:5
     df[:B] = 1.0:5.0
-    @test isa(array(df), Matrix{Real})
-    @test array(df) == array(DataArray(df))
-    # @test isa(array(df, Any), Matrix{Any})
-    # @test isa(array(df, Float64), Matrix{Float64})
+    @test isa(convert(Array, df), Matrix{Real})
+    @test convert(Array, df) == convert(Array, convert(DataArray, df))
+    @test isa(convert(Array{Any}, df), Matrix{Any})
+    @test isa(convert(Array{Float64}, df), Matrix{Float64})
 
     df = DataFrame()
     df[:A] = 1.0:5.0
     df[:B] = 1.0:5.0
-    @test isa(array(df), Matrix{Float64})
-    @test array(df) == array(DataArray(df))
-    # @test isa(matrix(df, Any), Matrix{Any})
-    # @test isa(matrix(df, Int), Matrix{Int})
+    a = convert(Array, df)
+    aa = convert(Array{Any}, df)
+    ai = convert(Array{Int}, df)
+    @test isa(a, Matrix{Float64})
+    @test a == convert(Array, convert(DataArray, df))
+    @test a == convert(Matrix, df)
+    @test isa(aa, Matrix{Any})
+    @test aa == convert(Matrix{Any}, df)
+    @test isa(ai, Matrix{Int})
+    @test ai == convert(Matrix{Int}, df)
 
     df[1,1] = NA
-    @test_throws ErrorException array(df)
-    @test isa(DataArray(df), DataMatrix{Float64})
+    @test_throws ErrorException convert(Array, df)
+    da = convert(DataArray, df)
+    daa = convert(DataArray{Any}, df)
+    dai = convert(DataArray{Int}, df)
+    @test isa(da, DataMatrix{Float64})
+    @test isequal(da, convert(DataMatrix, df))
+    @test isa(daa, DataMatrix{Any})
+    @test isequal(daa, convert(DataMatrix{Any}, df))
+    @test isa(dai, DataMatrix{Int})
+    @test isequal(dai, convert(DataMatrix{Int}, df))
 
     a = [1.0,2.0]
     b = [-0.1,3]

diff --git a/test/iteration.jl b/test/iteration.jl
@@ -31,7 +31,7 @@ module TestIteration
         @test isa(col, @compat Tuple{Symbol, AbstractDataVector})
     end
 
-    @test isequal(map(x -> minimum(array(x)), eachrow(df)), Any[1,2])
+    @test isequal(map(x -> minimum(convert(Array, x)), eachrow(df)), Any[1,2])
     @test isequal(map(minimum, eachcol(df)), DataFrame(A = [1], B = [2]))
 
     row = DataFrameRow(df, 1)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -35,4 +35,4 @@ Base.next(r::DataFrameRow, s) = ((_names(r)[s], r[s]), s + 1)

		Base.done(r::DataFrameRow, s) = s > length(r)

		-DataArrays.array(r::DataFrameRow) = DataArrays.array(r.df[r.row,:])
		+Base.convert(::Type{Array}, r::DataFrameRow) = convert(Array, r.df[r.row,:])