From 7712c2bfea7e6ba140e66896dd83ac6e15efe785 Mon Sep 17 00:00:00 2001 From: Sean Garborg Date: Thu, 12 Feb 2015 18:54:52 -0700 Subject: [PATCH 1/2] Track Julia 0.4 Array concatenation syntax --- src/abstractdataframe/join.jl | 8 ++++---- src/abstractdataframe/reshape.jl | 23 +++++++++++------------ src/abstractdataframe/sort.jl | 2 +- src/dataframe/dataframe.jl | 16 ++++++++-------- src/groupeddataframe/grouping.jl | 2 +- src/other/index.jl | 4 ++-- test/cat.jl | 2 +- test/data.jl | 18 +++++++++--------- test/dataframe.jl | 2 +- test/formula.jl | 20 ++++++++++---------- test/sort.jl | 2 +- test/utils.jl | 2 +- 12 files changed, 50 insertions(+), 51 deletions(-) diff --git a/src/abstractdataframe/join.jl b/src/abstractdataframe/join.jl index 04e588f7c3..71f26f5d99 100644 --- a/src/abstractdataframe/join.jl +++ b/src/abstractdataframe/join.jl @@ -92,7 +92,7 @@ function DataArrays.PooledDataVecs(df1::AbstractDataFrame, end ngroups = ngroups * (length(dv1.pool) + 1) end - pool = [1:ngroups] + pool = [1:ngroups;] (PooledDataArray(DataArrays.RefArray(refs1), pool), PooledDataArray(DataArrays.RefArray(refs2), pool)) end @@ -153,7 +153,7 @@ function Base.join(df1::AbstractDataFrame, elseif kind == :left df2w = without(df2, on) - left = df1[[left_idx, leftonly_idx], :] + left = df1[[left_idx; leftonly_idx], :] right = vcat(df2w[right_idx, :], nas(df2w, length(leftonly_idx))) @@ -163,7 +163,7 @@ function Base.join(df1::AbstractDataFrame, left = vcat(df1w[left_idx, :], nas(df1w, length(rightonly_idx))) - right = df2[[right_idx, rightonly_idx], :] + right = df2[[right_idx; rightonly_idx], :] return hcat!(left, right) elseif kind == :outer @@ -187,7 +187,7 @@ end function crossjoin(df1::AbstractDataFrame, df2::AbstractDataFrame) r1, r2 = size(df1, 1), size(df2, 1) - cols = [[rep(c, 1, r2) for c in columns(df1)], + cols = [[rep(c, 1, r2) for c in columns(df1)]; [rep(c, r1, 1) for c in columns(df2)]] colindex = merge(index(df1), index(df2)) DataFrame(cols, colindex) diff --git a/src/abstractdataframe/reshape.jl b/src/abstractdataframe/reshape.jl index 71b55197f3..34e7c613bc 100644 --- a/src/abstractdataframe/reshape.jl +++ b/src/abstractdataframe/reshape.jl @@ -40,7 +40,7 @@ function stack(df::AbstractDataFrame, measure_vars) stack(df, mv_inds, _setdiff(1:ncol(df), mv_inds)) end function stack(df::AbstractDataFrame) - idx = [1:length(df)][[t <: FloatingPoint for t in eltypes(df)]] + idx = [1:length(df);][[t <: FloatingPoint for t in eltypes(df)]] stack(df, idx) end @@ -69,7 +69,7 @@ function unstack(df::AbstractDataFrame, rowkey::Int, colkey::Int, value::Int) Nrow = length(refkeycol.pool) Ncol = length(keycol.pool) # TODO make fillNA(type, length) - payload = DataFrame(Any[DataArray([fill(valuecol[1], Nrow)], fill(true, Nrow)) for i in 1:Ncol], map(symbol, keycol.pool)) + payload = DataFrame(Any[DataArray(fill(valuecol[1], Nrow), fill(true, Nrow)) for i in 1:Ncol], map(symbol, keycol.pool)) nowarning = true for k in 1:nrow(df) j = int(keycol.refs[k]) @@ -97,7 +97,7 @@ function unstack(df::AbstractDataFrame, colkey::Int, value::Int) # find the indexes for each group: groupidxs = [g.idx[g.starts[i]:g.ends[i]] for i in 1:length(g.starts)] # this will be a new column to key the rows: - rowkey = PooledDataArray(zeros(Int, size(df, 1)), [1:length(groupidxs)]) + rowkey = PooledDataArray(zeros(Int, size(df, 1)), [1:length(groupidxs);]) for i in 1:length(groupidxs) rowkey[groupidxs[i]] = i end @@ -110,7 +110,7 @@ function unstack(df::AbstractDataFrame, colkey::Int, value::Int) # group on anything not a key or value: g = groupby(df, setdiff(_names(df), _names(df)[[colkey, value]])) groupidxs = [g.idx[g.starts[i]:g.ends[i]] for i in 1:length(g.starts)] - rowkey = PooledDataArray(zeros(Int, size(df, 1)), [1:length(groupidxs)]) + rowkey = PooledDataArray(zeros(Int, size(df, 1)), [1:length(groupidxs);]) for i in 1:length(groupidxs) rowkey[groupidxs[i]] = i end @@ -120,7 +120,7 @@ function unstack(df::AbstractDataFrame, colkey::Int, value::Int) keys = unique(keycol) Nrow = length(g) Ncol = length(keycol.pool) - df2 = DataFrame(Any[DataArray([fill(valuecol[1], Nrow)], fill(true, Nrow)) for i in 1:Ncol], map(symbol, keycol.pool)) + df2 = DataFrame(Any[DataArray(fill(valuecol[1], Nrow), fill(true, Nrow)) for i in 1:Ncol], map(symbol, keycol.pool)) nowarning = true for k in 1:nrow(df) j = int(keycol.refs[k]) @@ -168,7 +168,7 @@ end function Base.getindex(v::StackedVector,i::Real) lengths = [length(x)::Int for x in v.components] - cumlengths = [0, cumsum(lengths)] + cumlengths = [0; cumsum(lengths)] j = searchsortedlast(cumlengths .+ 1, i) if j > length(cumlengths) error("indexing bounds error") @@ -187,7 +187,6 @@ function Base.getindex{I<:Real}(v::StackedVector,i::AbstractVector{I}) end result end -Base.getindex(v::StackedVector,i::Union(Ranges, Vector{Bool}, BitVector)) = getindex(v, [i]) Base.size(v::StackedVector) = (length(v),) Base.length(v::StackedVector) = sum(map(length, v.components)) @@ -198,14 +197,14 @@ Base.similar(v::StackedVector, T, dims::Dims) = similar(v.components[1], T, dims DataArrays.PooledDataArray(v::StackedVector) = PooledDataArray(v[:]) # could be more efficient function Base.getindex{T,I<:Real}(v::RepeatedVector{T},i::AbstractVector{I}) - j = mod(i .- 1, length(v.parent)) .+ 1 + j = mod(i - 1, length(v.parent)) + 1 v.parent[j] end function Base.getindex{T}(v::RepeatedVector{T},i::Real) j = mod(i - 1, length(v.parent)) + 1 v.parent[j] end -Base.getindex(v::RepeatedVector,i::Union(Ranges, Vector{Bool}, BitVector)) = getindex(v, [i]) +Base.getindex(v::RepeatedVector,i::Ranges) = getindex(v, [i;]) Base.size(v::RepeatedVector) = (length(v),) Base.length(v::RepeatedVector) = v.n * length(v.parent) @@ -226,10 +225,10 @@ function Base.getindex{T}(v::EachRepeatedVector{T},i::Real) v.parent[j] end function Base.getindex{T,I<:Real}(v::EachRepeatedVector{T},i::AbstractVector{I}) - j = div(i .- 1, v.n) .+ 1 + j = div(i - 1, v.n) + 1 v.parent[j] end -Base.getindex(v::EachRepeatedVector,i::Union(Ranges, Vector{Bool}, BitVector)) = getindex(v, [i]) +Base.getindex(v::EachRepeatedVector,i::Ranges) = getindex(v, [i;]) Base.size(v::EachRepeatedVector) = (length(v),) Base.length(v::EachRepeatedVector) = v.n * length(v.parent) @@ -282,7 +281,7 @@ function stackdf(df::AbstractDataFrame, measure_vars) stackdf(df, m_inds, _setdiff(1:ncol(df), m_inds)) end function stackdf(df::AbstractDataFrame) - idx = [1:length(df)][[t <: FloatingPoint for t in eltypes(df)]] + idx = [1:length(df);][[t <: FloatingPoint for t in eltypes(df)]] stackdf(df, idx) end diff --git a/src/abstractdataframe/sort.jl b/src/abstractdataframe/sort.jl index 4c61283b4f..80bd46e087 100644 --- a/src/abstractdataframe/sort.jl +++ b/src/abstractdataframe/sort.jl @@ -303,7 +303,7 @@ for s in [:(Base.sort), :(Base.sortperm)] end Base.sort (df::AbstractDataFrame, a::Algorithm, o::Ordering) = df[sortperm(df, a, o),:] -Base.sortperm(df::AbstractDataFrame, a::Algorithm, o::Union(Perm,DFPerm)) = sort!([1:size(df, 1)], a, o) +Base.sortperm(df::AbstractDataFrame, a::Algorithm, o::Union(Perm,DFPerm)) = sort!([1:size(df, 1);], a, o) Base.sortperm(df::AbstractDataFrame, a::Algorithm, o::Ordering) = sortperm(df, a, DFPerm(o,df)) # Extras to speed up sorting diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 6318f1fd63..f199107db9 100644 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -304,7 +304,7 @@ function insert_multiple_entries!{T <: Real}(df::DataFrame, end upgrade_vector(v::Vector) = DataArray(v, falses(length(v))) -upgrade_vector(v::Ranges) = DataArray([v], falses(length(v))) +upgrade_vector(v::Ranges) = DataArray([v;], falses(length(v))) upgrade_vector(v::BitVector) = DataArray(convert(Array{Bool}, v), falses(length(v))) upgrade_vector(adv::AbstractDataArray) = adv function upgrade_scalar(df::DataFrame, v::AbstractArray) @@ -734,18 +734,18 @@ function Base.convert(::Type{DataFrame}, d::Dict) dnames = collect(keys(d)) sort!(dnames) p = length(dnames) + p == 0 && return DataFrame() columns = Array(Any, p) - colnames = Array(Symbol,p) - if p == 0 - return DataFrame() - end + colnames = Array(Symbol, p) n = length(d[dnames[1]]) for j in 1:p - if length(d[dnames[j]]) != n + name = dnames[j] + col = d[name] + if length(col) != n throw(ArgumentError("All columns in Dict must have the same length")) end - columns[j] = DataArray([d[dnames[j]]]) - colnames[j] = symbol(dnames[j]) + columns[j] = DataArray(col) + colnames[j] = symbol(name) end return DataFrame(columns, Index(colnames)) end diff --git a/src/groupeddataframe/grouping.jl b/src/groupeddataframe/grouping.jl index 7048ab6af3..8f84f39d14 100644 --- a/src/groupeddataframe/grouping.jl +++ b/src/groupeddataframe/grouping.jl @@ -44,7 +44,7 @@ function groupby{T}(d::AbstractDataFrame, cols::Vector{T}) (idx, starts) = DataArrays.groupsort_indexer(x, ngroups) # Remove zero-length groupings starts = _uniqueofsorted(starts) - ends = [starts[2:end] .- 1] + ends = starts[2:end] - 1 GroupedDataFrame(d, cols, idx, starts[1:end-1], ends) end groupby(d::AbstractDataFrame, cols) = groupby(d, [cols]) diff --git a/src/other/index.jl b/src/other/index.jl index beda5cbb09..159be765bf 100644 --- a/src/other/index.jl +++ b/src/other/index.jl @@ -127,9 +127,9 @@ Base.getindex(x::AbstractIndex, idx::Real) = int(idx) Base.getindex(x::AbstractIndex, idx::AbstractDataVector{Bool}) = getindex(x, array(idx, false)) Base.getindex{T}(x::AbstractIndex, idx::AbstractDataVector{T}) = getindex(x, dropna(idx)) Base.getindex(x::AbstractIndex, idx::AbstractVector{Bool}) = find(idx) -Base.getindex(x::AbstractIndex, idx::Ranges) = [idx] +Base.getindex(x::AbstractIndex, idx::Ranges) = [idx;] Base.getindex{T <: Real}(x::AbstractIndex, idx::AbstractVector{T}) = convert(Vector{Int}, idx) -Base.getindex(x::AbstractIndex, idx::AbstractVector{Symbol}) = [[x.lookup[i] for i in idx]...] +Base.getindex(x::AbstractIndex, idx::AbstractVector{Symbol}) = [x.lookup[i] for i in idx] type SimpleIndex <: AbstractIndex length::Integer diff --git a/test/cat.jl b/test/cat.jl index 34ae72ccdd..41635d6132 100644 --- a/test/cat.jl +++ b/test/cat.jl @@ -67,7 +67,7 @@ module TestCat dfr = vcat(df4, df4) @test size(dfr, 1) == 8 @test names(df4) == names(dfr) - @test isequal(dfr, [df4, df4]) + @test isequal(dfr, [df4; df4]) dfr = vcat(df2, df3) @test size(dfr) == (8,2) diff --git a/test/data.jl b/test/data.jl index 90edab0344..7b5d8cc307 100644 --- a/test/data.jl +++ b/test/data.jl @@ -5,7 +5,7 @@ module TestData #test_group("DataVector creation") dvint = @data([1, 2, NA, 4]) - dvint2 = data([5:8]) + dvint2 = data([5:8;]) dvint3 = data(5:8) dvflt = @data([1.0, 2, NA, 4]) dvstr = @data(["one", "two", NA, "four"]) @@ -113,7 +113,7 @@ module TestData @test isequal(df9, df8) df10 = DataFrame( - Any[[1:4], [2:5], ["a", "a", "a", "b" ], ["c", "d", "c", "d"]], + Any[[1:4;], [2:5;], ["a", "a", "a", "b" ], ["c", "d", "c", "d"]], [:d1, :d2, :d3, :d4] ) @@ -127,11 +127,11 @@ module TestData @test ggd[2][1, :d4] == "d" #test_group("reshape") - d1 = DataFrame(a = repeat([1:3], inner = [4]), - b = repeat([1:4], inner = [3]), + d1 = DataFrame(a = repeat([1:3;], inner = [4]), + b = repeat([1:4;], inner = [3]), c = randn(12), d = randn(12), - e = map(string, ['a':'l'])) + e = map(string, 'a':'l')) stack(d1, :a) d1s = stack(d1, [:a, :b]) @@ -159,8 +159,8 @@ module TestData d1m = meltdf(d1[[1,3,4]], :a) @test names(d1m) == [:variable, :value, :a] - d1s[:id] = [1:12, 1:12] - d1s2[:id] = [1:12, 1:12] + d1s[:id] = [1:12; 1:12] + d1s2[:id] = [1:12; 1:12] d1us = unstack(d1s, :id, :variable, :value) d1us2 = unstack(d1s2) d1us3 = unstack(d1s2, :variable, :value) @@ -179,11 +179,11 @@ module TestData #test_group("merge") srand(1) - df1 = DataFrame(a = shuffle!([1:10]), + df1 = DataFrame(a = shuffle!([1:10;]), b = [:A,:B][rand(1:2, 10)], v1 = randn(10)) - df2 = DataFrame(a = shuffle!(reverse([1:5])), + df2 = DataFrame(a = shuffle!(reverse([1:5;])), b2 = [:A,:B,:C][rand(1:3, 5)], v2 = randn(5)) diff --git a/test/dataframe.jl b/test/dataframe.jl index 7d0da30b5c..2ed22e8cd7 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -187,7 +187,7 @@ module TestDataFrame # zero-row dataframe and subdataframe test df = DataFrame(x=[], y=[]) @test nrow(df) == 0 - df = DataFrame(x=[1:3], y=[3:5]) + df = DataFrame(x=[1:3;], y=[3:5;]) sdf = sub(df, df[:x] .== 4) @test size(sdf, 1) == 0 diff --git a/test/formula.jl b/test/formula.jl index ba7d5ffe36..99d15e1f9b 100644 --- a/test/formula.jl +++ b/test/formula.jl @@ -12,16 +12,16 @@ module TestFormula #test_group("Basic tests") d = DataFrame() - d[:y] = [1:4] - d[:x1] = [5:8] - d[:x2] = [9:12] - d[:x3] = [13:16] - d[:x4] = [17:20] - - x1 = [5.:8] - x2 = [9.:12] - x3 = [13.:16] - x4 = [17.:20] + d[:y] = [1:4;] + d[:x1] = [5:8;] + d[:x2] = [9:12;] + d[:x3] = [13:16;] + d[:x4] = [17:20;] + + x1 = [5.:8;] + x2 = [9.:12;] + x3 = [13.:16;] + x4 = [17.:20;] f = y ~ x1 + x2 mf = ModelFrame(f, d) ## @test mm.response_colnames == ["y"] # nope: no response_colnames diff --git a/test/sort.jl b/test/sort.jl index 41ee3af1b2..34b799845c 100644 --- a/test/sort.jl +++ b/test/sort.jl @@ -4,7 +4,7 @@ module TestSort dv1 = @data([9, 1, 8, NA, 3, 3, 7, NA]) dv2 = 1.0 * dv1 - dv3 = DataArray([1:8]) + dv3 = DataArray([1:8;]) pdv1 = convert(PooledDataArray, dv1) d = DataFrame(dv1 = dv1, dv2 = dv2, dv3 = dv3, pdv1 = pdv1) diff --git a/test/utils.jl b/test/utils.jl index acf7439d95..ee31ab1d30 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -31,7 +31,7 @@ module TestUtils "Expected if Julia was not built from source.") end - @test DataFrames.countna([1:3]) == 0 + @test DataFrames.countna([1:3;]) == 0 data = @data rand(20) @test DataFrames.countna(data) == 0 From 05dd3669057474a6bbf8e0db3fb20907a6ddefc5 Mon Sep 17 00:00:00 2001 From: Sean Garborg Date: Fri, 13 Feb 2015 07:29:32 -0700 Subject: [PATCH 2/2] Switch AppVeyor to nightlies for better errors At least until Windows I/O is worked out --- appveyor.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 69628f8909..bb4fa009ec 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,11 +1,11 @@ environment: matrix: # Releases - - JULIAVERSION: "stable/win32" - - JULIAVERSION: "stable/win64" + # - JULIAVERSION: "stable/win32" + # - JULIAVERSION: "stable/win64" # Nightlies -# - JULIAVERSION: "download/win32" -# - JULIAVERSION: "download/win64" + - JULIAVERSION: "download/win32" + - JULIAVERSION: "download/win64" notifications: - provider: Email