Merge pull request #766 from JuliaStats/concat
Track Julia 0.4 Array concatenation syntax
garborg committed Feb 13, 2015
2 parents b6e6525 + 05dd366 commit 7c8cd64
Showing 13 changed files with 54 additions and 55 deletions.
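The pattern throughout this diff tracks the Julia 0.4 change in bracket syntax: [a, b] now builds a vector holding its arguments instead of concatenating them, and a bracketed range such as [1:3] no longer flattens, so the code switches to the explicit vcat forms [a; b] and [1:3;]. A minimal sketch of the new semantics (values are illustrative, not taken from the diff):

[1:3;]            # explicit vcat of the range => [1, 2, 3]
[1:3]             # concatenation here is deprecated in 0.4; later a 1-element Vector holding the range
[[1, 2]; [3, 4]]  # explicit vcat of two vectors => [1, 2, 3, 4]
[[1, 2], [3, 4]]  # no longer concatenation; a 2-element Vector of Vectors once the deprecation lands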
8 changes: 4 additions & 4 deletions appveyor.yml
@@ -1,11 +1,11 @@
environment:
matrix:
# Releases
- JULIAVERSION: "stable/win32"
- JULIAVERSION: "stable/win64"
# - JULIAVERSION: "stable/win32"
# - JULIAVERSION: "stable/win64"
# Nightlies
# - JULIAVERSION: "download/win32"
# - JULIAVERSION: "download/win64"
- JULIAVERSION: "download/win32"
- JULIAVERSION: "download/win64"

notifications:
- provider: Email
8 changes: 4 additions & 4 deletions src/abstractdataframe/join.jl
@@ -92,7 +92,7 @@ function DataArrays.PooledDataVecs(df1::AbstractDataFrame,
end
ngroups = ngroups * (length(dv1.pool) + 1)
end
pool = [1:ngroups]
pool = [1:ngroups;]
(PooledDataArray(DataArrays.RefArray(refs1), pool), PooledDataArray(DataArrays.RefArray(refs2), pool))
end

@@ -153,7 +153,7 @@ function Base.join(df1::AbstractDataFrame,
elseif kind == :left
df2w = without(df2, on)

left = df1[[left_idx, leftonly_idx], :]
left = df1[[left_idx; leftonly_idx], :]
right = vcat(df2w[right_idx, :],
nas(df2w, length(leftonly_idx)))

@@ -163,7 +163,7 @@ function Base.join(df1::AbstractDataFrame,

left = vcat(df1w[left_idx, :],
nas(df1w, length(rightonly_idx)))
right = df2[[right_idx, rightonly_idx], :]
right = df2[[right_idx; rightonly_idx], :]

return hcat!(left, right)
elseif kind == :outer
@@ -187,7 +187,7 @@ end

function crossjoin(df1::AbstractDataFrame, df2::AbstractDataFrame)
r1, r2 = size(df1, 1), size(df2, 1)
cols = [[rep(c, 1, r2) for c in columns(df1)],
cols = [[rep(c, 1, r2) for c in columns(df1)];
[rep(c, r1, 1) for c in columns(df2)]]
colindex = merge(index(df1), index(df2))
DataFrame(cols, colindex)
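The join.jl hunks all make the same substitution: index vectors that were previously spliced together by [a, b] concatenation are now joined with a semicolon, and the crossjoin column lists are vcat'ed the same way. A hedged sketch with made-up indices:

left_idx, leftonly_idx = [1, 3], [2, 4]   # hypothetical row indices
rows = [left_idx; leftonly_idx]           # vcat => [1, 3, 2, 4], usable for row indexing
# [left_idx, leftonly_idx] would instead be a 2-element Vector of Vectors.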
23 changes: 11 additions & 12 deletions src/abstractdataframe/reshape.jl
@@ -40,7 +40,7 @@ function stack(df::AbstractDataFrame, measure_vars)
stack(df, mv_inds, _setdiff(1:ncol(df), mv_inds))
end
function stack(df::AbstractDataFrame)
idx = [1:length(df)][[t <: FloatingPoint for t in eltypes(df)]]
idx = [1:length(df);][[t <: FloatingPoint for t in eltypes(df)]]
stack(df, idx)
end

@@ -69,7 +69,7 @@ function unstack(df::AbstractDataFrame, rowkey::Int, colkey::Int, value::Int)
Nrow = length(refkeycol.pool)
Ncol = length(keycol.pool)
# TODO make fillNA(type, length)
payload = DataFrame(Any[DataArray([fill(valuecol[1], Nrow)], fill(true, Nrow)) for i in 1:Ncol], map(symbol, keycol.pool))
payload = DataFrame(Any[DataArray(fill(valuecol[1], Nrow), fill(true, Nrow)) for i in 1:Ncol], map(symbol, keycol.pool))
nowarning = true
for k in 1:nrow(df)
j = int(keycol.refs[k])
@@ -97,7 +97,7 @@ function unstack(df::AbstractDataFrame, colkey::Int, value::Int)
# find the indexes for each group:
groupidxs = [g.idx[g.starts[i]:g.ends[i]] for i in 1:length(g.starts)]
# this will be a new column to key the rows:
rowkey = PooledDataArray(zeros(Int, size(df, 1)), [1:length(groupidxs)])
rowkey = PooledDataArray(zeros(Int, size(df, 1)), [1:length(groupidxs);])
for i in 1:length(groupidxs)
rowkey[groupidxs[i]] = i
end
@@ -110,7 +110,7 @@ function unstack(df::AbstractDataFrame, colkey::Int, value::Int)
# group on anything not a key or value:
g = groupby(df, setdiff(_names(df), _names(df)[[colkey, value]]))
groupidxs = [g.idx[g.starts[i]:g.ends[i]] for i in 1:length(g.starts)]
rowkey = PooledDataArray(zeros(Int, size(df, 1)), [1:length(groupidxs)])
rowkey = PooledDataArray(zeros(Int, size(df, 1)), [1:length(groupidxs);])
for i in 1:length(groupidxs)
rowkey[groupidxs[i]] = i
end
@@ -120,7 +120,7 @@ function unstack(df::AbstractDataFrame, colkey::Int, value::Int)
keys = unique(keycol)
Nrow = length(g)
Ncol = length(keycol.pool)
df2 = DataFrame(Any[DataArray([fill(valuecol[1], Nrow)], fill(true, Nrow)) for i in 1:Ncol], map(symbol, keycol.pool))
df2 = DataFrame(Any[DataArray(fill(valuecol[1], Nrow), fill(true, Nrow)) for i in 1:Ncol], map(symbol, keycol.pool))
nowarning = true
for k in 1:nrow(df)
j = int(keycol.refs[k])
@@ -168,7 +168,7 @@ end

function Base.getindex(v::StackedVector,i::Real)
lengths = [length(x)::Int for x in v.components]
cumlengths = [0, cumsum(lengths)]
cumlengths = [0; cumsum(lengths)]
j = searchsortedlast(cumlengths .+ 1, i)
if j > length(cumlengths)
error("indexing bounds error")
@@ -187,7 +187,6 @@ function Base.getindex{I<:Real}(v::StackedVector,i::AbstractVector{I})
end
result
end
Base.getindex(v::StackedVector,i::Union(Ranges, Vector{Bool}, BitVector)) = getindex(v, [i])

Base.size(v::StackedVector) = (length(v),)
Base.length(v::StackedVector) = sum(map(length, v.components))
@@ -198,14 +197,14 @@ Base.similar(v::StackedVector, T, dims::Dims) = similar(v.components[1], T, dims
DataArrays.PooledDataArray(v::StackedVector) = PooledDataArray(v[:]) # could be more efficient

function Base.getindex{T,I<:Real}(v::RepeatedVector{T},i::AbstractVector{I})
j = mod(i .- 1, length(v.parent)) .+ 1
j = mod(i - 1, length(v.parent)) + 1
v.parent[j]
end
function Base.getindex{T}(v::RepeatedVector{T},i::Real)
j = mod(i - 1, length(v.parent)) + 1
v.parent[j]
end
Base.getindex(v::RepeatedVector,i::Union(Ranges, Vector{Bool}, BitVector)) = getindex(v, [i])
Base.getindex(v::RepeatedVector,i::Ranges) = getindex(v, [i;])

Base.size(v::RepeatedVector) = (length(v),)
Base.length(v::RepeatedVector) = v.n * length(v.parent)
@@ -226,10 +225,10 @@ function Base.getindex{T}(v::EachRepeatedVector{T},i::Real)
v.parent[j]
end
function Base.getindex{T,I<:Real}(v::EachRepeatedVector{T},i::AbstractVector{I})
j = div(i .- 1, v.n) .+ 1
j = div(i - 1, v.n) + 1
v.parent[j]
end
Base.getindex(v::EachRepeatedVector,i::Union(Ranges, Vector{Bool}, BitVector)) = getindex(v, [i])
Base.getindex(v::EachRepeatedVector,i::Ranges) = getindex(v, [i;])

Base.size(v::EachRepeatedVector) = (length(v),)
Base.length(v::EachRepeatedVector) = v.n * length(v.parent)
@@ -282,7 +281,7 @@ function stackdf(df::AbstractDataFrame, measure_vars)
stackdf(df, m_inds, _setdiff(1:ncol(df), m_inds))
end
function stackdf(df::AbstractDataFrame)
idx = [1:length(df)][[t <: FloatingPoint for t in eltypes(df)]]
idx = [1:length(df);][[t <: FloatingPoint for t in eltypes(df)]]
stackdf(df, idx)
end

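In reshape.jl the trailing semicolon does two jobs: it materializes a range into a Vector so it can be filtered with a Bool mask, and it prepends a scalar to a vector, as in the cumulative-lengths line. A small sketch, with the mask invented:

float_mask = [true, false, true, false]   # hypothetical "is this column floating point?" mask
idx = [1:4;][float_mask]                  # materialize the range, then mask => [1, 3]
cumlengths = [0; cumsum([2, 3])]          # scalar prepended to a vector => [0, 2, 5]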
2 changes: 1 addition & 1 deletion src/abstractdataframe/sort.jl
@@ -303,7 +303,7 @@ for s in [:(Base.sort), :(Base.sortperm)]
end

Base.sort (df::AbstractDataFrame, a::Algorithm, o::Ordering) = df[sortperm(df, a, o),:]
Base.sortperm(df::AbstractDataFrame, a::Algorithm, o::Union(Perm,DFPerm)) = sort!([1:size(df, 1)], a, o)
Base.sortperm(df::AbstractDataFrame, a::Algorithm, o::Union(Perm,DFPerm)) = sort!([1:size(df, 1);], a, o)
Base.sortperm(df::AbstractDataFrame, a::Algorithm, o::Ordering) = sortperm(df, a, DFPerm(o,df))

# Extras to speed up sorting
16 changes: 8 additions & 8 deletions src/dataframe/dataframe.jl
@@ -304,7 +304,7 @@ function insert_multiple_entries!{T <: Real}(df::DataFrame,
end

upgrade_vector(v::Vector) = DataArray(v, falses(length(v)))
upgrade_vector(v::Ranges) = DataArray([v], falses(length(v)))
upgrade_vector(v::Ranges) = DataArray([v;], falses(length(v)))
upgrade_vector(v::BitVector) = DataArray(convert(Array{Bool}, v), falses(length(v)))
upgrade_vector(adv::AbstractDataArray) = adv
function upgrade_scalar(df::DataFrame, v::AbstractArray)
@@ -734,18 +734,18 @@ function Base.convert(::Type{DataFrame}, d::Dict)
dnames = collect(keys(d))
sort!(dnames)
p = length(dnames)
p == 0 && return DataFrame()
columns = Array(Any, p)
colnames = Array(Symbol,p)
if p == 0
return DataFrame()
end
colnames = Array(Symbol, p)
n = length(d[dnames[1]])
for j in 1:p
if length(d[dnames[j]]) != n
name = dnames[j]
col = d[name]
if length(col) != n
throw(ArgumentError("All columns in Dict must have the same length"))
end
columns[j] = DataArray([d[dnames[j]]])
colnames[j] = symbol(dnames[j])
columns[j] = DataArray(col)
colnames[j] = symbol(name)
end
return DataFrame(columns, Index(colnames))
end
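The Dict-to-DataFrame conversion exercised above sorts the keys and requires every column to have the same length, throwing ArgumentError otherwise. A hedged usage sketch (column names and values invented):

d = Dict("b" => [4.0, 5.0, 6.0], "a" => [1, 2, 3])   # invented columns
df = convert(DataFrame, d)    # columns :a and :b in sorted-key order, 3 rows each
# A Dict whose values differ in length would throw ArgumentError here.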
2 changes: 1 addition & 1 deletion src/groupeddataframe/grouping.jl
@@ -44,7 +44,7 @@ function groupby{T}(d::AbstractDataFrame, cols::Vector{T})
(idx, starts) = DataArrays.groupsort_indexer(x, ngroups)
# Remove zero-length groupings
starts = _uniqueofsorted(starts)
ends = [starts[2:end] .- 1]
ends = starts[2:end] - 1
GroupedDataFrame(d, cols, idx, starts[1:end-1], ends)
end
groupby(d::AbstractDataFrame, cols) = groupby(d, [cols])
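The grouping change drops the brackets around the group-end computation: starts[2:end] is already a Vector and subtracting 1 keeps it one, so no concatenation is involved (the diff also drops the dots, which Julia 0.4 allows for scalar subtraction). A tiny sketch with invented offsets; the dotted form is kept here so it also runs on newer Julia:

starts = [1, 4, 6, 9]        # hypothetical group start offsets; last entry is a sentinel
ends = starts[2:end] .- 1    # => [3, 5, 8]; already a Vector, so no [...] wrapping is needed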
4 changes: 2 additions & 2 deletions src/other/index.jl
@@ -127,9 +127,9 @@ Base.getindex(x::AbstractIndex, idx::Real) = int(idx)
Base.getindex(x::AbstractIndex, idx::AbstractDataVector{Bool}) = getindex(x, array(idx, false))
Base.getindex{T}(x::AbstractIndex, idx::AbstractDataVector{T}) = getindex(x, dropna(idx))
Base.getindex(x::AbstractIndex, idx::AbstractVector{Bool}) = find(idx)
Base.getindex(x::AbstractIndex, idx::Ranges) = [idx]
Base.getindex(x::AbstractIndex, idx::Ranges) = [idx;]
Base.getindex{T <: Real}(x::AbstractIndex, idx::AbstractVector{T}) = convert(Vector{Int}, idx)
Base.getindex(x::AbstractIndex, idx::AbstractVector{Symbol}) = [[x.lookup[i] for i in idx]...]
Base.getindex(x::AbstractIndex, idx::AbstractVector{Symbol}) = [x.lookup[i] for i in idx]

type SimpleIndex <: AbstractIndex
length::Integer
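In index.jl the comprehension already yields a Vector{Int}, so the old splat-into-brackets wrapper is unnecessary, while range indices still get flattened with a trailing semicolon. A sketch with a made-up lookup table:

lookup = Dict(:a => 1, :b => 2, :c => 3)   # hypothetical name-to-position map
cols = [lookup[s] for s in [:a, :c]]       # => [1, 3]; a plain comprehension suffices
rng = [2:3;]                               # a range still needs ; to become [2, 3]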
2 changes: 1 addition & 1 deletion test/cat.jl
@@ -67,7 +67,7 @@ module TestCat
dfr = vcat(df4, df4)
@test size(dfr, 1) == 8
@test names(df4) == names(dfr)
@test isequal(dfr, [df4, df4])
@test isequal(dfr, [df4; df4])

dfr = vcat(df2, df3)
@test size(dfr) == (8,2)
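The concatenation test above relies on [df4; df4] lowering to vcat(df4, df4), which stacks the rows; with a comma it would be a plain 2-element Vector of DataFrames. A hedged sketch (the contents of df4 are invented):

df4 = DataFrame(x = [1, 2], y = ["a", "b"])   # invented stand-in for the test's df4
stacked = [df4; df4]                          # vcat => 4 rows, same column names
# [df4, df4] would just be a Vector holding the two DataFrames.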
18 changes: 9 additions & 9 deletions test/data.jl
@@ -5,7 +5,7 @@ module TestData

#test_group("DataVector creation")
dvint = @data([1, 2, NA, 4])
dvint2 = data([5:8])
dvint2 = data([5:8;])
dvint3 = data(5:8)
dvflt = @data([1.0, 2, NA, 4])
dvstr = @data(["one", "two", NA, "four"])
@@ -113,7 +113,7 @@ module TestData
@test isequal(df9, df8)

df10 = DataFrame(
Any[[1:4], [2:5], ["a", "a", "a", "b" ], ["c", "d", "c", "d"]],
Any[[1:4;], [2:5;], ["a", "a", "a", "b" ], ["c", "d", "c", "d"]],
[:d1, :d2, :d3, :d4]
)

@@ -127,11 +127,11 @@ module TestData
@test ggd[2][1, :d4] == "d"

#test_group("reshape")
d1 = DataFrame(a = repeat([1:3], inner = [4]),
b = repeat([1:4], inner = [3]),
d1 = DataFrame(a = repeat([1:3;], inner = [4]),
b = repeat([1:4;], inner = [3]),
c = randn(12),
d = randn(12),
e = map(string, ['a':'l']))
e = map(string, 'a':'l'))

stack(d1, :a)
d1s = stack(d1, [:a, :b])
@@ -159,8 +159,8 @@ module TestData
d1m = meltdf(d1[[1,3,4]], :a)
@test names(d1m) == [:variable, :value, :a]

d1s[:id] = [1:12, 1:12]
d1s2[:id] = [1:12, 1:12]
d1s[:id] = [1:12; 1:12]
d1s2[:id] = [1:12; 1:12]
d1us = unstack(d1s, :id, :variable, :value)
d1us2 = unstack(d1s2)
d1us3 = unstack(d1s2, :variable, :value)
@@ -179,11 +179,11 @@ module TestData
#test_group("merge")

srand(1)
df1 = DataFrame(a = shuffle!([1:10]),
df1 = DataFrame(a = shuffle!([1:10;]),
b = [:A,:B][rand(1:2, 10)],
v1 = randn(10))

df2 = DataFrame(a = shuffle!(reverse([1:5])),
df2 = DataFrame(a = shuffle!(reverse([1:5;])),
b2 = [:A,:B,:C][rand(1:3, 5)],
v2 = randn(5))

2 changes: 1 addition & 1 deletion test/dataframe.jl
@@ -187,7 +187,7 @@ module TestDataFrame
# zero-row dataframe and subdataframe test
df = DataFrame(x=[], y=[])
@test nrow(df) == 0
df = DataFrame(x=[1:3], y=[3:5])
df = DataFrame(x=[1:3;], y=[3:5;])
sdf = sub(df, df[:x] .== 4)
@test size(sdf, 1) == 0

20 changes: 10 additions & 10 deletions test/formula.jl
@@ -12,16 +12,16 @@ module TestFormula
#test_group("Basic tests")

d = DataFrame()
d[:y] = [1:4]
d[:x1] = [5:8]
d[:x2] = [9:12]
d[:x3] = [13:16]
d[:x4] = [17:20]

x1 = [5.:8]
x2 = [9.:12]
x3 = [13.:16]
x4 = [17.:20]
d[:y] = [1:4;]
d[:x1] = [5:8;]
d[:x2] = [9:12;]
d[:x3] = [13:16;]
d[:x4] = [17:20;]

x1 = [5.:8;]
x2 = [9.:12;]
x3 = [13.:16;]
x4 = [17.:20;]
f = y ~ x1 + x2
mf = ModelFrame(f, d)
## @test mm.response_colnames == ["y"] # nope: no response_colnames
2 changes: 1 addition & 1 deletion test/sort.jl
@@ -4,7 +4,7 @@ module TestSort

dv1 = @data([9, 1, 8, NA, 3, 3, 7, NA])
dv2 = 1.0 * dv1
dv3 = DataArray([1:8])
dv3 = DataArray([1:8;])
pdv1 = convert(PooledDataArray, dv1)

d = DataFrame(dv1 = dv1, dv2 = dv2, dv3 = dv3, pdv1 = pdv1)
2 changes: 1 addition & 1 deletion test/utils.jl
@@ -31,7 +31,7 @@ module TestUtils
"Expected if Julia was not built from source.")
end

@test DataFrames.countna([1:3]) == 0
@test DataFrames.countna([1:3;]) == 0

data = @data rand(20)
@test DataFrames.countna(data) == 0