Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: collect without inference #135

Merged
merged 11 commits into from
Mar 19, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/IndexedTables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import Base:
permutedims, reducedim, serialize, deserialize, sort, sort!

export NDSparse, flush!, aggregate!, aggregate_vec, where, pairs, convertdim, columns, column, rows,
itable, update!, aggregate, reducedim_vec, dimlabels
itable, update!, aggregate, reducedim_vec, dimlabels, collect_columns

const Tup = Union{Tuple,NamedTuple}
const DimName = Union{Int,Symbol}
Expand All @@ -19,6 +19,7 @@ include("utils.jl")
include("columns.jl")
include("table.jl")
include("ndsparse.jl")
include("collect.jl")

#=
# Poor man's traits
Expand Down
113 changes: 113 additions & 0 deletions src/collect.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""
`collect_columns(itr)`

Collect an iterable as a `Columns` object if it iterates `Tuples` or `NamedTuples`, as a normal
`Array` otherwise.

## Examples

```jldoctest collect
julia> s = [(1,2), (3,4)];

julia> collect_columns(s)
2-element Columns{Tuple{Int64,Int64}}:
(1, 2)
(3, 4)

julia> s = Iterators.filter(isodd, 1:8);

julia> collect_columns(s)
4-element Array{Int64,1}:
1
3
5
7
```
"""
collect_columns(itr) = collect_columns(itr, Base.iteratorsize(itr))

# Collect an iterable whose length is known up front.
#
# The destination is created once with `length(itr)` slots. Its element type is seeded
# from the concrete type of the first element; `collect_to_columns!` takes over from the
# second element and widens the destination if a later element does not fit.
# NOTE: `next` is called without a preceding `done` check, so `itr` has to yield at
# least one element.
function collect_columns(itr, ::Union{Base.HasShape, Base.HasLength})
    first_el, state = next(itr, start(itr))
    out = similar(arrayof(typeof(first_el)), length(itr))
    out[1] = first_el
    return collect_to_columns!(out, itr, 2, state)
end

# Fill the pre-sized `dest` with the remaining elements of `itr`, starting at index
# `offs` and iteration state `st`.
#
# Every element is checked against the element type `T` of `dest`. As soon as one does
# not fit, a widened destination is obtained from `widencolumns`, the offending element
# is stored in it, and collection continues recursively on the widened destination.
# Returns the destination that ends up holding all elements.
function collect_to_columns!(dest::AbstractArray{T}, itr, offs, st) where {T}
    pos = offs
    while !done(itr, st)
        item, st = next(itr, st)
        if !fieldwise_isa(item, T)
            # type mismatch: switch to a wider destination and re-dispatch on its eltype
            widened = widencolumns(dest, pos, item, T)
            @inbounds widened[pos] = item
            return collect_to_columns!(widened, itr, pos + 1, st)
        end
        @inbounds dest[pos] = item
        pos += 1
    end
    return dest
end

# Collect an iterable whose length is not known in advance.
#
# The destination starts out with a single slot holding the first element (whose concrete
# type seeds the element type) and is then grown element by element by `grow_to_columns!`.
# NOTE: `next` is called without a preceding `done` check, so `itr` has to yield at
# least one element.
function collect_columns(itr, ::Base.SizeUnknown)
    first_el, state = next(itr, start(itr))
    out = similar(arrayof(typeof(first_el)), 1)
    out[1] = first_el
    return grow_to_columns!(out, itr, 2, state)
end

# Append the remaining elements of `itr` to `dest`, starting from iteration state `st`;
# `offs` is the position the next element will occupy.
#
# Every element is checked against the element type `T` of `dest`. As soon as one does
# not fit, a widened destination is obtained from `widencolumns`, the offending element
# is pushed onto it, and collection continues recursively on the widened destination.
# Returns the destination that ends up holding all elements.
function grow_to_columns!(dest::AbstractArray{T}, itr, offs, st) where {T}
    pos = offs
    while !done(itr, st)
        item, st = next(itr, st)
        if !fieldwise_isa(item, T)
            # type mismatch: switch to a wider destination and re-dispatch on its eltype
            widened = widencolumns(dest, pos, item, T)
            push!(widened, item)
            return grow_to_columns!(widened, itr, pos + 1, st)
        end
        push!(dest, item)
        pos += 1
    end
    return dest
end

# Decide, from types alone, whether a tuple-like element of type `S` can be stored in a
# destination whose element type is `T`. The answer is computed once per `(S, T)` pair
# when the function is generated, so the call itself is a constant `true` or `false`.
#
# When `T` is itself a tuple-like `DataType` with the same number of type parameters as
# `S`, the parameters are compared position by position. In every other case (`T` is not
# tuple-like, or the arities differ) a field-by-field comparison is meaningless, so the
# plain subtype check `S <: T` decides. Without this guard a pairwise walk would stop at
# the shorter parameter list and report `true` for, e.g., a tuple against `Int`.
@generated function fieldwise_isa(el::S, ::Type{T}) where {S<:Tup, T}
    sp = S.parameters
    if T isa DataType && T <: Tup && length(sp) == length(T.parameters)
        tp = T.parameters
        ok = true
        # explicit loop (no generator/closure) keeps the generator body free of closures
        for k in 1:length(sp)
            ok = ok && (sp[k] <: tp[k])
        end
    else
        ok = S <: T
    end
    return ok ? :(true) : :(false)
end

# Fallback for elements that are not tuple-like: the element fits the destination element
# type `T` exactly when its type `S` is a subtype of `T`. The answer is fixed when the
# function is generated for a given `(S, T)` pair.
@generated function fieldwise_isa(el::S, ::Type{T}) where {S, T}
    return S <: T ? :(true) : :(false)
end
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this doesn't need to be @generated; a Base.@pure or @inline function might work here. But I guess once a package has @generated, it really doesn't matter how many of them there are :-p

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Base.@pure didn't seem to work as nicely. Maybe we can leave it as is for now and do a "remove all generated functions" PR in the future (maybe on Julia 0.7).


# Build the destination to continue collecting into after the tuple-like element `el`
# (of type `S`) turned out not to fit the current element type `T` of `dest`.
#
# Each column whose field type cannot hold the matching field of `el` is swapped for a
# newly allocated `Array` whose element type is the `promote_type` of the old and the
# incoming field type. Only the first `i-1` entries of the old column are copied across;
# storing `el` itself is left to the caller.
function widencolumns(dest, i, el::S, ::Type{T}) where{S <: Tup, T}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this should have T <: Tup as well?

# Field types of the incoming element (sp) and of the current destination eltype (tp).
sp, tp = S.parameters, T.parameters
# Positions whose incoming field type is not a subtype of the destination field type.
# NOTE(review): zip stops at the shorter list, so a differing number of fields is not
# detected here.
idx = find(!(s <: t) for (s, t) in zip(sp, tp))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be good to see if sp and tp have the same length, else return Array{Tuple}(length(dest)); copy!(newcol, 1, dest, 1, i-1)?
`

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, figured out that was missing when changing map and luckily one test checked for that. More than same length, same fieldnames (we also shouldn't accept NamedTuples where things are called differently). I'll change that in the future map PR.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I should also add a test for #101 in the map PR as that should work now.

# Start from the existing destination and replace only the columns listed in idx.
new = dest
for l in idx
# New column with the common type of the old and the incoming field type, same length
# as dest.
newcol = Array{promote_type(sp[l], tp[l])}(length(dest))
# Carry over the i-1 values collected so far in column l.
copy!(newcol, 1, column(dest, l), 1, i-1)
# presumably setcol returns `new` with column l swapped for newcol — TODO confirm
new = setcol(new, l, newcol)
end
new
end

# Build the destination to continue collecting into after the non-tuple element `el`
# (of type `S`) turned out not to fit the current element type `T` of `dest`.
#
# Allocates an `Array` as long as `dest` whose element type is `promote_type(S, T)` and
# copies the first `i-1` entries of `dest` into it; storing `el` itself is left to the
# caller.
function widencolumns(dest, i, el::S, ::Type{T}) where{S, T}
    widened_eltype = promote_type(S, T)
    widened = Array{widened_eltype}(length(dest))
    copy!(widened, 1, dest, 1, i-1)
    return widened
end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ using Base.Test
include("test_core.jl")
include("test_utils.jl")
include("test_tabletraits.jl")
include("test_collect.jl")

end
69 changes: 69 additions & 0 deletions test/test_collect.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
@testset "collectnamedtuples" begin
    # homogeneous rows keep their concrete column types
    rows = [@NT(a = 1, b = 2), @NT(a = 1, b = 3)]
    @test collect_columns(rows) == Columns(@NT(a = Int[1, 1], b = Int[2, 3]))

    # test inferrability with constant eltype
    const_rows = [@NT(a = 1, b = 2), @NT(a = 1, b = 2), @NT(a = 1, b = 12)]
    first_row, state = next(const_rows, start(const_rows))
    buffer = similar(IndexedTables.arrayof(typeof(first_row)), 3)
    buffer[1] = first_row
    @inferred IndexedTables.collect_to_columns!(buffer, const_rows, 2, state)

    # one column has to be widened from Int to Float64
    rows = [@NT(a = 1, b = 2), @NT(a = 1.2, b = 3)]
    expected = Columns(@NT(a = [1, 1.2], b = Int[2, 3]))
    @test collect_columns(rows) == expected
    @test typeof(collect_columns(rows)) == typeof(expected)

    # both columns have to be widened at the same element
    rows = [@NT(a = 1, b = 2), @NT(a = 1.2, b = "3")]
    expected = Columns(@NT(a = [1, 1.2], b = Any[2, "3"]))
    @test collect_columns(rows) == expected
    @test typeof(collect_columns(rows)) == typeof(expected)

    # columns are widened at different elements
    rows = [@NT(a = 1, b = 2), @NT(a = 1.2, b = 2), @NT(a = 1, b = "3")]
    expected = Columns(@NT(a = [1, 1.2, 1], b = Any[2, 2, "3"]))
    @test collect_columns(rows) == expected
    @test typeof(collect_columns(rows)) == typeof(expected)

    # length unknown
    odds = Iterators.filter(isodd, 1:8)
    nt_gen = (@NT(a = i+1, b = i-1) for i in odds)
    @test collect_columns(nt_gen) == Columns(@NT(a = [2, 4, 6, 8], b = [0, 2, 4, 6]))
    nt_gen_real = (i == 1 ? @NT(a = 1.2, b =i-1) : @NT(a = i+1, b = i-1) for i in odds)
    @test collect_columns(nt_gen_real) == Columns(@NT(a = Real[1.2, 4, 6, 8], b = [0, 2, 4, 6]))
end

@testset "collecttuples" begin
    # homogeneous rows keep their concrete column types and stay inferrable
    rows = [(1, 2), (1, 3)]
    @test collect_columns(rows) == Columns((Int[1, 1], Int[2, 3]))
    @inferred collect_columns(rows)

    # one column has to be widened from Int to Float64
    rows = [(1, 2), (1.2, 3)]
    @test collect_columns(rows) == Columns(([1, 1.2], Int[2, 3]))

    # both columns have to be widened at the same element
    rows = [(1, 2), (1.2, "3")]
    expected = Columns(([1, 1.2], Any[2, "3"]))
    @test collect_columns(rows) == expected
    @test typeof(collect_columns(rows)) == typeof(expected)

    # columns are widened at different elements
    rows = [(1, 2), (1.2, 2), (1, "3")]
    @test collect_columns(rows) == Columns(([1, 1.2, 1], Any[2, 2, "3"]))

    # length unknown
    odds = Iterators.filter(isodd, 1:8)
    tuple_gen = ((i+1, i-1) for i in odds)
    @test collect_columns(tuple_gen) == Columns(([2, 4, 6, 8], [0, 2, 4, 6]))
    tuple_gen_real = (i == 1 ? (1.2, i-1) : (i+1, i-1) for i in odds)
    expected = Columns(([1.2, 4, 6, 8], [0, 2, 4, 6]))
    @test collect_columns(tuple_gen_real) == expected
    @test typeof(collect_columns(tuple_gen_real)) == typeof(expected)
end

@testset "collectscalars" begin
    # plain integers: result matches a literal vector and is inferrable
    ints = (i for i in 1:3)
    @test collect_columns(ints) == [1,2,3]
    @inferred collect_columns(ints)

    # mixed Float64/Int elements, length known
    mixed = (i == 1 ? 1.2 : i for i in 1:3)
    @test collect_columns(mixed) == collect(mixed)

    # length unknown
    odds = Iterators.filter(isodd, 1:100)
    @test collect_columns(odds) == collect(odds)
    mixed_odds = (i == 1 ? 1.5 : i for i in odds)
    @test collect_columns(mixed_odds) == collect(mixed_odds)
    @test eltype(collect_columns(mixed_odds)) == Float64
end