From 4b21f3c264e691316a94b4f892e5630896353c25 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Mon, 25 Apr 2016 13:04:47 -0500 Subject: [PATCH 1/4] check_sameinds -> checksame_inds, samestorageorder -> checksame_storageorder --- src/core.jl | 32 ++++++++++++++++---------------- test/internal.jl | 4 ++-- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/core.jl b/src/core.jl index d6c5330..6da46c6 100644 --- a/src/core.jl +++ b/src/core.jl @@ -101,13 +101,13 @@ end done(itr::FirstToLastIterator, i) = done(i[1], i[2]) function sync(A::AllElements, B::AllElements) - check_sameinds(A, B) - _sync(samestorageorder(A, B), A, B) + checksame_inds(A, B) + _sync(checksame_storageorder(A, B), A, B) end function sync(A::AllElements, B::AllElements...) - check_sameinds(A, B...) - _sync(samestorageorder(A, B...), A, B...) + checksame_inds(A, B...) + _sync(checksame_storageorder(A, B...), A, B...) end _sync(::Type{Val{true}}, A, B) = zip(each(A), each(B)) @@ -120,7 +120,7 @@ sync(A, B::StoredElements) = sync_stored(A, B) sync(A::StoredElements, B) = sync_stored(A, B) #function sync_stored(A, B) -# check_sameinds(A, B) +# checksame_inds(A, B) #end ### Utility methods @@ -154,12 +154,12 @@ ranges(out, A, d) = out @inline ranges(out, A, d, i, I...) = ranges((out..., i), A, d+1, I...) @inline ranges(out, A, d, i::Colon, I...) = ranges((out..., inds(A, d)), A, d+1, I...) -check_sameinds(::Type{Bool}, A::ArrayOrWrapper) = true -check_sameinds(::Type{Bool}, A::ArrayOrWrapper, B::ArrayOrWrapper) = extent_inds(A) == extent_inds(B) -check_sameinds(::Type{Bool}, A, B, C...) = check_sameinds(Bool, A, B) && check_sameinds(Bool, B, C...) -check_sameinds(A) = check_sameinds(Bool, A) -check_sameinds(A, B) = check_sameinds(Bool, A, B) || throw(DimensionMismatch("extent inds $(extent_inds(A)) and $(extent_inds(B)) do not match")) -check_sameinds(A, B, C...) = check_sameinds(A, B) && check_sameinds(B, C...) +checksame_inds(::Type{Bool}, A::ArrayOrWrapper) = true +checksame_inds(::Type{Bool}, A::ArrayOrWrapper, B::ArrayOrWrapper) = extent_inds(A) == extent_inds(B) +checksame_inds(::Type{Bool}, A, B, C...) = checksame_inds(Bool, A, B) && checksame_inds(Bool, B, C...) +checksame_inds(A) = checksame_inds(Bool, A) +checksame_inds(A, B) = checksame_inds(Bool, A, B) || throw(DimensionMismatch("extent inds $(extent_inds(A)) and $(extent_inds(B)) do not match")) +checksame_inds(A, B, C...) = checksame_inds(A, B) && checksame_inds(B, C...) # extent_inds drops sliced dimensions extent_inds(A::AbstractArray) = inds(A) @@ -174,11 +174,11 @@ columnmajoriterator(::LinearSlow, A) = FirstToLastIterator(A, CartesianRange(siz columnmajoriterator(W::ArrayIndexingWrapper) = CartesianRange(ranges(W)) -samestorageorder(A) = Val{true} -samestorageorder(A, B) = _sso(storageorder(A), storageorder(B)) -samestorageorder(A, B, C...) = samestorageorder(_sso(storageorder(A), storageorder(B)), B, C...) -samestorageorder(::Type{Val{true}}, A, B...) = samestorageorder(A, B...) -samestorageorder(::Type{Val{false}}, A, B...) = Val{false} +checksame_storageorder(A) = Val{true} +checksame_storageorder(A, B) = _sso(storageorder(A), storageorder(B)) +checksame_storageorder(A, B, C...) = checksame_storageorder(_sso(storageorder(A), storageorder(B)), B, C...) +checksame_storageorder(::Type{Val{true}}, A, B...) = checksame_storageorder(A, B...) +checksame_storageorder(::Type{Val{false}}, A, B...) = Val{false} _sso(::FirstToLast, ::FirstToLast) = Val{true} _sso{p}(::OtherOrder{p}, ::OtherOrder{p}) = Val{true} _sso(::StorageOrder, ::StorageOrder) = Val{false} diff --git a/test/internal.jl b/test/internal.jl index 475f512..f1fe4b8 100644 --- a/test/internal.jl +++ b/test/internal.jl @@ -1,6 +1,6 @@ A = rand(3,4) -AIP.check_sameinds(A, A) -@test_throws DimensionMismatch AIP.check_sameinds(A, A') +AIP.checksame_inds(A, A) +@test_throws DimensionMismatch AIP.checksame_inds(A, A') @test isa(AIP.storageorder(A), AIP.FirstToLast) R = reshape(A, (2,3,2)) @test isa(AIP.storageorder(R), AIP.FirstToLast) From 99222abc6203537814e405abe6a2c7c2e14be956 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 26 Apr 2016 09:54:41 -0500 Subject: [PATCH 2/4] Implement optimized methods for ReshapedArrays --- src/ArrayIterationPlayground.jl | 5 +-- src/core.jl | 34 ++++++++++++++++++- src/reshaped.jl | 37 +++++++++++++++++++++ src/types.jl | 12 +++++++ test/dense.jl | 59 +++++++++++++++++++++++++++++++-- test/internal.jl | 24 +++++++++++--- test/runtests.jl | 1 + 7 files changed, 162 insertions(+), 10 deletions(-) create mode 100644 src/reshaped.jl diff --git a/src/ArrayIterationPlayground.jl b/src/ArrayIterationPlayground.jl index ff8c5e2..1a6766f 100644 --- a/src/ArrayIterationPlayground.jl +++ b/src/ArrayIterationPlayground.jl @@ -1,13 +1,14 @@ module ArrayIterationPlayground -import Base: getindex, setindex!, start, next, done, length, eachindex, show, parent -using Base: ReshapedArray, linearindexing, LinearFast, LinearSlow +import Base: getindex, setindex!, start, next, done, length, eachindex, show, parent, isless +using Base: ReshapedArray, ReshapedIndex, linearindexing, LinearFast, LinearSlow, LinearIndexing using Base.PermutedDimsArrays: PermutedDimsArray export inds, index, value, stored, each, sync include("types.jl") include("core.jl") +include("reshaped.jl") include("sparse.jl") end # module diff --git a/src/core.jl b/src/core.jl index 6da46c6..efa7328 100644 --- a/src/core.jl +++ b/src/core.jl @@ -9,6 +9,10 @@ function show(io::IO, W::ArrayIndexingWrapper) print(io, "iteration hint over ", hint_string(W), " of a ", summary(W.data), " over the region ", W.indexes) end +function show(io::IO, iter::ContigCartIterator) + print(io, "Cartesian iterator with:\n domain ", iter.arrayrange, "\n start ", iter.columnrange.start, "\n stop ", iter.columnrange.stop) +end + parent(W::ArrayIndexingWrapper) = W.data """ @@ -83,7 +87,10 @@ each{T,N}(A::AbstractArray{T,N}, indexes::NTuple{N,IterIndex}) = each(ArrayIndex # Fallback definitions for each each{A,I,isstored}(W::ArrayIndexingWrapper{A,I,false,isstored}) = (itr = each(index(W)); ValueIterator{A,typeof(itr)}(W.data, itr)) each{A,N,isstored}(W::ArrayIndexingWrapper{A,NTuple{N,Colon},true,isstored}) = eachindex(W.data) -each{A,I,isstored}(W::ArrayIndexingWrapper{A,I,true,isstored}) = CartesianRange(ranges(W)) +each{A,I,isstored}(W::ArrayIndexingWrapper{A,I,true,isstored}) = _each(contiguous_index(W.indexes), W) + +_each(::Contiguous, W) = contiguous_iterator(W) +_each(::Any, W) = CartesianRange(ranges(W)) start(vi::ValueIterator) = start(vi.iter) done(vi::ValueIterator, s) = done(vi.iter, s) @@ -182,3 +189,28 @@ checksame_storageorder(::Type{Val{false}}, A, B...) = Val{false} _sso(::FirstToLast, ::FirstToLast) = Val{true} _sso{p}(::OtherOrder{p}, ::OtherOrder{p}) = Val{true} _sso(::StorageOrder, ::StorageOrder) = Val{false} + +# indexes is contiguous if it's one of: +# Colon... +# Colon..., Union{UnitRange,Int}, Int... +contiguous_index(I) = contiguous_index(Contiguous(), I...) +@inline contiguous_index(c::Contiguous, ::Colon, I...) = contiguous_index(c, I...) +@inline contiguous_index(::Contiguous, ::Any, I...) = contiguous_index(MaybeContiguous(), I...) +@inline contiguous_index(c::MaybeContiguous, ::Int, I...) = contiguous_index(c, I...) +@inline contiguous_index(::MaybeContiguous, ::Any, I...) = NonContiguous() +@inline contiguous_index(::Contiguity) = Contiguous() # won't get here for NonContiguous + +contiguous_iterator(W) = _contiguous_iterator(W, linearindexing(parent(W))) +function _contiguous_iterator(W, ::LinearFast) + f, l = firstlast(W) + f:l +end +_contiguous_iterator(W, ::LinearSlow) = CartesianRange(ranges(W)) + +function firstlast(W) + A = parent(W) + psz = size(A) + f = sub2ind(psz, map((i,j)->first(i[j]), inds(A), W.indexes)...) + l = sub2ind(psz, map((i,j)->last(i[j]), inds(A), W.indexes)...) + f, l +end diff --git a/src/reshaped.jl b/src/reshaped.jl new file mode 100644 index 0000000..0c8fe4a --- /dev/null +++ b/src/reshaped.jl @@ -0,0 +1,37 @@ +# optimized methods for ReshapedArrays + +# column iteration +start(iter::ContigCartIterator) = iter.columnrange.start +function next(iter::ContigCartIterator, state) + item, newstate = next(iter.arrayrange, state) + ReshapedIndex(item), newstate +end +done(iter::ContigCartIterator, state) = isless(iter.columnrange.stop, state) + +function _contiguous_iterator{AA<:ReshapedArray}(W::ArrayIndexingWrapper{AA}, ::LinearSlow) + fi, li = firstlast(W) + A = parent(W) + f = Base.ind2sub_rs(A.mi, fi) + l = Base.ind2sub_rs(A.mi, li) + ContigCartIterator(CartesianRange(inds(parent(A))), + CartesianRange(CartesianIndex(f), CartesianIndex(l))) +end + +# Branching implementation +# @inline isless{N}(I1::CartesianIndex{N}, I2::CartesianIndex{N}) = _isless(I1.I, I2.I) +# @inline function _isless{N}(I1::NTuple{N,Int}, I2::NTuple{N,Int}) +# i1, i2 = I1[N], I2[N] +# isless(i1, i2) && return true +# isless(i2, i1) && return false +# _isless(Base.front(I1), Base.front(I2)) +# end +# _isless(::Tuple{}, ::Tuple{}) = false + +# Select implementation +# @inline isless{N}(I1::CartesianIndex{N}, I2::CartesianIndex{N}) = _isless(0, I1.I, I2.I) +# @inline function _isless{N}(ret, I1::NTuple{N,Int}, I2::NTuple{N,Int}) +# newret = ifelse(ret==0, icmp(I1[N], I2[N]), ret) +# _isless(newret, Base.front(I1), Base.front(I2)) +# end +# _isless(ret, ::Tuple{}, ::Tuple{}) = ifelse(ret==1, true, false) +# icmp(a, b) = ifelse(isless(a,b), 1, ifelse(a==b, 0, -1)) diff --git a/src/types.jl b/src/types.jl index 79a400d..358294b 100644 --- a/src/types.jl +++ b/src/types.jl @@ -37,3 +37,15 @@ immutable FirstToLastIterator{N,AA} parent::AA itr::CartesianRange{N} end + +# Contiguous ranges +abstract Contiguity +immutable Contiguous <: Contiguity end +immutable NonContiguous <: Contiguity end +immutable MaybeContiguous <: Contiguity end # intermediate type used in assessing contiguity + +# Contiguous cartesian ranges. Sometimes needed for LinearSlow arrays. +immutable ContigCartIterator{N} + arrayrange::CartesianRange{N} + columnrange::CartesianRange{N} +end diff --git a/test/dense.jl b/test/dense.jl index d0398d1..46bfdca 100644 --- a/test/dense.jl +++ b/test/dense.jl @@ -4,8 +4,10 @@ B = sub(A, 1:2, 1:3) @test each(index(A)) == eachindex(A) == 1:length(A) @test each(index(B)) == CartesianRange((1:2, 1:3)) -@test each(index(A, :, 1:2)) == CartesianRange((1:2, 1:2)) -@test each(index(A, :, 2:3)) == CartesianRange((1:2, 2:3)) +@test each(index(A, :, 1:2)) == 1:4 +@test each(index(B, :, 1:2)) == CartesianRange((1:2, 1:2)) +@test each(index(A, :, 2:3)) == 3:6 +@test each(index(B, :, 2:3)) == CartesianRange((1:2, 2:3)) @test each(index(A, 1, :)) == CartesianRange((1, 1:3)) k = 0 @@ -187,3 +189,56 @@ for (IB, IA) in sync(index(B, :, 1), index(A, :, 2)) B[IB] = A[IA] end @test B == [A[1,2] -1; A[2,2] -1] + +## optimized ReshapedArray iterators +A = reshape(1:12, 4, 3) # LinearFast +@test each(index(A, :, 2)) == 5:8 +@test each(index(A, 2:3, 3)) == 10:11 +A = sub(copy(reshape(1:15, 5, 3)), 1:4, :) # LinearSlow +a = reshape(A, 12) +@test each(index(a, 1:4)) == CCI(CartesianRange(size(A)), + CartesianRange(CartesianIndex(1,1),CartesianIndex(4,1))) +k = 0 +for I in each(index(a, 1:4)) + @test a[I] == a[k+=1] +end +@test each(index(a, 3:9)) == CCI(CartesianRange(size(A)), + CartesianRange(CartesianIndex(3,1),CartesianIndex(1,3))) +k = 2 +for I in each(index(a, 3:9)) + @test a[I] == a[k+=1] +end + +function sum_cols_slow!(S, A) # slow for ReshapedArrays + fill!(S, 0) + @assert inds(S,2) == inds(A,2) + for j in inds(A,2) + tmp = S[1,j] + @inbounds for i in inds(A, 1) + tmp += A[i,j] + end + S[1,j] = tmp + end + S +end + +function sum_cols_fast!(S, A) + fill!(S, 0) + @assert inds(S,2) == inds(A,2) + for j in inds(A,2) + tmp = S[1,j] + @inbounds for I in each(index(A, :, j)) + tmp += A[I] + end + S[1,j] = tmp + end + S +end + +A = rand(1000,1,999) +B = sub(A, 1:size(A,1)-1, 1, 1:size(A,3)-1) +R = reshape(B, (size(B,1),size(B,3))) +@test isa(each(index(R, :, 2)), CCI) +S1 = zeros(1,size(R,2)) +S2 = similar(S1) +@test sum_cols_fast!(S1, R) == sum_cols_slow!(S2, R) diff --git a/test/internal.jl b/test/internal.jl index f1fe4b8..ef271bc 100644 --- a/test/internal.jl +++ b/test/internal.jl @@ -1,13 +1,27 @@ A = rand(3,4) AIP.checksame_inds(A, A) @test_throws DimensionMismatch AIP.checksame_inds(A, A') -@test isa(AIP.storageorder(A), AIP.FirstToLast) +@test isa(@inferred(AIP.storageorder(A)), AIP.FirstToLast) R = reshape(A, (2,3,2)) -@test isa(AIP.storageorder(R), AIP.FirstToLast) +@test isa(@inferred(AIP.storageorder(R)), AIP.FirstToLast) S = sub(A, 1:3, 1:4) R = reshape(S, (2,3,2)) -@test isa(AIP.storageorder(R), AIP.FirstToLast) +@test isa(@inferred(AIP.storageorder(R)), AIP.FirstToLast) B = PermutedDimsArray(A, [2,1]) -@test isa(AIP.storageorder(B), AIP.OtherOrder{(2,1)}) +@test isa(@inferred(AIP.storageorder(B)), AIP.OtherOrder{(2,1)}) R = reshape(B, (2,2,3)) -@test isa(AIP.storageorder(R), AIP.NoOrder) +@test isa(@inferred(AIP.storageorder(R)), AIP.NoOrder) + +@test @inferred(AIP.contiguous_index((:, :))) == AIP.Contiguous() +@test @inferred(AIP.contiguous_index((:, 3))) == AIP.Contiguous() +@test @inferred(AIP.contiguous_index((3, :))) == AIP.NonContiguous() +@test @inferred(AIP.contiguous_index((3, 3))) == AIP.Contiguous() +@test @inferred(AIP.contiguous_index((:, :, 3)) ) == AIP.Contiguous() +@test @inferred(AIP.contiguous_index((:, :, 1:2))) == AIP.Contiguous() +@test @inferred(AIP.contiguous_index((:, 3, :)) ) == AIP.NonContiguous() +@test @inferred(AIP.contiguous_index((:, 3, 1:2))) == AIP.NonContiguous() + +@test isless(CartesianIndex((1,1)), CartesianIndex((2,1))) +@test isless(CartesianIndex((1,1)), CartesianIndex((1,2))) +@test isless(CartesianIndex((2,1)), CartesianIndex((1,2))) +@test !isless(CartesianIndex((1,2)), CartesianIndex((2,1))) diff --git a/test/runtests.jl b/test/runtests.jl index 6912443..43d08bb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,6 +3,7 @@ using Base.Test using Base.PermutedDimsArrays: PermutedDimsArray const AIP = ArrayIterationPlayground +const CCI = AIP.ContigCartIterator include("array_types.jl") # just for testing From 5cc0800fdf994bef77002235be54d695ecf357a4 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 26 Apr 2016 13:10:53 -0500 Subject: [PATCH 3/4] Optimize firstlast and CCI iteration --- src/core.jl | 16 ++++++++++++---- src/reshaped.jl | 6 +++--- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/core.jl b/src/core.jl index efa7328..854956f 100644 --- a/src/core.jl +++ b/src/core.jl @@ -193,7 +193,7 @@ _sso(::StorageOrder, ::StorageOrder) = Val{false} # indexes is contiguous if it's one of: # Colon... # Colon..., Union{UnitRange,Int}, Int... -contiguous_index(I) = contiguous_index(Contiguous(), I...) +@inline contiguous_index(I) = contiguous_index(Contiguous(), I...) @inline contiguous_index(c::Contiguous, ::Colon, I...) = contiguous_index(c, I...) @inline contiguous_index(::Contiguous, ::Any, I...) = contiguous_index(MaybeContiguous(), I...) @inline contiguous_index(c::MaybeContiguous, ::Int, I...) = contiguous_index(c, I...) @@ -209,8 +209,16 @@ _contiguous_iterator(W, ::LinearSlow) = CartesianRange(ranges(W)) function firstlast(W) A = parent(W) - psz = size(A) - f = sub2ind(psz, map((i,j)->first(i[j]), inds(A), W.indexes)...) - l = sub2ind(psz, map((i,j)->last(i[j]), inds(A), W.indexes)...) + f = firstlast(first, A, W.indexes) + l = firstlast(last, A, W.indexes) f, l end + +# This effectively implements 2-argument map, but without allocating +# intermediate tuples +@inline firstlast(f, A, indexes) = sub2ind(size(A), _firstlast((), f, A, indexes...)...) +@inline _firstlast(out, f, A) = out +@inline function _firstlast(out, f, A, i1, indexes...) + d = length(out)+1 + _firstlast((out..., f(inds(A, d)[i1])), f, A, indexes...) +end diff --git a/src/reshaped.jl b/src/reshaped.jl index 0c8fe4a..9143f20 100644 --- a/src/reshaped.jl +++ b/src/reshaped.jl @@ -1,12 +1,12 @@ # optimized methods for ReshapedArrays # column iteration -start(iter::ContigCartIterator) = iter.columnrange.start -function next(iter::ContigCartIterator, state) +@inline start(iter::ContigCartIterator) = iter.columnrange.start +@inline function next(iter::ContigCartIterator, state) item, newstate = next(iter.arrayrange, state) ReshapedIndex(item), newstate end -done(iter::ContigCartIterator, state) = isless(iter.columnrange.stop, state) +@inline done(iter::ContigCartIterator, state) = isless(iter.columnrange.stop, state) function _contiguous_iterator{AA<:ReshapedArray}(W::ArrayIndexingWrapper{AA}, ::LinearSlow) fi, li = firstlast(W) From 702e76b83b2acb7e4d7a4085970e62ac06fcc8df Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 26 Apr 2016 13:39:46 -0500 Subject: [PATCH 4/4] Tweak ReshapedArray speed-test parameters --- test/dense.jl | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/test/dense.jl b/test/dense.jl index 46bfdca..6400b97 100644 --- a/test/dense.jl +++ b/test/dense.jl @@ -235,10 +235,22 @@ function sum_cols_fast!(S, A) S end -A = rand(1000,1,999) -B = sub(A, 1:size(A,1)-1, 1, 1:size(A,3)-1) -R = reshape(B, (size(B,1),size(B,3))) +const can_inline = Base.JLOptions().can_inline == 1 +dim1 = can_inline ? 10^6 : 10 +A = rand(dim1,2,5) +B = sub(A, 1:size(A,1)-1, :, :) +R = reshape(B, (size(B,1),prod(size(B)[2:end]))) @test isa(each(index(R, :, 2)), CCI) S1 = zeros(1,size(R,2)) S2 = similar(S1) @test sum_cols_fast!(S1, R) == sum_cols_slow!(S2, R) + +if can_inline + nfailures = 0 + for i = 1:5 + ts = @elapsed sum_cols_slow!(S2, R) + tf = @elapsed sum_cols_fast!(S2, R) + nfailures += tf > ts + end + @test nfailures <= 1 +end