From cbd77c2dc003346fc3648f9aa17776090b9f535d Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Thu, 5 May 2016 12:43:03 -0400 Subject: [PATCH] simplify mapreduce_impl this is possible now that Functors are not needed for performance fixes #16185 --- base/linalg/dense.jl | 19 --------------- base/reduce.jl | 58 ++++++++++++++++---------------------------- 2 files changed, 21 insertions(+), 56 deletions(-) diff --git a/base/linalg/dense.jl b/base/linalg/dense.jl index 4724d5695acc5..e62731e1b528e 100644 --- a/base/linalg/dense.jl +++ b/base/linalg/dense.jl @@ -36,25 +36,6 @@ isposdef(x::Number) = imag(x)==0 && real(x) > 0 stride1(x::Array) = 1 stride1(x::StridedVector) = stride(x, 1)::Int -import Base: mapreduce_seq_impl - -mapreduce_seq_impl{T<:BlasReal}(::typeof(abs), ::typeof(+), a::Union{Array{T},StridedVector{T}}, ifirst::Int, ilast::Int) = - BLAS.asum(ilast-ifirst+1, pointer(a, ifirst), stride1(a)) - -function mapreduce_seq_impl{T<:BlasReal}(::typeof(abs2), ::typeof(+), a::Union{Array{T},StridedVector{T}}, ifirst::Int, ilast::Int) - n = ilast-ifirst+1 - px = pointer(a, ifirst) - incx = stride1(a) - BLAS.dot(n, px, incx, px, incx) -end - -function mapreduce_seq_impl{T<:BlasComplex}(::typeof(abs2), ::typeof(+), a::Union{Array{T},StridedVector{T}}, ifirst::Int, ilast::Int) - n = ilast-ifirst+1 - px = pointer(a, ifirst) - incx = stride1(a) - real(BLAS.dotc(n, px, incx, px, incx)) -end - function norm{T<:BlasFloat, TI<:Integer}(x::StridedVector{T}, rx::Union{UnitRange{TI},Range{TI}}) if minimum(rx) < 1 || maximum(rx) > length(x) throw(BoundsError(x, rx)) diff --git a/base/reduce.jl b/base/reduce.jl index 6906a75e4c172..f83c86bdb0970 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -93,33 +93,36 @@ foldr(op, itr) = mapfoldr(identity, op, itr) ## reduce & mapreduce -# mapreduce_***_impl require ifirst < ilast -function mapreduce_seq_impl(f, op, A::AbstractArray, ifirst::Int, ilast::Int) - fx1 = r_promote(op, f(A[ifirst])) - fx2 = f(A[ifirst+=1]) - v = op(fx1, fx2) - while ifirst < ilast - @inbounds Ai = A[ifirst+=1] - v = op(v, f(Ai)) - end - return v -end - -function mapreduce_pairwise_impl(f, op, A::AbstractArray, ifirst::Int, ilast::Int, blksize::Int) +function mapreduce_impl(f, op, A::AbstractArray, ifirst::Int, ilast::Int, blksize::Int=pairwise_blocksize(f, op)) if ifirst + blksize > ilast - return mapreduce_seq_impl(f, op, A, ifirst, ilast) + # sequential portion + fx1 = r_promote(op, f(A[ifirst])) + fx2 = r_promote(op, f(A[ifirst + 1])) + v = op(fx1, fx2) + @simd for i = ifirst + 2 : ilast + @inbounds Ai = A[i] + v = op(v, f(Ai)) + end + return v else + # pairwise portion imid = (ifirst + ilast) >>> 1 - v1 = mapreduce_pairwise_impl(f, op, A, ifirst, imid, blksize) - v2 = mapreduce_pairwise_impl(f, op, A, imid+1, ilast, blksize) + v1 = mapreduce_impl(f, op, A, ifirst, imid, blksize) + v2 = mapreduce_impl(f, op, A, imid+1, ilast, blksize) return op(v1, v2) end end mapreduce(f, op, itr) = mapfoldl(f, op, itr) mapreduce(f, op, v0, itr) = mapfoldl(f, op, v0, itr) -mapreduce_impl(f, op, A::AbstractArray, ifirst::Int, ilast::Int) = - mapreduce_pairwise_impl(f, op, A, ifirst, ilast, 1024) + +# Note: sum_seq usually uses four or more accumulators after partial +# unrolling, so each accumulator gets at most 256 numbers +pairwise_blocksize(f, op) = 1024 + +# This combination appears to show a benefit from a larger block size +pairwise_blocksize(::typeof(abs2), ::typeof(+)) = 4096 + # handling empty arrays mr_empty(f, op, T) = throw(ArgumentError("reducing over an empty collection is not allowed")) @@ -228,25 +231,6 @@ mapreduce(f, op::ShortCircuiting, itr::Any) = mapreduce_sc(f,op,itr) ## sum -function mapreduce_seq_impl(f, op::typeof(+), a::AbstractArray, ifirst::Int, ilast::Int) - s = r_promote(op, f(a[ifirst])) + f(a[ifirst+1]) - @simd for i = ifirst+2:ilast - @inbounds ai = a[i] - s += f(ai) - end - s -end - -# Note: sum_seq usually uses four or more accumulators after partial -# unrolling, so each accumulator gets at most 256 numbers -sum_pairwise_blocksize(f) = 1024 - -# This appears to show a benefit from a larger block size -sum_pairwise_blocksize(::typeof(abs2)) = 4096 - -mapreduce_impl(f, op::typeof(+), A::AbstractArray, ifirst::Int, ilast::Int) = - mapreduce_pairwise_impl(f, op, A, ifirst, ilast, sum_pairwise_blocksize(f)) - sum(f::Callable, a) = mapreduce(f, +, a) sum(a) = mapreduce(identity, +, a) sum(a::AbstractArray{Bool}) = countnz(a)