Skip to content

Commit

Permalink
Try working around the performance hit in common cases
Browse files Browse the repository at this point in the history
  • Loading branch information
mbauman committed Jun 13, 2018
1 parent dd88c6a commit bff1c48
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 22 deletions.
17 changes: 15 additions & 2 deletions base/broadcast.jl
Original file line number Diff line number Diff line change
Expand Up @@ -828,12 +828,25 @@ preprocess_args(dest, args::Tuple{}) = ()
end
end
bc′ = preprocess(dest, bc)
for I in eachindex(bc′)
@inbounds dest[I] = bc′[I]
if _is_simd_safe(dest, bc)
@inbounds @simd for I in eachindex(bc′)
dest[I] = bc′[I]
end
else
@inbounds for I in eachindex(bc′)
dest[I] = bc′[I]
end
end
return dest
end

_is_simd_safe(::Any, ::Any) = false
@inline _is_simd_safe(::Array, bc::Broadcasted) = _args_are_simd_safe(bc)
_args_are_simd_safe() = true
_args_are_simd_safe(::Any, args...) = false
@inline _args_are_simd_safe(::Union{Array, Number}, args...) = _args_are_simd_safe(args...)
@inline _args_are_simd_safe(bc::Broadcasted, args...) = Base.simdable(bc.f) isa Base.SIMDableFunction && _args_are_simd_safe(args...)

# Performance optimization: for BitArray outputs, we cache the result
# in a "small" Vector{Bool}, and then copy in chunks into the output
@inline function copyto!(dest::BitArray, bc::Broadcasted{Nothing})
Expand Down
21 changes: 21 additions & 0 deletions base/multidimensional.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1470,6 +1470,27 @@ end
end
return B
end
# When both arrays are ::Array the SIMD transform is safe
@noinline function extrema!(B::Array, A::Array)
sA = size(A)
sB = size(B)
for I in CartesianIndices(sB)
AI = A[I]
B[I] = (AI, AI)
end
Bmax = CartesianIndex(sB)
@inbounds @simd for I in CartesianIndices(sA)
J = min(Bmax,I)
BJ = B[J]
AI = A[I]
if AI < BJ[1]
B[J] = (AI, BJ[2])
elseif AI > BJ[2]
B[J] = (BJ[1], AI)
end
end
return B
end

# Show for pairs() with Cartesian indicies. Needs to be here rather than show.jl for bootstrap order
function Base.showarg(io::IO, r::Iterators.Pairs{<:Integer, <:Any, <:Any, T}, toplevel) where T <: Union{AbstractVector, Tuple}
Expand Down
35 changes: 27 additions & 8 deletions base/reduce.jl
Original file line number Diff line number Diff line change
Expand Up @@ -187,14 +187,7 @@ foldr(op, itr) = mapfoldr(identity, op, itr)
return mapreduce_first(f, op, a1)
elseif ifirst + blksize > ilast
# sequential portion
@inbounds a1 = A[ifirst]
@inbounds a2 = A[ifirst+1]
v = op(f(a1), f(a2))
for i = ifirst + 2 : ilast
@inbounds ai = A[i]
v = op(v, f(ai))
end
return v
return _mapreduce_impl_loop(simdable(f), simdable(op), A, ifirst, ilast)
else
# pairwise portion
imid = (ifirst + ilast) >> 1
Expand All @@ -204,6 +197,32 @@ foldr(op, itr) = mapfoldr(identity, op, itr)
end
end

# The @simd transformation is only valid in limited situations
struct SIMDableFunction{f}; end
(::SIMDableFunction{f})(args...) where {f} = f(args...)
simdable(f::Union{map(typeof, (+, *, &, |, add_sum, mul_prod, -, /, ^, identity))...}) = SIMDableFunction{f}()
simdable(f) = f
function _mapreduce_impl_loop(f::SIMDableFunction, op::SIMDableFunction, A::Array, ifirst, ilast)
@inbounds a1 = A[ifirst]
@inbounds a2 = A[ifirst+1]
v = op(f(a1), f(a2))
@simd for i = ifirst + 2 : ilast
@inbounds ai = A[i]
v = op(v, f(ai))
end
return v
end
function _mapreduce_impl_loop(f, op, A, ifirst, ilast)
@inbounds a1 = A[ifirst]
@inbounds a2 = A[ifirst+1]
v = op(f(a1), f(a2))
for i = ifirst + 2 : ilast
@inbounds ai = A[i]
v = op(v, f(ai))
end
return v
end

mapreduce_impl(f, op, A::AbstractArray, ifirst::Integer, ilast::Integer) =
mapreduce_impl(f, op, A, ifirst, ilast, pairwise_blocksize(f, op))

Expand Down
51 changes: 41 additions & 10 deletions base/reducedim.jl
Original file line number Diff line number Diff line change
Expand Up @@ -222,24 +222,55 @@ function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArray)
if reducedim1(R, A)
# keep the accumulator as a local variable when reducing along the first dimension
i1 = first(indices1(R))
@inbounds for IA in CartesianIndices(indsAt)
for IA in CartesianIndices(indsAt)
IR = Broadcast.newindex(IA, keep, Idefault)
r = R[i1,IR]
for i in axes(A, 1)
r = op(r, f(A[i, IA]))
end
R[i1,IR] = r
_mapreducedim_loop1!(simdable(f), simdable(op), R, A, IR, IA, i1)
end
else
@inbounds for IA in CartesianIndices(indsAt)
for IA in CartesianIndices(indsAt)
IR = Broadcast.newindex(IA, keep, Idefault)
for i in axes(A, 1)
R[i,IR] = op(R[i,IR], f(A[i,IA]))
end
_mapreducedim_loop!(simdable(f), simdable(op), R, A, IR, IA)
end
end
return R
end

# The innermost loops are split out to allow for @simd in known safe cases
# add a few more simd-safe functions that were not available earlier in bootstrap
simdable(f::Union{map(typeof, (abs, sqrt, log, log10, log2))...}) = SIMDableFunction{f}()
@inline function _mapreducedim_loop1!(f, op, R, A, IR, IA, i1)
@inbounds begin
r = R[i1,IR]
for i in axes(A, 1)
r = op(r, f(A[i, IA]))
end
R[i1,IR] = r
end
return R
end
@inline function _mapreducedim_loop1!(f::SIMDableFunction, op::SIMDableFunction, R::Array, A::Array, IR, IA, i1)
@inbounds begin
r = R[i1,IR]
@simd for i in axes(A, 1)
r = op(r, f(A[i, IA]))
end
R[i1,IR] = r
end
return R
end
@inline function _mapreducedim_loop!(f, op, R, A, IR, IA)
@inbounds for i in axes(A, 1)
R[i,IR] = op(R[i,IR], f(A[i,IA]))
end
return R
end
@inline function _mapreducedim_loop!(f::SIMDableFunction, op::SIMDableFunction, R::Array, A::Array, IR, IA)
@inbounds @simd for i in axes(A, 1)
R[i,IR] = op(R[i,IR], f(A[i,IA]))
end
return R
end


mapreducedim!(f, op, R::AbstractArray, A::AbstractArray) =
(_mapreducedim!(f, op, R, A); R)
Expand Down
2 changes: 1 addition & 1 deletion base/simdloop.jl
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ Annotate a `for` loop to allow the compiler to take extra liberties to allow loo
This feature is experimental and could change or disappear in future versions of Julia.
Incorrect use of the `@simd` macro may cause unexpected results.
The object iterated over in a `@simd for` loop should be a one-dimensional range.
The object iterated over in a `@simd for` loop should be a one-dimensional range or a CartesianIndices iterator.
By using `@simd`, you are asserting several properties of the loop:
* It is safe to execute iterations in arbitrary or overlapping order, with special consideration for reduction variables.
Expand Down
2 changes: 1 addition & 1 deletion doc/src/manual/performance-tips.md
Original file line number Diff line number Diff line change
Expand Up @@ -1144,7 +1144,7 @@ Sometimes you can enable better optimization by promising certain program proper
like allowing floating-point re-associativity and ignoring dependent memory accesses. Again, be very
careful when asserting `@simd` as erroneously annotating a loop with dependent iterations may result
in unexpected results. In particular, note that `setindex!` on some `AbstractArray` subtypes is
enherently dependent upon iteration order. **This feature is experimental**
inherently dependent upon iteration order. **This feature is experimental**
and could change or disappear in future versions of Julia.

The common idiom of using 1:n to index into an AbstractArray is not safe if the Array uses unconventional indexing,
Expand Down

0 comments on commit bff1c48

Please sign in to comment.