Skip to content

Commit

Permalink
Merge pull request #2 from N5N3/revert-1-patch__
Browse files Browse the repository at this point in the history
Focus on copyto_unaliased!, and include SimdLoop earlier in Base.jl.
  • Loading branch information
N5N3 authored Jul 5, 2021
2 parents 17fe447 + 43d3581 commit 742c97a
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 36 deletions.
10 changes: 5 additions & 5 deletions base/Base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@ include("refpointer.jl")
include("checked.jl")
using .Checked

# SIMD loops
@pure sizeof(s::String) = Core.sizeof(s) # needed by gensym as called from simdloop
include("simdloop.jl")
using .SimdLoop

# array structures
include("indices.jl")
include("array.jl")
Expand Down Expand Up @@ -172,11 +177,6 @@ using .MultiplicativeInverses
include("abstractarraymath.jl")
include("arraymath.jl")

# SIMD loops
@pure sizeof(s::String) = Core.sizeof(s) # needed by gensym as called from simdloop
include("simdloop.jl")
using .SimdLoop

# map-reduce operators
include("reduce.jl")

Expand Down
44 changes: 21 additions & 23 deletions base/abstractarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1009,38 +1009,36 @@ end

function copyto_unaliased!(deststyle::IndexStyle, dest::AbstractArray, srcstyle::IndexStyle, src::AbstractArray)
isempty(src) && return dest
destinds, srcinds = LinearIndices(dest), LinearIndices(src)
idf, isf = first(destinds), first(srcinds)
Δi = idf - isf
(checkbounds(Bool, destinds, isf+Δi) & checkbounds(Bool, destinds, last(srcinds)+Δi)) ||
throw(BoundsError(dest, srcinds))
length(dest) >= length(src) || throw(BoundsError(dest, LinearIndices(src)))
if deststyle isa IndexLinear
if srcstyle isa IndexLinear
# Single-index implementation
@inbounds for i in srcinds
dest[i + Δi] = src[i]
Δi = firstindex(dest) - firstindex(src)
for i in eachindex(src)
@inbounds dest[i + Δi] = src[i]
end
else
# Dual-index implementation
i = idf - 1
@inbounds for a in src
dest[i+=1] = a
j = firstindex(dest) - 1
@inbounds @simd for I in eachindex(src)
dest[j+=1] = src[I]
end
end
else
iterdest, itersrc = eachindex(dest), eachindex(src)
if iterdest == itersrc
# Shared-iterator implementation
for I in iterdest
@inbounds dest[I] = src[I]
if srcstyle isa IndexLinear
i = firstindex(src) - 1
@inbounds @simd for J in eachindex(dest)
dest[J] = src[i+=1]
end
else
# Dual-iterator implementation
ret = iterate(iterdest)
@inbounds for a in src
idx, state = ret
dest[idx] = a
ret = iterate(iterdest, state)
iterdest, itersrc = eachindex(dest), eachindex(src)
if iterdest == itersrc
# Shared-iterator implementation
@inbounds @simd for I in itersrc
dest[I] = src[I]
end
else
for (I,J) in zip(itersrc, iterdest)
@inbounds dest[J] = src[I]
end
end
end
end
Expand Down
10 changes: 2 additions & 8 deletions base/broadcast.jl
Original file line number Diff line number Diff line change
Expand Up @@ -991,17 +991,11 @@ preprocess_args(dest, args::Tuple{}) = ()
# Specialize this method if all you want to do is specialize on typeof(dest)
@inline function copyto!(dest::AbstractArray, bc::Broadcasted{Nothing})
axes(dest) == axes(bc) || throwdm(axes(dest), axes(bc))
# Performance optimization: broadcast!(identity, dest, A) is equivalent to copyto!(dest, A) if indices match.
# However copyto!(dest, A) is very slow in many cases, implement a faster version here.
# Performance optimization: broadcast!(identity, dest, A) is equivalent to copyto!(dest, A) if indices match
if bc.f === identity && bc.args isa Tuple{AbstractArray} # only a single input argument to broadcast!
A = bc.args[1]
if axes(dest) == axes(A)
A′ = broadcast_unalias(dest, A)
iter = IndexStyle(dest) isa IndexCartesian ? dest : A′
@inbounds @simd for I in eachindex(iter)
dest[I] = A′[I]
end
return dest
return copyto!(dest, A)
end
end
bc′ = preprocess(dest, bc)
Expand Down

0 comments on commit 742c97a

Please sign in to comment.