diff --git a/base/broadcast.jl b/base/broadcast.jl index 8e29f437c4c28..0be1863bfd15f 100644 --- a/base/broadcast.jl +++ b/base/broadcast.jl @@ -239,38 +239,48 @@ broadcast!_function(f::Function) = (B, As...) -> broadcast!(f, B, As...) broadcast_function(f::Function) = (As...) -> broadcast(f, As...) broadcast_getindex(src::AbstractArray, I::AbstractArray...) = broadcast_getindex!(Array(eltype(src), broadcast_shape(I...)), src, I...) -@ngenerate N typeof(dest) function broadcast_getindex!(dest::AbstractArray, src::AbstractArray, I::NTuple{N, AbstractArray}...) - check_broadcast_shape(size(dest), I...) # unnecessary if this function is never called directly - checkbounds(src, I...) - @nloops N i dest d->(@nexprs N k->(j_d_k = size(I_k, d) == 1 ? 1 : i_d)) begin - @nexprs N k->(@inbounds J_k = @nref N I_k d->j_d_k) - @inbounds (@nref N dest i) = (@nref N src J) +stagedfunction broadcast_getindex!(dest::AbstractArray, src::AbstractArray, I::AbstractArray...) + N = length(I) + Isplat = Expr[:(I[$d]) for d = 1:N] + quote + @nexprs $N d->(I_d = I[d]) + check_broadcast_shape(size(dest), $(Isplat...)) # unnecessary if this function is never called directly + checkbounds(src, $(Isplat...)) + @nloops $N i dest d->(@nexprs $N k->(j_d_k = size(I_k, d) == 1 ? 1 : i_d)) begin + @nexprs $N k->(@inbounds J_k = @nref $N I_k d->j_d_k) + @inbounds (@nref $N dest i) = (@nref $N src J) + end + dest end - dest end -@ngenerate N typeof(A) function broadcast_setindex!(A::AbstractArray, x, I::NTuple{N, AbstractArray}...) - checkbounds(A, I...) - shape = broadcast_shape(I...) - @nextract N shape d->(length(shape) < d ? 1 : shape[d]) - if !isa(x, AbstractArray) - @nloops N i d->(1:shape_d) d->(@nexprs N k->(j_d_k = size(I_k, d) == 1 ? 1 : i_d)) begin - @nexprs N k->(@inbounds J_k = @nref N I_k d->j_d_k) - @inbounds (@nref N A J) = x - end - else - X = x - # To call setindex_shape_check, we need to create fake 1-d indexes of the proper size - @nexprs N d->(fakeI_d = 1:shape_d) - Base.setindex_shape_check(X, (@ntuple N fakeI)...) - k = 1 - @nloops N i d->(1:shape_d) d->(@nexprs N k->(j_d_k = size(I_k, d) == 1 ? 1 : i_d)) begin - @nexprs N k->(@inbounds J_k = @nref N I_k d->j_d_k) - @inbounds (@nref N A J) = X[k] - k += 1 +stagedfunction broadcast_setindex!(A::AbstractArray, x, I::AbstractArray...) + N = length(I) + Isplat = Expr[:(I[$d]) for d = 1:N] + quote + @nexprs $N d->(I_d = I[d]) + checkbounds(A, $(Isplat...)) + shape = broadcast_shape($(Isplat...)) + @nextract $N shape d->(length(shape) < d ? 1 : shape[d]) + if !isa(x, AbstractArray) + @nloops $N i d->(1:shape_d) d->(@nexprs $N k->(j_d_k = size(I_k, d) == 1 ? 1 : i_d)) begin + @nexprs $N k->(@inbounds J_k = @nref $N I_k d->j_d_k) + @inbounds (@nref $N A J) = x + end + else + X = x + # To call setindex_shape_check, we need to create fake 1-d indexes of the proper size + @nexprs $N d->(fakeI_d = 1:shape_d) + Base.setindex_shape_check(X, (@ntuple $N fakeI)...) + k = 1 + @nloops $N i d->(1:shape_d) d->(@nexprs $N k->(j_d_k = size(I_k, d) == 1 ? 1 : i_d)) begin + @nexprs $N k->(@inbounds J_k = @nref $N I_k d->j_d_k) + @inbounds (@nref $N A J) = X[k] + k += 1 + end end + A end - A end ## elementwise operators ## diff --git a/base/cartesian.jl b/base/cartesian.jl index 1517ed5f8c368..892f41d8bef73 100644 --- a/base/cartesian.jl +++ b/base/cartesian.jl @@ -1,275 +1,6 @@ module Cartesian -export @ngenerate, @nsplat, @nloops, @nref, @ncall, @nexprs, @nextract, @nall, @ntuple, @nif, ngenerate - -const CARTESIAN_DIMS = 4 - -### @ngenerate, for auto-generation of separate versions of functions for different dimensionalities -# Examples (deliberately trivial): -# @ngenerate N returntype myndims{T,N}(A::Array{T,N}) = N -# or alternatively -# function gen_body(N::Int) -# quote -# return $N -# end -# end -# eval(ngenerate(:N, returntypeexpr, :(myndims{T,N}(A::Array{T,N})), gen_body)) -# The latter allows you to use a single gen_body function for both ngenerate and -# when your function maintains its own method cache (e.g., reduction or broadcasting). -# -# Special syntax for function prototypes: -# @ngenerate N returntype function myfunction(A::AbstractArray, I::NTuple{N, Int}...) -# for N = 3 translates to -# function myfunction(A::AbstractArray, I_1::Int, I_2::Int, I_3::Int) -# and for the generic (cached) case as -# function myfunction(A::AbstractArray, I::Int...) -# @nextract N I I -# with N = length(I). N should _not_ be listed as a parameter of the function unless -# earlier arguments use it that way. -# To avoid ambiguity, it would be preferable to have some specific syntax for this, such as -# myfunction(A::AbstractArray, I::Int...N) -# where N can be an integer or symbol. Currently T...N generates a parser error. -macro ngenerate(itersym, returntypeexpr, funcexpr) - if isa(funcexpr, Expr) && funcexpr.head == :macrocall && funcexpr.args[1] == symbol("@inline") - funcexpr = Base._inline(funcexpr.args[2]) - end - isfuncexpr(funcexpr) || throw(ArgumentError("requires a function expression")) - esc(ngenerate(itersym, returntypeexpr, funcexpr.args[1], N->sreplace!(copy(funcexpr.args[2]), itersym, N))) -end - -# @nsplat takes an expression like -# @nsplat N 2:3 myfunction(A, I::NTuple{N,Real}...) = getindex(A, I...) -# and generates -# myfunction(A, I_1::Real, I_2::Real) = getindex(A, I_1, I_2) -# myfunction(A, I_1::Real, I_2::Real, I_3::Real) = getindex(A, I_1, I_2, I_3) -# myfunction(A, I::Real...) = getindex(A, I...) -# An @nsplat function _cannot_ have any other Cartesian macros in it. -# If you omit the range, it uses 1:CARTESIAN_DIMS. -macro nsplat(itersym, args...) - local rng - if length(args) == 1 - rng = 1:CARTESIAN_DIMS - funcexpr = args[1] - elseif length(args) == 2 - rangeexpr = args[1] - funcexpr = args[2] - if !isa(rangeexpr, Expr) || rangeexpr.head != :(:) || length(rangeexpr.args) != 2 - throw(ArgumentError("first argument must be a from:to expression")) - end - rng = rangeexpr.args[1]:rangeexpr.args[2] - else - throw(ArgumentError("wrong number of arguments")) - end - if isa(funcexpr, Expr) && funcexpr.head == :macrocall && funcexpr.args[1] == symbol("@inline") - funcexpr = Base._inline(funcexpr.args[2]) - end - isfuncexpr(funcexpr) || throw(ArgumentError("second argument must be a function expression")) - prototype = funcexpr.args[1] - body = funcexpr.args[2] - varname, T = get_splatinfo(prototype, itersym) - isempty(varname) && throw(ArgumentError("last argument must be a splat")) - explicit = [Expr(:function, resolvesplat!(copy(prototype), varname, T, N), - resolvesplats!(copy(body), varname, N)) for N in rng] - protosplat = resolvesplat!(copy(prototype), varname, T, 0) - protosplat.args[end] = Expr(:..., protosplat.args[end]) - splat = Expr(:function, protosplat, body) - esc(Expr(:block, explicit..., splat)) -end - -generate1(itersym, prototype, bodyfunc, N::Int, varname, T) = - Expr(:function, spliceint!(sreplace!(resolvesplat!(copy(prototype), varname, T, N), itersym, N)), - resolvesplats!(bodyfunc(N), varname, N)) - -function ngenerate(itersym, returntypeexpr, prototype, bodyfunc, dims=1:CARTESIAN_DIMS, makecached::Bool = true) - varname, T = get_splatinfo(prototype, itersym) - # Generate versions for specific dimensions - fdim = [generate1(itersym, prototype, bodyfunc, N, varname, T) for N in dims] - if !makecached - return Expr(:block, fdim...) - end - # Generate the generic cache-based version - if isempty(varname) - setitersym, extractvarargs = :(), N -> nothing - else - s = symbol(varname) - setitersym = hasparameter(prototype, itersym) ? (:(@assert $itersym == length($s))) : (:($itersym = length($s))) - extractvarargs = N -> Expr(:block, map(popescape, _nextract(N, s, s).args)...) - end - fsym = funcsym(prototype) - dictname = symbol(fsym,"_cache") - fargs = funcargs(prototype) - if !isempty(varname) - fargs[end] = Expr(:..., fargs[end].args[1]) - end - flocal = funcrename(copy(prototype), :_F_) - F = Expr(:function, resolvesplat!(prototype, varname, T), quote - $setitersym - if !haskey($dictname, $itersym) - gen1 = Base.Cartesian.generate1($(symbol(itersym)), $(Expr(:quote, flocal)), $bodyfunc, $itersym, $varname, $T) - $(dictname)[$itersym] = eval(quote - local _F_ - $gen1 - _F_ - end) - end - ($(dictname)[$itersym]($(fargs...)))::$returntypeexpr - end) - Expr(:block, fdim..., quote - let $dictname = Dict{Int,Function}() - $F - end - end) -end - -isfuncexpr(ex::Expr) = - ex.head == :function || (ex.head == :(=) && typeof(ex.args[1]) == Expr && ex.args[1].head == :call) -isfuncexpr(arg) = false - -sreplace!(arg, sym, val) = arg -function sreplace!(ex::Expr, sym, val) - for i = 1:length(ex.args) - ex.args[i] = sreplace!(ex.args[i], sym, val) - end - ex -end -sreplace!(s::Symbol, sym, val) = s == sym ? val : s - -# If using the syntax that will need "desplatting", -# myfunction(A::AbstractArray, I::NTuple{N, Int}...) -# return the variable name (as a string) and type -function get_splatinfo(ex::Expr, itersym::Symbol) - if ex.head == :call - a = ex.args[end] - if isa(a, Expr) && a.head == :... && length(a.args) == 1 - b = a.args[1] - if isa(b, Expr) && b.head == :(::) - varname = string(b.args[1]) - c = b.args[2] - if isa(c, Expr) && c.head == :curly && c.args[1] == :NTuple && c.args[2] == itersym - T = c.args[3] - return varname, T - end - end - end - end - "", Void -end - -# Replace splatted with desplatted for a specific number of arguments -function resolvesplat!(prototype, varname, T::Union(Type,Symbol,Expr), N::Int) - if !isempty(varname) - prototype.args[end] = N > 0 ? Expr(:(::), symbol(varname, "_1"), T) : - Expr(:(::), symbol(varname), T) - for i = 2:N - push!(prototype.args, Expr(:(::), symbol(varname, "_", i), T)) - end - end - prototype -end - -# Return the generic splatting form, e.g., -# myfunction(A::AbstractArray, I::Int...) -function resolvesplat!(prototype, varname, T::Union(Type,Symbol,Expr)) - if !isempty(varname) - svarname = symbol(varname) - prototype.args[end] = Expr(:..., :($svarname::$T)) - end - prototype -end - -# Desplatting function calls: replace func(a, b, I...) with func(a, b, I_1, I_2, I_3) -resolvesplats!(arg, varname, N) = arg -function resolvesplats!(ex::Expr, varname, N::Int) - if ex.head == :call - for i = 2:length(ex.args)-1 - resolvesplats!(ex.args[i], varname, N) - end - a = ex.args[end] - if isa(a, Expr) && a.head == :... && a.args[1] == symbol(varname) - ex.args[end] = symbol(varname, "_1") - for i = 2:N - push!(ex.args, symbol(varname, "_", i)) - end - else - resolvesplats!(a, varname, N) - end - else - for i = 1:length(ex.args) - resolvesplats!(ex.args[i], varname, N) - end - end - ex -end - -# Remove any function parameters that are integers -function spliceint!(ex::Expr) - if ex.head == :escape - return esc(spliceint!(ex.args[1])) - end - ex.head == :call || throw(ArgumentError("$ex must be a call")) - if isa(ex.args[1], Expr) && ex.args[1].head == :curly - args = ex.args[1].args - for i = length(args):-1:1 - if isa(args[i], Int) - deleteat!(args, i) - end - end - end - ex -end - -function popescape(ex::Expr) - while ex.head == :escape - ex = ex.args[1] - end - ex -end - -# Extract the "function name" -function funcsym(prototype::Expr) - prototype = popescape(prototype) - prototype.head == :call || throw(ArgumentError("$prototype must be a call")) - tmp = prototype.args[1] - if isa(tmp, Expr) && tmp.head == :curly - tmp = tmp.args[1] - end - return tmp -end - -function funcrename(prototype::Expr, name::Symbol) - prototype = popescape(prototype) - prototype.head == :call || throw(ArgumentError("$prototype must be a call")) - tmp = prototype.args[1] - if isa(tmp, Expr) && tmp.head == :curly - tmp.args[1] = name - else - prototype.args[1] = name - end - return prototype -end - -function hasparameter(prototype::Expr, sym::Symbol) - prototype = popescape(prototype) - prototype.head == :call || throw(ArgumentError("$prototype must be a call")) - tmp = prototype.args[1] - if isa(tmp, Expr) && tmp.head == :curly - for i = 2:length(tmp.args) - if tmp.args[i] == sym - return true - end - end - end - false -end - -# Extract the symbols of the function arguments -funcarg(s::Symbol) = s -funcarg(ex::Expr) = ex.args[1] -function funcargs(prototype::Expr) - prototype = popescape(prototype) - prototype.head == :call || throw(ArgumentError("$prototype must be a call")) - map(a->funcarg(a), prototype.args[2:end]) -end +export @nloops, @nref, @ncall, @nexprs, @nextract, @nall, @ntuple, @nif ### Cartesian-specific macros diff --git a/base/multidimensional.jl b/base/multidimensional.jl index 77a99f8ad33e1..4cf3872b5996d 100644 --- a/base/multidimensional.jl +++ b/base/multidimensional.jl @@ -181,9 +181,12 @@ using .IteratorsMD ### From array.jl -@ngenerate N Void function checksize(A::AbstractArray, I::NTuple{N, Any}...) - @nexprs N d->(size(A, d) == length(I_d) || throw(DimensionMismatch("index $d has length $(length(I_d)), but size(A, $d) = $(size(A,d))"))) - nothing +stagedfunction checksize(A::AbstractArray, I...) + N = length(I) + quote + @nexprs $N d->(size(A, d) == length(I[d]) || throw(DimensionMismatch("index $d has length $(length(I[d])), but size(A, $d) = $(size(A,d))"))) + nothing + end end @inline unsafe_getindex(v::BitArray, ind::Int) = Base.unsafe_bitgetindex(v.chunks, ind) @@ -194,28 +197,36 @@ end @inline unsafe_setindex!{T}(v::AbstractArray{T}, x::T, ind::Real) = unsafe_setindex!(v, x, to_index(ind)) # Version that uses cartesian indexing for src -@ngenerate N typeof(dest) function _getindex!(dest::Array, src::AbstractArray, I::NTuple{N,Union(Int,AbstractVector)}...) - checksize(dest, I...) - k = 1 - @nloops N i dest d->(@inbounds j_d = unsafe_getindex(I_d, i_d)) begin - @inbounds dest[k] = (@nref N src j) - k += 1 +stagedfunction _getindex!(dest::Array, src::AbstractArray, I::Union(Int,AbstractVector)...) + N = length(I) + Isplat = Expr[:(I[$d]) for d = 1:N] + quote + checksize(dest, $(Isplat...)) + k = 1 + @nloops $N i dest d->(@inbounds j_d = unsafe_getindex(I[d], i_d)) begin + @inbounds dest[k] = (@nref $N src j) + k += 1 + end + dest end - dest end # Version that uses linear indexing for src -@ngenerate N typeof(dest) function _getindex!(dest::Array, src::Array, I::NTuple{N,Union(Int,AbstractVector)}...) - checksize(dest, I...) - stride_1 = 1 - @nexprs N d->(stride_{d+1} = stride_d*size(src,d)) - @nexprs N d->(offset_d = 1) # only really need offset_$N = 1 - k = 1 - @nloops N i dest d->(@inbounds offset_{d-1} = offset_d + (unsafe_getindex(I_d, i_d)-1)*stride_d) begin - @inbounds dest[k] = src[offset_0] - k += 1 +stagedfunction _getindex!(dest::Array, src::Array, I::Union(Int,AbstractVector)...) + N = length(I) + Isplat = Expr[:(I[$d]) for d = 1:N] + quote + checksize(dest, $(Isplat...)) + stride_1 = 1 + @nexprs $N d->(stride_{d+1} = stride_d*size(src,d)) + @nexprs $N d->(offset_d = 1) # only really need offset_$N = 1 + k = 1 + @nloops $N i dest d->(@inbounds offset_{d-1} = offset_d + (unsafe_getindex(I[d], i_d)-1)*stride_d) begin + @inbounds dest[k] = src[offset_0] + k += 1 + end + dest end - dest end # It's most efficient to call checkbounds first, then to_index, and finally @@ -223,53 +234,70 @@ end _getindex(A, I::(Union(Int,AbstractVector)...)) = _getindex!(similar(A, index_shape(I...)), A, I...) -@nsplat N function getindex(A::Array, I::NTuple{N,Union(Real,AbstractVector)}...) - checkbounds(A, I...) - _getindex(A, to_index(I...)) +# The stagedfunction here is just to work around the performance hit +# of splatting +stagedfunction getindex(A::Array, I::Union(Real,AbstractVector)...) + N = length(I) + Isplat = Expr[:(I[$d]) for d = 1:N] + quote + checkbounds(A, $(Isplat...)) + _getindex(A, to_index($(Isplat...))) + end end # Also a safe version of getindex! -@nsplat N function getindex!(dest, src, I::NTuple{N,Union(Real,AbstractVector)}...) - checkbounds(src, I...) - _getindex!(dest, src, to_index(I...)...) +stagedfunction getindex!(dest, src, I::Union(Real,AbstractVector)...) + N = length(I) + Isplat = Expr[:(I[$d]) for d = 1:N] + Jsplat = Expr[:(to_index(I[$d])) for d = 1:N] + quote + checkbounds(src, $(Isplat...)) + _getindex!(dest, src, $(Jsplat...)) + end end -@ngenerate N typeof(A) function setindex!(A::Array, x, J::NTuple{N,Union(Real,AbstractArray)}...) - @ncall N checkbounds A J - @nexprs N d->(I_d = to_index(J_d)) - stride_1 = 1 - @nexprs N d->(stride_{d+1} = stride_d*size(A,d)) - @nexprs N d->(offset_d = 1) # really only need offset_$N = 1 - if !isa(x, AbstractArray) - @nloops N i d->(1:length(I_d)) d->(@inbounds offset_{d-1} = offset_d + (unsafe_getindex(I_d, i_d)-1)*stride_d) begin - @inbounds A[offset_0] = x - end - else - X = x - @ncall N setindex_shape_check X I - # TODO? A variant that can use cartesian indexing for RHS - k = 1 - @nloops N i d->(1:length(I_d)) d->(@inbounds offset_{d-1} = offset_d + (unsafe_getindex(I_d, i_d)-1)*stride_d) begin - @inbounds A[offset_0] = X[k] +stagedfunction setindex!(A::Array, x, J::Union(Real,AbstractArray)...) + N = length(J) + quote + @nexprs $N d->(J_d = J[d]) + @ncall $N checkbounds A J + @nexprs $N d->(I_d = to_index(J_d)) + stride_1 = 1 + @nexprs $N d->(stride_{d+1} = stride_d*size(A,d)) + @nexprs $N d->(offset_d = 1) # really only need offset_$N = 1 + if !isa(x, AbstractArray) + @nloops $N i d->(1:length(I_d)) d->(@inbounds offset_{d-1} = offset_d + (unsafe_getindex(I_d, i_d)-1)*stride_d) begin + @inbounds A[offset_0] = x + end + else + X = x + @ncall $N setindex_shape_check X I + # TODO? A variant that can use cartesian indexing for RHS + k = 1 + @nloops $N i d->(1:length(I_d)) d->(@inbounds offset_{d-1} = offset_d + (unsafe_getindex(I_d, i_d)-1)*stride_d) begin + @inbounds A[offset_0] = X[k] k += 1 + end end + A end - A end -@ngenerate N NTuple{N,Vector{Int}} function findn{T,N}(A::AbstractArray{T,N}) - nnzA = countnz(A) - @nexprs N d->(I_d = Array(Int, nnzA)) - k = 1 - @nloops N i A begin - @inbounds if (@nref N A i) != zero(T) - @nexprs N d->(I_d[k] = i_d) - k += 1 +stagedfunction findn{T,N}(A::AbstractArray{T,N}) + quote + nnzA = countnz(A) + @nexprs $N d->(I_d = Array(Int, nnzA)) + k = 1 + @nloops $N i A begin + @inbounds if (@nref $N A i) != zero(T) + @nexprs $N d->(I_d[k] = i_d) + k += 1 + end end + @ntuple $N I end - @ntuple N I end @@ -386,57 +414,74 @@ end cumsum(A::AbstractArray, axis::Integer=1) = cumsum!(similar(A, Base._cumsum_type(A)), A, axis) +cumsum!(B, A::AbstractArray) = cumsum!(B, A, 1) cumprod(A::AbstractArray, axis::Integer=1) = cumprod!(similar(A), A, axis) +cumprod!(B, A) = cumprod!(B, A, 1) for (f, op) in ((:cumsum!, :+), (:cumprod!, :*)) @eval begin - @ngenerate N typeof(B) function ($f){T,N}(B, A::AbstractArray{T,N}, axis::Integer=1) - if size(B, axis) < 1 - return B - end - size(B) == size(A) || throw(DimensionMismatch("size of B must match A")) - if axis == 1 - # We can accumulate to a temporary variable, which allows register usage and will be slightly faster - @inbounds @nloops N i d->(d > 1 ? (1:size(A,d)) : (1:1)) begin - tmp = convert(eltype(B), @nref(N, A, i)) - @nref(N, B, i) = tmp - for i_1 = 2:size(A,1) - tmp = ($op)(tmp, @nref(N, A, i)) - @nref(N, B, i) = tmp - end + stagedfunction ($f){T,N}(B, A::AbstractArray{T,N}, axis::Integer) + quote + if size(B, axis) < 1 + return B end - else - @nexprs N d->(isaxis_d = axis == d) - # Copy the initial element in each 1d vector along dimension `axis` - @inbounds @nloops N i d->(d == axis ? (1:1) : (1:size(A,d))) @nref(N, B, i) = @nref(N, A, i) - # Accumulate - @inbounds @nloops N i d->((1+isaxis_d):size(A, d)) d->(j_d = i_d - isaxis_d) begin - @nref(N, B, i) = ($op)(@nref(N, B, j), @nref(N, A, i)) + size(B) == size(A) || throw(DimensionMismatch("Size of B must match A")) + if axis == 1 + # We can accumulate to a temporary variable, which allows register usage and will be slightly faster + @inbounds @nloops $N i d->(d > 1 ? (1:size(A,d)) : (1:1)) begin + tmp = convert(eltype(B), @nref($N, A, i)) + @nref($N, B, i) = tmp + for i_1 = 2:size(A,1) + tmp = ($($op))(tmp, @nref($N, A, i)) + @nref($N, B, i) = tmp + end + end + else + @nexprs $N d->(isaxis_d = axis == d) + # Copy the initial element in each 1d vector along dimension `axis` + @inbounds @nloops $N i d->(d == axis ? (1:1) : (1:size(A,d))) @nref($N, B, i) = @nref($N, A, i) + # Accumulate + @inbounds @nloops $N i d->((1+isaxis_d):size(A, d)) d->(j_d = i_d - isaxis_d) begin + @nref($N, B, i) = ($($op))(@nref($N, B, j), @nref($N, A, i)) + end end + B end - B end end end ### from abstractarray.jl -@ngenerate N typeof(A) function fill!{T,N}(A::AbstractArray{T,N}, x) +function fill!{T}(A::AbstractArray{T}, x) xT = convert(T, x) - @nloops N i A begin - @inbounds (@nref N A i) = xT + for I in eachindex(A) + @inbounds A[I] = xT end A end -@ngenerate N typeof(dest) function copy!{T,N}(dest::AbstractArray{T,N}, src::AbstractArray{T,N}) - if @nall N d->(size(dest,d) == size(src,d)) - @nloops N i dest begin - @inbounds (@nref N dest i) = (@nref N src i) +function copy!{T,N}(dest::AbstractArray{T,N}, src::AbstractArray{T,N}) + samesize = true + for d = 1:N + if size(dest,d) != size(src,d) + samesize = false + break + end + end + if samesize + for I in eachindex(dest) + @inbounds dest[I] = src[I] end else - invoke(copy!, (typeof(dest), Any), dest, src) + length(dest) >= length(src) || throw(BoundsError()) + iterdest = eachindex(dest) + sdest = start(iterdest) + for Isrc in eachindex(src) + Idest, sdest = next(iterdest, sdest) + @inbounds dest[Idest] = src[Isrc] + end end dest end @@ -449,27 +494,33 @@ end # (uses linear indexing, which is defined in bitarray.jl) # (code is duplicated for safe and unsafe versions for performance reasons) -@ngenerate N Bool function unsafe_getindex(B::BitArray, I_0::Int, I::NTuple{N,Int}...) - stride = 1 - index = I_0 - @nexprs N d->begin - stride *= size(B,d) - index += (I_d - 1) * stride +stagedfunction unsafe_getindex(B::BitArray, I_0::Int, I::Int...) + N = length(I) + quote + stride = 1 + index = I_0 + @nexprs $N d->begin + stride *= size(B,d) + index += (I[d] - 1) * stride + end + return unsafe_getindex(B, index) end - return unsafe_getindex(B, index) end -@ngenerate N Bool function getindex(B::BitArray, I_0::Int, I::NTuple{N,Int}...) - stride = 1 - index = I_0 - @nexprs N d->begin - l = size(B,d) - stride *= l - 1 <= I_{d-1} <= l || throw(BoundsError(B, tuple(I_0, @ntuple(N,I)...))) - index += (I_d - 1) * stride +stagedfunction getindex(B::BitArray, I_0::Int, I::Int...) + N = length(I) + quote + stride = 1 + index = I_0 + @nexprs $N d->(I_d = I[d]) + @nexprs $N d->begin + l = size(B,d) + stride *= l + 1 <= I_{d-1} <= l || throw(BoundsError()) + index += (I_d - 1) * stride + end + return B[index] end - 1 <= index <= length(B) || throw(BoundsError(B, tuple(I_0, @ntuple(N,I)...))) - return unsafe_getindex(B, index) end # contiguous multidimensional indexing: if the first dimension is a range, @@ -488,53 +539,73 @@ end getindex{T<:Real}(B::BitArray, I0::UnitRange{T}) = getindex(B, to_index(I0)) -@ngenerate N BitArray{length(index_shape(I0, I...))} function unsafe_getindex(B::BitArray, I0::UnitRange{Int}, I::NTuple{N,Union(Int,UnitRange{Int})}...) - X = BitArray(index_shape(I0, I...)) - - f0 = first(I0) - l0 = length(I0) +stagedfunction unsafe_getindex(B::BitArray, I0::UnitRange{Int}, I::Union(Int,UnitRange{Int})...) + N = length(I) + Isplat = Expr[:(I[$d]) for d = 1:N] + quote + @nexprs $N d->(I_d = I[d]) + X = BitArray(index_shape(I0, $(Isplat...))) + + f0 = first(I0) + l0 = length(I0) + + gap_lst_1 = 0 + @nexprs $N d->(gap_lst_{d+1} = length(I_d)) + stride = 1 + ind = f0 + @nexprs $N d->begin + stride *= size(B, d) + stride_lst_d = stride + ind += stride * (first(I_d) - 1) + gap_lst_{d+1} *= stride + end - gap_lst_1 = 0 - @nexprs N d->(gap_lst_{d+1} = length(I_d)) - stride = 1 - ind = f0 - @nexprs N d->begin - stride *= size(B, d) - stride_lst_d = stride - ind += stride * (first(I_d) - 1) - gap_lst_{d+1} *= stride - end - - storeind = 1 - @nloops(N, i, d->I_d, - d->nothing, # PRE - d->(ind += stride_lst_d - gap_lst_d), # POST + storeind = 1 + @nloops($N, i, d->I_d, + d->nothing, # PRE + d->(ind += stride_lst_d - gap_lst_d), # POST begin # BODY copy_chunks!(X.chunks, storeind, B.chunks, ind, l0) storeind += l0 - end) - return X + end) + return X + end end # general multidimensional non-scalar indexing -@ngenerate N BitArray{length(index_shape(I...))} function unsafe_getindex(B::BitArray, I::NTuple{N,Union(Int,AbstractVector{Int})}...) - X = BitArray(index_shape(I...)) - Xc = X.chunks +stagedfunction unsafe_getindex(B::BitArray, I::Union(Int,AbstractVector{Int})...) + N = length(I) + Isplat = Expr[:(I[$d]) for d = 1:N] + quote + @nexprs $N d->(I_d = I[d]) + X = BitArray(index_shape($(Isplat...))) + Xc = X.chunks - ind = 1 - @nloops N i d->I_d begin - unsafe_bitsetindex!(Xc, (@ncall N unsafe_getindex B i), ind) - ind += 1 + stride_1 = 1 + @nexprs $N d->(stride_{d+1} = stride_d * size(B, d)) + @nexprs 1 d->(offset_{$N} = 1) + ind = 1 + @nloops($N, i, d->I_d, + d->(offset_{d-1} = offset_d + (i_d-1)*stride_d), # PRE + begin + unsafe_bitsetindex!(Xc, B[offset_0], ind) + ind += 1 + end) + return X end - return X end # general version with Real (or logical) indexing which dispatches on the appropriate method -@ngenerate N BitArray{length(index_shape(I...))} function getindex(B::BitArray, I::NTuple{N,Union(Real,AbstractVector)}...) - checkbounds(B, I...) - return unsafe_getindex(B, to_index(I...)...) +stagedfunction getindex(B::BitArray, I::Union(Real,AbstractVector)...) + N = length(I) + Isplat = Expr[:(I[$d]) for d = 1:N] + Jsplat = Expr[:(to_index(I[$d])) for d = 1:N] + quote + checkbounds(B, $(Isplat...)) + return unsafe_getindex(B, $(Jsplat...)) + end end ## setindex! @@ -544,29 +615,35 @@ end # bounds check and is defined in bitarray.jl) # (code is duplicated for safe and unsafe versions for performance reasons) -@ngenerate N typeof(B) function unsafe_setindex!(B::BitArray, x::Bool, I_0::Int, I::NTuple{N,Int}...) - stride = 1 - index = I_0 - @nexprs N d->begin - stride *= size(B,d) - index += (I_d - 1) * stride +stagedfunction unsafe_setindex!(B::BitArray, x::Bool, I_0::Int, I::Int...) + N = length(I) + quote + stride = 1 + index = I_0 + @nexprs $N d->begin + stride *= size(B,d) + index += (I[d] - 1) * stride + end + unsafe_setindex!(B, x, index) + return B end - unsafe_setindex!(B, x, index) - return B end -@ngenerate N typeof(B) function setindex!(B::BitArray, x::Bool, I_0::Int, I::NTuple{N,Int}...) - stride = 1 - index = I_0 - @nexprs N d->begin - l = size(B,d) - stride *= l - 1 <= I_{d-1} <= l || throw(BoundsError(B, tuple(I_0, @ntuple(N,I)...))) - index += (I_d - 1) * stride +stagedfunction setindex!(B::BitArray, x::Bool, I_0::Int, I::Int...) + N = length(I) + quote + stride = 1 + index = I_0 + @nexprs $N d->(I_d = I[d]) + @nexprs $N d->begin + l = size(B,d) + stride *= l + 1 <= I_{d-1} <= l || throw(BoundsError()) + index += (I_d - 1) * stride + end + B[index] = x + return B end - 1 <= index <= length(B) || throw(BoundsError(B, tuple(I_0, @ntuple(N,I)...))) - unsafe_setindex!(B, x, index) - return B end # contiguous multidimensional indexing: if the first dimension is a range, @@ -588,78 +665,92 @@ function unsafe_setindex!(B::BitArray, x::Bool, I0::UnitRange{Int}) return B end -@ngenerate N typeof(B) function unsafe_setindex!(B::BitArray, X::BitArray, I0::UnitRange{Int}, I::NTuple{N,Union(Int,UnitRange{Int})}...) - length(X) == 0 && return B - f0 = first(I0) - l0 = length(I0) +stagedfunction unsafe_setindex!(B::BitArray, X::BitArray, I0::UnitRange{Int}, I::Union(Int,UnitRange{Int})...) + N = length(I) + quote + length(X) == 0 && return B + f0 = first(I0) + l0 = length(I0) + + gap_lst_1 = 0 + @nexprs $N d->(gap_lst_{d+1} = length(I[d])) + stride = 1 + ind = f0 + @nexprs $N d->begin + stride *= size(B, d) + stride_lst_d = stride + ind += stride * (first(I[d]) - 1) + gap_lst_{d+1} *= stride + end - gap_lst_1 = 0 - @nexprs N d->(gap_lst_{d+1} = length(I_d)) - stride = 1 - ind = f0 - @nexprs N d->begin - stride *= size(B, d) - stride_lst_d = stride - ind += stride * (first(I_d) - 1) - gap_lst_{d+1} *= stride - end - - refind = 1 - @nloops(N, i, d->I_d, - d->nothing, # PRE - d->(ind += stride_lst_d - gap_lst_d), # POST + refind = 1 + @nloops($N, i, d->I[d], + d->nothing, # PRE + d->(ind += stride_lst_d - gap_lst_d), # POST begin # BODY copy_chunks!(B.chunks, ind, X.chunks, refind, l0) refind += l0 - end) + end) - return B + return B + end end -@ngenerate N typeof(B) function unsafe_setindex!(B::BitArray, x::Bool, I0::UnitRange{Int}, I::NTuple{N,Union(Int,UnitRange{Int})}...) - f0 = first(I0) - l0 = length(I0) - l0 == 0 && return B - @nexprs N d->(length(I_d) == 0 && return B) - - gap_lst_1 = 0 - @nexprs N d->(gap_lst_{d+1} = length(I_d)) - stride = 1 - ind = f0 - @nexprs N d->begin - stride *= size(B, d) - stride_lst_d = stride - ind += stride * (first(I_d) - 1) - gap_lst_{d+1} *= stride - end - - @nloops(N, i, d->I_d, - d->nothing, # PRE - d->(ind += stride_lst_d - gap_lst_d), # POST +stagedfunction unsafe_setindex!(B::BitArray, x::Bool, I0::UnitRange{Int}, I::Union(Int,UnitRange{Int})...) + N = length(I) + quote + f0 = first(I0) + l0 = length(I0) + l0 == 0 && return B + @nexprs $N d->(length(I[d]) == 0 && return B) + + gap_lst_1 = 0 + @nexprs $N d->(gap_lst_{d+1} = length(I[d])) + stride = 1 + ind = f0 + @nexprs $N d->begin + stride *= size(B, d) + stride_lst_d = stride + ind += stride * (first(I[d]) - 1) + gap_lst_{d+1} *= stride + end + + @nloops($N, i, d->I[d], + d->nothing, # PRE + d->(ind += stride_lst_d - gap_lst_d), # POST begin # BODY fill_chunks!(B.chunks, x, ind, l0) - end) + end) - return B + return B + end end # general multidimensional non-scalar indexing -@ngenerate N typeof(B) function unsafe_setindex!(B::BitArray, X::AbstractArray, I::NTuple{N,Union(Int,AbstractArray{Int})}...) - refind = 1 - @nloops N i d->I_d @inbounds begin - @ncall N unsafe_setindex! B convert(Bool,X[refind]) i - refind += 1 +stagedfunction unsafe_setindex!(B::BitArray, X::AbstractArray, I::Union(Int,AbstractArray{Int})...) + N = length(I) + quote + refind = 1 + @nexprs $N d->(I_d = I[d]) + @nloops $N i d->I_d @inbounds begin + @ncall $N unsafe_setindex! B convert(Bool,X[refind]) i + refind += 1 + end + return B end - return B end -@ngenerate N typeof(B) function unsafe_setindex!(B::BitArray, x::Bool, I::NTuple{N,Union(Int,AbstractArray{Int})}...) - @nloops N i d->I_d begin - @ncall N unsafe_setindex! B x i +stagedfunction unsafe_setindex!(B::BitArray, x::Bool, I::Union(Int,AbstractArray{Int})...) + N = length(I) + quote + @nexprs $N d->(I_d = I[d]) + @nloops $N i d->I_d begin + @ncall $N unsafe_setindex! B x i + end + return B end - return B end # general versions with Real (or logical) indexing which dispatch on the appropriate method @@ -670,11 +761,14 @@ function setindex!(B::BitArray, x, i::Real) return unsafe_setindex!(B, convert(Bool,x), to_index(i)) end -@ngenerate N typeof(B) function setindex!(B::BitArray, x, I::NTuple{N,Union(Real,AbstractArray)}...) - checkbounds(B, I...) - #return unsafe_setindex!(B, convert(Bool,x), to_index(I...)...) # segfaults! (???) - @nexprs N d->(J_d = to_index(I_d)) - return @ncall N unsafe_setindex! B convert(Bool,x) J +stagedfunction setindex!(B::BitArray, x, I::Union(Real,AbstractArray)...) + N = length(I) + quote + checkbounds(B, I...) + #return unsafe_setindex!(B, convert(Bool,x), to_index(I...)...) # segfaults! (???) + @nexprs $N d->(J_d = to_index(I[d])) + return @ncall $N unsafe_setindex! B convert(Bool,x) J + end end @@ -686,82 +780,93 @@ function setindex!(B::BitArray, X::AbstractArray, i::Real) return unsafe_setindex!(B, X, j) end -@ngenerate N typeof(B) function setindex!(B::BitArray, X::AbstractArray, I::NTuple{N,Union(Real,AbstractArray)}...) - checkbounds(B, I...) - @nexprs N d->(J_d = to_index(I_d)) - @ncall N setindex_shape_check X J - return @ncall N unsafe_setindex! B X J +stagedfunction setindex!(B::BitArray, X::AbstractArray, I::Union(Real,AbstractArray)...) + N = length(I) + quote + checkbounds(B, I...) + @nexprs $N d->(J_d = to_index(I[d])) + @ncall $N setindex_shape_check X J + return @ncall $N unsafe_setindex! B X J + end end ## findn -@ngenerate N NTuple{N,Vector{Int}} function findn{N}(B::BitArray{N}) - nnzB = countnz(B) - I = ntuple(N, x->Array(Int, nnzB)) - if nnzB > 0 - count = 1 - @nloops N i B begin - if (@nref N B i) # TODO: should avoid bounds checking - @nexprs N d->(I[d][count] = i_d) - count += 1 +stagedfunction findn{N}(B::BitArray{N}) + quote + nnzB = countnz(B) + I = ntuple($N, x->Array(Int, nnzB)) + if nnzB > 0 + count = 1 + @nloops $N i B begin + if (@nref $N B i) # TODO: should avoid bounds checking + @nexprs $N d->(I[d][count] = i_d) + count += 1 + end end end + return I end - return I end ## isassigned -@ngenerate N Bool function isassigned(B::BitArray, I_0::Int, I::NTuple{N,Int}...) - stride = 1 - index = I_0 - @nexprs N d->begin - l = size(B,d) - stride *= l - 1 <= I_{d-1} <= l || return false - index += (I_d - 1) * stride +stagedfunction isassigned(B::BitArray, I_0::Int, I::Int...) + N = length(I) + quote + @nexprs $N d->(I_d = I[d]) + stride = 1 + index = I_0 + @nexprs N d->begin + l = size(B,d) + stride *= l + 1 <= I_{d-1} <= l || return false + index += (I_d - 1) * stride + end + return isassigned(B, index) end - return isassigned(B, index) end ## permutedims for (V, PT, BT) in [((:N,), BitArray, BitArray), ((:T,:N), Array, StridedArray)] - @eval @ngenerate N typeof(P) function permutedims!{$(V...)}(P::$PT{$(V...)}, B::$BT{$(V...)}, perm) - dimsB = size(B) - length(perm) == N || throw(ArgumentError("expected permutation of size $N, but length(perm)=$(length(perm))")) - isperm(perm) || throw(ArgumentError("input is not a permutation")) - dimsP = size(P) - for i = 1:length(perm) - dimsP[i] == dimsB[perm[i]] || throw(DimensionMismatch("destination tensor of incorrect size")) - end - - #calculates all the strides - strides_1 = 0 - @nexprs N d->(strides_{d+1} = stride(B, perm[d])) + @eval stagedfunction permutedims!{$(V...)}(P::$PT{$(V...)}, B::$BT{$(V...)}, perm) + quote + dimsB = size(B) + length(perm) == N || throw(ArgumentError("expected permutation of size $N, but length(perm)=$(length(perm))")) + isperm(perm) || throw(ArgumentError("input is not a permutation")) + dimsP = size(P) + for i = 1:length(perm) + dimsP[i] == dimsB[perm[i]] || throw(DimensionMismatch("destination tensor of incorrect size")) + end - #Creates offset, because indexing starts at 1 - offset = 1 - sum(@ntuple N d->strides_{d+1}) + #calculates all the strides + strides_1 = 0 + @nexprs $N d->(strides_{d+1} = stride(B, perm[d])) - if isa(B, SubArray) - offset += first_index(B::SubArray) - 1 - B = B.parent - end + #Creates offset, because indexing starts at 1 + offset = 1 - sum(@ntuple $N d->strides_{d+1}) - ind = 1 - @nexprs 1 d->(counts_{N+1} = strides_{N+1}) # a trick to set counts_($N+1) - @nloops(N, i, P, - d->(counts_d = strides_d), # PRE - d->(counts_{d+1} += strides_{d+1}), # POST - begin # BODY - sumc = sum(@ntuple N d->counts_{d+1}) - @inbounds P[ind] = B[sumc+offset] - ind += 1 - end) + if isa(B, SubArray) + offset += first_index(B::SubArray) - 1 + B = B.parent + end - return P + ind = 1 + @nexprs 1 d->(counts_{$N+1} = strides_{$N+1}) # a trick to set counts_($N+1) + @nloops($N, i, P, + d->(counts_d = strides_d), # PRE + d->(counts_{d+1} += strides_{d+1}), # POST + begin # BODY + sumc = sum(@ntuple $N d->counts_{d+1}) + @inbounds P[ind] = B[sumc+offset] + ind += 1 + end) + + return P + end end end @@ -774,70 +879,72 @@ immutable Prehashed end hash(x::Prehashed) = x.hash -@ngenerate N typeof(A) function unique{T,N}(A::AbstractArray{T,N}, dim::Int) - 1 <= dim <= N || return copy(A) - hashes = zeros(UInt, size(A, dim)) +stagedfunction unique{T,N}(A::AbstractArray{T,N}, dim::Int) + quote + 1 <= dim <= $N || return copy(A) + hashes = zeros(UInt, size(A, dim)) - # Compute hash for each row - k = 0 - @nloops N i A d->(if d == dim; k = i_d; end) begin - @inbounds hashes[k] = hash(hashes[k], hash((@nref N A i))) - end + # Compute hash for each row + k = 0 + @nloops $N i A d->(if d == dim; k = i_d; end) begin + @inbounds hashes[k] = hash(hashes[k], hash((@nref $N A i))) + end - # Collect index of first row for each hash - uniquerow = Array(Int, size(A, dim)) - firstrow = Dict{Prehashed,Int}() - for k = 1:size(A, dim) - uniquerow[k] = get!(firstrow, Prehashed(hashes[k]), k) - end - uniquerows = collect(values(firstrow)) + # Collect index of first row for each hash + uniquerow = Array(Int, size(A, dim)) + firstrow = Dict{Prehashed,Int}() + for k = 1:size(A, dim) + uniquerow[k] = get!(firstrow, Prehashed(hashes[k]), k) + end + uniquerows = collect(values(firstrow)) - # Check for collisions - collided = falses(size(A, dim)) - @inbounds begin - @nloops N i A d->(if d == dim + # Check for collisions + collided = falses(size(A, dim)) + @inbounds begin + @nloops $N i A d->(if d == dim k = i_d j_d = uniquerow[k] else j_d = i_d end) begin - if (@nref N A j) != (@nref N A i) - collided[k] = true - end + if (@nref $N A j) != (@nref $N A i) + collided[k] = true + end + end end - end - if any(collided) - nowcollided = BitArray(size(A, dim)) - while any(collided) - # Collect index of first row for each collided hash - empty!(firstrow) - for j = 1:size(A, dim) - collided[j] || continue - uniquerow[j] = get!(firstrow, Prehashed(hashes[j]), j) - end - for v in values(firstrow) - push!(uniquerows, v) - end + if any(collided) + nowcollided = BitArray(size(A, dim)) + while any(collided) + # Collect index of first row for each collided hash + empty!(firstrow) + for j = 1:size(A, dim) + collided[j] || continue + uniquerow[j] = get!(firstrow, Prehashed(hashes[j]), j) + end + for v in values(firstrow) + push!(uniquerows, v) + end - # Check for collisions - fill!(nowcollided, false) - @nloops N i A d->begin - if d == dim - k = i_d - j_d = uniquerow[k] - (!collided[k] || j_d == k) && continue - else - j_d = i_d - end - end begin - if (@nref N A j) != (@nref N A i) - nowcollided[k] = true + # Check for collisions + fill!(nowcollided, false) + @nloops $N i A d->begin + if d == dim + k = i_d + j_d = uniquerow[k] + (!collided[k] || j_d == k) && continue + else + j_d = i_d + end + end begin + if (@nref $N A j) != (@nref $N A i) + nowcollided[k] = true + end end + (collided, nowcollided) = (nowcollided, collided) end - (collided, nowcollided) = (nowcollided, collided) end - end - @nref N A d->d == dim ? sort!(uniquerows) : (1:size(A, d)) + @nref $N A d->d == dim ? sort!(uniquerows) : (1:size(A, d)) + end end diff --git a/base/reducedim.jl b/base/reducedim.jl index 351e574458d61..efabd282d8329 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -154,38 +154,40 @@ function check_reducdims(R, A) return lsiz end -@ngenerate N typeof(R) function _mapreducedim!{T,N}(f, op, R::AbstractArray, A::AbstractArray{T,N}) - lsiz = check_reducdims(R, A) - isempty(A) && return R - @nextract N sizeR d->size(R,d) - sizA1 = size(A, 1) - - if has_fast_linear_indexing(A) && lsiz > 16 - # use mapreduce_impl, which is probably better tuned to achieve higher performance - nslices = div(length(A), lsiz) - ibase = 0 - for i = 1:nslices - @inbounds R[i] = op(R[i], mapreduce_impl(f, op, A, ibase+1, ibase+lsiz)) - ibase += lsiz - end - elseif size(R, 1) == 1 && sizA1 > 1 - # keep the accumulator as a local variable when reducing along the first dimension - @nloops N i d->(d>1? (1:size(A,d)) : (1:1)) d->(j_d = sizeR_d==1 ? 1 : i_d) begin - @inbounds r = (@nref N R j) - for i_1 = 1:sizA1 - @inbounds v = f(@nref N A i) - r = op(r, v) +stagedfunction _mapreducedim!{T,N}(f, op, R::AbstractArray, A::AbstractArray{T,N}) + quote + lsiz = check_reducdims(R, A) + isempty(A) && return R + @nextract $N sizeR d->size(R,d) + sizA1 = size(A, 1) + + if has_fast_linear_indexing(A) && lsiz > 16 + # use mapreduce_impl, which is probably better tuned to achieve higher performance + nslices = div(length(A), lsiz) + ibase = 0 + for i = 1:nslices + @inbounds R[i] = op(R[i], mapreduce_impl(f, op, A, ibase+1, ibase+lsiz)) + ibase += lsiz + end + elseif size(R, 1) == 1 && sizA1 > 1 + # keep the accumulator as a local variable when reducing along the first dimension + @nloops $N i d->(d>1? (1:size(A,d)) : (1:1)) d->(j_d = sizeR_d==1 ? 1 : i_d) begin + @inbounds r = (@nref $N R j) + for i_1 = 1:sizA1 + @inbounds v = f(@nref $N A i) + r = op(r, v) + end + @inbounds (@nref $N R j) = r + end + else + # general implementation + @nloops $N i A d->(j_d = sizeR_d==1 ? 1 : i_d) begin + @inbounds v = f(@nref $N A i) + @inbounds (@nref $N R j) = op((@nref $N R j), v) end - @inbounds (@nref N R j) = r - end - else - # general implementation - @nloops N i A d->(j_d = sizeR_d==1 ? 1 : i_d) begin - @inbounds v = f(@nref N A i) - @inbounds (@nref N R j) = op((@nref N R j), v) end + return R end - return R end mapreducedim!(f, op, R::AbstractArray, A::AbstractArray) = _mapreducedim!(f, op, R, A) @@ -290,13 +292,17 @@ function gen_findreduction_body(N, f::Function) end end -eval(ngenerate(:N, :(typeof((Rval,Rind))), :(_findmin!{T,N}(Rval::AbstractArray, Rind::AbstractArray, A::AbstractArray{T,N})), N->gen_findreduction_body(N, <))) +stagedfunction _findmin!{T,N}(Rval::AbstractArray, Rind::AbstractArray, A::AbstractArray{T,N}) + gen_findreduction_body(N, <) +end findmin!{R}(rval::AbstractArray{R}, rind::AbstractArray, A::AbstractArray; init::Bool=true) = _findmin!(initarray!(rval, typemax(R), init), rind, A) findmin{T}(A::AbstractArray{T}, region) = isempty(A) ? (similar(A,reduced_dims0(A,region)), zeros(Int,reduced_dims0(A,region))) : _findmin!(reducedim_initarray0(A, region, typemax(T)), zeros(Int,reduced_dims0(A,region)), A) -eval(ngenerate(:N, :(typeof((Rval,Rind))), :(_findmax!{T,N}(Rval::AbstractArray, Rind::AbstractArray, A::AbstractArray{T,N})), N->gen_findreduction_body(N, >))) +stagedfunction _findmax!{T,N}(Rval::AbstractArray, Rind::AbstractArray, A::AbstractArray{T,N}) + gen_findreduction_body(N, >) +end findmax!{R}(rval::AbstractArray{R}, rind::AbstractArray, A::AbstractArray; init::Bool=true) = _findmax!(initarray!(rval, typemin(R), init), rind, A) findmax{T}(A::AbstractArray{T}, region) = isempty(A) ? (similar(A,reduced_dims0(A,region)), zeros(Int,reduced_dims0(A,region))) : diff --git a/base/sharedarray.jl b/base/sharedarray.jl index 5d8f52ac05539..2125ec3ae4aa3 100644 --- a/base/sharedarray.jl +++ b/base/sharedarray.jl @@ -210,12 +210,24 @@ convert(::Type{Array}, S::SharedArray) = S.s getindex(S::SharedArray) = getindex(S.s) getindex(S::SharedArray, I::Real) = getindex(S.s, I) getindex(S::SharedArray, I::AbstractArray) = getindex(S.s, I) -@nsplat N 1:5 getindex(S::SharedArray, I::NTuple{N,Union(Real,AbstractVector)}...) = getindex(S.s, I...) +stagedfunction getindex(S::SharedArray, I::Union(Real,AbstractVector)...) + N = length(I) + Isplat = Expr[:(I[$d]) for d = 1:N] + quote + getindex(S.s, $(Isplat...)) + end +end setindex!(S::SharedArray, x) = setindex!(S.s, x) setindex!(S::SharedArray, x, I::Real) = setindex!(S.s, x, I) setindex!(S::SharedArray, x, I::AbstractArray) = setindex!(S.s, x, I) -@nsplat N 1:5 setindex!(S::SharedArray, x, I::NTuple{N,Union(Real,AbstractVector)}...) = setindex!(S.s, x, I...) +stagedfunction setindex!(S::SharedArray, x, I::Union(Real,AbstractVector)...) + N = length(I) + Isplat = Expr[:(I[$d]) for d = 1:N] + quote + setindex!(S.s, x, $(Isplat...)) + end +end function fill!(S::SharedArray, v) vT = convert(eltype(S), v) diff --git a/base/statistics.jl b/base/statistics.jl index 8a3019b40337a..d7d3ee4730e49 100644 --- a/base/statistics.jl +++ b/base/statistics.jl @@ -102,39 +102,41 @@ call(f::CentralizedAbs2Fun, x) = abs2(x - f.m) centralize_sumabs2(A::AbstractArray, m::Number, ifirst::Int, ilast::Int) = mapreduce_impl(CentralizedAbs2Fun(m), AddFun(), A, ifirst, ilast) -@ngenerate N typeof(R) function centralize_sumabs2!{S,T,N}(R::AbstractArray{S}, A::AbstractArray{T,N}, means::AbstractArray) - # following the implementation of _mapreducedim! at base/reducedim.jl - lsiz = check_reducdims(R, A) - isempty(R) || fill!(R, zero(S)) - isempty(A) && return R - @nextract N sizeR d->size(R,d) - sizA1 = size(A, 1) - - if has_fast_linear_indexing(A) && lsiz > 16 - # use centralize_sumabs2, which is probably better tuned to achieve higher performance - nslices = div(length(A), lsiz) - ibase = 0 - for i = 1:nslices - @inbounds R[i] = centralize_sumabs2(A, means[i], ibase+1, ibase+lsiz) - ibase += lsiz - end - elseif size(R, 1) == 1 && sizA1 > 1 - # keep the accumulator as a local variable when reducing along the first dimension - @nloops N i d->(d>1? (1:size(A,d)) : (1:1)) d->(j_d = sizeR_d==1 ? 1 : i_d) begin - @inbounds r = (@nref N R j) - @inbounds m = (@nref N means j) - for i_1 = 1:sizA1 - @inbounds r += abs2((@nref N A i) - m) +stagedfunction centralize_sumabs2!{S,T,N}(R::AbstractArray{S}, A::AbstractArray{T,N}, means::AbstractArray) + quote + # following the implementation of _mapreducedim! at base/reducedim.jl + lsiz = check_reducdims(R, A) + isempty(R) || fill!(R, zero(S)) + isempty(A) && return R + @nextract $N sizeR d->size(R,d) + sizA1 = size(A, 1) + + if has_fast_linear_indexing(A) && lsiz > 16 + # use centralize_sumabs2, which is probably better tuned to achieve higher performance + nslices = div(length(A), lsiz) + ibase = 0 + for i = 1:nslices + @inbounds R[i] = centralize_sumabs2(A, means[i], ibase+1, ibase+lsiz) + ibase += lsiz + end + elseif size(R, 1) == 1 && sizA1 > 1 + # keep the accumulator as a local variable when reducing along the first dimension + @nloops $N i d->(d>1? (1:size(A,d)) : (1:1)) d->(j_d = sizeR_d==1 ? 1 : i_d) begin + @inbounds r = (@nref $N R j) + @inbounds m = (@nref $N means j) + for i_1 = 1:sizA1 + @inbounds r += abs2((@nref $N A i) - m) + end + @inbounds (@nref $N R j) = r + end + else + # general implementation + @nloops $N i A d->(j_d = sizeR_d==1 ? 1 : i_d) begin + @inbounds (@nref $N R j) += abs2((@nref $N A i) - (@nref $N means j)) end - @inbounds (@nref N R j) = r - end - else - # general implementation - @nloops N i A d->(j_d = sizeR_d==1 ? 1 : i_d) begin - @inbounds (@nref N R j) += abs2((@nref N A i) - (@nref N means j)) end + return R end - return R end function varm{T}(A::AbstractArray{T}, m::Number; corrected::Bool=true)