From 2a9fed6b564cbb1ed2b043ebe4590e9a66cc2664 Mon Sep 17 00:00:00 2001 From: ayush1999 Date: Fri, 5 Oct 2018 18:24:56 +0530 Subject: [PATCH 01/10] Basic changes for cross convolutions --- src/Flux.jl | 2 +- src/layers/conv.jl | 48 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/src/Flux.jl b/src/Flux.jl index ff178450b1..3a98703ccd 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -8,7 +8,7 @@ using MacroTools: @forward export Chain, Dense, Maxout, RNN, LSTM, GRU, - Conv, ConvTranspose, MaxPool, MeanPool, DepthwiseConv, + Conv, ConvTranspose, CrossCor, MaxPool, MeanPool, DepthwiseConv, Dropout, AlphaDropout, LayerNorm, BatchNorm, InstanceNorm, params, mapleaves, cpu, gpu, f32, f64 diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 216957a70e..1644238b62 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -50,7 +50,7 @@ function (c::Conv)(x::AbstractArray) # TODO: breaks gpu broadcast :( # ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1))) σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) - σ.(conv(x, c.weight, stride = c.stride, pad = c.pad, dilation = c.dilation) .+ b) + σ.(conv(x, c.weight, stride = c.stride, pad = c.pad, dilation = c.dilation, mode=0) .+ b) end function Base.show(io::IO, l::Conv) @@ -165,6 +165,52 @@ function Base.show(io::IO, l::DepthwiseConv) print(io, ")") end +""" + CrossConv(size, in=>out) + CrossConv(size, in=>out, relu) + +Standard cross convolutional layer. `size` should be a tuple like `(2, 2)`. +`in` and `out` specify the number of input and output channels respectively. + +Data should be stored in WHCN order. In other words, a 100×100 RGB image would +be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array. + +Takes the keyword arguments `pad`, `stride` and `dilation`. +""" +struct CrossCor{N,F,A,V} +σ::F +weight::A +bias::V +stride::NTuple{N,Int} +pad::NTuple{N,Int} +dilation::NTuple{N,Int} +end + +CrossCor(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; + stride = 1, pad = 0, dilation = 1) where {T,N} = + CrossCor(σ, w, b, expand.(sub2(Val(N)), (stride, pad, dilation))...) 
+ +CrossCor(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = initn, + stride = 1, pad = 0, dilation = 1) where N = + CrossCor(param(init(k..., ch...)), param(zeros(ch[2])), σ, + stride = stride, pad = pad, dilation = dilation) + +@treelike CrossCor + +function (c::CrossCor)(x) + # TODO: breaks gpu broadcast :( + # ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1))) + σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) + σ.(conv(x, c.weight, stride = c.stride, pad = c.pad, dilation = c.dilation, mode=1) .+ b) +end + +function Base.show(io::IO, l::CrossCor) + print(io, "CrossCor(", size(l.weight)[1:ndims(l.weight)-2]) + print(io, ", ", size(l.weight, ndims(l.weight)-1), "=>", size(l.weight, ndims(l.weight))) + l.σ == identity || print(io, ", ", l.σ) + print(io, ")") +end + """ MaxPool(k) From ed8f3075728dbb26325807b61cf49de2e0037ac5 Mon Sep 17 00:00:00 2001 From: ayush1999 Date: Fri, 5 Oct 2018 18:41:39 +0530 Subject: [PATCH 02/10] some changes --- src/layers/conv.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 1644238b62..c853ed6627 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -1,4 +1,4 @@ -using NNlib: conv, ∇conv_data, depthwiseconv +using NNlib: conv, ∇conv_data, depthwiseconv, crossconv @generated sub2(::Val{N}) where N = :(Val($(N-2))) @@ -50,7 +50,7 @@ function (c::Conv)(x::AbstractArray) # TODO: breaks gpu broadcast :( # ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1))) σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) - σ.(conv(x, c.weight, stride = c.stride, pad = c.pad, dilation = c.dilation, mode=0) .+ b) + σ.(conv(x, c.weight, stride = c.stride, pad = c.pad, dilation = c.dilation) .+ b) end function Base.show(io::IO, l::Conv) @@ -201,7 +201,7 @@ function (c::CrossCor)(x) # TODO: breaks gpu broadcast :( # ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1))) σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) - σ.(conv(x, c.weight, stride = c.stride, pad = c.pad, dilation = c.dilation, mode=1) .+ b) + σ.(crossconv(x, c.weight, stride = c.stride, pad = c.pad, dilation = c.dilation) .+ b) end function Base.show(io::IO, l::CrossCor) From 670fdd9903fcf1cd1a39b38255e7ee8c5ec8f4de Mon Sep 17 00:00:00 2001 From: ayush1999 Date: Mon, 8 Oct 2018 21:39:38 +0530 Subject: [PATCH 03/10] CrossConv -> CrossCor --- src/layers/conv.jl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index c853ed6627..b176044225 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -69,6 +69,8 @@ end """ ConvTranspose(size, in=>out) ConvTranspose(size, in=>out, relu) + CrossCor(size, in=>out) + CrossCor(size, in=>out, relu) Standard convolutional transpose layer. `size` should be a tuple like `(2, 2)`. `in` and `out` specify the number of input and output channels respectively. @@ -77,6 +79,7 @@ be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array. Takes the keyword arguments `pad`, `stride` and `dilation`. 
""" struct ConvTranspose{N,F,A,V} +struct CrossCor{N,F,A,V} σ::F weight::A bias::V @@ -201,7 +204,7 @@ function (c::CrossCor)(x) # TODO: breaks gpu broadcast :( # ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1))) σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) - σ.(crossconv(x, c.weight, stride = c.stride, pad = c.pad, dilation = c.dilation) .+ b) + σ.(crosscor(x, c.weight, stride = c.stride, pad = c.pad, dilation = c.dilation) .+ b) end function Base.show(io::IO, l::CrossCor) From 9520e6b63fb5e6c74931ad6712f86b51734b518a Mon Sep 17 00:00:00 2001 From: ayush1999 Date: Tue, 9 Oct 2018 13:03:02 +0530 Subject: [PATCH 04/10] crossconv for tracker --- src/tracker/array.jl | 460 +++++++++++++++++++++++++++++++++++++++++++ test/layers/conv.jl | 13 ++ 2 files changed, 473 insertions(+) create mode 100644 src/tracker/array.jl diff --git a/src/tracker/array.jl b/src/tracker/array.jl new file mode 100644 index 0000000000..d4ebcf5ac9 --- /dev/null +++ b/src/tracker/array.jl @@ -0,0 +1,460 @@ +import Base: * + +import LinearAlgebra +import LinearAlgebra: inv, \, / + +using Statistics +using LinearAlgebra: Transpose, Adjoint, diagm, diag + +struct TrackedArray{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N} + tracker::Tracked{A} + data::A + grad::A + TrackedArray{T,N,A}(t::Tracked{A}, data::A) where {T,N,A} = new(t, data) + TrackedArray{T,N,A}(t::Tracked{A}, data::A, grad::A) where {T,N,A} = new(t, data, grad) +end + +data(x::TrackedArray) = x.data +tracker(x::TrackedArray) = x.tracker + +TrackedVector{T,A} = TrackedArray{T,1,A} +TrackedMatrix{T,A} = TrackedArray{T,2,A} +TrackedVecOrMat{T,A} = Union{TrackedVector{T,A},TrackedMatrix{T,A}} + +track(c::Call, x::AbstractArray) = TrackedArray(c, x) + +TrackedArray(c::Call, x::A) where A <: AbstractArray = + TrackedArray{eltype(A),ndims(A),A}(Tracked{A}(c), x) + +TrackedArray(c::Call, x::A, Δ::A) where A <: AbstractArray = + TrackedArray{eltype(A),ndims(A),A}(Tracked{A}(c, Δ), x, Δ) + +TrackedArray(x::AbstractArray) = TrackedArray(Call(), x, zero(x)) + +Base.eltype(x::Type{<:TrackedArray{T}}) where T <: Real = TrackedReal{T} + +Base.show(io::IO, ::Type{TrackedArray{T,N,A}}) where {T,N,A<:AbstractArray{T,N}} = + print(io, "TrackedArray{…,$A}") + +function Base.summary(io::IO, x::TrackedArray) + print(io, "Tracked ") + summary(io, data(x)) +end + +Base.print_array(io::IO, x::TrackedArray) = Base.print_array(io, data(x)) + +Base.setindex!(xs::TrackedArray, v, i...) = + error("Can't differentiate `setindex!`") + +back!(::TrackedArray) = error("Value is not scalar; use `back!(sum(x))` or `back!(x, Δ)`") + +# Fallthrough methods + +for f in :[Base.size, Base.ndims, Base.collect].args + @eval @inline $f(x::TrackedArray, a...) = $f(data(x), a...) +end + +Base.size(x::TrackedArray, i::Integer, j::Integer, is::Integer...) = + size(data(x), i, j, is...) + +Base.similar(x::TrackedArray, dims::Union{AbstractUnitRange,Integer}...) = + similar(data(x), dims...) + +Base.similar(x::TrackedArray, T::Type) = similar(data(x), T) + +for op in [:(==), :≈] + @eval Base.$op(x::TrackedArray, y::AbstractArray) = Base.$op(data(x), y) + @eval Base.$op(x::AbstractArray, y::TrackedArray) = Base.$op(x, data(y)) + @eval Base.$op(x::TrackedArray, y::TrackedArray) = Base.$op(data(x), data(y)) +end + +# Array Stdlib + +Base.getindex(xs::TrackedArray, i...) = track(getindex, xs, i...) + +@grad function getindex(xs::AbstractArray, i...) + data(xs)[i...], function (Δ) + Δ′ = zero(xs) + Δ′[i...] 
= data(Δ) + (nobacksies(:getindex, Δ′), map(_->nothing, i)...) + end +end + +Base.:-(xs::TrackedArray) = track(-, xs) + +@grad -(xs) = -data(xs), Δ -> (-Δ,) + +Base.transpose(xs::TrackedArray) = track(transpose, xs) +Base.adjoint(xs::TrackedArray) = track(adjoint, xs) + +@grad transpose(xs) = transpose(data(xs)), Δ -> (reshape(transpose(Δ), size(xs)),) +@grad adjoint(xs) = data(xs)', Δ -> (reshape(Δ', size(xs)),) + +Base.repeat(xs::TrackedArray; kw...) = track(repeat, xs; kw...) + +@grad function repeat(xs; inner=ntuple(x->1, ndims(xs)), outer=ntuple(x->1, ndims(xs))) + repeat(data(xs), inner = inner, outer = outer), function (Δ) + Δ′ = zero(xs) + S = size(xs) + + # Loop through each element of Δ, calculate source dimensions, accumulate into Δ′ + for (dest_idx, val) in pairs(IndexCartesian(), data(Δ)) + # First, round dest_idx[dim] to nearest gridpoint defined by inner[dim], then + # wrap around based on original size S. + src_idx = [mod1(div(dest_idx[dim] - 1, inner[dim]) + 1, S[dim]) for dim in 1:length(S)] + Δ′[src_idx...] += val + end + (nobacksies(:repeat, Δ′),) + end +end + +for f in [:vcat, :hcat] + UArray = :(Union{TrackedArray,Vector,Matrix,Adjoint,Transpose}) + @eval begin + # This section is a bit of a hack since julia doesn't have a standardised + # promotion mechanism for concatenation yet + # https://github.com/JuliaLang/julia/pull/20815 + + # It should support tracked concatenation with rank ∈ (1,2) with a + # TrackedArray anywhere among the arguments This works as long as base has + # other functions that captures `(::Union{Vector,RowVector,Matrix}...)`. + Base.$f(a::$UArray...) = track($f, a...) + + # It should support tracked concatenation with rank>2 if the TrackedArray is + # first + Base.$f(a::TrackedArray, b::AbstractArray...) = track($f, a, b...) + Base.$f(a::TrackedArray, b::$UArray...) = track($f, a, b...) # resolves ambiguity introduced by previous row + + # It should support tracked concatenation with rank>2 if the TrackedArray is + # second + Base.$f(a::Array, b::TrackedArray, c::AbstractArray...) = track($f, a, b, c...) + Base.$f(a::Union{Vector,Matrix,Adjoint,Transpose}, b::TrackedArray, + c::$UArray...) = + track($f, a, b, c...) # resolves ambiguity introduced by previous row + end +end + +@grad function vcat(xs...) + vcat(data.(xs)...), function (Δ) + start = 0 + Δs = [begin + i = map(_ -> :, size(xsi)) |> Base.tail + d = Δ[start+1:start+size(xsi,1), i...] + start += size(xsi, 1) + d + end for xsi in xs] + return (Δs...,) + end +end + +@grad function hcat(xs...) + hcat(data.(xs)...), function (Δ) + start = 0 + Δs = [begin + d = if ndims(xsi) == 1 + Δ[:, start+1] + else + i = map(_ -> :, size(xsi)) |> Base.tail |> Base.tail + Δ[:, start+1:start+size(xsi,2), i...] + end + start += size(xsi, 2) + d + end for xsi in xs] + return (Δs...,) + end +end + +Base.cat(a::TrackedArray; dims) = track(cat, a, dims = dims) +Base.cat(a::TrackedArray, b::TrackedArray, c::AbstractArray...; dims) = track(cat, a, b, c..., dims = dims) +Base.cat(a::TrackedArray, b::AbstractArray, c::AbstractArray...; dims) = track(cat, a, b, c..., dims = dims) +Base.cat(a::AbstractArray, b::TrackedArray, c::AbstractArray...; dims) = track(cat, a, b, c..., dims = dims) + +@grad function cat(Xs...; dims) + cat(data.(Xs)..., dims = dims), function (Δ) + start = ntuple(i -> 0, Val(ndims(Δ))) + Δs = [begin + dim_xs = 1:ndims(xs) + till_xs = ntuple((i -> i in dims ? (i in dim_xs ? size(xs,i) : 1) : 0), Val(ndims(Δ))) + xs_in_Δ = ntuple(i -> till_xs[i] > 0 ? 
(start[i]+1:start[i]+till_xs[i]) : Colon(), Val(ndims(Δ))) + d = reshape(Δ[xs_in_Δ...],size(xs)) + start = start .+ till_xs + d + end for xs in Xs] + return (Δs...,) + end +end + +Base.reshape(xs::TrackedArray, dims::Union{Colon,Int64}...) = reshape(xs, dims) +Base.reshape(xs::TrackedArray, dims::Tuple{Vararg{Union{Int64,Colon}}}) = reshape(xs, Base._reshape_uncolon(xs, dims)) +Base.reshape(xs::TrackedArray, dims::Tuple{Vararg{Int64}}) = track(reshape, xs, dims) + +@grad reshape(xs, dims) = reshape(data(xs), dims), Δ -> (reshape(Δ, size(xs)),nothing) + +Base.permutedims(xs::TrackedArray, dims) = track(permutedims, xs, dims) +@grad permutedims(xs, dims) = permutedims(data(xs), dims), Δ -> (permutedims(Δ, invperm(dims)),nothing) + +function _kron(mat1::AbstractMatrix,mat2::AbstractMatrix) + m1, n1 = size(mat1) + mat1_rsh = reshape(mat1,(1,m1,1,n1)) + + m2, n2 = size(mat2) + mat2_rsh = reshape(mat2,(m2,1,n2,1)) + + return reshape(mat1_rsh.*mat2_rsh, (m1*m2,n1*n2)) +end + +Base.kron(a::TrackedMatrix, b::TrackedMatrix) = _kron(a, b) +Base.kron(a::TrackedMatrix, b::AbstractMatrix) = _kron(a, b) +Base.kron(a::AbstractMatrix, b::TrackedMatrix) = _kron(a, b) + + +inv(A::TrackedArray) = Tracker.track(inv, A) +@grad function inv(A) + return inv(Tracker.data(A)), function (Δ) + Ainv = inv(A) + ∇A = - Ainv' * Δ * Ainv' + return (∇A, ) + end +end + +# (/) rdivide +A::TrackedArray / B::TrackedArray = Tracker.track(/, A, B) +A::AbstractVecOrMat / B::TrackedArray = Tracker.track(/, A, B) +A::TrackedArray / B::AbstractVecOrMat = Tracker.track(/, A, B) +@grad function (A / B) + return Tracker.data(A) / Tracker.data(B), function (Δ) + Binv = inv(B) + ∇B = - Binv' * A' * Δ * Binv' + return (Δ * Binv', ∇B) + end +end + +# (\) ldivide (left vec divide needs more work to resolve dispatch ambiguity) +A::TrackedArray \ B::TrackedArray = Tracker.track(\, A, B) +A::AbstractArray \ B::TrackedArray = Tracker.track(\, A, B) +A::TrackedArray \ B::AbstractVecOrMat = Tracker.track(\, A, B) +@grad function (A \ B) + return Tracker.data(A) \ Tracker.data(B), function (Δ) + Ainv = inv(A) + ∇A = - Ainv' * Δ * B' * Ainv' + return (∇A, Ainv' * Δ) + end +end + + +# Reductions + +Base.sum(xs::TrackedArray; dims = :) = track(sum, xs, dims = dims) +Base.sum(f::Union{Function,Type},xs::TrackedArray) = sum(f.(xs)) + +@grad sum(xs; dims = :) = sum(data(xs), dims = dims), + Δ -> (zero(xs) .+ Δ, ) + +Base.prod(xs::TrackedArray, dim) = track(prod, xs, dim) +Base.prod(xs::TrackedArray) = track(prod, xs) +Base.prod(f::Union{Function, Type}, xs::TrackedArray) = prod(f.(xs)) + +@grad prod(xs) = prod(data(xs)), Δ -> (prod(xs) ./ xs .* Δ,) +@grad prod(xs, dim) = prod(data(xs), dims = dim), + Δ -> (nobacksies(:sum, + reshape(.*(circshift.([reshape(data(xs), length(xs))], 1:length(xs)-1)...), size(xs)) .* Δ), + nothing) + +Base.findfirst(xs::TrackedArray, args...) = findfirst(xs.data, args...) 
+ +Statistics.mean(xs::TrackedArray; dims = :) = track(mean, xs, dims = dims) + +Base.maximum(xs::TrackedArray; dims = :) = track(maximum, xs, dims = dims) +Base.minimum(xs::TrackedArray; dims = :) = track(minimum, xs, dims = dims) + +import LinearAlgebra: dot + +dot(xs::TrackedVector, ys::TrackedVector) = track(dot, xs, ys) +dot(xs::AbstractVector, ys::TrackedVector) = track(dot, xs, ys) +dot(xs::TrackedVector, ys::AbstractVector) = track(dot, xs, ys) + +@grad dot(xs, ys) = dot(data(xs), data(ys)), Δ -> (Δ .* ys, Δ .* xs) + +# Hacks to get std working +Statistics.std(x::TrackedArray; dims = :, mean = Statistics.mean(x, dims = dims)) = _std(x,mean,dims) +_std(x::TrackedArray, mean, dims) = sqrt.(sum((x .- mean).^2, dims = dims) ./ (mapreduce(i -> size(x,i),*, dims) - 1)) +_std(x::TrackedArray, mean, ::Colon) = sqrt.(sum((x .- mean).^2) ./ (length(x) - 1)) + +LinearAlgebra.norm(x::TrackedArray, p::Real = 2) = + sum(abs.(x).^p .+ eps(0f0))^(1/p) # avoid d(sqrt(x))/dx == Inf at 0 + +@grad mean(xs; dims = :) = mean(data(xs), dims=dims), Δ -> (_backmean(xs,Δ,dims),) +_backmean(xs, Δ, ::Colon) = zero(xs) .+ Δ ./ length(xs) +_backmean(xs, Δ, dims) = zero(xs) .+ Δ ./ mapreduce(i -> size(data(xs),i),*,dims) + +@grad function maximum(xs; dims = dims) + maximum(data(xs), dims = dims), function (Δ) + Δ′ = zero(xs) + _, i = findmax(data(xs), dims = dims) + Δ′[i] = data(Δ) + return (nobacksies(:maximum, Δ′),) + end +end + +@grad function minimum(xs; dims = dims) + minimum(data(xs), dims = dims), function (Δ) + Δ′ = zero(xs) + _, i = findmin(data(xs), dims = dims) + Δ′[i] = data(Δ) + return (nobacksies(:minimum, Δ′),) + end +end + +# BLAS + +LinearAlgebra.diagm(x::TrackedVector) = track(diagm, x) +@grad diagm(x) = diagm(data(x)), Δ -> (diag(Δ),) + +x::TrackedMatrix * y::AbstractMatrix = track(*, x, y) +x::AbstractMatrix * y::TrackedMatrix = track(*, x, y) +x::TrackedMatrix * y::TrackedMatrix = track(*, x, y) + +x::TrackedMatrix * y::AbstractVector = track(*, x, y) +x::AbstractMatrix * y::TrackedVector = track(*, x, y) +x::TrackedMatrix * y::TrackedVector = track(*, x, y) + +x::TrackedVector * y::AbstractVector = track(*, x, y) +x::AbstractVector * y::TrackedVector = track(*, x, y) +x::TrackedVector * y::TrackedVector = track(*, x, y) + +@grad a::AbstractMatrix * b::AbstractVecOrMat = + data(a)*data(b), Δ -> (Δ * transpose(b), transpose(a) * Δ) + +# NNlib + +using NNlib +import NNlib: softmax, ∇softmax, logsoftmax, ∇logsoftmax, conv, maxpool, meanpool, crosscor + +softmax(xs::TrackedArray) = track(softmax, xs) + +@grad softmax(xs) = softmax(data(xs)), Δ -> (nobacksies(:softmax, ∇softmax(data(Δ), data(xs))),) + +logsoftmax(xs::TrackedArray) = track(logsoftmax, xs) + +@grad logsoftmax(xs) = logsoftmax(data(xs)), Δ -> (nobacksies(:logsoftmax, ∇logsoftmax(data(Δ), data(xs))),) + +conv(x::TrackedArray, w::TrackedArray; kw...) = track(conv, x, w; kw...) +conv(x::AbstractArray, w::TrackedArray; kw...) = track(conv, x, w; kw...) +conv(x::TrackedArray, w::AbstractArray; kw...) = track(conv, x, w; kw...) + +@grad conv(x, w; kw...) = + conv(data(x), data(w); kw...), + Δ -> nobacksies(:conv, + (NNlib.∇conv_data(data.((Δ, x, w))...; kw...), + NNlib.∇conv_filter(data.((Δ, x, w))...; kw...))) + +crosscor(x::TrackedArray, w::TrackedArray; kw...) = track(crosscor, x, w; kw...) +crosscor(x::AbstractArray, w::TrackedArray; kw...) = track(crosscor, x, w; kw...) +crosscor(x::TrackedArray, w::AbstractArray; kw...) = track(crosscor, x, w; kw...) + +@grad crosscor(x, w; kw...) 
= + crosscor(data(x), data(w); kw...), + Δ -> nobacksies(:crosscor, + (NNlib.∇conv_data(data.((Δ, x, w))...; kw...), + NNlib.∇conv_filter(data.((Δ, x, w))...; kw...))) + +maxpool(x::TrackedArray, k; kw...) = track(maxpool, x, k; kw...) + +@grad function maxpool(x, k; kw...) + y = maxpool(data(x), k; kw...) + y, Δ -> (nobacksies(:maxpool, NNlib.∇maxpool(data.((Δ, y, x))..., k; kw...)), nothing) +end + +meanpool(x::TrackedArray, k; kw...) = track(meanpool, x, k; kw...) + +@grad function meanpool(x, k; kw...) + y = meanpool(data(x), k; kw...) + y, Δ -> (nobacksies(:maxpool, NNlib.∇meanpool(data.((Δ, y, x))..., k; kw...)), nothing) +end + +# Broadcasting + +using ForwardDiff: Dual, partials, value + +trim(x, Δ) = reshape(Δ, ntuple(i -> size(Δ, i), Val(ndims(x)))) + +unbroadcast(x::AbstractArray, Δ) = + size(x) == size(Δ) ? Δ : + length(x) == length(Δ) ? trim(x, Δ) : + trim(x, sum(Δ, dims = ntuple(i -> size(x, i) == 1 ? i : ndims(Δ)+1, Val(ndims(Δ))))) + +unbroadcast(x::Number, Δ) = sum(Δ) +unbroadcast(x::Base.RefValue{<:Function}, _) = nothing +unbroadcast(x::Base.RefValue{<:Val}, _) = nothing + +dual(x, p) = x +dual(x::Real, p) = Dual(x, p) + +function partial(f::F, Δ, i, args::Vararg{Any,N}) where {F,N} + dargs = ntuple(j -> dual(args[j], i==j), Val(N)) + return Δ * f(dargs...).partials[1] +end + +@inline function ∇broadcast(f::F, args::Vararg{Any,N}) where {F,N} + y = broadcast(f, data.(args)...) + eltype(y) <: Real || return y + eltype(y) == Bool && return y + function back(Δ) + Δargs = ntuple(i -> partial.(f, Δ, i, args...), Val(N)) + dxs = map(unbroadcast, args, Δargs) + return dxs + end + # So we can return non-tracked arrays + track(Call(back, tracker.(args)), y) +end + +using Base.Broadcast: BroadcastStyle, ArrayStyle, Broadcasted, broadcasted + +struct TrackedStyle <: BroadcastStyle end + +Broadcast.BroadcastStyle(::Type{<:Union{TrackedArray,TrackedReal}}) = TrackedStyle() +Broadcast.BroadcastStyle(::TrackedStyle, ::BroadcastStyle) = TrackedStyle() + +# We have to re-build the original broadcast struct to get the appropriate array +# style. We need this primarily to support CuArrays' broadcasting fixes. +broadcast_rebuild(xs) = data(xs) + +broadcast_rebuild(bc::Broadcasted) = + broadcasted(bc.f, broadcast_rebuild.(bc.args)...) + +preprocess(x) = x + +function Base.Broadcast.materialize(bc::Broadcasted{TrackedStyle}) + bc1 = Broadcast.flatten(bc) + bc2 = Broadcast.flatten(broadcast_rebuild(bc)) + ∇broadcast(bc2.f, bc1.args...) +end + +using Requires + +# https://github.com/FluxML/Flux.jl/issues/353 +@init Requires.isprecompiling() || @eval Base.Broadcast begin + function flatten(bc::Broadcasted{Style}) where {Style} + isflat(bc) && return bc + args = cat_nested(bc) + let makeargs = make_makeargs(bc), f = bc.f + newf = @inline function(args::Vararg{Any,N}) where N + f(makeargs(args...)...) + end + return Broadcasted{Style}(newf, args, bc.axes) + end + end + @inline function make_makeargs(makeargs, t::Tuple{<:Broadcasted,Vararg{Any}}) + bc = t[1] + let makeargs = make_makeargs(makeargs, tail(t)), f = bc.f + let makeargs = make_makeargs(makeargs, bc.args) + headargs, tailargs = make_headargs(bc.args), make_tailargs(bc.args) + return @inline function(args::Vararg{Any,N}) where N + args1 = makeargs(args...) + a, b = headargs(args1...), tailargs(args1...) + (f(a...), b...) 
+ end + end + end + end +end diff --git a/test/layers/conv.jl b/test/layers/conv.jl index 0bec44c15e..f50664f95a 100644 --- a/test/layers/conv.jl +++ b/test/layers/conv.jl @@ -33,3 +33,16 @@ end @test size(m2(r), 3) == 3 end + +@testset "CrossCor" begin + m = Chain( + CrossCor((2, 2), 1=>16, relu), + MaxPool((2,2)), + CrossCor((2, 2), 16=>8, relu), + MaxPool((2,2)), + x -> reshape(x, :, size(x, 4)), + Dense(288, 10), softmax) + + @test size(m(r)) == (10, 5) + + end From 32b150a5afd7e57551904835af533060a8d8ddc3 Mon Sep 17 00:00:00 2001 From: ayush1999 Date: Wed, 10 Oct 2018 17:18:37 +0530 Subject: [PATCH 05/10] gradtest added --- src/tracker/array.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tracker/array.jl b/src/tracker/array.jl index d4ebcf5ac9..7d94f0eb34 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -355,8 +355,8 @@ crosscor(x::TrackedArray, w::AbstractArray; kw...) = track(crosscor, x, w; kw.. @grad crosscor(x, w; kw...) = crosscor(data(x), data(w); kw...), Δ -> nobacksies(:crosscor, - (NNlib.∇conv_data(data.((Δ, x, w))...; kw...), - NNlib.∇conv_filter(data.((Δ, x, w))...; kw...))) + (NNlib.∇conv_data(data.((Δ, x, w))...; flipkernel=1, kw...), + NNlib.∇conv_filter(data.((Δ, x, w))...; flipkernel=1, kw...))) maxpool(x::TrackedArray, k; kw...) = track(maxpool, x, k; kw...) From 38b307bab9c5923add82affa13d1fb4458513e2f Mon Sep 17 00:00:00 2001 From: ayush1999 Date: Fri, 9 Nov 2018 23:53:21 +0530 Subject: [PATCH 06/10] some final changes --- src/layers/conv.jl | 48 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index b176044225..71617410ae 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -1,4 +1,8 @@ +<<<<<<< HEAD using NNlib: conv, ∇conv_data, depthwiseconv, crossconv +======= +using NNlib: conv, depthwiseconv, crosscor +>>>>>>> some final changes @generated sub2(::Val{N}) where N = :(Val($(N-2))) @@ -257,3 +261,47 @@ MeanPool(k::NTuple{N,Integer}; pad = 0, stride = k) where N = function Base.show(io::IO, m::MeanPool) print(io, "MeanPool(", m.k, ", pad = ", m.pad, ", stride = ", m.stride, ")") end +<<<<<<< HEAD +======= + +""" + CrossCor(size, in=>out) + CrossCor(size, in=>out, relu) +Standard cross convolutional layer. `size` should be a tuple like `(2, 2)`. +`in` and `out` specify the number of input and output channels respectively. +Data should be stored in WHCN order. In other words, a 100×100 RGB image would +be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array. +Takes the keyword arguments `pad`, `stride` and `dilation`. +""" +struct CrossCor{N,F,A,V} + σ::F + weight::A + bias::V + stride::NTuple{N,Int} + pad::NTuple{N,Int} + dilation::NTuple{N,Int} +end +CrossCor(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; + stride = 1, pad = 0, dilation = 1) where {T,N} = + CrossCor(σ, w, b, expand.(sub2(Val(N)), (stride, pad, dilation))...) 
+ +CrossCor(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; + init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N = + CrossCor(param(init(k..., ch...)), param(zeros(ch[2])), σ, + stride = stride, pad = pad, dilation = dilation) + +@treelike CrossCor + +function (c::CrossCor)(x) + # TODO: breaks gpu broadcast :( + # ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1))) + σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) + σ.(crosscor(x, c.weight, stride = c.stride, pad = c.pad, dilation = c.dilation) .+ b) +end +function Base.show(io::IO, l::CrossCor) + print(io, "CrossCor(", size(l.weight)[1:ndims(l.weight)-2]) + print(io, ", ", size(l.weight, ndims(l.weight)-1), "=>", size(l.weight, ndims(l.weight))) + l.σ == identity || print(io, ", ", l.σ) + print(io, ")") +end +>>>>>>> some final changes From 24d38df110396646b387c573dee4d2276237d579 Mon Sep 17 00:00:00 2001 From: ayush1999 Date: Tue, 26 Mar 2019 23:25:48 +0530 Subject: [PATCH 07/10] conflicts --- src/layers/conv.jl | 69 ++----- src/tracker/array.jl | 460 ------------------------------------------- test/layers/conv.jl | 9 +- 3 files changed, 14 insertions(+), 524 deletions(-) delete mode 100644 src/tracker/array.jl diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 71617410ae..be4ac84bf5 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -1,8 +1,4 @@ -<<<<<<< HEAD -using NNlib: conv, ∇conv_data, depthwiseconv, crossconv -======= -using NNlib: conv, depthwiseconv, crosscor ->>>>>>> some final changes +using NNlib: conv, ∇conv_data, depthwiseconv, crosscor @generated sub2(::Val{N}) where N = :(Val($(N-2))) @@ -73,8 +69,6 @@ end """ ConvTranspose(size, in=>out) ConvTranspose(size, in=>out, relu) - CrossCor(size, in=>out) - CrossCor(size, in=>out, relu) Standard convolutional transpose layer. `size` should be a tuple like `(2, 2)`. `in` and `out` specify the number of input and output channels respectively. @@ -83,7 +77,6 @@ be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array. Takes the keyword arguments `pad`, `stride` and `dilation`. """ struct ConvTranspose{N,F,A,V} -struct CrossCor{N,F,A,V} σ::F weight::A bias::V @@ -173,8 +166,8 @@ function Base.show(io::IO, l::DepthwiseConv) end """ - CrossConv(size, in=>out) - CrossConv(size, in=>out, relu) + CrossCor(size, in=>out) + CrossCor(size, in=>out, relu) Standard cross convolutional layer. `size` should be a tuple like `(2, 2)`. `in` and `out` specify the number of input and output channels respectively. @@ -197,8 +190,8 @@ CrossCor(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; stride = 1, pad = 0, dilation = 1) where {T,N} = CrossCor(σ, w, b, expand.(sub2(Val(N)), (stride, pad, dilation))...) 
-CrossCor(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = initn, - stride = 1, pad = 0, dilation = 1) where N = +CrossCor(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; + init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N = CrossCor(param(init(k..., ch...)), param(zeros(ch[2])), σ, stride = stride, pad = pad, dilation = dilation) @@ -218,6 +211,12 @@ function Base.show(io::IO, l::CrossCor) print(io, ")") end +(a::CrossCor{<:Any,<:Any,W})(x::AbstractArray{T}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} = + invoke(a, Tuple{AbstractArray}, x) + +(a::CrossCor{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} = + a(T.(x)) + """ MaxPool(k) @@ -260,48 +259,4 @@ MeanPool(k::NTuple{N,Integer}; pad = 0, stride = k) where N = function Base.show(io::IO, m::MeanPool) print(io, "MeanPool(", m.k, ", pad = ", m.pad, ", stride = ", m.stride, ")") -end -<<<<<<< HEAD -======= - -""" - CrossCor(size, in=>out) - CrossCor(size, in=>out, relu) -Standard cross convolutional layer. `size` should be a tuple like `(2, 2)`. -`in` and `out` specify the number of input and output channels respectively. -Data should be stored in WHCN order. In other words, a 100×100 RGB image would -be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array. -Takes the keyword arguments `pad`, `stride` and `dilation`. -""" -struct CrossCor{N,F,A,V} - σ::F - weight::A - bias::V - stride::NTuple{N,Int} - pad::NTuple{N,Int} - dilation::NTuple{N,Int} -end -CrossCor(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; - stride = 1, pad = 0, dilation = 1) where {T,N} = - CrossCor(σ, w, b, expand.(sub2(Val(N)), (stride, pad, dilation))...) - -CrossCor(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N = - CrossCor(param(init(k..., ch...)), param(zeros(ch[2])), σ, - stride = stride, pad = pad, dilation = dilation) - -@treelike CrossCor - -function (c::CrossCor)(x) - # TODO: breaks gpu broadcast :( - # ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1))) - σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) - σ.(crosscor(x, c.weight, stride = c.stride, pad = c.pad, dilation = c.dilation) .+ b) -end -function Base.show(io::IO, l::CrossCor) - print(io, "CrossCor(", size(l.weight)[1:ndims(l.weight)-2]) - print(io, ", ", size(l.weight, ndims(l.weight)-1), "=>", size(l.weight, ndims(l.weight))) - l.σ == identity || print(io, ", ", l.σ) - print(io, ")") -end ->>>>>>> some final changes +end \ No newline at end of file diff --git a/src/tracker/array.jl b/src/tracker/array.jl deleted file mode 100644 index 7d94f0eb34..0000000000 --- a/src/tracker/array.jl +++ /dev/null @@ -1,460 +0,0 @@ -import Base: * - -import LinearAlgebra -import LinearAlgebra: inv, \, / - -using Statistics -using LinearAlgebra: Transpose, Adjoint, diagm, diag - -struct TrackedArray{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N} - tracker::Tracked{A} - data::A - grad::A - TrackedArray{T,N,A}(t::Tracked{A}, data::A) where {T,N,A} = new(t, data) - TrackedArray{T,N,A}(t::Tracked{A}, data::A, grad::A) where {T,N,A} = new(t, data, grad) -end - -data(x::TrackedArray) = x.data -tracker(x::TrackedArray) = x.tracker - -TrackedVector{T,A} = TrackedArray{T,1,A} -TrackedMatrix{T,A} = TrackedArray{T,2,A} -TrackedVecOrMat{T,A} = Union{TrackedVector{T,A},TrackedMatrix{T,A}} - -track(c::Call, x::AbstractArray) = TrackedArray(c, x) - 
-TrackedArray(c::Call, x::A) where A <: AbstractArray = - TrackedArray{eltype(A),ndims(A),A}(Tracked{A}(c), x) - -TrackedArray(c::Call, x::A, Δ::A) where A <: AbstractArray = - TrackedArray{eltype(A),ndims(A),A}(Tracked{A}(c, Δ), x, Δ) - -TrackedArray(x::AbstractArray) = TrackedArray(Call(), x, zero(x)) - -Base.eltype(x::Type{<:TrackedArray{T}}) where T <: Real = TrackedReal{T} - -Base.show(io::IO, ::Type{TrackedArray{T,N,A}}) where {T,N,A<:AbstractArray{T,N}} = - print(io, "TrackedArray{…,$A}") - -function Base.summary(io::IO, x::TrackedArray) - print(io, "Tracked ") - summary(io, data(x)) -end - -Base.print_array(io::IO, x::TrackedArray) = Base.print_array(io, data(x)) - -Base.setindex!(xs::TrackedArray, v, i...) = - error("Can't differentiate `setindex!`") - -back!(::TrackedArray) = error("Value is not scalar; use `back!(sum(x))` or `back!(x, Δ)`") - -# Fallthrough methods - -for f in :[Base.size, Base.ndims, Base.collect].args - @eval @inline $f(x::TrackedArray, a...) = $f(data(x), a...) -end - -Base.size(x::TrackedArray, i::Integer, j::Integer, is::Integer...) = - size(data(x), i, j, is...) - -Base.similar(x::TrackedArray, dims::Union{AbstractUnitRange,Integer}...) = - similar(data(x), dims...) - -Base.similar(x::TrackedArray, T::Type) = similar(data(x), T) - -for op in [:(==), :≈] - @eval Base.$op(x::TrackedArray, y::AbstractArray) = Base.$op(data(x), y) - @eval Base.$op(x::AbstractArray, y::TrackedArray) = Base.$op(x, data(y)) - @eval Base.$op(x::TrackedArray, y::TrackedArray) = Base.$op(data(x), data(y)) -end - -# Array Stdlib - -Base.getindex(xs::TrackedArray, i...) = track(getindex, xs, i...) - -@grad function getindex(xs::AbstractArray, i...) - data(xs)[i...], function (Δ) - Δ′ = zero(xs) - Δ′[i...] = data(Δ) - (nobacksies(:getindex, Δ′), map(_->nothing, i)...) - end -end - -Base.:-(xs::TrackedArray) = track(-, xs) - -@grad -(xs) = -data(xs), Δ -> (-Δ,) - -Base.transpose(xs::TrackedArray) = track(transpose, xs) -Base.adjoint(xs::TrackedArray) = track(adjoint, xs) - -@grad transpose(xs) = transpose(data(xs)), Δ -> (reshape(transpose(Δ), size(xs)),) -@grad adjoint(xs) = data(xs)', Δ -> (reshape(Δ', size(xs)),) - -Base.repeat(xs::TrackedArray; kw...) = track(repeat, xs; kw...) - -@grad function repeat(xs; inner=ntuple(x->1, ndims(xs)), outer=ntuple(x->1, ndims(xs))) - repeat(data(xs), inner = inner, outer = outer), function (Δ) - Δ′ = zero(xs) - S = size(xs) - - # Loop through each element of Δ, calculate source dimensions, accumulate into Δ′ - for (dest_idx, val) in pairs(IndexCartesian(), data(Δ)) - # First, round dest_idx[dim] to nearest gridpoint defined by inner[dim], then - # wrap around based on original size S. - src_idx = [mod1(div(dest_idx[dim] - 1, inner[dim]) + 1, S[dim]) for dim in 1:length(S)] - Δ′[src_idx...] += val - end - (nobacksies(:repeat, Δ′),) - end -end - -for f in [:vcat, :hcat] - UArray = :(Union{TrackedArray,Vector,Matrix,Adjoint,Transpose}) - @eval begin - # This section is a bit of a hack since julia doesn't have a standardised - # promotion mechanism for concatenation yet - # https://github.com/JuliaLang/julia/pull/20815 - - # It should support tracked concatenation with rank ∈ (1,2) with a - # TrackedArray anywhere among the arguments This works as long as base has - # other functions that captures `(::Union{Vector,RowVector,Matrix}...)`. - Base.$f(a::$UArray...) = track($f, a...) - - # It should support tracked concatenation with rank>2 if the TrackedArray is - # first - Base.$f(a::TrackedArray, b::AbstractArray...) = track($f, a, b...) 
- Base.$f(a::TrackedArray, b::$UArray...) = track($f, a, b...) # resolves ambiguity introduced by previous row - - # It should support tracked concatenation with rank>2 if the TrackedArray is - # second - Base.$f(a::Array, b::TrackedArray, c::AbstractArray...) = track($f, a, b, c...) - Base.$f(a::Union{Vector,Matrix,Adjoint,Transpose}, b::TrackedArray, - c::$UArray...) = - track($f, a, b, c...) # resolves ambiguity introduced by previous row - end -end - -@grad function vcat(xs...) - vcat(data.(xs)...), function (Δ) - start = 0 - Δs = [begin - i = map(_ -> :, size(xsi)) |> Base.tail - d = Δ[start+1:start+size(xsi,1), i...] - start += size(xsi, 1) - d - end for xsi in xs] - return (Δs...,) - end -end - -@grad function hcat(xs...) - hcat(data.(xs)...), function (Δ) - start = 0 - Δs = [begin - d = if ndims(xsi) == 1 - Δ[:, start+1] - else - i = map(_ -> :, size(xsi)) |> Base.tail |> Base.tail - Δ[:, start+1:start+size(xsi,2), i...] - end - start += size(xsi, 2) - d - end for xsi in xs] - return (Δs...,) - end -end - -Base.cat(a::TrackedArray; dims) = track(cat, a, dims = dims) -Base.cat(a::TrackedArray, b::TrackedArray, c::AbstractArray...; dims) = track(cat, a, b, c..., dims = dims) -Base.cat(a::TrackedArray, b::AbstractArray, c::AbstractArray...; dims) = track(cat, a, b, c..., dims = dims) -Base.cat(a::AbstractArray, b::TrackedArray, c::AbstractArray...; dims) = track(cat, a, b, c..., dims = dims) - -@grad function cat(Xs...; dims) - cat(data.(Xs)..., dims = dims), function (Δ) - start = ntuple(i -> 0, Val(ndims(Δ))) - Δs = [begin - dim_xs = 1:ndims(xs) - till_xs = ntuple((i -> i in dims ? (i in dim_xs ? size(xs,i) : 1) : 0), Val(ndims(Δ))) - xs_in_Δ = ntuple(i -> till_xs[i] > 0 ? (start[i]+1:start[i]+till_xs[i]) : Colon(), Val(ndims(Δ))) - d = reshape(Δ[xs_in_Δ...],size(xs)) - start = start .+ till_xs - d - end for xs in Xs] - return (Δs...,) - end -end - -Base.reshape(xs::TrackedArray, dims::Union{Colon,Int64}...) 
= reshape(xs, dims) -Base.reshape(xs::TrackedArray, dims::Tuple{Vararg{Union{Int64,Colon}}}) = reshape(xs, Base._reshape_uncolon(xs, dims)) -Base.reshape(xs::TrackedArray, dims::Tuple{Vararg{Int64}}) = track(reshape, xs, dims) - -@grad reshape(xs, dims) = reshape(data(xs), dims), Δ -> (reshape(Δ, size(xs)),nothing) - -Base.permutedims(xs::TrackedArray, dims) = track(permutedims, xs, dims) -@grad permutedims(xs, dims) = permutedims(data(xs), dims), Δ -> (permutedims(Δ, invperm(dims)),nothing) - -function _kron(mat1::AbstractMatrix,mat2::AbstractMatrix) - m1, n1 = size(mat1) - mat1_rsh = reshape(mat1,(1,m1,1,n1)) - - m2, n2 = size(mat2) - mat2_rsh = reshape(mat2,(m2,1,n2,1)) - - return reshape(mat1_rsh.*mat2_rsh, (m1*m2,n1*n2)) -end - -Base.kron(a::TrackedMatrix, b::TrackedMatrix) = _kron(a, b) -Base.kron(a::TrackedMatrix, b::AbstractMatrix) = _kron(a, b) -Base.kron(a::AbstractMatrix, b::TrackedMatrix) = _kron(a, b) - - -inv(A::TrackedArray) = Tracker.track(inv, A) -@grad function inv(A) - return inv(Tracker.data(A)), function (Δ) - Ainv = inv(A) - ∇A = - Ainv' * Δ * Ainv' - return (∇A, ) - end -end - -# (/) rdivide -A::TrackedArray / B::TrackedArray = Tracker.track(/, A, B) -A::AbstractVecOrMat / B::TrackedArray = Tracker.track(/, A, B) -A::TrackedArray / B::AbstractVecOrMat = Tracker.track(/, A, B) -@grad function (A / B) - return Tracker.data(A) / Tracker.data(B), function (Δ) - Binv = inv(B) - ∇B = - Binv' * A' * Δ * Binv' - return (Δ * Binv', ∇B) - end -end - -# (\) ldivide (left vec divide needs more work to resolve dispatch ambiguity) -A::TrackedArray \ B::TrackedArray = Tracker.track(\, A, B) -A::AbstractArray \ B::TrackedArray = Tracker.track(\, A, B) -A::TrackedArray \ B::AbstractVecOrMat = Tracker.track(\, A, B) -@grad function (A \ B) - return Tracker.data(A) \ Tracker.data(B), function (Δ) - Ainv = inv(A) - ∇A = - Ainv' * Δ * B' * Ainv' - return (∇A, Ainv' * Δ) - end -end - - -# Reductions - -Base.sum(xs::TrackedArray; dims = :) = track(sum, xs, dims = dims) -Base.sum(f::Union{Function,Type},xs::TrackedArray) = sum(f.(xs)) - -@grad sum(xs; dims = :) = sum(data(xs), dims = dims), - Δ -> (zero(xs) .+ Δ, ) - -Base.prod(xs::TrackedArray, dim) = track(prod, xs, dim) -Base.prod(xs::TrackedArray) = track(prod, xs) -Base.prod(f::Union{Function, Type}, xs::TrackedArray) = prod(f.(xs)) - -@grad prod(xs) = prod(data(xs)), Δ -> (prod(xs) ./ xs .* Δ,) -@grad prod(xs, dim) = prod(data(xs), dims = dim), - Δ -> (nobacksies(:sum, - reshape(.*(circshift.([reshape(data(xs), length(xs))], 1:length(xs)-1)...), size(xs)) .* Δ), - nothing) - -Base.findfirst(xs::TrackedArray, args...) = findfirst(xs.data, args...) 
- -Statistics.mean(xs::TrackedArray; dims = :) = track(mean, xs, dims = dims) - -Base.maximum(xs::TrackedArray; dims = :) = track(maximum, xs, dims = dims) -Base.minimum(xs::TrackedArray; dims = :) = track(minimum, xs, dims = dims) - -import LinearAlgebra: dot - -dot(xs::TrackedVector, ys::TrackedVector) = track(dot, xs, ys) -dot(xs::AbstractVector, ys::TrackedVector) = track(dot, xs, ys) -dot(xs::TrackedVector, ys::AbstractVector) = track(dot, xs, ys) - -@grad dot(xs, ys) = dot(data(xs), data(ys)), Δ -> (Δ .* ys, Δ .* xs) - -# Hacks to get std working -Statistics.std(x::TrackedArray; dims = :, mean = Statistics.mean(x, dims = dims)) = _std(x,mean,dims) -_std(x::TrackedArray, mean, dims) = sqrt.(sum((x .- mean).^2, dims = dims) ./ (mapreduce(i -> size(x,i),*, dims) - 1)) -_std(x::TrackedArray, mean, ::Colon) = sqrt.(sum((x .- mean).^2) ./ (length(x) - 1)) - -LinearAlgebra.norm(x::TrackedArray, p::Real = 2) = - sum(abs.(x).^p .+ eps(0f0))^(1/p) # avoid d(sqrt(x))/dx == Inf at 0 - -@grad mean(xs; dims = :) = mean(data(xs), dims=dims), Δ -> (_backmean(xs,Δ,dims),) -_backmean(xs, Δ, ::Colon) = zero(xs) .+ Δ ./ length(xs) -_backmean(xs, Δ, dims) = zero(xs) .+ Δ ./ mapreduce(i -> size(data(xs),i),*,dims) - -@grad function maximum(xs; dims = dims) - maximum(data(xs), dims = dims), function (Δ) - Δ′ = zero(xs) - _, i = findmax(data(xs), dims = dims) - Δ′[i] = data(Δ) - return (nobacksies(:maximum, Δ′),) - end -end - -@grad function minimum(xs; dims = dims) - minimum(data(xs), dims = dims), function (Δ) - Δ′ = zero(xs) - _, i = findmin(data(xs), dims = dims) - Δ′[i] = data(Δ) - return (nobacksies(:minimum, Δ′),) - end -end - -# BLAS - -LinearAlgebra.diagm(x::TrackedVector) = track(diagm, x) -@grad diagm(x) = diagm(data(x)), Δ -> (diag(Δ),) - -x::TrackedMatrix * y::AbstractMatrix = track(*, x, y) -x::AbstractMatrix * y::TrackedMatrix = track(*, x, y) -x::TrackedMatrix * y::TrackedMatrix = track(*, x, y) - -x::TrackedMatrix * y::AbstractVector = track(*, x, y) -x::AbstractMatrix * y::TrackedVector = track(*, x, y) -x::TrackedMatrix * y::TrackedVector = track(*, x, y) - -x::TrackedVector * y::AbstractVector = track(*, x, y) -x::AbstractVector * y::TrackedVector = track(*, x, y) -x::TrackedVector * y::TrackedVector = track(*, x, y) - -@grad a::AbstractMatrix * b::AbstractVecOrMat = - data(a)*data(b), Δ -> (Δ * transpose(b), transpose(a) * Δ) - -# NNlib - -using NNlib -import NNlib: softmax, ∇softmax, logsoftmax, ∇logsoftmax, conv, maxpool, meanpool, crosscor - -softmax(xs::TrackedArray) = track(softmax, xs) - -@grad softmax(xs) = softmax(data(xs)), Δ -> (nobacksies(:softmax, ∇softmax(data(Δ), data(xs))),) - -logsoftmax(xs::TrackedArray) = track(logsoftmax, xs) - -@grad logsoftmax(xs) = logsoftmax(data(xs)), Δ -> (nobacksies(:logsoftmax, ∇logsoftmax(data(Δ), data(xs))),) - -conv(x::TrackedArray, w::TrackedArray; kw...) = track(conv, x, w; kw...) -conv(x::AbstractArray, w::TrackedArray; kw...) = track(conv, x, w; kw...) -conv(x::TrackedArray, w::AbstractArray; kw...) = track(conv, x, w; kw...) - -@grad conv(x, w; kw...) = - conv(data(x), data(w); kw...), - Δ -> nobacksies(:conv, - (NNlib.∇conv_data(data.((Δ, x, w))...; kw...), - NNlib.∇conv_filter(data.((Δ, x, w))...; kw...))) - -crosscor(x::TrackedArray, w::TrackedArray; kw...) = track(crosscor, x, w; kw...) -crosscor(x::AbstractArray, w::TrackedArray; kw...) = track(crosscor, x, w; kw...) -crosscor(x::TrackedArray, w::AbstractArray; kw...) = track(crosscor, x, w; kw...) - -@grad crosscor(x, w; kw...) 
= - crosscor(data(x), data(w); kw...), - Δ -> nobacksies(:crosscor, - (NNlib.∇conv_data(data.((Δ, x, w))...; flipkernel=1, kw...), - NNlib.∇conv_filter(data.((Δ, x, w))...; flipkernel=1, kw...))) - -maxpool(x::TrackedArray, k; kw...) = track(maxpool, x, k; kw...) - -@grad function maxpool(x, k; kw...) - y = maxpool(data(x), k; kw...) - y, Δ -> (nobacksies(:maxpool, NNlib.∇maxpool(data.((Δ, y, x))..., k; kw...)), nothing) -end - -meanpool(x::TrackedArray, k; kw...) = track(meanpool, x, k; kw...) - -@grad function meanpool(x, k; kw...) - y = meanpool(data(x), k; kw...) - y, Δ -> (nobacksies(:maxpool, NNlib.∇meanpool(data.((Δ, y, x))..., k; kw...)), nothing) -end - -# Broadcasting - -using ForwardDiff: Dual, partials, value - -trim(x, Δ) = reshape(Δ, ntuple(i -> size(Δ, i), Val(ndims(x)))) - -unbroadcast(x::AbstractArray, Δ) = - size(x) == size(Δ) ? Δ : - length(x) == length(Δ) ? trim(x, Δ) : - trim(x, sum(Δ, dims = ntuple(i -> size(x, i) == 1 ? i : ndims(Δ)+1, Val(ndims(Δ))))) - -unbroadcast(x::Number, Δ) = sum(Δ) -unbroadcast(x::Base.RefValue{<:Function}, _) = nothing -unbroadcast(x::Base.RefValue{<:Val}, _) = nothing - -dual(x, p) = x -dual(x::Real, p) = Dual(x, p) - -function partial(f::F, Δ, i, args::Vararg{Any,N}) where {F,N} - dargs = ntuple(j -> dual(args[j], i==j), Val(N)) - return Δ * f(dargs...).partials[1] -end - -@inline function ∇broadcast(f::F, args::Vararg{Any,N}) where {F,N} - y = broadcast(f, data.(args)...) - eltype(y) <: Real || return y - eltype(y) == Bool && return y - function back(Δ) - Δargs = ntuple(i -> partial.(f, Δ, i, args...), Val(N)) - dxs = map(unbroadcast, args, Δargs) - return dxs - end - # So we can return non-tracked arrays - track(Call(back, tracker.(args)), y) -end - -using Base.Broadcast: BroadcastStyle, ArrayStyle, Broadcasted, broadcasted - -struct TrackedStyle <: BroadcastStyle end - -Broadcast.BroadcastStyle(::Type{<:Union{TrackedArray,TrackedReal}}) = TrackedStyle() -Broadcast.BroadcastStyle(::TrackedStyle, ::BroadcastStyle) = TrackedStyle() - -# We have to re-build the original broadcast struct to get the appropriate array -# style. We need this primarily to support CuArrays' broadcasting fixes. -broadcast_rebuild(xs) = data(xs) - -broadcast_rebuild(bc::Broadcasted) = - broadcasted(bc.f, broadcast_rebuild.(bc.args)...) - -preprocess(x) = x - -function Base.Broadcast.materialize(bc::Broadcasted{TrackedStyle}) - bc1 = Broadcast.flatten(bc) - bc2 = Broadcast.flatten(broadcast_rebuild(bc)) - ∇broadcast(bc2.f, bc1.args...) -end - -using Requires - -# https://github.com/FluxML/Flux.jl/issues/353 -@init Requires.isprecompiling() || @eval Base.Broadcast begin - function flatten(bc::Broadcasted{Style}) where {Style} - isflat(bc) && return bc - args = cat_nested(bc) - let makeargs = make_makeargs(bc), f = bc.f - newf = @inline function(args::Vararg{Any,N}) where N - f(makeargs(args...)...) - end - return Broadcasted{Style}(newf, args, bc.axes) - end - end - @inline function make_makeargs(makeargs, t::Tuple{<:Broadcasted,Vararg{Any}}) - bc = t[1] - let makeargs = make_makeargs(makeargs, tail(t)), f = bc.f - let makeargs = make_makeargs(makeargs, bc.args) - headargs, tailargs = make_headargs(bc.args), make_tailargs(bc.args) - return @inline function(args::Vararg{Any,N}) where N - args1 = makeargs(args...) - a, b = headargs(args1...), tailargs(args1...) - (f(a...), b...) 
- end - end - end - end -end diff --git a/test/layers/conv.jl b/test/layers/conv.jl index f50664f95a..71fb2d05fd 100644 --- a/test/layers/conv.jl +++ b/test/layers/conv.jl @@ -35,13 +35,8 @@ end end @testset "CrossCor" begin - m = Chain( - CrossCor((2, 2), 1=>16, relu), - MaxPool((2,2)), - CrossCor((2, 2), 16=>8, relu), - MaxPool((2,2)), - x -> reshape(x, :, size(x, 4)), - Dense(288, 10), softmax) + r = rand(28, 28, 1, 1) + m = Chain(CrossCor((2, 2), 1=>16, relu),MaxPool((2,2)),CrossCor((2, 2), 16=>8, relu),MaxPool((2,2)),x -> reshape(x, :, size(x, 4)),Dense(288, 10), softmax) @test size(m(r)) == (10, 5) From f901c7892f6195ae1f013c58325cc01591b9a809 Mon Sep 17 00:00:00 2001 From: ayush1999 Date: Fri, 5 Apr 2019 11:31:52 +0530 Subject: [PATCH 08/10] use denseconvdims --- src/Flux.jl | 2 +- src/layers/conv.jl | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Flux.jl b/src/Flux.jl index eccdd6a7e5..b117aa4b9c 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -8,7 +8,7 @@ using MacroTools: @forward export Chain, Dense, Maxout, RNN, LSTM, GRU, Conv, ConvTranspose, MaxPool, MeanPool, DepthwiseConv, Dropout, AlphaDropout, LayerNorm, BatchNorm, InstanceNorm, GroupNorm, - params, mapleaves, cpu, gpu, f32, f64 + params, mapleaves, cpu, gpu, f32, f64, CrossCor @reexport using NNlib diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 5997faf1f2..9270833eec 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -1,4 +1,4 @@ -using NNlib: conv, ∇conv_data, depthwiseconv, crosscor +using NNlib: conv, ∇conv_data, depthwiseconv, DenseConvDims @generated sub2(::Val{N}) where N = :(Val($(N-2))) @@ -207,7 +207,8 @@ function (c::CrossCor)(x) # TODO: breaks gpu broadcast :( # ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1))) σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) - σ.(crosscor(x, c.weight, stride = c.stride, pad = c.pad, dilation = c.dilation) .+ b) + dense_dims = DenseConvDims(size(x), size(c.weight), stride=c.stride, padding=c.pad, dilation=c.dilation, flipkernel=true) + σ.(conv(x, c.weight, dense_dims) .+ b) end function Base.show(io::IO, l::CrossCor) From a4e2d0e855d2a08e12e482126185ba4db9c2ba20 Mon Sep 17 00:00:00 2001 From: ayush1999 Date: Sat, 6 Apr 2019 21:17:14 +0530 Subject: [PATCH 09/10] use crosscor --- src/layers/conv.jl | 26 ++++++++++++++++++-------- test/cuda/cuda.jl | 4 ++++ test/layers/conv.jl | 7 ++++++- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 9270833eec..68ce42928a 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -184,12 +184,17 @@ be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array. Takes the keyword arguments `pad`, `stride` and `dilation`. 
""" struct CrossCor{N,F,A,V} -σ::F -weight::A -bias::V -stride::NTuple{N,Int} -pad::NTuple{N,Int} -dilation::NTuple{N,Int} + σ::F + weight::A + bias::V + stride::NTuple{N,Int} + pad::NTuple{N,Int} + dilation::NTuple{N,Int} +end + +function crosscor(x, w, d) + + return conv(x, w, DenseConvDims(d)) end CrossCor(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; @@ -203,12 +208,17 @@ CrossCor(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; @treelike CrossCor +function crosscor(x, w, ddims::DenseConvDims) + ddims = DenseConvDims(ddims, F=true) + return conv(x, w, ddims) +end + function (c::CrossCor)(x) # TODO: breaks gpu broadcast :( # ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1))) σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) - dense_dims = DenseConvDims(size(x), size(c.weight), stride=c.stride, padding=c.pad, dilation=c.dilation, flipkernel=true) - σ.(conv(x, c.weight, dense_dims) .+ b) + ddims = DenseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation) + σ.(crosscor(x, c.weight, ddims) .+ b) end function Base.show(io::IO, l::CrossCor) diff --git a/test/cuda/cuda.jl b/test/cuda/cuda.jl index 1748ed5e07..2b80c0a144 100644 --- a/test/cuda/cuda.jl +++ b/test/cuda/cuda.jl @@ -36,6 +36,10 @@ c = gpu(Conv((2,2),3=>4)) l = c(gpu(rand(10,10,3,2))) Flux.back!(sum(l)) +c = gpu(CrossCor((2,2),3=>4)) +l = c(gpu(rand(10,10,3,2))) +Flux.back!(sum(l)) + end if CuArrays.libcudnn != nothing diff --git a/test/layers/conv.jl b/test/layers/conv.jl index 1420c20428..247470fc24 100644 --- a/test/layers/conv.jl +++ b/test/layers/conv.jl @@ -46,7 +46,12 @@ end @testset "CrossCor" begin r = rand(28, 28, 1, 1) - m = Chain(CrossCor((2, 2), 1=>16, relu),MaxPool((2,2)),CrossCor((2, 2), 16=>8, relu),MaxPool((2,2)),x -> reshape(x, :, size(x, 4)),Dense(288, 10), softmax) + m = Chain(CrossCor((2, 2), 1=>16, relu), + MaxPool((2,2)), + CrossCor((2, 2), 16=>8, relu), + MaxPool((2,2)), + x -> reshape(x, :, size(x, 4)), + Dense(288, 10), softmax) @test size(m(r)) == (10, 5) From 7cc300663867fa77b30e2e11c87149152f7f91d2 Mon Sep 17 00:00:00 2001 From: ayush1999 Date: Sun, 7 Apr 2019 11:01:21 +0530 Subject: [PATCH 10/10] gpu test --- src/Flux.jl | 4 ++-- src/layers/conv.jl | 5 ----- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/Flux.jl b/src/Flux.jl index b117aa4b9c..a041a69a8d 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -6,9 +6,9 @@ using Base: tail using MacroTools, Juno, Requires, Reexport, Statistics, Random using MacroTools: @forward -export Chain, Dense, Maxout, RNN, LSTM, GRU, Conv, ConvTranspose, MaxPool, MeanPool, +export Chain, Dense, Maxout, RNN, LSTM, GRU, Conv, CrossCor, ConvTranspose, MaxPool, MeanPool, DepthwiseConv, Dropout, AlphaDropout, LayerNorm, BatchNorm, InstanceNorm, GroupNorm, - params, mapleaves, cpu, gpu, f32, f64, CrossCor + params, mapleaves, cpu, gpu, f32, f64 @reexport using NNlib diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 68ce42928a..76923f3f0b 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -192,11 +192,6 @@ struct CrossCor{N,F,A,V} dilation::NTuple{N,Int} end -function crosscor(x, w, d) - - return conv(x, w, DenseConvDims(d)) -end - CrossCor(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; stride = 1, pad = 0, dilation = 1) where {T,N} = CrossCor(σ, w, b, expand.(sub2(Val(N)), (stride, pad, dilation))...)
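
Usage illustration (not part of the patch itself): a minimal sketch of how the CrossCor layer introduced by this series is constructed and applied. It mirrors the model in test/layers/conv.jl above and assumes the `CrossCor` export added to src/Flux.jl in these commits; the input size and batch size are chosen only for the example.

    using Flux

    # Two cross-correlation layers with pooling, then flatten and a dense head.
    m = Chain(
        CrossCor((2, 2), 1=>16, relu),   # 2×2 kernel, 1 input channel, 16 output channels
        MaxPool((2, 2)),
        CrossCor((2, 2), 16=>8, relu),
        MaxPool((2, 2)),
        x -> reshape(x, :, size(x, 4)),  # flatten to (features, batch)
        Dense(288, 10), softmax)

    # WHCN layout: 28×28 images, 1 channel, batch of 5.
    x = rand(28, 28, 1, 5)
    size(m(x))  # (10, 5)

Unlike `Conv`, the layer does not flip the kernel: in the final version of the patch, `crosscor` builds `DenseConvDims(x, w; ...)` with `F=true` (the flipkernel flag) and then calls NNlib's `conv`, so the kernel is used as-is, which is what distinguishes cross-correlation from true convolution.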