Use a lazy `OneElement` array as the gradient of scalar `getindex`

Summary of this patch:

* Project.toml: bump the package version from 1.50.0 to 1.51.0.
* src/rulesets/Base/indexing.jl: `∇getindex` now returns a `OneElement` when
  `to_indices` yields exactly `N` `Int`s and the array eltype is a `Number`;
  every other case keeps the `_setindex_zero` + `∇getindex!` path.
  Adds the `OneElement` struct with `size`, `axes`, `getindex`, an `add!!`
  method and `+` methods.
* test/rulesets/Base/array.jl, test/rulesets/Base/indexing.jl: pass
  `check_inferred=false` to the affected `test_rrule` calls and drop
  `@inferred` from the two scalar `findmax` pullback checks.

NOTE(review): this patch was recovered from a copy whose newlines and
indentation had been collapsed. Blank context lines and leading whitespace
were reconstructed from the hunk line counts and the usual 4-space Julia
layout; confirm against the target tree, or apply with
`git apply --ignore-whitespace`.
NOTE(review): relative to the recovered text, the indexing.jl hunk fixes the
"noncontigous" typo, renames the redundant inner `dx` binding to `dx_buf`,
and adds four documentation lines (new-side count 48 -> 52), so the
post-image blob id on its `index` line no longer describes the result.

diff --git a/Project.toml b/Project.toml
index 254f1ad7c..7a9746437 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "ChainRules"
 uuid = "082447d4-558c-5d27-93f4-14fc19e9eca2"
-version = "1.50.0"
+version = "1.51.0"
 
 [deps]
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
diff --git a/src/rulesets/Base/indexing.jl b/src/rulesets/Base/indexing.jl
index 0ca102143..7e1befd14 100644
--- a/src/rulesets/Base/indexing.jl
+++ b/src/rulesets/Base/indexing.jl
@@ -81,16 +81,52 @@ For the `rrule` of `y = x[inds...]`, this function is roughly
 `setindex(zero(x), dy, inds...)`, returning the array `dx`.
 Differentiable. Includes `ProjectTo(x)(dx)`.
 """
-function ∇getindex(x::AbstractArray, dy, inds...)
+function ∇getindex(x::AbstractArray{T,N}, dy, inds...) where {T,N}
     # `to_indices` removes any logical indexing, colons, CartesianIndex etc,
     # leaving just Int / AbstractVector of Int
     plain_inds = Base.to_indices(x, inds)
-    dx = _setindex_zero(x, dy, plain_inds...)
-    ∇getindex!(dx, dy, plain_inds...)
+    dx = if plain_inds isa NTuple{N, Int} && T<:Number
+        # scalar indexing
+        OneElement(dy, plain_inds, axes(x))
+    else  # some from slicing (potentially noncontiguous)
+        dx_buf = _setindex_zero(x, dy, plain_inds...)
+        ∇getindex!(dx_buf, dy, plain_inds...)
+    end
     return ProjectTo(x)(dx)  # since we have x, may as well do this inside, not in rules
 end
 ∇getindex(x::AbstractArray, z::AbstractZero, inds...) = z
 
+"""
+    OneElement(val, ind, axes) <: AbstractArray
+
+Extremely simple `struct` used for the gradient of scalar `getindex`.
+Indexing it returns `val` at position `ind` and `zero(T)` at every other index;
+`size` and `axes` are taken from the stored `axes`.
+"""
+struct OneElement{T,N,I,A} <: AbstractArray{T,N}
+    val::T
+    ind::I
+    axes::A
+    OneElement(val::T, ind::I, axes::A) where {T<:Number, I<:NTuple{N,Int}, A<:NTuple{N,AbstractUnitRange}} where {N} = new{T,N,I,A}(val, ind, axes)
+end
+Base.size(A::OneElement) = map(length, A.axes)
+Base.axes(A::OneElement) = A.axes
+Base.getindex(A::OneElement{T,N}, i::Vararg{Int,N}) where {T,N} = ifelse(i==A.ind, A.val, zero(T))
+
+# Accumulate `oe` into `xs`: written in place when `xs` is an in-placeable
+# destination, otherwise into a freshly `collect`ed copy, which is returned.
+function ChainRulesCore.add!!(xs::AbstractArray{<:Any,N}, oe::OneElement{<:Any,N}) where {N}
+    if !ChainRulesCore.is_inplaceable_destination(xs)
+        xs = collect(xs)
+    end
+    xs[oe.ind...] += oe.val
+    return xs
+end
+
+Base.:(+)(xs::AbstractArray, oe::OneElement) = add!!(copy(xs), oe)
+Base.:(+)(oe::OneElement, xs::AbstractArray) = +(xs, oe)
+Base.:(+)(oe1::OneElement, oe2::OneElement) = +(collect(oe1), oe2)
+
 """
     _setindex_zero(x, dy, inds...)
 
diff --git a/test/rulesets/Base/array.jl b/test/rulesets/Base/array.jl
index c50008430..5dead0dba 100644
--- a/test/rulesets/Base/array.jl
+++ b/test/rulesets/Base/array.jl
@@ -358,14 +358,15 @@ end
         @test_skip test_frule(findmin, rand(3,4), output_tangent = (rand(), NoTangent()))
         @test_skip test_frule(findmin, rand(3,4), fkwargs=(dims=1,))
         # These skipped tests might be fixed by https://github.com/JuliaDiff/FiniteDifferences.jl/issues/188
+        # or by https://github.com/JuliaLang/julia/pull/48404
 
         # Reverse
         test_rrule(findmin, rand(10), output_tangent = (rand(), false))
         test_rrule(findmax, rand(10), output_tangent = (rand(), false))
-        test_rrule(findmin, rand(5,3))
-        test_rrule(findmax, rand(5,3))
-        @test [0 0; 0 5] == @inferred unthunk(rrule(findmax, [1 2; 3 4])[2]((5.0, nothing))[2])
-        @test [0 0; 0 5] == @inferred unthunk(rrule(findmax, [1 2; 3 4])[2]((5.0, NoTangent()))[2])
+        test_rrule(findmin, rand(5,3); check_inferred=false)
+        test_rrule(findmax, rand(5,3); check_inferred=false)
+        @test [0 0; 0 5] == unthunk(rrule(findmax, [1 2; 3 4])[2]((5.0, nothing))[2])
+        @test [0 0; 0 5] == unthunk(rrule(findmax, [1 2; 3 4])[2]((5.0, NoTangent()))[2])
 
         # Reverse with dims:
         @test [0 0; 5 6] == @inferred unthunk(rrule(findmax, [1 2; 3 4], dims=1)[2](([5 6], nothing))[2])
@@ -385,7 +386,7 @@ end
 
         # Reverse
         test_rrule(imum, rand(10))
-        test_rrule(imum, rand(3,4))
+        test_rrule(imum, rand(3,4); check_inferred=false)
         @gpu test_rrule(imum, rand(3,4), fkwargs=(dims=1,))
         test_rrule(imum, rand(3,4,5), fkwargs=(dims=(1,3),))
 
diff --git a/test/rulesets/Base/indexing.jl b/test/rulesets/Base/indexing.jl
index 3dbcd0bc9..8928c55e7 100644
--- a/test/rulesets/Base/indexing.jl
+++ b/test/rulesets/Base/indexing.jl
@@ -34,10 +34,10 @@
 
         @testset "single element" begin
             test_rrule(getindex, x, 2)
-            test_rrule(getindex, x, 2, 1)
-            test_rrule(getindex, x, 2, 2)
+            test_rrule(getindex, x, 2, 1; check_inferred=false)
+            test_rrule(getindex, x, 2, 2; check_inferred=false)
 
-            test_rrule(getindex, x, CartesianIndex(2, 3))
+            test_rrule(getindex, x, CartesianIndex(2, 3); check_inferred=false)
         end
 
         @testset "slice/index positions" begin
@@ -87,7 +87,7 @@
         dgrad = rrule(getindex, Diagonal(rand(3)), 2, :)[2]([1,2,3])[2]
         @test unthunk(dgrad) ≈ Diagonal([0, 2, 0])
 
-        test_rrule(getindex, Symmetric(rand(3, 3)), 2, 2)
+        test_rrule(getindex, Symmetric(rand(3, 3)), 2, 2; check_inferred=false)  # Infers to Any
         sgrad = rrule(getindex, Symmetric(rand(3, 3)), 2, 3)[2](1.0)[2]
         @test unthunk(sgrad) ≈ [0 0 0; 0 0 1/2; 0 1/2 0]
     end