Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bring over OneElement for scalar getindex #717

Merged
merged 6 commits into from
Jun 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "ChainRules"
uuid = "082447d4-558c-5d27-93f4-14fc19e9eca2"
version = "1.50.0"
version = "1.51.0"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
Expand Down
38 changes: 35 additions & 3 deletions src/rulesets/Base/indexing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -81,16 +81,48 @@ For the `rrule` of `y = x[inds...]`, this function is roughly
`setindex(zero(x), dy, inds...)`, returning the array `dx`.
Differentiable. Includes `ProjectTo(x)(dx)`.
"""
function ∇getindex(x::AbstractArray, dy, inds...)
function ∇getindex(x::AbstractArray{T,N}, dy, inds...) where {T,N}
# NOTE(review): two `function` headers and two copies of the `_setindex_zero` /
# `∇getindex!` pair appear in this span; this looks like a rendered diff showing both
# removed and added lines — confirm against the real file before compiling this text.
# `to_indices` removes any logical indexing, colons, CartesianIndex etc,
# leaving just Int / AbstractVector of Int
plain_inds = Base.to_indices(x, inds)
dx = _setindex_zero(x, dy, plain_inds...)
∇getindex!(dx, dy, plain_inds...)
dx = if plain_inds isa NTuple{N, Int} && T<:Number
# scalar indexing: exactly one Int per dimension of `x` and a numeric eltype, so the
# gradient is wrapped in a `OneElement` rather than built via `_setindex_zero`
OneElement(dy, plain_inds, axes(x))
else # some form of slicing (potentially noncontiguous)
dx = _setindex_zero(x, dy, plain_inds...)
∇getindex!(dx, dy, plain_inds...)
end
return ProjectTo(x)(dx) # since we have x, may as well do this inside, not in rules
end
# An `AbstractZero` cotangent is returned unchanged, whatever `x` and `inds` are.
function ∇getindex(x::AbstractArray, z::AbstractZero, inds...)
    return z
end

"""
OneElement(val, ind, axes) <: AbstractArray

Extremely simple `struct` used for the gradient of scalar `getindex`.
"""
struct OneElement{T,N,I,A} <: AbstractArray{T,N}
    # Value of the single stored entry; every other entry reads as `zero(T)`.
    val::T
    # Position of the stored entry, one `Int` per dimension.
    ind::I
    # Axes of the represented array, one `AbstractUnitRange` per dimension.
    axes::A
    # Only constructor: `N` is fixed by the common length of `ind` and `axes`, and
    # `val` must be a `Number`.
    function OneElement(
        val::T, ind::I, axes::A
    ) where {T<:Number, I<:NTuple{N,Int}, A<:NTuple{N,AbstractUnitRange}} where {N}
        return new{T,N,I,A}(val, ind, axes)
    end
end

# Size is derived from the stored axes rather than kept as a separate field.
Base.size(A::OneElement) = map(length, A.axes)
Base.axes(A::OneElement) = A.axes

# Return `A.val` at position `A.ind` and `zero(T)` everywhere else.
# Throws `BoundsError` when `i` lies outside `axes(A)`, as the `AbstractArray`
# interface requires; without the check an out-of-range index would silently read as zero.
function Base.getindex(A::OneElement{T,N}, i::Vararg{Int,N}) where {T,N}
    @boundscheck checkbounds(A, i...)
    return ifelse(i == A.ind, A.val, zero(T))
end

# Accumulate the single stored entry of `oe` into `xs`.
# The destination is `xs` itself when `is_inplaceable_destination(xs)` holds, and
# `collect(xs)` otherwise; the destination is returned.
function ChainRulesCore.add!!(xs::AbstractArray{<:Any,N}, oe::OneElement{<:Any,N}) where {N}
    dest = ChainRulesCore.is_inplaceable_destination(xs) ? xs : collect(xs)
    # Only the entry at `oe.ind` changes; all other entries of `oe` read as zero.
    dest[oe.ind...] = dest[oe.ind...] + oe.val
    return dest
end

# Array + OneElement: copy the array, then fold in the single entry via `add!!`.
function Base.:(+)(xs::AbstractArray, oe::OneElement)
    return add!!(copy(xs), oe)
end
# OneElement + array: delegate to the method above with the arguments swapped.
function Base.:(+)(oe::OneElement, xs::AbstractArray)
    return xs + oe
end
# OneElement + OneElement: `collect` the first so the array method above applies.
function Base.:(+)(oe1::OneElement, oe2::OneElement)
    return collect(oe1) + oe2
end

"""
_setindex_zero(x, dy, inds...)

Expand Down
11 changes: 6 additions & 5 deletions test/rulesets/Base/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -358,14 +358,15 @@ end
@test_skip test_frule(findmin, rand(3,4), output_tangent = (rand(), NoTangent()))
@test_skip test_frule(findmin, rand(3,4), fkwargs=(dims=1,))
# These skipped tests might be fixed by https://github.com/JuliaDiff/FiniteDifferences.jl/issues/188
# or by https://github.com/JuliaLang/julia/pull/48404

# Reverse
test_rrule(findmin, rand(10), output_tangent = (rand(), false))
test_rrule(findmax, rand(10), output_tangent = (rand(), false))
test_rrule(findmin, rand(5,3))
test_rrule(findmax, rand(5,3))
@test [0 0; 0 5] == @inferred unthunk(rrule(findmax, [1 2; 3 4])[2]((5.0, nothing))[2])
@test [0 0; 0 5] == @inferred unthunk(rrule(findmax, [1 2; 3 4])[2]((5.0, NoTangent()))[2])
test_rrule(findmin, rand(5,3); check_inferred=false)
test_rrule(findmax, rand(5,3); check_inferred=false)
@test [0 0; 0 5] == unthunk(rrule(findmax, [1 2; 3 4])[2]((5.0, nothing))[2])
@test [0 0; 0 5] == unthunk(rrule(findmax, [1 2; 3 4])[2]((5.0, NoTangent()))[2])

# Reverse with dims:
@test [0 0; 5 6] == @inferred unthunk(rrule(findmax, [1 2; 3 4], dims=1)[2](([5 6], nothing))[2])
Expand All @@ -385,7 +386,7 @@ end

# Reverse
test_rrule(imum, rand(10))
test_rrule(imum, rand(3,4))
test_rrule(imum, rand(3,4); check_inferred=false)
@gpu test_rrule(imum, rand(3,4), fkwargs=(dims=1,))
test_rrule(imum, rand(3,4,5), fkwargs=(dims=(1,3),))

Expand Down
8 changes: 4 additions & 4 deletions test/rulesets/Base/indexing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@

@testset "single element" begin
# Runs `test_rrule` on `getindex` with one integer, two integers, and a `CartesianIndex`.
# NOTE(review): the two-integer and `CartesianIndex` calls each appear both with and
# without `check_inferred=false`; this looks like a rendered diff pairing removed and
# added lines — confirm which set belongs in the real file.
test_rrule(getindex, x, 2)
test_rrule(getindex, x, 2, 1)
test_rrule(getindex, x, 2, 2)
test_rrule(getindex, x, 2, 1; check_inferred=false)
test_rrule(getindex, x, 2, 2; check_inferred=false)

test_rrule(getindex, x, CartesianIndex(2, 3))
test_rrule(getindex, x, CartesianIndex(2, 3); check_inferred=false)
end

@testset "slice/index positions" begin
Expand Down Expand Up @@ -87,7 +87,7 @@
dgrad = rrule(getindex, Diagonal(rand(3)), 2, :)[2]([1,2,3])[2]
@test unthunk(dgrad) ≈ Diagonal([0, 2, 0])

test_rrule(getindex, Symmetric(rand(3, 3)), 2, 2)
test_rrule(getindex, Symmetric(rand(3, 3)), 2, 2; check_inferred=false) # Infers to Any
sgrad = rrule(getindex, Symmetric(rand(3, 3)), 2, 3)[2](1.0)[2]
@test unthunk(sgrad) ≈ [0 0 0; 0 0 1/2; 0 1/2 0]
end
Expand Down