Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use a symmetric coloring for the computation of sparse Hessians #271

Merged
merged 6 commits into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,5 @@ NLPModels = "0.18, 0.19, 0.20, 0.21"
Requires = "1"
ReverseDiff = "1"
SparseConnectivityTracer = "0.5"
SparseMatrixColorings = "0.3.3"
SparseMatrixColorings = "0.3.5"
julia = "^1.6"
3 changes: 2 additions & 1 deletion src/ADNLPModels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ using LinearAlgebra, SparseArrays
# external
using ADTypes: ADTypes, AbstractColoringAlgorithm, AbstractSparsityDetector
using SparseConnectivityTracer: TracerSparsityDetector
using SparseMatrixColorings: GreedyColoringAlgorithm
using SparseMatrixColorings:
GreedyColoringAlgorithm, symmetric_coloring_detailed, symmetric_coefficient
using ForwardDiff, ReverseDiff

# JSO
Expand Down
82 changes: 36 additions & 46 deletions src/sparse_hessian.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ struct SparseADHessian{Tag, GT, S, T} <: ADNLPModels.ADBackend
colptr::Vector{Int}
colors::Vector{Int}
ncolors::Int
dcolors::Dict{Int, Vector{UnitRange{Int}}}
dcolors::Dict{Int, Vector{Tuple{Int,Int}}}
res::S
lz::Vector{ForwardDiff.Dual{Tag, T, 1}}
glz::Vector{ForwardDiff.Dual{Tag, T, 1}}
Expand All @@ -28,8 +28,7 @@ function SparseADHessian(
T = eltype(S)
H = compute_hessian_sparsity(f, nvar, c!, ncon, detector = detector)

# TODO: use ADTypes.symmetric_coloring instead if you have the right decompression
colors = ADTypes.column_coloring(H, coloring)
colors, star_set = symmetric_coloring_detailed(H, coloring)
ncolors = maximum(colors)

d = BitVector(undef, nvar)
Expand All @@ -39,11 +38,7 @@ function SparseADHessian(
colptr = trilH.colptr

# The indices of the nonzero elements in `vals` that will be processed by color `c` are stored in `dcolors[c]`.
dcolors = Dict(i => UnitRange{Int}[] for i = 1:ncolors)
for (i, color) in enumerate(colors)
range_vals = colptr[i]:(colptr[i + 1] - 1)
push!(dcolors[color], range_vals)
end
dcolors = nnz_colors(trilH, star_set, colors, ncolors)

# prepare directional derivatives
res = similar(x0)
Expand Down Expand Up @@ -97,7 +92,7 @@ struct SparseReverseADHessian{T, S, Tagf, F, Tagψ, P} <: ADNLPModels.ADBackend
colptr::Vector{Int}
colors::Vector{Int}
ncolors::Int
dcolors::Dict{Int, Vector{UnitRange{Int}}}
dcolors::Dict{Int, Vector{Tuple{Int,Int}}}
res::S
z::Vector{ForwardDiff.Dual{Tagf, T, 1}}
gz::Vector{ForwardDiff.Dual{Tagf, T, 1}}
Expand All @@ -123,8 +118,7 @@ function SparseReverseADHessian(
) where {T}
H = compute_hessian_sparsity(f, nvar, c!, ncon, detector = detector)

# TODO: use ADTypes.symmetric_coloring instead if you have the right decompression
colors = ADTypes.column_coloring(H, coloring)
colors, star_set = symmetric_coloring_detailed(H, coloring)
ncolors = maximum(colors)

d = BitVector(undef, nvar)
Expand All @@ -134,11 +128,7 @@ function SparseReverseADHessian(
colptr = trilH.colptr

# The indices of the nonzero elements in `vals` that will be processed by color `c` are stored in `dcolors[c]`.
dcolors = Dict(i => UnitRange{Int}[] for i = 1:ncolors)
for (i, color) in enumerate(colors)
range_vals = colptr[i]:(colptr[i + 1] - 1)
push!(dcolors[color], range_vals)
end
dcolors = nnz_colors(trilH, star_set, colors, ncolors)

# prepare directional derivatives
res = similar(x0)
Expand Down Expand Up @@ -239,17 +229,17 @@ function sparse_hess_coord!(
b.longv .= 0

for icol = 1:(b.ncolors)
b.d .= (b.colors .== icol)
b.longv[(ncon + 1):(ncon + nvar)] .= b.d
map!(ForwardDiff.Dual{Tag}, b.lz, b.sol, b.longv)
b.∇φ!(b.glz, b.lz)
ForwardDiff.extract_derivative!(Tag, b.Hvp, b.glz)
b.res .= view(b.Hvp, (ncon + 1):(ncon + nvar))

# Store in `vals` the nonzeros of each column of the Hessian computed with color `icol`
for range_vals in b.dcolors[icol]
for k in range_vals
row = b.rowval[k]
dcolor_icol = b.dcolors[icol]
if !isempty(dcolor_icol)
b.d .= (b.colors .== icol)
b.longv[(ncon + 1):(ncon + nvar)] .= b.d
map!(ForwardDiff.Dual{Tag}, b.lz, b.sol, b.longv)
b.∇φ!(b.glz, b.lz)
ForwardDiff.extract_derivative!(Tag, b.Hvp, b.glz)
b.res .= view(b.Hvp, (ncon + 1):(ncon + nvar))

# Store in `vals` the nonzeros of each column of the Hessian computed with color `icol`
for (row, k) in dcolor_icol
vals[k] = b.res[row]
end
end
Expand All @@ -268,25 +258,25 @@ function sparse_hess_coord!(
nvar = length(x)

for icol = 1:(b.ncolors)
b.d .= (b.colors .== icol)

# objective
map!(ForwardDiff.Dual{Tagf}, b.z, x, b.d) # x + ε * v
b.∇f!(b.gz, b.z)
ForwardDiff.extract_derivative!(Tagf, b.res, b.gz)
b.res .*= obj_weight

# constraints
map!(ForwardDiff.Dual{Tagψ}, b.zψ, x, b.d)
b.yψ .= y
b.∇l!(b.gzψ, b.gyψ, b.zψ, b.)
ForwardDiff.extract_derivative!(Tagψ, b.Hv_temp, b.gzψ)
b.res .+= b.Hv_temp

# Store in `vals` the nonzeros of each column of the Hessian computed with color `icol`
for range_vals in b.dcolors[icol]
for k in range_vals
row = b.rowval[k]
dcolor_icol = b.dcolors[icol]
if !isempty(dcolor_icol)
b.d .= (b.colors .== icol)

# objective
map!(ForwardDiff.Dual{Tagf}, b.z, x, b.d) # x + ε * v
b.∇f!(b.gz, b.z)
ForwardDiff.extract_derivative!(Tagf, b.res, b.gz)
b.res .*= obj_weight

# constraints
map!(ForwardDiff.Dual{Tagψ}, b.zψ, x, b.d)
b.yψ .= y
b.∇l!(b.gzψ, b.gyψ, b.zψ, b.yψ)
ForwardDiff.extract_derivative!(Tagψ, b.Hv_temp, b.gzψ)
b.res .+= b.Hv_temp

# Store in `vals` the nonzeros of each column of the Hessian computed with color `icol`
for (row, k) in dcolor_icol
vals[k] = b.res[row]
end
end
Expand Down
34 changes: 34 additions & 0 deletions src/sparsity_pattern.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,37 @@ function compute_hessian_sparsity(
S = ADTypes.hessian_sparsity(lagrangian, x0, detector)
return S
end

"""
dcolors = nnz_colors(trilH, star_set, colors, ncolors)

Determine the coefficients in `trilH` that will be computed by a given color.

Arguments:
- `trilH::SparseMatrixCSC`: The lower triangular part of a symmetric matrix in CSC format.
- `star_set::StarSet`: A structure `StarSet` returned by the function `symmetric_coloring_detailed` of SparseMatrixColorings.jl.
- `colors::Vector{Int}`: A vector where the i-th entry represents the color assigned to the i-th column of the matrix.
- `ncolors::Int`: The number of distinct colors used in the coloring.

Output:
- `dcolors::Dict{Int, Vector{Tuple{Int, Int}}}`: A dictionary where the keys are the color indices (from 1 to `ncolors`),
and the values are vectors of tuples. Each tuple contains two integers: the first integer is the row index, and the
second integer is the index in `trilH.nzval` where the non-zero coefficient can be found.
"""
function nnz_colors(trilH, star_set, colors, ncolors)
# We want to determine the coefficients in `trilH` that will be computed by a given color.
# Because we exploit the symmetry, we also need to store the row index for a given coefficient
# in the "compressed column".
dcolors = Dict(i => Tuple{Int,Int}[] for i=1:ncolors)

n = LinearAlgebra.checksquare(trilH)
for j in 1:n
for k in trilH.colptr[j]:trilH.colptr[j+1]-1
i = trilH.rowval[k]
l, c = symmetric_coefficient(i, j, colors, star_set)
push!(dcolors[c], (l, k))
end
end

return dcolors
end
Loading