fix regularization in CRAIG

JuliaSmoothOptimizers · Aug 2, 2020 · c92b596 · c92b596
1 parent d6c9bf4
commit c92b596
Show file tree

Hide file tree

Showing 3 changed files with 99 additions and 28 deletions.
diff --git a/src/craig.jl b/src/craig.jl
@@ -6,7 +6,7 @@
 #
 # The method seeks to solve the minimum-norm problem
 #
-#  min ‖x‖²  s.t. Ax = b,
+#  min ‖x‖  s.t.  Ax = b,
 #
 # and is equivalent to applying the conjugate gradient method
 # to the linear system
@@ -36,7 +36,7 @@ export craig
 """
 Find the least-norm solution of the consistent linear system
 
-    Ax + √λs = b
+    Ax + λs = b
 
 using the Golub-Kahan implementation of Craig's method, where λ ≥ 0 is a
 regularization parameter. This method is equivalent to CGNE but is more
@@ -46,21 +46,28 @@ For a system in the form Ax = b, Craig's method is equivalent to applying
 CG to AAᵀy = b and recovering x = Aᵀy. Note that y are the Lagrange
 multipliers of the least-norm problem
 
-    minimize ‖x‖  subject to Ax = b.
+    minimize ‖x‖  s.t.  Ax = b.
 
 Preconditioners M⁻¹ and N⁻¹ may be provided in the form of linear operators and are
 assumed to be symmetric and positive definite.
-Afterward CRAIG solves the symmetric and quasi-definite system
+If `sqd = true`, CRAIG solves the symmetric and quasi-definite system
 
     [ -N   Aᵀ ] [ x ]   [ 0 ]
     [  A   M  ] [ y ] = [ b ],
 
-which is equivalent to applying CG to (M + AN⁻¹Aᵀ)y = b.
+which is equivalent to applying CG to `(AN⁻¹Aᵀ + M)y = b` with `Nx = Aᵀy`.
+
+If `sqd = false`, CRAIG solves the symmetric and indefinite system
+
+    [ -N   Aᵀ ] [ x ]   [ 0 ]
+    [  A   0  ] [ y ] = [ b ].
+
+In this case, M⁻¹ can still be specified and indicates the weighted norm in which residuals are measured.
 
 In this implementation, both the x and y-parts of the solution are returned.
 """
 function craig(A, b :: AbstractVector{T};
-               M=opEye(), N=opEye(), λ :: T=zero(T), atol :: T=√eps(T),
+               M=opEye(), N=opEye(), sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T),
                btol :: T=√eps(T), rtol :: T=√eps(T), conlim :: T=1/√eps(T), itmax :: Int=0,
                verbose :: Bool=false, transfer_to_lsqr :: Bool=false) where T <: AbstractFloat
 
@@ -85,6 +92,10 @@ function craig(A, b :: AbstractVector{T};
 
   x = kzeros(S, n)
   y = kzeros(S, m)
+
+  # When solving a SQD system, set regularization parameter λ = 1.
+  sqd && (λ = one(T))
+
   Mu = copy(b)
   u = M * Mu
   β₁ = sqrt(@kdot(m, u, Mu))
@@ -204,10 +215,10 @@ function craig(A, b :: AbstractVector{T};
 
     if λ > 0
       # Givens rotation to zero out the γ in position (k+1, 2k)
-      #       2k  2k+1    2k  2k+1      2k  2k+1
-      # k+1 [  γ    λ ] [ c₂   s₂ ] = [  0    δ ]
-      # k+2 [  0    0 ] [ s₂  -c₂ ]   [  0    0 ]
-      c₂, s₂, δ = sym_givens(γ, λ)
+      #       2k  2k+1     2k  2k+1      2k  2k+1
+      # k+1 [  γ    λ ] [ -c₂   s₂ ] = [  0    δ ]
+      # k+2 [  0    0 ] [  s₂   c₂ ]   [  0    0 ]
+      c₂, s₂, δ = sym_givens(λ, γ)
       @kscal!(n, s₂, w2)
     end
 

diff --git a/test/test_craig.jl b/test/test_craig.jl
@@ -78,16 +78,52 @@ function test_craig()
   @test y == zeros(size(A,1))
   @test stats.status == "x = 0 is a zero-residual solution"
 
+  # Test regularization
+  A, b, λ = regularization()
+  (x, y, stats) = craig(A, b, λ=λ)
+  s = λ * y
+  r = b - (A * x + λ * s)
+  resid = norm(r) / norm(b)
+  @printf("CRAIG: Relative residual: %8.1e\n", resid)
+  @test(resid ≤ craig_tol)
+  r2 = b - (A * A' + λ^2 * I) * y
+  resid2 = norm(r2) / norm(b)
+  @test(resid2 ≤ craig_tol)
+
+  # Test saddle-point systems
+  A, b, D = saddle_point()
+  D⁻¹ = inv(D)
+  (x, y, stats) = craig(A, b, N=D⁻¹)
+  r = b - A * x
+  resid = norm(r) / norm(b)
+  @printf("CRAIG: Relative residual: %8.1e\n", resid)
+  @test(resid ≤ craig_tol)
+  r2 = b - (A * D⁻¹ * A') * y
+  resid2 = norm(r2) / norm(b)
+  @test(resid2 ≤ craig_tol)
+
   # Test with preconditioners
-  A, b, M, N = two_preconditioners()
-  (x, y, stats) = craig(A, b, M=M, N=N)
+  A, b, M⁻¹, N⁻¹ = two_preconditioners()
+  (x, y, stats) = craig(A, b, M=M⁻¹, N=N⁻¹, sqd=false)
   show(stats)
   r = b - A * x
-  resid = sqrt(dot(r, M * r)) / norm(b)
+  resid = sqrt(dot(r, M⁻¹ * r)) / norm(b)
   @printf("CRAIG: Relative residual: %8.1e\n", resid)
-  @test(norm(x - N * A' * y) ≤ craig_tol * norm(x))
   @test(resid ≤ craig_tol)
-  @test(stats.solved)
+  @test(norm(x - N⁻¹ * A' * y) ≤ craig_tol * norm(x))
+
+  # Test symmetric and quasi-definite systems
+  A, b, M, N = sqd()
+  M⁻¹ = inv(M)
+  N⁻¹ = inv(N)
+  (x, y, stats) = craig(A, b, M=M⁻¹, N=N⁻¹, sqd=true)
+  r = b - (A * x + M * y)
+  resid = norm(r) / norm(b)
+  @printf("CRAIG: Relative residual: %8.1e\n", resid)
+  @test(resid ≤ craig_tol)
+  r2 = b - (A * N⁻¹ * A' + M) * y
+  resid2 = norm(r2) / norm(b)
+  @test(resid2 ≤ craig_tol)
 end
 
 test_craig()
diff --git a/test/test_utils.jl b/test/test_utils.jl
@@ -150,7 +150,7 @@ function overdetermined_adjoint(n :: Int=200, m :: Int=100)
   return A, b, c
 end
 
-# Adjoint ODEs
+# Adjoint ODEs.
 function adjoint_ode(n :: Int=50)
   χ₁ = χ₂ = χ₃ = 1.0
   # Primal ODE
@@ -169,7 +169,7 @@ function adjoint_ode(n :: Int=50)
   return A, b, c
 end
 
-# Adjoint PDEs
+# Adjoint PDEs.
 function adjoint_pde(n :: Int=50, m :: Int=50)
   κ₁ = 5.0
   κ₂ = 20.0
@@ -190,7 +190,7 @@ function adjoint_pde(n :: Int=50, m :: Int=50)
   return A, b, c
 end
 
-# Poisson equation in polar coordinates
+# Poisson equation in polar coordinates.
 function polar_poisson(n :: Int=50, m :: Int=50)
   f(r, θ) = -3.0 * cos(θ)
   g(r, θ) = 0.0
@@ -200,20 +200,19 @@ end
 
 # Square and preconditioned problems.
 function square_preconditioned(n :: Int=10)
-  A = ones(n, n) + (n-1) * I
-  b = 10.0 * [1:n;]
-  M = 1/n * eye(n)
-  return A, b, M
+  A   = ones(n, n) + (n-1) * eye(n)
+  b   = 10.0 * [1:n;]
+  M⁻¹ = 1/n * eye(n)
+  return A, b, M⁻¹
 end
 
 # Square problems with two preconditioners.
 function two_preconditioners(n :: Int=10, m :: Int=20)
-  A = ones(n, n) + (n-1) * I
-  b = ones(n)
-  O = zeros(n, n)
-  M = 1/√n * eye(n)
-  N = 1/√m * eye(n)
-  return A, b, M, N
+  A   = ones(n, n) + (n-1) * eye(n)
+  b   = ones(n)
+  M⁻¹ = 1/√n * eye(n)
+  N⁻¹ = 1/√m * eye(n)
+  return A, b, M⁻¹, N⁻¹
 end
 
 # Random Ax = b with b == 0.
@@ -222,3 +221,28 @@ function zero_rhs(n :: Int=10)
   b = zeros(n)
   return A, b
 end
+
+# Regularized problems.
+function regularization(n :: Int=5)
+  A = [2^(i/j)*j + (-1)^(i-j) * n*(i-1) for i = 1:n, j = 1:n]
+  b = ones(n)
+  λ = 4.0
+  return A, b, λ
+end
+
+# Saddle-point systems.
+function saddle_point(n :: Int=5)
+  A = [2^(i/j)*j + (-1)^(i-j) * n*(i-1) for i = 1:n, j = 1:n]
+  b = ones(n)
+  D = diagm(0 => [2.0 * i for i = 1:n])
+  return A, b, D
+end
+
+# Symmetric and quasi-definite systems.
+function sqd(n :: Int=5)
+  A = [2^(i/j)*j + (-1)^(i-j) * n*(i-1) for i = 1:n, j = 1:n]
+  b = ones(n)
+  M = diagm(0 => [3.0 * i for i = 1:n])
+  N = diagm(0 => [5.0 * i for i = 1:n])
+  return A, b, M, N
+end