Merge pull request #1410 from KrisThielemans/AbsFix

Relative Difference Prior abs() fix and improved numerical precision for other priors
UCL · Apr 28, 2024 · 8664718 · 8664718
2 parents 8ced2d7 + d30729f
commit 8664718
Show file tree

Hide file tree

Showing 7 changed files with 258 additions and 124 deletions.
diff --git a/documentation/release_6.1.htm b/documentation/release_6.1.htm
@@ -59,15 +59,17 @@ <h3>New functionality</h3>
 
 <h3>Changed functionality</h3>
 <ul>
-<li>
-</li>
+  <li>
+    Accumulation in computation of priors now uses doubles, which could result in slightly better precision. <a href=https://github.com/UCL/STIR/pull/1410>Part of PR #1410</a>.
+  </li>
 </ul>
 
 
 <h3>Bug fixes</h3>
 <ul>
-<li>
-</li>
+  <li>The Relative Difference Prior gave incorrect results, probably since switching to C++-14 in version 6.0, although we are not sure.
+    See <a href=https://github.com/UCL/STIR/pull/1410>PR #1410</a> and associated <a href=https://github.com/UCL/STIR/pull/1409>issue #1409</a>.
+  </li>
 </ul>
 
 <h3>Known problems</h3>

diff --git a/src/include/stir/recon_buildblock/RelativeDifferencePrior.h b/src/include/stir/recon_buildblock/RelativeDifferencePrior.h
@@ -51,6 +51,13 @@ START_NAMESPACE_STIR
   "A Concave Prior Penalizing Relative Differences for Maximum-a-Posteriori Reconstruction in Emission Tomography,"
   vol. 49, no. 1, pp. 56-60, 2002. </em>
 
+  If \f$ \epsilon=0 \f$, we attempt to resolve 0/0 at \f$ \lambda_r = \lambda_{r+dr}=0 \f$ by using the limit.
+  Note that the Hessian explodes to infinity when both voxel values approach 0, and we currently return \c INFINITY.
+  Also, as the RDP is not differentiable at this point, we have chosen to return 0 for the gradient
+  (such that a zero background is not modified).
+
+  \warning the default value for \f$ \epsilon \f$ is zero, which can be problematic for gradient-based algorithms.
+
   The \f$\kappa\f$ image can be used to have spatially-varying penalties such as in
   Jeff Fessler's papers. It should have identical dimensions to the image for which the
   penalty is computed. If \f$\kappa\f$ is not set, this class will effectively
@@ -59,6 +66,10 @@ START_NAMESPACE_STIR
   By default, a 3x3 or 3x3x3 neighbourhood is used where the weights are set to
   x-voxel_size divided by the Euclidean distance between the points.
 
+  The prior computation excludes voxel-pairs where one voxel is outside the volume. This is
+  effectively the same as extending the volume by replicating the edges (which is different
+  from zero boundary conditions).
+
 \par Parsing
   These are the keywords that can be used in addition to the ones in GeneralPrior.
   \verbatim
@@ -185,21 +196,30 @@ class RelativeDifferencePrior : public RegisteredParsingObject<RelativeDifferenc
   void initialise_keymap() override;
   bool post_processing() override;
 
-private:
+protected:
   shared_ptr<DiscretisedDensity<3, elemT>> kappa_ptr;
 
-  //! The second partial derivatives of the Relative Difference Prior
+  //! The value and partial derivatives of the Relative Difference Prior
   /*!
-   derivative_20 refers to the second derivative w.r.t. x_j only (i.e. diagonal elements of the Hessian)
-   derivative_11 refers to the second derivative w.r.t. x_j and x_k (i.e. off-diagonal elements of the Hessian)
-   See J. Nuyts, et al., 2002, Equation 7.
-   In the instance x_j, x_k and epsilon equal 0.0, these functions return 0.0 to prevent returning an undefined value
-   due to 0/0 computation. This is a "reasonable" solution to this issue.
+   * value refers to its value
+   * derivative_10 refers to the derivative w.r.t. x_j only
+   * derivative_20 refers to the second derivative w.r.t. x_j only (i.e. diagonal elements of the Hessian)
+   * derivative_11 refers to the second derivative w.r.t. x_j and x_k (i.e. off-diagonal elements of the Hessian)
+
+   See J. Nuyts, et al., 2002, Equation 7 etc, but here an epsilon is added.
+
+   In the instance x_j, x_k and epsilon equal 0.0, these functions get ill-defined due to 0/0 computations.
+   We currently return 0 for the value, 0 for derivative_10, and INFINITY for the second order
+   derivatives. These follow by taking the limit, i.e. by assuming continuity. Note however that
+   when epsilon=0, derivative_10(x,0) limits to \f$ 1/(1+\gamma) \f$, while derivative_10(x,x) limits to \f$ 0 \f$.
+
    * @param x_j is the target voxel.
    * @param x_k is the voxel in the neighbourhood.
    * @return the second order partial derivatives of the Relative Difference Prior
    */
   //@{
+  double value(const elemT x_j, const elemT x_k) const;
+  elemT derivative_10(const elemT x, const elemT y) const;
   elemT derivative_20(const elemT x_j, const elemT x_k) const;
   elemT derivative_11(const elemT x_j, const elemT x_k) const;
   //@}

diff --git a/src/recon_buildblock/LogcoshPrior.cxx b/src/recon_buildblock/LogcoshPrior.cxx
@@ -289,12 +289,12 @@ LogcoshPrior<elemT>::compute_value(const DiscretisedDensity<3, elemT>& current_i
                   for (int dx = min_dx; dx <= max_dx; ++dx)
                     {
                       // 1/scalar^2 * log(cosh(x * scalar))
-                      elemT voxel_diff = current_image_estimate[z][y][x] - current_image_estimate[z + dz][y + dy][x + dx];
-                      elemT current
+                      double voxel_diff = current_image_estimate[z][y][x] - current_image_estimate[z + dz][y + dy][x + dx];
+                      double current
                           = weights[dz][dy][dx] * 1 / (this->scalar * this->scalar) * logcosh(this->scalar * voxel_diff);
                       if (do_kappa)
                         current *= (*kappa_ptr)[z][y][x] * (*kappa_ptr)[z + dz][y + dy][x + dx];
-                      result += static_cast<double>(current);
+                      result += current;
                     }
             }
         }
@@ -349,21 +349,21 @@ LogcoshPrior<elemT>::compute_gradient(DiscretisedDensity<3, elemT>& prior_gradie
               const int min_dx = max(weights[0][0].get_min_index(), min_x - x);
               const int max_dx = min(weights[0][0].get_max_index(), max_x - x);
 
-              elemT gradient = 0;
+              double gradient = 0;
               for (int dz = min_dz; dz <= max_dz; ++dz)
                 for (int dy = min_dy; dy <= max_dy; ++dy)
                   for (int dx = min_dx; dx <= max_dx; ++dx)
                     {
                       // 1/scalar * tanh(x * scalar)
-                      elemT voxel_diff = current_image_estimate[z][y][x] - current_image_estimate[z + dz][y + dy][x + dx];
-                      elemT current = weights[dz][dy][dx] * (1 / this->scalar) * tanh(this->scalar * voxel_diff);
+                      double voxel_diff = current_image_estimate[z][y][x] - current_image_estimate[z + dz][y + dy][x + dx];
+                      double current = weights[dz][dy][dx] * (1 / this->scalar) * tanh(this->scalar * voxel_diff);
 
                       if (do_kappa)
                         current *= (*kappa_ptr)[z][y][x] * (*kappa_ptr)[z + dz][y + dy][x + dx];
 
                       gradient += current;
                     }
-              prior_gradient[z][y][x] = gradient * this->penalisation_factor;
+              prior_gradient[z][y][x] = static_cast<elemT>(gradient * this->penalisation_factor);
             }
         }
     }

diff --git a/src/recon_buildblock/PLSPrior.cxx b/src/recon_buildblock/PLSPrior.cxx
@@ -498,12 +498,12 @@ PLSPrior<elemT>::compute_value(const DiscretisedDensity<3, elemT>& current_image
                  (penalty[z][y][x]) * (*kappa_ptr)[z][y][x];
               */
 
-              elemT current = (*penalty_sptr)[z][y][x];
+              double current = (*penalty_sptr)[z][y][x];
 
               if (do_kappa)
                 current *= (*kappa_ptr)[z][y][x];
 
-              result += static_cast<double>(current);
+              result += current;
             }
         }
     }

diff --git a/src/recon_buildblock/QuadraticPrior.cxx b/src/recon_buildblock/QuadraticPrior.cxx
@@ -278,14 +278,14 @@ QuadraticPrior<elemT>::compute_value(const DiscretisedDensity<3, elemT>& current
                 for (int dy = min_dy; dy <= max_dy; ++dy)
                   for (int dx = min_dx; dx <= max_dx; ++dx)
                     {
-                      elemT current = weights[dz][dy][dx]
-                                      * square(current_image_estimate[z][y][x] - current_image_estimate[z + dz][y + dy][x + dx])
-                                      / 4;
+                      double current = weights[dz][dy][dx]
+                                       * square(current_image_estimate[z][y][x] - current_image_estimate[z + dz][y + dy][x + dx])
+                                       / 4;
 
                       if (do_kappa)
                         current *= (*kappa_ptr)[z][y][x] * (*kappa_ptr)[z + dz][y + dy][x + dx];
 
-                      result += static_cast<double>(current);
+                      result += current;
                     }
             }
         }
@@ -348,65 +348,20 @@ QuadraticPrior<elemT>::compute_gradient(DiscretisedDensity<3, elemT>& prior_grad
                  (current_image_estimate[z][y][x] - current_image_estimate[z+dz][y+dy][x+dx]) *
                  (*kappa_ptr)[z][y][x] * (*kappa_ptr)[z+dz][y+dy][x+dx];
               */
-#if 1
-              elemT gradient = 0;
+              double gradient = 0.;
               for (int dz = min_dz; dz <= max_dz; ++dz)
                 for (int dy = min_dy; dy <= max_dy; ++dy)
                   for (int dx = min_dx; dx <= max_dx; ++dx)
                     {
-                      elemT current = weights[dz][dy][dx]
-                                      * (current_image_estimate[z][y][x] - current_image_estimate[z + dz][y + dy][x + dx]);
+                      double current = weights[dz][dy][dx]
+                                       * (current_image_estimate[z][y][x] - current_image_estimate[z + dz][y + dy][x + dx]);
 
                       if (do_kappa)
                         current *= (*kappa_ptr)[z][y][x] * (*kappa_ptr)[z + dz][y + dy][x + dx];
 
                       gradient += current;
                     }
-#else
-              // attempt to speed up by precomputing the sum of weights.
-              // The current code gives identical results but is actually slower
-              // than the above, at least when kappas are present.
-
-              // precompute sum of weights
-              // TODO without kappas, this is just weights.sum() most of the time,
-              // but not near edges
-              float sum_of_weights = 0;
-              {
-                if (do_kappa)
-                  {
-                    for (int dz = min_dz; dz <= max_dz; ++dz)
-                      for (int dy = min_dy; dy <= max_dy; ++dy)
-                        for (int dx = min_dx; dx <= max_dx; ++dx)
-                          sum_of_weights += weights[dz][dy][dx] * (*kappa_ptr)[z + dz][y + dy][x + dx];
-                  }
-                else
-                  {
-                    for (int dz = min_dz; dz <= max_dz; ++dz)
-                      for (int dy = min_dy; dy <= max_dy; ++dy)
-                        for (int dx = min_dx; dx <= max_dx; ++dx)
-                          sum_of_weights += weights[dz][dy][dx];
-                  }
-              }
-              // now compute contribution of central term
-              elemT gradient = sum_of_weights * current_image_estimate[z][y][x];
-
-              // subtract the rest
-              for (int dz = min_dz; dz <= max_dz; ++dz)
-                for (int dy = min_dy; dy <= max_dy; ++dy)
-                  for (int dx = min_dx; dx <= max_dx; ++dx)
-                    {
-                      elemT current = weights[dz][dy][dx] * current_image_estimate[z + dz][y + dy][x + dx];
-
-                      if (do_kappa)
-                        current *= (*kappa_ptr)[z + dz][y + dy][x + dx];
-
-                      gradient -= current;
-                    }
-              // multiply with central kappa
-              if (do_kappa)
-                gradient *= (*kappa_ptr)[z][y][x];
-#endif
-              prior_gradient[z][y][x] = gradient * this->penalisation_factor;
+              prior_gradient[z][y][x] = static_cast<elemT>(gradient * this->penalisation_factor);
             }
         }
     }

diff --git a/src/recon_buildblock/RelativeDifferencePrior.cxx b/src/recon_buildblock/RelativeDifferencePrior.cxx
@@ -2,7 +2,7 @@
 //
 /*
     Copyright (C) 2000- 2019, Hammersmith Imanet Ltd
-    Copyright (C) 2019- 2020, UCL
+    Copyright (C) 2019- 2024, UCL
     This file is part of STIR.
 
     SPDX-License-Identifier: Apache-2.0
@@ -31,9 +31,9 @@
 #include "stir/warning.h"
 #include "stir/error.h"
 #include <algorithm>
+#include <cmath>
 using std::min;
 using std::max;
-
 /* Pretty horrible code because we don't have an iterator of neigbhourhoods yet
  */
 
@@ -265,6 +265,29 @@ compute_weights(Array<3, float>& weights, const CartesianCoordinate3D<float>& gr
         }
 }
 
+template <typename elemT>
+double
+RelativeDifferencePrior<elemT>::value(const elemT x, const elemT y) const
+{
+  return 0.5 * (square(static_cast<double>(x - y)) / (x + y + this->gamma * std::abs(x - y) + this->epsilon));
+}
+
+template <typename elemT>
+elemT
+RelativeDifferencePrior<elemT>::derivative_10(const elemT x, const elemT y) const
+{
+  if (this->epsilon == 0.0 && x == 0 && y == 0)
+    {
+      // handle 0/0 by taking the limit with x=y->0
+      // note that the limit y=0,x->0 is 1/(1+gamma)
+      return elemT(0);
+    }
+
+  const double num = (static_cast<double>(x - y) * (this->gamma * std::abs(x - y) + x + 3 * y + 2 * this->epsilon));
+  const double denom_sqrt = static_cast<double>(x + y) + this->gamma * std::abs(x - y) + this->epsilon;
+  return static_cast<elemT>(num / (denom_sqrt * denom_sqrt));
+}
+
 template <typename elemT>
 double
 RelativeDifferencePrior<elemT>::compute_value(const DiscretisedDensity<3, elemT>& current_image_estimate)
@@ -317,7 +340,7 @@ RelativeDifferencePrior<elemT>::compute_value(const DiscretisedDensity<3, elemT>
                 for (int dy = min_dy; dy <= max_dy; ++dy)
                   for (int dx = min_dx; dx <= max_dx; ++dx)
                     {
-                      elemT current;
+                      double current;
                       if (this->epsilon == 0.0 && current_image_estimate[z][y][x] == 0.0
                           && current_image_estimate[z + dz][y + dy][x + dx] == 0.0)
                         {
@@ -326,18 +349,13 @@ RelativeDifferencePrior<elemT>::compute_value(const DiscretisedDensity<3, elemT>
                         }
                       else
                         {
-                          current = weights[dz][dy][dx] * 0.5
-                                    * (pow(current_image_estimate[z][y][x] - current_image_estimate[z + dz][y + dy][x + dx], 2)
-                                       / (current_image_estimate[z][y][x] + current_image_estimate[z + dz][y + dy][x + dx]
-                                          + this->gamma
-                                                * abs(current_image_estimate[z][y][x]
-                                                      - current_image_estimate[z + dz][y + dy][x + dx])
-                                          + this->epsilon));
+                          current = weights[dz][dy][dx]
+                                    * value(current_image_estimate[z][y][x], current_image_estimate[z + dz][y + dy][x + dx]);
                         }
                       if (do_kappa)
                         current *= (*kappa_ptr)[z][y][x] * (*kappa_ptr)[z + dz][y + dy][x + dx];
 
-                      result += static_cast<double>(current);
+                      result += current;
                     }
             }
         }
@@ -394,41 +412,21 @@ RelativeDifferencePrior<elemT>::compute_gradient(DiscretisedDensity<3, elemT>& p
               const int min_dx = max(weights[0][0].get_min_index(), min_x - x);
               const int max_dx = min(weights[0][0].get_max_index(), max_x - x);
 
-              elemT gradient = 0;
+              double gradient = 0;
               for (int dz = min_dz; dz <= max_dz; ++dz)
                 for (int dy = min_dy; dy <= max_dy; ++dy)
                   for (int dx = min_dx; dx <= max_dx; ++dx)
                     {
-
-                      elemT current;
-                      if (this->epsilon == 0.0 && current_image_estimate[z][y][x] == 0.0
-                          && current_image_estimate[z + dz][y + dy][x + dx] == 0.0)
-                        {
-                          // handle the undefined nature of the gradient
-                          current = 0.0;
-                        }
-                      else
-                        {
-                          current
-                              = weights[dz][dy][dx]
-                                * (((current_image_estimate[z][y][x] - current_image_estimate[z + dz][y + dy][x + dx])
-                                    * (this->gamma
-                                           * abs(current_image_estimate[z][y][x] - current_image_estimate[z + dz][y + dy][x + dx])
-                                       + current_image_estimate[z][y][x] + 3 * current_image_estimate[z + dz][y + dy][x + dx]
-                                       + 2 * this->epsilon))
-                                   / (square((current_image_estimate[z][y][x] + current_image_estimate[z + dz][y + dy][x + dx])
-                                             + this->gamma
-                                                   * abs(current_image_estimate[z][y][x]
-                                                         - current_image_estimate[z + dz][y + dy][x + dx])
-                                             + this->epsilon)));
-                        }
+                      double current
+                          = weights[dz][dy][dx]
+                            * derivative_10(current_image_estimate[z][y][x], current_image_estimate[z + dz][y + dy][x + dx]);
                       if (do_kappa)
                         current *= (*kappa_ptr)[z][y][x] * (*kappa_ptr)[z + dz][y + dy][x + dx];
 
                       gradient += current;
                     }
 
-              prior_gradient[z][y][x] = gradient * this->penalisation_factor;
+              prior_gradient[z][y][x] = static_cast<elemT>(gradient * this->penalisation_factor);
             }
         }
     }
@@ -626,9 +624,9 @@ elemT
 RelativeDifferencePrior<elemT>::derivative_20(const elemT x_j, const elemT x_k) const
 {
   if (x_j > 0.0 || x_k > 0.0 || this->epsilon > 0.0)
-    return 2 * pow(2 * x_k + this->epsilon, 2) / pow(x_j + x_k + this->gamma * abs(x_j - x_k) + this->epsilon, 3);
+    return 2 * pow(2 * x_k + this->epsilon, 2) / pow(x_j + x_k + this->gamma * std::abs(x_j - x_k) + this->epsilon, 3);
   else
-    return 0.0;
+    return INFINITY;
 }
 
 template <typename elemT>
@@ -637,9 +635,9 @@ RelativeDifferencePrior<elemT>::derivative_11(const elemT x_j, const elemT x_k)
 {
   if (x_j > 0.0 || x_k > 0.0 || this->epsilon > 0.0)
     return -2 * (2 * x_j + this->epsilon) * (2 * x_k + this->epsilon)
-           / pow(x_j + x_k + this->gamma * abs(x_j - x_k) + this->epsilon, 3);
+           / pow(x_j + x_k + this->gamma * std::abs(x_j - x_k) + this->epsilon, 3);
   else
-    return 0.0;
+    return INFINITY;
 }
 
 #ifdef _MSC_VER