Merge pull request QMCPACK#5157 from camelto2/update_regularization

Update regularization scheme in SR method and add docs
rcclay · Sep 5, 2024 · 0cd16e6 · 0cd16e6
2 parents d4d4c94 + 88ed15f
commit 0cd16e6
Show file tree

Hide file tree

Showing 5 changed files with 114 additions and 5 deletions.
diff --git a/docs/bibs/methods.bib b/docs/bibs/methods.bib
@@ -141,3 +141,69 @@ @article{Melton2016-2
   Volume = {144},
   Year = {2016}
 }
+
+@article{Sorella2001,
+  title = {Generalized Lanczos algorithm for variational quantum Monte Carlo},
+  author = {Sorella, Sandro},
+  journal = {Phys. Rev. B},
+  volume = {64},
+  issue = {2},
+  pages = {024512},
+  numpages = {16},
+  year = {2001},
+  month = {Jun},
+  publisher = {American Physical Society},
+  doi = {10.1103/PhysRevB.64.024512},
+  url = {https://link.aps.org/doi/10.1103/PhysRevB.64.024512}
+}
+
+@article{Casula2004,
+    author = {Casula, Michele and Attaccalite, Claudio and Sorella, Sandro},
+    title = "{Correlated geminal wave function for molecules: An efficient resonating valence bond approach}",
+    journal = {The Journal of Chemical Physics},
+    volume = {121},
+    number = {15},
+    pages = {7110-7126},
+    year = {2004},
+    month = {10},
+    abstract = "{We show that a simple correlated wave function, obtained by applying a Jastrow correlation term to an antisymmetrized geminal power, based upon singlet pairs between electrons, is particularly suited for describing the electronic structure of molecules, yielding a large amount of the correlation energy. The remarkable feature of this approach is that, in principle, several resonating valence bonds can be dealt simultaneously with a single determinant, at a computational cost growing with the number of electrons similar to more conventional methods, such as Hartree-Fock or density functional theory. Moreover we describe an extension of the stochastic reconfiguration method, which was recently introduced for the energy minimization of simple atomic wave functions. Within this extension the atomic positions can be considered as further variational parameters, which can be optimized together with the remaining ones. The method is applied to several molecules from Li2 to benzene by obtaining total energies, bond lengths and binding energies comparable with much more demanding multiconfiguration schemes.}",
+    issn = {0021-9606},
+    doi = {10.1063/1.1794632},
+    url = {https://doi.org/10.1063/1.1794632},
+    eprint = {https://pubs.aip.org/aip/jcp/article-pdf/121/15/7110/19183362/7110\_1\_online.pdf},
+}
+
+@article{Neuscamman2012,
+  title = {Optimizing large parameter sets in variational quantum Monte Carlo},
+  author = {Neuscamman, Eric and Umrigar, C. J. and Chan, Garnet Kin-Lic},
+  journal = {Phys. Rev. B},
+  volume = {85},
+  issue = {4},
+  pages = {045103},
+  numpages = {6},
+  year = {2012},
+  month = {Jan},
+  publisher = {American Physical Society},
+  doi = {10.1103/PhysRevB.85.045103},
+  url = {https://link.aps.org/doi/10.1103/PhysRevB.85.045103}
+}
+
+@article{Sorella2007,
+    author = {Sorella, Sandro and Casula, Michele and Rocca, Dario},
+    title = "{Weak binding between two aromatic rings: Feeling the van der Waals attraction by quantum Monte Carlo methods}",
+    journal = {The Journal of Chemical Physics},
+    volume = {127},
+    number = {1},
+    pages = {014105},
+    year = {2007},
+    month = {07},
+    abstract = "{We report a systematic study of the weak chemical bond between two benzene molecules. We first show that it is possible to obtain a very good description of the C2 dimer and the benzene molecule by using pseudopotentials for the chemically inert 1s electrons and a resonating valence bond wave function as a variational ansatz, expanded on a relatively small Gaussian basis set. We employ an improved version of the stochastic reconfiguration technique to optimize the many-body wave function, which is the starting point for highly accurate simulations based on the lattice regularized diffusion Monte Carlo method. This projection technique provides a rigorous variational upper bound for the total energy, even in the presence of pseudopotentials, and substantially improves the accuracy of the trial wave function, which already yields a large fraction of the dynamical and nondynamical electron correlation. We show that the energy dispersion of two benzene molecules in the parallel displaced geometry is significantly deeper than the face-to-face configuration. However, contrary to previous studies based on post-Hartree-Fock methods, the binding energy remains weak (≃2kcal∕mol) also in this geometry, and its value is in agreement with the most accurate and recent experimental findings [H. Krause et al., Chem. Phys. Lett. 184, 411 (1991)].}",
+    issn = {0021-9606},
+    doi = {10.1063/1.2746035},
+    url = {https://doi.org/10.1063/1.2746035},
+    eprint = {https://pubs.aip.org/aip/jcp/article-pdf/doi/10.1063/1.2746035/15399401/014105\_1\_online.pdf},
+}
+
+
+
+
diff --git a/docs/methods.rst b/docs/methods.rst
@@ -598,7 +598,7 @@ Optimizers
 QMCPACK implements a number of different optimizers each with different
 priorities for accuracy, convergence, memory usage, and stability. The
 optimizers can be switched among “OneShiftOnly” (default), “adaptive,”
-“descent,” “hybrid,” and “quartic” (old) using the following line in the
+“descent,” “hybrid,” "stochastic_reconfiguration," and “quartic” (old) using the following line in the
 optimization block:
 
 ::
@@ -1174,6 +1174,43 @@ Additional information and recommendations:
    parameters discussed earlier for descent are useful for setting up
    the descent engine to do this averaging on its own.
 
+Stochastic Reconfiguration
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+We have implemented a preliminary version of stochastic reconfiguration (:cite:`Sorella2001` and :cite:`Casula2004`),
+currently only available in the batched drivers. The SR optimization reduces the
+computational cost over the linear method by avoiding the need to build the 
+Hamiltonian derivative matrix elements, and instead only needs the derivative overlap
+matrix. This can result in substantial savings when optimizing with very large parameter 
+counts, e.g. in orbital optimization. The SR method determines the parameter changes via
+
+:math:`-\tau \mathbf{g} = \mathbf{S} \Delta \mathbf{p}`
+
+where :math:`\mathbf{S}` is given by :math:`\langle \Psi_i | \Psi_j\rangle`, :math:`\mathbf{g}` is given by :math:`\langle \Psi_i | H | \Psi_0\rangle`, :math:`\Delta \mathbf{p}` is the parameter update, and :math:`\tau` is an effective timestep since the SR method can be interpretted as an imaginary time projection expanded in the parameter derivative basis. 
+The solution could be found by directly inverting the overlap matrix :math:`\mathbf{S}`, but this becomes prohibitive for large parameter counts. Therefore, we have implemented the conjugate gradient iterative scheme to solve the linear equation :cite:`Neuscamman2012`. This avoids having to directly invert the overlap matrix and significantly reduces the cost for large parameter counts.
+
+Since we are using finite samples to represent the overlap matrix, it can become ill-conditioned. We choose to use a simple regularization scheme to improve the optimization, described in :cite:`Sorella2007`. The overlap matrix is scaled via :math:`\mathbf{S} \rightarrow \mathbf{S}(1+\epsilon \mathbf{I})`, where :math:`\epsilon` is a small scalar. This can be controlled through ``sr_regularization``. 
+
+By default, the parameter update is accepted as is, and the size of the proposed parameter changes can be controlled by the timestep :math:`\tau`. This parameter can be controlled via ``sr_tau``. If this parameter gets too large, the optimization can become unstable. Therefore, it is recommended to use a small timestep. Small timesteps require many more total optimization steps than is typically required by the linear method, so convergence should be carefully checked. Alternatively, it is possible to use the conjugate gradient step to determine the parameter update direction, and follow up with a line search, triggered via ``line_search``. This can result in much faster convergence at the expense of doing additional correlated sampling steps. 
+
+We are currently investigating various improvements to make this a more reliable optimizer. 
+
+``stochastic_reconfiguration`` method:
+
+  parameters:
+
+  +-----------------------+--------------+-------------+-------------+----------------------------------------------+
+  | **Name**              | **Datatype** | **Values**  | **Default** | **Description**                              |
+  +=======================+==============+=============+=============+==============================================+
+  | ``sr_tau``            | real         | :math:`> 0` | 0.01        | Effective timestep for SR equation           |
+  +-----------------------+--------------+-------------+-------------+----------------------------------------------+
+  | ``sr_tolerance``      | real         | :math:`> 0` | 1e-06       | Convergence threshold for CG algorithm       | 
+  +-----------------------+--------------+-------------+-------------+----------------------------------------------+
+  | ``sr_regularization`` | real         | :math:`> 0` | 0.01        | Scaling constant for S matrix regularization | 
+  +-----------------------+--------------+-------------+-------------+----------------------------------------------+
+  | ``line_search``       | text         | yes/no      | no          | Use linesearch to find optimal move          |
+  +-----------------------+--------------+-------------+-------------+----------------------------------------------+
+
+
 Quartic Optimizer
 ^^^^^^^^^^^^^^^^^
 

diff --git a/src/QMCDrivers/WFOpt/ConjugateGradient.cpp b/src/QMCDrivers/WFOpt/ConjugateGradient.cpp
@@ -52,7 +52,7 @@ int ConjugateGradient::run(QMCCostFunctionBase& optTarget, const std::vector<Rea
     //optTarget needs to implement A*p and return in Apk
     optTarget.calcOvlParmVec(pk, Apk);
     for (int pm = 0; pm < numParams; pm++)
-      Apk[pm] += regularization_ * pk[pm];
+      Apk[pm] *= (1 + regularization_);
     Real denom = 0;
     for (int i = 0; i < numParams; i++)
       denom += pk[i] * Apk[i];

diff --git a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.cpp b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.cpp
@@ -83,6 +83,8 @@ QMCFixedSampleLinearOptimizeBatched::QMCFixedSampleLinearOptimizeBatched(
       accept_history(3),
       shift_s_base(4.0),
       sr_tau(0.01),
+      sr_regularization(0.01),
+      sr_tolerance(1e-6),
       MinMethod("OneShiftOnly"),
       do_output_matrices_csv_(false),
       do_output_matrices_hdf_(false),
@@ -114,6 +116,8 @@ QMCFixedSampleLinearOptimizeBatched::QMCFixedSampleLinearOptimizeBatched(
   m_param.add(shift_i_input, "shift_i");
   m_param.add(shift_s_input, "shift_s");
   m_param.add(sr_tau, "sr_tau");
+  m_param.add(sr_regularization, "sr_regularization");
+  m_param.add(sr_tolerance, "sr_tolerance");
   // options_LMY_
   m_param.add(options_LMY_.targetExcited, "options_LMY_.targetExcited");
   m_param.add(options_LMY_.block_lm, "options_LMY_.block_lm");
@@ -1853,7 +1857,7 @@ bool QMCFixedSampleLinearOptimizeBatched::stochastic_reconfiguration_conjugate_g
       std::vector<RealType> bvec(numParams, 0);
       for (int i = 0; i < numParams; i++)
         bvec[i] = -sr_tau * ham[i + 1];
-      ConjugateGradient cg;
+      ConjugateGradient cg(sr_tolerance, sr_regularization);
       int iterations = cg.run(*optTarget, bvec, param_update);
       app_log() << "Solved iterative krylov in " << iterations << " iterations" << std::endl;
       // compute the scaling constant to apply to the update
@@ -1915,8 +1919,6 @@ bool QMCFixedSampleLinearOptimizeBatched::stochastic_reconfiguration_conjugate_g
       optTarget->Params(i) = currentParameters.at(i) + objFuncWrapper_.Lambda * parameterDirections.at(i + 1);
   }
 
-  if (bestShift_s > 1.0e-2)
-    bestShift_s = bestShift_s / shift_s_base;
   // say what we are doing
   app_log() << std::endl << "The new set of parameters is valid. Updating the trial wave function!" << std::endl;
   accept_history <<= 1;

diff --git a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.h b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.h
@@ -200,6 +200,10 @@ class QMCFixedSampleLinearOptimizeBatched : public QMCDriverNew, LinearMethod
   RealType shift_s_base;
   /// SR projection timestep
   RealType sr_tau;
+  /// SR regularization parameter
+  RealType sr_regularization;
+  /// tolerance for CG solution in SR
+  RealType sr_tolerance;
 
   // ------------------------------------
   // Parameters in this struct are used by one or more of the adaptive LM, descent, or hybrid optimizers