No-U-Turn Sampler in HMC (#216)

* initialize nuts sampler class * implement the burnin of nuts sampler * add tests and resolve bugs * implement e0 estimation in burnin of nuts * optimize leapfrog * integrate nuts into the R interface * document random walk in sample_points.cpp in R interface * fix burnin for the non-truncated case * resolve bugs in hmc and nuts pipelines * improve the preprocess in burin step of nuts * split large lines in sample_points.cpp * Add NUTS C++ example and update CMake (#29) * resolve PR comments * fix minor bug * fix compiler bug * fix error in building the C++ examples * resolve warnings in sample_points * fix lpsolve cran warning * fix cran warning on mac * improve lpsolve cmake for cran check * fix R warning in mac test * remove lpsolve header * resolve PR comments --------- Co-authored-by: Marios Papachristou <[email protected]> Co-authored-by: Apostolos Chalkis (TolisChal) <[email protected]>
GeomScale · Jul 7, 2024 · 5348bf2 · 5348bf2
1 parent 3aeae7a
commit 5348bf2
Show file tree

Hide file tree

Showing 17 changed files with 732 additions and 78 deletions.
diff --git a/R-proj/R/RcppExports.R b/R-proj/R/RcppExports.R
@@ -361,11 +361,6 @@ sample_points <- function(P, n, random_walk = NULL, distribution = NULL, seed =
     .Call(`_volesti_sample_points`, P, n, random_walk, distribution, seed)
 }
 
-#' @export
-sample_spectra <- function(file = NULL, N = NULL, walk_length = NULL) {
-    .Call(`_volesti_sample_spectra`, file, N, walk_length)
-}
-
 #' Write a SDPA format file
 #'
 #' Outputs a spectrahedron (the matrices defining a linear matrix inequality) and a vector (the objective function)

diff --git a/R-proj/examples/logconcave/nuts_rand_poly.R b/R-proj/examples/logconcave/nuts_rand_poly.R
@@ -0,0 +1,55 @@
+# VolEsti (volume computation and sampling library)
+
+# Copyright (c) 2012-2020 Vissarion Fisikopoulos
+# Copyright (c) 2018-2020 Apostolos Chalkis
+# Copyright (c) 2020-2020 Marios Papachristou
+
+# Contributed and/or modified by Marios Papachristou, as part of Google Summer of Code 2020 program.
+
+# Licensed under GNU LGPL.3, see LICENCE file
+
+# Example script for using the logconcave sampling methods
+
+# Import required libraries
+library(ggplot2)
+library(volesti)
+
+# Sampling from logconcave density example
+
+# Helper function
+norm_vec <- function(x) sqrt(sum(x^2))
+
+# Negative log-probability oracle
+f <- function(x) (norm_vec(x)^2 + sum(x))
+
+# Negative log-probability gradient oracle
+grad_f <- function(x) (2 * x + 1)
+
+dimension <- 50
+facets <- 200
+
+# Create domain of truncation
+H <- gen_rand_hpoly(dimension, facets, seed = 15)
+
+# Rounding
+Tr <- rounding(H, seed = 127)
+
+P <- Hpolytope$new(A = Tr$Mat[1:nrow(Tr$Mat), 2:ncol(Tr$Mat)], b = Tr$Mat[,1])
+
+x_min = matrix(0, dimension, 1)
+
+# Warm start point from truncated Gaussian
+warm_start <- sample_points(P, n = 1, random_walk = list("nburns" = 5000), distribution = list("density" = "gaussian", "variance" = 1/2, "mode" = x_min))
+
+# Sample points
+n_samples <- 20000
+
+samples <- sample_points(P, n = n_samples, random_walk = list("walk" = "NUTS", "solver" = "leapfrog", "starting_point" = warm_start[,1]),
+                         distribution = list("density" = "logconcave", "negative_logprob" = f, "negative_logprob_gradient" = grad_f))
+
+# Plot histogram
+hist(samples[1,], probability=TRUE, breaks = 100)
+
+psrfs <- psrf_univariate(samples)
+n_ess <- ess(samples)
+
diff --git a/R-proj/examples/logconcave/simple_hmc_rand_poly.R b/R-proj/examples/logconcave/simple_hmc_rand_poly.R
@@ -29,10 +29,10 @@ dimension <- 50
 facets <- 200
 
 # Create domain of truncation
-H <- gen_rand_hpoly(dimension, facets)
+H <- gen_rand_hpoly(dimension, facets, seed = 15)
 
 # Rounding
-Tr <- rounding(H)
+Tr <- rounding(H, seed = 127)
 
 P <- Hpolytope$new(A = Tr$Mat[1:nrow(Tr$Mat), 2:ncol(Tr$Mat)], b = Tr$Mat[,1])
 

diff --git a/R-proj/man/sample_points.Rd b/R-proj/man/sample_points.Rd
diff --git a/R-proj/src/sample_points.cpp b/R-proj/src/sample_points.cpp
@@ -36,6 +36,7 @@ enum random_walks {
   brdhr,
   bcdhr,
   hmc,
+  nuts,
   gaussian_hmc,
   exponential_hmc,
   uld
@@ -213,6 +214,26 @@ void sample_from_polytope(Polytope &P, int type, RNGType &rng, PointList &randPo
       }
 
       break;
+    case nuts:
+
+      logconcave_sampling <
+              PointList,
+              Polytope,
+              RNGType,
+              NutsHamiltonianMonteCarloWalk,
+              NT,
+              Point,
+              NegativeGradientFunctor,
+              NegativeLogprobFunctor,
+              LeapfrogODESolver <
+                Point,
+                NT,
+                Polytope,
+                NegativeGradientFunctor
+              >
+            >(randPoints, P, rng, walkL, numpoints, StartingPoint, nburns, *F, *f);
+
+        break;
     case uld:
 
       logconcave_sampling <
@@ -244,7 +265,16 @@ void sample_from_polytope(Polytope &P, int type, RNGType &rng, PointList &randPo
 //' @param n The number of points that the function is going to sample from the convex polytope.
 //' @param random_walk Optional. A list that declares the random walk and some related parameters as follows:
 //' \itemize{
-//' \item{\code{walk} }{ A string to declare the random walk: i) \code{'CDHR'} for Coordinate Directions Hit-and-Run, ii) \code{'RDHR'} for Random Directions Hit-and-Run, iii) \code{'BaW'} for Ball Walk, iv) \code{'BiW'} for Billiard walk, v) \code{'dikin'} for dikin walk, vi) \code{'vaidya'} for vaidya walk, vii) \code{'john'} for john walk, viii) \code{'BCDHR'} boundary sampling by keeping the extreme points of CDHR or ix) \code{'BRDHR'} boundary sampling by keeping the extreme points of RDHR x) \code{'HMC'} for Hamiltonian Monte Carlo (logconcave densities) xi) \code{'ULD'} for Underdamped Langevin Dynamics using the Randomized Midpoint Method xii) \code{'ExactHMC'} for exact Hamiltonian Monte Carlo with reflections (spherical Gaussian or exponential distribution). The default walk is \code{'aBiW'} for the uniform distribution or \code{'CDHR'} for the Gaussian distribution and H-polytopes and \code{'BiW'} or \code{'RDHR'} for the same distributions and V-polytopes and zonotopes.}
+//' \item{\code{walk} }{ A string to declare the random walk: i) \code{'CDHR'} for Coordinate Directions Hit-and-Run, 
+//' ii) \code{'RDHR'} for Random Directions Hit-and-Run, iii) \code{'BaW'} for Ball Walk, 
+//' iv) \code{'BiW'} for Billiard walk, v) \code{'dikin'} for dikin walk, vi) \code{'vaidya'} for vaidya walk, 
+//' vii) \code{'john'} for john walk, viii) \code{'BCDHR'} boundary sampling by keeping the extreme points of CDHR or 
+//' ix) \code{'BRDHR'} boundary sampling by keeping the extreme points of RDHR x) \code{'NUTS'} for 
+//' NUTS Hamiltonian Monte Carlo sampler (logconcave densities) xi) \code{'HMC'} for Hamiltonian Monte Carlo (logconcave densities) 
+//' xii) \code{'ULD'} for Underdamped Langevin Dynamics using the Randomized Midpoint Method (logconcave densities) 
+//' xiii) \code{'ExactHMC'} for exact Hamiltonian Monte Carlo with reflections (spherical Gaussian or exponential distribution). 
+//' The default walk is \code{'aBiW'} for the uniform distribution, \code{'CDHR'} for the Gaussian distribution and H-polytopes 
+//' and \code{'BiW'} or \code{'RDHR'} for the same distributions and V-polytopes and zonotopes. \code{'NUTS'} is the default sampler for logconcave densities.}
 //' \item{\code{walk_length} }{ The number of the steps per generated point for the random walk. The default value is \eqn{1}.}
 //' \item{\code{nburns} }{ The number of points to burn before start sampling. The default value is \eqn{1}.}
 //' \item{\code{starting_point} }{ A \eqn{d}-dimensional numerical vector that declares a starting point in the interior of the polytope for the random walk. The default choice is the center of the ball as that one computed by the function \code{inner_ball()}.}
@@ -410,18 +440,24 @@ Rcpp::NumericMatrix sample_points(Rcpp::Nullable<Rcpp::Reference> P,
         Rcpp::Function negative_logprob = Rcpp::as<Rcpp::List>(distribution)["negative_logprob"];
         Rcpp::Function negative_logprob_gradient = Rcpp::as<Rcpp::List>(distribution)["negative_logprob_gradient"];
 
-        NT L_, m;
+        NT L_ = 1, m = 1;
 
         if (Rcpp::as<Rcpp::List>(distribution).containsElementNamed("L_")) {
             L_ = Rcpp::as<NT>(Rcpp::as<Rcpp::List>(distribution)["L_"]);
+            if (L_ <= NT(0)) {
+                throw Rcpp::exception("The smoothness constant must be positive");
+            }
         } else {
-            throw Rcpp::exception("The smoothness constant is absent");
+            L_ = -1;
         }
 
         if (Rcpp::as<Rcpp::List>(distribution).containsElementNamed("m")) {
             m = Rcpp::as<NT>(Rcpp::as<Rcpp::List>(distribution)["m"]);
+            if (m <= NT(0)) {
+                throw Rcpp::exception("The strong-convexity constant must be positive");
+            }
         } else {
-            throw Rcpp::exception("The strong-convexity constant is absent");
+            m = -1;
         }
 
         if (Rcpp::as<Rcpp::List>(random_walk).containsElementNamed("step_size")) {
@@ -443,7 +479,6 @@ Rcpp::NumericMatrix sample_points(Rcpp::Nullable<Rcpp::Reference> P,
             throw Rcpp::exception("Invalid ODE solver specified. Aborting.");
           }
          } else {
-          Rcpp::warning("Solver set to leapfrog.");
           solver = leapfrog;
         }
 
@@ -495,6 +530,8 @@ Rcpp::NumericMatrix sample_points(Rcpp::Nullable<Rcpp::Reference> P,
                 throw Rcpp::exception("Exponential sampling is supported only for H-polytopes");
             }
             walk = exponential_hmc;
+        } else if (logconcave) {
+            walk = nuts;
         } else if (gaussian) {
             if (type == 1) {
                 walk = cdhr;
@@ -571,7 +608,12 @@ Rcpp::NumericMatrix sample_points(Rcpp::Nullable<Rcpp::Reference> P,
         }
     } else if (Rcpp::as<std::string>(Rcpp::as<Rcpp::List>(random_walk)["walk"]).compare(std::string("HMC")) == 0) {
         if (!logconcave) throw Rcpp::exception("HMC is not supported for non first-order sampling");
+        if (F->params.L < 0) throw Rcpp::exception("The smoothness constant is absent");
+        if (F->params.m < 0) throw Rcpp::exception("The strong-convexity constant is absent");
         walk = hmc;
+    } else if (Rcpp::as<std::string>(Rcpp::as<Rcpp::List>(random_walk)["walk"]).compare(std::string("NUTS")) == 0) {
+        if (!logconcave) throw Rcpp::exception("NUTS is not supported for non first-order sampling");
+        walk = nuts;
     } else if (Rcpp::as<std::string>(Rcpp::as<Rcpp::List>(random_walk)["walk"]).compare(std::string("ULD")) == 0) {
         if (!logconcave) throw Rcpp::exception("ULD is not supported for non first-order sampling");
         walk = uld;
@@ -703,7 +745,7 @@ Rcpp::NumericMatrix sample_points(Rcpp::Nullable<Rcpp::Reference> P,
     if (numpoints % 2 == 1 && (walk == brdhr || walk == bcdhr)) numpoints--;
     MT RetMat(dim, numpoints);
     unsigned int jj = 0;
-
+    
     for (typename std::list<Point>::iterator rpit = randPoints.begin(); rpit!=randPoints.end(); rpit++, jj++) {
         if (gaussian) {
             RetMat.col(jj) = (*rpit).getCoefficients() + mode.getCoefficients();

diff --git a/examples/logconcave/CMakeLists.txt b/examples/logconcave/CMakeLists.txt
@@ -20,7 +20,7 @@ endif(COMMAND cmake_policy)
 
 option(DISABLE_NLP_ORACLES "Disable non-linear oracles (used in collocation)" ON)
 option(BUILTIN_EIGEN "Use eigen from ../external" OFF)
-option(USE_MKL "Use MKL library to build eigen" ON)
+option(USE_MKL "Use MKL library to build eigen" OFF)
 
 if(DISABLE_NLP_ORACLES)
   add_definitions(-DDISABLE_NLP_ORACLES)
@@ -73,16 +73,24 @@ else ()
     include_directories(BEFORE /usr/include/eigen3)
 endif(BUILTIN_EIGEN)
 
+find_library(BLAS NAMES libblas.so libblas.dylib PATHS /usr/local/Cellar/lapack/3.9.1_1/lib /usr/lib/x86_64-linux-gnu /usr/lib/i386-linux-gnu /usr/local/Cellar/openblas/0.3.15_1/lib /usr/lib)
+
 if (USE_MKL)
-  find_library(BLAS NAME libblas.so PATHS /usr/lib/x86_64-linux-gnu /usr/lib/i386-linux-gnu)
+  find_library(BLAS NAMES libblas.so libblas.dylib PATHS /usr/local/Cellar/lapack/3.9.1_1/lib /usr/lib/x86_64-linux-gnu /usr/lib/i386-linux-gnu /usr/local/Cellar/openblas/0.3.15_1/lib /usr/lib)
+  find_library(GFORTRAN NAME libgfortran.dylib PATHS /usr/local/Cellar/gcc/10.2.0_4/lib/gcc/10)
+  find_library(LAPACK NAME liblapack.dylib PATHS /usr/lib)
+  find_library(OPENMP NAME libiomp5.dylib PATHS /opt/intel/oneapi/compiler/2021.1.1/mac/compiler/lib)
+
   include_directories (BEFORE ${MKLROOT}/include)
-  set(PROJECT_LIBS ${BLAS_LIBRARIES})
+  set(PROJECT_LIBS ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES} ${GFORTRAN_LIBRARIES})
   set(MKL_LINK "-L${MKLROOT}/lib -Wl,-rpath,${MKLROOT}/lib -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl")
   add_definitions(-DEIGEN_USE_MKL_ALL)
+else()
+  set(MKL_LINK "")
 endif(USE_MKL)
 
 # Find lpsolve library
-find_library(LP_SOLVE NAMES liblpsolve55.so PATHS /usr/lib/lp_solve)
+find_library(LP_SOLVE NAMES liblpsolve55.so liblpsolve55.dylib PATHS /usr/lib/lp_solve /usr/local/lib)
 
 if (NOT LP_SOLVE)
   message(FATAL_ERROR "This program requires the lp_solve library, and will not be compiled.")
@@ -145,6 +153,6 @@ else ()
   TARGET_LINK_LIBRARIES(simple_hmc ${LP_SOLVE} ${BLAS} ${MKL_LINK} )
   TARGET_LINK_LIBRARIES(exponential_exact_hmc ${LP_SOLVE} ${BLAS} ${MKL_LINK}  )
   TARGET_LINK_LIBRARIES(gaussian_exact_hmc ${LP_SOLVE} ${BLAS} ${MKL_LINK}  )
-  
+
 
 endif()
diff --git a/external/cmake-files/LPSolve.cmake b/external/cmake-files/LPSolve.cmake
@@ -26,7 +26,7 @@ function(GetLPSolve)
       message(STATUS "lp_solve library found: ${LP_SOLVE_DIR}")
 
   endif()
-
+  
   include_directories(${LP_SOLVE_DIR})
 
 endfunction()
diff --git a/include/cartesian_geom/point.h b/include/cartesian_geom/point.h
@@ -81,6 +81,7 @@ class point
     void set_dimension(const unsigned int dim)
     {
         d = dim;
+        coeffs.setZero(d);
     }
 
     void set_coord(const unsigned int i, FT coord)

diff --git a/include/ode_solvers/leapfrog.hpp b/include/ode_solvers/leapfrog.hpp
@@ -56,6 +56,8 @@ struct LeapfrogODESolver {
   pts xs;
   pts xs_prev;
 
+  Point grad_x;
+
   MT _AA;
 
   std::pair<NT, int> pbpair;
@@ -69,11 +71,11 @@ struct LeapfrogODESolver {
   eta(step), eta0(step), t(initial_time), F(oracle), Ks(boundaries), xs(initial_state), adaptive(adaptive_) {
     dim = xs[0].dimension();
     _update_parameters = update_parameters();
+    grad_x.set_dimension(dim);
     initialize();
   };
 
 
-
   void initialize() {
       for (unsigned int i = 0; i < xs.size(); i++) {
           VT ar, av;
@@ -88,7 +90,6 @@ struct LeapfrogODESolver {
           Av.push_back(av);
           lambda_prev.push_back(NT(0));
       }
-      //step();
   }
 
   void disable_adaptive() {
@@ -101,27 +102,28 @@ struct LeapfrogODESolver {
 
   void step(int k, bool accepted) {
     num_steps++;
-
     if (adaptive) eta = (eta0 * num_steps) / (num_steps + num_reflections);
 
     xs_prev = xs;
     unsigned int x_index, v_index, it;
     t += eta;
+    Point y;
     for (unsigned int i = 1; i < xs.size(); i += 2) {
-        //pbpair.second  = -1;
+
       x_index = i - 1;
       v_index = i;
 
       // v' <- v + eta / 2 F(x)
-      Point z = F(v_index, xs_prev, t);
-      z = (eta / 2) * z;
-      xs[v_index] = xs[v_index] + z;
+      if (k == 0 && !accepted) {
+        grad_x = F(v_index, xs_prev, t);
+      }
+      xs[v_index] += (eta / 2) * grad_x;
 
       // x <- x + eta v'
-      Point y = xs[v_index];
+      y = xs[v_index];
 
       if (Ks[x_index] == NULL) {
-        xs[x_index] = xs_prev[x_index] + eta*y;
+        xs[x_index] += eta*y;
       }
       else {
         // Find intersection (assuming a line trajectory) between x and y
@@ -173,10 +175,8 @@ struct LeapfrogODESolver {
       }
 
       // tilde v <- v + eta / 2 F(tilde x)
-      z = F(v_index, xs, t);
-      z = (eta / 2) * z;
-      xs[v_index] = xs[v_index] + z;
-
+      grad_x = F(v_index, xs, t);
+      xs[v_index] += (eta / 2) * grad_x;
     }
 
   }
@@ -202,6 +202,19 @@ struct LeapfrogODESolver {
     xs[index] = p;
   }
 
+  NT get_eta() {
+    return eta;
+  }
+
+  void set_eta(NT &eta_) {
+    eta = eta_;
+    eta0 = eta_;
+  }
+
+  bounds get_bounds() {
+    return Ks;
+  }
+
 };