From c9ed5a5aff86737479c559ca175db451835ebc6c Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Thu, 17 Feb 2022 07:23:07 +0530
Subject: [PATCH 01/23] Updating the test description

---
 cpp/test/random/rng.cu | 34 +++++++++++++---------------------
 1 file changed, 13 insertions(+), 21 deletions(-)
diff --git a/cpp/test/random/rng.cu b/cpp/test/random/rng.cu
index c63763d5a4..cd8d296705 100644
--- a/cpp/test/random/rng.cu
+++ b/cpp/test/random/rng.cu
@@ -179,27 +179,19 @@ class RngTest : public ::testing::TestWithParam<RngInputs<T>> {
   int num_sigma;
 };
 
-// The measured mean and standard deviation for each tested distribution are,
-// of course, statistical variables. Thus setting an appropriate testing
-// tolerance essentially requires one to set a probability of test failure. We
-// choose to set this at 3-4 x sigma, i.e., a 99.7-99.9% confidence interval so that
-// the test will indeed pass. In quick experiments (using the identical
-// distributions given by NumPy/SciPy), the measured standard deviation is the
-// variable with the greatest variance and so we determined the variance for
-// each distribution and number of samples (32*1024 or 8*1024). Below
-// are listed the standard deviation for these tests.
-
-// Distribution: StdDev 32*1024, StdDev 8*1024
-// Normal: 0.0055, 0.011
-// LogNormal: 0.05, 0.1
-// Uniform: 0.003, 0.005
-// Gumbel: 0.005, 0.01
-// Logistic: 0.005, 0.01
-// Exp: 0.008, 0.015
-// Rayleigh: 0.0125, 0.025
-// Laplace: 0.02, 0.04
-
-// We generally want 4 x sigma >= 99.9% chance of success
+// In this test we generate pseudo-random values that follow various probability distributions such
+// as Normal, Laplace etc. To check the correctness of generated random variates we compute two
+// measures, mean and variance from the generated data. The computed values are matched against
+// their theoretically expected values for the corresponding distribution. The computed mean and
+// variance are statistical variables themselves and follow a Normal distribution. Which means,
+// there is 99+% chance that the computed values fall in the 3-sigma (standard deviation) interval
+// [theoretical_value - 3*sigma, theoretical_value + 3*sigma]. The values are practically
+// guaranteed to fall in the 4-sigma interval. Reference standard deviation of the computed
+// mean/variance distribution is calculated here
+// https://gist.github.com/vinaydes/cee04f50ff7e3365759603d39b7e079b Maximum standard deviation
+// observed here is ~1.5e-2, thus we use this as sigma in our test.
+// N O T E: Before adding any new test case below, make sure to calculate standard deviation for the
+// test parameters using above notebook.
 
 typedef RngTest<float> RngTestF;
 const std::vector<RngInputs<float>> inputsf = {

From 5db25eedc492062226cdd3bad445034fd8fb5032 Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Thu, 17 Feb 2022 07:25:29 +0530
Subject: [PATCH 02/23] Correcting the seed type and updating parameter
 description

---
 cpp/test/random/rng.cu | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/cpp/test/random/rng.cu b/cpp/test/random/rng.cu
index cd8d296705..603f990742 100644
--- a/cpp/test/random/rng.cu
+++ b/cpp/test/random/rng.cu
@@ -60,16 +60,15 @@ template <typename T>
 struct RngInputs {
   T tolerance;
   int len;
-  // start, end: for uniform
-  // mean, sigma: for normal/lognormal
-  // mean, beta: for gumbel
-  // mean, scale: for logistic and laplace
-  // lambda: for exponential
-  // sigma: for rayleigh
+  // Meaning of 'start' and 'end' parameter for various distributions
+  //
+  //         Uniform   Normal/Log-Normal   Gumbel   Logistic   Laplace   Exponential   Rayleigh
+  // start    start          mean           mean     mean       mean       lambda       sigma
+  // end       end           sigma          beta     scale      scale      Unused       Unused
   T start, end;
   RandomType type;
   GeneratorType gtype;
-  unsigned long long int seed;
+  uint64_t seed;
 };
 
 template <typename T>

From 873a2c63e249c950bd76141fe932662ab7288025 Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Thu, 17 Feb 2022 07:28:38 +0530
Subject: [PATCH 03/23] Changing the test case parameters

---
 cpp/test/random/rng.cu | 101 ++++++++++++++---------------------------
 1 file changed, 33 insertions(+), 68 deletions(-)

diff --git a/cpp/test/random/rng.cu b/cpp/test/random/rng.cu
index 603f990742..13ea1f5286 100644
--- a/cpp/test/random/rng.cu
+++ b/cpp/test/random/rng.cu
@@ -96,8 +96,6 @@ class RngTest : public ::testing::TestWithParam<RngInputs<T>> {
  protected:
   void SetUp() override
   {
-    // Tests are configured with their expected test-values sigma. For example,
-    // 4 x sigma indicates the test shouldn't fail 99.9% of the time.
     num_sigma = 4;
     Rng r(params.seed, params.gtype);
     switch (params.type) {
@@ -194,39 +192,22 @@ class RngTest : public ::testing::TestWithParam<RngInputs<T>> {
 
 typedef RngTest<float> RngTestF;
 const std::vector<RngInputs<float>> inputsf = {
-  {0.0055, 32 * 1024, 1.f, 1.f, RNG_Normal, GenPhilox, 1234ULL},
-  {0.011, 8 * 1024, 1.f, 1.f, RNG_Normal, GenPhilox, 1234ULL},
-  {0.05, 32 * 1024, 1.f, 1.f, RNG_LogNormal, GenPhilox, 1234ULL},
-  {0.1, 8 * 1024, 1.f, 1.f, RNG_LogNormal, GenPhilox, 1234ULL},
-  {0.003, 32 * 1024, -1.f, 1.f, RNG_Uniform, GenPhilox, 1234ULL},
-  {0.005, 8 * 1024, -1.f, 1.f, RNG_Uniform, GenPhilox, 1234ULL},
-  {0.005, 32 * 1024, 1.f, 1.f, RNG_Gumbel, GenPhilox, 1234ULL},
-  {0.01, 8 * 1024, 1.f, 1.f, RNG_Gumbel, GenPhilox, 1234ULL},
-  {0.005, 32 * 1024, 1.f, 1.f, RNG_Logistic, GenPhilox, 67632ULL},
-  {0.01, 8 * 1024, 1.f, 1.f, RNG_Logistic, GenPhilox, 1234ULL},
-  {0.008, 32 * 1024, 1.f, 1.f, RNG_Exp, GenPhilox, 1234ULL},
-  {0.015, 8 * 1024, 1.f, 1.f, RNG_Exp, GenPhilox, 1234ULL},
-  {0.0125, 32 * 1024, 1.f, 1.f, RNG_Rayleigh, GenPhilox, 1234ULL},
-  {0.025, 8 * 1024, 1.f, 1.f, RNG_Rayleigh, GenPhilox, 1234ULL},
-  {0.02, 32 * 1024, 1.f, 1.f, RNG_Laplace, GenPhilox, 1234ULL},
-  {0.04, 8 * 1024, 1.f, 1.f, RNG_Laplace, GenPhilox, 1234ULL},
-
-  {0.0055, 32 * 1024, 1.f, 1.f, RNG_Normal, GenPC, 1234ULL},
-  {0.011, 8 * 1024, 1.f, 1.f, RNG_Normal, GenPC, 1234ULL},
-  {0.05, 32 * 1024, 1.f, 1.f, RNG_LogNormal, GenPC, 1234ULL},
-  {0.1, 8 * 1024, 1.f, 1.f, RNG_LogNormal, GenPC, 1234ULL},
-  {0.003, 32 * 1024, -1.f, 1.f, RNG_Uniform, GenPC, 1234ULL},
-  {0.005, 8 * 1024, -1.f, 1.f, RNG_Uniform, GenPC, 1234ULL},
-  {0.005, 32 * 1024, 1.f, 1.f, RNG_Gumbel, GenPC, 1234ULL},
-  {0.01, 8 * 1024, 1.f, 1.f, RNG_Gumbel, GenPC, 1234ULL},
-  {0.005, 32 * 1024, 1.f, 1.f, RNG_Logistic, GenPC, 1234ULL},
-  {0.01, 8 * 1024, 1.f, 1.f, RNG_Logistic, GenPC, 1234ULL},
-  {0.008, 32 * 1024, 1.f, 1.f, RNG_Exp, GenPC, 1234ULL},
-  {0.015, 8 * 1024, 1.f, 1.f, RNG_Exp, GenPC, 1234ULL},
-  {0.0125, 32 * 1024, 1.f, 1.f, RNG_Rayleigh, GenPC, 1234ULL},
-  {0.025, 8 * 1024, 1.f, 1.f, RNG_Rayleigh, GenPC, 1234ULL},
-  {0.02, 32 * 1024, 1.f, 1.f, RNG_Laplace, GenPC, 1234ULL},
-  {0.04, 8 * 1024, 1.f, 1.f, RNG_Laplace, GenPC, 1234ULL}};
+  // Test with Philox
+  {1.5e-5f, 1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPhilox, 1234ULL},
+  {1.5e-5f, 1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPhilox, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 1.2f, 5.5f, RNG_Uniform, GenPhilox, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 0.1f, 1.3f, RNG_Gumbel, GenPhilox, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Exp, GenPhilox, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Rayleigh, GenPhilox, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 2.6f, 1.3f, RNG_Laplace, GenPhilox, 1234ULL},
+  // Test with PCG
+  {1.5e-5f, 1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPC, 1234ULL},
+  {1.5e-5f, 1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPC, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 1.2f, 5.5f, RNG_Uniform, GenPC, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 0.1f, 1.3f, RNG_Gumbel, GenPC, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Exp, GenPC, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Rayleigh, GenPC, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 2.6f, 1.3f, RNG_Laplace, GenPC, 1234ULL}};
 
 TEST_P(RngTestF, Result)
 {
@@ -239,39 +220,23 @@ INSTANTIATE_TEST_SUITE_P(RngTests, RngTestF, ::testing::ValuesIn(inputsf));
 
 typedef RngTest<double> RngTestD;
 const std::vector<RngInputs<double>> inputsd = {
-  {0.0055, 32 * 1024, 1.0, 1.0, RNG_Normal, GenPhilox, 1234ULL},
-  {0.011, 8 * 1024, 1.0, 1.0, RNG_Normal, GenPhilox, 1234ULL},
-  {0.05, 32 * 1024, 1.0, 1.0, RNG_LogNormal, GenPhilox, 1234ULL},
-  {0.1, 8 * 1024, 1.0, 1.0, RNG_LogNormal, GenPhilox, 1234ULL},
-  {0.003, 32 * 1024, -1.0, 1.0, RNG_Uniform, GenPhilox, 1234ULL},
-  {0.005, 8 * 1024, -1.0, 1.0, RNG_Uniform, GenPhilox, 1234ULL},
-  {0.005, 32 * 1024, 1.0, 1.0, RNG_Gumbel, GenPhilox, 1234ULL},
-  {0.01, 8 * 1024, 1.0, 1.0, RNG_Gumbel, GenPhilox, 1234ULL},
-  {0.005, 32 * 1024, 1.0, 1.0, RNG_Logistic, GenPhilox, 67632ULL},
-  {0.01, 8 * 1024, 1.0, 1.0, RNG_Logistic, GenPhilox, 1234ULL},
-  {0.008, 32 * 1024, 1.0, 1.0, RNG_Exp, GenPhilox, 1234ULL},
-  {0.015, 8 * 1024, 1.0, 1.0, RNG_Exp, GenPhilox, 1234ULL},
-  {0.0125, 32 * 1024, 1.0, 1.0, RNG_Rayleigh, GenPhilox, 1234ULL},
-  {0.025, 8 * 1024, 1.0, 1.0, RNG_Rayleigh, GenPhilox, 1234ULL},
-  {0.02, 32 * 1024, 1.0, 1.0, RNG_Laplace, GenPhilox, 1234ULL},
-  {0.04, 8 * 1024, 1.0, 1.0, RNG_Laplace, GenPhilox, 1234ULL},
-
-  {0.0055, 32 * 1024, 1.0, 1.0, RNG_Normal, GenPC, 1234ULL},
-  {0.011, 8 * 1024, 1.0, 1.0, RNG_Normal, GenPC, 1234ULL},
-  {0.05, 32 * 1024, 1.0, 1.0, RNG_LogNormal, GenPC, 1234ULL},
-  {0.1, 8 * 1024, 1.0, 1.0, RNG_LogNormal, GenPC, 1234ULL},
-  {0.003, 32 * 1024, -1.0, 1.0, RNG_Uniform, GenPC, 1234ULL},
-  {0.005, 8 * 1024, -1.0, 1.0, RNG_Uniform, GenPC, 1234ULL},
-  {0.005, 32 * 1024, 1.0, 1.0, RNG_Gumbel, GenPC, 1234ULL},
-  {0.01, 8 * 1024, 1.0, 1.0, RNG_Gumbel, GenPC, 1234ULL},
-  {0.005, 32 * 1024, 1.0, 1.0, RNG_Logistic, GenPC, 1234ULL},
-  {0.01, 8 * 1024, 1.0, 1.0, RNG_Logistic, GenPC, 1234ULL},
-  {0.008, 32 * 1024, 1.0, 1.0, RNG_Exp, GenPC, 1234ULL},
-  {0.015, 8 * 1024, 1.0, 1.0, RNG_Exp, GenPC, 1234ULL},
-  {0.0125, 32 * 1024, 1.0, 1.0, RNG_Rayleigh, GenPC, 1234ULL},
-  {0.025, 8 * 1024, 1.0, 1.0, RNG_Rayleigh, GenPC, 1234ULL},
-  {0.02, 32 * 1024, 1.0, 1.0, RNG_Laplace, GenPC, 1234ULL},
-  {0.04, 8 * 1024, 1.0, 1.0, RNG_Laplace, GenPC, 1234ULL}};
+  // Test with Philox
+  {1.5e-5f, 1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPhilox, 1234ULL},
+  {1.5e-5f, 1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPhilox, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 1.2f, 5.5f, RNG_Uniform, GenPhilox, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 0.1f, 1.3f, RNG_Gumbel, GenPhilox, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Exp, GenPhilox, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Rayleigh, GenPhilox, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 2.6f, 1.3f, RNG_Laplace, GenPhilox, 1234ULL},
+  // Test with PCG
+  {1.5e-5f, 1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPC, 1234ULL},
+  {1.5e-5f, 1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPC, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 1.2f, 5.5f, RNG_Uniform, GenPC, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 0.1f, 1.3f, RNG_Gumbel, GenPC, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Exp, GenPC, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Rayleigh, GenPC, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 2.6f, 1.3f, RNG_Laplace, GenPC, 1234ULL}};
+
 
 TEST_P(RngTestD, Result)
 {

From 672e455361293404b3b710445227ef9488444a59 Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Thu, 17 Feb 2022 07:33:21 +0530
Subject: [PATCH 04/23] Fixing NaNs/Infs in the unit tests

---
 cpp/include/raft/random/detail/rng_impl.cuh | 24 +++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/cpp/include/raft/random/detail/rng_impl.cuh b/cpp/include/raft/random/detail/rng_impl.cuh
index 2406456404..76bf30e43e 100644
--- a/cpp/include/raft/random/detail/rng_impl.cuh
+++ b/cpp/include/raft/random/detail/rng_impl.cuh
@@ -294,7 +294,11 @@ DI void custom_next(
   GenType& gen, OutType* val, GumbelDistParams<OutType> params, LenType idx = 0, LenType stride = 0)
 {
   OutType res = 0;
-  gen.next(res);
+
+  do {
+    gen.next(res);
+  } while (res == OutType(1.0));
+
   *val = params.mu - params.beta * raft::myLog(-raft::myLog(res));
 }
 
@@ -334,7 +338,11 @@ DI void custom_next(GenType& gen,
                     LenType stride = 0)
 {
   OutType res;
-  gen.next(res);
+
+  do {
+    gen.next(res);
+  } while (res == OutType(1.0));
+
   constexpr OutType one = (OutType)1.0;
   *val                  = -raft::myLog(one - res) / params.lambda;
 }
@@ -347,7 +355,11 @@ DI void custom_next(GenType& gen,
                     LenType stride = 0)
 {
   OutType res;
-  gen.next(res);
+
+  do {
+    gen.next(res);
+  } while (res == OutType(1.0));
+
   constexpr OutType one = (OutType)1.0;
   constexpr OutType two = (OutType)2.0;
   *val                  = raft::mySqrt(-two * raft::myLog(one - res)) * params.sigma;
@@ -361,7 +373,11 @@ DI void custom_next(GenType& gen,
                     LenType stride = 0)
 {
   OutType res, out;
-  gen.next(res);
+
+  do {
+    gen.next(res);
+  } while (res == OutType(0.0) || res == OutType(1.0));
+
   constexpr OutType one     = (OutType)1.0;
   constexpr OutType two     = (OutType)2.0;
   constexpr OutType oneHalf = (OutType)0.5;

From 727dbc3d4b34ab23edf43e1434dde0e56952b329 Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Thu, 17 Feb 2022 07:38:19 +0530
Subject: [PATCH 05/23] Formatting

---
 cpp/test/random/rng.cu | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cpp/test/random/rng.cu b/cpp/test/random/rng.cu
index 13ea1f5286..eee4075c22 100644
--- a/cpp/test/random/rng.cu
+++ b/cpp/test/random/rng.cu
@@ -237,7 +237,6 @@ const std::vector<RngInputs<double>> inputsd = {
   {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Rayleigh, GenPC, 1234ULL},
   {1.5e-2f, 1024 * 1024, 2.6f, 1.3f, RNG_Laplace, GenPC, 1234ULL}};
 
-
 TEST_P(RngTestD, Result)
 {
   double meanvar[2];

From 555339eabf8b90456afb14e6aedeab3fc0452736 Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Thu, 17 Feb 2022 07:42:49 +0530
Subject: [PATCH 06/23] Moving the test description

---
 cpp/test/random/rng.cu | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/cpp/test/random/rng.cu b/cpp/test/random/rng.cu
index eee4075c22..eac955bebf 100644
--- a/cpp/test/random/rng.cu
+++ b/cpp/test/random/rng.cu
@@ -80,6 +80,20 @@ template <typename T>
 #include <sys/timeb.h>
 #include <time.h>
 
+// In this test we generate pseudo-random values that follow various probability distributions such
+// as Normal, Laplace etc. To check the correctness of generated random variates we compute two
+// measures, mean and variance from the generated data. The computed values are matched against
+// their theoretically expected values for the corresponding distribution. The computed mean and
+// variance are statistical variables themselves and follow a Normal distribution, which means
+// there is a 99+% chance that the computed values fall in the 3-sigma (standard deviation)
+// interval [theoretical_value - 3*sigma, theoretical_value + 3*sigma]. The values are practically
+// guaranteed to fall in the 4-sigma interval. The reference standard deviation of the computed
+// mean/variance distribution is calculated in this notebook:
+//   https://gist.github.com/vinaydes/cee04f50ff7e3365759603d39b7e079b
+// The maximum standard deviation observed there is ~1.5e-2, so we use that as sigma in our test.
+// N O T E: Before adding any new test case below, make sure to calculate standard deviation for the
+// test parameters using above notebook.
+
 template <typename T>
 class RngTest : public ::testing::TestWithParam<RngInputs<T>> {
  public:
@@ -176,20 +190,6 @@ class RngTest : public ::testing::TestWithParam<RngInputs<T>> {
   int num_sigma;
 };
 
-// In this test we generate pseudo-random values that follow various probability distributions such
-// as Normal, Laplace etc. To check the correctness of generated random variates we compute two
-// measures, mean and variance from the generated data. The computed values are matched against
-// their theoretically expected values for the corresponding distribution. The computed mean and
-// variance are statistical variables themselves and follow a Normal distribution. Which means,
-// there is 99+% chance that the computed values fall in the 3-sigma (standard deviation) interval
-// [theoretical_value - 3*sigma, theoretical_value + 3*sigma]. The values are practically
-// guaranteed to fall in the 4-sigma interval. Reference standard deviation of the computed
-// mean/variance distribution is calculated here
-// https://gist.github.com/vinaydes/cee04f50ff7e3365759603d39b7e079b Maximum standard deviation
-// observed here is ~1.5e-2, thus we use this as sigma in our test.
-// N O T E: Before adding any new test case below, make sure to calculate standard deviation for the
-// test parameters using above notebook.
-
 typedef RngTest<float> RngTestF;
 const std::vector<RngInputs<float>> inputsf = {
   // Test with Philox

From 3676480fa76bb9611336a7bb88d1afcde1ed7a34 Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Thu, 17 Feb 2022 07:47:59 +0530
Subject: [PATCH 07/23] Correcting tolerances

---
 cpp/test/random/rng.cu | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/cpp/test/random/rng.cu b/cpp/test/random/rng.cu
index eac955bebf..b0a1e55ef2 100644
--- a/cpp/test/random/rng.cu
+++ b/cpp/test/random/rng.cu
@@ -193,16 +193,16 @@ class RngTest : public ::testing::TestWithParam<RngInputs<T>> {
 typedef RngTest<float> RngTestF;
 const std::vector<RngInputs<float>> inputsf = {
   // Test with Philox
-  {1.5e-5f, 1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPhilox, 1234ULL},
-  {1.5e-5f, 1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPhilox, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPhilox, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPhilox, 1234ULL},
   {1.5e-2f, 1024 * 1024, 1.2f, 5.5f, RNG_Uniform, GenPhilox, 1234ULL},
   {1.5e-2f, 1024 * 1024, 0.1f, 1.3f, RNG_Gumbel, GenPhilox, 1234ULL},
   {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Exp, GenPhilox, 1234ULL},
   {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Rayleigh, GenPhilox, 1234ULL},
   {1.5e-2f, 1024 * 1024, 2.6f, 1.3f, RNG_Laplace, GenPhilox, 1234ULL},
   // Test with PCG
-  {1.5e-5f, 1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPC, 1234ULL},
-  {1.5e-5f, 1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPC, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPC, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPC, 1234ULL},
   {1.5e-2f, 1024 * 1024, 1.2f, 5.5f, RNG_Uniform, GenPC, 1234ULL},
   {1.5e-2f, 1024 * 1024, 0.1f, 1.3f, RNG_Gumbel, GenPC, 1234ULL},
   {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Exp, GenPC, 1234ULL},
@@ -221,16 +221,16 @@ INSTANTIATE_TEST_SUITE_P(RngTests, RngTestF, ::testing::ValuesIn(inputsf));
 typedef RngTest<double> RngTestD;
 const std::vector<RngInputs<double>> inputsd = {
   // Test with Philox
-  {1.5e-5f, 1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPhilox, 1234ULL},
-  {1.5e-5f, 1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPhilox, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPhilox, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPhilox, 1234ULL},
   {1.5e-2f, 1024 * 1024, 1.2f, 5.5f, RNG_Uniform, GenPhilox, 1234ULL},
   {1.5e-2f, 1024 * 1024, 0.1f, 1.3f, RNG_Gumbel, GenPhilox, 1234ULL},
   {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Exp, GenPhilox, 1234ULL},
   {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Rayleigh, GenPhilox, 1234ULL},
   {1.5e-2f, 1024 * 1024, 2.6f, 1.3f, RNG_Laplace, GenPhilox, 1234ULL},
   // Test with PCG
-  {1.5e-5f, 1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPC, 1234ULL},
-  {1.5e-5f, 1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPC, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPC, 1234ULL},
+  {1.5e-2f, 1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPC, 1234ULL},
   {1.5e-2f, 1024 * 1024, 1.2f, 5.5f, RNG_Uniform, GenPC, 1234ULL},
   {1.5e-2f, 1024 * 1024, 0.1f, 1.3f, RNG_Gumbel, GenPC, 1234ULL},
   {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Exp, GenPC, 1234ULL},

From a67724689893b8a8def3ef110456d6b7e15a93ba Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Thu, 17 Feb 2022 10:50:53 +0530
Subject: [PATCH 08/23] Consolidating tolerance parameters

---
 cpp/test/random/rng.cu | 70 +++++++++++++++++++++---------------------
 1 file changed, 35 insertions(+), 35 deletions(-)

diff --git a/cpp/test/random/rng.cu b/cpp/test/random/rng.cu
index b0a1e55ef2..139345fec2 100644
--- a/cpp/test/random/rng.cu
+++ b/cpp/test/random/rng.cu
@@ -58,7 +58,6 @@ __global__ void meanKernel(T* out, const T* data, int len)
 
 template <typename T>
 struct RngInputs {
-  T tolerance;
   int len;
   // Meaning of 'start' and 'end' parameter for various distributions
   //
@@ -94,6 +93,9 @@ template <typename T>
 // N O T E: Before adding any new test case below, make sure to calculate standard deviation for the
 // test parameters using above notebook.
 
+constexpr int NUM_SIGMA    = 4;
+constexpr double MAX_SIGMA = 1.5e-2;
+
 template <typename T>
 class RngTest : public ::testing::TestWithParam<RngInputs<T>> {
  public:
@@ -110,7 +112,6 @@ class RngTest : public ::testing::TestWithParam<RngInputs<T>> {
  protected:
   void SetUp() override
   {
-    num_sigma = 4;
     Rng r(params.seed, params.gtype);
     switch (params.type) {
       case RNG_Normal: r.normal(data.data(), params.len, params.start, params.end, stream); break;
@@ -187,62 +188,61 @@ class RngTest : public ::testing::TestWithParam<RngInputs<T>> {
   RngInputs<T> params;
   rmm::device_uvector<T> data, stats;
   T h_stats[2];  // mean, var
-  int num_sigma;
 };
 
 typedef RngTest<float> RngTestF;
 const std::vector<RngInputs<float>> inputsf = {
   // Test with Philox
-  {1.5e-2f, 1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPhilox, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPhilox, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 1.2f, 5.5f, RNG_Uniform, GenPhilox, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 0.1f, 1.3f, RNG_Gumbel, GenPhilox, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Exp, GenPhilox, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Rayleigh, GenPhilox, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 2.6f, 1.3f, RNG_Laplace, GenPhilox, 1234ULL},
+  {1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPhilox, 1234ULL},
+  {1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPhilox, 1234ULL},
+  {1024 * 1024, 1.2f, 5.5f, RNG_Uniform, GenPhilox, 1234ULL},
+  {1024 * 1024, 0.1f, 1.3f, RNG_Gumbel, GenPhilox, 1234ULL},
+  {1024 * 1024, 1.6f, 0.0f, RNG_Exp, GenPhilox, 1234ULL},
+  {1024 * 1024, 1.6f, 0.0f, RNG_Rayleigh, GenPhilox, 1234ULL},
+  {1024 * 1024, 2.6f, 1.3f, RNG_Laplace, GenPhilox, 1234ULL},
   // Test with PCG
-  {1.5e-2f, 1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPC, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPC, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 1.2f, 5.5f, RNG_Uniform, GenPC, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 0.1f, 1.3f, RNG_Gumbel, GenPC, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Exp, GenPC, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Rayleigh, GenPC, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 2.6f, 1.3f, RNG_Laplace, GenPC, 1234ULL}};
+  {1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPC, 1234ULL},
+  {1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPC, 1234ULL},
+  {1024 * 1024, 1.2f, 5.5f, RNG_Uniform, GenPC, 1234ULL},
+  {1024 * 1024, 0.1f, 1.3f, RNG_Gumbel, GenPC, 1234ULL},
+  {1024 * 1024, 1.6f, 0.0f, RNG_Exp, GenPC, 1234ULL},
+  {1024 * 1024, 1.6f, 0.0f, RNG_Rayleigh, GenPC, 1234ULL},
+  {1024 * 1024, 2.6f, 1.3f, RNG_Laplace, GenPC, 1234ULL}};
 
 TEST_P(RngTestF, Result)
 {
   float meanvar[2];
   getExpectedMeanVar(meanvar);
-  ASSERT_TRUE(match(meanvar[0], h_stats[0], CompareApprox<float>(num_sigma * params.tolerance)));
-  ASSERT_TRUE(match(meanvar[1], h_stats[1], CompareApprox<float>(num_sigma * params.tolerance)));
+  ASSERT_TRUE(match(meanvar[0], h_stats[0], CompareApprox<float>(NUM_SIGMA * MAX_SIGMA)));
+  ASSERT_TRUE(match(meanvar[1], h_stats[1], CompareApprox<float>(NUM_SIGMA * MAX_SIGMA)));
 }
 INSTANTIATE_TEST_SUITE_P(RngTests, RngTestF, ::testing::ValuesIn(inputsf));
 
 typedef RngTest<double> RngTestD;
 const std::vector<RngInputs<double>> inputsd = {
   // Test with Philox
-  {1.5e-2f, 1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPhilox, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPhilox, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 1.2f, 5.5f, RNG_Uniform, GenPhilox, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 0.1f, 1.3f, RNG_Gumbel, GenPhilox, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Exp, GenPhilox, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Rayleigh, GenPhilox, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 2.6f, 1.3f, RNG_Laplace, GenPhilox, 1234ULL},
+  {1024 * 1024, 3.0, 1.3, RNG_Normal, GenPhilox, 1234ULL},  // double literals: no 'f' suffix
+  {1024 * 1024, 1.2, 0.1, RNG_LogNormal, GenPhilox, 1234ULL},
+  {1024 * 1024, 1.2, 5.5, RNG_Uniform, GenPhilox, 1234ULL},
+  {1024 * 1024, 0.1, 1.3, RNG_Gumbel, GenPhilox, 1234ULL},
+  {1024 * 1024, 1.6, 0.0, RNG_Exp, GenPhilox, 1234ULL},       // 'end' is unused for Exp
+  {1024 * 1024, 1.6, 0.0, RNG_Rayleigh, GenPhilox, 1234ULL},  // 'end' is unused for Rayleigh
+  {1024 * 1024, 2.6, 1.3, RNG_Laplace, GenPhilox, 1234ULL},
   // Test with PCG
-  {1.5e-2f, 1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPC, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPC, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 1.2f, 5.5f, RNG_Uniform, GenPC, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 0.1f, 1.3f, RNG_Gumbel, GenPC, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Exp, GenPC, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 1.6f, 0.0f, RNG_Rayleigh, GenPC, 1234ULL},
-  {1.5e-2f, 1024 * 1024, 2.6f, 1.3f, RNG_Laplace, GenPC, 1234ULL}};
+  {1024 * 1024, 3.0, 1.3, RNG_Normal, GenPC, 1234ULL},
+  {1024 * 1024, 1.2, 0.1, RNG_LogNormal, GenPC, 1234ULL},
+  {1024 * 1024, 1.2, 5.5, RNG_Uniform, GenPC, 1234ULL},
+  {1024 * 1024, 0.1, 1.3, RNG_Gumbel, GenPC, 1234ULL},
+  {1024 * 1024, 1.6, 0.0, RNG_Exp, GenPC, 1234ULL},
+  {1024 * 1024, 1.6, 0.0, RNG_Rayleigh, GenPC, 1234ULL},
+  {1024 * 1024, 2.6, 1.3, RNG_Laplace, GenPC, 1234ULL}};
 
 TEST_P(RngTestD, Result)
 {
   double meanvar[2];
   getExpectedMeanVar(meanvar);
-  ASSERT_TRUE(match(meanvar[0], h_stats[0], CompareApprox<double>(num_sigma * params.tolerance)));
-  ASSERT_TRUE(match(meanvar[1], h_stats[1], CompareApprox<double>(num_sigma * params.tolerance)));
+  ASSERT_TRUE(match(meanvar[0], h_stats[0], CompareApprox<double>(NUM_SIGMA * MAX_SIGMA)));
+  ASSERT_TRUE(match(meanvar[1], h_stats[1], CompareApprox<double>(NUM_SIGMA * MAX_SIGMA)));
 }
 INSTANTIATE_TEST_SUITE_P(RngTests, RngTestD, ::testing::ValuesIn(inputsd));
 

From 83dd8135d9a2c5d0e0c3f1b4a5c935287f00853f Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Thu, 17 Feb 2022 10:51:19 +0530
Subject: [PATCH 09/23] Removing unused function

---
 cpp/test/random/rng.cu | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/cpp/test/random/rng.cu b/cpp/test/random/rng.cu
index 139345fec2..5ba8591b42 100644
--- a/cpp/test/random/rng.cu
+++ b/cpp/test/random/rng.cu
@@ -70,12 +70,6 @@ struct RngInputs {
   uint64_t seed;
 };
 
-template <typename T>
-::std::ostream& operator<<(::std::ostream& os, const RngInputs<T>& dims)
-{
-  return os;
-}
-
 #include <sys/timeb.h>
 #include <time.h>
 

From dc14296b4981e03c770b9979a321cf5e06264e28 Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Thu, 17 Feb 2022 10:53:21 +0530
Subject: [PATCH 10/23] Moving include statement to correct place

---
 cpp/test/random/rng.cu | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/cpp/test/random/rng.cu b/cpp/test/random/rng.cu
index 5ba8591b42..4426c3f0e1 100644
--- a/cpp/test/random/rng.cu
+++ b/cpp/test/random/rng.cu
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <sys/timeb.h>
+
 #include "../test_utils.h"
 #include <cub/cub.cuh>
 #include <gtest/gtest.h>
@@ -70,9 +72,6 @@ struct RngInputs {
   uint64_t seed;
 };
 
-#include <sys/timeb.h>
-#include <time.h>
-
 // In this test we generate pseudo-random values that follow various probability distributions such
 // as Normal, Laplace etc. To check the correctness of generated random variates we compute two
 // measures, mean and variance from the generated data. The computed values are matched against

From 500864dcb568b40a26b7a91a1197dfcbe51877eb Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Thu, 24 Feb 2022 09:47:21 +0530
Subject: [PATCH 11/23] Replacing cuRAND uniform calls to change the range of
 generation from (0.0, 1.0] to [0.0, 1.0)

---
 cpp/include/raft/random/detail/rng_impl.cuh | 38 ++++++++++++++++++---
 1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/cpp/include/raft/random/detail/rng_impl.cuh b/cpp/include/raft/random/detail/rng_impl.cuh
index 76bf30e43e..8ef18d4916 100644
--- a/cpp/include/raft/random/detail/rng_impl.cuh
+++ b/cpp/include/raft/random/detail/rng_impl.cuh
@@ -221,8 +221,15 @@ DI void custom_next(
   GenType& gen, OutType* val, NormalDistParams<OutType> params, LenType idx = 0, LenType stride = 0)
 {
   OutType res1, res2;
-  gen.next(res1);
-  gen.next(res2);
+
+  do {
+    gen.next(res1);
+  } while (res1 == OutType(0.0));
+
+  do {
+    gen.next(res2);
+  } while (res2 == OutType(0.0));
+
   box_muller_transform<OutType>(res1, res2, params.sigma, params.mu);
   *val       = res1;
   *(val + 1) = res2;
@@ -467,8 +474,31 @@ struct PhiloxGenerator {
     return ret;
   }
 
-  DI void next(float& ret) { ret = curand_uniform(&(this->philox_state)); }
-  DI void next(double& ret) { ret = curand_uniform_double(&(this->philox_state)); }
+  DI float next_float()
+  {
+    float ret;
+    uint32_t val = next_u32() >> 8;  // keep the upper 24 bits of the 32-bit draw
+    ret          = static_cast<float>(val) / (1U << 24);  // val / 2^24, i.e. a value in [0, 1)
+    return ret;
+  }
+
+  DI double next_double()
+  {
+    double ret;
+    uint64_t val = next_u64() >> 11;  // keep the upper 53 bits; val / 2^53 is then in [0, 1)
+    ret          = static_cast<double>(val) / (1ULL << 53);  // unsigned long can be 32 bits wide
+    return ret;
+  }
+
+  DI void next(float& ret) {
+    // Was: ret = curand_uniform(&(this->philox_state));  -- (0.0, 1.0]; now [0.0, 1.0)
+    ret = next_float();
+  }
+
+  DI void next(double& ret) {
+    // Was: ret = curand_uniform_double(&(this->philox_state));  -- (0.0, 1.0]; now [0.0, 1.0)
+    ret = next_double();
+  }
 
   DI void next(uint32_t& ret) { ret = next_u32(); }
   DI void next(uint64_t& ret) { ret = next_u64(); }

From 422c37f71b6b488747facbb385eb7b801332035b Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Thu, 24 Feb 2022 09:49:30 +0530
Subject: [PATCH 12/23] Added a temporary test

---
 cpp/test/random/rng.cu | 47 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/cpp/test/random/rng.cu b/cpp/test/random/rng.cu
index 4426c3f0e1..42a96e4692 100644
--- a/cpp/test/random/rng.cu
+++ b/cpp/test/random/rng.cu
@@ -274,6 +274,50 @@ std::ostream& operator<<(std::ostream& out, const std::vector<T>& v)
   return out;
 }
 
+template<typename T>
+__global__ void test_kernel(T* g_min, T* g_max) {  // caller must initialize *g_min, *g_max
+  RngState state;
+  __shared__ T min_val, max_val;  // per-block running min / max
+  T l_min_val = T(0.5), l_max_val = T(0.5);  // per-thread running min / max
+  if (threadIdx.x == 0) {
+    min_val = T(0.5);
+    max_val = T(0.5);
+  }
+  __syncthreads();  // shared values must be set before any thread updates them
+  state.seed = 1234ULL;
+  state.base_subsequence = 0;
+  PhiloxGenerator r(state, blockIdx.x * blockDim.x + threadIdx.x);
+  // PCGenerator r(state, blockIdx.x * blockDim.x + threadIdx.x);
+  T res;  // sample in the type under test, so next(T&) is the overload being probed
+  for (int i = 0; i < 100000; i++) {
+    r.next(res);
+    if (res < l_min_val) l_min_val = res;
+    if (res > l_max_val) l_max_val = res;
+  }
+  // Fold the results: thread -> block (shared memory) -> caller's g_min / g_max.
+  raft::myAtomicMin(&min_val, l_min_val);
+  raft::myAtomicMax(&max_val, l_max_val);
+  __syncthreads();  // every thread's contribution must land before thread 0 publishes
+  if (threadIdx.x == 0) {
+    raft::myAtomicMin(g_min, min_val);
+    raft::myAtomicMax(g_max, max_val);
+  }
+}
+
+TEST(Rng, Hello) {
+  // Temporary probe: prints min(sample) and 1.0 - max(sample) over all generated values.
+  // Both slots start at 0.5, the kernel's own starting value: cudaMalloc() does not initialize
+  // memory, and the kernel only applies atomic min/max on top of whatever is already there.
+  double h_minmax[2] = {0.5, 0.5};  // [0] = min, [1] = max
+  double* d_minmax   = nullptr;
+  RAFT_CUDA_TRY(cudaMalloc(&d_minmax, sizeof(h_minmax)));
+  RAFT_CUDA_TRY(cudaMemcpy(d_minmax, h_minmax, sizeof(h_minmax), cudaMemcpyHostToDevice));
+  test_kernel<double><<<80, 1024>>>(d_minmax, d_minmax + 1);
+  RAFT_CUDA_TRY(cudaMemcpy(h_minmax, d_minmax, sizeof(h_minmax), cudaMemcpyDeviceToHost));
+  printf("%.10e %.10e\n", h_minmax[0], 1.0 - h_minmax[1]);
+  RAFT_CUDA_TRY(cudaFree(d_minmax));
+}
+
 // The following tests the 3 random number generators by checking that the
 // measured mean error is close to the well-known analytical result
 // (sigma/sqrt(n_samples)). To compute the mean error, we a number of
@@ -328,7 +372,8 @@ TEST(Rng, MeanError)
     auto diff_expected_vs_measured_mean_error =
       std::abs(d_std_of_mean - d_std / std::sqrt(num_samples));
 
-    ASSERT_TRUE((diff_expected_vs_measured_mean_error / d_std_of_mean_analytical < 0.5));
+    ASSERT_TRUE((diff_expected_vs_measured_mean_error / d_std_of_mean_analytical < 0.5))
+      << "Failed with seed: " << seed << "\nrtype: " << rtype;
   }
   RAFT_CUDA_TRY(cudaStreamDestroy(stream));
 

From 8f53e4af1b56f33be8cfc60b34d3ddda22ae055e Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Fri, 25 Feb 2022 20:15:06 +0530
Subject: [PATCH 13/23] Removing check on res2 as it is not needed

---
 cpp/include/raft/random/detail/rng_impl.cuh | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/cpp/include/raft/random/detail/rng_impl.cuh b/cpp/include/raft/random/detail/rng_impl.cuh
index 8ef18d4916..6882c404dc 100644
--- a/cpp/include/raft/random/detail/rng_impl.cuh
+++ b/cpp/include/raft/random/detail/rng_impl.cuh
@@ -226,9 +226,7 @@ DI void custom_next(
     gen.next(res1);
   } while (res1 == OutType(0.0));
 
-  do {
-    gen.next(res2);
-  } while (res2 == OutType(0.0));
+  gen.next(res2);
 
   box_muller_transform<OutType>(res1, res2, params.sigma, params.mu);
   *val       = res1;

From 8c600421999ebcb9af79c4ae5013b099f739efc1 Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Fri, 25 Feb 2022 20:20:46 +0530
Subject: [PATCH 14/23] Adding check for all Box-Muller calls

---
 cpp/include/raft/random/detail/rng_impl.cuh | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/cpp/include/raft/random/detail/rng_impl.cuh b/cpp/include/raft/random/detail/rng_impl.cuh
index 6882c404dc..6a60f8d9da 100644
--- a/cpp/include/raft/random/detail/rng_impl.cuh
+++ b/cpp/include/raft/random/detail/rng_impl.cuh
@@ -241,7 +241,11 @@ DI void custom_next(GenType& gen,
                     LenType stride = 0)
 {
   IntType res1_int, res2_int;
-  gen.next(res1_int);
+
+  do {
+    gen.next(res1_int);
+  } while (res1_int == 0);
+
   gen.next(res2_int);
   double res1  = static_cast<double>(res1_int);
   double res2  = static_cast<double>(res2_int);
@@ -260,7 +264,11 @@ DI void custom_next(GenType& gen,
                     LenType stride)
 {
   OutType res1, res2;
-  gen.next(res1);
+
+  do {
+    gen.next(res1);
+  } while (res1 == OutType(0.0));
+
   gen.next(res2);
   LenType col1  = idx % params.n_cols;
   LenType col2  = (idx + stride) % params.n_cols;
@@ -315,7 +323,10 @@ DI void custom_next(GenType& gen,
                     LenType stride = 0)
 {
   OutType res1 = 0, res2 = 0;
-  gen.next(res1);
+  do {
+    gen.next(res1);
+  } while (res1 == OutType(0.0));
+
   gen.next(res2);
   box_muller_transform<OutType>(res1, res2, params.sigma, params.mu);
   *val       = raft::myExp(res1);

From 98c2b2e1f12562afe09cd6deea087d90a97a0a63 Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Fri, 25 Feb 2022 20:47:13 +0530
Subject: [PATCH 15/23] Fixing the log related checks

---
 cpp/include/raft/random/detail/rng_impl.cuh | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/cpp/include/raft/random/detail/rng_impl.cuh b/cpp/include/raft/random/detail/rng_impl.cuh
index 6a60f8d9da..0f94976514 100644
--- a/cpp/include/raft/random/detail/rng_impl.cuh
+++ b/cpp/include/raft/random/detail/rng_impl.cuh
@@ -310,7 +310,7 @@ DI void custom_next(
 
   do {
     gen.next(res);
-  } while (res == OutType(1.0));
+  } while (res == OutType(0.0));
 
   *val = params.mu - params.beta * raft::myLog(-raft::myLog(res));
 }
@@ -341,7 +341,11 @@ DI void custom_next(GenType& gen,
                     LenType stride = 0)
 {
   OutType res;
-  gen.next(res);
+
+  do {
+    gen.next(res);
+  } while (res == OutType(0.0));
+
   constexpr OutType one = (OutType)1.0;
   *val                  = params.mu - params.scale * raft::myLog(one / res - one);
 }
@@ -354,11 +358,7 @@ DI void custom_next(GenType& gen,
                     LenType stride = 0)
 {
   OutType res;
-
-  do {
-    gen.next(res);
-  } while (res == OutType(1.0));
-
+  gen.next(res);
   constexpr OutType one = (OutType)1.0;
   *val                  = -raft::myLog(one - res) / params.lambda;
 }
@@ -371,10 +371,7 @@ DI void custom_next(GenType& gen,
                     LenType stride = 0)
 {
   OutType res;
-
-  do {
-    gen.next(res);
-  } while (res == OutType(1.0));
+  gen.next(res);
 
   constexpr OutType one = (OutType)1.0;
   constexpr OutType two = (OutType)2.0;
@@ -392,7 +389,7 @@ DI void custom_next(GenType& gen,
 
   do {
     gen.next(res);
-  } while (res == OutType(0.0) || res == OutType(1.0));
+  } while (res == OutType(0.0));
 
   constexpr OutType one     = (OutType)1.0;
   constexpr OutType two     = (OutType)2.0;

From 758863fcc3456924d0db753fbe412a73d97ad027 Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Fri, 25 Feb 2022 20:49:32 +0530
Subject: [PATCH 16/23] Removing the debug test

---
 cpp/test/random/rng.cu | 55 ++++--------------------------------------
 1 file changed, 5 insertions(+), 50 deletions(-)

diff --git a/cpp/test/random/rng.cu b/cpp/test/random/rng.cu
index 42a96e4692..15215f4def 100644
--- a/cpp/test/random/rng.cu
+++ b/cpp/test/random/rng.cu
@@ -274,56 +274,11 @@ std::ostream& operator<<(std::ostream& out, const std::vector<T>& v)
   return out;
 }
 
-template<typename T>
-__global__ void test_kernel(T* g_min, T* g_max) {
-  RngState state;
-  __shared__ T min_val, max_val;
-  T l_min_val = T(0.5), l_max_val = T(0.5);
-  if (threadIdx.x == 0) {
-    min_val = T(0.5);
-    max_val = T(0.5);
-  }
-  __syncthreads();
-  state.seed = 1234ULL;
-  state.base_subsequence = 0;
-  PhiloxGenerator r(state, blockIdx.x * blockDim.x + threadIdx.x);
-  // PCGenerator r(state, blockIdx.x * blockDim.x + threadIdx.x);
-  float res;
-  for (int i = 0; i < 100000; i++) {
-    r.next(res);
-    if (res < l_min_val) l_min_val = res;
-    if (res > l_max_val) l_max_val = res;
-  }
-
-  raft::myAtomicMin(&min_val, l_min_val);
-  raft::myAtomicMax(&max_val, l_max_val);
-  __syncthreads();
-  if (threadIdx.x == 0) {
-    raft::myAtomicMin(g_min, min_val);
-    raft::myAtomicMax(g_max, max_val);
-  }
-}
-
-TEST(Rng, Hello) {
-
-  double g_min, g_max;
-  double *dev_g_min, *dev_g_max;
-  cudaMalloc(&dev_g_min, 8);
-  cudaMalloc(&dev_g_max, 8);
-  test_kernel<double><<<80, 1024>>>(dev_g_min, dev_g_max);
-  cudaMemcpy(&g_min, dev_g_min, 8, cudaMemcpyDeviceToHost);
-  cudaMemcpy(&g_max, dev_g_max, 8, cudaMemcpyDeviceToHost);
-  printf("%.10e %.10e\n", g_min, 1.0 - g_max);
-
-  RAFT_CUDA_TRY(cudaDeviceSynchronize());
-}
-
-// The following tests the 3 random number generators by checking that the
-// measured mean error is close to the well-known analytical result
-// (sigma/sqrt(n_samples)). To compute the mean error, we a number of
-// experiments computing the mean, giving us a distribution of the mean
-// itself. The mean error is simply the standard deviation of this
-// distribution (the standard deviation of the mean).
+// The following tests the two random number generators by checking that the measured mean error is
+// close to the well-known analytical result (sigma/sqrt(n_samples)). To compute the mean error, we
+// run a number of experiments computing the mean, giving us a distribution of the mean itself.
+// The mean error is simply the standard deviation of this distribution (the standard deviation of
+// the mean).
 TEST(Rng, MeanError)
 {
   timeb time_struct;

From b3ebe3b294c2da76a2172c63e49397380d74d1df Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Fri, 25 Feb 2022 21:02:18 +0530
Subject: [PATCH 17/23] Formatting fixes

---
 cpp/include/raft/random/detail/rng_impl.cuh | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/cpp/include/raft/random/detail/rng_impl.cuh b/cpp/include/raft/random/detail/rng_impl.cuh
index 0f94976514..893971c190 100644
--- a/cpp/include/raft/random/detail/rng_impl.cuh
+++ b/cpp/include/raft/random/detail/rng_impl.cuh
@@ -496,12 +496,14 @@ struct PhiloxGenerator {
     return ret;
   }
 
-  DI void next(float& ret) {
-    //ret = curand_uniform(&(this->philox_state));
+  DI void next(float& ret)
+  {
+    // ret = curand_uniform(&(this->philox_state));
     ret = next_float();
   }
 
-  DI void next(double& ret) {
+  DI void next(double& ret)
+  {
     // ret = curand_uniform_double(&(this->philox_state));
     ret = next_double();
   }

From be22797c6589f0971bcd3ca4ebd8cfc07de053da Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Wed, 2 Mar 2022 22:12:08 +0530
Subject: [PATCH 18/23] Removing the possibility of passing zero to Box Muller
 transform

---
 cpp/include/raft/random/detail/make_blobs.cuh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/cpp/include/raft/random/detail/make_blobs.cuh b/cpp/include/raft/random/detail/make_blobs.cuh
index b79178567b..1d833ac6b7 100644
--- a/cpp/include/raft/random/detail/make_blobs.cuh
+++ b/cpp/include/raft/random/detail/make_blobs.cuh
@@ -107,7 +107,9 @@ __global__ void generate_data_kernel(DataT* out,
   IdxT len          = n_rows * n_cols;
   for (IdxT idx = tid; idx < len; idx += stride) {
     DataT val1, val2;
-    gen.next(val1);
+    do {
+      gen.next(val1);
+    } while (val1 == DataT(0.0));
     gen.next(val2);
     DataT mu1, sigma1, mu2, sigma2;
     get_mu_sigma(mu1,

From 1bbe79b88bce21172ec05d96e3d7dcf63cabdaf2 Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Wed, 2 Mar 2022 22:14:01 +0530
Subject: [PATCH 19/23] Changing the seed for a test case to avoid degenerate
 case

---
 cpp/test/linalg/gemm_layout.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/test/linalg/gemm_layout.cu b/cpp/test/linalg/gemm_layout.cu
index 72567ff5f9..6b78556b6d 100644
--- a/cpp/test/linalg/gemm_layout.cu
+++ b/cpp/test/linalg/gemm_layout.cu
@@ -128,7 +128,7 @@ const std::vector<GemmLayoutInputs<float>> inputsf = {
   {50, 10, 60, false, true, true, 73012ULL},
   {90, 90, 30, false, true, false, 538147ULL},
   {30, 100, 10, false, false, true, 412352ULL},
-  {40, 80, 100, false, false, false, 297941ULL}};
+  {40, 80, 100, false, false, false, 2979410ULL}};
 
 const std::vector<GemmLayoutInputs<double>> inputsd = {
   {10, 70, 40, true, true, true, 535648ULL},

From ebb1a626fb160bb0d6e6c6c13936a83b9d4c1b31 Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Wed, 2 Mar 2022 22:14:41 +0530
Subject: [PATCH 20/23] Improving format

---
 cpp/test/spatial/epsilon_neighborhood.cu | 24 ++++--------------------
 1 file changed, 4 insertions(+), 20 deletions(-)

diff --git a/cpp/test/spatial/epsilon_neighborhood.cu b/cpp/test/spatial/epsilon_neighborhood.cu
index 33af5726a0..2a210d42c4 100644
--- a/cpp/test/spatial/epsilon_neighborhood.cu
+++ b/cpp/test/spatial/epsilon_neighborhood.cu
@@ -93,26 +93,10 @@ TEST_P(EpsNeighTestFI, Result)
   for (int i = 0; i < param.n_batches; ++i) {
     RAFT_CUDA_TRY(cudaMemsetAsync(adj.data(), 0, sizeof(bool) * param.n_row * batchSize, stream));
     RAFT_CUDA_TRY(cudaMemsetAsync(vd.data(), 0, sizeof(int) * (batchSize + 1), stream));
-    epsUnexpL2SqNeighborhood<float, int>(adj.
-
-                                         data(),
-                                         vd
-
-                                           .
-
-                                         data(),
-                                         data
-
-                                           .
-
-                                         data(),
-                                         data
-
-                                             .
-
-                                           data()
-
-                                           + (i * batchSize * param.n_col),
+    epsUnexpL2SqNeighborhood<float, int>(adj.data(),
+                                         vd.data(),
+                                         data.data(),
+                                         data.data() + (i * batchSize * param.n_col),
                                          param.n_row,
                                          batchSize,
                                          param.n_col,

From 9e6853cd3f7c83745d523154ede03fbf95d0d75f Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Tue, 8 Mar 2022 18:36:01 +0530
Subject: [PATCH 21/23] Correcting the Bernoulli dist generation

---
 cpp/include/raft/random/detail/rng_impl.cuh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/include/raft/random/detail/rng_impl.cuh b/cpp/include/raft/random/detail/rng_impl.cuh
index 893971c190..9c28f9768b 100644
--- a/cpp/include/raft/random/detail/rng_impl.cuh
+++ b/cpp/include/raft/random/detail/rng_impl.cuh
@@ -287,7 +287,7 @@ DI void custom_next(
 {
   Type res = 0;
   gen.next(res);
-  *val = res > params.prob;
+  *val = res < params.prob;
 }
 
 template <typename GenType, typename OutType, typename LenType>
@@ -299,7 +299,7 @@ DI void custom_next(GenType& gen,
 {
   OutType res = 0;
   gen.next(res);
-  *val = res > params.prob ? -params.scale : params.scale;
+  *val = res < params.prob ? -params.scale : params.scale;
 }
 
 template <typename GenType, typename OutType, typename LenType>

From 7224a2654740667ce3d55783927e80d43e05d91c Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Tue, 8 Mar 2022 18:44:42 +0530
Subject: [PATCH 22/23]  Adding comment explaining Laplace implementation

---
 cpp/include/raft/random/detail/rng_impl.cuh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/cpp/include/raft/random/detail/rng_impl.cuh b/cpp/include/raft/random/detail/rng_impl.cuh
index 9c28f9768b..42ca908115 100644
--- a/cpp/include/raft/random/detail/rng_impl.cuh
+++ b/cpp/include/raft/random/detail/rng_impl.cuh
@@ -394,6 +394,9 @@ DI void custom_next(GenType& gen,
   constexpr OutType one     = (OutType)1.0;
   constexpr OutType two     = (OutType)2.0;
   constexpr OutType oneHalf = (OutType)0.5;
+
+  // Because of the <= comparison, the `if` branch receives one more sample value than the `else`
+  // branch. However, this does not matter, because for 0.5 both branches evaluate to the same result.
   if (res <= oneHalf) {
     out = params.mu + params.scale * raft::myLog(two * res);
   } else {

From e0244510a921394aaa0e36b3855dddeefe408897 Mon Sep 17 00:00:00 2001
From: Vinay D <vinayd@nvidia.com>
Date: Tue, 8 Mar 2022 18:48:38 +0530
Subject: [PATCH 23/23] Making integer to float/double conversion explicit

---
 cpp/include/raft/random/detail/rng_impl.cuh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/include/raft/random/detail/rng_impl.cuh b/cpp/include/raft/random/detail/rng_impl.cuh
index 42ca908115..1b245ca45f 100644
--- a/cpp/include/raft/random/detail/rng_impl.cuh
+++ b/cpp/include/raft/random/detail/rng_impl.cuh
@@ -487,7 +487,7 @@ struct PhiloxGenerator {
   {
     float ret;
     uint32_t val = next_u32() >> 8;
-    ret          = static_cast<float>(val) / (1U << 24);
+    ret          = static_cast<float>(val) / float(uint32_t(1) << 24);
     return ret;
   }
 
@@ -495,7 +495,7 @@ struct PhiloxGenerator {
   {
     double ret;
     uint64_t val = next_u64() >> 11;
-    ret          = static_cast<double>(val) / (1LU << 53);
+    ret          = static_cast<double>(val) / double(uint64_t(1) << 53);
     return ret;
   }