rapidsai · rapids-bot · Mar 8, 2022 · Feb 17, 2022 · Feb 17, 2022 · Feb 17, 2022
@@ -294,7 +294,11 @@ DI void custom_next(
   GenType& gen, OutType* val, GumbelDistParams<OutType> params, LenType idx = 0, LenType stride = 0)
 {
   OutType res = 0;
-  gen.next(res);
+
+  do {
+    gen.next(res);
+  } while (res == OutType(1.0));
+
   *val = params.mu - params.beta * raft::myLog(-raft::myLog(res));
 }
 
@@ -334,7 +338,11 @@ DI void custom_next(GenType& gen,
                     LenType stride = 0)
 {
   OutType res;
-  gen.next(res);
+
+  do {
+    gen.next(res);
+  } while (res == OutType(1.0));
+
   constexpr OutType one = (OutType)1.0;
   *val                  = -raft::myLog(one - res) / params.lambda;
 }
@@ -347,7 +355,11 @@ DI void custom_next(GenType& gen,
                     LenType stride = 0)
 {
   OutType res;
-  gen.next(res);
+
+  do {
+    gen.next(res);
+  } while (res == OutType(1.0));
+
   constexpr OutType one = (OutType)1.0;
   constexpr OutType two = (OutType)2.0;
   *val                  = raft::mySqrt(-two * raft::myLog(one - res)) * params.sigma;
@@ -361,7 +373,11 @@ DI void custom_next(GenType& gen,
                     LenType stride = 0)
 {
   OutType res, out;
-  gen.next(res);
+
+  do {
+    gen.next(res);
+  } while (res == OutType(0.0) || res == OutType(1.0));
+
   constexpr OutType one     = (OutType)1.0;
   constexpr OutType two     = (OutType)2.0;
   constexpr OutType oneHalf = (OutType)0.5;

@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <sys/timeb.h>
+
 #include "../test_utils.h"
 #include <cub/cub.cuh>
 #include <gtest/gtest.h>
@@ -58,28 +60,34 @@ __global__ void meanKernel(T* out, const T* data, int len)
 
 template <typename T>
 struct RngInputs {
-  T tolerance;
   int len;
-  // start, end: for uniform
-  // mean, sigma: for normal/lognormal
-  // mean, beta: for gumbel
-  // mean, scale: for logistic and laplace
-  // lambda: for exponential
-  // sigma: for rayleigh
+  // Meaning of 'start' and 'end' parameter for various distributions
+  //
+  //         Uniform   Normal/Log-Normal   Gumbel   Logistic   Laplace   Exponential   Rayleigh
+  // start    start          mean           mean     mean       mean       lambda       sigma
+  // end       end           sigma          beta     scale      scale      Unused       Unused
   T start, end;
   RandomType type;
   GeneratorType gtype;
-  unsigned long long int seed;
+  uint64_t seed;
 };
 
-template <typename T>
-::std::ostream& operator<<(::std::ostream& os, const RngInputs<T>& dims)
-{
-  return os;
-}
-
-#include <sys/timeb.h>
-#include <time.h>
+// In this test we generate pseudo-random values that follow various probability distributions such
+// as Normal, Laplace etc. To check the correctness of generated random variates we compute two
+// measures, mean and variance from the generated data. The computed values are matched against
+// their theoretically expected values for the corresponding distribution. The computed mean and
+// variance are statistical variables themselves and follow a Normal distribution. Which means,
+// there is 99+% chance that the computed values fall in the 3-sigma (standard deviation) interval
+// [theoretical_value - 3*sigma, theoretical_value + 3*sigma]. The values are practically
+// guaranteed to fall in the 4-sigma interval. Reference standard deviation of the computed
+// mean/variance distribution is calculated here
+// https://gist.github.com/vinaydes/cee04f50ff7e3365759603d39b7e079b Maximum standard deviation
+// observed here is ~1.5e-2, thus we use this as sigma in our test.
+// N O T E: Before adding any new test case below, make sure to calculate standard deviation for the
+// test parameters using above notebook.
+
+constexpr int NUM_SIGMA    = 4;
+constexpr double MAX_SIGMA = 1.5e-2;
 
 template <typename T>
 class RngTest : public ::testing::TestWithParam<RngInputs<T>> {
@@ -97,9 +105,6 @@ class RngTest : public ::testing::TestWithParam<RngInputs<T>> {
  protected:
   void SetUp() override
   {
-    // Tests are configured with their expected test-values sigma. For example,
-    // 4 x sigma indicates the test shouldn't fail 99.9% of the time.
-    num_sigma = 4;
     Rng r(params.seed, params.gtype);
     switch (params.type) {
       case RNG_Normal: r.normal(data.data(), params.len, params.start, params.end, stream); break;
@@ -176,118 +181,61 @@ class RngTest : public ::testing::TestWithParam<RngInputs<T>> {
   RngInputs<T> params;
   rmm::device_uvector<T> data, stats;
   T h_stats[2];  // mean, var
-  int num_sigma;
 };
 
-// The measured mean and standard deviation for each tested distribution are,
-// of course, statistical variables. Thus setting an appropriate testing
-// tolerance essentially requires one to set a probability of test failure. We
-// choose to set this at 3-4 x sigma, i.e., a 99.7-99.9% confidence interval so that
-// the test will indeed pass. In quick experiments (using the identical
-// distributions given by NumPy/SciPy), the measured standard deviation is the
-// variable with the greatest variance and so we determined the variance for
-// each distribution and number of samples (32*1024 or 8*1024). Below
-// are listed the standard deviation for these tests.
-
-// Distribution: StdDev 32*1024, StdDev 8*1024
-// Normal: 0.0055, 0.011
-// LogNormal: 0.05, 0.1
-// Uniform: 0.003, 0.005
-// Gumbel: 0.005, 0.01
-// Logistic: 0.005, 0.01
-// Exp: 0.008, 0.015
-// Rayleigh: 0.0125, 0.025
-// Laplace: 0.02, 0.04
-
-// We generally want 4 x sigma >= 99.9% chance of success
-
 typedef RngTest<float> RngTestF;
 const std::vector<RngInputs<float>> inputsf = {
-  {0.0055, 32 * 1024, 1.f, 1.f, RNG_Normal, GenPhilox, 1234ULL},
-  {0.011, 8 * 1024, 1.f, 1.f, RNG_Normal, GenPhilox, 1234ULL},
-  {0.05, 32 * 1024, 1.f, 1.f, RNG_LogNormal, GenPhilox, 1234ULL},
-  {0.1, 8 * 1024, 1.f, 1.f, RNG_LogNormal, GenPhilox, 1234ULL},
-  {0.003, 32 * 1024, -1.f, 1.f, RNG_Uniform, GenPhilox, 1234ULL},
-  {0.005, 8 * 1024, -1.f, 1.f, RNG_Uniform, GenPhilox, 1234ULL},
-  {0.005, 32 * 1024, 1.f, 1.f, RNG_Gumbel, GenPhilox, 1234ULL},
-  {0.01, 8 * 1024, 1.f, 1.f, RNG_Gumbel, GenPhilox, 1234ULL},
-  {0.005, 32 * 1024, 1.f, 1.f, RNG_Logistic, GenPhilox, 67632ULL},
-  {0.01, 8 * 1024, 1.f, 1.f, RNG_Logistic, GenPhilox, 1234ULL},
-  {0.008, 32 * 1024, 1.f, 1.f, RNG_Exp, GenPhilox, 1234ULL},
-  {0.015, 8 * 1024, 1.f, 1.f, RNG_Exp, GenPhilox, 1234ULL},
-  {0.0125, 32 * 1024, 1.f, 1.f, RNG_Rayleigh, GenPhilox, 1234ULL},
-  {0.025, 8 * 1024, 1.f, 1.f, RNG_Rayleigh, GenPhilox, 1234ULL},
-  {0.02, 32 * 1024, 1.f, 1.f, RNG_Laplace, GenPhilox, 1234ULL},
-  {0.04, 8 * 1024, 1.f, 1.f, RNG_Laplace, GenPhilox, 1234ULL},
-
-  {0.0055, 32 * 1024, 1.f, 1.f, RNG_Normal, GenPC, 1234ULL},
-  {0.011, 8 * 1024, 1.f, 1.f, RNG_Normal, GenPC, 1234ULL},
-  {0.05, 32 * 1024, 1.f, 1.f, RNG_LogNormal, GenPC, 1234ULL},
-  {0.1, 8 * 1024, 1.f, 1.f, RNG_LogNormal, GenPC, 1234ULL},
-  {0.003, 32 * 1024, -1.f, 1.f, RNG_Uniform, GenPC, 1234ULL},
-  {0.005, 8 * 1024, -1.f, 1.f, RNG_Uniform, GenPC, 1234ULL},
-  {0.005, 32 * 1024, 1.f, 1.f, RNG_Gumbel, GenPC, 1234ULL},
-  {0.01, 8 * 1024, 1.f, 1.f, RNG_Gumbel, GenPC, 1234ULL},
-  {0.005, 32 * 1024, 1.f, 1.f, RNG_Logistic, GenPC, 1234ULL},
-  {0.01, 8 * 1024, 1.f, 1.f, RNG_Logistic, GenPC, 1234ULL},
-  {0.008, 32 * 1024, 1.f, 1.f, RNG_Exp, GenPC, 1234ULL},
-  {0.015, 8 * 1024, 1.f, 1.f, RNG_Exp, GenPC, 1234ULL},
-  {0.0125, 32 * 1024, 1.f, 1.f, RNG_Rayleigh, GenPC, 1234ULL},
-  {0.025, 8 * 1024, 1.f, 1.f, RNG_Rayleigh, GenPC, 1234ULL},
-  {0.02, 32 * 1024, 1.f, 1.f, RNG_Laplace, GenPC, 1234ULL},
-  {0.04, 8 * 1024, 1.f, 1.f, RNG_Laplace, GenPC, 1234ULL}};
+  // Test with Philox
+  {1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPhilox, 1234ULL},
+  {1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPhilox, 1234ULL},
+  {1024 * 1024, 1.2f, 5.5f, RNG_Uniform, GenPhilox, 1234ULL},
+  {1024 * 1024, 0.1f, 1.3f, RNG_Gumbel, GenPhilox, 1234ULL},
+  {1024 * 1024, 1.6f, 0.0f, RNG_Exp, GenPhilox, 1234ULL},
+  {1024 * 1024, 1.6f, 0.0f, RNG_Rayleigh, GenPhilox, 1234ULL},
+  {1024 * 1024, 2.6f, 1.3f, RNG_Laplace, GenPhilox, 1234ULL},
+  // Test with PCG
+  {1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPC, 1234ULL},
+  {1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPC, 1234ULL},
+  {1024 * 1024, 1.2f, 5.5f, RNG_Uniform, GenPC, 1234ULL},
+  {1024 * 1024, 0.1f, 1.3f, RNG_Gumbel, GenPC, 1234ULL},
+  {1024 * 1024, 1.6f, 0.0f, RNG_Exp, GenPC, 1234ULL},
+  {1024 * 1024, 1.6f, 0.0f, RNG_Rayleigh, GenPC, 1234ULL},
+  {1024 * 1024, 2.6f, 1.3f, RNG_Laplace, GenPC, 1234ULL}};
 
 TEST_P(RngTestF, Result)
 {
   float meanvar[2];
   getExpectedMeanVar(meanvar);
-  ASSERT_TRUE(match(meanvar[0], h_stats[0], CompareApprox<float>(num_sigma * params.tolerance)));
-  ASSERT_TRUE(match(meanvar[1], h_stats[1], CompareApprox<float>(num_sigma * params.tolerance)));
+  ASSERT_TRUE(match(meanvar[0], h_stats[0], CompareApprox<float>(NUM_SIGMA * MAX_SIGMA)));
+  ASSERT_TRUE(match(meanvar[1], h_stats[1], CompareApprox<float>(NUM_SIGMA * MAX_SIGMA)));
 }
 INSTANTIATE_TEST_SUITE_P(RngTests, RngTestF, ::testing::ValuesIn(inputsf));
 
 typedef RngTest<double> RngTestD;
 const std::vector<RngInputs<double>> inputsd = {
-  {0.0055, 32 * 1024, 1.0, 1.0, RNG_Normal, GenPhilox, 1234ULL},
-  {0.011, 8 * 1024, 1.0, 1.0, RNG_Normal, GenPhilox, 1234ULL},
-  {0.05, 32 * 1024, 1.0, 1.0, RNG_LogNormal, GenPhilox, 1234ULL},
-  {0.1, 8 * 1024, 1.0, 1.0, RNG_LogNormal, GenPhilox, 1234ULL},
-  {0.003, 32 * 1024, -1.0, 1.0, RNG_Uniform, GenPhilox, 1234ULL},
-  {0.005, 8 * 1024, -1.0, 1.0, RNG_Uniform, GenPhilox, 1234ULL},
-  {0.005, 32 * 1024, 1.0, 1.0, RNG_Gumbel, GenPhilox, 1234ULL},
-  {0.01, 8 * 1024, 1.0, 1.0, RNG_Gumbel, GenPhilox, 1234ULL},
-  {0.005, 32 * 1024, 1.0, 1.0, RNG_Logistic, GenPhilox, 67632ULL},
-  {0.01, 8 * 1024, 1.0, 1.0, RNG_Logistic, GenPhilox, 1234ULL},
-  {0.008, 32 * 1024, 1.0, 1.0, RNG_Exp, GenPhilox, 1234ULL},
-  {0.015, 8 * 1024, 1.0, 1.0, RNG_Exp, GenPhilox, 1234ULL},
-  {0.0125, 32 * 1024, 1.0, 1.0, RNG_Rayleigh, GenPhilox, 1234ULL},
-  {0.025, 8 * 1024, 1.0, 1.0, RNG_Rayleigh, GenPhilox, 1234ULL},
-  {0.02, 32 * 1024, 1.0, 1.0, RNG_Laplace, GenPhilox, 1234ULL},
-  {0.04, 8 * 1024, 1.0, 1.0, RNG_Laplace, GenPhilox, 1234ULL},
-
-  {0.0055, 32 * 1024, 1.0, 1.0, RNG_Normal, GenPC, 1234ULL},
-  {0.011, 8 * 1024, 1.0, 1.0, RNG_Normal, GenPC, 1234ULL},
-  {0.05, 32 * 1024, 1.0, 1.0, RNG_LogNormal, GenPC, 1234ULL},
-  {0.1, 8 * 1024, 1.0, 1.0, RNG_LogNormal, GenPC, 1234ULL},
-  {0.003, 32 * 1024, -1.0, 1.0, RNG_Uniform, GenPC, 1234ULL},
-  {0.005, 8 * 1024, -1.0, 1.0, RNG_Uniform, GenPC, 1234ULL},
-  {0.005, 32 * 1024, 1.0, 1.0, RNG_Gumbel, GenPC, 1234ULL},
-  {0.01, 8 * 1024, 1.0, 1.0, RNG_Gumbel, GenPC, 1234ULL},
-  {0.005, 32 * 1024, 1.0, 1.0, RNG_Logistic, GenPC, 1234ULL},
-  {0.01, 8 * 1024, 1.0, 1.0, RNG_Logistic, GenPC, 1234ULL},
-  {0.008, 32 * 1024, 1.0, 1.0, RNG_Exp, GenPC, 1234ULL},
-  {0.015, 8 * 1024, 1.0, 1.0, RNG_Exp, GenPC, 1234ULL},
-  {0.0125, 32 * 1024, 1.0, 1.0, RNG_Rayleigh, GenPC, 1234ULL},
-  {0.025, 8 * 1024, 1.0, 1.0, RNG_Rayleigh, GenPC, 1234ULL},
-  {0.02, 32 * 1024, 1.0, 1.0, RNG_Laplace, GenPC, 1234ULL},
-  {0.04, 8 * 1024, 1.0, 1.0, RNG_Laplace, GenPC, 1234ULL}};
+  // Test with Philox
+  {1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPhilox, 1234ULL},
+  {1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPhilox, 1234ULL},
+  {1024 * 1024, 1.2f, 5.5f, RNG_Uniform, GenPhilox, 1234ULL},
+  {1024 * 1024, 0.1f, 1.3f, RNG_Gumbel, GenPhilox, 1234ULL},
+  {1024 * 1024, 1.6f, 0.0f, RNG_Exp, GenPhilox, 1234ULL},
+  {1024 * 1024, 1.6f, 0.0f, RNG_Rayleigh, GenPhilox, 1234ULL},
+  {1024 * 1024, 2.6f, 1.3f, RNG_Laplace, GenPhilox, 1234ULL},
+  // Test with PCG
+  {1024 * 1024, 3.0f, 1.3f, RNG_Normal, GenPC, 1234ULL},
+  {1024 * 1024, 1.2f, 0.1f, RNG_LogNormal, GenPC, 1234ULL},
+  {1024 * 1024, 1.2f, 5.5f, RNG_Uniform, GenPC, 1234ULL},
+  {1024 * 1024, 0.1f, 1.3f, RNG_Gumbel, GenPC, 1234ULL},
+  {1024 * 1024, 1.6f, 0.0f, RNG_Exp, GenPC, 1234ULL},
+  {1024 * 1024, 1.6f, 0.0f, RNG_Rayleigh, GenPC, 1234ULL},
+  {1024 * 1024, 2.6f, 1.3f, RNG_Laplace, GenPC, 1234ULL}};
 
 TEST_P(RngTestD, Result)
 {
   double meanvar[2];
   getExpectedMeanVar(meanvar);
-  ASSERT_TRUE(match(meanvar[0], h_stats[0], CompareApprox<double>(num_sigma * params.tolerance)));
-  ASSERT_TRUE(match(meanvar[1], h_stats[1], CompareApprox<double>(num_sigma * params.tolerance)));
+  ASSERT_TRUE(match(meanvar[0], h_stats[0], CompareApprox<double>(NUM_SIGMA * MAX_SIGMA)));
+  ASSERT_TRUE(match(meanvar[1], h_stats[1], CompareApprox<double>(NUM_SIGMA * MAX_SIGMA)));
 }
 INSTANTIATE_TEST_SUITE_P(RngTests, RngTestD, ::testing::ValuesIn(inputsd));