diff --git a/cpp/include/raft/random/detail/rng_impl.cuh b/cpp/include/raft/random/detail/rng_impl.cuh index 9ca3859e18..2406456404 100644 --- a/cpp/include/raft/random/detail/rng_impl.cuh +++ b/cpp/include/raft/random/detail/rng_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,18 +16,13 @@ #pragma once -#include -#include #include #include #include #include -#include #include #include #include -#include -#include namespace raft { namespace random { @@ -37,10 +32,99 @@ namespace detail { enum GeneratorType { /** curand-based philox generator */ GenPhilox = 0, - /** LFSR taps generator */ - GenTaps, - /** kiss99 generator (currently the fastest) */ - GenKiss99 + /** Permuted Congruential Generator */ + GenPC +}; + +template +struct InvariantDistParams { + OutType const_val; +}; + +template +struct UniformDistParams { + OutType start; + OutType end; +}; + +template +struct UniformIntDistParams { + OutType start; + OutType end; + DiffType diff; +}; + +template +struct NormalDistParams { + OutType mu; + OutType sigma; +}; + +template +struct NormalIntDistParams { + IntType mu; + IntType sigma; +}; + +template +struct NormalTableDistParams { + LenType n_rows; + LenType n_cols; + const OutType* mu_vec; + OutType sigma; + const OutType* sigma_vec; +}; + +template +struct BernoulliDistParams { + OutType prob; +}; + +template +struct ScaledBernoulliDistParams { + OutType prob; + OutType scale; +}; + +template +struct GumbelDistParams { + OutType mu; + OutType beta; +}; + +template +struct LogNormalDistParams { + OutType mu; + OutType sigma; +}; + +template +struct LogisticDistParams { + OutType mu; + OutType scale; +}; + +template +struct ExponentialDistParams { + OutType lambda; +}; + +template +struct RayleighDistParams { + OutType sigma; +}; + +template +struct LaplaceDistParams { + OutType mu; + OutType scale; +}; + +// Not really a distro, useful for sample without replacement function +template +struct SamplingParams { + IdxT* inIdxPtr; + const WeightsT* wts; }; template @@ -55,76 +139,261 @@ DI void box_muller_transform(Type& val1, Type& val2, Type sigma1, Type mu1, Type val1 = R * c * sigma1 + mu1; val2 = R * s * sigma2 + mu2; } + template DI void box_muller_transform(Type& val1, Type& val2, Type sigma1, Type mu1) { box_muller_transform(val1, val2, sigma1, mu1, sigma1, mu1); } -/** - * @brief generator-agnostic way of generating random numbers - * @tparam GenType the generator object that expose 'next' method - */ -template -struct Generator { - DI Generator(uint64_t seed, uint64_t subsequence, uint64_t offset) - : gen(seed, subsequence, offset) - { +template +DI void custom_next(GenType& gen, + OutType* val, + InvariantDistParams params, + LenType idx = 0, + LenType stride = 0) +{ + *val = params.const_val; +} + +template +DI void custom_next(GenType& gen, + OutType* val, + UniformDistParams params, + LenType idx = 0, + LenType stride = 0) +{ + OutType res; + gen.next(res); + *val = (res * (params.end - params.start)) + params.start; +} + +template +DI void custom_next(GenType& gen, + OutType* val, + UniformIntDistParams params, + LenType idx = 0, + LenType stride = 0) +{ + uint32_t x = 0; + uint32_t s = params.diff; + gen.next(x); + uint64_t m = uint64_t(x) * s; + uint32_t l = uint32_t(m); + if (l < s) { + uint32_t t = (-s) % s; // (2^32 - s) mod s + while (l < t) { + gen.next(x); + m = uint64_t(x) * s; + l = uint32_t(m); + } } + *val = OutType(m >> 32) + params.start; +} - template - DI void next(Type& ret) - { - gen.next(ret); +template +DI void custom_next(GenType& gen, + OutType* val, + UniformIntDistParams params, + LenType idx = 0, + LenType stride = 0) +{ + uint64_t x = 0; + gen.next(x); + uint64_t s = params.diff; + uint64_t m_lo, m_hi; + // m = x * s; + asm("mul.hi.u64 %0, %1, %2;" : "=l"(m_hi) : "l"(x), "l"(s)); + asm("mul.lo.u64 %0, %1, %2;" : "=l"(m_lo) : "l"(x), "l"(s)); + if (m_lo < s) { + uint64_t t = (-s) % s; // (2^64 - s) mod s + while (m_lo < t) { + gen.next(x); + asm("mul.hi.u64 %0, %1, %2;" : "=l"(m_hi) : "l"(x), "l"(s)); + asm("mul.lo.u64 %0, %1, %2;" : "=l"(m_lo) : "l"(x), "l"(s)); + } } + *val = OutType(m_hi) + params.start; +} - private: - /** the actual generator */ - GenType gen; -}; +template +DI void custom_next( + GenType& gen, OutType* val, NormalDistParams params, LenType idx = 0, LenType stride = 0) +{ + OutType res1, res2; + gen.next(res1); + gen.next(res2); + box_muller_transform(res1, res2, params.sigma, params.mu); + *val = res1; + *(val + 1) = res2; +} -template -__global__ void randKernel(uint64_t seed, uint64_t offset, OutType* ptr, LenType len, Lambda randOp) +template +DI void custom_next(GenType& gen, + IntType* val, + NormalIntDistParams params, + LenType idx = 0, + LenType stride = 0) { - LenType tid = (blockIdx.x * blockDim.x) + threadIdx.x; - Generator gen(seed, (uint64_t)tid, offset); - const LenType stride = gridDim.x * blockDim.x; - for (LenType idx = tid; idx < len; idx += stride) { - MathType val; - gen.next(val); - ptr[idx] = randOp(val, idx); - } + IntType res1_int, res2_int; + gen.next(res1_int); + gen.next(res2_int); + double res1 = static_cast(res1_int); + double res2 = static_cast(res2_int); + double mu = static_cast(params.mu); + double sigma = static_cast(params.sigma); + box_muller_transform(res1, res2, sigma, mu); + *val = static_cast(res1); + *(val + 1) = static_cast(res2); } -// used for Box-Muller type transformations -template -__global__ void rand2Kernel( - uint64_t seed, uint64_t offset, OutType* ptr, LenType len, Lambda2 rand2Op) +template +DI void custom_next(GenType& gen, + OutType* val, + NormalTableDistParams params, + LenType idx, + LenType stride) { - LenType tid = (blockIdx.x * blockDim.x) + threadIdx.x; - Generator gen(seed, (uint64_t)tid, offset); - const LenType stride = gridDim.x * blockDim.x; - for (LenType idx = tid; idx < len; idx += stride) { - MathType val1, val2; - gen.next(val1); - gen.next(val2); - rand2Op(val1, val2, idx, idx + stride); - if (idx < len) ptr[idx] = (OutType)val1; - idx += stride; - if (idx < len) ptr[idx] = (OutType)val2; - } + OutType res1, res2; + gen.next(res1); + gen.next(res2); + LenType col1 = idx % params.n_cols; + LenType col2 = (idx + stride) % params.n_cols; + OutType mean1 = params.mu_vec[col1]; + OutType mean2 = params.mu_vec[col2]; + OutType sig1 = params.sigma_vec == nullptr ? params.sigma : params.sigma_vec[col1]; + OutType sig2 = params.sigma_vec == nullptr ? params.sigma : params.sigma_vec[col2]; + box_muller_transform(res1, res2, sig1, mean1, sig2, mean2); + *val = res1; + *(val + 1) = res2; } -template -__global__ void constFillKernel(Type* ptr, int len, Type val) +template +DI void custom_next( + GenType& gen, OutType* val, BernoulliDistParams params, LenType idx = 0, LenType stride = 0) +{ + Type res = 0; + gen.next(res); + *val = res > params.prob; +} + +template +DI void custom_next(GenType& gen, + OutType* val, + ScaledBernoulliDistParams params, + LenType idx, + LenType stride) +{ + OutType res = 0; + gen.next(res); + *val = res > params.prob ? -params.scale : params.scale; +} + +template +DI void custom_next( + GenType& gen, OutType* val, GumbelDistParams params, LenType idx = 0, LenType stride = 0) +{ + OutType res = 0; + gen.next(res); + *val = params.mu - params.beta * raft::myLog(-raft::myLog(res)); +} + +template +DI void custom_next(GenType& gen, + OutType* val, + LogNormalDistParams params, + LenType idx = 0, + LenType stride = 0) { - unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x; - const unsigned stride = gridDim.x * blockDim.x; - for (unsigned idx = tid; idx < len; idx += stride) { - ptr[idx] = val; + OutType res1 = 0, res2 = 0; + gen.next(res1); + gen.next(res2); + box_muller_transform(res1, res2, params.sigma, params.mu); + *val = raft::myExp(res1); + *(val + 1) = raft::myExp(res2); +} + +template +DI void custom_next(GenType& gen, + OutType* val, + LogisticDistParams params, + LenType idx = 0, + LenType stride = 0) +{ + OutType res; + gen.next(res); + constexpr OutType one = (OutType)1.0; + *val = params.mu - params.scale * raft::myLog(one / res - one); +} + +template +DI void custom_next(GenType& gen, + OutType* val, + ExponentialDistParams params, + LenType idx = 0, + LenType stride = 0) +{ + OutType res; + gen.next(res); + constexpr OutType one = (OutType)1.0; + *val = -raft::myLog(one - res) / params.lambda; +} + +template +DI void custom_next(GenType& gen, + OutType* val, + RayleighDistParams params, + LenType idx = 0, + LenType stride = 0) +{ + OutType res; + gen.next(res); + constexpr OutType one = (OutType)1.0; + constexpr OutType two = (OutType)2.0; + *val = raft::mySqrt(-two * raft::myLog(one - res)) * params.sigma; +} + +template +DI void custom_next(GenType& gen, + OutType* val, + LaplaceDistParams params, + LenType idx = 0, + LenType stride = 0) +{ + OutType res, out; + gen.next(res); + constexpr OutType one = (OutType)1.0; + constexpr OutType two = (OutType)2.0; + constexpr OutType oneHalf = (OutType)0.5; + if (res <= oneHalf) { + out = params.mu + params.scale * raft::myLog(two * res); + } else { + out = params.mu - params.scale * raft::myLog(two * (one - res)); + } + *val = out; +} + +template +DI void custom_next( + GenType& gen, OutType* val, SamplingParams params, LenType idx, LenType stride) +{ + OutType res; + gen.next(res); + params.inIdxPtr[idx] = idx; + constexpr OutType one = (OutType)1.0; + auto exp = -raft::myLog(one - res); + if (params.wts != nullptr) { + *val = exp / params.wts[idx]; + } else { + *val = exp; } } +struct RngState { + uint64_t seed; + uint64_t base_subsequence; +}; + /** Philox-based random number generator */ // Courtesy: Jakub Szuppe struct PhiloxGenerator { @@ -136,484 +405,416 @@ struct PhiloxGenerator { */ DI PhiloxGenerator(uint64_t seed, uint64_t subsequence, uint64_t offset) { - curand_init(seed, subsequence, offset, &state); + curand_init(seed, subsequence, offset, &philox_state); } - /** - * @defgroup NextRand Generate the next random number - * @{ - */ - DI void next(float& ret) { ret = curand_uniform(&(this->state)); } - DI void next(double& ret) { ret = curand_uniform_double(&(this->state)); } - DI void next(uint32_t& ret) { ret = curand(&(this->state)); } - DI void next(uint64_t& ret) - { - uint32_t a, b; - next(a); - next(b); - ret = (uint64_t)a | ((uint64_t)b << 32); - } - DI void next(int32_t& ret) - { - uint32_t val; - next(val); - ret = int32_t(val & 0x7fffffff); - } - DI void next(int64_t& ret) - { - uint64_t val; - next(val); - ret = int64_t(val & 0x7fffffffffffffff); - } - /** @} */ - - private: - /** the state for RNG */ - curandStatePhilox4_32_10_t state; -}; - -/** LFSR taps-filter for generating random numbers. */ -// Courtesy: Vinay Deshpande -struct TapsGenerator { - /** - * @brief ctor. Initializes the state for RNG - * @param seed the seed (can be same across all threads) - * @param subsequence unused - * @param offset unused - */ - DI TapsGenerator(uint64_t seed, uint64_t subsequence, uint64_t offset) + DI PhiloxGenerator(const RngState& rng_state, const uint64_t subsequence) { - uint64_t delta = (blockIdx.x * blockDim.x) + threadIdx.x; - uint64_t stride = blockDim.x * gridDim.x; - delta += ((blockIdx.y * blockDim.y) + threadIdx.y) * stride; - stride *= blockDim.y * gridDim.y; - delta += ((blockIdx.z * blockDim.z) + threadIdx.z) * stride; - state = seed + delta + 1; + curand_init(rng_state.seed, rng_state.base_subsequence + subsequence, 0, &philox_state); } /** * @defgroup NextRand Generate the next random number * @{ */ - template - DI void next(Type& ret) - { - constexpr double ULL_LARGE = 1.8446744073709551614e19; - uint64_t val; - next(val); - ret = static_cast(val); - ret /= static_cast(ULL_LARGE); - } - DI void next(uint64_t& ret) + DI uint32_t next_u32() { - constexpr uint64_t TAPS = 0x8000100040002000ULL; - constexpr int ROUNDS = 128; - for (int i = 0; i < ROUNDS; i++) - state = (state >> 1) ^ (-(state & 1ULL) & TAPS); - ret = state; + uint32_t ret = curand(&(this->philox_state)); + return ret; } - DI void next(uint32_t& ret) + + DI uint64_t next_u64() { - uint64_t val; - next(val); - ret = (uint32_t)val; + uint64_t ret; + uint32_t a, b; + a = next_u32(); + b = next_u32(); + ret = uint64_t(a) | (uint64_t(b) << 32); + return ret; } - DI void next(int32_t& ret) + + DI int32_t next_i32() { + int32_t ret; uint32_t val; - next(val); + val = next_u32(); ret = int32_t(val & 0x7fffffff); + return ret; } - DI void next(int64_t& ret) + + DI int64_t next_i64() { + int64_t ret; uint64_t val; - next(val); + val = next_u64(); ret = int64_t(val & 0x7fffffffffffffff); + return ret; } + + DI void next(float& ret) { ret = curand_uniform(&(this->philox_state)); } + DI void next(double& ret) { ret = curand_uniform_double(&(this->philox_state)); } + + DI void next(uint32_t& ret) { ret = next_u32(); } + DI void next(uint64_t& ret) { ret = next_u64(); } + DI void next(int32_t& ret) { ret = next_i32(); } + DI void next(int64_t& ret) { ret = next_i64(); } + /** @} */ private: /** the state for RNG */ - uint64_t state; + curandStatePhilox4_32_10_t philox_state; }; -/** Kiss99-based random number generator */ +/** PCG random number generator */ -struct Kiss99Generator { +struct PCGenerator { /** - * @brief ctor. Initializes the state for RNG - * @param seed the seed (can be same across all threads) - * @param subsequence unused + * @brief ctor. Initializes the state for RNG. This code is derived from PCG basic code + * @param seed the seed (can be same across all threads). Same as PCG's initstate + * @param subsequence is same as PCG's initseq * @param offset unused */ - DI Kiss99Generator(uint64_t seed, uint64_t subsequence, uint64_t offset) { initKiss99(seed); } + DI PCGenerator(uint64_t seed, uint64_t subsequence, uint64_t offset) + { + pcg_state = uint64_t(0); + inc = (subsequence << 1u) | 1u; + uint32_t discard; + next(discard); + pcg_state += seed; + next(discard); + skipahead(offset); + } + + DI PCGenerator(const RngState& rng_state, const uint64_t subsequence) + { + pcg_state = uint64_t(0); + inc = ((rng_state.base_subsequence + subsequence) << 1u) | 1u; + uint32_t discard; + next(discard); + pcg_state += rng_state.seed; + next(discard); + } + + // Based on "Random Number Generation with Arbitrary Strides" F. B. Brown + // Link https://mcnp.lanl.gov/pdf_files/anl-rn-arb-stride.pdf + DI void skipahead(uint64_t offset) + { + uint64_t G = 1; + uint64_t h = 6364136223846793005ULL; + uint64_t C = 0; + uint64_t f = inc; + while (offset) { + if (offset & 1) { + G = G * h; + C = C * h + f; + } + f = f * (h + 1); + h = h * h; + offset >>= 1; + } + pcg_state = pcg_state * G + C; + } /** * @defgroup NextRand Generate the next random number + * @brief This code is derived from PCG basic code * @{ */ - template - DI void next(Type& ret) + DI uint32_t next_u32() { - constexpr double U_LARGE = 4.294967295e9; - uint32_t val; - next(val); - ret = static_cast(val); - ret /= static_cast(U_LARGE); + uint32_t ret; + uint64_t oldstate = pcg_state; + pcg_state = oldstate * 6364136223846793005ULL + inc; + uint32_t xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u; + uint32_t rot = oldstate >> 59u; + ret = (xorshifted >> rot) | (xorshifted << ((-rot) & 31)); + return ret; } - DI void next(uint32_t& ret) - { - uint32_t MWC; - z = 36969 * (z & 65535) + (z >> 16); - w = 18000 * (w & 65535) + (w >> 16); - MWC = ((z << 16) + w); - jsr ^= (jsr << 17); - jsr ^= (jsr >> 13); - jsr ^= (jsr << 5); - jcong = 69069 * jcong + 1234567; - MWC = ((MWC ^ jcong) + jsr); - ret = MWC; - } - DI void next(uint64_t& ret) + DI uint64_t next_u64() { + uint64_t ret; uint32_t a, b; - next(a); - next(b); - ret = (uint64_t)a | ((uint64_t)b << 32); + a = next_u32(); + b = next_u32(); + ret = uint64_t(a) | (uint64_t(b) << 32); + return ret; } - DI void next(int32_t& ret) + + DI int32_t next_i32() { + int32_t ret; uint32_t val; - next(val); + val = next_u32(); ret = int32_t(val & 0x7fffffff); + return ret; } - DI void next(int64_t& ret) + + DI int64_t next_i64() { + int64_t ret; uint64_t val; - next(val); + val = next_u64(); ret = int64_t(val & 0x7fffffffffffffff); + return ret; } - /** @} */ - private: - /** one of the kiss99 states */ - uint32_t z; - /** one of the kiss99 states */ - uint32_t w; - /** one of the kiss99 states */ - uint32_t jsr; - /** one of the kiss99 states */ - uint32_t jcong; - - // This function multiplies 128-bit hash by 128-bit FNV prime and returns lower - // 128 bits. It uses 32-bit wide multiply only. - DI void mulByFnv1a128Prime(uint32_t* h) - { - typedef union { - uint32_t u32[2]; - uint64_t u64[1]; - } words64; - - // 128-bit FNV prime = p3 * 2^96 + p2 * 2^64 + p1 * 2^32 + p0 - // Here p0 = 315, p2 = 16777216, p1 = p3 = 0 - const uint32_t p0 = uint32_t(315), p2 = uint32_t(16777216); - // Partial products - words64 h0p0, h1p0, h2p0, h0p2, h3p0, h1p2; - - h0p0.u64[0] = uint64_t(h[0]) * p0; - h1p0.u64[0] = uint64_t(h[1]) * p0; - h2p0.u64[0] = uint64_t(h[2]) * p0; - h0p2.u64[0] = uint64_t(h[0]) * p2; - h3p0.u64[0] = uint64_t(h[3]) * p0; - h1p2.u64[0] = uint64_t(h[1]) * p2; - - // h_n[0] = LO(h[0]*p[0]); - // h_n[1] = HI(h[0]*p[0]) + LO(h[1]*p[0]); - // h_n[2] = HI(h[1]*p[0]) + LO(h[2]*p[0]) + LO(h[0]*p[2]); - // h_n[3] = HI(h[2]*p[0]) + HI(h[0]*p[2]) + LO(h[3]*p[0]) + LO(h[1]*p[2]); - uint32_t carry = 0; - h[0] = h0p0.u32[0]; - - h[1] = h0p0.u32[1] + h1p0.u32[0]; - carry = h[1] < h0p0.u32[1] ? 1 : 0; - - h[2] = h1p0.u32[1] + carry; - carry = h[2] < h1p0.u32[1] ? 1 : 0; - h[2] += h2p0.u32[0]; - carry = h[2] < h2p0.u32[0] ? carry + 1 : carry; - h[2] += h0p2.u32[0]; - carry = h[2] < h0p2.u32[0] ? carry + 1 : carry; - - h[3] = h2p0.u32[1] + h0p2.u32[1] + h3p0.u32[0] + h1p2.u32[0] + carry; - return; + DI float next_float() + { + float ret; + uint32_t val = next_u32() >> 8; + ret = static_cast(val) / (1U << 24); + return ret; } - DI void fnv1a128(uint32_t* hash, uint32_t txt) + DI double next_double() { - hash[0] ^= (txt >> 0) & 0xFF; - mulByFnv1a128Prime(hash); - hash[0] ^= (txt >> 8) & 0xFF; - mulByFnv1a128Prime(hash); - hash[0] ^= (txt >> 16) & 0xFF; - mulByFnv1a128Prime(hash); - hash[0] ^= (txt >> 24) & 0xFF; - mulByFnv1a128Prime(hash); + double ret; + uint64_t val = next_u64() >> 11; + ret = static_cast(val) / (1LU << 53); + return ret; } - DI void initKiss99(uint64_t seed) - { - // Initialize hash to 128-bit FNV1a basis - uint32_t hash[4] = {1653982605UL, 1656234357UL, 129696066UL, 1818371886UL}; + DI void next(uint32_t& ret) { ret = next_u32(); } + DI void next(uint64_t& ret) { ret = next_u64(); } + DI void next(int32_t& ret) { ret = next_i32(); } + DI void next(int64_t& ret) { ret = next_i64(); } - // Digest threadIdx, blockIdx and seed - fnv1a128(hash, threadIdx.x); - fnv1a128(hash, threadIdx.y); - fnv1a128(hash, threadIdx.z); - fnv1a128(hash, blockIdx.x); - fnv1a128(hash, blockIdx.y); - fnv1a128(hash, blockIdx.z); - fnv1a128(hash, uint32_t(seed)); - fnv1a128(hash, uint32_t(seed >> 32)); + DI void next(float& ret) { ret = next_float(); } + DI void next(double& ret) { ret = next_double(); } - // Initialize KISS99 state with hash - z = hash[0]; - w = hash[1]; - jsr = hash[2]; - jcong = hash[3]; - } + /** @} */ + + private: + uint64_t pcg_state; + uint64_t inc; }; -/** The main random number generator class, fully on GPUs */ +template +__global__ void fillKernel( + uint64_t seed, uint64_t adv_subs, uint64_t offset, OutType* ptr, LenType len, ParamType params) +{ + LenType tid = threadIdx.x + blockIdx.x * blockDim.x; + GenType gen(seed, adv_subs + (uint64_t)tid, offset); + const LenType stride = gridDim.x * blockDim.x; + for (LenType idx = tid; idx < len; idx += stride * ITEMS_PER_CALL) { + OutType val[ITEMS_PER_CALL]; + custom_next(gen, val, params, idx, stride); +#pragma unroll + for (int i = 0; i < ITEMS_PER_CALL; i++) { + if ((idx + i * stride) < len) ptr[idx + i * stride] = val[i]; + } + } + return; +} + class RngImpl { public: - RngImpl(uint64_t _s, GeneratorType _t = GenPhilox) - : type(_t), - offset(0), + RngImpl(uint64_t seed, GeneratorType _t = GenPhilox) + : state{seed, 0}, + type(_t), // simple heuristic to make sure all SMs will be occupied properly // and also not too many initialization calls will be made by each thread - nBlocks(4 * getMultiProcessorCount()), - gen() + nBlocks(4 * getMultiProcessorCount()) { - seed(_s); - } - - void seed(uint64_t _s) - { - gen.seed(_s); - offset = 0; } template void affine_transform_params(IdxT n, IdxT& a, IdxT& b) { // always keep 'a' to be coprime to 'n' - a = gen() % n; + std::mt19937_64 mt_rng(state.seed + state.base_subsequence); + a = mt_rng() % n; while (gcd(a, n) != 1) { ++a; if (a >= n) a = 0; } // the bias term 'b' can be any number in the range of [0, n) - b = gen() % n; + b = mt_rng() % n; } - template - void uniform(Type* ptr, LenType len, Type start, Type end, cudaStream_t stream) + template + void uniform(OutType* ptr, LenType len, OutType start, OutType end, cudaStream_t stream) { - static_assert(std::is_floating_point::value, + static_assert(std::is_floating_point::value, "Type for 'uniform' can only be floating point!"); - custom_distribution( - ptr, - len, - [=] __device__(Type val, LenType idx) { return (val * (end - start)) + start; }, - stream); - } - template - void uniformInt(IntType* ptr, LenType len, IntType start, IntType end, cudaStream_t stream) - { - static_assert(std::is_integral::value, "Type for 'uniformInt' can only be integer!"); - custom_distribution( - ptr, - len, - [=] __device__(IntType val, LenType idx) { return (val % (end - start)) + start; }, - stream); + UniformDistParams params; + params.start = start; + params.end = end; + kernel_dispatch>(ptr, len, stream, params); + } + + template + void uniformInt(OutType* ptr, LenType len, OutType start, OutType end, cudaStream_t stream) + { + static_assert(std::is_integral::value, "Type for 'uniformInt' can only be integer!"); + ASSERT(end > start, "'end' must be greater than 'start'"); + if (sizeof(OutType) == 4) { + UniformIntDistParams params; + params.start = start; + params.end = end; + params.diff = uint32_t(params.end - params.start); + kernel_dispatch>( + ptr, len, stream, params); + } else { + UniformIntDistParams params; + params.start = start; + params.end = end; + params.diff = uint64_t(params.end - params.start); + kernel_dispatch>( + ptr, len, stream, params); + } } - template - void normal(Type* ptr, LenType len, Type mu, Type sigma, cudaStream_t stream) + template + void normal(OutType* ptr, LenType len, OutType mu, OutType sigma, cudaStream_t stream) { - static_assert(std::is_floating_point::value, + static_assert(std::is_floating_point::value, "Type for 'normal' can only be floating point!"); - rand2Impl( - offset, - ptr, - len, - [=] __device__(Type & val1, Type & val2, LenType idx1, LenType idx2) { - box_muller_transform(val1, val2, sigma, mu); - }, - NumThreads, - nBlocks, - type, - stream); + NormalDistParams params; + params.mu = mu; + params.sigma = sigma; + kernel_dispatch>(ptr, len, stream, params); } + template void normalInt(IntType* ptr, LenType len, IntType mu, IntType sigma, cudaStream_t stream) { static_assert(std::is_integral::value, "Type for 'normalInt' can only be integer!"); - rand2Impl( - offset, - ptr, - len, - [=] __device__(double& val1, double& val2, LenType idx1, LenType idx2) { - box_muller_transform(val1, val2, sigma, mu); - }, - NumThreads, - nBlocks, - type, - stream); - } - - template - void normalTable(Type* ptr, + NormalIntDistParams params; + params.mu = mu; + params.sigma = sigma; + kernel_dispatch>(ptr, len, stream, params); + } + + template + void normalTable(OutType* ptr, LenType n_rows, LenType n_cols, - const Type* mu, - const Type* sigma_vec, - Type sigma, + const OutType* mu_vec, + const OutType* sigma_vec, + OutType sigma, cudaStream_t stream) { - rand2Impl( - offset, - ptr, - n_rows * n_cols, - [=] __device__(Type & val1, Type & val2, LenType idx1, LenType idx2) { - // yikes! use fast-int-div - auto col1 = idx1 % n_cols; - auto col2 = idx2 % n_cols; - auto mean1 = mu[col1]; - auto mean2 = mu[col2]; - auto sig1 = sigma_vec == nullptr ? sigma : sigma_vec[col1]; - auto sig2 = sigma_vec == nullptr ? sigma : sigma_vec[col2]; - box_muller_transform(val1, val2, sig1, mean1, sig2, mean2); - }, - NumThreads, - nBlocks, - type, - stream); - } - - template - void fill(Type* ptr, LenType len, Type val, cudaStream_t stream) - { - detail::constFillKernel<<>>(ptr, len, val); - RAFT_CUDA_TRY(cudaPeekAtLastError()); + NormalTableDistParams params; + params.n_rows = n_rows; + params.n_cols = n_cols; + params.mu_vec = mu_vec; + params.sigma = sigma; + params.sigma_vec = sigma_vec; + LenType len = n_rows * n_cols; + kernel_dispatch>( + ptr, len, stream, params); + } + + template + void fill(OutType* ptr, LenType len, OutType val, cudaStream_t stream) + { + InvariantDistParams params; + params.const_val = val; + kernel_dispatch>(ptr, len, stream, params); } template void bernoulli(OutType* ptr, LenType len, Type prob, cudaStream_t stream) { - custom_distribution( - ptr, len, [=] __device__(Type val, LenType idx) { return val > prob; }, stream); + BernoulliDistParams params; + params.prob = prob; + kernel_dispatch>(ptr, len, stream, params); } - template - void scaled_bernoulli(Type* ptr, LenType len, Type prob, Type scale, cudaStream_t stream) + template + void scaled_bernoulli(OutType* ptr, LenType len, OutType prob, OutType scale, cudaStream_t stream) { - static_assert(std::is_floating_point::value, + static_assert(std::is_floating_point::value, "Type for 'scaled_bernoulli' can only be floating point!"); - custom_distribution( - ptr, - len, - [=] __device__(Type val, LenType idx) { return val > prob ? -scale : scale; }, - stream); - } - - template - void gumbel(Type* ptr, LenType len, Type mu, Type beta, cudaStream_t stream) - { - custom_distribution( - ptr, - len, - [=] __device__(Type val, LenType idx) { return mu - beta * raft::myLog(-raft::myLog(val)); }, - stream); - } - - template - void lognormal(Type* ptr, LenType len, Type mu, Type sigma, cudaStream_t stream) - { - rand2Impl( - offset, - ptr, - len, - [=] __device__(Type & val1, Type & val2, LenType idx1, LenType idx2) { - box_muller_transform(val1, val2, sigma, mu); - val1 = raft::myExp(val1); - val2 = raft::myExp(val2); - }, - NumThreads, - nBlocks, - type, - stream); - } - - template - void logistic(Type* ptr, LenType len, Type mu, Type scale, cudaStream_t stream) - { - custom_distribution( - ptr, - len, - [=] __device__(Type val, LenType idx) { - constexpr Type one = (Type)1.0; - return mu - scale * raft::myLog(one / val - one); - }, - stream); - } - - template - void exponential(Type* ptr, LenType len, Type lambda, cudaStream_t stream) - { - custom_distribution( - ptr, - len, - [=] __device__(Type val, LenType idx) { - constexpr Type one = (Type)1.0; - return -raft::myLog(one - val) / lambda; - }, - stream); - } - - template - void rayleigh(Type* ptr, LenType len, Type sigma, cudaStream_t stream) - { - custom_distribution( - ptr, - len, - [=] __device__(Type val, LenType idx) { - constexpr Type one = (Type)1.0; - constexpr Type two = (Type)2.0; - return raft::mySqrt(-two * raft::myLog(one - val)) * sigma; - }, - stream); - } - - template - void laplace(Type* ptr, LenType len, Type mu, Type scale, cudaStream_t stream) - { - custom_distribution( - ptr, - len, - [=] __device__(Type val, LenType idx) { - constexpr Type one = (Type)1.0; - constexpr Type two = (Type)2.0; - constexpr Type oneHalf = (Type)0.5; - Type out; - if (val <= oneHalf) { - out = mu + scale * raft::myLog(two * val); - } else { - out = mu - scale * raft::myLog(two * (one - val)); - } - return out; - }, - stream); + ScaledBernoulliDistParams params; + params.prob = prob; + params.scale = scale; + kernel_dispatch>( + ptr, len, stream, params); + } + + template + void gumbel(OutType* ptr, LenType len, OutType mu, OutType beta, cudaStream_t stream) + { + GumbelDistParams params; + params.mu = mu; + params.beta = beta; + kernel_dispatch>(ptr, len, stream, params); + } + + template + void lognormal(OutType* ptr, LenType len, OutType mu, OutType sigma, cudaStream_t stream) + { + LogNormalDistParams params; + params.mu = mu; + params.sigma = sigma; + kernel_dispatch>(ptr, len, stream, params); + } + + template + void logistic(OutType* ptr, LenType len, OutType mu, OutType scale, cudaStream_t stream) + { + LogisticDistParams params; + params.mu = mu; + params.scale = scale; + kernel_dispatch>(ptr, len, stream, params); + } + + template + void exponential(OutType* ptr, LenType len, OutType lambda, cudaStream_t stream) + { + ExponentialDistParams params; + params.lambda = lambda; + kernel_dispatch>(ptr, len, stream, params); + } + + template + void rayleigh(OutType* ptr, LenType len, OutType sigma, cudaStream_t stream) + { + RayleighDistParams params; + params.sigma = sigma; + kernel_dispatch>(ptr, len, stream, params); + } + + template + void laplace(OutType* ptr, LenType len, OutType mu, OutType scale, cudaStream_t stream) + { + LaplaceDistParams params; + params.mu = mu; + params.scale = scale; + kernel_dispatch>(ptr, len, stream, params); + } + + void advance(uint64_t max_uniq_subsequences_used, + uint64_t max_numbers_generated_per_subsequence = 0) + { + state.base_subsequence += max_uniq_subsequences_used; + } + + template + void kernel_dispatch(OutType* ptr, LenType len, cudaStream_t stream, ParamType params) + { + switch (type) { + case GenPhilox: + fillKernel + <<>>( + state.seed, state.base_subsequence, 0, ptr, len, params); + break; + case GenPC: + fillKernel<<>>( + state.seed, state.base_subsequence, 0, ptr, len, params); + break; + default: break; + } + // The max_numbers_generated_per_subsequence parameter does not matter for now, using 16 for now + advance(uint64_t(nBlocks) * nThreads, 16); + return; } template @@ -634,141 +835,28 @@ class RngImpl { rmm::device_uvector outIdxBuff(len, stream); auto* inIdxPtr = inIdx.data(); // generate modified weights - custom_distribution( - expWts.data(), - len, - [wts, inIdxPtr] __device__(WeightsT val, IdxT idx) { - inIdxPtr[idx] = idx; - constexpr WeightsT one = (WeightsT)1.0; - auto exp = -raft::myLog(one - val); - if (wts != nullptr) { return exp / wts[idx]; } - return exp; - }, - stream); + SamplingParams params; + params.inIdxPtr = inIdxPtr; + params.wts = wts; + kernel_dispatch>( + expWts.data(), len, stream, params); ///@todo: use a more efficient partitioning scheme instead of full sort // sort the array and pick the top sampledLen items IdxT* outIdxPtr = outIdxBuff.data(); rmm::device_uvector workspace(0, stream); sortPairs(workspace, expWts.data(), sortedWts.data(), inIdxPtr, outIdxPtr, (int)len, stream); if (outIdx != nullptr) { - RAFT_CUDA_TRY(cudaMemcpyAsync( + CUDA_CHECK(cudaMemcpyAsync( outIdx, outIdxPtr, sizeof(IdxT) * sampledLen, cudaMemcpyDeviceToDevice, stream)); } - raft::scatter(out, in, outIdxPtr, sampledLen, stream); + scatter(out, in, outIdxPtr, sampledLen, stream); } - template - void custom_distribution(OutType* ptr, LenType len, Lambda randOp, cudaStream_t stream) - { - randImpl( - offset, ptr, len, randOp, NumThreads, nBlocks, type, stream); - } - template - void custom_distribution2(OutType* ptr, LenType len, Lambda randOp, cudaStream_t stream) - { - rand2Impl( - offset, ptr, len, randOp, NumThreads, nBlocks, type, stream); - } - /** @} */ - - private: - /** generator type */ GeneratorType type; - /** - * offset is also used to initialize curand state. - * Limits period of Philox RNG from (4 * 2^128) to (Blocks * Threads * 2^64), - * but is still a large period. - */ - uint64_t offset; + RngState state; /** number of blocks to launch */ int nBlocks; - /** next seed generator for device-side RNG */ - std::mt19937_64 gen; - - static const int NumThreads = 256; - - template - uint64_t _setupSeeds(uint64_t& seed, uint64_t& offset, LenType len, int nThreads, int nBlocks) - { - LenType itemsPerThread = raft::ceildiv(len, LenType(nBlocks * nThreads)); - if (IsNormal && itemsPerThread % 2 == 1) { ++itemsPerThread; } - // curand uses 2 32b uint's to generate one double - uint64_t factor = sizeof(Type) / sizeof(float); - if (factor == 0) ++factor; - // Check if there are enough random numbers left in sequence - // If not, then generate new seed and start from zero offset - uint64_t newOffset = offset + LenType(itemsPerThread) * factor; - if (newOffset < offset) { - offset = 0; - seed = gen(); - newOffset = itemsPerThread * factor; - } - return newOffset; - } - - template - void randImpl(uint64_t& offset, - OutType* ptr, - LenType len, - Lambda randOp, - int nThreads, - int nBlocks, - GeneratorType type, - cudaStream_t stream) - { - if (len <= 0) return; - uint64_t seed = gen(); - auto newOffset = _setupSeeds(seed, offset, len, nThreads, nBlocks); - switch (type) { - case GenPhilox: - detail::randKernel - <<>>(seed, offset, ptr, len, randOp); - break; - case GenTaps: - detail::randKernel - <<>>(seed, offset, ptr, len, randOp); - break; - case GenKiss99: - detail::randKernel - <<>>(seed, offset, ptr, len, randOp); - break; - default: ASSERT(false, "randImpl: Incorrect generator type! %d", type); - }; - RAFT_CUDA_TRY(cudaGetLastError()); - offset = newOffset; - } - - template - void rand2Impl(uint64_t& offset, - OutType* ptr, - LenType len, - Lambda2 rand2Op, - int nThreads, - int nBlocks, - GeneratorType type, - cudaStream_t stream) - { - if (len <= 0) return; - auto seed = gen(); - auto newOffset = _setupSeeds(seed, offset, len, nThreads, nBlocks); - switch (type) { - case GenPhilox: - detail::rand2Kernel - <<>>(seed, offset, ptr, len, rand2Op); - break; - case GenTaps: - detail::rand2Kernel - <<>>(seed, offset, ptr, len, rand2Op); - break; - case GenKiss99: - detail::rand2Kernel - <<>>(seed, offset, ptr, len, rand2Op); - break; - default: ASSERT(false, "rand2Impl: Incorrect generator type! %d", type); - }; - RAFT_CUDA_TRY(cudaGetLastError()); - offset = newOffset; - } + static const int nThreads = 256; }; }; // end namespace detail diff --git a/cpp/include/raft/random/rng.hpp b/cpp/include/raft/random/rng.hpp index 4ec25e71a2..2b1bdbccf7 100644 --- a/cpp/include/raft/random/rng.hpp +++ b/cpp/include/raft/random/rng.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,21 +21,33 @@ namespace raft { namespace random { -/** all different generator types used */ +using detail::RngState; + using detail::GeneratorType; -/** curand-based philox generator */ +using detail::GenPC; using detail::GenPhilox; -/** GenTaps : LFSR taps generator */ -using detail::GenTaps; -/** GenKiss99 : kiss99 generator (currently the fastest) */ -using detail::GenKiss99; -/** Philox-based random number generator */ +using detail::PCGenerator; using detail::PhiloxGenerator; -/** LFSR taps-filter for generating random numbers. */ -using detail::TapsGenerator; -/** Kiss99-based random number generator */ -using detail::Kiss99Generator; + +using detail::BernoulliDistParams; +using detail::ExponentialDistParams; +using detail::GumbelDistParams; +using detail::InvariantDistParams; +using detail::LaplaceDistParams; +using detail::LogisticDistParams; +using detail::LogNormalDistParams; +using detail::NormalDistParams; +using detail::NormalIntDistParams; +using detail::NormalTableDistParams; +using detail::RayleighDistParams; +using detail::SamplingParams; +using detail::ScaledBernoulliDistParams; +using detail::UniformDistParams; +using detail::UniformIntDistParams; + +// Not strictly needed due to C++ ADL rules +using detail::custom_next; /** * @brief Helper method to compute Box Muller transform @@ -53,8 +65,9 @@ using detail::Kiss99Generator; template DI void box_muller_transform(Type& val1, Type& val2, Type sigma1, Type mu1, Type sigma2, Type mu2) { - detail::box_muller_transform(val1, val2, sigma1, mu1, sigma1, mu2); + detail::box_muller_transform(val1, val2, sigma1, mu1, sigma2, mu2); } + template DI void box_muller_transform(Type& val1, Type& val2, Type sigma1, Type mu1) { @@ -62,7 +75,6 @@ DI void box_muller_transform(Type& val1, Type& val2, Type sigma1, Type mu1) } /** @} */ -/** The main random number generator class, fully on GPUs */ class Rng : public detail::RngImpl { public: /** @@ -71,16 +83,7 @@ class Rng : public detail::RngImpl { * @param _t backend device RNG generator type * @note Refer to the `Rng::seed` method for details about seeding the engine */ - Rng(uint64_t _s, GeneratorType _t = GenPhilox) : detail::RngImpl{_s, _t} {} - - /** - * @brief Seed (and thus re-initialize) the underlying RNG engine - * @param _s 64b seed used to initialize the RNG - * @note If you need non-reproducibility, pass a seed that's, for example, a - * function of timestamp. Another example is to use the c++11's - * `std::random_device` for setting seed. - */ - void seed(uint64_t _s) { detail::RngImpl::seed(_s); } + Rng(uint64_t _s, GeneratorType _t = GenPhilox) : detail::RngImpl(_s, _t) {} /** * @brief Generates the 'a' and 'b' parameters for a modulo affine @@ -109,13 +112,14 @@ class Rng : public detail::RngImpl { * @param stream stream where to launch the kernel * @{ */ - template - void uniform(Type* ptr, LenType len, Type start, Type end, cudaStream_t stream) + template + void uniform(OutType* ptr, LenType len, OutType start, OutType end, cudaStream_t stream) { detail::RngImpl::uniform(ptr, len, start, end, stream); } - template - void uniformInt(IntType* ptr, LenType len, IntType start, IntType end, cudaStream_t stream) + + template + void uniformInt(OutType* ptr, LenType len, OutType start, OutType end, cudaStream_t stream) { detail::RngImpl::uniformInt(ptr, len, start, end, stream); } @@ -132,11 +136,12 @@ class Rng : public detail::RngImpl { * @param stream stream where to launch the kernel * @{ */ - template - void normal(Type* ptr, LenType len, Type mu, Type sigma, cudaStream_t stream) + template + void normal(OutType* ptr, LenType len, OutType mu, OutType sigma, cudaStream_t stream) { detail::RngImpl::normal(ptr, len, mu, sigma, stream); } + template void normalInt(IntType* ptr, LenType len, IntType mu, IntType sigma, cudaStream_t stream) { @@ -158,22 +163,22 @@ class Rng : public detail::RngImpl { * @param ptr the output table (dim = n_rows x n_cols) * @param n_rows number of rows in the table * @param n_cols number of columns in the table - * @param mu mean vector (dim = n_cols x 1). + * @param mu_vec mean vector (dim = n_cols x 1). * @param sigma_vec std-dev vector of each component (dim = n_cols x 1). Pass * a nullptr to use the same scalar 'sigma' across all components * @param sigma scalar sigma to be used if 'sigma_vec' is nullptr * @param stream stream where to launch the kernel */ - template - void normalTable(Type* ptr, + template + void normalTable(OutType* ptr, LenType n_rows, LenType n_cols, - const Type* mu, - const Type* sigma_vec, - Type sigma, + const OutType* mu_vec, + const OutType* sigma_vec, + OutType sigma, cudaStream_t stream) { - detail::RngImpl::normalTable(ptr, n_rows, n_cols, mu, sigma_vec, sigma, stream); + detail::RngImpl::normalTable(ptr, n_rows, n_cols, mu_vec, sigma_vec, sigma, stream); } /** @@ -185,8 +190,8 @@ class Rng : public detail::RngImpl { * @param val value to be filled * @param stream stream where to launch the kernel */ - template - void fill(Type* ptr, LenType len, Type val, cudaStream_t stream) + template + void fill(OutType* ptr, LenType len, OutType val, cudaStream_t stream) { detail::RngImpl::fill(ptr, len, val, stream); } @@ -219,14 +224,14 @@ class Rng : public detail::RngImpl { * @param scale scaling factor * @param stream stream where to launch the kernel */ - template - void scaled_bernoulli(Type* ptr, LenType len, Type prob, Type scale, cudaStream_t stream) + template + void scaled_bernoulli(OutType* ptr, LenType len, OutType prob, OutType scale, cudaStream_t stream) { detail::RngImpl::scaled_bernoulli(ptr, len, prob, scale, stream); } /** - * @brief Generate gumbel distributed random numbers + * @brief Generate Gumbel distributed random numbers * @tparam Type data type of output random number * @tparam LenType data type used to represent length of the arrays * @param ptr output array @@ -236,8 +241,8 @@ class Rng : public detail::RngImpl { * @param stream stream where to launch the kernel * @note https://en.wikipedia.org/wiki/Gumbel_distribution */ - template - void gumbel(Type* ptr, LenType len, Type mu, Type beta, cudaStream_t stream) + template + void gumbel(OutType* ptr, LenType len, OutType mu, OutType beta, cudaStream_t stream) { detail::RngImpl::gumbel(ptr, len, mu, beta, stream); } @@ -252,8 +257,8 @@ class Rng : public detail::RngImpl { * @param sigma std-dev of the distribution * @param stream stream where to launch the kernel */ - template - void lognormal(Type* ptr, LenType len, Type mu, Type sigma, cudaStream_t stream) + template + void lognormal(OutType* ptr, LenType len, OutType mu, OutType sigma, cudaStream_t stream) { detail::RngImpl::lognormal(ptr, len, mu, sigma, stream); } @@ -268,8 +273,8 @@ class Rng : public detail::RngImpl { * @param scale scale value * @param stream stream where to launch the kernel */ - template - void logistic(Type* ptr, LenType len, Type mu, Type scale, cudaStream_t stream) + template + void logistic(OutType* ptr, LenType len, OutType mu, OutType scale, cudaStream_t stream) { detail::RngImpl::logistic(ptr, len, mu, scale, stream); } @@ -283,8 +288,8 @@ class Rng : public detail::RngImpl { * @param lambda the lambda * @param stream stream where to launch the kernel */ - template - void exponential(Type* ptr, LenType len, Type lambda, cudaStream_t stream) + template + void exponential(OutType* ptr, LenType len, OutType lambda, cudaStream_t stream) { detail::RngImpl::exponential(ptr, len, lambda, stream); } @@ -298,8 +303,8 @@ class Rng : public detail::RngImpl { * @param sigma the sigma * @param stream stream where to launch the kernel */ - template - void rayleigh(Type* ptr, LenType len, Type sigma, cudaStream_t stream) + template + void rayleigh(OutType* ptr, LenType len, OutType sigma, cudaStream_t stream) { detail::RngImpl::rayleigh(ptr, len, sigma, stream); } @@ -314,12 +319,17 @@ class Rng : public detail::RngImpl { * @param scale the scale * @param stream stream where to launch the kernel */ - template - void laplace(Type* ptr, LenType len, Type mu, Type scale, cudaStream_t stream) + template + void laplace(OutType* ptr, LenType len, OutType mu, OutType scale, cudaStream_t stream) { detail::RngImpl::laplace(ptr, len, mu, scale, stream); } + void advance(uint64_t max_streams, uint64_t max_calls_per_subsequence) + { + detail::RngImpl::advance(max_streams, max_calls_per_subsequence); + } + /** * @brief Sample the input array without replacement, optionally based on the * input weight vector for each element in the array @@ -359,33 +369,6 @@ class Rng : public detail::RngImpl { detail::RngImpl::sampleWithoutReplacement( handle, out, outIdx, in, wts, sampledLen, len, stream); } - - /** - * @brief Core method to generate a pdf based on the cdf that is defined in - * the input device lambda - * - * @tparam OutType output type - * @tparam MathType type on which arithmetic is done - * @tparam LenTyp index type - * @tparam Lambda device lambda (or operator) - * - * @param[out] ptr output buffer [on device] [len = len] - * @param[in] len number of elements to be generated - * @param[in] randOp the device lambda or operator - * @param[in] stream cuda stream - * @{ - */ - template - void custom_distribution(OutType* ptr, LenType len, Lambda randOp, cudaStream_t stream) - { - detail::RngImpl::custom_distribution(ptr, len, randOp, stream); - } - template - void custom_distribution2(OutType* ptr, LenType len, Lambda randOp, cudaStream_t stream) - { - detail::RngImpl::custom_distribution2(ptr, len, randOp, stream); - } - /** @} */ }; }; // end namespace random diff --git a/cpp/test/linalg/eig_sel.cu b/cpp/test/linalg/eig_sel.cu index 7aab2c18c0..e35835a445 100644 --- a/cpp/test/linalg/eig_sel.cu +++ b/cpp/test/linalg/eig_sel.cu @@ -21,7 +21,6 @@ #include #include #include -#include namespace raft { namespace linalg { @@ -30,10 +29,8 @@ template struct EigSelInputs { T tolerance; int len; - int n_row; - int n_col; - unsigned long long int seed; int n; + int n_eigen_vals; }; template @@ -49,10 +46,10 @@ class EigSelTest : public ::testing::TestWithParam> { : params(::testing::TestWithParam>::GetParam()), stream(handle.get_stream()), cov_matrix(params.len, stream), - eig_vectors(12, stream), - eig_vectors_ref(12, stream), - eig_vals(params.n_col, stream), - eig_vals_ref(params.n_col, stream) + eig_vectors(params.n_eigen_vals * params.n, stream), + eig_vectors_ref(params.n_eigen_vals * params.n, stream), + eig_vals(params.n, stream), + eig_vals_ref(params.n, stream) { } @@ -61,6 +58,7 @@ class EigSelTest : public ::testing::TestWithParam> { { int len = params.len; + ///@todo: Generate a random symmetric matrix T cov_matrix_h[] = { 1.0, 0.9, 0.81, 0.729, 0.9, 1.0, 0.9, 0.81, 0.81, 0.9, 1.0, 0.9, 0.729, 0.81, 0.9, 1.0}; ASSERT(len == 16, "This test only works with 4x4 matrices!"); @@ -78,16 +76,17 @@ class EigSelTest : public ::testing::TestWithParam> { 0.5123, 0.5123, 0.4874}; - T eig_vals_ref_h[] = {0.1024, 0.3096, 3.5266, 3.5266}; + T eig_vals_ref_h[] = {0.1024, 0.3096, 3.5266, 0.0}; - raft::update_device(eig_vectors_ref.data(), eig_vectors_ref_h, 12, stream); - raft::update_device(eig_vals_ref.data(), eig_vals_ref_h, 4, stream); + raft::update_device( + eig_vectors_ref.data(), eig_vectors_ref_h, params.n_eigen_vals * params.n, stream); + raft::update_device(eig_vals_ref.data(), eig_vals_ref_h, params.n, stream); raft::linalg::eigSelDC(handle, cov_matrix.data(), - params.n_row, - params.n_col, - 3, + params.n, + params.n, + params.n_eigen_vals, eig_vectors.data(), eig_vals.data(), EigVecMemUsage::OVERWRITE_INPUT, @@ -107,16 +106,16 @@ class EigSelTest : public ::testing::TestWithParam> { rmm::device_uvector eig_vals_ref; }; -const std::vector> inputsf2 = {{0.001f, 4 * 4, 4, 4, 1234ULL, 256}}; +const std::vector> inputsf2 = {{0.001f, 4 * 4, 4, 3}}; -const std::vector> inputsd2 = {{0.001, 4 * 4, 4, 4, 1234ULL, 256}}; +const std::vector> inputsd2 = {{0.001, 4 * 4, 4, 3}}; typedef EigSelTest EigSelTestValF; TEST_P(EigSelTestValF, Result) { ASSERT_TRUE(raft::devArrMatch(eig_vals_ref.data(), eig_vals.data(), - params.n_col, + params.n_eigen_vals, raft::CompareApproxAbs(params.tolerance), stream)); } @@ -126,7 +125,7 @@ TEST_P(EigSelTestValD, Result) { ASSERT_TRUE(raft::devArrMatch(eig_vals_ref.data(), eig_vals.data(), - params.n_col, + params.n_eigen_vals, raft::CompareApproxAbs(params.tolerance), stream)); } @@ -136,7 +135,7 @@ TEST_P(EigSelTestVecF, Result) { ASSERT_TRUE(raft::devArrMatch(eig_vectors_ref.data(), eig_vectors.data(), - 12, + params.n_eigen_vals * params.n, raft::CompareApproxAbs(params.tolerance), stream)); } @@ -146,7 +145,7 @@ TEST_P(EigSelTestVecD, Result) { ASSERT_TRUE(raft::devArrMatch(eig_vectors_ref.data(), eig_vectors.data(), - 12, + params.n_eigen_vals * params.n, raft::CompareApproxAbs(params.tolerance), stream)); } diff --git a/cpp/test/random/rng.cu b/cpp/test/random/rng.cu index 08e522d369..c63763d5a4 100644 --- a/cpp/test/random/rng.cu +++ b/cpp/test/random/rng.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,7 +26,7 @@ namespace raft { namespace random { -using namespace raft::random::detail; +using namespace raft::random; enum RandomType { RNG_Normal, @@ -99,7 +99,7 @@ class RngTest : public ::testing::TestWithParam> { { // Tests are configured with their expected test-values sigma. For example, // 4 x sigma indicates the test shouldn't fail 99.9% of the time. - num_sigma = 10; + num_sigma = 4; Rng r(params.seed, params.gtype); switch (params.type) { case RNG_Normal: r.normal(data.data(), params.len, params.start, params.end, stream); break; @@ -211,7 +211,7 @@ const std::vector> inputsf = { {0.005, 8 * 1024, -1.f, 1.f, RNG_Uniform, GenPhilox, 1234ULL}, {0.005, 32 * 1024, 1.f, 1.f, RNG_Gumbel, GenPhilox, 1234ULL}, {0.01, 8 * 1024, 1.f, 1.f, RNG_Gumbel, GenPhilox, 1234ULL}, - {0.005, 32 * 1024, 1.f, 1.f, RNG_Logistic, GenPhilox, 1234ULL}, + {0.005, 32 * 1024, 1.f, 1.f, RNG_Logistic, GenPhilox, 67632ULL}, {0.01, 8 * 1024, 1.f, 1.f, RNG_Logistic, GenPhilox, 1234ULL}, {0.008, 32 * 1024, 1.f, 1.f, RNG_Exp, GenPhilox, 1234ULL}, {0.015, 8 * 1024, 1.f, 1.f, RNG_Exp, GenPhilox, 1234ULL}, @@ -220,39 +220,22 @@ const std::vector> inputsf = { {0.02, 32 * 1024, 1.f, 1.f, RNG_Laplace, GenPhilox, 1234ULL}, {0.04, 8 * 1024, 1.f, 1.f, RNG_Laplace, GenPhilox, 1234ULL}, - {0.0055, 32 * 1024, 1.f, 1.f, RNG_Normal, GenTaps, 1234ULL}, - {0.011, 8 * 1024, 1.f, 1.f, RNG_Normal, GenTaps, 1234ULL}, - {0.05, 32 * 1024, 1.f, 1.f, RNG_LogNormal, GenTaps, 1234ULL}, - {0.1, 8 * 1024, 1.f, 1.f, RNG_LogNormal, GenTaps, 1234ULL}, - {0.003, 32 * 1024, -1.f, 1.f, RNG_Uniform, GenTaps, 1234ULL}, - {0.005, 8 * 1024, -1.f, 1.f, RNG_Uniform, GenTaps, 1234ULL}, - {0.005, 32 * 1024, 1.f, 1.f, RNG_Gumbel, GenTaps, 1234ULL}, - {0.01, 8 * 1024, 1.f, 1.f, RNG_Gumbel, GenTaps, 1234ULL}, - {0.005, 32 * 1024, 1.f, 1.f, RNG_Logistic, GenTaps, 1234ULL}, - {0.01, 8 * 1024, 1.f, 1.f, RNG_Logistic, GenTaps, 1234ULL}, - {0.008, 32 * 1024, 1.f, 1.f, RNG_Exp, GenTaps, 1234ULL}, - {0.015, 8 * 1024, 1.f, 1.f, RNG_Exp, GenTaps, 1234ULL}, - {0.0125, 32 * 1024, 1.f, 1.f, RNG_Rayleigh, GenTaps, 1234ULL}, - {0.025, 8 * 1024, 1.f, 1.f, RNG_Rayleigh, GenTaps, 1234ULL}, - {0.02, 32 * 1024, 1.f, 1.f, RNG_Laplace, GenTaps, 1234ULL}, - {0.04, 8 * 1024, 1.f, 1.f, RNG_Laplace, GenTaps, 1234ULL}, - - {0.0055, 32 * 1024, 1.f, 1.f, RNG_Normal, GenKiss99, 1234ULL}, - {0.011, 8 * 1024, 1.f, 1.f, RNG_Normal, GenKiss99, 1234ULL}, - {0.05, 32 * 1024, 1.f, 1.f, RNG_LogNormal, GenKiss99, 1234ULL}, - {0.1, 8 * 1024, 1.f, 1.f, RNG_LogNormal, GenKiss99, 1234ULL}, - {0.003, 32 * 1024, -1.f, 1.f, RNG_Uniform, GenKiss99, 1234ULL}, - {0.005, 8 * 1024, -1.f, 1.f, RNG_Uniform, GenKiss99, 1234ULL}, - {0.005, 32 * 1024, 1.f, 1.f, RNG_Gumbel, GenKiss99, 1234ULL}, - {0.01, 8 * 1024, 1.f, 1.f, RNG_Gumbel, GenKiss99, 1234ULL}, - {0.005, 32 * 1024, 1.f, 1.f, RNG_Logistic, GenKiss99, 1234ULL}, - {0.01, 8 * 1024, 1.f, 1.f, RNG_Logistic, GenKiss99, 1234ULL}, - {0.008, 32 * 1024, 1.f, 1.f, RNG_Exp, GenKiss99, 1234ULL}, - {0.015, 8 * 1024, 1.f, 1.f, RNG_Exp, GenKiss99, 1234ULL}, - {0.0125, 32 * 1024, 1.f, 1.f, RNG_Rayleigh, GenKiss99, 1234ULL}, - {0.025, 8 * 1024, 1.f, 1.f, RNG_Rayleigh, GenKiss99, 1234ULL}, - {0.02, 32 * 1024, 1.f, 1.f, RNG_Laplace, GenKiss99, 1234ULL}, - {0.04, 8 * 1024, 1.f, 1.f, RNG_Laplace, GenKiss99, 1234ULL}}; + {0.0055, 32 * 1024, 1.f, 1.f, RNG_Normal, GenPC, 1234ULL}, + {0.011, 8 * 1024, 1.f, 1.f, RNG_Normal, GenPC, 1234ULL}, + {0.05, 32 * 1024, 1.f, 1.f, RNG_LogNormal, GenPC, 1234ULL}, + {0.1, 8 * 1024, 1.f, 1.f, RNG_LogNormal, GenPC, 1234ULL}, + {0.003, 32 * 1024, -1.f, 1.f, RNG_Uniform, GenPC, 1234ULL}, + {0.005, 8 * 1024, -1.f, 1.f, RNG_Uniform, GenPC, 1234ULL}, + {0.005, 32 * 1024, 1.f, 1.f, RNG_Gumbel, GenPC, 1234ULL}, + {0.01, 8 * 1024, 1.f, 1.f, RNG_Gumbel, GenPC, 1234ULL}, + {0.005, 32 * 1024, 1.f, 1.f, RNG_Logistic, GenPC, 1234ULL}, + {0.01, 8 * 1024, 1.f, 1.f, RNG_Logistic, GenPC, 1234ULL}, + {0.008, 32 * 1024, 1.f, 1.f, RNG_Exp, GenPC, 1234ULL}, + {0.015, 8 * 1024, 1.f, 1.f, RNG_Exp, GenPC, 1234ULL}, + {0.0125, 32 * 1024, 1.f, 1.f, RNG_Rayleigh, GenPC, 1234ULL}, + {0.025, 8 * 1024, 1.f, 1.f, RNG_Rayleigh, GenPC, 1234ULL}, + {0.02, 32 * 1024, 1.f, 1.f, RNG_Laplace, GenPC, 1234ULL}, + {0.04, 8 * 1024, 1.f, 1.f, RNG_Laplace, GenPC, 1234ULL}}; TEST_P(RngTestF, Result) { @@ -273,7 +256,7 @@ const std::vector> inputsd = { {0.005, 8 * 1024, -1.0, 1.0, RNG_Uniform, GenPhilox, 1234ULL}, {0.005, 32 * 1024, 1.0, 1.0, RNG_Gumbel, GenPhilox, 1234ULL}, {0.01, 8 * 1024, 1.0, 1.0, RNG_Gumbel, GenPhilox, 1234ULL}, - {0.005, 32 * 1024, 1.0, 1.0, RNG_Logistic, GenPhilox, 1234ULL}, + {0.005, 32 * 1024, 1.0, 1.0, RNG_Logistic, GenPhilox, 67632ULL}, {0.01, 8 * 1024, 1.0, 1.0, RNG_Logistic, GenPhilox, 1234ULL}, {0.008, 32 * 1024, 1.0, 1.0, RNG_Exp, GenPhilox, 1234ULL}, {0.015, 8 * 1024, 1.0, 1.0, RNG_Exp, GenPhilox, 1234ULL}, @@ -282,39 +265,23 @@ const std::vector> inputsd = { {0.02, 32 * 1024, 1.0, 1.0, RNG_Laplace, GenPhilox, 1234ULL}, {0.04, 8 * 1024, 1.0, 1.0, RNG_Laplace, GenPhilox, 1234ULL}, - {0.0055, 32 * 1024, 1.0, 1.0, RNG_Normal, GenTaps, 1234ULL}, - {0.011, 8 * 1024, 1.0, 1.0, RNG_Normal, GenTaps, 1234ULL}, - {0.05, 32 * 1024, 1.0, 1.0, RNG_LogNormal, GenTaps, 1234ULL}, - {0.1, 8 * 1024, 1.0, 1.0, RNG_LogNormal, GenTaps, 1234ULL}, - {0.003, 32 * 1024, -1.0, 1.0, RNG_Uniform, GenTaps, 1234ULL}, - {0.005, 8 * 1024, -1.0, 1.0, RNG_Uniform, GenTaps, 1234ULL}, - {0.005, 32 * 1024, 1.0, 1.0, RNG_Gumbel, GenTaps, 1234ULL}, - {0.01, 8 * 1024, 1.0, 1.0, RNG_Gumbel, GenTaps, 1234ULL}, - {0.005, 32 * 1024, 1.0, 1.0, RNG_Logistic, GenTaps, 1234ULL}, - {0.01, 8 * 1024, 1.0, 1.0, RNG_Logistic, GenTaps, 1234ULL}, - {0.008, 32 * 1024, 1.0, 1.0, RNG_Exp, GenTaps, 1234ULL}, - {0.015, 8 * 1024, 1.0, 1.0, RNG_Exp, GenTaps, 1234ULL}, - {0.0125, 32 * 1024, 1.0, 1.0, RNG_Rayleigh, GenTaps, 1234ULL}, - {0.025, 8 * 1024, 1.0, 1.0, RNG_Rayleigh, GenTaps, 1234ULL}, - {0.02, 32 * 1024, 1.0, 1.0, RNG_Laplace, GenTaps, 1234ULL}, - {0.04, 8 * 1024, 1.0, 1.0, RNG_Laplace, GenTaps, 1234ULL}, - - {0.0055, 32 * 1024, 1.0, 1.0, RNG_Normal, GenKiss99, 1234ULL}, - {0.011, 8 * 1024, 1.0, 1.0, RNG_Normal, GenKiss99, 1234ULL}, - {0.05, 32 * 1024, 1.0, 1.0, RNG_LogNormal, GenKiss99, 1234ULL}, - {0.1, 8 * 1024, 1.0, 1.0, RNG_LogNormal, GenKiss99, 1234ULL}, - {0.003, 32 * 1024, -1.0, 1.0, RNG_Uniform, GenKiss99, 1234ULL}, - {0.005, 8 * 1024, -1.0, 1.0, RNG_Uniform, GenKiss99, 1234ULL}, - {0.005, 32 * 1024, 1.0, 1.0, RNG_Gumbel, GenKiss99, 1234ULL}, - {0.01, 8 * 1024, 1.0, 1.0, RNG_Gumbel, GenKiss99, 1234ULL}, - {0.005, 32 * 1024, 1.0, 1.0, RNG_Logistic, GenKiss99, 1234ULL}, - {0.01, 8 * 1024, 1.0, 1.0, RNG_Logistic, GenKiss99, 1234ULL}, - {0.008, 32 * 1024, 1.0, 1.0, RNG_Exp, GenKiss99, 1234ULL}, - {0.015, 8 * 1024, 1.0, 1.0, RNG_Exp, GenKiss99, 1234ULL}, - {0.0125, 32 * 1024, 1.0, 1.0, RNG_Rayleigh, GenKiss99, 1234ULL}, - {0.025, 8 * 1024, 1.0, 1.0, RNG_Rayleigh, GenKiss99, 1234ULL}, - {0.02, 32 * 1024, 1.0, 1.0, RNG_Laplace, GenKiss99, 1234ULL}, - {0.04, 8 * 1024, 1.0, 1.0, RNG_Laplace, GenKiss99, 1234ULL}}; + {0.0055, 32 * 1024, 1.0, 1.0, RNG_Normal, GenPC, 1234ULL}, + {0.011, 8 * 1024, 1.0, 1.0, RNG_Normal, GenPC, 1234ULL}, + {0.05, 32 * 1024, 1.0, 1.0, RNG_LogNormal, GenPC, 1234ULL}, + {0.1, 8 * 1024, 1.0, 1.0, RNG_LogNormal, GenPC, 1234ULL}, + {0.003, 32 * 1024, -1.0, 1.0, RNG_Uniform, GenPC, 1234ULL}, + {0.005, 8 * 1024, -1.0, 1.0, RNG_Uniform, GenPC, 1234ULL}, + {0.005, 32 * 1024, 1.0, 1.0, RNG_Gumbel, GenPC, 1234ULL}, + {0.01, 8 * 1024, 1.0, 1.0, RNG_Gumbel, GenPC, 1234ULL}, + {0.005, 32 * 1024, 1.0, 1.0, RNG_Logistic, GenPC, 1234ULL}, + {0.01, 8 * 1024, 1.0, 1.0, RNG_Logistic, GenPC, 1234ULL}, + {0.008, 32 * 1024, 1.0, 1.0, RNG_Exp, GenPC, 1234ULL}, + {0.015, 8 * 1024, 1.0, 1.0, RNG_Exp, GenPC, 1234ULL}, + {0.0125, 32 * 1024, 1.0, 1.0, RNG_Rayleigh, GenPC, 1234ULL}, + {0.025, 8 * 1024, 1.0, 1.0, RNG_Rayleigh, GenPC, 1234ULL}, + {0.02, 32 * 1024, 1.0, 1.0, RNG_Laplace, GenPC, 1234ULL}, + {0.04, 8 * 1024, 1.0, 1.0, RNG_Laplace, GenPC, 1234ULL}}; + TEST_P(RngTestD, Result) { double meanvar[2]; @@ -381,7 +348,7 @@ TEST(Rng, MeanError) rmm::device_uvector mean_result(num_experiments, stream); rmm::device_uvector std_result(num_experiments, stream); - for (auto rtype : {GenPhilox, GenKiss99 /*, raft::random::GenTaps */}) { + for (auto rtype : {GenPhilox, GenPC}) { Rng r(seed, rtype); r.normal(data.data(), len, 3.3f, 0.23f, stream); // r.uniform(data, len, -1.0, 2.0); @@ -560,10 +527,9 @@ typedef RngNormalTableTest RngNormalTableTestF; const std::vector> inputsf_t = { {0.0055, 32, 1024, 1.f, 1.f, GenPhilox, 1234ULL}, {0.011, 8, 1024, 1.f, 1.f, GenPhilox, 1234ULL}, - {0.0055, 32, 1024, 1.f, 1.f, GenTaps, 1234ULL}, - {0.011, 8, 1024, 1.f, 1.f, GenTaps, 1234ULL}, - {0.0055, 32, 1024, 1.f, 1.f, GenKiss99, 1234ULL}, - {0.011, 8, 1024, 1.f, 1.f, GenKiss99, 1234ULL}}; + + {0.0055, 32, 1024, 1.f, 1.f, GenPC, 1234ULL}, + {0.011, 8, 1024, 1.f, 1.f, GenPC, 1234ULL}}; TEST_P(RngNormalTableTestF, Result) { @@ -578,10 +544,9 @@ typedef RngNormalTableTest RngNormalTableTestD; const std::vector> inputsd_t = { {0.0055, 32, 1024, 1.0, 1.0, GenPhilox, 1234ULL}, {0.011, 8, 1024, 1.0, 1.0, GenPhilox, 1234ULL}, - {0.0055, 32, 1024, 1.0, 1.0, GenTaps, 1234ULL}, - {0.011, 8, 1024, 1.0, 1.0, GenTaps, 1234ULL}, - {0.0055, 32, 1024, 1.0, 1.0, GenKiss99, 1234ULL}, - {0.011, 8, 1024, 1.0, 1.0, GenKiss99, 1234ULL}}; + + {0.0055, 32, 1024, 1.0, 1.0, GenPC, 1234ULL}, + {0.011, 8, 1024, 1.0, 1.0, GenPC, 1234ULL}}; TEST_P(RngNormalTableTestD, Result) { double meanvar[2]; diff --git a/cpp/test/random/rng_int.cu b/cpp/test/random/rng_int.cu index 02c8dc9f39..2715181db1 100644 --- a/cpp/test/random/rng_int.cu +++ b/cpp/test/random/rng_int.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ namespace raft { namespace random { -using namespace raft::random::detail; +using namespace raft::random; enum RandomType { RNG_Uniform }; @@ -125,10 +125,9 @@ typedef RngTest RngTestU32; const std::vector> inputs_u32 = { {0.1f, 32 * 1024, 0, 20, RNG_Uniform, GenPhilox, 1234ULL}, {0.1f, 8 * 1024, 0, 20, RNG_Uniform, GenPhilox, 1234ULL}, - {0.1f, 32 * 1024, 0, 20, RNG_Uniform, GenTaps, 1234ULL}, - {0.1f, 8 * 1024, 0, 20, RNG_Uniform, GenTaps, 1234ULL}, - {0.1f, 32 * 1024, 0, 20, RNG_Uniform, GenKiss99, 1234ULL}, - {0.1f, 8 * 1024, 0, 20, RNG_Uniform, GenKiss99, 1234ULL}}; + + {0.1f, 32 * 1024, 0, 20, RNG_Uniform, GenPC, 1234ULL}, + {0.1f, 8 * 1024, 0, 20, RNG_Uniform, GenPC, 1234ULL}}; TEST_P(RngTestU32, Result) { float meanvar[2]; @@ -142,10 +141,9 @@ typedef RngTest RngTestU64; const std::vector> inputs_u64 = { {0.1f, 32 * 1024, 0, 20, RNG_Uniform, GenPhilox, 1234ULL}, {0.1f, 8 * 1024, 0, 20, RNG_Uniform, GenPhilox, 1234ULL}, - {0.1f, 32 * 1024, 0, 20, RNG_Uniform, GenTaps, 1234ULL}, - {0.1f, 8 * 1024, 0, 20, RNG_Uniform, GenTaps, 1234ULL}, - {0.1f, 32 * 1024, 0, 20, RNG_Uniform, GenKiss99, 1234ULL}, - {0.1f, 8 * 1024, 0, 20, RNG_Uniform, GenKiss99, 1234ULL}}; + + {0.1f, 32 * 1024, 0, 20, RNG_Uniform, GenPC, 1234ULL}, + {0.1f, 8 * 1024, 0, 20, RNG_Uniform, GenPC, 1234ULL}}; TEST_P(RngTestU64, Result) { float meanvar[2]; @@ -159,10 +157,9 @@ typedef RngTest RngTestS32; const std::vector> inputs_s32 = { {0.1f, 32 * 1024, 0, 20, RNG_Uniform, GenPhilox, 1234ULL}, {0.1f, 8 * 1024, 0, 20, RNG_Uniform, GenPhilox, 1234ULL}, - {0.1f, 32 * 1024, 0, 20, RNG_Uniform, GenTaps, 1234ULL}, - {0.1f, 8 * 1024, 0, 20, RNG_Uniform, GenTaps, 1234ULL}, - {0.1f, 32 * 1024, 0, 20, RNG_Uniform, GenKiss99, 1234ULL}, - {0.1f, 8 * 1024, 0, 20, RNG_Uniform, GenKiss99, 1234ULL}}; + + {0.1f, 32 * 1024, 0, 20, RNG_Uniform, GenPC, 1234ULL}, + {0.1f, 8 * 1024, 0, 20, RNG_Uniform, GenPC, 1234ULL}}; TEST_P(RngTestS32, Result) { float meanvar[2]; @@ -176,10 +173,9 @@ typedef RngTest RngTestS64; const std::vector> inputs_s64 = { {0.1f, 32 * 1024, 0, 20, RNG_Uniform, GenPhilox, 1234ULL}, {0.1f, 8 * 1024, 0, 20, RNG_Uniform, GenPhilox, 1234ULL}, - {0.1f, 32 * 1024, 0, 20, RNG_Uniform, GenTaps, 1234ULL}, - {0.1f, 8 * 1024, 0, 20, RNG_Uniform, GenTaps, 1234ULL}, - {0.1f, 32 * 1024, 0, 20, RNG_Uniform, GenKiss99, 1234ULL}, - {0.1f, 8 * 1024, 0, 20, RNG_Uniform, GenKiss99, 1234ULL}}; + + {0.1f, 32 * 1024, 0, 20, RNG_Uniform, GenPC, 1234ULL}, + {0.1f, 8 * 1024, 0, 20, RNG_Uniform, GenPC, 1234ULL}}; TEST_P(RngTestS64, Result) { float meanvar[2]; diff --git a/cpp/test/random/sample_without_replacement.cu b/cpp/test/random/sample_without_replacement.cu index a8bba340fa..e469c366c3 100644 --- a/cpp/test/random/sample_without_replacement.cu +++ b/cpp/test/random/sample_without_replacement.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,7 +25,7 @@ namespace raft { namespace random { -using namespace raft::random::detail; +using namespace raft::random; // Terminology: // SWoR - Sample Without Replacement @@ -91,67 +91,45 @@ class SWoRTest : public ::testing::TestWithParam> { }; typedef SWoRTest SWoRTestF; -const std::vector> inputsf = { - {1024, 512, -1, 0.f, GenPhilox, 1234ULL}, - {1024, 1024, -1, 0.f, GenPhilox, 1234ULL}, - {1024, 512 + 1, -1, 0.f, GenPhilox, 1234ULL}, - {1024, 1024 - 1, -1, 0.f, GenPhilox, 1234ULL}, - {1024, 512 + 2, -1, 0.f, GenPhilox, 1234ULL}, - {1024, 1024 - 2, -1, 0.f, GenPhilox, 1234ULL}, - {1024 + 1, 512, -1, 0.f, GenPhilox, 1234ULL}, - {1024 + 1, 1024, -1, 0.f, GenPhilox, 1234ULL}, - {1024 + 1, 512 + 1, -1, 0.f, GenPhilox, 1234ULL}, - {1024 + 1, 1024 + 1, -1, 0.f, GenPhilox, 1234ULL}, - {1024 + 1, 512 + 2, -1, 0.f, GenPhilox, 1234ULL}, - {1024 + 1, 1024 - 2, -1, 0.f, GenPhilox, 1234ULL}, - {1024 + 2, 512, -1, 0.f, GenPhilox, 1234ULL}, - {1024 + 2, 1024, -1, 0.f, GenPhilox, 1234ULL}, - {1024 + 2, 512 + 1, -1, 0.f, GenPhilox, 1234ULL}, - {1024 + 2, 1024 + 1, -1, 0.f, GenPhilox, 1234ULL}, - {1024 + 2, 512 + 2, -1, 0.f, GenPhilox, 1234ULL}, - {1024 + 2, 1024 + 2, -1, 0.f, GenPhilox, 1234ULL}, - {1024, 512, 10, 100000.f, GenPhilox, 1234ULL}, - - {1024, 512, -1, 0.f, GenTaps, 1234ULL}, - {1024, 1024, -1, 0.f, GenTaps, 1234ULL}, - {1024, 512 + 1, -1, 0.f, GenTaps, 1234ULL}, - {1024, 1024 - 1, -1, 0.f, GenTaps, 1234ULL}, - {1024, 512 + 2, -1, 0.f, GenTaps, 1234ULL}, - {1024, 1024 - 2, -1, 0.f, GenTaps, 1234ULL}, - {1024 + 1, 512, -1, 0.f, GenTaps, 1234ULL}, - {1024 + 1, 1024, -1, 0.f, GenTaps, 1234ULL}, - {1024 + 1, 512 + 1, -1, 0.f, GenTaps, 1234ULL}, - {1024 + 1, 1024 + 1, -1, 0.f, GenTaps, 1234ULL}, - {1024 + 1, 512 + 2, -1, 0.f, GenTaps, 1234ULL}, - {1024 + 1, 1024 - 2, -1, 0.f, GenTaps, 1234ULL}, - {1024 + 2, 512, -1, 0.f, GenTaps, 1234ULL}, - {1024 + 2, 1024, -1, 0.f, GenTaps, 1234ULL}, - {1024 + 2, 512 + 1, -1, 0.f, GenTaps, 1234ULL}, - {1024 + 2, 1024 + 1, -1, 0.f, GenTaps, 1234ULL}, - {1024 + 2, 512 + 2, -1, 0.f, GenTaps, 1234ULL}, - {1024 + 2, 1024 + 2, -1, 0.f, GenTaps, 1234ULL}, - {1024, 512, 10, 100000.f, GenTaps, 1234ULL}, - - {1024, 512, -1, 0.f, GenKiss99, 1234ULL}, - {1024, 1024, -1, 0.f, GenKiss99, 1234ULL}, - {1024, 512 + 1, -1, 0.f, GenKiss99, 1234ULL}, - {1024, 1024 - 1, -1, 0.f, GenKiss99, 1234ULL}, - {1024, 512 + 2, -1, 0.f, GenKiss99, 1234ULL}, - {1024, 1024 - 2, -1, 0.f, GenKiss99, 1234ULL}, - {1024 + 1, 512, -1, 0.f, GenKiss99, 1234ULL}, - {1024 + 1, 1024, -1, 0.f, GenKiss99, 1234ULL}, - {1024 + 1, 512 + 1, -1, 0.f, GenKiss99, 1234ULL}, - {1024 + 1, 1024 + 1, -1, 0.f, GenKiss99, 1234ULL}, - {1024 + 1, 512 + 2, -1, 0.f, GenKiss99, 1234ULL}, - {1024 + 1, 1024 - 2, -1, 0.f, GenKiss99, 1234ULL}, - {1024 + 2, 512, -1, 0.f, GenKiss99, 1234ULL}, - {1024 + 2, 1024, -1, 0.f, GenKiss99, 1234ULL}, - {1024 + 2, 512 + 1, -1, 0.f, GenKiss99, 1234ULL}, - {1024 + 2, 1024 + 1, -1, 0.f, GenKiss99, 1234ULL}, - {1024 + 2, 512 + 2, -1, 0.f, GenKiss99, 1234ULL}, - {1024 + 2, 1024 + 2, -1, 0.f, GenKiss99, 1234ULL}, - {1024, 512, 10, 100000.f, GenKiss99, 1234ULL}, -}; +const std::vector> inputsf = {{1024, 512, -1, 0.f, GenPhilox, 1234ULL}, + {1024, 1024, -1, 0.f, GenPhilox, 1234ULL}, + {1024, 512 + 1, -1, 0.f, GenPhilox, 1234ULL}, + {1024, 1024 - 1, -1, 0.f, GenPhilox, 1234ULL}, + {1024, 512 + 2, -1, 0.f, GenPhilox, 1234ULL}, + {1024, 1024 - 2, -1, 0.f, GenPhilox, 1234ULL}, + {1024 + 1, 512, -1, 0.f, GenPhilox, 1234ULL}, + {1024 + 1, 1024, -1, 0.f, GenPhilox, 1234ULL}, + {1024 + 1, 512 + 1, -1, 0.f, GenPhilox, 1234ULL}, + {1024 + 1, 1024 + 1, -1, 0.f, GenPhilox, 1234ULL}, + {1024 + 1, 512 + 2, -1, 0.f, GenPhilox, 1234ULL}, + {1024 + 1, 1024 - 2, -1, 0.f, GenPhilox, 1234ULL}, + {1024 + 2, 512, -1, 0.f, GenPhilox, 1234ULL}, + {1024 + 2, 1024, -1, 0.f, GenPhilox, 1234ULL}, + {1024 + 2, 512 + 1, -1, 0.f, GenPhilox, 1234ULL}, + {1024 + 2, 1024 + 1, -1, 0.f, GenPhilox, 1234ULL}, + {1024 + 2, 512 + 2, -1, 0.f, GenPhilox, 1234ULL}, + {1024 + 2, 1024 + 2, -1, 0.f, GenPhilox, 1234ULL}, + {1024, 512, 10, 100000.f, GenPhilox, 1234ULL}, + + {1024, 512, -1, 0.f, GenPC, 1234ULL}, + {1024, 1024, -1, 0.f, GenPC, 1234ULL}, + {1024, 512 + 1, -1, 0.f, GenPC, 1234ULL}, + {1024, 1024 - 1, -1, 0.f, GenPC, 1234ULL}, + {1024, 512 + 2, -1, 0.f, GenPC, 1234ULL}, + {1024, 1024 - 2, -1, 0.f, GenPC, 1234ULL}, + {1024 + 1, 512, -1, 0.f, GenPC, 1234ULL}, + {1024 + 1, 1024, -1, 0.f, GenPC, 1234ULL}, + {1024 + 1, 512 + 1, -1, 0.f, GenPC, 1234ULL}, + {1024 + 1, 1024 + 1, -1, 0.f, GenPC, 1234ULL}, + {1024 + 1, 512 + 2, -1, 0.f, GenPC, 1234ULL}, + {1024 + 1, 1024 - 2, -1, 0.f, GenPC, 1234ULL}, + {1024 + 2, 512, -1, 0.f, GenPC, 1234ULL}, + {1024 + 2, 1024, -1, 0.f, GenPC, 1234ULL}, + {1024 + 2, 512 + 1, -1, 0.f, GenPC, 1234ULL}, + {1024 + 2, 1024 + 1, -1, 0.f, GenPC, 1234ULL}, + {1024 + 2, 512 + 2, -1, 0.f, GenPC, 1234ULL}, + {1024 + 2, 1024 + 2, -1, 0.f, GenPC, 1234ULL}, + {1024, 512, 10, 100000.f, GenPC, 1234ULL}}; TEST_P(SWoRTestF, Result) { @@ -173,67 +151,45 @@ TEST_P(SWoRTestF, Result) INSTANTIATE_TEST_SUITE_P(SWoRTests, SWoRTestF, ::testing::ValuesIn(inputsf)); typedef SWoRTest SWoRTestD; -const std::vector> inputsd = { - {1024, 512, -1, 0.0, GenPhilox, 1234ULL}, - {1024, 1024, -1, 0.0, GenPhilox, 1234ULL}, - {1024, 512 + 1, -1, 0.0, GenPhilox, 1234ULL}, - {1024, 1024 - 1, -1, 0.0, GenPhilox, 1234ULL}, - {1024, 512 + 2, -1, 0.0, GenPhilox, 1234ULL}, - {1024, 1024 - 2, -1, 0.0, GenPhilox, 1234ULL}, - {1024 + 1, 512, -1, 0.0, GenPhilox, 1234ULL}, - {1024 + 1, 1024, -1, 0.0, GenPhilox, 1234ULL}, - {1024 + 1, 512 + 1, -1, 0.0, GenPhilox, 1234ULL}, - {1024 + 1, 1024 + 1, -1, 0.0, GenPhilox, 1234ULL}, - {1024 + 1, 512 + 2, -1, 0.0, GenPhilox, 1234ULL}, - {1024 + 1, 1024 - 2, -1, 0.0, GenPhilox, 1234ULL}, - {1024 + 2, 512, -1, 0.0, GenPhilox, 1234ULL}, - {1024 + 2, 1024, -1, 0.0, GenPhilox, 1234ULL}, - {1024 + 2, 512 + 1, -1, 0.0, GenPhilox, 1234ULL}, - {1024 + 2, 1024 + 1, -1, 0.0, GenPhilox, 1234ULL}, - {1024 + 2, 512 + 2, -1, 0.0, GenPhilox, 1234ULL}, - {1024 + 2, 1024 + 2, -1, 0.0, GenPhilox, 1234ULL}, - {1024, 512, 10, 100000.0, GenPhilox, 1234ULL}, - - {1024, 512, -1, 0.0, GenTaps, 1234ULL}, - {1024, 1024, -1, 0.0, GenTaps, 1234ULL}, - {1024, 512 + 1, -1, 0.0, GenTaps, 1234ULL}, - {1024, 1024 - 1, -1, 0.0, GenTaps, 1234ULL}, - {1024, 512 + 2, -1, 0.0, GenTaps, 1234ULL}, - {1024, 1024 - 2, -1, 0.0, GenTaps, 1234ULL}, - {1024 + 1, 512, -1, 0.0, GenTaps, 1234ULL}, - {1024 + 1, 1024, -1, 0.0, GenTaps, 1234ULL}, - {1024 + 1, 512 + 1, -1, 0.0, GenTaps, 1234ULL}, - {1024 + 1, 1024 + 1, -1, 0.0, GenTaps, 1234ULL}, - {1024 + 1, 512 + 2, -1, 0.0, GenTaps, 1234ULL}, - {1024 + 1, 1024 - 2, -1, 0.0, GenTaps, 1234ULL}, - {1024 + 2, 512, -1, 0.0, GenTaps, 1234ULL}, - {1024 + 2, 1024, -1, 0.0, GenTaps, 1234ULL}, - {1024 + 2, 512 + 1, -1, 0.0, GenTaps, 1234ULL}, - {1024 + 2, 1024 + 1, -1, 0.0, GenTaps, 1234ULL}, - {1024 + 2, 512 + 2, -1, 0.0, GenTaps, 1234ULL}, - {1024 + 2, 1024 + 2, -1, 0.0, GenTaps, 1234ULL}, - {1024, 512, 10, 100000.0, GenTaps, 1234ULL}, - - {1024, 512, -1, 0.0, GenKiss99, 1234ULL}, - {1024, 1024, -1, 0.0, GenKiss99, 1234ULL}, - {1024, 512 + 1, -1, 0.0, GenKiss99, 1234ULL}, - {1024, 1024 - 1, -1, 0.0, GenKiss99, 1234ULL}, - {1024, 512 + 2, -1, 0.0, GenKiss99, 1234ULL}, - {1024, 1024 - 2, -1, 0.0, GenKiss99, 1234ULL}, - {1024 + 1, 512, -1, 0.0, GenKiss99, 1234ULL}, - {1024 + 1, 1024, -1, 0.0, GenKiss99, 1234ULL}, - {1024 + 1, 512 + 1, -1, 0.0, GenKiss99, 1234ULL}, - {1024 + 1, 1024 + 1, -1, 0.0, GenKiss99, 1234ULL}, - {1024 + 1, 512 + 2, -1, 0.0, GenKiss99, 1234ULL}, - {1024 + 1, 1024 - 2, -1, 0.0, GenKiss99, 1234ULL}, - {1024 + 2, 512, -1, 0.0, GenKiss99, 1234ULL}, - {1024 + 2, 1024, -1, 0.0, GenKiss99, 1234ULL}, - {1024 + 2, 512 + 1, -1, 0.0, GenKiss99, 1234ULL}, - {1024 + 2, 1024 + 1, -1, 0.0, GenKiss99, 1234ULL}, - {1024 + 2, 512 + 2, -1, 0.0, GenKiss99, 1234ULL}, - {1024 + 2, 1024 + 2, -1, 0.0, GenKiss99, 1234ULL}, - {1024, 512, 10, 100000.0, GenKiss99, 1234ULL}, -}; +const std::vector> inputsd = {{1024, 512, -1, 0.0, GenPhilox, 1234ULL}, + {1024, 1024, -1, 0.0, GenPhilox, 1234ULL}, + {1024, 512 + 1, -1, 0.0, GenPhilox, 1234ULL}, + {1024, 1024 - 1, -1, 0.0, GenPhilox, 1234ULL}, + {1024, 512 + 2, -1, 0.0, GenPhilox, 1234ULL}, + {1024, 1024 - 2, -1, 0.0, GenPhilox, 1234ULL}, + {1024 + 1, 512, -1, 0.0, GenPhilox, 1234ULL}, + {1024 + 1, 1024, -1, 0.0, GenPhilox, 1234ULL}, + {1024 + 1, 512 + 1, -1, 0.0, GenPhilox, 1234ULL}, + {1024 + 1, 1024 + 1, -1, 0.0, GenPhilox, 1234ULL}, + {1024 + 1, 512 + 2, -1, 0.0, GenPhilox, 1234ULL}, + {1024 + 1, 1024 - 2, -1, 0.0, GenPhilox, 1234ULL}, + {1024 + 2, 512, -1, 0.0, GenPhilox, 1234ULL}, + {1024 + 2, 1024, -1, 0.0, GenPhilox, 1234ULL}, + {1024 + 2, 512 + 1, -1, 0.0, GenPhilox, 1234ULL}, + {1024 + 2, 1024 + 1, -1, 0.0, GenPhilox, 1234ULL}, + {1024 + 2, 512 + 2, -1, 0.0, GenPhilox, 1234ULL}, + {1024 + 2, 1024 + 2, -1, 0.0, GenPhilox, 1234ULL}, + {1024, 512, 10, 100000.0, GenPhilox, 1234ULL}, + + {1024, 512, -1, 0.0, GenPC, 1234ULL}, + {1024, 1024, -1, 0.0, GenPC, 1234ULL}, + {1024, 512 + 1, -1, 0.0, GenPC, 1234ULL}, + {1024, 1024 - 1, -1, 0.0, GenPC, 1234ULL}, + {1024, 512 + 2, -1, 0.0, GenPC, 1234ULL}, + {1024, 1024 - 2, -1, 0.0, GenPC, 1234ULL}, + {1024 + 1, 512, -1, 0.0, GenPC, 1234ULL}, + {1024 + 1, 1024, -1, 0.0, GenPC, 1234ULL}, + {1024 + 1, 512 + 1, -1, 0.0, GenPC, 1234ULL}, + {1024 + 1, 1024 + 1, -1, 0.0, GenPC, 1234ULL}, + {1024 + 1, 512 + 2, -1, 0.0, GenPC, 1234ULL}, + {1024 + 1, 1024 - 2, -1, 0.0, GenPC, 1234ULL}, + {1024 + 2, 512, -1, 0.0, GenPC, 1234ULL}, + {1024 + 2, 1024, -1, 0.0, GenPC, 1234ULL}, + {1024 + 2, 512 + 1, -1, 0.0, GenPC, 1234ULL}, + {1024 + 2, 1024 + 1, -1, 0.0, GenPC, 1234ULL}, + {1024 + 2, 512 + 2, -1, 0.0, GenPC, 1234ULL}, + {1024 + 2, 1024 + 2, -1, 0.0, GenPC, 1234ULL}, + {1024, 512, 10, 100000.0, GenPC, 1234ULL}}; TEST_P(SWoRTestD, Result) { diff --git a/thirdparty/README.md b/thirdparty/README.md new file mode 100644 index 0000000000..d9f05379dc --- /dev/null +++ b/thirdparty/README.md @@ -0,0 +1,4 @@ +# PCG + +Link to the repository https://github.com/imneme/pcg-c-basic +Commit ID for last borrowed code bc39cd76ac3d541e618606bcc6e1e5ba5e5e6aa3 diff --git a/thirdparty/pcg/LICENSE.txt b/thirdparty/pcg/LICENSE.txt new file mode 100644 index 0000000000..8dada3edaf --- /dev/null +++ b/thirdparty/pcg/LICENSE.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/thirdparty/pcg/pcg_basic.c b/thirdparty/pcg/pcg_basic.c new file mode 100644 index 0000000000..8c2fd0d22b --- /dev/null +++ b/thirdparty/pcg/pcg_basic.c @@ -0,0 +1,116 @@ +/* + * PCG Random Number Generation for C. + * + * Copyright 2014 Melissa O'Neill + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For additional information about the PCG random number generation scheme, + * including its license and other licensing options, visit + * + * http://www.pcg-random.org + */ + +/* + * This code is derived from the full C implementation, which is in turn + * derived from the canonical C++ PCG implementation. The C++ version + * has many additional features and is preferable if you can use C++ in + * your project. + */ + +#include "pcg_basic.h" + +// state for global RNGs + +static pcg32_random_t pcg32_global = PCG32_INITIALIZER; + +// pcg32_srandom(initstate, initseq) +// pcg32_srandom_r(rng, initstate, initseq): +// Seed the rng. Specified in two parts, state initializer and a +// sequence selection constant (a.k.a. stream id) + +void pcg32_srandom_r(pcg32_random_t* rng, uint64_t initstate, uint64_t initseq) +{ + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg32_random_r(rng); + rng->state += initstate; + pcg32_random_r(rng); +} + +void pcg32_srandom(uint64_t seed, uint64_t seq) +{ + pcg32_srandom_r(&pcg32_global, seed, seq); +} + +// pcg32_random() +// pcg32_random_r(rng) +// Generate a uniformly distributed 32-bit random number + +uint32_t pcg32_random_r(pcg32_random_t* rng) +{ + uint64_t oldstate = rng->state; + rng->state = oldstate * 6364136223846793005ULL + rng->inc; + uint32_t xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u; + uint32_t rot = oldstate >> 59u; + return (xorshifted >> rot) | (xorshifted << ((-rot) & 31)); +} + +uint32_t pcg32_random() +{ + return pcg32_random_r(&pcg32_global); +} + + +// pcg32_boundedrand(bound): +// pcg32_boundedrand_r(rng, bound): +// Generate a uniformly distributed number, r, where 0 <= r < bound + +uint32_t pcg32_boundedrand_r(pcg32_random_t* rng, uint32_t bound) +{ + // To avoid bias, we need to make the range of the RNG a multiple of + // bound, which we do by dropping output less than a threshold. + // A naive scheme to calculate the threshold would be to do + // + // uint32_t threshold = 0x100000000ull % bound; + // + // but 64-bit div/mod is slower than 32-bit div/mod (especially on + // 32-bit platforms). In essence, we do + // + // uint32_t threshold = (0x100000000ull-bound) % bound; + // + // because this version will calculate the same modulus, but the LHS + // value is less than 2^32. + + uint32_t threshold = -bound % bound; + + // Uniformity guarantees that this loop will terminate. In practice, it + // should usually terminate quickly; on average (assuming all bounds are + // equally likely), 82.25% of the time, we can expect it to require just + // one iteration. In the worst case, someone passes a bound of 2^31 + 1 + // (i.e., 2147483649), which invalidates almost 50% of the range. In + // practice, bounds are typically small and only a tiny amount of the range + // is eliminated. + for (;;) { + uint32_t r = pcg32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + + +uint32_t pcg32_boundedrand(uint32_t bound) +{ + return pcg32_boundedrand_r(&pcg32_global, bound); +} +