diff --git a/testing/shuffle.cu b/testing/shuffle.cu index 2d9094b421..77582bb3b6 100644 --- a/testing/shuffle.cu +++ b/testing/shuffle.cu @@ -1,15 +1,14 @@ #include #if THRUST_CPP_DIALECT >= 2011 +#include #include #include #include #include #include -#include -template -void TestShuffleSimple() { +template void TestShuffleSimple() { Vector data(5); data[0] = 0; data[1] = 1; @@ -26,8 +25,7 @@ void TestShuffleSimple() { } DECLARE_VECTOR_UNITTEST(TestShuffleSimple); -template -void TestShuffleCopySimple() { +template void TestShuffleCopySimple() { Vector data(5); data[0] = 0; data[1] = 1; @@ -43,8 +41,7 @@ void TestShuffleCopySimple() { } DECLARE_VECTOR_UNITTEST(TestShuffleCopySimple); -template -void TestHostDeviceIdentical(size_t m) { +template void TestHostDeviceIdentical(size_t m) { thrust::host_vector host_result(m); thrust::host_vector device_result(m); thrust::sequence(host_result.begin(), host_result.end(), 0llu); @@ -62,8 +59,7 @@ DECLARE_VARIABLE_UNITTEST(TestHostDeviceIdentical); // Individual input keys should be permuted to output locations with uniform // probability. Perform chi-squared test with confidence 99.9%. -template -void TestShuffleKeyPosition() { +template void TestShuffleKeyPosition() { typedef typename Vector::value_type T; size_t m = 20; size_t num_samples = 100; @@ -97,10 +93,12 @@ DECLARE_INTEGRAL_VECTOR_UNITTEST(TestShuffleKeyPosition); struct vector_compare { template - bool operator()(const VectorT& a, const VectorT& b) const { + bool operator()(const VectorT &a, const VectorT &b) const { for (auto i = 0ull; i < a.size(); i++) { - if (a[i] < b[i]) return true; - if (a[i] > b[i]) return false; + if (a[i] < b[i]) + return true; + if (a[i] > b[i]) + return false; } return false; } @@ -109,8 +107,7 @@ struct vector_compare { // Brute force check permutations are uniformly distributed on small input // Uses a chi-squared test indicating 99% confidence the output is uniformly // random -template -void TestShuffleUniformPermutation() { +template void TestShuffleUniformPermutation() { typedef typename Vector::value_type T; size_t m = 5; @@ -139,4 +136,51 @@ void TestShuffleUniformPermutation() { ASSERT_LESS(chi_squared, confidence_threshold); } DECLARE_VECTOR_UNITTEST(TestShuffleUniformPermutation); + +template void TestShuffleEvenSpacingBetweenOccurances() { + typedef typename Vector::value_type T; + const uint64_t shuffle_size = 10; + const uint64_t num_samples = 1000; + + thrust::host_vector h_results; + Vector sequence(shuffle_size); + thrust::sequence(sequence.begin(), sequence.end(), 0); + thrust::default_random_engine g(17); + for (auto i = 0ull; i < num_samples; i++) { + thrust::shuffle(sequence.begin(), sequence.end(), g); + thrust::host_vector tmp(sequence.begin(), sequence.end()); + h_results.insert(h_results.end(), sequence.begin(), sequence.end()); + } + + std::map>> distance_between; + + for (uint64_t sample = 0; sample < num_samples; sample++) { + for (uint64_t i = 0; i < shuffle_size - 1; i++) { + for (uint64_t j = 1; j < shuffle_size - i; j++) { + T val_1 = h_results[sample * shuffle_size + i]; + T val_2 = h_results[sample * shuffle_size + (i + j) % shuffle_size]; + distance_between[val_1][val_2][j]++; + distance_between[val_2][val_1][shuffle_size - j]++; + } + } + } + + const double expected_occurances = (double)num_samples / (shuffle_size - 1); + for (T val_1 = 0; val_1 < T(shuffle_size); val_1++) { + for (T val_2 = val_1 + 1; val_2 < T(shuffle_size); val_2++) { + double chi_squared = 0.0; + auto &distances = distance_between[val_1][val_2]; + for (uint64_t i = 1; i < shuffle_size; i++) { + chi_squared += std::pow((double)distances[i] - expected_occurances, 2) / + expected_occurances; + } + + // Tabulated chi-squared critical value for shuffling 10 items + // and 99% confidence + double confidence_threshold = 20.1; + ASSERT_LESS(chi_squared, confidence_threshold); + } + } +} +DECLARE_VECTOR_UNITTEST(TestShuffleEvenSpacingBetweenOccurances); #endif diff --git a/thrust/system/detail/generic/shuffle.inl b/thrust/system/detail/generic/shuffle.inl index 80b45dc024..5bdcc8c67b 100644 --- a/thrust/system/detail/generic/shuffle.inl +++ b/thrust/system/detail/generic/shuffle.inl @@ -32,6 +32,44 @@ namespace system { namespace detail { namespace generic { +class round_function { +private: + constexpr static uint64_t _wyp0 = 0xa0761d6478bd642full; + constexpr static uint64_t _wyp1 = 0xe7037ed1a0b428dbull; + constexpr static uint64_t _wyp2 = 0x8ebc6af09c88c6e3ull; + constexpr static uint64_t _wyp3 = 0x589965cc75374cc3ull; + constexpr static uint64_t _wyp4 = 0x1d8e4e27c47d124full; + +#ifdef __CUDA_ARCH__ + __device__ static uint64_t mum(uint64_t a, uint64_t b) { + return __mul64hi(a, b) ^ (a * b); + } + +#else + __host__ static uint64_t mum(uint64_t a, uint64_t b) { + uint64_t ha = a >> 32, hb = b >> 32, la = (uint32_t)a, lb = (uint32_t)b, hi, + lo; + uint64_t rh = ha * hb, rm0 = ha * lb, rm1 = hb * la, rl = la * lb, + t = rl + (rm0 << 32), c = t < rl; + lo = t + (rm1 << 32); + c += lo < t; + hi = rh + (rm0 >> 32) + (rm1 >> 32) + c; + return hi ^ lo; + } + +#endif + +public: + // Round function, a 'pseudorandom function' whos output is indistinguishable + // from random for each key value input. This is not cryptographically secure + // but sufficient for generating permutations. + __host__ __device__ uint64_t operator()(const uint64_t key[2], + uint64_t seed) { + return mum(mum(key[0] ^ seed ^ _wyp0, key[1] ^ seed ^ _wyp1) ^ seed, + 16 ^ _wyp4); + } +}; + // An implementation of a Feistel cipher for operating on 64 bit keys class feistel_bijection { private: @@ -51,7 +89,7 @@ class feistel_bijection { right_side_bits = total_bits - left_side_bits; right_side_mask = (1ull << right_side_bits) - 1; - for (uint64_t i = 0; i < num_rounds; i++) { + for (uint64_t i = 0; i < num_rounds * 2; i++) { key[i] = g(); } } @@ -88,22 +126,12 @@ class feistel_bijection { return i; } - // Round function, a 'pseudorandom function' whos output is indistinguishable - // from random for each key value input. This is not cryptographically secure - // but sufficient for generating permutations. We hash the value with the - // tau88 engine and combine it with the random bits of the key (provided by - // the user-defined engine). - __host__ __device__ uint32_t round_function(uint64_t value, - const uint64_t key) const { - uint64_t value_hash = thrust::random::taus88(value)(); - return (value_hash ^ key) & left_side_mask; - } - __host__ __device__ round_state do_round(const round_state state, const uint64_t round) const { const uint32_t new_left = state.right & left_side_mask; + round_function f; const uint32_t round_function_res = - state.left ^ round_function(state.right, key[round]); + state.left ^ (f(key + round * 2, state.right) & left_side_mask); if (right_side_bits != left_side_bits) { // Upper bit of the old right becomes lower bit of new right if we have // odd length feistel @@ -119,7 +147,7 @@ class feistel_bijection { uint64_t left_side_bits; uint64_t right_side_mask; uint64_t left_side_mask; - uint64_t key[num_rounds]; + uint64_t key[num_rounds * 2]; }; struct key_flag_tuple {