Skip to content

Commit

Permalink
Merge pull request #34929 from fwyzard/fix_radix_sort_test_121x
Browse files Browse the repository at this point in the history
Fix radix sort test
  • Loading branch information
cmsbuild authored Aug 18, 2021
2 parents 80c9d7e + 0b6eae2 commit 05c2b42
Showing 1 changed file with 49 additions and 9 deletions.
58 changes: 49 additions & 9 deletions HeterogeneousCore/CUDAUtilities/test/radixSort_t.cu
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <limits>
#include <memory>
#include <random>
#include <set>
#include <type_traits>

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
Expand All @@ -32,6 +34,50 @@ struct RS<float> {
static constexpr int imax = std::numeric_limits<int>::max();
};

// A templated unsigned integer type with N bytes
template <int N>
struct uintN;

template <>
struct uintN<8> {
using type = uint8_t;
};

template <>
struct uintN<16> {
using type = uint16_t;
};

template <>
struct uintN<32> {
using type = uint32_t;
};

template <>
struct uintN<64> {
using type = uint64_t;
};

template <int N>
using uintN_t = typename uintN<N>::type;

// A templated unsigned integer type with the same size as T
template <typename T>
using uintT_t = uintN_t<sizeof(T) * 8>;

// Keep only the `N` most significant bytes of `t`, and set the others to zero
template <int N, typename T, typename SFINAE = std::enable_if_t<N <= sizeof(T)>>
void truncate(T& t) {
const int shift = 8 * (sizeof(T) - N);
union {
T t;
uintT_t<T> u;
} c;
c.t = t;
c.u = c.u >> shift << shift;
t = c.t;
}

template <typename T, int NS = sizeof(T), typename U = T, typename LL = long long>
void go(bool useShared) {
std::mt19937 eng;
Expand Down Expand Up @@ -100,7 +146,7 @@ void go(bool useShared) {
cudaCheck(cudaMemcpy(off_d.get(), offsets, 4 * (blocks + 1), cudaMemcpyHostToDevice));

if (i < 2)
std::cout << "lauch for " << offsets[blocks] << std::endl;
std::cout << "launch for " << offsets[blocks] << std::endl;

auto ntXBl __attribute__((unused)) = 1 == i % 4 ? 256 : 256;

Expand Down Expand Up @@ -138,14 +184,8 @@ void go(bool useShared) {
auto a = v + offsets[ib];
auto k1 = a[ind[j]];
auto k2 = a[ind[j - 1]];
auto sh = sizeof(uint64_t) - NS;
sh *= 8;
auto shorten = [sh](T& t) {
auto k = (uint64_t*)(&t);
*k = (*k >> sh) << sh;
};
shorten(k1);
shorten(k2);
truncate<NS>(k1);
truncate<NS>(k2);
if (k1 < k2)
std::cout << ib << " not ordered at " << ind[j] << " : " << a[ind[j]] << ' ' << a[ind[j - 1]] << std::endl;
}
Expand Down

0 comments on commit 05c2b42

Please sign in to comment.