Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Remove type punning from TxN_t #781

Merged
merged 11 commits into from
Aug 23, 2022
4 changes: 2 additions & 2 deletions cpp/include/raft/common/detail/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ __global__ void scatterKernel(DataT* out, const DataT* in, const IdxT* idx, IdxT
DataVec dataIn;
#pragma unroll
for (int i = 0; i < VecLen; ++i) {
auto inPos = idxIn.val.data[i];
dataIn.val.data[i] = op(in[inPos], tid + i);
auto inPos = idxIn.data[i];
dataIn.data[i] = op(in[inPos], tid + i);
}
dataIn.store(out, tid);
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/raft/linalg/detail/binary_op.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ __global__ void binaryOpKernel(
b.load(in2, idx);
#pragma unroll
for (int i = 0; i < InVecType::Ratio; ++i) {
c.val.data[i] = op(a.val.data[i], b.val.data[i]);
c.data[i] = op(a.data[i], b.data[i]);
}
c.store(out, idx);
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/raft/linalg/detail/matrix_vector_op.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ __global__ void matrixVectorOpKernel(Type* out,
mat.load(matrix, idx);
#pragma unroll
for (int i = 0; i < VecType::Ratio; ++i)
mat.val.data[i] = op(mat.val.data[i], vec.val.data[i]);
mat.data[i] = op(mat.data[i], vec.data[i]);
mat.store(out, idx);
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/include/raft/linalg/detail/ternary_op.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ __global__ void ternaryOpKernel(
c.load(in3, idx);
#pragma unroll
for (int i = 0; i < VecType::Ratio; ++i) {
a.val.data[i] = op(a.val.data[i], b.val.data[i], c.val.data[i]);
a.data[i] = op(a.data[i], b.data[i], c.data[i]);
}
a.store(out, idx);
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/raft/linalg/detail/unary_op.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ __global__ void unaryOpKernel(OutType* out, const InType* in, IdxType len, Lambd
a.load(in, idx);
#pragma unroll
for (int i = 0; i < InVecType::Ratio; ++i) {
b.val.data[i] = op(a.val.data[i]);
b.data[i] = op(a.data[i]);
}
b.store(out, idx);
}
Expand Down
16 changes: 8 additions & 8 deletions cpp/include/raft/matrix/detail/linewise_op.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ struct Linewise {
Vec v, w;
bool update = true;
for (; in < in_end; in += AlignWarp::Value, out += AlignWarp::Value, rowMod += warpPad) {
v.val.internal = __ldcv(in);
*v.vectorized_data() = __ldcv(in);
while (rowMod >= rowLen) {
rowMod -= rowLen;
rowDiv++;
Expand All @@ -102,10 +102,10 @@ struct Linewise {
int l = 0;
((args[l] = vecs[rowDiv], l++), ...);
}
int l = 0;
w.val.data[k] = op(v.val.data[k], (std::ignore = vecs, args[l++])...);
int l = 0;
w.data[k] = op(v.data[k], (std::ignore = vecs, args[l++])...);
}
*out = w.val.internal;
*out = *w.vectorized_data();
}
}

Expand Down Expand Up @@ -138,11 +138,11 @@ struct Linewise {
Vec v;
const IdxType d = BlockSize * gridDim.x;
for (IdxType i = threadIdx.x + blockIdx.x * BlockSize; i < len; i += d) {
v.val.internal = __ldcv(in + i);
*v.vectorized_data() = __ldcv(in + i);
#pragma unroll VecElems
for (int k = 0; k < VecElems; k++)
v.val.data[k] = op(v.val.data[k], args.val.data[k]...);
__stwt(out + i, v.val.internal);
v.data[k] = op(v.data[k], args.data[k]...);
__stwt(out + i, *v.vectorized_data());
}
}

Expand Down Expand Up @@ -172,7 +172,7 @@ struct Linewise {
__syncthreads();
{
Vec out;
out.val.internal = reinterpret_cast<typename Vec::io_t*>(shm)[threadIdx.x];
*out.vectorized_data() = reinterpret_cast<typename Vec::io_t*>(shm)[threadIdx.x];
return out;
}
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/raft/sparse/convert/detail/adj_to_csr.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ __global__ void adj_to_csr_kernel(const bool* adj, // row-major adjacency
chunk_bool chunk;
chunk.load(row, j);
for (int k = 0; k < chunk_size; ++k) {
if (chunk.val.data[k]) { out_col_ind[row_base + atomicIncWarp(row_count)] = j + k; }
if (chunk.data[k]) { out_col_ind[row_base + atomicIncWarp(row_count)] = j + k; }
}
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/include/raft/spatial/knn/detail/topk/radix_topk.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ __device__ void vectorized_process(const T* in, IdxT len, Func f)
wide.load(in, i);
#pragma unroll
for (int j = 0; j < wide_t::Ratio; ++j) {
f(wide.val.data[j], i + j);
f(wide.data[j], i + j);
}
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/include/raft/stats/detail/histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ DI void histCoreOp(const DataT* data, IdxT nrows, IdxT nbins, BinnerOp binner, C
if (i < nrows) { a.load(data, offset + i); }
#pragma unroll
for (int j = 0; j < VecLen; ++j) {
int binId = binner(a.val.data[j], i + j, col);
int binId = binner(a.data[j], i + j, col);
op(binId, i + j, col);
}
}
Expand Down
34 changes: 15 additions & 19 deletions cpp/include/raft/vectorized.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION.
* Copyright (c) 2018-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -244,7 +244,7 @@ struct IOType<double, 2> {
* mydata2.load(ptr2, idx);
* #pragma unroll
* for(int i=0;i<mydata1.Ratio;++i) {
* mydata1.val.data[i] += mydata2.val.data[i];
* mydata1.data[i] += mydata2.data[i];
* }
* mydata1.store(ptr1, idx);
* @endcode
Expand All @@ -269,12 +269,10 @@ struct TxN_t {
/** defines the number of 'math_t' types stored by this struct */
static const int Ratio = veclen_;

union {
/** the vectorized data that is used for subsequent operations */
math_t data[Ratio];
/** internal data used to ensure vectorized loads/stores */
io_t internal;
} val;
/** the vectorized data that is used for subsequent operations */
math_t data[Ratio];

__device__ auto* vectorized_data() { return reinterpret_cast<io_t*>(data); }

///@todo: add default constructor

Expand All @@ -286,7 +284,7 @@ struct TxN_t {
{
#pragma unroll
for (int i = 0; i < Ratio; ++i) {
val.data[i] = _val;
data[i] = _val;
}
}

Expand All @@ -302,30 +300,30 @@ struct TxN_t {
* @param idx the offset from the base pointer which will be loaded
* (or stored) by the current thread. This must be aligned to 'Ratio'!
*
* @note: In case of loads, after a successful execution, the val.data will
* @note: In case of loads, after a successful execution, the data will
* be populated with the desired data loaded from the pointer location. In
* case of stores, the data in the val.data will be stored to that location.
* case of stores, the data in the data will be stored to that location.
* @{
*/
template <typename idx_t = int>
DI void load(const math_t* ptr, idx_t idx)
{
const io_t* bptr = reinterpret_cast<const io_t*>(&ptr[idx]);
val.internal = __ldg(bptr);
const io_t* bptr = reinterpret_cast<const io_t*>(&ptr[idx]);
wphicks marked this conversation as resolved.
Show resolved Hide resolved
*vectorized_data() = __ldg(bptr);
}

template <typename idx_t = int>
DI void load(math_t* ptr, idx_t idx)
{
io_t* bptr = reinterpret_cast<io_t*>(&ptr[idx]);
val.internal = *bptr;
io_t* bptr = reinterpret_cast<io_t*>(&ptr[idx]);
*vectorized_data() = *bptr;
}

template <typename idx_t = int>
DI void store(math_t* ptr, idx_t idx)
{
io_t* bptr = reinterpret_cast<io_t*>(&ptr[idx]);
*bptr = val.internal;
*bptr = *vectorized_data();
}
/** @} */
};
Expand All @@ -336,9 +334,7 @@ struct TxN_t<math_, 0> {
typedef math_ math_t;
static const int Ratio = 1;

union {
math_t data[1];
} val;
math_t data[1];

DI void fill(math_t _val) {}
template <typename idx_t = int>
Expand Down