Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Remove type punning from TxN_t #781

Merged
merged 11 commits into from
Aug 23, 2022
4 changes: 2 additions & 2 deletions cpp/include/raft/common/detail/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ __global__ void scatterKernel(DataT* out, const DataT* in, const IdxT* idx, IdxT
DataVec dataIn;
#pragma unroll
for (int i = 0; i < VecLen; ++i) {
auto inPos = idxIn.val.data[i];
dataIn.val.data[i] = op(in[inPos], tid + i);
auto inPos = idxIn.data[i];
dataIn.data[i] = op(in[inPos], tid + i);
}
dataIn.store(out, tid);
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/raft/linalg/detail/binary_op.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ __global__ void binaryOpKernel(
b.load(in2, idx);
#pragma unroll
for (int i = 0; i < InVecType::Ratio; ++i) {
c.val.data[i] = op(a.val.data[i], b.val.data[i]);
c.data[i] = op(a.data[i], b.data[i]);
}
c.store(out, idx);
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/raft/linalg/detail/matrix_vector_op.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ __global__ void matrixVectorOpKernel(Type* out,
mat.load(matrix, idx);
#pragma unroll
for (int i = 0; i < VecType::Ratio; ++i)
mat.val.data[i] = op(mat.val.data[i], vec.val.data[i]);
mat.data[i] = op(mat.data[i], vec.data[i]);
mat.store(out, idx);
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/include/raft/linalg/detail/ternary_op.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ __global__ void ternaryOpKernel(
c.load(in3, idx);
#pragma unroll
for (int i = 0; i < VecType::Ratio; ++i) {
a.val.data[i] = op(a.val.data[i], b.val.data[i], c.val.data[i]);
a.data[i] = op(a.data[i], b.data[i], c.data[i]);
}
a.store(out, idx);
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/raft/linalg/detail/unary_op.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ __global__ void unaryOpKernel(OutType* out, const InType* in, IdxType len, Lambd
a.load(in, idx);
#pragma unroll
for (int i = 0; i < InVecType::Ratio; ++i) {
b.val.data[i] = op(a.val.data[i]);
b.data[i] = op(a.data[i]);
}
b.store(out, idx);
}
Expand Down
16 changes: 8 additions & 8 deletions cpp/include/raft/matrix/detail/linewise_op.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ struct Linewise {
Vec v, w;
bool update = true;
for (; in < in_end; in += AlignWarp::Value, out += AlignWarp::Value, rowMod += warpPad) {
v.val.internal = __ldcv(in);
*v.vectorized_data() = __ldcv(in);
while (rowMod >= rowLen) {
rowMod -= rowLen;
rowDiv++;
Expand All @@ -102,10 +102,10 @@ struct Linewise {
int l = 0;
((args[l] = vecs[rowDiv], l++), ...);
}
int l = 0;
w.val.data[k] = op(v.val.data[k], (std::ignore = vecs, args[l++])...);
int l = 0;
w.data[k] = op(v.data[k], (std::ignore = vecs, args[l++])...);
}
*out = w.val.internal;
*out = *w.vectorized_data();
}
}

Expand Down Expand Up @@ -138,11 +138,11 @@ struct Linewise {
Vec v;
const IdxType d = BlockSize * gridDim.x;
for (IdxType i = threadIdx.x + blockIdx.x * BlockSize; i < len; i += d) {
v.val.internal = __ldcv(in + i);
*v.vectorized_data() = __ldcv(in + i);
#pragma unroll VecElems
for (int k = 0; k < VecElems; k++)
v.val.data[k] = op(v.val.data[k], args.val.data[k]...);
__stwt(out + i, v.val.internal);
v.data[k] = op(v.data[k], args.data[k]...);
__stwt(out + i, *v.vectorized_data());
}
}

Expand Down Expand Up @@ -172,7 +172,7 @@ struct Linewise {
__syncthreads();
{
Vec out;
out.val.internal = reinterpret_cast<typename Vec::io_t*>(shm)[threadIdx.x];
*out.vectorized_data() = reinterpret_cast<typename Vec::io_t*>(shm)[threadIdx.x];
return out;
}
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/raft/sparse/convert/detail/adj_to_csr.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ __global__ void adj_to_csr_kernel(const bool* adj, // row-major adjacency
chunk_bool chunk;
chunk.load(row, j);
for (int k = 0; k < chunk_size; ++k) {
if (chunk.val.data[k]) { out_col_ind[row_base + atomicIncWarp(row_count)] = j + k; }
if (chunk.data[k]) { out_col_ind[row_base + atomicIncWarp(row_count)] = j + k; }
}
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/include/raft/spatial/knn/detail/topk/radix_topk.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ __device__ void vectorized_process(const T* in, IdxT len, Func f)
wide.load(in, i);
#pragma unroll
for (int j = 0; j < wide_t::Ratio; ++j) {
f(wide.val.data[j], i + j);
f(wide.data[j], i + j);
}
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/include/raft/stats/detail/histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ DI void histCoreOp(const DataT* data, IdxT nrows, IdxT nbins, BinnerOp binner, C
if (i < nrows) { a.load(data, offset + i); }
#pragma unroll
for (int j = 0; j < VecLen; ++j) {
int binId = binner(a.val.data[j], i + j, col);
int binId = binner(a.data[j], i + j, col);
op(binId, i + j, col);
}
}
Expand Down
34 changes: 15 additions & 19 deletions cpp/include/raft/vectorized.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION.
* Copyright (c) 2018-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -244,7 +244,7 @@ struct IOType<double, 2> {
* mydata2.load(ptr2, idx);
* #pragma unroll
* for(int i=0;i<mydata1.Ratio;++i) {
* mydata1.val.data[i] += mydata2.val.data[i];
* mydata1.data[i] += mydata2.data[i];
* }
* mydata1.store(ptr1, idx);
* @endcode
Expand All @@ -269,12 +269,10 @@ struct TxN_t {
/** defines the number of 'math_t' types stored by this struct */
static const int Ratio = veclen_;

union {
/** the vectorized data that is used for subsequent operations */
math_t data[Ratio];
/** internal data used to ensure vectorized loads/stores */
io_t internal;
} val;
/** the vectorized data that is used for subsequent operations */
math_t data[Ratio];

__device__ auto* vectorized_data() { return reinterpret_cast<io_t*>(data); }

///@todo: add default constructor

Expand All @@ -286,7 +284,7 @@ struct TxN_t {
{
#pragma unroll
for (int i = 0; i < Ratio; ++i) {
val.data[i] = _val;
data[i] = _val;
}
}

Expand All @@ -302,30 +300,30 @@ struct TxN_t {
* @param idx the offset from the base pointer which will be loaded
* (or stored) by the current thread. This must be aligned to 'Ratio'!
*
* @note: In case of loads, after a successful execution, the val.data will
* @note: In case of loads, after a successful execution, the data will
* be populated with the desired data loaded from the pointer location. In
* case of stores, the data in the val.data will be stored to that location.
* case of stores, the data in the data will be stored to that location.
* @{
*/
template <typename idx_t = int>
DI void load(const math_t* ptr, idx_t idx)
{
const io_t* bptr = reinterpret_cast<const io_t*>(&ptr[idx]);
val.internal = __ldg(bptr);
const io_t* bptr = reinterpret_cast<const io_t*>(&ptr[idx]);
wphicks marked this conversation as resolved.
Show resolved Hide resolved
*vectorized_data() = __ldg(bptr);
}

template <typename idx_t = int>
DI void load(math_t* ptr, idx_t idx)
{
io_t* bptr = reinterpret_cast<io_t*>(&ptr[idx]);
val.internal = *bptr;
io_t* bptr = reinterpret_cast<io_t*>(&ptr[idx]);
*vectorized_data() = *bptr;
}

template <typename idx_t = int>
DI void store(math_t* ptr, idx_t idx)
{
io_t* bptr = reinterpret_cast<io_t*>(&ptr[idx]);
*bptr = val.internal;
*bptr = *vectorized_data();
}
/** @} */
};
Expand All @@ -336,9 +334,7 @@ struct TxN_t<math_, 0> {
typedef math_ math_t;
static const int Ratio = 1;

union {
math_t data[1];
} val;
math_t data[1];

DI void fill(math_t _val) {}
template <typename idx_t = int>
Expand Down