[REVIEW] Moving cuML prims to RAFT #65

Merged · 19 commits · Oct 23, 2020
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -1,6 +1,7 @@
# RAFT 0.17.0 (Date TBD)

## New Features
- PR #65: Add cuML prims that break the circular dependency between the cuml and cumlprims projects

## Improvements
- PR #73: Move DistanceType enum from cuML to RAFT
26 changes: 26 additions & 0 deletions cpp/CMakeLists.txt
@@ -235,8 +235,34 @@ if(BUILD_RAFT_TESTS)
test/cudart_utils.cpp
test/handle.cpp
test/integer_utils.cpp
test/linalg/add.cu
test/linalg/binary_op.cu
test/linalg/coalesced_reduction.cu
test/linalg/divide.cu
test/linalg/eig.cu
test/linalg/eig_sel.cu
test/linalg/gemm_layout.cu
test/linalg/map_then_reduce.cu
test/linalg/matrix_vector_op.cu
test/linalg/multiply.cu
test/linalg/norm.cu
test/linalg/reduce.cu
test/linalg/strided_reduction.cu
test/linalg/subtract.cu
test/linalg/svd.cu
test/linalg/transpose.cu
test/linalg/unary_op.cu
test/matrix/math.cu
test/matrix/matrix.cu
test/mr/device/buffer.cpp
test/mr/host/buffer.cpp
test/random/rng.cu
test/random/rng_int.cu
test/random/sample_without_replacement.cu
test/stats/mean.cu
test/stats/mean_center.cu
test/stats/stddev.cu
test/stats/sum.cu
test/test.cpp
test/spectral_matrix.cu
test/eigen_solvers.cu
49 changes: 49 additions & 0 deletions cpp/include/raft/common/cub_wrappers.cuh
@@ -0,0 +1,49 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cub/cub.cuh>
#include <raft/mr/device/buffer.hpp>

namespace raft {

/**
* @brief Convenience wrapper over cub's SortPairs method
* @tparam KeyT key type
* @tparam ValueT value type
 * @param workspace scratch buffer; it will be resized if it is not large enough
* @param inKeys input keys array
* @param outKeys output keys array
* @param inVals input values array
* @param outVals output values array
* @param len array length
* @param stream cuda stream
*/
template <typename KeyT, typename ValueT>
void sortPairs(raft::mr::device::buffer<char> &workspace, const KeyT *inKeys,
KeyT *outKeys, const ValueT *inVals, ValueT *outVals, int len,
cudaStream_t stream) {
  size_t worksize = 0;
  // First pass with a null workspace pointer only computes the required
  // temporary storage size into `worksize`.
  cub::DeviceRadixSort::SortPairs(nullptr, worksize, inKeys, outKeys, inVals,
                                  outVals, len, 0, sizeof(KeyT) * 8, stream);
  // Grow the caller-provided scratch buffer if needed, then sort for real.
  workspace.resize(worksize, stream);
  cub::DeviceRadixSort::SortPairs(workspace.data(), worksize, inKeys, outKeys,
                                  inVals, outVals, len, 0, sizeof(KeyT) * 8,
                                  stream);
}

} // namespace raft
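A minimal usage sketch for the wrapper above (illustration only, not part of this diff): it assumes the `(allocator, stream, size)` constructor of `raft::mr::device::buffer` and the `default_allocator` from `raft/mr/device/allocator.hpp`; the helper function and variable names here are hypothetical.

```cuda
#include <raft/common/cub_wrappers.cuh>
#include <raft/mr/device/allocator.hpp>
#include <raft/mr/device/buffer.hpp>

#include <memory>

// Hypothetical helper: sort float keys carrying int payloads on `stream`.
void sort_pairs_example(const float *d_keys_in, float *d_keys_out,
                        const int *d_vals_in, int *d_vals_out, int len,
                        cudaStream_t stream) {
  auto alloc = std::make_shared<raft::mr::device::default_allocator>();
  // Start with an empty scratch buffer; sortPairs resizes it as needed.
  raft::mr::device::buffer<char> workspace(alloc, stream, 0);
  raft::sortPairs(workspace, d_keys_in, d_keys_out, d_vals_in, d_vals_out,
                  len, stream);
}
```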
97 changes: 97 additions & 0 deletions cpp/include/raft/common/scatter.cuh
@@ -0,0 +1,97 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <raft/cuda_utils.cuh>
#include <raft/vectorized.cuh>

namespace raft {

template <typename DataT, int VecLen, typename Lambda, typename IdxT>
__global__ void scatterKernel(DataT *out, const DataT *in, const IdxT *idx,
                              IdxT len, Lambda op) {
  typedef TxN_t<DataT, VecLen> DataVec;
  typedef TxN_t<IdxT, VecLen> IdxVec;
  // Each thread handles VecLen consecutive elements via vectorized IO.
  IdxT tid = threadIdx.x + ((IdxT)blockIdx.x * blockDim.x);
  tid *= VecLen;
  if (tid >= len) return;
  IdxVec idxIn;
  idxIn.load(idx, tid);
  DataVec dataIn;
#pragma unroll
  for (int i = 0; i < VecLen; ++i) {
    // Read in[idx[tid + i]], apply op with the destination index tid + i,
    // and stage the result for a single vectorized store.
    auto inPos = idxIn.val.data[i];
    dataIn.val.data[i] = op(in[inPos], tid + i);
  }
  dataIn.store(out, tid);
}

template <typename DataT, int VecLen, typename Lambda, typename IdxT, int TPB>
void scatterImpl(DataT *out, const DataT *in, const IdxT *idx, IdxT len,
                 Lambda op, cudaStream_t stream) {
  // The VecLen check guards the VecLen == 0 instantiations that the
  // dispatch in scatter() below can generate, avoiding a divide-by-zero.
  const IdxT nblks = raft::ceildiv(VecLen ? len / VecLen : len, (IdxT)TPB);
  scatterKernel<DataT, VecLen, Lambda, IdxT>
    <<<nblks, TPB, 0, stream>>>(out, in, idx, len, op);
  CUDA_CHECK(cudaGetLastError());
}

/**
 * @brief Performs scatter operation based on the input indexing array
 * @tparam DataT data type whose array gets scattered
 * @tparam IdxT indexing type
 * @tparam TPB threads-per-block in the final kernel launched
 * @tparam Lambda the device lambda applied to each loaded element before it
 * gets scattered
 * @param out the output array
 * @param in the input array
 * @param idx the indexing array
 * @param len number of elements in the input array
 * @param stream cuda stream in which to launch the work
 * @param op the device lambda with signature `DataT func(DataT, IdxT);`. It
 * is applied to every element before scattering it to its final location;
 * its second parameter is the destination index.
 */
template <typename DataT, typename IdxT,
          typename Lambda = raft::Nop<DataT, IdxT>, int TPB = 256>
void scatter(DataT *out, const DataT *in, const IdxT *idx, IdxT len,
             cudaStream_t stream, Lambda op = raft::Nop<DataT, IdxT>()) {
  if (len <= 0) return;
  constexpr size_t DataSize = sizeof(DataT);
  constexpr size_t IdxSize = sizeof(IdxT);
  constexpr size_t MaxPerElem = DataSize > IdxSize ? DataSize : IdxSize;
  size_t bytes = len * MaxPerElem;
  // Dispatch on the widest vectorized IO (16 bytes down to 1) whose width
  // covers at least one element (N / MaxPerElem > 0) and evenly divides the
  // total byte count of the arrays.
  if (16 / MaxPerElem && bytes % 16 == 0) {
    scatterImpl<DataT, 16 / MaxPerElem, Lambda, IdxT, TPB>(out, in, idx, len,
                                                           op, stream);
  } else if (8 / MaxPerElem && bytes % 8 == 0) {
    scatterImpl<DataT, 8 / MaxPerElem, Lambda, IdxT, TPB>(out, in, idx, len, op,
                                                          stream);
  } else if (4 / MaxPerElem && bytes % 4 == 0) {
    scatterImpl<DataT, 4 / MaxPerElem, Lambda, IdxT, TPB>(out, in, idx, len, op,
                                                          stream);
  } else if (2 / MaxPerElem && bytes % 2 == 0) {
    scatterImpl<DataT, 2 / MaxPerElem, Lambda, IdxT, TPB>(out, in, idx, len, op,
                                                          stream);
  } else if (1 / MaxPerElem) {
    scatterImpl<DataT, 1 / MaxPerElem, Lambda, IdxT, TPB>(out, in, idx, len, op,
                                                          stream);
  } else {
    scatterImpl<DataT, 1, Lambda, IdxT, TPB>(out, in, idx, len, op, stream);
  }
}

} // namespace raft
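Similarly, a sketch of calling `raft::scatter` (illustration only, not part of this diff; the helper name is hypothetical, and the device lambda requires nvcc's extended-lambda mode). With the default `raft::Nop` op the call simply permutes `in` into `out` according to `idx`; a custom op can additionally transform each element:

```cuda
#include <raft/common/scatter.cuh>

// Hypothetical helper: computes out[i] = -in[idx[i]] for i in [0, len).
void scatter_example(float *d_out, const float *d_in, const int *d_idx,
                     int len, cudaStream_t stream) {
  raft::scatter(d_out, d_in, d_idx, len, stream,
                [] __device__(float val, int /* destination index */) {
                  return -val;
                });
}
```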