Skip to content

Commit

Permalink
[REVIEW] Moving cuML prims to RAFT (#65)
Browse files Browse the repository at this point in the history
* rng working

* linalg tests working

* got all tests working

* changelog

* clang format

* updated from cuml

* style check

* added leftover prims

* more style format

* adding cuda check include files

* its actually in cudart_utils.h

Co-authored-by: Dante Gama Dessavre <[email protected]>
  • Loading branch information
divyegala and dantegd authored Oct 23, 2020
1 parent 9ed6adf commit eee0193
Show file tree
Hide file tree
Showing 68 changed files with 11,292 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# RAFT 0.17.0 (Date TBD)

## New Features
- PR #65: Adding cuml prims that break circular dependency between cuml and cumlprims projects

## Improvements
- PR #73: Move DistanceType enum from cuML to RAFT
Expand Down
26 changes: 26 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -235,8 +235,34 @@ if(BUILD_RAFT_TESTS)
test/cudart_utils.cpp
test/handle.cpp
test/integer_utils.cpp
test/linalg/add.cu
test/linalg/binary_op.cu
test/linalg/coalesced_reduction.cu
test/linalg/divide.cu
test/linalg/eig.cu
test/linalg/eig_sel.cu
test/linalg/gemm_layout.cu
test/linalg/map_then_reduce.cu
test/linalg/matrix_vector_op.cu
test/linalg/multiply.cu
test/linalg/norm.cu
test/linalg/reduce.cu
test/linalg/strided_reduction.cu
test/linalg/subtract.cu
test/linalg/svd.cu
test/linalg/transpose.cu
test/linalg/unary_op.cu
test/matrix/math.cu
test/matrix/matrix.cu
test/mr/device/buffer.cpp
test/mr/host/buffer.cpp
test/random/rng.cu
test/random/rng_int.cu
test/random/sample_without_replacement.cu
test/stats/mean.cu
test/stats/mean_center.cu
test/stats/stddev.cu
test/stats/sum.cu
test/test.cpp
test/spectral_matrix.cu
test/eigen_solvers.cu
Expand Down
49 changes: 49 additions & 0 deletions cpp/include/raft/common/cub_wrappers.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cub/cub.cuh>
#include <raft/mr/device/buffer.hpp>

namespace raft {

/**
* @brief Convenience wrapper over cub's SortPairs method
* @tparam KeyT key type
* @tparam ValueT value type
* @param workspace workspace buffer which will get resized if not enough space
* @param inKeys input keys array
* @param outKeys output keys array
* @param inVals input values array
* @param outVals output values array
* @param len array length
* @param stream cuda stream
*/
template <typename KeyT, typename ValueT>
void sortPairs(raft::mr::device::buffer<char> &workspace, const KeyT *inKeys,
KeyT *outKeys, const ValueT *inVals, ValueT *outVals, int len,
cudaStream_t stream) {
size_t worksize;
cub::DeviceRadixSort::SortPairs(nullptr, worksize, inKeys, outKeys, inVals,
outVals, len, 0, sizeof(KeyT) * 8, stream);
workspace.resize(worksize, stream);
cub::DeviceRadixSort::SortPairs(workspace.data(), worksize, inKeys, outKeys,
inVals, outVals, len, 0, sizeof(KeyT) * 8,
stream);
}

} // namespace raft
97 changes: 97 additions & 0 deletions cpp/include/raft/common/scatter.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <raft/cuda_utils.cuh>
#include <raft/vectorized.cuh>

namespace raft {

template <typename DataT, int VecLen, typename Lambda, typename IdxT>
__global__ void scatterKernel(DataT *out, const DataT *in, const IdxT *idx,
IdxT len, Lambda op) {
typedef TxN_t<DataT, VecLen> DataVec;
typedef TxN_t<IdxT, VecLen> IdxVec;
IdxT tid = threadIdx.x + ((IdxT)blockIdx.x * blockDim.x);
tid *= VecLen;
if (tid >= len) return;
IdxVec idxIn;
idxIn.load(idx, tid);
DataVec dataIn;
#pragma unroll
for (int i = 0; i < VecLen; ++i) {
auto inPos = idxIn.val.data[i];
dataIn.val.data[i] = op(in[inPos], tid + i);
}
dataIn.store(out, tid);
}

template <typename DataT, int VecLen, typename Lambda, typename IdxT, int TPB>
void scatterImpl(DataT *out, const DataT *in, const IdxT *idx, IdxT len,
Lambda op, cudaStream_t stream) {
const IdxT nblks = raft::ceildiv(VecLen ? len / VecLen : len, (IdxT)TPB);
scatterKernel<DataT, VecLen, Lambda, IdxT>
<<<nblks, TPB, 0, stream>>>(out, in, idx, len, op);
CUDA_CHECK(cudaGetLastError());
}

/**
* @brief Performs scatter operation based on the input indexing array
* @tparam DataT data type whose array gets scattered
* @tparam IdxT indexing type
* @tparam TPB threads-per-block in the final kernel launched
* @tparam Lambda the device-lambda performing a unary operation on the loaded
* data before it gets scattered
* @param out the output array
* @param in the input array
* @param idx the indexing array
* @param len number of elements in the input array
* @param stream cuda stream where to launch work
* @param op the device-lambda with signature `DataT func(DataT, IdxT);`. This
* will be applied to every element before scattering it to the right location.
* The second param in this method will be the destination index.
*/
template <typename DataT, typename IdxT,
typename Lambda = raft::Nop<DataT, IdxT>, int TPB = 256>
void scatter(DataT *out, const DataT *in, const IdxT *idx, IdxT len,
cudaStream_t stream, Lambda op = raft::Nop<DataT, IdxT>()) {
if (len <= 0) return;
constexpr size_t DataSize = sizeof(DataT);
constexpr size_t IdxSize = sizeof(IdxT);
constexpr size_t MaxPerElem = DataSize > IdxSize ? DataSize : IdxSize;
size_t bytes = len * MaxPerElem;
if (16 / MaxPerElem && bytes % 16 == 0) {
scatterImpl<DataT, 16 / MaxPerElem, Lambda, IdxT, TPB>(out, in, idx, len,
op, stream);
} else if (8 / MaxPerElem && bytes % 8 == 0) {
scatterImpl<DataT, 8 / MaxPerElem, Lambda, IdxT, TPB>(out, in, idx, len, op,
stream);
} else if (4 / MaxPerElem && bytes % 4 == 0) {
scatterImpl<DataT, 4 / MaxPerElem, Lambda, IdxT, TPB>(out, in, idx, len, op,
stream);
} else if (2 / MaxPerElem && bytes % 2 == 0) {
scatterImpl<DataT, 2 / MaxPerElem, Lambda, IdxT, TPB>(out, in, idx, len, op,
stream);
} else if (1 / MaxPerElem) {
scatterImpl<DataT, 1 / MaxPerElem, Lambda, IdxT, TPB>(out, in, idx, len, op,
stream);
} else {
scatterImpl<DataT, 1, Lambda, IdxT, TPB>(out, in, idx, len, op, stream);
}
}

} // namespace raft
Loading

0 comments on commit eee0193

Please sign in to comment.