Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Apply modifications to account for RAFT changes #4077

Merged
merged 46 commits into branch-21.10 from apply-raft-changes
Aug 30, 2021
Merged
Show file tree
Hide file tree
Changes from 42 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
fd2bd47
Changes for RAFT
viclafargue Jul 21, 2021
d69a5d3
Merge branch-21.10
viclafargue Jul 21, 2021
de08d9f
Changes for RAFT 2
viclafargue Jul 26, 2021
cff3f2a
Changes for RAFT 3
viclafargue Jul 27, 2021
acde3cb
Changes for RAFT 4
viclafargue Aug 2, 2021
7224d30
Changes for RAFT 5
viclafargue Aug 3, 2021
1eb60e6
Changes for RAFT 6
viclafargue Aug 4, 2021
97fe376
Fix C++ testing
viclafargue Aug 9, 2021
39432a0
Fix KNN predict_proba
viclafargue Aug 10, 2021
aa6d09d
Remove raft::allocate in src
viclafargue Aug 10, 2021
acfff28
Fix SVM issue
viclafargue Aug 10, 2021
0916689
Merge branch-21.10
viclafargue Aug 10, 2021
fd9d66b
Restore original cmake files
viclafargue Aug 10, 2021
bc0e559
Use of device_uvectors instead of RMM alloc
viclafargue Aug 13, 2021
76670e8
Requested changes 1
viclafargue Aug 16, 2021
a66acf7
Requested changes 2 & 3
viclafargue Aug 16, 2021
1d0ff8b
Requested changes 4
viclafargue Aug 16, 2021
80000ab
Requested changes 5
viclafargue Aug 17, 2021
b09ac87
Using RMM allocator back for Tensor
viclafargue Aug 17, 2021
ac30cf7
RMM allocator for SvmModel
viclafargue Aug 18, 2021
4065d53
getUniqueLabels update
viclafargue Aug 18, 2021
377eb3c
Init stream to 0
viclafargue Aug 18, 2021
6441ade
RMM allocator for ARIMA
viclafargue Aug 18, 2021
b13d3d0
Revert
viclafargue Aug 18, 2021
5f2f1d4
Merge branch-21.10
viclafargue Aug 18, 2021
75eda93
Copyright header update
viclafargue Aug 19, 2021
c99050b
Merge branch 'branch-21.10' into apply-raft-changes
viclafargue Aug 20, 2021
fe2bbe5
Fix MNMG KMeans
viclafargue Aug 23, 2021
34b5df7
Requested changes
viclafargue Aug 23, 2021
175d432
Restore cmake files
viclafargue Aug 23, 2021
89f13de
Update get_raft.cmake for CI testing
viclafargue Aug 23, 2021
6e482cb
DBG Use testing libcumlprims package
dantegd Aug 25, 2021
18f65e2
DBG Use testing libcumlprims in conda recipe
dantegd Aug 25, 2021
ef2821c
DBG Change libcumlprims pinning in conda-build
dantegd Aug 25, 2021
97ba8d0
DBG add channel to conda-build command
dantegd Aug 25, 2021
ebc26b4
DBG Undo conda-build changes
dantegd Aug 25, 2021
bc40075
FIX roll back cuml conda recipe meta change
dantegd Aug 25, 2021
21eedc5
Updating ci/gpu/build.sh
viclafargue Aug 25, 2021
0af4a25
Updating ci/gpu/build.sh
viclafargue Aug 25, 2021
e603eed
Update cppdoc
viclafargue Aug 27, 2021
6fd05bc
xfailing test_gaussian_partial_fit pytest
viclafargue Aug 27, 2021
6cf4de6
Restoring dependencies and CI
viclafargue Aug 27, 2021
711d05b
Merge remote-tracking branch 'origin/branch-21.10' into apply-raft-changes
dantegd Aug 29, 2021
21f2e78
Merge remote-tracking branch 'origin/branch-21.10' into apply-raft-changes
dantegd Aug 29, 2021
535f237
FIX Undo deletion of newline in build.sh
dantegd Aug 29, 2021
47c4b6b
FIX Undo deletion of newline in build.sh
dantegd Aug 29, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion ci/cpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -94,4 +94,3 @@ fi

gpuci_logger "Upload conda pkgs"
source ci/cpu/upload.sh

2 changes: 1 addition & 1 deletion ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ gpuci_mamba_retry install -c conda-forge -c rapidsai -c rapidsai-nightly -c nvid
"libcumlprims=${MINOR_VERSION}" \
"dask-cudf=${MINOR_VERSION}" \
"dask-cuda=${MINOR_VERSION}" \
"ucx-py=0.21.*" \
"ucx-py=0.22.*" \
"ucx-proc=*=gpu" \
"xgboost=1.4.2dev.rapidsai${MINOR_VERSION}" \
"rapids-build-env=${MINOR_VERSION}.*" \
Expand Down
17 changes: 7 additions & 10 deletions cpp/bench/common/ml_benchmark.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,19 +80,15 @@ struct CudaEventTimer {

private:
::benchmark::State* state;
cudaStream_t stream;
cudaStream_t stream = 0;
cudaEvent_t start;
cudaEvent_t stop;
}; // end struct CudaEventTimer

/** Main fixture to be inherited and used by all other c++ benchmarks in cuml */
class Fixture : public ::benchmark::Fixture {
public:
Fixture(const std::string& name, std::shared_ptr<raft::mr::device::allocator> _alloc)
: ::benchmark::Fixture(), d_alloc(_alloc)
{
SetName(name.c_str());
}
Fixture(const std::string& name) : ::benchmark::Fixture() { SetName(name.c_str()); }
Fixture() = delete;

void SetUp(const ::benchmark::State& state) override
Expand Down Expand Up @@ -163,19 +159,20 @@ class Fixture : public ::benchmark::Fixture {
template <typename T>
void alloc(T*& ptr, size_t len, bool init = false)
{
auto nBytes = len * sizeof(T);
ptr = (T*)d_alloc->allocate(nBytes, stream);
auto nBytes = len * sizeof(T);
auto d_alloc = rmm::mr::get_current_device_resource();
ptr = (T*)d_alloc->allocate(nBytes, stream);
if (init) { CUDA_CHECK(cudaMemsetAsync(ptr, 0, nBytes, stream)); }
}

template <typename T>
void dealloc(T* ptr, size_t len)
{
auto d_alloc = rmm::mr::get_current_device_resource();
d_alloc->deallocate(ptr, len * sizeof(T), stream);
}

std::shared_ptr<raft::mr::device::allocator> d_alloc;
cudaStream_t stream;
cudaStream_t stream = 0;
int l2CacheSize;
char* scratchBuffer;
}; // class Fixture
Expand Down
9 changes: 1 addition & 8 deletions cpp/bench/prims/add.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

#include <common/ml_benchmark.hpp>
#include <raft/linalg/add.cuh>
#include <raft/mr/device/allocator.hpp>

namespace MLCommon {
namespace Bench {
Expand All @@ -28,13 +27,7 @@ struct AddParams {

template <typename T>
struct AddBench : public Fixture {
AddBench(const std::string& name, const AddParams& p)
: Fixture(
name,
std::shared_ptr<raft::mr::device::allocator>(new raft::mr::device::default_allocator)),
params(p)
{
}
AddBench(const std::string& name, const AddParams& p) : Fixture(name), params(p) {}

protected:
void allocateBuffers(const ::benchmark::State& state) override
Expand Down
41 changes: 16 additions & 25 deletions cpp/bench/prims/distance_common.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#include <raft/cudart_utils.h>
#include <common/ml_benchmark.hpp>
#include <raft/distance/distance.cuh>
#include <raft/mr/device/allocator.hpp>

namespace MLCommon {
namespace Bench {
Expand All @@ -31,42 +30,34 @@ struct Params {
template <typename T, raft::distance::DistanceType DType>
struct Distance : public Fixture {
Distance(const std::string& name, const Params& p)
: Fixture(
name,
std::shared_ptr<raft::mr::device::allocator>(new raft::mr::device::default_allocator)),
params(p)
: Fixture(name), params(p), x(0, stream), y(0, stream), out(0, stream), workspace(0, stream)
{
}

protected:
void allocateBuffers(const ::benchmark::State& state) override
{
alloc(x, params.m * params.k, true);
alloc(y, params.n * params.k, true);
alloc(out, params.m * params.n, true);
workspace = nullptr;
worksize = raft::distance::getWorkspaceSize<DType, T, T, T>(x, y, params.m, params.n, params.k);
if (worksize != 0) { alloc(workspace, worksize, false); }
}

void deallocateBuffers(const ::benchmark::State& state) override
{
dealloc(x, params.m * params.k);
dealloc(y, params.n * params.k);
dealloc(out, params.m * params.n);
dealloc(workspace, worksize);
x.resize(params.m * params.k, stream);
y.resize(params.n * params.k, stream);
out.resize(params.m * params.n, stream);
CUDA_CHECK(cudaMemsetAsync(x.data(), 0, x.size() * sizeof(T), stream));
CUDA_CHECK(cudaMemsetAsync(y.data(), 0, y.size() * sizeof(T), stream));
CUDA_CHECK(cudaMemsetAsync(out.data(), 0, out.size() * sizeof(T), stream));
worksize = raft::distance::getWorkspaceSize<DType, T, T, T>(
x.data(), y.data(), params.m, params.n, params.k);
workspace.resize(worksize, stream);
}

void runBenchmark(::benchmark::State& state) override
{
loopOnState(state, [this]() {
raft::distance::distance<DType, T, T, T>(x,
y,
out,
raft::distance::distance<DType, T, T, T>(x.data(),
y.data(),
out.data(),
params.m,
params.n,
params.k,
(void*)workspace,
(void*)workspace.data(),
worksize,
stream,
params.isRowMajor);
Expand All @@ -75,8 +66,8 @@ struct Distance : public Fixture {

private:
Params params;
T *x, *y, *out;
char* workspace;
rmm::device_uvector<T> x, y, out;
rmm::device_uvector<char> workspace;
size_t worksize;
}; // struct Distance

Expand Down
9 changes: 1 addition & 8 deletions cpp/bench/prims/fused_l2_nn.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include <limits>
#include <raft/distance/fused_l2_nn.cuh>
#include <raft/linalg/norm.cuh>
#include <raft/mr/device/allocator.hpp>
#include <raft/random/rng.cuh>

namespace MLCommon {
Expand All @@ -32,13 +31,7 @@ struct FLNParams {

template <typename T>
struct FusedL2NN : public Fixture {
FusedL2NN(const std::string& name, const FLNParams& p)
: Fixture(
name,
std::shared_ptr<raft::mr::device::allocator>(new raft::mr::device::default_allocator)),
params(p)
{
}
FusedL2NN(const std::string& name, const FLNParams& p) : Fixture(name), params(p) {}

protected:
void allocateBuffers(const ::benchmark::State& state) override
Expand Down
36 changes: 13 additions & 23 deletions cpp/bench/prims/gram_matrix.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
*/

#include <cuml/matrix/kernelparams.h>
#include <raft/linalg/cublas_wrappers.h>
#include <common/ml_benchmark.hpp>
#include <matrix/grammatrix.cuh>
#include <matrix/kernelfactory.cuh>
#include <memory>
#include <raft/mr/device/allocator.hpp>
#include <raft/random/rng.cuh>
#include <sstream>
#include <string>
Expand All @@ -42,10 +42,7 @@ struct GramTestParams {
template <typename T>
struct GramMatrix : public Fixture {
GramMatrix(const std::string& name, const GramTestParams& p)
: Fixture(
name,
std::shared_ptr<raft::mr::device::allocator>(new raft::mr::device::default_allocator)),
params(p)
: Fixture(name), params(p), A(0, stream), B(0, stream), C(0, stream)
{
std::vector<std::string> kernel_names{"linear", "poly", "rbf", "tanh"};
std::ostringstream oss;
Expand All @@ -63,31 +60,24 @@ struct GramMatrix : public Fixture {
protected:
void allocateBuffers(const ::benchmark::State& state) override
{
alloc(A, params.m * params.k);
alloc(B, params.k * params.n);
alloc(C, params.m * params.n);
A.resize(params.m * params.k, stream);
B.resize(params.k * params.n, stream);
C.resize(params.m * params.n, stream);
raft::random::Rng r(123456ULL);
r.uniform(A, params.m * params.k, T(-1.0), T(1.0), stream);
r.uniform(B, params.k * params.n, T(-1.0), T(1.0), stream);
}

void deallocateBuffers(const ::benchmark::State& state) override
{
dealloc(A, params.m * params.k);
dealloc(B, params.k * params.n);
dealloc(C, params.m * params.n);
r.uniform(A.data(), params.m * params.k, T(-1.0), T(1.0), stream);
r.uniform(B.data(), params.k * params.n, T(-1.0), T(1.0), stream);
}

void runBenchmark(::benchmark::State& state) override
{
if (!this->kernel) { state.SkipWithError("Kernel matrix is not initialized"); }
loopOnState(state, [this]() {
(*this->kernel)(this->A,
(*this->kernel)(A.data(),
this->params.m,
this->params.k,
this->B,
B.data(),
this->params.n,
this->C,
C.data(),
this->params.is_row_major,
this->stream);
});
Expand All @@ -98,9 +88,9 @@ struct GramMatrix : public Fixture {
std::unique_ptr<GramMatrixBase<T>> kernel;
GramTestParams params;

T* A; // input matrix A, size [m * k]
T* B; // input matrix B, size [n * k]
T* C; // output matrix C, size [m*n]
rmm::device_uvector<T> A; // input matrix A, size [m * k]
rmm::device_uvector<T> B; // input matrix B, size [n * k]
rmm::device_uvector<T> C; // output matrix C, size [m*n]
};

static std::vector<GramTestParams> getInputs()
Expand Down
25 changes: 7 additions & 18 deletions cpp/bench/prims/make_blobs.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
*/

#include <common/ml_benchmark.hpp>
#include <raft/mr/device/allocator.hpp>
#include <random/make_blobs.cuh>

namespace MLCommon {
Expand All @@ -30,44 +29,34 @@ struct Params {
template <typename T>
struct MakeBlobs : public Fixture {
MakeBlobs(const std::string& name, const Params& p)
: Fixture(
name,
std::shared_ptr<raft::mr::device::allocator>(new raft::mr::device::default_allocator)),
params(p)
: Fixture(name), params(p), data(0, stream), labels(0, stream)
{
}

protected:
void allocateBuffers(const ::benchmark::State& state) override
{
alloc(data, params.rows * params.cols);
alloc(labels, params.rows);
}

void deallocateBuffers(const ::benchmark::State& state) override
{
dealloc(data, params.rows * params.cols);
dealloc(labels, params.rows);
data.resize(params.rows * params.cols, stream);
labels.resize(params.rows, stream);
}

void runBenchmark(::benchmark::State& state) override
{
loopOnState(state, [this]() {
MLCommon::Random::make_blobs(data,
labels,
MLCommon::Random::make_blobs(data.data(),
labels.data(),
params.rows,
params.cols,
params.clusters,
this->d_alloc,
this->stream,
params.row_major);
});
}

private:
Params params;
T* data;
int* labels;
rmm::device_uvector<T> data;
rmm::device_uvector<int> labels;
}; // struct MakeBlobs

static std::vector<Params> getInputs()
Expand Down
9 changes: 1 addition & 8 deletions cpp/bench/prims/map_then_reduce.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

#include <common/ml_benchmark.hpp>
#include <raft/linalg/map_then_reduce.cuh>
#include <raft/mr/device/allocator.hpp>

namespace MLCommon {
namespace Bench {
Expand All @@ -33,13 +32,7 @@ struct Identity {

template <typename T>
struct MapThenReduce : public Fixture {
MapThenReduce(const std::string& name, const Params& p)
: Fixture(
name,
std::shared_ptr<raft::mr::device::allocator>(new raft::mr::device::default_allocator)),
params(p)
{
}
MapThenReduce(const std::string& name, const Params& p) : Fixture(name), params(p) {}

protected:
void allocateBuffers(const ::benchmark::State& state) override
Expand Down
9 changes: 1 addition & 8 deletions cpp/bench/prims/matrix_vector_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

#include <common/ml_benchmark.hpp>
#include <raft/linalg/matrix_vector_op.cuh>
#include <raft/mr/device/allocator.hpp>

namespace MLCommon {
namespace Bench {
Expand All @@ -29,13 +28,7 @@ struct Params {

template <typename T>
struct MatVecOp : public Fixture {
MatVecOp(const std::string& name, const Params& p)
: Fixture(
name,
std::shared_ptr<raft::mr::device::allocator>(new raft::mr::device::default_allocator)),
params(p)
{
}
MatVecOp(const std::string& name, const Params& p) : Fixture(name), params(p) {}

protected:
void allocateBuffers(const ::benchmark::State& state) override
Expand Down
8 changes: 1 addition & 7 deletions cpp/bench/prims/permute.cu
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,7 @@ struct Params {

template <typename T>
struct Permute : public Fixture {
Permute(const std::string& name, const Params& p)
: Fixture(
name,
std::shared_ptr<raft::mr::device::allocator>(new raft::mr::device::default_allocator)),
params(p)
{
}
Permute(const std::string& name, const Params& p) : Fixture(name), params(p) {}

protected:
void allocateBuffers(const ::benchmark::State& state) override
Expand Down
Loading