From ee8f2c06b9e3ab74a8c6fc1236976b4dff22b3b8 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 6 Apr 2022 17:21:47 -0400 Subject: [PATCH 1/5] Adding prims benchmarks --- cpp/bench/CMakeLists.txt | 13 +++ cpp/bench/distance/distance_common.cuh | 92 +++++++++++++++++++ cpp/bench/distance/distance_cosine.cu | 23 +++++ cpp/bench/distance/distance_exp_l2.cu | 24 +++++ cpp/bench/distance/distance_l1.cu | 23 +++++ cpp/bench/distance/distance_unexp_l2.cu | 24 +++++ cpp/bench/linalg/add.cu | 52 +++++++++++ cpp/bench/linalg/map_then_reduce.cu | 65 ++++++++++++++ cpp/bench/linalg/matrix_vector_op.cu | 75 ++++++++++++++++ cpp/bench/random/make_blobs.cu | 77 ++++++++++++++++ cpp/bench/random/permute.cu | 73 +++++++++++++++ cpp/bench/random/rng.cu | 112 ++++++++++++++++++++++++ cpp/bench/spatial/fused_l2_nn.cu | 89 +++++++++++++++++++ 13 files changed, 742 insertions(+) create mode 100644 cpp/bench/distance/distance_common.cuh create mode 100644 cpp/bench/distance/distance_cosine.cu create mode 100644 cpp/bench/distance/distance_exp_l2.cu create mode 100644 cpp/bench/distance/distance_l1.cu create mode 100644 cpp/bench/distance/distance_unexp_l2.cu create mode 100644 cpp/bench/linalg/add.cu create mode 100644 cpp/bench/linalg/map_then_reduce.cu create mode 100644 cpp/bench/linalg/matrix_vector_op.cu create mode 100644 cpp/bench/random/make_blobs.cu create mode 100644 cpp/bench/random/permute.cu create mode 100644 cpp/bench/random/rng.cu create mode 100644 cpp/bench/spatial/fused_l2_nn.cu diff --git a/cpp/bench/CMakeLists.txt b/cpp/bench/CMakeLists.txt index 5214047571..7a0f1d5201 100644 --- a/cpp/bench/CMakeLists.txt +++ b/cpp/bench/CMakeLists.txt @@ -18,7 +18,18 @@ set(RAFT_CPP_BENCH_TARGET "bench_raft") # (please keep the filenames in alphabetical order) add_executable(${RAFT_CPP_BENCH_TARGET} + bench/distance/distance_cosine.cu + bench/distance/distance_exp_l2.cu + bench/distance/distance_l1.cu + bench/distance/distance_unexp_l2.cu + bench/linalg/add.cu + bench/linalg/map_then_reduce.cu + bench/linalg/matrix_vector_op.cu bench/linalg/reduce.cu + bench/random/make_blobs.cu + bench/random/permute.cu + bench/random/rng.cu + bench/spatial/fused_l2_nn.cu bench/spatial/selection.cu bench/main.cpp ) @@ -47,6 +58,8 @@ target_include_directories(${RAFT_CPP_BENCH_TARGET} target_link_libraries(${RAFT_CPP_BENCH_TARGET} PRIVATE raft::raft + raft::distance + raft::nn faiss::faiss benchmark::benchmark $ diff --git a/cpp/bench/distance/distance_common.cuh b/cpp/bench/distance/distance_common.cuh new file mode 100644 index 0000000000..b4ab4da953 --- /dev/null +++ b/cpp/bench/distance/distance_common.cuh @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include + +namespace raft::bench::distance { + +struct distance_inputs { + int m, n, k; + bool isRowMajor; +}; // struct distance_inputs + +template +struct distance : public fixture { + distance(const distance_inputs& p) + : params(p), x(p.m * p.k, stream), y(p.n * p.k, stream), out(p.m * p.n, stream), workspace(0, stream) + { + RAFT_CUDA_TRY(cudaMemsetAsync(x.data(), 0, x.size() * sizeof(T), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(y.data(), 0, y.size() * sizeof(T), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(out.data(), 0, out.size() * sizeof(T), stream)); + worksize = raft::distance::getWorkspaceSize( + x.data(), y.data(), params.m, params.n, params.k); + workspace.resize(worksize, stream); + + } + + void run_benchmark(::benchmark::State& state) override + { + loop_on_state(state, [this]() { + raft::distance::distance(x.data(), + y.data(), + out.data(), + params.m, + params.n, + params.k, + (void*)workspace.data(), + worksize, + stream, + params.isRowMajor); + }); + } + + private: + distance_inputs params; + rmm::device_uvector x, y, out; + rmm::device_uvector workspace; + size_t worksize; +}; // struct Distance + +const std::vector dist_input_vecs +{ + {32, 16384, 16384, true}, {64, 16384, 16384, true}, {128, 16384, 16384, true}, + {256, 16384, 16384, true}, {512, 16384, 16384, true}, {1024, 16384, 16384, true}, + {16384, 32, 16384, true}, {16384, 64, 16384, true}, {16384, 128, 16384, true}, + {16384, 256, 16384, true}, {16384, 512, 16384, true}, {16384, 1024, 16384, true}, + {16384, 16384, 32, true}, {16384, 16384, 64, true}, {16384, 16384, 128, true}, + {16384, 16384, 256, true}, {16384, 16384, 512, true}, {16384, 16384, 1024, true}, + {16384, 16384, 16384, true}, {32, 16384, 16384, false}, {64, 16384, 16384, false}, + {128, 16384, 16384, false}, {256, 16384, 16384, false}, {512, 16384, 16384, false}, + {1024, 16384, 16384, false}, {16384, 32, 16384, false}, {16384, 64, 16384, false}, + {16384, 128, 16384, false}, {16384, 256, 16384, false}, {16384, 512, 16384, false}, + {16384, 1024, 16384, false}, {16384, 16384, 32, false}, {16384, 16384, 64, false}, + {16384, 16384, 128, false}, {16384, 16384, 256, false}, {16384, 16384, 512, false}, + {16384, 16384, 1024, false}, {16384, 16384, 16384, false} + +}; + +#define DIST_BENCH_REGISTER(Name, Metric) \ + using Name##F = distance; \ + RAFT_BENCH_REGISTER(Name##F, "", dist_input_vecs); \ + using Name##D = distance; \ + RAFT_BENCH_REGISTER(Name##D, "", dist_input_vecs); + +} // namespace raft::bench::distance diff --git a/cpp/bench/distance/distance_cosine.cu b/cpp/bench/distance/distance_cosine.cu new file mode 100644 index 0000000000..20f29ce4ef --- /dev/null +++ b/cpp/bench/distance/distance_cosine.cu @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "distance_common.cuh" + +namespace raft::bench::distance { + +DIST_BENCH_REGISTER(DistanceCosine, raft::distance::DistanceType::CosineExpanded); + +} // namespace raft::bench::distance diff --git a/cpp/bench/distance/distance_exp_l2.cu b/cpp/bench/distance/distance_exp_l2.cu new file mode 100644 index 0000000000..5a3af17193 --- /dev/null +++ b/cpp/bench/distance/distance_exp_l2.cu @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "distance_common.cuh" + +namespace raft::bench::distance { + +DIST_BENCH_REGISTER(DistanceL2Sq, raft::distance::DistanceType::L2Expanded); +DIST_BENCH_REGISTER(DistanceL2Sqrt, raft::distance::DistanceType::L2SqrtExpanded); + +} // namespace raft::bench::distance diff --git a/cpp/bench/distance/distance_l1.cu b/cpp/bench/distance/distance_l1.cu new file mode 100644 index 0000000000..2ad7d5e957 --- /dev/null +++ b/cpp/bench/distance/distance_l1.cu @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "distance_common.cuh" + +namespace raft::bench::distance { + +DIST_BENCH_REGISTER(DistanceL1, raft::distance::DistanceType::L1); + +} // namespace raft::bench::distance diff --git a/cpp/bench/distance/distance_unexp_l2.cu b/cpp/bench/distance/distance_unexp_l2.cu new file mode 100644 index 0000000000..406aca2378 --- /dev/null +++ b/cpp/bench/distance/distance_unexp_l2.cu @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "distance_common.cuh" + +namespace raft::bench::distance { + +DIST_BENCH_REGISTER(DistanceUnexpL2Sq, raft::distance::DistanceType::L2Unexpanded); +DIST_BENCH_REGISTER(DistanceUnexpL2Sqrt, raft::distance::DistanceType::L2SqrtUnexpanded); + +} // namespace raft::bench::distance diff --git a/cpp/bench/linalg/add.cu b/cpp/bench/linalg/add.cu new file mode 100644 index 0000000000..4c5a0407d4 --- /dev/null +++ b/cpp/bench/linalg/add.cu @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +namespace raft::bench::linalg { + +struct add_inputs { + int len; +}; // struct add_inputs + +template +struct add : public fixture { + add(const add_inputs& p) : params(p), ptr0(p.len, stream), ptr1(p.len, stream) {} + + void run_benchmark(::benchmark::State& state) override + { + loop_on_state(state, [this]() { raft::linalg::add(ptr0.data(), ptr0.data(), ptr1.data(), params.len, stream); }); + } + + private: + add_inputs params; + rmm::device_uvector ptr0, ptr1; +}; // struct add + +const std::vector add_input_vecs +{ + {256 * 1024 * 1024}, + {256 * 1024 * 1024 + 2}, + {256 * 1024 * 1024 + 1} + +}; + +RAFT_BENCH_REGISTER(add, "", add_input_vecs); +RAFT_BENCH_REGISTER(add, "", add_input_vecs); + +} // namespace raft::bench::linalg diff --git a/cpp/bench/linalg/map_then_reduce.cu b/cpp/bench/linalg/map_then_reduce.cu new file mode 100644 index 0000000000..fd60b97b44 --- /dev/null +++ b/cpp/bench/linalg/map_then_reduce.cu @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +namespace raft::bench::linalg { + +struct map_then_reduce_inputs { + int len; +}; + +template +struct Identity { + HDI Type operator()(Type a) { return a; } +}; + +template +struct map_then_reduce : public fixture { + map_then_reduce(const map_then_reduce_inputs& p) : params(p), in(p.len, stream), out(1, stream) {} + + void run_benchmark(::benchmark::State& state) override + { + loop_on_state(state, [this]() { + raft::linalg::mapThenSumReduce(out.data(), params.len, Identity(), stream, in.data()); + }); + } + + private: + map_then_reduce_inputs params; + rmm::device_uvector out, in; +}; // struct MapThenReduce + +const std::vector map_then_reduce_input_vecs +{ + {1024 * 1024}, + {32 * 1024 * 1024}, + {1024 * 1024 * 1024}, + {1024 * 1024 + 2}, + {32 * 1024 * 1024 + 2}, + {1024 * 1024 * 1024 + 2}, + {1024 * 1024 + 1}, + {32 * 1024 * 1024 + 1}, + {1024 * 1024 * 1024 + 1}, + +}; + +RAFT_BENCH_REGISTER(map_then_reduce, "", map_then_reduce_input_vecs); +RAFT_BENCH_REGISTER(map_then_reduce, "",map_then_reduce_input_vecs); + +} // namespace raft::bench::linalg diff --git a/cpp/bench/linalg/matrix_vector_op.cu b/cpp/bench/linalg/matrix_vector_op.cu new file mode 100644 index 0000000000..55a1a6d3ab --- /dev/null +++ b/cpp/bench/linalg/matrix_vector_op.cu @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +namespace raft::bench::linalg { + +struct mat_vec_op_inputs { + int rows, cols; + bool rowMajor, bcastAlongRows; +}; // struct mat_vec_op_inputs + +template +struct mat_vec_op : public fixture { + mat_vec_op(const mat_vec_op_inputs& p) : params(p), out(p.rows * p.cols, stream), in(p.rows*p.cols, stream), vec(p.bcastAlongRows ? p.cols : p.rows, stream) {} + + void run_benchmark(::benchmark::State& state) override + { + loop_on_state(state, [this]() { + raft::linalg::matrixVectorOp(out.data(), + in.data(), + vec.data(), + params.cols, + params.rows, + params.rowMajor, + params.bcastAlongRows, + raft::Sum(), + stream); + }); + } + + private: + mat_vec_op_inputs params; + rmm::device_uvector out, in, vec; +}; // struct MatVecOp + +const std::vector mat_vec_op_input_vecs +{ + {1024, 128, true, true}, {1024 * 1024, 128, true, true}, + {1024, 128 + 2, true, true}, {1024 * 1024, 128 + 2, true, true}, + {1024, 128 + 1, true, true}, {1024 * 1024, 128 + 1, true, true}, + + {1024, 128, true, false}, {1024 * 1024, 128, true, false}, + {1024, 128 + 2, true, false}, {1024 * 1024, 128 + 2, true, false}, + {1024, 128 + 1, true, false}, {1024 * 1024, 128 + 1, true, false}, + + {1024, 128, false, false}, {1024 * 1024, 128, false, false}, + {1024, 128 + 2, false, false}, {1024 * 1024, 128 + 2, false, false}, + {1024, 128 + 1, false, false}, {1024 * 1024, 128 + 1, false, false}, + + {1024, 128, false, true}, {1024 * 1024, 128, false, true}, + {1024, 128 + 2, false, true}, {1024 * 1024, 128 + 2, false, true}, + {1024, 128 + 1, false, true}, {1024 * 1024, 128 + 1, false, true}, + +}; + +RAFT_BENCH_REGISTER(mat_vec_op, "", mat_vec_op_input_vecs); +RAFT_BENCH_REGISTER(mat_vec_op, "", mat_vec_op_input_vecs); + +} // namespace raft::bench::linalg diff --git a/cpp/bench/random/make_blobs.cu b/cpp/bench/random/make_blobs.cu new file mode 100644 index 0000000000..d88713285c --- /dev/null +++ b/cpp/bench/random/make_blobs.cu @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +namespace raft::bench::random { +struct make_blobs_inputs { + int rows, cols, clusters; + bool row_major; +}; // struct make_blobs_inputs + +template +struct make_blobs : public fixture { + make_blobs(const make_blobs_inputs& p) + : params(p), data(p.rows * p.cols, stream), labels(p.rows, stream) + { + } + + void run_benchmark(::benchmark::State& state) override + { + loop_on_state(state, [this]() { + raft::random::make_blobs(data.data(), + labels.data(), + params.rows, + params.cols, + params.clusters, + this->stream, + params.row_major); + }); + } + + private: + make_blobs_inputs params; + rmm::device_uvector data; + rmm::device_uvector labels; +}; // struct MakeBlobs + +static std::vector get_make_blobs_input_vecs() +{ + std::vector out; + make_blobs_inputs p; + for (auto rows : std::vector{100000, 1000000}) { + for (auto cols : std::vector{10, 100}) { + for (auto clusters : std::vector{2, 10, 100}) { + p.rows = rows; + p.cols = cols; + p.clusters = clusters; + p.row_major = true; + out.push_back(p); + p.row_major = false; + out.push_back(p); + } + } + } + return out; +} + +RAFT_BENCH_REGISTER(make_blobs, "", get_make_blobs_input_vecs()); +RAFT_BENCH_REGISTER(make_blobs, "", get_make_blobs_input_vecs()); + +} // namespace raft::bench::random diff --git a/cpp/bench/random/permute.cu b/cpp/bench/random/permute.cu new file mode 100644 index 0000000000..046f3beab9 --- /dev/null +++ b/cpp/bench/random/permute.cu @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include + +namespace raft::bench::random { + +struct permute_inputs { + int rows, cols; + bool needPerms, needShuffle, rowMajor; +}; // struct permute_inputs + +template +struct permute : public fixture { + permute(const permute_inputs& p) : params(p), perms(p.needPerms ? p.rows : 0, stream), out(p.rows * p.cols, stream), in(p.rows*p.cols, stream) { + raft::random::Rng r(123456ULL); + r.uniform(in.data(), p.rows, T(-1.0), T(1.0), stream); + } + + void run_benchmark(::benchmark::State& state) override + { + raft::random::Rng r(123456ULL); + loop_on_state(state, [this, &r]() { + raft::random::permute(perms.data(), out.data(), in.data(), params.cols, params.rows, params.rowMajor, stream); + }); + } + + private: + permute_inputs params; + rmm::device_uvector out, in; + rmm::device_uvector perms; +}; // struct permute + +const std::vector permute_input_vecs = +{ + {32 * 1024, 128, true, true, true}, + {1024 * 1024, 128, true, true, true}, + {32 * 1024, 128 + 2, true, true, true}, + {1024 * 1024, 128 + 2, true, true, true}, + {32 * 1024, 128 + 1, true, true, true}, + {1024 * 1024, 128 + 1, true, true, true}, + + {32 * 1024, 128, true, true, false}, + {1024 * 1024, 128, true, true, false}, + {32 * 1024, 128 + 2, true, true, false}, + {1024 * 1024, 128 + 2, true, true, false}, + {32 * 1024, 128 + 1, true, true, false}, + {1024 * 1024, 128 + 1, true, true, false}, + +}; + +RAFT_BENCH_REGISTER(permute, "", permute_input_vecs); +RAFT_BENCH_REGISTER(permute, "", permute_input_vecs); + +} // namespace raft::bench::random diff --git a/cpp/bench/random/rng.cu b/cpp/bench/random/rng.cu new file mode 100644 index 0000000000..353a4aff53 --- /dev/null +++ b/cpp/bench/random/rng.cu @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include + +namespace raft::bench::random { + +enum RandomType { + RNG_Normal, + RNG_LogNormal, + RNG_Uniform, + RNG_Gumbel, + RNG_Logistic, + RNG_Exp, + RNG_Rayleigh, + RNG_Laplace, + RNG_Fill +}; // enum RandomType + +template +struct rng_inputs { + int len; + RandomType type; + raft::random::GeneratorType gtype; + T start, end; +}; // struct rng_inputs + +template +struct rng : public fixture { + rng(const rng_inputs& p) : params(p), ptr(p.len, stream) {} + + void run_benchmark(::benchmark::State& state) override + { + raft::random::Rng r(123456ULL, params.gtype); + loop_on_state(state, [this, &r]() { + switch (params.type) { + case RNG_Normal: r.normal(ptr.data(), params.len, params.start, params.end, stream); break; + case RNG_LogNormal: r.lognormal(ptr.data(), params.len, params.start, params.end, stream); break; + case RNG_Uniform: r.uniform(ptr.data(), params.len, params.start, params.end, stream); break; + case RNG_Gumbel: r.gumbel(ptr.data(), params.len, params.start, params.end, stream); break; + case RNG_Logistic: r.logistic(ptr.data(), params.len, params.start, params.end, stream); break; + case RNG_Exp: r.exponential(ptr.data(), params.len, params.start, stream); break; + case RNG_Rayleigh: r.rayleigh(ptr.data(), params.len, params.start, stream); break; + case RNG_Laplace: r.laplace(ptr.data(), params.len, params.start, params.end, stream); break; + case RNG_Fill: r.fill(ptr.data(), params.len, params.start, stream); break; + }; + }); + } + + private: + rng_inputs params; + rmm::device_uvector ptr; +}; // struct RngBench + +template +static std::vector> get_rng_input_vecs() +{ + using namespace raft::random; + return { + {1024 * 1024, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, + {32 * 1024 * 1024, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, + {1024 * 1024 * 1024, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, + {1024 * 1024 + 2, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, + {32 * 1024 * 1024 + 2, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, + {1024 * 1024 * 1024 + 2, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, + {1024 * 1024 + 1, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, + {32 * 1024 * 1024 + 1, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, + {1024 * 1024 * 1024 + 1, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, + + {1024 * 1024, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, + {32 * 1024 * 1024, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, + {1024 * 1024 * 1024, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, + {1024 * 1024 + 2, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, + {32 * 1024 * 1024 + 2, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, + {1024 * 1024 * 1024 + 2, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, + {1024 * 1024 + 1, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, + {32 * 1024 * 1024 + 1, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, + {1024 * 1024 * 1024 + 1, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, + + {1024 * 1024, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, + {32 * 1024 * 1024, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, + {1024 * 1024 * 1024, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, + {1024 * 1024 + 2, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, + {32 * 1024 * 1024 + 2, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, + {1024 * 1024 * 1024 + 2, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, + {1024 * 1024 + 1, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, + {32 * 1024 * 1024 + 1, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, + {1024 * 1024 * 1024 + 1, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, + }; +} + +RAFT_BENCH_REGISTER(rng, "", get_rng_input_vecs()); +RAFT_BENCH_REGISTER(rng, "", get_rng_input_vecs()); + +} // namespace raft::bench::random diff --git a/cpp/bench/spatial/fused_l2_nn.cu b/cpp/bench/spatial/fused_l2_nn.cu new file mode 100644 index 0000000000..bd6594d3d4 --- /dev/null +++ b/cpp/bench/spatial/fused_l2_nn.cu @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace raft::bench::spatial { + +struct fused_l2_nn_inputs { + int m, n, k; +}; // struct fused_l2_nn_inputs + +template +struct fused_l2_nn : public fixture { + fused_l2_nn(const fused_l2_nn_inputs& p) : params(p), out(p.m, stream), x(p.m*p.k, stream), y(p.n*p.k, stream), xn(p.m, stream), yn(p.n, stream), workspace(p.m, stream) { + raft::handle_t handle{stream}; + raft::random::Rng r(123456ULL); + + r.uniform(x.data(), p.m * p.k, T(-1.0), T(1.0), stream); + r.uniform(y.data(), p.n * p.k, T(-1.0), T(1.0), stream); + raft::linalg::rowNorm(xn.data(), x.data(), p.k, p.m, raft::linalg::L2Norm, true, stream); + raft::linalg::rowNorm(yn.data(), y.data(), p.k, p.n, raft::linalg::L2Norm, true, stream); + raft::distance::initialize, int>( + handle, out.data(), p.m, std::numeric_limits::max(), op); + + } + + void run_benchmark(::benchmark::State& state) override + { + loop_on_state(state, [this]() { + // it is enough to only benchmark the L2-squared metric + raft::distance::fusedL2NN, int>(out.data(), + x.data(), + y.data(), + xn.data(), + yn.data(), + params.m, + params.n, + params.k, + (void*)workspace.data(), + op, + pairRedOp, + false, + false, + stream); + }); + } + + private: + fused_l2_nn_inputs params; + rmm::device_uvector x, y, xn, yn; + rmm::device_uvector> out; + rmm::device_uvector workspace; + raft::distance::KVPMinReduce pairRedOp; + raft::distance::MinAndDistanceReduceOp op; +}; // struct FusedL2NN + +const std::vector fused_l2_nn_input_vecs = { + {32, 16384, 16384}, {64, 16384, 16384}, {128, 16384, 16384}, {256, 16384, 16384}, + {512, 16384, 16384}, {1024, 16384, 16384}, {16384, 32, 16384}, {16384, 64, 16384}, + {16384, 128, 16384}, {16384, 256, 16384}, {16384, 512, 16384}, {16384, 1024, 16384}, + {16384, 16384, 32}, {16384, 16384, 64}, {16384, 16384, 128}, {16384, 16384, 256}, + {16384, 16384, 512}, {16384, 16384, 1024}, {16384, 16384, 16384}, + +}; + +RAFT_BENCH_REGISTER(fused_l2_nn, "", fused_l2_nn_input_vecs); +RAFT_BENCH_REGISTER(fused_l2_nn, "", fused_l2_nn_input_vecs); + +} // namespace raft::bench::spatial From d52584b1e8dc6191393357bdf1261e7f40d7ae8d Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 6 Apr 2022 17:27:45 -0400 Subject: [PATCH 2/5] Fixing style of benchmarks --- cpp/bench/distance/distance_common.cuh | 56 +++++++++++++------------- cpp/bench/linalg/add.cu | 13 +++--- cpp/bench/linalg/map_then_reduce.cu | 29 +++++++------ cpp/bench/linalg/matrix_vector_op.cu | 37 +++++++++-------- cpp/bench/random/make_blobs.cu | 4 +- cpp/bench/random/permute.cu | 43 +++++++++++--------- cpp/bench/random/rng.cu | 20 ++++++--- cpp/bench/spatial/fused_l2_nn.cu | 43 +++++++++++--------- 8 files changed, 135 insertions(+), 110 deletions(-) diff --git a/cpp/bench/distance/distance_common.cuh b/cpp/bench/distance/distance_common.cuh index b4ab4da953..5817162f22 100644 --- a/cpp/bench/distance/distance_common.cuh +++ b/cpp/bench/distance/distance_common.cuh @@ -31,20 +31,23 @@ struct distance_inputs { template struct distance : public fixture { distance(const distance_inputs& p) - : params(p), x(p.m * p.k, stream), y(p.n * p.k, stream), out(p.m * p.n, stream), workspace(0, stream) + : params(p), + x(p.m * p.k, stream), + y(p.n * p.k, stream), + out(p.m * p.n, stream), + workspace(0, stream) { - RAFT_CUDA_TRY(cudaMemsetAsync(x.data(), 0, x.size() * sizeof(T), stream)); - RAFT_CUDA_TRY(cudaMemsetAsync(y.data(), 0, y.size() * sizeof(T), stream)); - RAFT_CUDA_TRY(cudaMemsetAsync(out.data(), 0, out.size() * sizeof(T), stream)); - worksize = raft::distance::getWorkspaceSize( - x.data(), y.data(), params.m, params.n, params.k); - workspace.resize(worksize, stream); - + RAFT_CUDA_TRY(cudaMemsetAsync(x.data(), 0, x.size() * sizeof(T), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(y.data(), 0, y.size() * sizeof(T), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(out.data(), 0, out.size() * sizeof(T), stream)); + worksize = raft::distance::getWorkspaceSize( + x.data(), y.data(), params.m, params.n, params.k); + workspace.resize(worksize, stream); } void run_benchmark(::benchmark::State& state) override { - loop_on_state(state, [this]() { + loop_on_state(state, [this]() { raft::distance::distance(x.data(), y.data(), out.data(), @@ -65,28 +68,27 @@ struct distance : public fixture { size_t worksize; }; // struct Distance -const std::vector dist_input_vecs -{ - {32, 16384, 16384, true}, {64, 16384, 16384, true}, {128, 16384, 16384, true}, - {256, 16384, 16384, true}, {512, 16384, 16384, true}, {1024, 16384, 16384, true}, - {16384, 32, 16384, true}, {16384, 64, 16384, true}, {16384, 128, 16384, true}, - {16384, 256, 16384, true}, {16384, 512, 16384, true}, {16384, 1024, 16384, true}, - {16384, 16384, 32, true}, {16384, 16384, 64, true}, {16384, 16384, 128, true}, - {16384, 16384, 256, true}, {16384, 16384, 512, true}, {16384, 16384, 1024, true}, - {16384, 16384, 16384, true}, {32, 16384, 16384, false}, {64, 16384, 16384, false}, - {128, 16384, 16384, false}, {256, 16384, 16384, false}, {512, 16384, 16384, false}, - {1024, 16384, 16384, false}, {16384, 32, 16384, false}, {16384, 64, 16384, false}, - {16384, 128, 16384, false}, {16384, 256, 16384, false}, {16384, 512, 16384, false}, - {16384, 1024, 16384, false}, {16384, 16384, 32, false}, {16384, 16384, 64, false}, - {16384, 16384, 128, false}, {16384, 16384, 256, false}, {16384, 16384, 512, false}, - {16384, 16384, 1024, false}, {16384, 16384, 16384, false} +const std::vector dist_input_vecs{ + {32, 16384, 16384, true}, {64, 16384, 16384, true}, {128, 16384, 16384, true}, + {256, 16384, 16384, true}, {512, 16384, 16384, true}, {1024, 16384, 16384, true}, + {16384, 32, 16384, true}, {16384, 64, 16384, true}, {16384, 128, 16384, true}, + {16384, 256, 16384, true}, {16384, 512, 16384, true}, {16384, 1024, 16384, true}, + {16384, 16384, 32, true}, {16384, 16384, 64, true}, {16384, 16384, 128, true}, + {16384, 16384, 256, true}, {16384, 16384, 512, true}, {16384, 16384, 1024, true}, + {16384, 16384, 16384, true}, {32, 16384, 16384, false}, {64, 16384, 16384, false}, + {128, 16384, 16384, false}, {256, 16384, 16384, false}, {512, 16384, 16384, false}, + {1024, 16384, 16384, false}, {16384, 32, 16384, false}, {16384, 64, 16384, false}, + {16384, 128, 16384, false}, {16384, 256, 16384, false}, {16384, 512, 16384, false}, + {16384, 1024, 16384, false}, {16384, 16384, 32, false}, {16384, 16384, 64, false}, + {16384, 16384, 128, false}, {16384, 16384, 256, false}, {16384, 16384, 512, false}, + {16384, 16384, 1024, false}, {16384, 16384, 16384, false} }; -#define DIST_BENCH_REGISTER(Name, Metric) \ - using Name##F = distance; \ +#define DIST_BENCH_REGISTER(Name, Metric) \ + using Name##F = distance; \ RAFT_BENCH_REGISTER(Name##F, "", dist_input_vecs); \ - using Name##D = distance; \ + using Name##D = distance; \ RAFT_BENCH_REGISTER(Name##D, "", dist_input_vecs); } // namespace raft::bench::distance diff --git a/cpp/bench/linalg/add.cu b/cpp/bench/linalg/add.cu index 4c5a0407d4..7c651b61ed 100644 --- a/cpp/bench/linalg/add.cu +++ b/cpp/bench/linalg/add.cu @@ -30,19 +30,18 @@ struct add : public fixture { void run_benchmark(::benchmark::State& state) override { - loop_on_state(state, [this]() { raft::linalg::add(ptr0.data(), ptr0.data(), ptr1.data(), params.len, stream); }); + loop_on_state(state, [this]() { + raft::linalg::add(ptr0.data(), ptr0.data(), ptr1.data(), params.len, stream); + }); } private: add_inputs params; - rmm::device_uvector ptr0, ptr1; + rmm::device_uvector ptr0, ptr1; }; // struct add -const std::vector add_input_vecs -{ - {256 * 1024 * 1024}, - {256 * 1024 * 1024 + 2}, - {256 * 1024 * 1024 + 1} +const std::vector add_input_vecs{ + {256 * 1024 * 1024}, {256 * 1024 * 1024 + 2}, {256 * 1024 * 1024 + 1} }; diff --git a/cpp/bench/linalg/map_then_reduce.cu b/cpp/bench/linalg/map_then_reduce.cu index fd60b97b44..7eeb4a79b6 100644 --- a/cpp/bench/linalg/map_then_reduce.cu +++ b/cpp/bench/linalg/map_then_reduce.cu @@ -31,35 +31,34 @@ struct Identity { template struct map_then_reduce : public fixture { - map_then_reduce(const map_then_reduce_inputs& p) : params(p), in(p.len, stream), out(1, stream) {} + map_then_reduce(const map_then_reduce_inputs& p) : params(p), in(p.len, stream), out(1, stream) {} void run_benchmark(::benchmark::State& state) override { - loop_on_state(state, [this]() { + loop_on_state(state, [this]() { raft::linalg::mapThenSumReduce(out.data(), params.len, Identity(), stream, in.data()); }); } private: - map_then_reduce_inputs params; + map_then_reduce_inputs params; rmm::device_uvector out, in; }; // struct MapThenReduce -const std::vector map_then_reduce_input_vecs -{ - {1024 * 1024}, - {32 * 1024 * 1024}, - {1024 * 1024 * 1024}, - {1024 * 1024 + 2}, - {32 * 1024 * 1024 + 2}, - {1024 * 1024 * 1024 + 2}, - {1024 * 1024 + 1}, - {32 * 1024 * 1024 + 1}, - {1024 * 1024 * 1024 + 1}, +const std::vector map_then_reduce_input_vecs{ + {1024 * 1024}, + {32 * 1024 * 1024}, + {1024 * 1024 * 1024}, + {1024 * 1024 + 2}, + {32 * 1024 * 1024 + 2}, + {1024 * 1024 * 1024 + 2}, + {1024 * 1024 + 1}, + {32 * 1024 * 1024 + 1}, + {1024 * 1024 * 1024 + 1}, }; RAFT_BENCH_REGISTER(map_then_reduce, "", map_then_reduce_input_vecs); -RAFT_BENCH_REGISTER(map_then_reduce, "",map_then_reduce_input_vecs); +RAFT_BENCH_REGISTER(map_then_reduce, "", map_then_reduce_input_vecs); } // namespace raft::bench::linalg diff --git a/cpp/bench/linalg/matrix_vector_op.cu b/cpp/bench/linalg/matrix_vector_op.cu index 55a1a6d3ab..d3a53ea345 100644 --- a/cpp/bench/linalg/matrix_vector_op.cu +++ b/cpp/bench/linalg/matrix_vector_op.cu @@ -27,11 +27,17 @@ struct mat_vec_op_inputs { template struct mat_vec_op : public fixture { - mat_vec_op(const mat_vec_op_inputs& p) : params(p), out(p.rows * p.cols, stream), in(p.rows*p.cols, stream), vec(p.bcastAlongRows ? p.cols : p.rows, stream) {} + mat_vec_op(const mat_vec_op_inputs& p) + : params(p), + out(p.rows * p.cols, stream), + in(p.rows * p.cols, stream), + vec(p.bcastAlongRows ? p.cols : p.rows, stream) + { + } void run_benchmark(::benchmark::State& state) override { - loop_on_state(state, [this]() { + loop_on_state(state, [this]() { raft::linalg::matrixVectorOp(out.data(), in.data(), vec.data(), @@ -49,23 +55,22 @@ struct mat_vec_op : public fixture { rmm::device_uvector out, in, vec; }; // struct MatVecOp -const std::vector mat_vec_op_input_vecs -{ - {1024, 128, true, true}, {1024 * 1024, 128, true, true}, - {1024, 128 + 2, true, true}, {1024 * 1024, 128 + 2, true, true}, - {1024, 128 + 1, true, true}, {1024 * 1024, 128 + 1, true, true}, +const std::vector mat_vec_op_input_vecs{ + {1024, 128, true, true}, {1024 * 1024, 128, true, true}, + {1024, 128 + 2, true, true}, {1024 * 1024, 128 + 2, true, true}, + {1024, 128 + 1, true, true}, {1024 * 1024, 128 + 1, true, true}, - {1024, 128, true, false}, {1024 * 1024, 128, true, false}, - {1024, 128 + 2, true, false}, {1024 * 1024, 128 + 2, true, false}, - {1024, 128 + 1, true, false}, {1024 * 1024, 128 + 1, true, false}, + {1024, 128, true, false}, {1024 * 1024, 128, true, false}, + {1024, 128 + 2, true, false}, {1024 * 1024, 128 + 2, true, false}, + {1024, 128 + 1, true, false}, {1024 * 1024, 128 + 1, true, false}, - {1024, 128, false, false}, {1024 * 1024, 128, false, false}, - {1024, 128 + 2, false, false}, {1024 * 1024, 128 + 2, false, false}, - {1024, 128 + 1, false, false}, {1024 * 1024, 128 + 1, false, false}, + {1024, 128, false, false}, {1024 * 1024, 128, false, false}, + {1024, 128 + 2, false, false}, {1024 * 1024, 128 + 2, false, false}, + {1024, 128 + 1, false, false}, {1024 * 1024, 128 + 1, false, false}, - {1024, 128, false, true}, {1024 * 1024, 128, false, true}, - {1024, 128 + 2, false, true}, {1024 * 1024, 128 + 2, false, true}, - {1024, 128 + 1, false, true}, {1024 * 1024, 128 + 1, false, true}, + {1024, 128, false, true}, {1024 * 1024, 128, false, true}, + {1024, 128 + 2, false, true}, {1024 * 1024, 128 + 2, false, true}, + {1024, 128 + 1, false, true}, {1024 * 1024, 128 + 1, false, true}, }; diff --git a/cpp/bench/random/make_blobs.cu b/cpp/bench/random/make_blobs.cu index d88713285c..c449223040 100644 --- a/cpp/bench/random/make_blobs.cu +++ b/cpp/bench/random/make_blobs.cu @@ -27,14 +27,14 @@ struct make_blobs_inputs { template struct make_blobs : public fixture { - make_blobs(const make_blobs_inputs& p) + make_blobs(const make_blobs_inputs& p) : params(p), data(p.rows * p.cols, stream), labels(p.rows, stream) { } void run_benchmark(::benchmark::State& state) override { - loop_on_state(state, [this]() { + loop_on_state(state, [this]() { raft::random::make_blobs(data.data(), labels.data(), params.rows, diff --git a/cpp/bench/random/permute.cu b/cpp/bench/random/permute.cu index 046f3beab9..9ec9fb2cc9 100644 --- a/cpp/bench/random/permute.cu +++ b/cpp/bench/random/permute.cu @@ -30,16 +30,22 @@ struct permute_inputs { template struct permute : public fixture { - permute(const permute_inputs& p) : params(p), perms(p.needPerms ? p.rows : 0, stream), out(p.rows * p.cols, stream), in(p.rows*p.cols, stream) { - raft::random::Rng r(123456ULL); - r.uniform(in.data(), p.rows, T(-1.0), T(1.0), stream); + permute(const permute_inputs& p) + : params(p), + perms(p.needPerms ? p.rows : 0, stream), + out(p.rows * p.cols, stream), + in(p.rows * p.cols, stream) + { + raft::random::Rng r(123456ULL); + r.uniform(in.data(), p.rows, T(-1.0), T(1.0), stream); } void run_benchmark(::benchmark::State& state) override { raft::random::Rng r(123456ULL); - loop_on_state(state, [this, &r]() { - raft::random::permute(perms.data(), out.data(), in.data(), params.cols, params.rows, params.rowMajor, stream); + loop_on_state(state, [this, &r]() { + raft::random::permute( + perms.data(), out.data(), in.data(), params.cols, params.rows, params.rowMajor, stream); }); } @@ -49,21 +55,20 @@ struct permute : public fixture { rmm::device_uvector perms; }; // struct permute -const std::vector permute_input_vecs = -{ - {32 * 1024, 128, true, true, true}, - {1024 * 1024, 128, true, true, true}, - {32 * 1024, 128 + 2, true, true, true}, - {1024 * 1024, 128 + 2, true, true, true}, - {32 * 1024, 128 + 1, true, true, true}, - {1024 * 1024, 128 + 1, true, true, true}, +const std::vector permute_input_vecs = { + {32 * 1024, 128, true, true, true}, + {1024 * 1024, 128, true, true, true}, + {32 * 1024, 128 + 2, true, true, true}, + {1024 * 1024, 128 + 2, true, true, true}, + {32 * 1024, 128 + 1, true, true, true}, + {1024 * 1024, 128 + 1, true, true, true}, - {32 * 1024, 128, true, true, false}, - {1024 * 1024, 128, true, true, false}, - {32 * 1024, 128 + 2, true, true, false}, - {1024 * 1024, 128 + 2, true, true, false}, - {32 * 1024, 128 + 1, true, true, false}, - {1024 * 1024, 128 + 1, true, true, false}, + {32 * 1024, 128, true, true, false}, + {1024 * 1024, 128, true, true, false}, + {32 * 1024, 128 + 2, true, true, false}, + {1024 * 1024, 128 + 2, true, true, false}, + {32 * 1024, 128 + 1, true, true, false}, + {1024 * 1024, 128 + 1, true, true, false}, }; diff --git a/cpp/bench/random/rng.cu b/cpp/bench/random/rng.cu index 353a4aff53..942606cddf 100644 --- a/cpp/bench/random/rng.cu +++ b/cpp/bench/random/rng.cu @@ -44,21 +44,29 @@ struct rng_inputs { template struct rng : public fixture { - rng(const rng_inputs& p) : params(p), ptr(p.len, stream) {} + rng(const rng_inputs& p) : params(p), ptr(p.len, stream) {} void run_benchmark(::benchmark::State& state) override { raft::random::Rng r(123456ULL, params.gtype); - loop_on_state(state, [this, &r]() { + loop_on_state(state, [this, &r]() { switch (params.type) { case RNG_Normal: r.normal(ptr.data(), params.len, params.start, params.end, stream); break; - case RNG_LogNormal: r.lognormal(ptr.data(), params.len, params.start, params.end, stream); break; - case RNG_Uniform: r.uniform(ptr.data(), params.len, params.start, params.end, stream); break; + case RNG_LogNormal: + r.lognormal(ptr.data(), params.len, params.start, params.end, stream); + break; + case RNG_Uniform: + r.uniform(ptr.data(), params.len, params.start, params.end, stream); + break; case RNG_Gumbel: r.gumbel(ptr.data(), params.len, params.start, params.end, stream); break; - case RNG_Logistic: r.logistic(ptr.data(), params.len, params.start, params.end, stream); break; + case RNG_Logistic: + r.logistic(ptr.data(), params.len, params.start, params.end, stream); + break; case RNG_Exp: r.exponential(ptr.data(), params.len, params.start, stream); break; case RNG_Rayleigh: r.rayleigh(ptr.data(), params.len, params.start, stream); break; - case RNG_Laplace: r.laplace(ptr.data(), params.len, params.start, params.end, stream); break; + case RNG_Laplace: + r.laplace(ptr.data(), params.len, params.start, params.end, stream); + break; case RNG_Fill: r.fill(ptr.data(), params.len, params.start, stream); break; }; }); diff --git a/cpp/bench/spatial/fused_l2_nn.cu b/cpp/bench/spatial/fused_l2_nn.cu index bd6594d3d4..c77f727bd7 100644 --- a/cpp/bench/spatial/fused_l2_nn.cu +++ b/cpp/bench/spatial/fused_l2_nn.cu @@ -31,22 +31,29 @@ struct fused_l2_nn_inputs { template struct fused_l2_nn : public fixture { - fused_l2_nn(const fused_l2_nn_inputs& p) : params(p), out(p.m, stream), x(p.m*p.k, stream), y(p.n*p.k, stream), xn(p.m, stream), yn(p.n, stream), workspace(p.m, stream) { - raft::handle_t handle{stream}; - raft::random::Rng r(123456ULL); - - r.uniform(x.data(), p.m * p.k, T(-1.0), T(1.0), stream); - r.uniform(y.data(), p.n * p.k, T(-1.0), T(1.0), stream); - raft::linalg::rowNorm(xn.data(), x.data(), p.k, p.m, raft::linalg::L2Norm, true, stream); - raft::linalg::rowNorm(yn.data(), y.data(), p.k, p.n, raft::linalg::L2Norm, true, stream); - raft::distance::initialize, int>( - handle, out.data(), p.m, std::numeric_limits::max(), op); + fused_l2_nn(const fused_l2_nn_inputs& p) + : params(p), + out(p.m, stream), + x(p.m * p.k, stream), + y(p.n * p.k, stream), + xn(p.m, stream), + yn(p.n, stream), + workspace(p.m, stream) + { + raft::handle_t handle{stream}; + raft::random::Rng r(123456ULL); - } + r.uniform(x.data(), p.m * p.k, T(-1.0), T(1.0), stream); + r.uniform(y.data(), p.n * p.k, T(-1.0), T(1.0), stream); + raft::linalg::rowNorm(xn.data(), x.data(), p.k, p.m, raft::linalg::L2Norm, true, stream); + raft::linalg::rowNorm(yn.data(), y.data(), p.k, p.n, raft::linalg::L2Norm, true, stream); + raft::distance::initialize, int>( + handle, out.data(), p.m, std::numeric_limits::max(), op); + } void run_benchmark(::benchmark::State& state) override { - loop_on_state(state, [this]() { + loop_on_state(state, [this]() { // it is enough to only benchmark the L2-squared metric raft::distance::fusedL2NN, int>(out.data(), x.data(), @@ -66,7 +73,7 @@ struct fused_l2_nn : public fixture { } private: - fused_l2_nn_inputs params; + fused_l2_nn_inputs params; rmm::device_uvector x, y, xn, yn; rmm::device_uvector> out; rmm::device_uvector workspace; @@ -75,11 +82,11 @@ struct fused_l2_nn : public fixture { }; // struct FusedL2NN const std::vector fused_l2_nn_input_vecs = { - {32, 16384, 16384}, {64, 16384, 16384}, {128, 16384, 16384}, {256, 16384, 16384}, - {512, 16384, 16384}, {1024, 16384, 16384}, {16384, 32, 16384}, {16384, 64, 16384}, - {16384, 128, 16384}, {16384, 256, 16384}, {16384, 512, 16384}, {16384, 1024, 16384}, - {16384, 16384, 32}, {16384, 16384, 64}, {16384, 16384, 128}, {16384, 16384, 256}, - {16384, 16384, 512}, {16384, 16384, 1024}, {16384, 16384, 16384}, + {32, 16384, 16384}, {64, 16384, 16384}, {128, 16384, 16384}, {256, 16384, 16384}, + {512, 16384, 16384}, {1024, 16384, 16384}, {16384, 32, 16384}, {16384, 64, 16384}, + {16384, 128, 16384}, {16384, 256, 16384}, {16384, 512, 16384}, {16384, 1024, 16384}, + {16384, 16384, 32}, {16384, 16384, 64}, {16384, 16384, 128}, {16384, 16384, 256}, + {16384, 16384, 512}, {16384, 16384, 1024}, {16384, 16384, 16384}, }; From 973780cb5e0b3861bdff9603b4613a75c6c3e4ff Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 6 Apr 2022 18:59:16 -0400 Subject: [PATCH 3/5] Checking if libs were compiled --- cpp/bench/distance/distance_common.cuh | 3 ++- cpp/bench/spatial/fused_l2_nn.cu | 3 +++ cpp/bench/spatial/selection.cu | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/cpp/bench/distance/distance_common.cuh b/cpp/bench/distance/distance_common.cuh index 5817162f22..dae2550326 100644 --- a/cpp/bench/distance/distance_common.cuh +++ b/cpp/bench/distance/distance_common.cuh @@ -17,8 +17,9 @@ #include #include #include +#if defined RAFT_DISTANCE_COMPILED #include - +#endif #include namespace raft::bench::distance { diff --git a/cpp/bench/spatial/fused_l2_nn.cu b/cpp/bench/spatial/fused_l2_nn.cu index c77f727bd7..2eb2097920 100644 --- a/cpp/bench/spatial/fused_l2_nn.cu +++ b/cpp/bench/spatial/fused_l2_nn.cu @@ -21,7 +21,10 @@ #include #include #include + +#if defined RAFT_NN_COMPILED #include +#endif namespace raft::bench::spatial { diff --git a/cpp/bench/spatial/selection.cu b/cpp/bench/spatial/selection.cu index 09d02940a5..841c7754ed 100644 --- a/cpp/bench/spatial/selection.cu +++ b/cpp/bench/spatial/selection.cu @@ -17,6 +17,10 @@ #include #include +#if defined RAFT_NN_COMPILED +#include +#endif + #include #include From 43de666587b0ce4ae76252ca1c0797e6b0dc6261 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 6 Apr 2022 19:22:08 -0400 Subject: [PATCH 4/5] Fixing bad typo --- cpp/include/raft/stats/detail/completeness_score.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/raft/stats/detail/completeness_score.cuh b/cpp/include/raft/stats/detail/completeness_score.cuh index 5e6fb835ef..f70eb6d104 100644 --- a/cpp/include/raft/stats/detail/completeness_score.cuh +++ b/cpp/include/raft/stats/detail/completeness_score.cuh @@ -52,9 +52,9 @@ double completeness_score(const T* truthClusterArray, double computedMI, computedEntropy; computedMI = raft::stats::mutual_info_score( - truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream); + predClusterArray, truthClusterArray, size, lowerLabelRange, upperLabelRange, stream); computedEntropy = - raft::stats::entropy(predClusterArray, size, lowerLabelRange, upperLabelRange, stream); + raft::stats::entropy(truthClusterArray, size, lowerLabelRange, upperLabelRange, stream); double completeness; From 45cfd140928011fecdefb2e3cda586652fa9c134 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 6 Apr 2022 19:27:47 -0400 Subject: [PATCH 5/5] Fixing completeness score --- cpp/include/raft/stats/completeness_score.cuh | 6 +- cpp/include/raft/stats/completeness_score.hpp | 35 +-------- .../raft/stats/detail/completeness_score.cuh | 71 ------------------- 3 files changed, 4 insertions(+), 108 deletions(-) delete mode 100644 cpp/include/raft/stats/detail/completeness_score.cuh diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh index dbfe6ce430..407986de05 100644 --- a/cpp/include/raft/stats/completeness_score.cuh +++ b/cpp/include/raft/stats/completeness_score.cuh @@ -19,7 +19,7 @@ #pragma once -#include +#include namespace raft { namespace stats { @@ -42,8 +42,8 @@ double completeness_score(const T* truthClusterArray, T upperLabelRange, cudaStream_t stream) { - return detail::completeness_score( - truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream); + return detail::homogeneity_score( + predClusterArray, truthClusterArray, size, lowerLabelRange, upperLabelRange, stream); } }; // end namespace stats diff --git a/cpp/include/raft/stats/completeness_score.hpp b/cpp/include/raft/stats/completeness_score.hpp index 0dd97e9782..40e60c852c 100644 --- a/cpp/include/raft/stats/completeness_score.hpp +++ b/cpp/include/raft/stats/completeness_score.hpp @@ -18,39 +18,6 @@ * Please use the cuh version instead. */ -#ifndef __COMPLETENESS_SCORE_H -#define __COMPLETENESS_SCORE_H - #pragma once -#include - -namespace raft { -namespace stats { - -/** - * @brief Function to calculate the completeness score between two clusters - * - * @param truthClusterArray: the array of truth classes of type T - * @param predClusterArray: the array of predicted classes of type T - * @param size: the size of the data points of type int - * @param lowerLabelRange: the lower bound of the range of labels - * @param upperLabelRange: the upper bound of the range of labels - * @param stream: the cudaStream object - */ -template -double completeness_score(const T* truthClusterArray, - const T* predClusterArray, - int size, - T lowerLabelRange, - T upperLabelRange, - cudaStream_t stream) -{ - return detail::completeness_score( - truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream); -} - -}; // end namespace stats -}; // end namespace raft - -#endif +#include \ No newline at end of file diff --git a/cpp/include/raft/stats/detail/completeness_score.cuh b/cpp/include/raft/stats/detail/completeness_score.cuh deleted file mode 100644 index f70eb6d104..0000000000 --- a/cpp/include/raft/stats/detail/completeness_score.cuh +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - * @file completeness_score.cuh - * - * @brief A clustering result satisfies completeness if all the data points - * that are members of a given class are elements of the same cluster. - */ - -#pragma once - -#include -#include - -namespace raft { -namespace stats { -namespace detail { - -/** - * @brief Function to calculate the completeness score between two clusters - * - * @param truthClusterArray: the array of truth classes of type T - * @param predClusterArray: the array of predicted classes of type T - * @param size: the size of the data points of type int - * @param lowerLabelRange: the lower bound of the range of labels - * @param upperLabelRange: the upper bound of the range of labels - * @param stream: the cudaStream object - */ -template -double completeness_score(const T* truthClusterArray, - const T* predClusterArray, - int size, - T lowerLabelRange, - T upperLabelRange, - cudaStream_t stream) -{ - if (size == 0) return 1.0; - - double computedMI, computedEntropy; - - computedMI = raft::stats::mutual_info_score( - predClusterArray, truthClusterArray, size, lowerLabelRange, upperLabelRange, stream); - computedEntropy = - raft::stats::entropy(truthClusterArray, size, lowerLabelRange, upperLabelRange, stream); - - double completeness; - - if (computedEntropy) { - completeness = computedMI / computedEntropy; - } else - completeness = 1.0; - - return completeness; -} - -}; // end namespace detail -}; // end namespace stats -}; // end namespace raft