diff --git a/cpp/bench/CMakeLists.txt b/cpp/bench/CMakeLists.txt index 5214047571..7a0f1d5201 100644 --- a/cpp/bench/CMakeLists.txt +++ b/cpp/bench/CMakeLists.txt @@ -18,7 +18,18 @@ set(RAFT_CPP_BENCH_TARGET "bench_raft") # (please keep the filenames in alphabetical order) add_executable(${RAFT_CPP_BENCH_TARGET} + bench/distance/distance_cosine.cu + bench/distance/distance_exp_l2.cu + bench/distance/distance_l1.cu + bench/distance/distance_unexp_l2.cu + bench/linalg/add.cu + bench/linalg/map_then_reduce.cu + bench/linalg/matrix_vector_op.cu bench/linalg/reduce.cu + bench/random/make_blobs.cu + bench/random/permute.cu + bench/random/rng.cu + bench/spatial/fused_l2_nn.cu bench/spatial/selection.cu bench/main.cpp ) @@ -47,6 +58,8 @@ target_include_directories(${RAFT_CPP_BENCH_TARGET} target_link_libraries(${RAFT_CPP_BENCH_TARGET} PRIVATE raft::raft + raft::distance + raft::nn faiss::faiss benchmark::benchmark $ diff --git a/cpp/bench/distance/distance_common.cuh b/cpp/bench/distance/distance_common.cuh new file mode 100644 index 0000000000..dae2550326 --- /dev/null +++ b/cpp/bench/distance/distance_common.cuh @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#if defined RAFT_DISTANCE_COMPILED +#include +#endif +#include + +namespace raft::bench::distance { + +struct distance_inputs { + int m, n, k; // buffer sizes: x is m * k, y is n * k, out is m * n + bool isRowMajor; // forwarded as the last argument of raft::distance::distance +}; // struct distance_inputs +// Fixture whose benchmark body (run via loop_on_state) is one raft::distance::distance call. +template +struct distance : public fixture { + distance(const distance_inputs& p) + : params(p), + x(p.m * p.k, stream), + y(p.n * p.k, stream), + out(p.m * p.n, stream), + workspace(0, stream) // resized in the body once the required size is known + { + RAFT_CUDA_TRY(cudaMemsetAsync(x.data(), 0, x.size() * sizeof(T), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(y.data(), 0, y.size() * sizeof(T), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(out.data(), 0, out.size() * sizeof(T), stream)); + worksize = raft::distance::getWorkspaceSize( + x.data(), y.data(), params.m, params.n, params.k); + workspace.resize(worksize, stream); + } + + void run_benchmark(::benchmark::State& state) override + { + loop_on_state(state, [this]() { + raft::distance::distance(x.data(), + y.data(), + out.data(), + params.m, + params.n, + params.k, + (void*)workspace.data(), + worksize, + stream, + params.isRowMajor); + }); + } + + private: + distance_inputs params; + rmm::device_uvector x, y, out; // all three are zero-filled by the constructor + rmm::device_uvector workspace; + size_t worksize; // value returned by getWorkspaceSize in the constructor +}; // struct distance + +const std::vector dist_input_vecs{ // entries are {m, n, k, isRowMajor} + {32, 16384, 16384, true}, {64, 16384, 16384, true}, {128, 16384, 16384, true}, + {256, 16384, 16384, true}, {512, 16384, 16384, true}, {1024, 16384, 16384, true}, + {16384, 32, 16384, true}, {16384, 64, 16384, true}, {16384, 128, 16384, true}, + {16384, 256, 16384, true}, {16384, 512, 16384, true}, {16384, 1024, 16384, true}, + {16384, 16384, 32, true}, {16384, 16384, 64, true}, {16384, 16384, 128, true}, + {16384, 16384, 256, true}, {16384, 16384, 512, true}, {16384, 16384, 1024, true}, + {16384, 16384, 16384, true}, {32, 16384, 16384, false}, {64, 16384, 16384, false}, + {128, 16384, 16384, false}, {256, 16384, 16384, false}, {512, 16384, 16384, false}, + {1024, 16384, 16384, false}, {16384, 32, 16384, false},
{16384, 64, 16384, false}, + {16384, 128, 16384, false}, {16384, 256, 16384, false}, {16384, 512, 16384, false}, + {16384, 1024, 16384, false}, {16384, 16384, 32, false}, {16384, 16384, 64, false}, + {16384, 16384, 128, false}, {16384, 16384, 256, false}, {16384, 16384, 512, false}, + {16384, 16384, 1024, false}, {16384, 16384, 16384, false} + +}; +// Declares the Name##F and Name##D fixture aliases and registers both over dist_input_vecs. +#define DIST_BENCH_REGISTER(Name, Metric) \ + using Name##F = distance; \ + RAFT_BENCH_REGISTER(Name##F, "", dist_input_vecs); \ + using Name##D = distance; \ + RAFT_BENCH_REGISTER(Name##D, "", dist_input_vecs); + +} // namespace raft::bench::distance diff --git a/cpp/bench/distance/distance_cosine.cu b/cpp/bench/distance/distance_cosine.cu new file mode 100644 index 0000000000..20f29ce4ef --- /dev/null +++ b/cpp/bench/distance/distance_cosine.cu @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "distance_common.cuh" + +namespace raft::bench::distance { +// Registers DistanceCosineF and DistanceCosineD through DIST_BENCH_REGISTER. +DIST_BENCH_REGISTER(DistanceCosine, raft::distance::DistanceType::CosineExpanded); + +} // namespace raft::bench::distance diff --git a/cpp/bench/distance/distance_exp_l2.cu b/cpp/bench/distance/distance_exp_l2.cu new file mode 100644 index 0000000000..5a3af17193 --- /dev/null +++ b/cpp/bench/distance/distance_exp_l2.cu @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "distance_common.cuh" + +namespace raft::bench::distance { +// Each macro line registers two fixtures, NameF and NameD (see distance_common.cuh). +DIST_BENCH_REGISTER(DistanceL2Sq, raft::distance::DistanceType::L2Expanded); +DIST_BENCH_REGISTER(DistanceL2Sqrt, raft::distance::DistanceType::L2SqrtExpanded); + +} // namespace raft::bench::distance diff --git a/cpp/bench/distance/distance_l1.cu b/cpp/bench/distance/distance_l1.cu new file mode 100644 index 0000000000..2ad7d5e957 --- /dev/null +++ b/cpp/bench/distance/distance_l1.cu @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "distance_common.cuh" + +namespace raft::bench::distance { +// Registers DistanceL1F and DistanceL1D (see DIST_BENCH_REGISTER in distance_common.cuh). +DIST_BENCH_REGISTER(DistanceL1, raft::distance::DistanceType::L1); + +} // namespace raft::bench::distance diff --git a/cpp/bench/distance/distance_unexp_l2.cu b/cpp/bench/distance/distance_unexp_l2.cu new file mode 100644 index 0000000000..406aca2378 --- /dev/null +++ b/cpp/bench/distance/distance_unexp_l2.cu @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "distance_common.cuh" + +namespace raft::bench::distance { +// Each macro line registers two fixtures, NameF and NameD (see distance_common.cuh). +DIST_BENCH_REGISTER(DistanceUnexpL2Sq, raft::distance::DistanceType::L2Unexpanded); +DIST_BENCH_REGISTER(DistanceUnexpL2Sqrt, raft::distance::DistanceType::L2SqrtUnexpanded); + +} // namespace raft::bench::distance diff --git a/cpp/bench/linalg/add.cu b/cpp/bench/linalg/add.cu new file mode 100644 index 0000000000..7c651b61ed --- /dev/null +++ b/cpp/bench/linalg/add.cu @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +namespace raft::bench::linalg { + +struct add_inputs { + int len; // element count of both device buffers (ptr0 and ptr1) +}; // struct add_inputs +// Fixture whose benchmark body (run via loop_on_state) is one raft::linalg::add call. +template +struct add : public fixture { + add(const add_inputs& p) : params(p), ptr0(p.len, stream), ptr1(p.len, stream) {} + + void run_benchmark(::benchmark::State& state) override + { + loop_on_state(state, [this]() { // NOTE(review): ptr0 is passed twice below, confirm intended + raft::linalg::add(ptr0.data(), ptr0.data(), ptr1.data(), params.len, stream); + }); + } + + private: + add_inputs params; + rmm::device_uvector ptr0, ptr1; // neither buffer is written before the benchmark runs +}; // struct add + +const std::vector add_input_vecs{ // entries are {len} + {256 * 1024 * 1024}, {256 * 1024 * 1024 + 2}, {256 * 1024 * 1024 + 1} + +}; + +RAFT_BENCH_REGISTER(add, "", add_input_vecs); +RAFT_BENCH_REGISTER(add, "", add_input_vecs); + +} // namespace raft::bench::linalg diff --git a/cpp/bench/linalg/map_then_reduce.cu b/cpp/bench/linalg/map_then_reduce.cu new file mode 100644 index 0000000000..7eeb4a79b6 --- /dev/null +++ b/cpp/bench/linalg/map_then_reduce.cu @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include +#include +#include + +namespace raft::bench::linalg { + +struct map_then_reduce_inputs { + int len; // element count of the input buffer; the output buffer holds one element +}; +// Functor that returns its argument unchanged; passed as the map operation below. +template +struct Identity { + HDI Type operator()(Type a) { return a; } +}; +// Fixture whose benchmark body (run via loop_on_state) is one mapThenSumReduce call. +template +struct map_then_reduce : public fixture { + map_then_reduce(const map_then_reduce_inputs& p) : params(p), out(1, stream), in(p.len, stream) {} + + void run_benchmark(::benchmark::State& state) override + { + loop_on_state(state, [this]() { + raft::linalg::mapThenSumReduce(out.data(), params.len, Identity(), stream, in.data()); + }); + } + + private: + map_then_reduce_inputs params; + rmm::device_uvector out, in; // members initialize in this order; the init list mirrors it +}; // struct map_then_reduce + +const std::vector map_then_reduce_input_vecs{ // entries are {len} + {1024 * 1024}, + {32 * 1024 * 1024}, + {1024 * 1024 * 1024}, + {1024 * 1024 + 2}, + {32 * 1024 * 1024 + 2}, + {1024 * 1024 * 1024 + 2}, + {1024 * 1024 + 1}, + {32 * 1024 * 1024 + 1}, + {1024 * 1024 * 1024 + 1}, + +}; + +RAFT_BENCH_REGISTER(map_then_reduce, "", map_then_reduce_input_vecs); +RAFT_BENCH_REGISTER(map_then_reduce, "", map_then_reduce_input_vecs); + +} // namespace raft::bench::linalg diff --git a/cpp/bench/linalg/matrix_vector_op.cu b/cpp/bench/linalg/matrix_vector_op.cu new file mode 100644 index 0000000000..d3a53ea345 --- /dev/null +++ b/cpp/bench/linalg/matrix_vector_op.cu @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include +#include +#include + +namespace raft::bench::linalg { + +struct mat_vec_op_inputs { + int rows, cols; // the out and in buffers each hold rows * cols elements + bool rowMajor, bcastAlongRows; // bcastAlongRows picks a vec of cols (true) or rows (false) +}; // struct mat_vec_op_inputs +// Fixture whose benchmark body (run via loop_on_state) is one matrixVectorOp call with raft::Sum. +template +struct mat_vec_op : public fixture { + mat_vec_op(const mat_vec_op_inputs& p) + : params(p), + out(p.rows * p.cols, stream), + in(p.rows * p.cols, stream), + vec(p.bcastAlongRows ? p.cols : p.rows, stream) + { + } + + void run_benchmark(::benchmark::State& state) override + { + loop_on_state(state, [this]() { + raft::linalg::matrixVectorOp(out.data(), + in.data(), + vec.data(), + params.cols, // cols is passed before rows in this call + params.rows, + params.rowMajor, + params.bcastAlongRows, + raft::Sum(), + stream); + }); + } + + private: + mat_vec_op_inputs params; + rmm::device_uvector out, in, vec; // none of these is written before the benchmark runs +}; // struct mat_vec_op + +const std::vector mat_vec_op_input_vecs{ // entries are {rows, cols, rowMajor, bcastAlongRows} + {1024, 128, true, true}, {1024 * 1024, 128, true, true}, + {1024, 128 + 2, true, true}, {1024 * 1024, 128 + 2, true, true}, + {1024, 128 + 1, true, true}, {1024 * 1024, 128 + 1, true, true}, + + {1024, 128, true, false}, {1024 * 1024, 128, true, false}, + {1024, 128 + 2, true, false}, {1024 * 1024, 128 + 2, true, false}, + {1024, 128 + 1, true, false}, {1024 * 1024, 128 + 1, true, false}, + + {1024, 128, false, false}, {1024 * 1024, 128, false, false}, + {1024, 128 + 2, false, false}, {1024 * 1024, 128 + 2, false, false}, + {1024, 128 + 1, false, false}, {1024 * 1024, 128 + 1, false, false}, + + {1024, 128, false, true}, {1024 * 1024, 128, false, true}, + {1024, 128 + 2, false, true}, {1024 * 1024, 128 + 2, false, true}, + {1024, 128 + 1, false, true}, {1024 * 1024, 128 + 1, false, true}, + +}; + +RAFT_BENCH_REGISTER(mat_vec_op, "", mat_vec_op_input_vecs); +RAFT_BENCH_REGISTER(mat_vec_op, "", mat_vec_op_input_vecs); + +} // namespace raft::bench::linalg diff --git a/cpp/bench/random/make_blobs.cu b/cpp/bench/random/make_blobs.cu new file mode 100644 index 0000000000..c449223040 --- /dev/null +++ b/cpp/bench/random/make_blobs.cu @@ -0,0 +1,77 @@
+/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +namespace raft::bench::random { +struct make_blobs_inputs { + int rows, cols, clusters; // data holds rows * cols elements, labels holds rows + bool row_major; // forwarded as the last argument of raft::random::make_blobs +}; // struct make_blobs_inputs +// Fixture whose benchmark body (run via loop_on_state) is one raft::random::make_blobs call. +template +struct make_blobs : public fixture { + make_blobs(const make_blobs_inputs& p) + : params(p), data(p.rows * p.cols, stream), labels(p.rows, stream) + { + } + + void run_benchmark(::benchmark::State& state) override + { + loop_on_state(state, [this]() { + raft::random::make_blobs(data.data(), + labels.data(), + params.rows, + params.cols, + params.clusters, + this->stream, + params.row_major); + }); + } + + private: + make_blobs_inputs params; + rmm::device_uvector data; + rmm::device_uvector labels; +}; // struct make_blobs +// Builds every combination of the row, column and cluster counts below, in both layouts. +static std::vector get_make_blobs_input_vecs() +{ + std::vector out; + make_blobs_inputs p; // every field is assigned before each push_back + for (auto rows : std::vector{100000, 1000000}) { + for (auto cols : std::vector{10, 100}) { + for (auto clusters : std::vector{2, 10, 100}) { + p.rows = rows; + p.cols = cols; + p.clusters = clusters; + p.row_major = true; + out.push_back(p); + p.row_major = false; // same sizes again with the other layout + out.push_back(p); + } + } + } + return out; +} + +RAFT_BENCH_REGISTER(make_blobs, "", get_make_blobs_input_vecs()); +RAFT_BENCH_REGISTER(make_blobs, "", get_make_blobs_input_vecs()); + +} // namespace raft::bench::random diff --git a/cpp/bench/random/permute.cu b/cpp/bench/random/permute.cu
new file mode 100644 index 0000000000..9ec9fb2cc9 --- /dev/null +++ b/cpp/bench/random/permute.cu @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include + +namespace raft::bench::random { + +struct permute_inputs { + int rows, cols; // the out and in buffers each hold rows * cols elements + bool needPerms, needShuffle, rowMajor; // needPerms sizes perms (rows or 0); needShuffle is unread here +}; // struct permute_inputs +// Fixture whose benchmark body (run via loop_on_state) is one raft::random::permute call. +template +struct permute : public fixture { + permute(const permute_inputs& p) + : params(p), + out(p.rows * p.cols, stream),
+ in(p.rows * p.cols, stream), + perms(p.needPerms ? p.rows : 0, stream) // declared last, so it is initialized last + { + raft::random::Rng r(123456ULL); + r.uniform(in.data(), p.rows * p.cols, T(-1.0), T(1.0), stream); // fill the whole input buffer + } + + void run_benchmark(::benchmark::State& state) override + { + raft::random::Rng r(123456ULL); // NOTE(review): captured below but never used; candidate for removal + loop_on_state(state, [this, &r]() { + raft::random::permute( + perms.data(), out.data(), in.data(), params.cols, params.rows, params.rowMajor, stream); + }); + } + + private: + permute_inputs params; + rmm::device_uvector out, in; + rmm::device_uvector perms; +}; // struct permute + +const std::vector permute_input_vecs = { // entries are {rows, cols, needPerms, needShuffle, rowMajor} + {32 * 1024, 128, true, true, true}, + {1024 * 1024, 128, true, true, true}, + {32 * 1024, 128 + 2, true, true, true}, + {1024 * 1024, 128 + 2, true, true, true}, + {32 * 1024, 128 + 1, true, true, true}, + {1024 * 1024, 128 + 1, true, true, true}, + + {32 * 1024, 128, true, true, false}, + {1024 * 1024, 128, true, true, false}, + {32 * 1024, 128 + 2, true, true, false}, + {1024 * 1024, 128 + 2, true, true, false}, + {32 * 1024, 128 + 1, true, true, false}, + {1024 * 1024, 128 + 1, true, true, false}, + +}; + +RAFT_BENCH_REGISTER(permute, "", permute_input_vecs); +RAFT_BENCH_REGISTER(permute, "", permute_input_vecs); + +} // namespace raft::bench::random diff --git a/cpp/bench/random/rng.cu b/cpp/bench/random/rng.cu new file mode 100644 index 0000000000..942606cddf --- /dev/null +++ b/cpp/bench/random/rng.cu @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include + +namespace raft::bench::random { + +enum RandomType { // selects which Rng member function run_benchmark calls + RNG_Normal, + RNG_LogNormal, + RNG_Uniform, + RNG_Gumbel, + RNG_Logistic, + RNG_Exp, + RNG_Rayleigh, + RNG_Laplace, + RNG_Fill +}; // enum RandomType + +template +struct rng_inputs { + int len; // element count of the output buffer + RandomType type; + raft::random::GeneratorType gtype; // passed to the Rng constructor + T start, end; // generator arguments; the Exp, Rayleigh and Fill cases only pass start +}; // struct rng_inputs +// Fixture whose benchmark body (run via loop_on_state) is one Rng call chosen by params.type. +template +struct rng : public fixture { + rng(const rng_inputs& p) : params(p), ptr(p.len, stream) {} + + void run_benchmark(::benchmark::State& state) override + { + raft::random::Rng r(123456ULL, params.gtype); // one generator shared by all iterations + loop_on_state(state, [this, &r]() { + switch (params.type) { + case RNG_Normal: r.normal(ptr.data(), params.len, params.start, params.end, stream); break; + case RNG_LogNormal: + r.lognormal(ptr.data(), params.len, params.start, params.end, stream); + break; + case RNG_Uniform: + r.uniform(ptr.data(), params.len, params.start, params.end, stream); + break; + case RNG_Gumbel: r.gumbel(ptr.data(), params.len, params.start, params.end, stream); break; + case RNG_Logistic: + r.logistic(ptr.data(), params.len, params.start, params.end, stream); + break; + case RNG_Exp: r.exponential(ptr.data(), params.len, params.start, stream); break; + case RNG_Rayleigh: r.rayleigh(ptr.data(), params.len, params.start, stream); break; + case RNG_Laplace: + r.laplace(ptr.data(), params.len, params.start, params.end, stream); + break; + case RNG_Fill: r.fill(ptr.data(), params.len, params.start, stream); break; + }; + }); + } + + private: + rng_inputs params; + rmm::device_uvector ptr; +}; // struct rng + +template +static std::vector> get_rng_input_vecs() +{ + using namespace raft::random; + return { // entries are {len, type, gtype, start, end}; only Uniform and Fill cases are listed + {1024 * 1024, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, + {32 * 1024 * 1024, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, + {1024 * 1024 * 1024, RNG_Uniform, GenPhilox,
T(-1.0), T(1.0)}, + {1024 * 1024 + 2, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, + {32 * 1024 * 1024 + 2, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, + {1024 * 1024 * 1024 + 2, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, + {1024 * 1024 + 1, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, + {32 * 1024 * 1024 + 1, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, + {1024 * 1024 * 1024 + 1, RNG_Uniform, GenPhilox, T(-1.0), T(1.0)}, + + {1024 * 1024, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, + {32 * 1024 * 1024, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, + {1024 * 1024 * 1024, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, + {1024 * 1024 + 2, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, + {32 * 1024 * 1024 + 2, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, + {1024 * 1024 * 1024 + 2, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, + {1024 * 1024 + 1, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, + {32 * 1024 * 1024 + 1, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, + {1024 * 1024 * 1024 + 1, RNG_Uniform, GenPC, T(-1.0), T(1.0)}, + + {1024 * 1024, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, + {32 * 1024 * 1024, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, + {1024 * 1024 * 1024, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, + {1024 * 1024 + 2, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, + {32 * 1024 * 1024 + 2, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, + {1024 * 1024 * 1024 + 2, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, + {1024 * 1024 + 1, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, + {32 * 1024 * 1024 + 1, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, + {1024 * 1024 * 1024 + 1, RNG_Fill, GenPhilox, T(-1.0), T(1.0)}, + }; +} + +RAFT_BENCH_REGISTER(rng, "", get_rng_input_vecs()); +RAFT_BENCH_REGISTER(rng, "", get_rng_input_vecs()); + +} // namespace raft::bench::random diff --git a/cpp/bench/spatial/fused_l2_nn.cu b/cpp/bench/spatial/fused_l2_nn.cu new file mode 100644 index 0000000000..2eb2097920 --- /dev/null +++ b/cpp/bench/spatial/fused_l2_nn.cu @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#if defined RAFT_NN_COMPILED +#include +#endif + +namespace raft::bench::spatial { + +struct fused_l2_nn_inputs { + int m, n, k; // x is m * k, y is n * k; out, xn and workspace hold m entries, yn holds n +}; // struct fused_l2_nn_inputs +// Fixture whose benchmark body (run via loop_on_state) is one raft::distance::fusedL2NN call. +template +struct fused_l2_nn : public fixture { + fused_l2_nn(const fused_l2_nn_inputs& p) + : params(p), + x(p.m * p.k, stream), + y(p.n * p.k, stream), + xn(p.m, stream), + yn(p.n, stream), + out(p.m, stream), + workspace(p.m, stream) + { + raft::handle_t handle{stream}; + raft::random::Rng r(123456ULL); + + r.uniform(x.data(), p.m * p.k, T(-1.0), T(1.0), stream); + r.uniform(y.data(), p.n * p.k, T(-1.0), T(1.0), stream); + raft::linalg::rowNorm(xn.data(), x.data(), p.k, p.m, raft::linalg::L2Norm, true, stream); + raft::linalg::rowNorm(yn.data(), y.data(), p.k, p.n, raft::linalg::L2Norm, true, stream); + raft::distance::initialize, int>( + handle, out.data(), p.m, std::numeric_limits::max(), op); + } + + void run_benchmark(::benchmark::State& state) override + { + loop_on_state(state, [this]() { + // it is enough to only benchmark the L2-squared metric + raft::distance::fusedL2NN, int>(out.data(), + x.data(), + y.data(), + xn.data(), + yn.data(), + params.m, + params.n, + params.k, + (void*)workspace.data(), + op, + pairRedOp, + false, + false, + stream); + }); + } + + private: + fused_l2_nn_inputs params; + rmm::device_uvector x, y, xn, yn; // the constructor init list mirrors this declaration order + rmm::device_uvector>
out; + rmm::device_uvector workspace; + raft::distance::KVPMinReduce pairRedOp; + raft::distance::MinAndDistanceReduceOp op; +}; // struct fused_l2_nn + +const std::vector fused_l2_nn_input_vecs = { // entries are {m, n, k} + {32, 16384, 16384}, {64, 16384, 16384}, {128, 16384, 16384}, {256, 16384, 16384}, + {512, 16384, 16384}, {1024, 16384, 16384}, {16384, 32, 16384}, {16384, 64, 16384}, + {16384, 128, 16384}, {16384, 256, 16384}, {16384, 512, 16384}, {16384, 1024, 16384}, + {16384, 16384, 32}, {16384, 16384, 64}, {16384, 16384, 128}, {16384, 16384, 256}, + {16384, 16384, 512}, {16384, 16384, 1024}, {16384, 16384, 16384}, + +}; + +RAFT_BENCH_REGISTER(fused_l2_nn, "", fused_l2_nn_input_vecs); +RAFT_BENCH_REGISTER(fused_l2_nn, "", fused_l2_nn_input_vecs); + +} // namespace raft::bench::spatial diff --git a/cpp/bench/spatial/selection.cu b/cpp/bench/spatial/selection.cu index 09d02940a5..841c7754ed 100644 --- a/cpp/bench/spatial/selection.cu +++ b/cpp/bench/spatial/selection.cu @@ -17,6 +17,10 @@ #include #include +#if defined RAFT_NN_COMPILED +#include +#endif + #include #include diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh index dbfe6ce430..407986de05 100644 --- a/cpp/include/raft/stats/completeness_score.cuh +++ b/cpp/include/raft/stats/completeness_score.cuh @@ -19,7 +19,7 @@ #pragma once -#include +#include namespace raft { namespace stats { @@ -42,8 +42,8 @@ double completeness_score(const T* truthClusterArray, T upperLabelRange, cudaStream_t stream) { - return detail::completeness_score( - truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream); + return detail::homogeneity_score( // the two label arrays are deliberately swapped here + predClusterArray, truthClusterArray, size, lowerLabelRange, upperLabelRange, stream); } }; // end namespace stats diff --git a/cpp/include/raft/stats/completeness_score.hpp b/cpp/include/raft/stats/completeness_score.hpp index 0dd97e9782..40e60c852c 100644 --- a/cpp/include/raft/stats/completeness_score.hpp +++
b/cpp/include/raft/stats/completeness_score.hpp @@ -18,39 +18,6 @@ * Please use the cuh version instead. */ -#ifndef __COMPLETENESS_SCORE_H -#define __COMPLETENESS_SCORE_H - #pragma once -#include - -namespace raft { -namespace stats { - -/** - * @brief Function to calculate the completeness score between two clusters - * - * @param truthClusterArray: the array of truth classes of type T - * @param predClusterArray: the array of predicted classes of type T - * @param size: the size of the data points of type int - * @param lowerLabelRange: the lower bound of the range of labels - * @param upperLabelRange: the upper bound of the range of labels - * @param stream: the cudaStream object - */ -template -double completeness_score(const T* truthClusterArray, - const T* predClusterArray, - int size, - T lowerLabelRange, - T upperLabelRange, - cudaStream_t stream) -{ - return detail::completeness_score( - truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream); -} - -}; // end namespace stats -}; // end namespace raft - -#endif +#include \ No newline at end of file diff --git a/cpp/include/raft/stats/detail/completeness_score.cuh b/cpp/include/raft/stats/detail/completeness_score.cuh deleted file mode 100644 index 5e6fb835ef..0000000000 --- a/cpp/include/raft/stats/detail/completeness_score.cuh +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -/** - * @file completeness_score.cuh - * - * @brief A clustering result satisfies completeness if all the data points - * that are members of a given class are elements of the same cluster. - */ - -#pragma once - -#include -#include - -namespace raft { -namespace stats { -namespace detail { - -/** - * @brief Function to calculate the completeness score between two clusters - * - * @param truthClusterArray: the array of truth classes of type T - * @param predClusterArray: the array of predicted classes of type T - * @param size: the size of the data points of type int - * @param lowerLabelRange: the lower bound of the range of labels - * @param upperLabelRange: the upper bound of the range of labels - * @param stream: the cudaStream object - */ -template -double completeness_score(const T* truthClusterArray, - const T* predClusterArray, - int size, - T lowerLabelRange, - T upperLabelRange, - cudaStream_t stream) -{ - if (size == 0) return 1.0; - - double computedMI, computedEntropy; - - computedMI = raft::stats::mutual_info_score( - truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream); - computedEntropy = - raft::stats::entropy(predClusterArray, size, lowerLabelRange, upperLabelRange, stream); - - double completeness; - - if (computedEntropy) { - completeness = computedMI / computedEntropy; - } else - completeness = 1.0; - - return completeness; -} - -}; // end namespace detail -}; // end namespace stats -}; // end namespace raft