diff --git a/cpp/bench/random/make_blobs.cu b/cpp/bench/random/make_blobs.cu
index fdd4ef61d2..950d80c499 100644
--- a/cpp/bench/random/make_blobs.cu
+++ b/cpp/bench/random/make_blobs.cu
@@ -25,6 +25,12 @@ struct make_blobs_inputs {
   bool row_major;
 }; // struct make_blobs_inputs
 
+inline auto operator<<(std::ostream& os, const make_blobs_inputs& p) -> std::ostream&
+{
+  os << p.rows << "#" << p.cols << "#" << p.clusters << "#" << p.row_major;
+  return os;
+}
+
 template <typename T>
 struct make_blobs : public fixture {
   make_blobs(const make_blobs_inputs& p)
@@ -34,6 +40,10 @@ struct make_blobs : public fixture {
 
   void run_benchmark(::benchmark::State& state) override
   {
+    std::ostringstream label_stream;
+    label_stream << params;
+    state.SetLabel(label_stream.str());
+
     loop_on_state(state, [this]() {
       raft::random::make_blobs(data.data(),
                                labels.data(),
diff --git a/cpp/include/raft/random/detail/make_blobs.cuh b/cpp/include/raft/random/detail/make_blobs.cuh
index 212245a9bf..68c2d56599 100644
--- a/cpp/include/raft/random/detail/make_blobs.cuh
+++ b/cpp/include/raft/random/detail/make_blobs.cuh
@@ -156,8 +156,10 @@ void generate_data(DataT* out,
                    const DataT cluster_std_scalar,
                    raft::random::RngState& rng_state)
 {
-  IdxT items = n_rows * n_cols;
-  IdxT nBlocks = (items + 127) / 128;
+  constexpr IdxT block_size = 128;
+  IdxT items = n_rows * n_cols;
+  // Choose a grid size so that each thread can write two output values.
+  IdxT nBlocks = ceildiv(items, 2 * block_size);
   // parentheses needed here for kernel, otherwise macro interprets the arguments
   // of triple chevron notation as macro arguments
   RAFT_CALL_RNG_FUNC(rng_state,