Skip to content

Commit

Permalink
[Opt] Expose the detail::popc as public API (#2346)
Browse files Browse the repository at this point in the history
- For resolving the issue of cuVS: rapidsai/cuvs#158

Authors:
  - rhdong (https://github.com/rhdong)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: #2346
  • Loading branch information
rhdong authored Jul 24, 2024
1 parent 706eb39 commit fa7c193
Show file tree
Hide file tree
Showing 7 changed files with 344 additions and 8 deletions.
8 changes: 7 additions & 1 deletion cpp/bench/prims/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,13 @@ endfunction()

if(BUILD_PRIMS_BENCH)
ConfigureBench(
NAME CORE_BENCH PATH core/bitset.cu core/copy.cu main.cpp
NAME
CORE_BENCH
PATH
core/bitset.cu
core/copy.cu
core/popc.cu
main.cpp
)

ConfigureBench(
Expand Down
127 changes: 127 additions & 0 deletions cpp/bench/prims/core/popc.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <common/benchmark.hpp>

#include <raft/core/popc.hpp>

#include <algorithm>
#include <cstdint>
#include <ostream>
#include <random>
#include <sstream>
#include <vector>

namespace raft::bench::core {

/**
 * @brief Parameters describing one popc benchmark case.
 *
 * @tparam index_t integer type used for the bitmap dimensions.
 */
template <typename index_t>
struct PopcInputs {
  /// Number of rows of the logical bitmap.
  index_t n_rows;
  /// Number of columns of the logical bitmap.
  index_t n_cols;
  /// Fraction of the `n_rows * n_cols` bits that the benchmark sets to 1.
  float sparsity;
};

/**
 * @brief Stream a benchmark case as `n_rows#n_cols#sparsity`.
 *
 * @param[in,out] os     output stream to write to.
 * @param[in]     params benchmark case to print.
 * @return the same stream, to allow chaining.
 */
template <typename index_t>
inline auto operator<<(std::ostream& os, const PopcInputs<index_t>& params) -> std::ostream&
{
  return os << params.n_rows << "#" << params.n_cols << "#" << params.sparsity;
}

/**
 * @brief Benchmark fixture that times `raft::popc` on a randomly populated bitmap.
 *
 * The bitmap holds `n_rows * n_cols` bits packed into words of type `bits_t`.
 * It is generated on the host, copied to the device once, and then `raft::popc`
 * is invoked repeatedly inside the benchmark loop.
 *
 * @tparam index_t integer type used for sizes and for the popc result.
 * @tparam bits_t  word type that stores the packed bits.
 */
template <typename index_t, typename bits_t = uint32_t>
struct popc_bench : public fixture {
  /**
   * @brief Allocate the device bitmap and the device result scalar for one case.
   *
   * @param[in] p benchmark case (dimensions and fraction of set bits).
   */
  popc_bench(const PopcInputs<index_t>& p)
    : params(p),
      n_element(raft::ceildiv(params.n_rows * params.n_cols, index_t(sizeof(bits_t) * 8))),
      bits_d{raft::make_device_vector<bits_t, index_t>(res, n_element)},
      nnz_actual_d{raft::make_device_scalar<index_t>(res, 0)}
  {
  }

  /**
   * @brief Fill `bitmap` with randomly placed set bits.
   *
   * All words are cleared first; then distinct bit positions in `[0, m * n)` are
   * drawn uniformly at random until the requested number of bits is set.
   *
   * @param[in]  m        number of rows of the logical bitmap.
   * @param[in]  n        number of columns of the logical bitmap.
   * @param[in]  sparsity fraction of the `m * n` bits to set to 1.
   * @param[out] bitmap   host storage; must provide at least `m * n` bits.
   * @return the number of bits that were set.
   */
  index_t create_bitmap(index_t m, index_t n, float sparsity, std::vector<bits_t>& bitmap)
  {
    constexpr index_t bits_per_word = static_cast<index_t>(8 * sizeof(bits_t));

    std::fill(bitmap.begin(), bitmap.end(), bits_t{0});

    const index_t total = m * n;
    // An empty bitmap has no valid bit position to draw from.
    if (total <= 0) { return index_t{0}; }

    // Compute in double so large bit counts are not rounded, and clamp so the
    // rejection loop below can always terminate (at most `total` distinct bits exist).
    const auto requested =
      static_cast<index_t>(static_cast<double>(total) * static_cast<double>(sparsity));
    const index_t num_ones = std::clamp(requested, index_t{0}, total);

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<index_t> dis(0, total - 1);

    index_t remaining = num_ones;
    while (remaining > 0) {
      const index_t bit_index = dis(gen);

      bits_t& word = bitmap[bit_index / bits_per_word];
      // Build the mask in the word type; shifting a signed index_t could overflow.
      const bits_t mask = static_cast<bits_t>(bits_t{1} << (bit_index % bits_per_word));

      // Only count positions that were not set before, so exactly `num_ones` bits end up set.
      if ((word & mask) == 0) {
        word |= mask;
        --remaining;
      }
    }
    return num_ones;
  }

  /**
   * @brief Prepare the device bitmap and time `raft::popc` on it.
   *
   * @param[in,out] state Google Benchmark state driving the timed loop.
   */
  void run_benchmark(::benchmark::State& state) override
  {
    std::ostringstream label_stream;
    label_stream << params;
    state.SetLabel(label_stream.str());

    std::vector<bits_t> bits_h(n_element);
    auto stream = raft::resource::get_cuda_stream(res);

    create_bitmap(params.n_rows, params.n_cols, params.sparsity, bits_h);
    update_device(bits_d.data_handle(), bits_h.data(), bits_h.size(), stream);

    // Make sure the upload has finished before the timed loop starts.
    resource::sync_stream(res);

    // NOTE(review): the buffers are allocated through `res`, while popc runs on `handle`
    // (not declared in this struct; presumably inherited from `fixture`) — confirm this is intended.
    loop_on_state(state, [this]() {
      auto bits_view =
        raft::make_device_vector_view<const bits_t, index_t>(bits_d.data_handle(), bits_d.size());

      index_t max_len      = params.n_rows * params.n_cols;
      auto max_len_view    = raft::make_host_scalar_view<index_t>(&max_len);
      auto nnz_actual_view = nnz_actual_d.view();
      raft::popc(this->handle, bits_view, max_len_view, nnz_actual_view);
    });
  }

 private:
  raft::resources res;
  PopcInputs<index_t> params;
  index_t n_element;

  raft::device_vector<bits_t, index_t> bits_d;
  raft::device_scalar<index_t> nnz_actual_d;
};

/**
 * @brief Benchmark cases, each written as {n_rows, n_cols, sparsity}.
 *
 * The entries, including the repeated configurations, are listed in their
 * original order.
 */
template <typename index_t>
const std::vector<PopcInputs<index_t>> popc_input_vecs{
  // 131072 columns
  {2, 131072, 0.4},
  {8, 131072, 0.5},
  {16, 131072, 0.2},

  // 8192 columns, small row counts
  {2, 8192, 0.4},
  {16, 8192, 0.5},
  {128, 8192, 0.2},

  // 1024 x 8192 with sparsity 0.1, repeated 8 times
  {1024, 8192, 0.1},
  {1024, 8192, 0.1},
  {1024, 8192, 0.1},
  {1024, 8192, 0.1},
  {1024, 8192, 0.1},
  {1024, 8192, 0.1},
  {1024, 8192, 0.1},
  {1024, 8192, 0.1},

  // 1024 x 8192 cycling through 0.4, 0.5, 0.2 (four cycles)
  {1024, 8192, 0.4},
  {1024, 8192, 0.5},
  {1024, 8192, 0.2},
  {1024, 8192, 0.4},
  {1024, 8192, 0.5},
  {1024, 8192, 0.2},
  {1024, 8192, 0.4},
  {1024, 8192, 0.5},
  {1024, 8192, 0.2},
  {1024, 8192, 0.4},
  {1024, 8192, 0.5},
  {1024, 8192, 0.2},

  // 1024 x 8192 cycling through 0.5, 0.2, 0.4 (eight entries)
  {1024, 8192, 0.5},
  {1024, 8192, 0.2},
  {1024, 8192, 0.4},
  {1024, 8192, 0.5},
  {1024, 8192, 0.2},
  {1024, 8192, 0.4},
  {1024, 8192, 0.5},
  {1024, 8192, 0.2}};

// popc benchmark instantiated with 64-bit indices; bits_t keeps its default (uint32_t).
using PopcBenchI64 = popc_bench<int64_t>;

// Hand the fixture and the int64_t case list to the benchmark registration macro.
// NOTE(review): the empty string is presumably a name suffix — confirm against RAFT_BENCH_REGISTER.
RAFT_BENCH_REGISTER(PopcBenchI64, "", popc_input_vecs<int64_t>);

} // namespace raft::bench::core
5 changes: 3 additions & 2 deletions cpp/include/raft/core/bitset.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
#pragma once

#include <raft/core/bitset.hpp>
#include <raft/core/detail/popc.cuh>
#include <raft/core/device_container_policy.hpp>
#include <raft/core/device_mdarray.hpp>
#include <raft/core/popc.hpp>
#include <raft/core/resource/thrust_policy.hpp>
#include <raft/core/resources.hpp>
#include <raft/linalg/map.cuh>
Expand Down Expand Up @@ -167,9 +167,10 @@ template <typename bitset_t, typename index_t>
void bitset<bitset_t, index_t>::count(const raft::resources& res,
raft::device_scalar_view<index_t> count_gpu_scalar)
{
auto max_len = raft::make_host_scalar_view<index_t>(&bitset_len_);
auto values =
raft::make_device_vector_view<const bitset_t, index_t>(bitset_.data(), n_elements());
raft::detail::popc(res, values, bitset_len_, count_gpu_scalar);
raft::popc(res, values, max_len, count_gpu_scalar);
}

} // end namespace raft::core
11 changes: 6 additions & 5 deletions cpp/include/raft/core/detail/popc.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include <raft/core/detail/mdspan_util.cuh>
#include <raft/core/device_mdarray.hpp>
#include <raft/core/host_mdspan.hpp>
#include <raft/core/resources.hpp>
#include <raft/linalg/coalesced_reduction.cuh>

Expand All @@ -28,15 +29,15 @@ namespace raft::detail {
* @tparam value_t the value type of the vector.
* @tparam index_t the index type of vector and scalar.
*
* @param[in] res raft handle for managing expensive resources
* @param[in] values Number of row in the matrix.
* @param[in] res RAFT handle for managing expensive resources
* @param[in] values Device vector view containing the values to be processed.
* @param[in] max_len Maximum number of bits to count.
* @param[out] counter Number of bits that are set to 1.
* @param[out] counter Device scalar view to store the number of bits that are set to 1.
*/
template <typename value_t, typename index_t>
void popc(const raft::resources& res,
device_vector_view<value_t, index_t> values,
index_t max_len,
raft::host_scalar_view<index_t> max_len,
raft::device_scalar_view<index_t> counter)
{
auto values_size = values.size();
Expand All @@ -46,7 +47,7 @@ void popc(const raft::resources& res,

static constexpr index_t len_per_item = sizeof(value_t) * 8;

value_t tail_len = (max_len % len_per_item);
value_t tail_len = (max_len[0] % len_per_item);
value_t tail_mask = tail_len ? (value_t)((value_t{1} << tail_len) - value_t{1}) : ~value_t{0};
raft::linalg::coalesced_reduction(
res,
Expand Down
41 changes: 41 additions & 0 deletions cpp/include/raft/core/popc.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once
#include <raft/core/detail/popc.cuh>
namespace raft {

/**
 * @brief Count the bits that are set to 1 in a device vector.
 *
 * Public entry point that forwards all arguments to `raft::detail::popc`.
 *
 * @tparam value_t element type of the input vector.
 * @tparam index_t index type of the vector and of both scalars.
 *
 * @param[in]  res     RAFT handle for managing expensive resources.
 * @param[in]  values  Device vector view holding the packed bits to count.
 * @param[in]  max_len Host scalar view holding the maximum number of bits to count.
 * @param[out] counter Device scalar view that receives the number of bits set to 1.
 */
template <typename value_t, typename index_t>
void popc(const raft::resources& res,
          device_vector_view<value_t, index_t> values,
          raft::host_scalar_view<index_t> max_len,
          raft::device_scalar_view<index_t> counter)
{
  // Delegate to the implementation in the detail namespace.
  raft::detail::popc(res, values, max_len, counter);
}

} // namespace raft
1 change: 1 addition & 0 deletions cpp/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ if(BUILD_TESTS)
core/math_host.cpp
core/operators_device.cu
core/operators_host.cpp
core/popc.cu
core/handle.cpp
core/interruptible.cu
core/nvtx.cpp
Expand Down
Loading

0 comments on commit fa7c193

Please sign in to comment.