Skip to content

Commit

Permalink
Merge pull request #169 from PointKernel/improve-retrieve-all
Browse files Browse the repository at this point in the history
Improve `static_map::retrieve_all`
  • Loading branch information
PointKernel authored Jun 2, 2022
2 parents 56325eb + abd2474 commit ee730a1
Showing 1 changed file with 35 additions and 6 deletions.
41 changes: 35 additions & 6 deletions include/cuco/detail/static_map.inl
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
#include <cuco/detail/error.hpp>
#include <cuco/detail/utils.cuh>

#include <thrust/copy.h>
#include <thrust/execution_policy.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/tuple.h>

#include <cub/device/device_select.cuh>

namespace cuco {

template <typename Key, typename Value, cuda::thread_scope Scope, typename Allocator>
Expand Down Expand Up @@ -220,10 +220,39 @@ std::pair<KeyOut, ValueOut> static_map<Key, Value, Scope, Allocator>::retrieve_a
auto filled = detail::slot_is_filled<Key>{get_empty_key_sentinel()};
auto zipped_out_begin = thrust::make_zip_iterator(thrust::make_tuple(keys_out, values_out));

auto const zipped_out_end =
thrust::copy_if(thrust::cuda::par.on(stream), begin, end, zipped_out_begin, filled);
auto const num = std::distance(zipped_out_begin, zipped_out_end);
return std::make_pair(keys_out + num, values_out + num);
std::size_t temp_storage_bytes = 0;
using temp_allocator_type = typename std::allocator_traits<Allocator>::rebind_alloc<char>;
auto temp_allocator = temp_allocator_type{slot_allocator_};
auto d_num_out = reinterpret_cast<std::size_t*>(
std::allocator_traits<temp_allocator_type>::allocate(temp_allocator, sizeof(std::size_t)));
cub::DeviceSelect::If(nullptr,
temp_storage_bytes,
begin,
zipped_out_begin,
d_num_out,
get_capacity(),
filled,
stream);

// Allocate temporary storage
auto d_temp_storage =
std::allocator_traits<temp_allocator_type>::allocate(temp_allocator, temp_storage_bytes);

cub::DeviceSelect::If(d_temp_storage,
temp_storage_bytes,
begin,
zipped_out_begin,
d_num_out,
get_capacity(),
filled,
stream);

std::size_t h_num_out;
CUCO_CUDA_TRY(
cudaMemcpyAsync(&h_num_out, d_num_out, sizeof(std::size_t), cudaMemcpyDeviceToHost, stream));
CUCO_CUDA_TRY(cudaStreamSynchronize(stream));

return std::make_pair(keys_out + h_num_out, values_out + h_num_out);
}

template <typename Key, typename Value, cuda::thread_scope Scope, typename Allocator>
Expand Down

0 comments on commit ee730a1

Please sign in to comment.