Skip to content

Commit

Permalink
Change block size parameter from a global to a template param. (#7333)
Browse files Browse the repository at this point in the history
Addresses followup review comment:  #7096 (comment)

There was a constexpr global being used to initialize a template that could have been supplied from it's own template value.

Authors:
  - @nvdbaranec

Approvers:
  - David (@davidwendt)
  - Jake Hemstad (@jrhemstad)

URL: #7333
  • Loading branch information
nvdbaranec authored Feb 8, 2021
1 parent 621de88 commit eb8dc88
Showing 1 changed file with 17 additions and 15 deletions.
32 changes: 17 additions & 15 deletions cpp/src/copying/contiguous_split.cu
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,6 @@ struct dst_buf_info {
size_type valid_count;
};

constexpr size_type copy_block_size = 512;

/**
* @brief Copy a single buffer of column data, shifting values (for offset columns),
* and validity (for validity buffers) as necessary.
Expand Down Expand Up @@ -130,6 +128,7 @@ constexpr size_type copy_block_size = 512;
* @param num_rows Number of rows being copied
* @param valid_count Optional pointer to a value to store count of set bits
*/
template <int block_size>
__device__ void copy_buffer(uint8_t* __restrict__ dst,
uint8_t* __restrict__ src,
int t,
Expand Down Expand Up @@ -217,7 +216,7 @@ __device__ void copy_buffer(uint8_t* __restrict__ dst,
if (num_bytes == 0) {
if (!t) { *valid_count = 0; }
} else {
using BlockReduce = cub::BlockReduce<size_type, copy_block_size>;
using BlockReduce = cub::BlockReduce<size_type, block_size>;
__shared__ typename BlockReduce::TempStorage temp_storage;
size_type block_valid_count{BlockReduce(temp_storage).Sum(thread_valid_count)};
if (!t) {
Expand Down Expand Up @@ -253,6 +252,7 @@ __device__ void copy_buffer(uint8_t* __restrict__ dst,
* @param dst_bufs Desination buffers (N*M)
* @param buf_info Information on the range of values to be copied for each destination buffer.
*/
template <int block_size>
__global__ void copy_partition(int num_src_bufs,
int num_partitions,
uint8_t** src_bufs,
Expand All @@ -264,17 +264,18 @@ __global__ void copy_partition(int num_src_bufs,
size_t const buf_index = (partition_index * num_src_bufs) + src_buf_index;

// copy, shifting offsets and validity bits as needed
copy_buffer(dst_bufs[partition_index] + buf_info[buf_index].dst_offset,
src_bufs[src_buf_index],
threadIdx.x,
buf_info[buf_index].num_elements,
buf_info[buf_index].element_size,
buf_info[buf_index].src_row_index,
blockDim.x,
buf_info[buf_index].value_shift,
buf_info[buf_index].bit_shift,
buf_info[buf_index].num_rows,
buf_info[buf_index].valid_count > 0 ? &buf_info[buf_index].valid_count : nullptr);
copy_buffer<block_size>(
dst_bufs[partition_index] + buf_info[buf_index].dst_offset,
src_bufs[src_buf_index],
threadIdx.x,
buf_info[buf_index].num_elements,
buf_info[buf_index].element_size,
buf_info[buf_index].src_row_index,
blockDim.x,
buf_info[buf_index].value_shift,
buf_info[buf_index].bit_shift,
buf_info[buf_index].num_rows,
buf_info[buf_index].valid_count > 0 ? &buf_info[buf_index].valid_count : nullptr);
}

// The block of functions below are all related:
Expand Down Expand Up @@ -1019,7 +1020,8 @@ std::vector<packed_table> contiguous_split(cudf::table_view const& input,

// copy. 1 block per buffer
{
copy_partition<<<num_bufs, copy_block_size, 0, stream.value()>>>(
constexpr size_type block_size = 512;
copy_partition<block_size><<<num_bufs, block_size, 0, stream.value()>>>(
num_src_bufs, num_partitions, d_src_bufs, d_dst_bufs, d_dst_buf_info);
}

Expand Down

0 comments on commit eb8dc88

Please sign in to comment.