diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh index f6c20eb7bc4..e6df2b58b15 100644 --- a/cpp/src/join/hash_join.cuh +++ b/cpp/src/join/hash_join.cuh @@ -69,8 +69,6 @@ std::size_t estimate_join_output_size(table_device_view build_table, null_equality compare_nulls, rmm::cuda_stream_view stream) { - using estimate_size_type = int64_t; // use 64-bit size so we can detect overflow - const size_type build_table_num_rows{build_table.num_rows()}; const size_type probe_table_num_rows{probe_table.num_rows()}; @@ -101,8 +99,8 @@ std::size_t estimate_join_output_size(table_device_view build_table, if (probe_to_build_ratio > MAX_RATIO) { sample_probe_num_rows = build_table_num_rows; } // Allocate storage for the counter used to get the size of the join output - estimate_size_type h_size_estimate{0}; - rmm::device_scalar size_estimate(0, stream); + std::size_t h_size_estimate{0}; + rmm::device_scalar size_estimate(0, stream); CHECK_CUDA(stream.value()); diff --git a/cpp/src/join/join_kernels.cuh b/cpp/src/join/join_kernels.cuh index 599b504dea0..4298706987c 100644 --- a/cpp/src/join/join_kernels.cuh +++ b/cpp/src/join/join_kernels.cuh @@ -120,17 +120,14 @@ __global__ void build_hash_table(multimap_type multi_map, * @param[in] probe_table_num_rows The number of rows in the probe table * @param[out] output_size The resulting output size */ -template +template __global__ void compute_join_output_size(multimap_type multi_map, table_device_view build_table, table_device_view probe_table, row_hash hash_probe, row_equality check_row_equality, const cudf::size_type probe_table_num_rows, - estimate_size_type* output_size) + std::size_t* output_size) { // This kernel probes multiple elements in the probe_table and store the number of matches found // inside a register. A block reduction is used at the end to calculate the matches per thread @@ -194,9 +191,9 @@ __global__ void compute_join_output_size(multimap_type multi_map, } } - using BlockReduce = cub::BlockReduce; + using BlockReduce = cub::BlockReduce; __shared__ typename BlockReduce::TempStorage temp_storage; - estimate_size_type block_counter = BlockReduce(temp_storage).Sum(thread_counter); + std::size_t block_counter = BlockReduce(temp_storage).Sum(thread_counter); // Add block counter to global counter if (threadIdx.x == 0) atomicAdd(output_size, block_counter);