Ensure that all CUDA kernels in cudf have hidden visibility. #14726

Merged
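This PR replaces bare `__global__` annotations on cudf's internal kernels with the `CUDF_KERNEL` macro, and spells the change out as `__global__ static` in the standalone examples. The macro's definition is not part of the hunks shown below; the following is only a sketch of a plausible expansion, inferred from the `__global__ static` spelling used in the examples, showing how marking a kernel `static` keeps its symbol out of the shared library's exported symbol table:

```cuda
// Sketch only: a plausible stand-in for CUDF_KERNEL, not the verbatim cudf
// definition (which lives in a header outside this diff). A `static` kernel has
// internal linkage, so its symbol stays local to its translation unit and cannot
// collide with same-named kernels in other libraries loaded into the process.
#include <cstdio>

#ifndef CUDF_KERNEL
#define CUDF_KERNEL static __global__  // hypothetical expansion for illustration
#endif

CUDF_KERNEL void demo_kernel(int const n)
{
  auto const tid = static_cast<int>(threadIdx.x + blockIdx.x * blockDim.x);
  if (tid < n) { printf("thread %d\n", tid); }
}

int main()
{
  demo_kernel<<<1, 4>>>(4);
  cudaDeviceSynchronize();
  return 0;
}
```

Whether the real macro uses `static`, a visibility attribute, or both is a detail of the cudf headers; the hunks below only show the call sites being switched over.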
30 changes: 15 additions & 15 deletions cpp/benchmarks/join/generate_input_tables.cuh
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -31,19 +31,19 @@

#include <cassert>

__global__ static void init_curand(curandState* state, int const nstates)
CUDF_KERNEL void init_curand(curandState* state, int const nstates)
{
int ithread = threadIdx.x + blockIdx.x * blockDim.x;

if (ithread < nstates) { curand_init(1234ULL, ithread, 0, state + ithread); }
}

template <typename key_type, typename size_type>
__global__ static void init_build_tbl(key_type* const build_tbl,
size_type const build_tbl_size,
int const multiplicity,
curandState* state,
int const num_states)
CUDF_KERNEL void init_build_tbl(key_type* const build_tbl,
size_type const build_tbl_size,
int const multiplicity,
curandState* state,
int const num_states)
{
auto const start_idx = blockIdx.x * blockDim.x + threadIdx.x;
auto const stride = blockDim.x * gridDim.x;
@@ -61,14 +61,14 @@ __global__ static void init_build_tbl,
}

template <typename key_type, typename size_type>
__global__ void init_probe_tbl(key_type* const probe_tbl,
size_type const probe_tbl_size,
size_type const build_tbl_size,
key_type const rand_max,
double const selectivity,
int const multiplicity,
curandState* state,
int const num_states)
CUDF_KERNEL void init_probe_tbl(key_type* const probe_tbl,
size_type const probe_tbl_size,
size_type const build_tbl_size,
key_type const rand_max,
double const selectivity,
int const multiplicity,
curandState* state,
int const num_states)
{
auto const start_idx = blockIdx.x * blockDim.x + threadIdx.x;
auto const stride = blockDim.x * gridDim.x;
8 changes: 4 additions & 4 deletions cpp/benchmarks/type_dispatcher/type_dispatcher.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -58,7 +58,7 @@ constexpr int block_size = 256;

// This is for NO_DISPATCHING
template <FunctorType functor_type, class T>
__global__ void no_dispatching_kernel(T** A, cudf::size_type n_rows, cudf::size_type n_cols)
CUDF_KERNEL void no_dispatching_kernel(T** A, cudf::size_type n_rows, cudf::size_type n_cols)
{
using F = Functor<T, functor_type>;
cudf::size_type index = blockIdx.x * blockDim.x + threadIdx.x;
@@ -72,7 +72,7 @@ __global__ void no_dispatching_kernel(T** A, cudf::size_type n_rows, cudf::size_

// This is for HOST_DISPATCHING
template <FunctorType functor_type, class T>
__global__ void host_dispatching_kernel(cudf::mutable_column_device_view source_column)
CUDF_KERNEL void host_dispatching_kernel(cudf::mutable_column_device_view source_column)
{
using F = Functor<T, functor_type>;
T* A = source_column.data<T>();
@@ -124,7 +124,7 @@ struct RowHandle {

// This is for DEVICE_DISPATCHING
template <FunctorType functor_type>
__global__ void device_dispatching_kernel(cudf::mutable_table_device_view source)
CUDF_KERNEL void device_dispatching_kernel(cudf::mutable_table_device_view source)
{
cudf::size_type const n_rows = source.num_rows();
cudf::size_type index = threadIdx.x + blockIdx.x * blockDim.x;
16 changes: 8 additions & 8 deletions cpp/examples/strings/custom_optimized.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -37,9 +37,9 @@
* @param d_visibilities Column of visibilities
* @param d_sizes Output sizes for each row
*/
__global__ void sizes_kernel(cudf::column_device_view const d_names,
cudf::column_device_view const d_visibilities,
cudf::size_type* d_sizes)
__global__ static void sizes_kernel(cudf::column_device_view const d_names,
cudf::column_device_view const d_visibilities,
cudf::size_type* d_sizes)
{
// The row index is resolved from the CUDA thread/block objects
auto index = threadIdx.x + blockIdx.x * blockDim.x;
@@ -74,10 +74,10 @@ __global__ void sizes_kernel(cudf::column_device_view const d_names,
* @param d_offsets Byte offset in `d_chars` for each row
* @param d_chars Output memory for all rows
*/
__global__ void redact_kernel(cudf::column_device_view const d_names,
cudf::column_device_view const d_visibilities,
cudf::size_type const* d_offsets,
char* d_chars)
__global__ static void redact_kernel(cudf::column_device_view const d_names,
cudf::column_device_view const d_visibilities,
cudf::size_type const* d_offsets,
char* d_chars)
{
// The row index is resolved from the CUDA thread/block objects
auto index = threadIdx.x + blockIdx.x * blockDim.x;
14 changes: 7 additions & 7 deletions cpp/examples/strings/custom_prealloc.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -37,12 +37,12 @@
* @param d_offsets Byte offset in `d_chars` for each row
* @param d_output Output array of string_view objects
*/
__global__ void redact_kernel(cudf::column_device_view const d_names,
cudf::column_device_view const d_visibilities,
cudf::string_view redaction,
char* working_memory,
cudf::size_type const* d_offsets,
cudf::string_view* d_output)
__global__ static void redact_kernel(cudf::column_device_view const d_names,
cudf::column_device_view const d_visibilities,
cudf::string_view redaction,
char* working_memory,
cudf::size_type const* d_offsets,
cudf::string_view* d_output)
{
// The row index is resolved from the CUDA thread/block objects
auto index = threadIdx.x + blockIdx.x * blockDim.x;
14 changes: 8 additions & 6 deletions cpp/examples/strings/custom_with_malloc.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -64,10 +64,10 @@ void set_malloc_heap_size(size_t heap_size = 1073741824) // 1GB
* @param redaction Redacted string replacement
* @param d_output Output array of string_view objects
*/
__global__ void redact_kernel(cudf::column_device_view const d_names,
cudf::column_device_view const d_visibilities,
cudf::string_view redaction,
cudf::string_view* d_output)
__global__ static void redact_kernel(cudf::column_device_view const d_names,
cudf::column_device_view const d_visibilities,
cudf::string_view redaction,
cudf::string_view* d_output)
{
// The row index is resolved from the CUDA thread/block objects
auto index = threadIdx.x + blockIdx.x * blockDim.x;
@@ -107,7 +107,7 @@ __global__ void redact_kernel(cudf::column_device_view const d_names,
* @param redaction Redacted string replacement (not to be freed)
* @param d_output Output array of string_view objects to free
*/
__global__ void free_kernel(cudf::string_view redaction, cudf::string_view* d_output, int count)
__global__ static void free_kernel(cudf::string_view redaction,
cudf::string_view* d_output,
int count)
{
auto index = threadIdx.x + blockIdx.x * blockDim.x;
if (index >= count) return;
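The three string examples above write `__global__ static` directly instead of `CUDF_KERNEL`, presumably because they build as self-contained programs that do not pull in cudf-internal macros (an assumption; the diff does not say). A minimal sketch of the linkage distinction they rely on, with illustrative kernel names:

```cuda
// Illustrative only: contrasting external vs. internal linkage for kernels.
__global__ void exported_kernel() {}        // external linkage: visible to the linker across translation units
__global__ static void hidden_kernel() {}   // internal linkage: confined to this .cu file

int main()
{
  exported_kernel<<<1, 1>>>();
  hidden_kernel<<<1, 1>>>();
  return cudaDeviceSynchronize() == cudaSuccess ? 0 : 1;
}
```

Inspecting the compiled object with `nm -C` typically shows `hidden_kernel`'s host-side stub as a local symbol, which is the practical effect the PR title is after: the kernel is not visible outside the binary that defines it.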
12 changes: 6 additions & 6 deletions cpp/include/cudf/detail/copy_if.cuh
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -52,10 +52,10 @@ namespace detail {

// Compute the count of elements that pass the mask within each block
template <typename Filter, int block_size>
__global__ void compute_block_counts(cudf::size_type* __restrict__ block_counts,
cudf::size_type size,
cudf::size_type per_thread,
Filter filter)
CUDF_KERNEL void compute_block_counts(cudf::size_type* __restrict__ block_counts,
cudf::size_type size,
cudf::size_type per_thread,
Filter filter)
{
int tid = threadIdx.x + per_thread * block_size * blockIdx.x;
int count = 0;
@@ -96,7 +96,7 @@ __device__ cudf::size_type block_scan_mask(bool mask_true, cudf::size_type& bloc
//
// Note: `filter` is not run on indices larger than the input column size
template <typename T, typename Filter, int block_size, bool has_validity>
__launch_bounds__(block_size) __global__
__launch_bounds__(block_size) CUDF_KERNEL
void scatter_kernel(cudf::mutable_column_device_view output_view,
cudf::size_type* output_null_count,
cudf::column_device_view input_view,
4 changes: 2 additions & 2 deletions cpp/include/cudf/detail/copy_if_else.cuh
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -37,7 +37,7 @@ template <size_type block_size,
typename RightIter,
typename Filter,
bool has_nulls>
__launch_bounds__(block_size) __global__
__launch_bounds__(block_size) CUDF_KERNEL
void copy_if_else_kernel(LeftIter lhs,
RightIter rhs,
Filter filter,
14 changes: 7 additions & 7 deletions cpp/include/cudf/detail/copy_range.cuh
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -40,12 +40,12 @@ template <cudf::size_type block_size,
typename SourceValidityIterator,
typename T,
bool has_validity>
__global__ void copy_range_kernel(SourceValueIterator source_value_begin,
SourceValidityIterator source_validity_begin,
cudf::mutable_column_device_view target,
cudf::size_type target_begin,
cudf::size_type target_end,
cudf::size_type* __restrict__ const null_count)
CUDF_KERNEL void copy_range_kernel(SourceValueIterator source_value_begin,
SourceValidityIterator source_validity_begin,
cudf::mutable_column_device_view target,
cudf::size_type target_begin,
cudf::size_type target_end,
cudf::size_type* __restrict__ const null_count)
{
using cudf::detail::warp_size;

24 changes: 12 additions & 12 deletions cpp/include/cudf/detail/null_mask.cuh
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -61,12 +61,12 @@ namespace detail {
* @param count_ptr Pointer to counter of set bits
*/
template <int block_size, typename Binop>
__global__ void offset_bitmask_binop(Binop op,
device_span<bitmask_type> destination,
device_span<bitmask_type const* const> source,
device_span<size_type const> source_begin_bits,
size_type source_size_bits,
size_type* count_ptr)
CUDF_KERNEL void offset_bitmask_binop(Binop op,
device_span<bitmask_type> destination,
device_span<bitmask_type const* const> source,
device_span<size_type const> source_begin_bits,
size_type source_size_bits,
size_type* count_ptr)
{
auto const tid = threadIdx.x + blockIdx.x * blockDim.x;

@@ -214,11 +214,11 @@ enum class count_bits_policy : bool {
* in each range is updated.
*/
template <typename OffsetIterator, typename OutputIterator>
__global__ void subtract_set_bits_range_boundaries_kernel(bitmask_type const* bitmask,
size_type num_ranges,
OffsetIterator first_bit_indices,
OffsetIterator last_bit_indices,
OutputIterator null_counts)
CUDF_KERNEL void subtract_set_bits_range_boundaries_kernel(bitmask_type const* bitmask,
size_type num_ranges,
OffsetIterator first_bit_indices,
OffsetIterator last_bit_indices,
OutputIterator null_counts)
{
constexpr size_type const word_size_in_bits{detail::size_in_bits<bitmask_type>()};

4 changes: 2 additions & 2 deletions cpp/include/cudf/detail/utilities/cuda.cuh
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -211,7 +211,7 @@ __device__ inline T round_up_pow2(T number_to_round, T modulus)
}

template <class F>
__global__ void single_thread_kernel(F f)
CUDF_KERNEL void single_thread_kernel(F f)
{
f();
}
18 changes: 9 additions & 9 deletions cpp/include/cudf/detail/valid_if.cuh
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -44,7 +44,7 @@ namespace detail {
* @param[out] valid_count The count of set bits in the output bitmask
*/
template <size_type block_size, typename InputIterator, typename Predicate>
__global__ void valid_if_kernel(
CUDF_KERNEL void valid_if_kernel(
bitmask_type* output, InputIterator begin, size_type size, Predicate p, size_type* valid_count)
{
constexpr size_type leader_lane{0};
@@ -151,13 +151,13 @@ template <typename InputIterator1,
typename InputIterator2,
typename BinaryPredicate,
int32_t block_size>
__global__ void valid_if_n_kernel(InputIterator1 begin1,
InputIterator2 begin2,
BinaryPredicate p,
bitmask_type* masks[],
size_type mask_count,
size_type mask_num_bits,
size_type* valid_counts)
CUDF_KERNEL void valid_if_n_kernel(InputIterator1 begin1,
InputIterator2 begin2,
BinaryPredicate p,
bitmask_type* masks[],
size_type mask_count,
size_type mask_num_bits,
size_type* valid_counts)
{
for (size_type mask_idx = 0; mask_idx < mask_count; mask_idx++) {
auto const mask = masks[mask_idx];
10 changes: 5 additions & 5 deletions cpp/include/cudf/hashing/detail/helper_functions.cuh
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2023, NVIDIA CORPORATION.
* Copyright (c) 2017-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -130,10 +130,10 @@ __forceinline__ __device__ void store_pair_vectorized(pair_type* __restrict__ co
}

template <typename value_type, typename size_type, typename key_type, typename elem_type>
__global__ void init_hashtbl(value_type* __restrict__ const hashtbl_values,
size_type const n,
key_type const key_val,
elem_type const elem_val)
CUDF_KERNEL void init_hashtbl(value_type* __restrict__ const hashtbl_values,
size_type const n,
key_type const key_val,
elem_type const elem_val)
{
size_type const idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < n) {