Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

All kernels in the JNI should have hidden visibility #2168

Merged
merged 2 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/main/cpp/faultinj/faultinj.cu
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,12 @@ CUptiResult cuptiInitialize(void)
return status;
}

__global__ void faultInjectorKernelAssert(void)
__global__ static void faultInjectorKernelAssert(void)
{
assert(0 && "faultInjectorKernelAssert triggered");
}

__global__ void faultInjectorKernelTrap(void) { asm("trap;"); }
__global__ static void faultInjectorKernelTrap(void) { asm("trap;"); }

boost::optional<boost::property_tree::ptree&> lookupConfig(
boost::optional<boost::property_tree::ptree&> domainConfigs,
Expand Down
8 changes: 4 additions & 4 deletions src/main/cpp/src/bloom_filter.cu
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,10 @@ __device__ inline std::pair<cudf::size_type, cudf::bitmask_type> gpu_get_hash_ma
}

template <bool nullable>
__global__ void gpu_bloom_filter_put(cudf::bitmask_type* const bloom_filter,
cudf::size_type bloom_filter_bits,
cudf::column_device_view input,
cudf::size_type num_hashes)
CUDF_KERNEL void gpu_bloom_filter_put(cudf::bitmask_type* const bloom_filter,
cudf::size_type bloom_filter_bits,
cudf::column_device_view input,
cudf::size_type num_hashes)
{
size_t const tid = threadIdx.x + blockIdx.x * blockDim.x;
if (tid >= input.size()) { return; }
Expand Down
35 changes: 17 additions & 18 deletions src/main/cpp/src/cast_string.cu
Original file line number Diff line number Diff line change
Expand Up @@ -156,14 +156,14 @@ process_value(bool first_value, T current_val, T const new_digit, bool adding)
* @param ansi_mode true if ansi mode is required, which is more strict and throws
*/
template <typename T>
void __global__ string_to_integer_kernel(T* out,
bitmask_type* validity,
const char* const chars,
size_type const* offsets,
bitmask_type const* incoming_null_mask,
size_type num_rows,
bool ansi_mode,
bool strip)
void CUDF_KERNEL string_to_integer_kernel(T* out,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be

void CUDF_KERNEL or CUDF_KERNEL void ?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Validity wise it doesn't matter. Stylistically it can be argued we should be consistent. I think it's outside the scope of this change to enforce it though. This example started with void __global__ and ended with void __global__ static. This looks odd expanded out, but is valid and does the expected thing. I would prefer to see it as CUDF_KERNEL void as well.

Copy link
Collaborator Author

@jihoonson jihoonson Jun 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I was just replacing all __global__ with CUDF_KERNEL and never noticed this. I agree CUDF_KERNEL void is better and consistent. Will fix it.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah just noticed it has been fixed already in #2178.

bitmask_type* validity,
const char* const chars,
size_type const* offsets,
bitmask_type const* incoming_null_mask,
size_type num_rows,
bool ansi_mode,
bool strip)
{
auto const group = cooperative_groups::this_thread_block();
auto const warp = cooperative_groups::tiled_partition<cudf::detail::warp_size>(group);
Expand Down Expand Up @@ -386,18 +386,17 @@ __device__ thrust::optional<thrust::tuple<bool, int, int>> validate_and_exponent
* @param scale scale of desired decimals
* @param precision precision of desired decimals
* @param ansi_mode true if ansi mode is required, which is more strict and throws
* @return __global__
*/
template <typename T>
__global__ void string_to_decimal_kernel(T* out,
bitmask_type* validity,
const char* const chars,
size_type const* offsets,
bitmask_type const* incoming_null_mask,
size_type num_rows,
int32_t scale,
int32_t precision,
bool strip)
CUDF_KERNEL void string_to_decimal_kernel(T* out,
bitmask_type* validity,
const char* const chars,
size_type const* offsets,
bitmask_type const* incoming_null_mask,
size_type num_rows,
int32_t scale,
int32_t precision,
bool strip)
{
auto const group = cooperative_groups::this_thread_block();
auto const warp = cooperative_groups::tiled_partition<cudf::detail::warp_size>(group);
Expand Down
16 changes: 8 additions & 8 deletions src/main/cpp/src/cast_string_to_float.cu
Original file line number Diff line number Diff line change
Expand Up @@ -618,14 +618,14 @@ class string_to_float {
};

template <typename T, size_type block_size>
__global__ void string_to_float_kernel(T* out,
bitmask_type* validity,
int32_t* ansi_except,
size_type* valid_count,
const char* const chars,
size_type const* offsets,
bitmask_type const* incoming_null_mask,
size_type const num_rows)
CUDF_KERNEL void string_to_float_kernel(T* out,
bitmask_type* validity,
int32_t* ansi_except,
size_type* valid_count,
const char* const chars,
size_type const* offsets,
bitmask_type const* incoming_null_mask,
size_type const num_rows)
{
size_type const tid = threadIdx.x + (blockDim.x * blockIdx.x);
size_type const row = tid / 32;
Expand Down
24 changes: 12 additions & 12 deletions src/main/cpp/src/parse_uri.cu
Original file line number Diff line number Diff line change
Expand Up @@ -770,13 +770,13 @@ uri_parts __device__ validate_uri(const char* str,
* @param out_offsets Offsets to the start of the chunks
* @param out_validity Bitmask of validity data, updated in function
*/
__global__ void parse_uri_char_counter(column_device_view const in_strings,
URI_chunks chunk,
char const* const base_ptr,
size_type* const out_lengths,
size_type* const out_offsets,
bitmask_type* out_validity,
thrust::optional<column_device_view const> query_match)
CUDF_KERNEL void parse_uri_char_counter(column_device_view const in_strings,
URI_chunks chunk,
char const* const base_ptr,
size_type* const out_lengths,
size_type* const out_offsets,
bitmask_type* out_validity,
thrust::optional<column_device_view const> query_match)
{
// thread per row
auto const tid = cudf::detail::grid_1d::global_thread_id();
Expand Down Expand Up @@ -850,11 +850,11 @@ __global__ void parse_uri_char_counter(column_device_view const in_strings,
* @param offsets Offset value of each string associated with `out_chars`
* @param out_chars Character buffer for the output string column
*/
__global__ void parse_uri(column_device_view const in_strings,
char const* const base_ptr,
size_type const* const src_offsets,
size_type const* const offsets,
char* const out_chars)
CUDF_KERNEL void parse_uri(column_device_view const in_strings,
char const* const base_ptr,
size_type const* const src_offsets,
size_type const* const offsets,
char* const out_chars)
{
auto const tid = cudf::detail::grid_1d::global_thread_id();

Expand Down
Loading