Skip to content

Commit

Permalink
jitify direct-to-cubin compilation and caching. (#7919)
Browse files Browse the repository at this point in the history
This changes jitify2 `get_kernel` invocations to pass an `-arch=sm_.` argument, which causes jitify to compile and disk-cache directly to cubin, rather than caching PTX. This alleviates some compilation issues in specific (possibly unsupported) environments/configurations, but more importantly should provide faster kernel launches from the cache.

Authors:
  - Christopher Harris (https://github.com/cwharris)

Approvers:
  - MithunR (https://github.com/mythrocks)
  - Keith Kraus (https://github.com/kkraus14)
  - Devavret Makkar (https://github.com/devavret)

URL: #7919
Loading branch information
cwharris authored Apr 8, 2021
1 parent 192ff46 commit a5d2407
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 11 deletions.
13 changes: 7 additions & 6 deletions cpp/src/binaryop/binaryop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ void binary_operation(mutable_column_view& out,
get_operator_name(op, op_type));

cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit)
.get_kernel(kernel_name) //
.get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) //
->configure_1d_max_occupancy(0, 0, 0, stream.value()) //
->launch(out.size(),
cudf::jit::get_data_ptr(out),
Expand All @@ -108,7 +108,7 @@ void binary_operation(mutable_column_view& out,
get_operator_name(op, op_type));

cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit)
.get_kernel(kernel_name) //
.get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) //
->configure_1d_max_occupancy(0, 0, 0, stream.value()) //
->launch(out.size(),
cudf::jit::get_data_ptr(out),
Expand Down Expand Up @@ -150,7 +150,7 @@ void binary_operation(mutable_column_view& out,
get_operator_name(op, OperatorType::Direct));

cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit)
.get_kernel(kernel_name) //
.get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) //
->configure_1d_max_occupancy(0, 0, 0, stream.value()) //
->launch(out.size(),
cudf::jit::get_data_ptr(out),
Expand All @@ -170,7 +170,7 @@ void binary_operation(mutable_column_view& out,
get_operator_name(op, OperatorType::Direct));

cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit)
.get_kernel(kernel_name) //
.get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) //
->configure_1d_max_occupancy(0, 0, 0, stream.value()) //
->launch(out.size(),
cudf::jit::get_data_ptr(out),
Expand Down Expand Up @@ -200,8 +200,9 @@ void binary_operation(mutable_column_view& out,
get_operator_name(binary_operator::GENERIC_BINARY, OperatorType::Direct));

cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit)
.get_kernel(kernel_name, {}, {{"binaryop/jit/operation-udf.hpp", cuda_source}}) //
->configure_1d_max_occupancy(0, 0, 0, stream.value()) //
.get_kernel(
kernel_name, {}, {{"binaryop/jit/operation-udf.hpp", cuda_source}}, {"-arch=sm_."}) //
->configure_1d_max_occupancy(0, 0, 0, stream.value()) //
->launch(out.size(),
cudf::jit::get_data_ptr(out),
cudf::jit::get_data_ptr(lhs),
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/rolling/rolling_detail.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1295,8 +1295,9 @@ std::unique_ptr<column> rolling_window_udf(column_view const& input,
following_window_str.c_str());

cudf::jit::get_program_cache(*rolling_jit_kernel_cu_jit)
.get_kernel(kernel_name, {}, {{"rolling/jit/operation-udf.hpp", cuda_source}}) //
->configure_1d_max_occupancy(0, 0, 0, stream.value()) //
.get_kernel(
kernel_name, {}, {{"rolling/jit/operation-udf.hpp", cuda_source}}, {"-arch=sm_."}) //
->configure_1d_max_occupancy(0, 0, 0, stream.value()) //
->launch(input.size(),
cudf::jit::get_data_ptr(input),
input.null_mask(),
Expand Down
7 changes: 4 additions & 3 deletions cpp/src/transform/transform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,10 @@ void unary_operation(mutable_column_view output,
"GENERIC_UNARY_OP");

cudf::jit::get_program_cache(*transform_jit_kernel_cu_jit)
.get_kernel(kernel_name, {}, {{"transform/jit/operation-udf.hpp", cuda_source}}) //
->configure_1d_max_occupancy(0, 0, 0, stream.value()) //
->launch(output.size(), //
.get_kernel(
kernel_name, {}, {{"transform/jit/operation-udf.hpp", cuda_source}}, {"-arch=sm_."}) //
->configure_1d_max_occupancy(0, 0, 0, stream.value()) //
->launch(output.size(), //
cudf::jit::get_data_ptr(output),
cudf::jit::get_data_ptr(input));
}
Expand Down

0 comments on commit a5d2407

Please sign in to comment.