From d87e181daa67d8fb1a029fc2c09e2f561d1e7234 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 4 Oct 2023 13:25:56 -0700 Subject: [PATCH] Expose streams in binaryop APIs (#14187) Contributes to #925 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Nghia Truong (https://github.com/ttnghia) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/14187 --- cpp/include/cudf/binaryop.hpp | 8 ++ cpp/src/binaryop/binaryop.cpp | 12 ++- cpp/src/binaryop/compiled/binary_ops.cu | 6 +- cpp/tests/CMakeLists.txt | 1 + cpp/tests/streams/binaryop_test.cpp | 126 ++++++++++++++++++++++++ 5 files changed, 147 insertions(+), 6 deletions(-) create mode 100644 cpp/tests/streams/binaryop_test.cpp diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp index 77d6a4d1e89..9df4b4eb00f 100644 --- a/cpp/include/cudf/binaryop.hpp +++ b/cpp/include/cudf/binaryop.hpp @@ -102,6 +102,7 @@ enum class binary_operator : int32_t { * @param rhs The right operand column * @param op The binary operator * @param output_type The desired data type of the output column + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column of `output_type` type containing the result of * the binary operation @@ -115,6 +116,7 @@ std::unique_ptr binary_operation( column_view const& rhs, binary_operator op, data_type output_type, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -131,6 +133,7 @@ std::unique_ptr binary_operation( * @param rhs The right operand scalar * @param op The binary operator * @param output_type The desired data type of the output column + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column of `output_type` type containing the result of * the binary operation @@ -144,6 +147,7 @@ std::unique_ptr binary_operation( scalar const& rhs, binary_operator op, data_type output_type, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -158,6 +162,7 @@ std::unique_ptr binary_operation( * @param rhs The right operand column * @param op The binary operator * @param output_type The desired data type of the output column + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column of `output_type` type containing the result of * the binary operation @@ -172,6 +177,7 @@ std::unique_ptr binary_operation( column_view const& rhs, binary_operator op, data_type output_type, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -189,6 +195,7 @@ std::unique_ptr binary_operation( * @param output_type The desired data type of the output column. It is assumed * that output_type is compatible with the output data type * of the function in the PTX code + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column of `output_type` type containing the result of * the binary operation @@ -201,6 +208,7 @@ std::unique_ptr binary_operation( column_view const& rhs, std::string const& ptx, data_type output_type, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index ef07de8c461..6b413ab2be4 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -405,38 +405,42 @@ std::unique_ptr binary_operation(scalar const& lhs, column_view const& rhs, binary_operator op, data_type output_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::binary_operation(lhs, rhs, op, output_type, cudf::get_default_stream(), mr); + return detail::binary_operation(lhs, rhs, op, output_type, stream, mr); } std::unique_ptr binary_operation(column_view const& lhs, scalar const& rhs, binary_operator op, data_type output_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::binary_operation(lhs, rhs, op, output_type, cudf::get_default_stream(), mr); + return detail::binary_operation(lhs, rhs, op, output_type, stream, mr); } std::unique_ptr binary_operation(column_view const& lhs, column_view const& rhs, binary_operator op, data_type output_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::binary_operation(lhs, rhs, op, output_type, cudf::get_default_stream(), mr); + return detail::binary_operation(lhs, rhs, op, output_type, stream, mr); } std::unique_ptr binary_operation(column_view const& lhs, column_view const& rhs, std::string const& ptx, data_type output_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::binary_operation(lhs, rhs, ptx, output_type, cudf::get_default_stream(), mr); + return detail::binary_operation(lhs, rhs, ptx, output_type, stream, mr); } } // namespace cudf diff --git a/cpp/src/binaryop/compiled/binary_ops.cu b/cpp/src/binaryop/compiled/binary_ops.cu index 1f7f342632a..85ab5c6d6cb 100644 --- a/cpp/src/binaryop/compiled/binary_ops.cu +++ b/cpp/src/binaryop/compiled/binary_ops.cu @@ -47,14 +47,16 @@ namespace { struct scalar_as_column_view { using return_type = typename std::pair>; template ())> - return_type operator()(scalar const& s, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) + return_type operator()(scalar const& s, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource*) { auto& h_scalar_type_view = static_cast&>(const_cast(s)); auto col_v = column_view(s.type(), 1, h_scalar_type_view.data(), reinterpret_cast(s.validity_data()), - !s.is_valid()); + !s.is_valid(stream)); return std::pair{col_v, std::unique_ptr(nullptr)}; } template ())> diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 04939f3cd6d..ac13c121530 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -622,6 +622,7 @@ ConfigureTest( STREAM_IDENTIFICATION_TEST identify_stream_usage/test_default_stream_identification.cu ) +ConfigureTest(STREAM_BINARYOP_TEST streams/binaryop_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_CONCATENATE_TEST streams/concatenate_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_COPYING_TEST streams/copying_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_FILLING_TEST streams/filling_test.cpp STREAM_MODE testing) diff --git a/cpp/tests/streams/binaryop_test.cpp b/cpp/tests/streams/binaryop_test.cpp new file mode 100644 index 00000000000..2520aed0458 --- /dev/null +++ b/cpp/tests/streams/binaryop_test.cpp @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include + +#include +#include +#include + +class BinaryopTest : public cudf::test::BaseFixture {}; + +TEST_F(BinaryopTest, ColumnColumn) +{ + cudf::test::fixed_width_column_wrapper lhs{10, 20, 30, 40, 50}; + cudf::test::fixed_width_column_wrapper rhs{15, 25, 35, 45, 55}; + + cudf::binary_operation(lhs, + rhs, + cudf::binary_operator::ADD, + cudf::data_type(cudf::type_to_id()), + cudf::test::get_default_stream()); +} + +TEST_F(BinaryopTest, ColumnScalar) +{ + cudf::test::fixed_width_column_wrapper lhs{10, 20, 30, 40, 50}; + cudf::numeric_scalar rhs{23, true, cudf::test::get_default_stream()}; + + cudf::binary_operation(lhs, + rhs, + cudf::binary_operator::ADD, + cudf::data_type(cudf::type_to_id()), + cudf::test::get_default_stream()); +} + +TEST_F(BinaryopTest, ScalarColumn) +{ + cudf::numeric_scalar lhs{42, true, cudf::test::get_default_stream()}; + cudf::test::fixed_width_column_wrapper rhs{15, 25, 35, 45, 55}; + + cudf::binary_operation(lhs, + rhs, + cudf::binary_operator::ADD, + cudf::data_type(cudf::type_to_id()), + cudf::test::get_default_stream()); +} + +class BinaryopPTXTest : public BinaryopTest { + protected: + void SetUp() override + { + if (!can_do_runtime_jit()) { GTEST_SKIP() << "Skipping tests that require 11.5 runtime"; } + } +}; + +TEST_F(BinaryopPTXTest, ColumnColumnPTX) +{ + cudf::test::fixed_width_column_wrapper lhs{10, 20, 30, 40, 50}; + cudf::test::fixed_width_column_wrapper rhs{15, 25, 35, 45, 55}; + + // c = a*a*a + b*b + char const* ptx = + R"***( +// +// Generated by NVIDIA NVVM Compiler +// +// Compiler Build ID: CL-24817639 +// Cuda compilation tools, release 10.0, V10.0.130 +// Based on LLVM 3.4svn +// + +.version 6.3 +.target sm_70 +.address_size 64 + + // .globl _ZN8__main__7add$241Eix +.common .global .align 8 .u64 _ZN08NumbaEnv8__main__7add$241Eix; +.common .global .align 8 .u64 _ZN08NumbaEnv5numba7targets7numbers14int_power_impl12$3clocals$3e13int_power$242Exx; + +.visible .func (.param .b32 func_retval0) _ZN8__main__7add$241Eix( + .param .b64 _ZN8__main__7add$241Eix_param_0, + .param .b32 _ZN8__main__7add$241Eix_param_1, + .param .b64 _ZN8__main__7add$241Eix_param_2 +) +{ + .reg .b32 %r<3>; + .reg .b64 %rd<8>; + + + ld.param.u64 %rd1, [_ZN8__main__7add$241Eix_param_0]; + ld.param.u32 %r1, [_ZN8__main__7add$241Eix_param_1]; + ld.param.u64 %rd2, [_ZN8__main__7add$241Eix_param_2]; + cvt.s64.s32 %rd3, %r1; + mul.wide.s32 %rd4, %r1, %r1; + mul.lo.s64 %rd5, %rd4, %rd3; + mul.lo.s64 %rd6, %rd2, %rd2; + add.s64 %rd7, %rd6, %rd5; + st.u64 [%rd1], %rd7; + mov.u32 %r2, 0; + st.param.b32 [func_retval0+0], %r2; + ret; +} + +)***"; + + cudf::binary_operation( + lhs, rhs, ptx, cudf::data_type(cudf::type_to_id()), cudf::test::get_default_stream()); + cudf::binary_operation(lhs, rhs, ptx, cudf::data_type(cudf::type_to_id())); +}