From e3ac8be66804d850f91e6c5c1ddd6c111e17034b Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 8 Feb 2024 14:13:59 -0500 Subject: [PATCH 1/5] Rework cudf::strings::detail::copy_range for offsetalator --- cpp/CMakeLists.txt | 1 + cpp/benchmarks/CMakeLists.txt | 1 + cpp/benchmarks/string/copy_range.cpp | 62 +++++ .../cudf/strings/detail/copy_range.cuh | 216 ------------------ .../cudf/strings/detail/copy_range.hpp | 60 +++++ cpp/src/copying/copy_range.cu | 29 +-- cpp/src/strings/copying/copy_range.cu | 139 +++++++++++ 7 files changed, 267 insertions(+), 241 deletions(-) create mode 100644 cpp/benchmarks/string/copy_range.cpp delete mode 100644 cpp/include/cudf/strings/detail/copy_range.cuh create mode 100644 cpp/include/cudf/strings/detail/copy_range.hpp create mode 100644 cpp/src/strings/copying/copy_range.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d4ed6c113b9..078de27f0ea 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -570,6 +570,7 @@ add_library( src/strings/convert/convert_lists.cu src/strings/copying/concatenate.cu src/strings/copying/copying.cu + src/strings/copying/copy_range.cu src/strings/copying/shift.cu src/strings/count_matches.cu src/strings/extract/extract.cu diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 35b03fa33d0..d7f1ffe8265 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -311,6 +311,7 @@ ConfigureNVBench( string/case.cpp string/char_types.cpp string/contains.cpp + string/copy_range.cpp string/count.cpp string/extract.cpp string/gather.cpp diff --git a/cpp/benchmarks/string/copy_range.cpp b/cpp/benchmarks/string/copy_range.cpp new file mode 100644 index 00000000000..e42c799e720 --- /dev/null +++ b/cpp/benchmarks/string/copy_range.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include + +#include + +static void bench_copy(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + + if (static_cast(num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + data_profile const table_profile = + data_profile_builder() + .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width) + .no_validity(); + auto const source_table = + create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile); + auto const target_table = + create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile); + + auto const start = row_width / 4; + auto const end = (row_width * 3) / 4; + auto const source = source_table->view().column(0); + auto const target = target_table->view().column(0); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + auto chars_size = cudf::strings_column_view(target).chars_size(cudf::get_default_stream()); + state.add_global_memory_reads(chars_size); // all bytes are read; + state.add_global_memory_writes(chars_size); // both columns are similar size + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = cudf::copy_range(source, target, start, end, start / 2); + }); +} + +NVBENCH_BENCH(bench_copy) + .set_name("copy_range") + .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096}) + .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); diff --git a/cpp/include/cudf/strings/detail/copy_range.cuh b/cpp/include/cudf/strings/detail/copy_range.cuh deleted file mode 100644 index 567452bac4e..00000000000 --- a/cpp/include/cudf/strings/detail/copy_range.cuh +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include - -namespace { -template -struct compute_element_size { - SourceValueIterator source_value_begin; - SourceValidityIterator source_validity_begin; - cudf::column_device_view d_target; - cudf::size_type target_begin; - cudf::size_type target_end; - - __device__ cudf::size_type operator()(cudf::size_type idx) - { - if (idx >= target_begin && idx < target_end) { - if (source_has_nulls) { - return *(source_validity_begin + (idx - target_begin)) - ? (*(source_value_begin + (idx - target_begin))).size_bytes() - : 0; - } else { - return (*(source_value_begin + (idx - target_begin))).size_bytes(); - } - } else { - if (target_has_nulls) { - return d_target.is_valid_nocheck(idx) - ? d_target.element(idx).size_bytes() - : 0; - } else { - return d_target.element(idx).size_bytes(); - } - } - } -}; - -} // namespace - -namespace cudf { -namespace strings { -namespace detail { -/** - * @brief Internal API to copy a range of string elements out-of-place from - * source iterators to a target column. - * - * Creates a new column as if an in-place copy was performed into @p target. - * The elements indicated by the indices [@p target_begin, @p target_end) were - * replaced with the elements retrieved from source iterators; - * *(@p source_value_begin + idx) if *(@p source_validity_begin + idx) is true, - * invalidate otherwise (where idx = [0, @p target_end - @p target_begin)). - * Elements outside the range are copied from @p target into the new target - * column to return. - * - * @throws cudf::logic_error for invalid range (if @p target_begin < 0, - * target_begin >= @p target.size(), or @p target_end > @p target.size()). - * - * @tparam SourceValueIterator Iterator for retrieving source values - * @tparam SourceValidityIterator Iterator for retrieving source validities - * @param source_value_begin Start of source value iterator - * @param source_validity_begin Start of source validity iterator - * @param target The strings column to copy from outside the range. - * @param target_begin The starting index of the target range (inclusive) - * @param target_end The index of the last element in the target range - * (exclusive) - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return std::unique_ptr The result target column - */ -template -std::unique_ptr copy_range(SourceValueIterator source_value_begin, - SourceValidityIterator source_validity_begin, - strings_column_view const& target, - size_type target_begin, - size_type target_end, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - CUDF_EXPECTS( - (target_begin >= 0) && (target_begin < target.size()) && (target_end <= target.size()), - "Range is out of bounds."); - - if (target_end == target_begin) { - return std::make_unique(target.parent(), stream, mr); - } else { - auto p_target_device_view = column_device_view::create(target.parent(), stream); - auto d_target = *p_target_device_view; - - // create resulting null mask - - std::pair valid_mask{}; - if (target.has_nulls()) { // check validities for both source & target - valid_mask = cudf::detail::valid_if( - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(target.size()), - [source_validity_begin, d_target, target_begin, target_end] __device__(size_type idx) { - return (idx >= target_begin && idx < target_end) - ? *(source_validity_begin + (idx - target_begin)) - : d_target.is_valid_nocheck(idx); - }, - stream, - mr); - } else { // check validities for source only - valid_mask = cudf::detail::valid_if( - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(target.size()), - [source_validity_begin, d_target, target_begin, target_end] __device__(size_type idx) { - return (idx >= target_begin && idx < target_end) - ? *(source_validity_begin + (idx - target_begin)) - : true; - }, - stream, - mr); - } - - auto null_count = valid_mask.second; - rmm::device_buffer null_mask{0, stream, mr}; - if (target.parent().nullable() || null_count > 0) { null_mask = std::move(valid_mask.first); } - - // build offsets column - - std::unique_ptr p_offsets_column{nullptr}; - size_type chars_bytes = 0; - if (target.has_nulls()) { // check validities for both source & target - auto string_size_begin = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), - compute_element_size{ - source_value_begin, source_validity_begin, d_target, target_begin, target_end}); - - std::tie(p_offsets_column, chars_bytes) = cudf::detail::make_offsets_child_column( - string_size_begin, string_size_begin + target.size(), stream, mr); - } else if (null_count > 0) { // check validities for source only - auto string_size_begin = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), - compute_element_size{ - source_value_begin, source_validity_begin, d_target, target_begin, target_end}); - - std::tie(p_offsets_column, chars_bytes) = cudf::detail::make_offsets_child_column( - string_size_begin, string_size_begin + target.size(), stream, mr); - } else { // no need to check validities - auto string_size_begin = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), - compute_element_size{ - source_value_begin, source_validity_begin, d_target, target_begin, target_end}); - - std::tie(p_offsets_column, chars_bytes) = cudf::detail::make_offsets_child_column( - string_size_begin, string_size_begin + target.size(), stream, mr); - } - - // create the chars column - - auto p_offsets = - thrust::device_pointer_cast(p_offsets_column->view().template data()); - auto p_chars_column = strings::detail::create_chars_child_column(chars_bytes, stream, mr); - - // copy to the chars column - - auto p_chars = (p_chars_column->mutable_view()).template data(); - thrust::for_each(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(target.size()), - [source_value_begin, - source_validity_begin, - d_target, - target_begin, - target_end, - p_offsets, - p_chars] __device__(size_type idx) { - if (p_offsets[idx + 1] - p_offsets[idx] > 0) { - const auto source = (idx >= target_begin && idx < target_end) - ? *(source_value_begin + (idx - target_begin)) - : d_target.element(idx); - memcpy(p_chars + p_offsets[idx], source.data(), source.size_bytes()); - } - }); - - return make_strings_column(target.size(), - std::move(p_offsets_column), - std::move(p_chars_column->release().data.release()[0]), - null_count, - std::move(null_mask)); - } -} - -} // namespace detail -} // namespace strings -} // namespace cudf diff --git a/cpp/include/cudf/strings/detail/copy_range.hpp b/cpp/include/cudf/strings/detail/copy_range.hpp new file mode 100644 index 00000000000..e18f1fdc5ad --- /dev/null +++ b/cpp/include/cudf/strings/detail/copy_range.hpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include + +namespace cudf { +namespace strings { +namespace detail { + +/** + * @brief Internal API to copy a range of string elements out-of-place from + * a source column to a target column + * + * Creates a new column as if an in-place copy was performed into `target`. + * The elements indicated by the indices `source_begin`, `source_end`) + * replace with the elements in the target column starting at `target_begin`. + * Elements outside the range are copied from `target` into the new target + * column to return. + * + * @throws cudf::logic_error for invalid range (if `target_begin < 0`, + * or `target_begin >= target.size()`, + * or `target_begin + (source_end-source_begin)` > target.size()`). + * + * @param source The strings column to copy from inside the `target_begin` range + * @param target The strings column to copy from outside the range + * @param source_end The index of the first element in the source range + * @param source_end The index of the last element in the source range (exclusive) + * @param target_begin The starting index of the target range (inclusive) + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return The result target column + */ +std::unique_ptr copy_range(strings_column_view const& source, + strings_column_view const& target, + size_type source_begin, + size_type source_end, + size_type target_begin, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +} // namespace detail +} // namespace strings +} // namespace cudf diff --git a/cpp/src/copying/copy_range.cu b/cpp/src/copying/copy_range.cu index af253858c73..61d51f1d284 100644 --- a/cpp/src/copying/copy_range.cu +++ b/cpp/src/copying/copy_range.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include @@ -130,29 +130,8 @@ std::unique_ptr out_of_place_copy_range_dispatch::operator()(*p_source_device_view, - cudf::string_view()) + - source_begin, - cudf::detail::make_validity_iterator(*p_source_device_view) + source_begin, - cudf::strings_column_view(target), - target_begin, - target_end, - stream, - mr); - } else { - return cudf::strings::detail::copy_range( - p_source_device_view->begin() + source_begin, - thrust::make_constant_iterator(true), - cudf::strings_column_view(target), - target_begin, - target_end, - stream, - mr); - } + return cudf::strings::detail::copy_range( + source, target, source_begin, source_end, target_begin, stream, mr); } template <> diff --git a/cpp/src/strings/copying/copy_range.cu b/cpp/src/strings/copying/copy_range.cu new file mode 100644 index 00000000000..b5e4f038e99 --- /dev/null +++ b/cpp/src/strings/copying/copy_range.cu @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +namespace cudf { +namespace strings { +namespace detail { +namespace { +struct compute_element_size { + column_device_view d_source; + column_device_view d_target; + size_type source_begin; + size_type target_begin; + size_type target_end; + bool source_has_nulls; + bool target_has_nulls; + + __device__ cudf::size_type operator()(cudf::size_type idx) + { + if (idx >= target_begin && idx < target_end) { + auto const str_idx = source_begin + (idx - target_begin); + return source_has_nulls && d_source.is_null_nocheck(str_idx) + ? 0 + : d_source.element(str_idx).size_bytes(); + } else { + return target_has_nulls && d_target.is_null_nocheck(idx) + ? 0 + : d_target.element(idx).size_bytes(); + } + } +}; + +} // namespace + +std::unique_ptr copy_range(strings_column_view const& source, + strings_column_view const& target, + size_type source_begin, + size_type source_end, + size_type target_begin, + // size_type target_end, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto target_end = target_begin + (source_end - source_begin); + CUDF_EXPECTS( + (target_begin >= 0) && (target_begin < target.size()) && (target_end <= target.size()), + "Range is out of bounds."); + + if (target_end == target_begin) { return std::make_unique(target.parent(), stream, mr); } + auto source_device_view = column_device_view::create(source.parent(), stream); + auto d_source = *source_device_view; + auto target_device_view = column_device_view::create(target.parent(), stream); + auto d_target = *target_device_view; + + // create null mask + auto [null_mask, null_count] = cudf::detail::valid_if( + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(target.size()), + [d_source, d_target, source_begin, target_begin, target_end] __device__(size_type idx) { + return (idx >= target_begin && idx < target_end) + ? d_source.is_valid(source_begin + (idx - target_begin)) + : d_target.is_valid(idx); + }, + stream, + mr); + + auto [check_source, check_target] = [target, null_count = null_count] { + // check validities for both source & target + if (target.has_nulls()) { return std::make_pair(true, true); } + // check validities for source only + if (null_count > 0) { return std::make_pair(true, false); } + // no need to check validities + return std::make_pair(false, false); + }(); + + // create offsets + auto sizes_begin = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + compute_element_size{ + d_source, d_target, source_begin, target_begin, target_end, check_source, check_target}); + auto [offsets_column, chars_bytes] = cudf::strings::detail::make_offsets_child_column( + sizes_begin, sizes_begin + target.size(), stream, mr); + auto d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets_column->view()); + + // create chars + auto chars_data = rmm::device_uvector(chars_bytes, stream, mr); + auto d_chars = chars_data.data(); + thrust::for_each( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(target.size()), + [d_source, d_target, source_begin, target_begin, target_end, d_offsets, d_chars] __device__( + size_type idx) { + if (d_offsets[idx + 1] - d_offsets[idx] > 0) { + const auto source = (idx >= target_begin && idx < target_end) + ? d_source.element(source_begin + (idx - target_begin)) + : d_target.element(idx); + memcpy(d_chars + d_offsets[idx], source.data(), source.size_bytes()); + } + }); + + return make_strings_column(target.size(), + std::move(offsets_column), + chars_data.release(), + null_count, + null_count > 0 ? std::move(null_mask) : rmm::device_buffer{}); +} + +} // namespace detail +} // namespace strings +} // namespace cudf From abc2191ff8db22a78ca2ec88e5aa18dd550f3ffc Mon Sep 17 00:00:00 2001 From: David Wendt Date: Mon, 12 Feb 2024 11:55:38 -0500 Subject: [PATCH 2/5] removed commented out line --- cpp/src/strings/copying/copy_range.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/strings/copying/copy_range.cu b/cpp/src/strings/copying/copy_range.cu index b5e4f038e99..90b7f40c011 100644 --- a/cpp/src/strings/copying/copy_range.cu +++ b/cpp/src/strings/copying/copy_range.cu @@ -65,7 +65,6 @@ std::unique_ptr copy_range(strings_column_view const& source, size_type source_begin, size_type source_end, size_type target_begin, - // size_type target_end, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { From defd3f1776d52a5581b118e7d3287ff4dfed0e01 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 13 Feb 2024 16:12:18 -0500 Subject: [PATCH 3/5] use make_counting_transform_iterator --- cpp/src/strings/copying/copy_range.cu | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cpp/src/strings/copying/copy_range.cu b/cpp/src/strings/copying/copy_range.cu index 90b7f40c011..34cd922671d 100644 --- a/cpp/src/strings/copying/copy_range.cu +++ b/cpp/src/strings/copying/copy_range.cu @@ -28,7 +28,6 @@ #include #include -#include namespace cudf { namespace strings { @@ -101,8 +100,8 @@ std::unique_ptr copy_range(strings_column_view const& source, }(); // create offsets - auto sizes_begin = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), + auto sizes_begin = cudf::detail::make_counting_transform_iterator( + 0, compute_element_size{ d_source, d_target, source_begin, target_begin, target_end, check_source, check_target}); auto [offsets_column, chars_bytes] = cudf::strings::detail::make_offsets_child_column( From 861ec827b0fac6738815e326d248bafe19797c30 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 14 Feb 2024 17:08:06 -0500 Subject: [PATCH 4/5] fix function name conflict --- cpp/benchmarks/string/copy_range.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/string/copy_range.cpp b/cpp/benchmarks/string/copy_range.cpp index e42c799e720..b494b286923 100644 --- a/cpp/benchmarks/string/copy_range.cpp +++ b/cpp/benchmarks/string/copy_range.cpp @@ -22,7 +22,7 @@ #include -static void bench_copy(nvbench::state& state) +static void bench_copy_range(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); auto const row_width = static_cast(state.get_int64("row_width")); @@ -56,7 +56,7 @@ static void bench_copy(nvbench::state& state) }); } -NVBENCH_BENCH(bench_copy) +NVBENCH_BENCH(bench_copy_range) .set_name("copy_range") .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096}) .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); From e54f1abb63a342b464a385e4cf56b2dff22201f2 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 15 Feb 2024 17:22:35 -0500 Subject: [PATCH 5/5] fix benchmark; check for nullable --- cpp/benchmarks/string/copy_range.cpp | 16 ++++++-------- cpp/src/strings/copying/copy_range.cu | 32 ++++++++++++++++----------- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/cpp/benchmarks/string/copy_range.cpp b/cpp/benchmarks/string/copy_range.cpp index b494b286923..af217a49195 100644 --- a/cpp/benchmarks/string/copy_range.cpp +++ b/cpp/benchmarks/string/copy_range.cpp @@ -36,15 +36,13 @@ static void bench_copy_range(nvbench::state& state) data_profile_builder() .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width) .no_validity(); - auto const source_table = - create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile); - auto const target_table = - create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile); + auto const source_tables = create_random_table( + {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{num_rows}, table_profile); - auto const start = row_width / 4; - auto const end = (row_width * 3) / 4; - auto const source = source_table->view().column(0); - auto const target = target_table->view().column(0); + auto const start = num_rows / 4; + auto const end = (num_rows * 3) / 4; + auto const source = source_tables->view().column(0); + auto const target = source_tables->view().column(1); state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); auto chars_size = cudf::strings_column_view(target).chars_size(cudf::get_default_stream()); @@ -52,7 +50,7 @@ static void bench_copy_range(nvbench::state& state) state.add_global_memory_writes(chars_size); // both columns are similar size state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { - auto result = cudf::copy_range(source, target, start, end, start / 2); + [[maybe_unused]] auto result = cudf::copy_range(source, target, start, end, start / 2); }); } diff --git a/cpp/src/strings/copying/copy_range.cu b/cpp/src/strings/copying/copy_range.cu index 34cd922671d..f4c86389534 100644 --- a/cpp/src/strings/copying/copy_range.cu +++ b/cpp/src/strings/copying/copy_range.cu @@ -70,7 +70,8 @@ std::unique_ptr copy_range(strings_column_view const& source, auto target_end = target_begin + (source_end - source_begin); CUDF_EXPECTS( (target_begin >= 0) && (target_begin < target.size()) && (target_end <= target.size()), - "Range is out of bounds."); + "Range is out of bounds.", + std::invalid_argument); if (target_end == target_begin) { return std::make_unique(target.parent(), stream, mr); } auto source_device_view = column_device_view::create(source.parent(), stream); @@ -79,16 +80,21 @@ std::unique_ptr copy_range(strings_column_view const& source, auto d_target = *target_device_view; // create null mask - auto [null_mask, null_count] = cudf::detail::valid_if( - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(target.size()), - [d_source, d_target, source_begin, target_begin, target_end] __device__(size_type idx) { - return (idx >= target_begin && idx < target_end) - ? d_source.is_valid(source_begin + (idx - target_begin)) - : d_target.is_valid(idx); - }, - stream, - mr); + auto [null_mask, null_count] = [&] { + if (!target.parent().nullable() && !source.parent().nullable()) { + return std::pair(rmm::device_buffer{}, 0); + } + return cudf::detail::valid_if( + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(target.size()), + [d_source, d_target, source_begin, target_begin, target_end] __device__(size_type idx) { + return (idx >= target_begin && idx < target_end) + ? d_source.is_valid(source_begin + (idx - target_begin)) + : d_target.is_valid(idx); + }, + stream, + mr); + }(); auto [check_source, check_target] = [target, null_count = null_count] { // check validities for both source & target @@ -112,7 +118,7 @@ std::unique_ptr copy_range(strings_column_view const& source, auto chars_data = rmm::device_uvector(chars_bytes, stream, mr); auto d_chars = chars_data.data(); thrust::for_each( - rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(target.size()), [d_source, d_target, source_begin, target_begin, target_end, d_offsets, d_chars] __device__( @@ -129,7 +135,7 @@ std::unique_ptr copy_range(strings_column_view const& source, std::move(offsets_column), chars_data.release(), null_count, - null_count > 0 ? std::move(null_mask) : rmm::device_buffer{}); + std::move(null_mask)); } } // namespace detail