diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 35b03fa33d0..6ddc5a6b8de 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -311,6 +311,7 @@ ConfigureNVBench( string/case.cpp string/char_types.cpp string/contains.cpp + string/copy_if_else.cpp string/count.cpp string/extract.cpp string/gather.cpp diff --git a/cpp/benchmarks/string/copy_if_else.cpp b/cpp/benchmarks/string/copy_if_else.cpp new file mode 100644 index 00000000000..e06cca497c2 --- /dev/null +++ b/cpp/benchmarks/string/copy_if_else.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include
+
+#include
+#include
+#include
+
+#include
+
+// Benchmarks cudf::copy_if_else on two random STRING columns selected by a random BOOL8 column.
+static void bench_copy(nvbench::state& state)
+{
+  auto const num_rows = static_cast(state.get_int64("num_rows"));
+  auto const row_width = static_cast(state.get_int64("row_width"));
+  // Skip configurations whose num_rows * row_width product reaches the size_type limit.
+  if (static_cast(num_rows) * static_cast(row_width) >=
+      static_cast(std::numeric_limits::max())) {
+    state.skip("Skip benchmarks greater than size_type limit");
+    return;  // skip() does not leave this function; return before any input data is generated
+  }
+  // Random inputs: both string columns share one profile; the selector uses a no_validity() profile.
+  data_profile const str_profile = data_profile_builder().distribution(
+    cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width);
+  auto const source_table =
+    create_random_table({cudf::type_id::STRING}, row_count{num_rows}, str_profile);
+  auto const target_table =
+    create_random_table({cudf::type_id::STRING}, row_count{num_rows}, str_profile);
+  data_profile const bool_profile = data_profile_builder().no_validity();
+  auto const booleans =
+    create_random_table({cudf::type_id::BOOL8}, row_count{num_rows}, bool_profile);
+  auto const source = source_table->view().column(0);
+  auto const target = target_table->view().column(0);
+  auto const left_right = booleans->view().column(0);
+  // Run on the cudf default stream; target's chars size is reported as both bytes read and written.
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
+  auto chars_size = cudf::strings_column_view(target).chars_size(cudf::get_default_stream());
+  state.add_global_memory_reads(chars_size);   // all bytes are read
+  state.add_global_memory_writes(chars_size);  // both columns are similar size
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    [[maybe_unused]] auto result = cudf::copy_if_else(source, target, left_right);
+  });
+}
+
+NVBENCH_BENCH(bench_copy)
+  .set_name("copy_if_else")
+  .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096})
+  .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216});
diff --git a/cpp/include/cudf/strings/detail/copy_if_else.cuh b/cpp/include/cudf/strings/detail/copy_if_else.cuh
index 64e14dcc549..e1ef97b7803 100644
--- 
a/cpp/include/cudf/strings/detail/copy_if_else.cuh +++ b/cpp/include/cudf/strings/detail/copy_if_else.cuh @@ -16,18 +16,16 @@ #pragma once #include -#include #include -#include -#include +#include #include +#include #include -#include #include -#include #include +#include #include @@ -65,10 +63,10 @@ std::unique_ptr copy_if_else(StringIterLeft lhs_begin, rmm::mr::device_memory_resource* mr) { auto strings_count = std::distance(lhs_begin, lhs_end); - if (strings_count == 0) return make_empty_column(type_id::STRING); + if (strings_count == 0) { return make_empty_column(type_id::STRING); } // create null mask - auto valid_mask = cudf::detail::valid_if( + auto [null_mask, null_count] = cudf::detail::valid_if( thrust::make_counting_iterator(0), thrust::make_counting_iterator(strings_count), [lhs_begin, rhs_begin, filter_fn] __device__(size_type idx) { @@ -76,44 +74,25 @@ std::unique_ptr copy_if_else(StringIterLeft lhs_begin, }, stream, mr); - size_type null_count = valid_mask.second; - auto null_mask = (null_count > 0) ? std::move(valid_mask.first) : rmm::device_buffer{}; + if (null_count == 0) { null_mask = rmm::device_buffer{}; } - // build offsets column - auto offsets_transformer = cuda::proclaim_return_type( - [lhs_begin, rhs_begin, filter_fn] __device__(size_type idx) { - auto const result = filter_fn(idx) ? lhs_begin[idx] : rhs_begin[idx]; - return result.has_value() ? 
result->size_bytes() : 0; - }); - - auto offsets_transformer_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), offsets_transformer); - auto [offsets_column, bytes] = cudf::detail::make_offsets_child_column( - offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr); - auto d_offsets = offsets_column->view().template data(); + // build vector of strings + rmm::device_uvector indices(strings_count, stream); + thrust::transform(rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(strings_count), + indices.begin(), + [lhs_begin, rhs_begin, filter_fn] __device__(size_type idx) { + auto const result = filter_fn(idx) ? lhs_begin[idx] : rhs_begin[idx]; + auto const d_str = result.has_value() ? *result : string_view{"", 0}; + return string_index_pair{d_str.data(), d_str.size_bytes()}; + }); - // build chars column - auto chars_column = create_chars_child_column(bytes, stream, mr); - auto d_chars = chars_column->mutable_view().template data(); - // fill in chars - thrust::for_each_n( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings_count, - [lhs_begin, rhs_begin, filter_fn, d_offsets, d_chars] __device__(size_type idx) { - auto const result = filter_fn(idx) ? lhs_begin[idx] : rhs_begin[idx]; - if (!result.has_value()) return; - auto const d_str = *result; - memcpy(d_chars + d_offsets[idx], d_str.data(), d_str.size_bytes()); - }); - - return make_strings_column(strings_count, - std::move(offsets_column), - std::move(chars_column->release().data.release()[0]), - null_count, - std::move(null_mask)); + // convert vector into strings column + auto result = make_strings_column(indices.begin(), indices.end(), stream, mr); + result->set_null_mask(std::move(null_mask), null_count); + return result; } - } // namespace detail } // namespace strings } // namespace cudf