rapidsai · rapids-bot · Feb 14, 2024 · Feb 9, 2024 · Feb 9, 2024 · Feb 13, 2024
@@ -311,6 +311,7 @@ ConfigureNVBench(
   string/case.cpp
   string/char_types.cpp
   string/contains.cpp
+  string/copy_if_else.cpp
   string/count.cpp
   string/extract.cpp
   string/gather.cpp

@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+
+#include <cudf/copying.hpp>
+#include <cudf/strings/strings_column_view.hpp>
+#include <cudf/utilities/default_stream.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+/**
+ * @brief Benchmarks cudf::copy_if_else on two randomly generated strings columns.
+ *
+ * Reads the "num_rows" and "row_width" axes from the NVBench state, builds a
+ * source column, a target column and a BOOL8 selector column of `num_rows` rows
+ * each, and times cudf::copy_if_else(source, target, selector).
+ *
+ * Configurations where num_rows * row_width reaches the cudf::size_type limit
+ * are skipped.
+ *
+ * @param state NVBench state providing the axis values and collecting results
+ */
+static void bench_copy(nvbench::state& state)
+{
+  auto const num_rows  = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));
+
+  // The product is computed in std::size_t so the comparison itself cannot overflow.
+  if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
+      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
+    state.skip("Skip benchmarks greater than size_type limit");
+    // skip() only records the reason; return so no data is generated and nothing
+    // is executed for a configuration that was just rejected.
+    return;
+  }
+
+  // Both string inputs share one profile: NORMAL distribution parameterized by 0 and row_width.
+  data_profile const str_profile = data_profile_builder().distribution(
+    cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width);
+  auto const source_table =
+    create_random_table({cudf::type_id::STRING}, row_count{num_rows}, str_profile);
+  auto const target_table =
+    create_random_table({cudf::type_id::STRING}, row_count{num_rows}, str_profile);
+  // The selector column is generated with a no_validity() profile.
+  data_profile const bool_profile = data_profile_builder().no_validity();
+  auto const booleans =
+    create_random_table({cudf::type_id::BOOL8}, row_count{num_rows}, bool_profile);
+
+  auto const source     = source_table->view().column(0);
+  auto const target     = target_table->view().column(0);
+  auto const left_right = booleans->view().column(0);
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
+  // Throughput is estimated from the target column's character bytes only.
+  auto chars_size = cudf::strings_column_view(target).chars_size(cudf::get_default_stream());
+  state.add_global_memory_reads<nvbench::int8_t>(chars_size);   // all bytes are read;
+  state.add_global_memory_writes<nvbench::int8_t>(chars_size);  // both columns are similar size
+
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    [[maybe_unused]] auto result = cudf::copy_if_else(source, target, left_right);
+  });
+}
+
+NVBENCH_BENCH(bench_copy)  // registers bench_copy as an NVBench benchmark
+  .set_name("copy_if_else")  // name assigned to this benchmark
+  .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096})  // read via get_int64("row_width")
+  .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216});  // read via get_int64("num_rows")
@@ -16,18 +16,16 @@
 #pragma once
 
 #include <cudf/column/column.hpp>
-#include <cudf/column/column_device_view.cuh>
 #include <cudf/detail/valid_if.cuh>
-#include <cudf/strings/detail/strings_children.cuh>
-#include <cudf/strings/strings_column_view.hpp>
+#include <cudf/strings/detail/strings_column_factories.cuh>
 
 #include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
 
-#include <thrust/for_each.h>
 #include <thrust/iterator/counting_iterator.h>
-#include <thrust/iterator/transform_iterator.h>
 #include <thrust/optional.h>
+#include <thrust/transform.h>
 
 #include <cuda/functional>
 
@@ -65,55 +63,36 @@ std::unique_ptr<cudf::column> copy_if_else(StringIterLeft lhs_begin,
                                            rmm::mr::device_memory_resource* mr)
 {
   auto strings_count = std::distance(lhs_begin, lhs_end);
-  if (strings_count == 0) return make_empty_column(type_id::STRING);
+  if (strings_count == 0) { return make_empty_column(type_id::STRING); }
 
   // create null mask
-  auto valid_mask = cudf::detail::valid_if(
+  auto [null_mask, null_count] = cudf::detail::valid_if(
     thrust::make_counting_iterator<size_type>(0),
     thrust::make_counting_iterator<size_type>(strings_count),
     [lhs_begin, rhs_begin, filter_fn] __device__(size_type idx) {
       return filter_fn(idx) ? lhs_begin[idx].has_value() : rhs_begin[idx].has_value();
     },
     stream,
     mr);
-  size_type null_count = valid_mask.second;
-  auto null_mask       = (null_count > 0) ? std::move(valid_mask.first) : rmm::device_buffer{};
+  if (null_count == 0) { null_mask = rmm::device_buffer{}; }
 
-  // build offsets column
-  auto offsets_transformer = cuda::proclaim_return_type<size_type>(
-    [lhs_begin, rhs_begin, filter_fn] __device__(size_type idx) {
-      auto const result = filter_fn(idx) ? lhs_begin[idx] : rhs_begin[idx];
-      return result.has_value() ? result->size_bytes() : 0;
-    });
-
-  auto offsets_transformer_itr = thrust::make_transform_iterator(
-    thrust::make_counting_iterator<size_type>(0), offsets_transformer);
-  auto [offsets_column, bytes] = cudf::detail::make_offsets_child_column(
-    offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr);
-  auto d_offsets = offsets_column->view().template data<int32_t>();
+  // build vector of strings
+  rmm::device_uvector<string_index_pair> indices(strings_count, stream);
+  thrust::transform(rmm::exec_policy_nosync(stream),
+                    thrust::make_counting_iterator<size_type>(0),
+                    thrust::make_counting_iterator<size_type>(strings_count),
+                    indices.begin(),
+                    [lhs_begin, rhs_begin, filter_fn] __device__(size_type idx) {
+                      auto const result = filter_fn(idx) ? lhs_begin[idx] : rhs_begin[idx];
+                      auto const d_str  = result.has_value() ? *result : string_view{"", 0};
+                      return string_index_pair{d_str.data(), d_str.size_bytes()};
+                    });
 
-  // build chars column
-  auto chars_column = create_chars_child_column(bytes, stream, mr);
-  auto d_chars      = chars_column->mutable_view().template data<char>();
-  // fill in chars
-  thrust::for_each_n(
-    rmm::exec_policy(stream),
-    thrust::make_counting_iterator<size_type>(0),
-    strings_count,
-    [lhs_begin, rhs_begin, filter_fn, d_offsets, d_chars] __device__(size_type idx) {
-      auto const result = filter_fn(idx) ? lhs_begin[idx] : rhs_begin[idx];
-      if (!result.has_value()) return;
-      auto const d_str = *result;
-      memcpy(d_chars + d_offsets[idx], d_str.data(), d_str.size_bytes());
-    });
-
-  return make_strings_column(strings_count,
-                             std::move(offsets_column),
-                             std::move(chars_column->release().data.release()[0]),
-                             null_count,
-                             std::move(null_mask));
+  // convert vector into strings column
+  auto result = make_strings_column(indices.begin(), indices.end(), stream, mr);
+  result->set_null_mask(std::move(null_mask), null_count);
+  return result;
 }
-
 }  // namespace detail
 }  // namespace strings
 }  // namespace cudf