rapidsai · rapids-bot · Jul 8, 2022 · Jul 6, 2022 · Jul 6, 2022 · Jul 6, 2022
@@ -160,6 +160,7 @@ ConfigureBench(ITERATOR_BENCH iterator/iterator.cu)
 # ##################################################################################################
 # * search benchmark ------------------------------------------------------------------------------
 ConfigureBench(SEARCH_BENCH search/search.cpp)
+ConfigureNVBench(SEARCH_NVBENCH search/contains.cpp)
 
 # ##################################################################################################
 # * sort benchmark --------------------------------------------------------------------------------

@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+#include <benchmarks/fixture/rmm_pool_raii.hpp>
+
+#include <cudf/detail/search.hpp>
+#include <cudf/scalar/scalar_factories.hpp>
+#include <cudf/types.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+namespace {
+// Generate a random table of `n_rows` rows and `n_cols` columns, all of type `Type`.
+// Values use a UNIFORM distribution bounded by Type{0} and Type{1000}; the null
+// frequency is 0.1 when `has_nulls` is true and left unset otherwise.
+template <typename Type>
+std::unique_ptr<cudf::table> create_table_data(cudf::size_type n_rows,
+                                               cudf::size_type n_cols,
+                                               bool has_nulls = false)
+{
+  auto const dtype          = cudf::type_to_id<Type>();
+  auto const null_frequency = has_nulls ? std::optional<double>{0.1} : std::nullopt;
+
+  data_profile profile;
+  profile.set_cardinality(0);
+  profile.set_null_frequency(null_frequency);
+  profile.set_distribution_params<Type>(dtype, distribution_id::UNIFORM, Type{0}, Type{1000});
+
+  // Every column shares the same element type.
+  return create_random_table(cycle_dtypes({dtype}, n_cols), row_count{n_rows}, profile);
+}
+
+// Generate a single random column of type `Type` by building a one-column table
+// with `create_table_data` and taking ownership of its only column.
+template <typename Type>
+std::unique_ptr<cudf::column> create_column_data(cudf::size_type n_rows, bool has_nulls = false)
+{
+  auto columns = create_table_data<Type>(n_rows, 1, has_nulls)->release();
+  return std::move(columns.front());
+}
+
+}  // namespace
+
+// Benchmark `cudf::detail::contains(column, scalar)` on a randomly generated `int` column.
+// Axes read from `state`: "data_size" is the haystack row count and "has_nulls" selects
+// whether the haystack is generated with nulls.
+static void nvbench_contains_scalar(nvbench::state& state)
+{
+  using Type = int;
+  cudf::rmm_pool_raii pool_raii;
+
+  auto const num_rows      = state.get_int64("data_size");
+  auto const include_nulls = state.get_int64("has_nulls") != 0;
+
+  auto const haystack = create_column_data<Type>(num_rows, include_nulls);
+  // The needle is half the row count, not a value sampled from the haystack.
+  auto const needle = cudf::make_fixed_width_scalar<Type>(num_rows / 2);
+
+  state.exec(nvbench::exec_tag::sync, [&haystack, &needle](nvbench::launch& launch) {
+    // Run the search on the stream provided by nvbench for this launch.
+    auto const stream                 = rmm::cuda_stream_view{launch.get_stream()};
+    [[maybe_unused]] auto const found = cudf::detail::contains(*haystack, *needle, stream);
+  });
+}
+
+NVBENCH_BENCH(nvbench_contains_scalar)  // axis values are read back via state.get_int64()
+  .set_name("contains_scalar")          // "data_size" below lists exponents: 2^10 .. 2^26 rows
+  .add_int64_power_of_two_axis("data_size", {10, 12, 14, 16, 18, 20, 22, 24, 26})
+  .add_int64_axis("has_nulls", {0, 1});  // 0 = haystack without nulls, 1 = with nulls
@@ -23,13 +23,14 @@
 #include <cudf/dictionary/detail/update_keys.hpp>
 #include <cudf/lists/list_view.hpp>
 #include <cudf/scalar/scalar.hpp>
+#include <cudf/scalar/scalar_device_view.cuh>
 #include <cudf/structs/struct_view.hpp>
 #include <cudf/utilities/default_stream.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/exec_policy.hpp>
 
-#include <thrust/find.h>
+#include <thrust/count.h>
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/pair.h>
 #include <thrust/transform.h>
@@ -40,6 +41,23 @@ namespace detail {
 
 namespace {
 
+/**
+ * @brief Extract the stored value from a scalar device view.
+ *
+ * For fixed-point `Type`s this returns `d_scalar.rep()`; for every other type it returns
+ * `d_scalar.value()`.
+ *
+ * @tparam Type The scalar's value type
+ * @tparam ScalarDView Type of the input scalar device view
+ * @param d_scalar The input scalar device view
+ * @return The result of `rep()` or `value()`, as selected by `Type`
+ */
+template <typename Type, typename ScalarDView>
+__device__ inline auto get_scalar_value(ScalarDView d_scalar)
+{
+  if constexpr (!cudf::is_fixed_point<Type>()) {
+    return d_scalar.value();
+  } else {
+    // Fixed-point scalars are read through `rep()` instead of `value()`.
+    return d_scalar.rep();
+  }
+}
+
 struct contains_scalar_dispatch {
   template <typename Type>
   bool operator()(column_view const& haystack,
@@ -51,20 +69,28 @@ struct contains_scalar_dispatch {
     using DType           = device_storage_type_t<Type>;
     using ScalarType      = cudf::scalar_type_t<Type>;
     auto const d_haystack = column_device_view::create(haystack, stream);
-    auto const s          = static_cast<ScalarType const*>(&needle);
+
+    // `get_scalar_device_view` only accepts non-const reference so we need to strip const.
+    auto const s        = static_cast<ScalarType const*>(&needle);
+    auto const d_needle = get_scalar_device_view(const_cast<ScalarType&>(*s));
 
     if (haystack.has_nulls()) {
       auto const begin = d_haystack->pair_begin<DType, true>();
       auto const end   = d_haystack->pair_end<DType, true>();
-      auto const val   = thrust::make_pair(s->value(stream), true);
 
-      return thrust::find(rmm::exec_policy(stream), begin, end, val) != end;
+      return thrust::count_if(
+               rmm::exec_policy(stream), begin, end, [d_needle] __device__(auto const val_pair) {
+                 auto const needle_pair = thrust::make_pair(get_scalar_value<Type>(d_needle), true);
+                 return val_pair == needle_pair;
+               }) > 0;
     } else {
       auto const begin = d_haystack->begin<DType>();
       auto const end   = d_haystack->end<DType>();
-      auto const val   = s->value(stream);
 
-      return thrust::find(rmm::exec_policy(stream), begin, end, val) != end;
+      return thrust::count_if(
+               rmm::exec_policy(stream), begin, end, [d_needle] __device__(auto const val) {
+                 return val == get_scalar_value<Type>(d_needle);
+               }) > 0;
     }
   }
 };

@@ -21,7 +21,7 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/exec_policy.hpp>
 
-#include <thrust/logical.h>
+#include <thrust/count.h>
 
 namespace cudf::detail {
 
@@ -47,16 +47,20 @@ bool contains_nested_element(column_view const& haystack,
     auto const haystack_cdv_ptr  = column_device_view::create(haystack, stream);
     auto const haystack_valid_it = cudf::detail::make_validity_iterator<false>(*haystack_cdv_ptr);
 
-    return thrust::any_of(
-      rmm::exec_policy(stream), begin, end, [d_comp, haystack_valid_it] __device__(auto const idx) {
-        if (!haystack_valid_it[static_cast<size_type>(idx)]) { return false; }
-        return d_comp(idx, rhs_index_type{0});  // compare haystack[idx] == needle[0].
-      });
+    return thrust::count_if(rmm::exec_policy(stream),
+                            begin,
+                            end,
+                            [d_comp, haystack_valid_it] __device__(auto const idx) {
+                              if (!haystack_valid_it[static_cast<size_type>(idx)]) { return false; }
+                              return d_comp(
+                                idx, rhs_index_type{0});  // compare haystack[idx] == needle[0].
+                            }) > 0;
   }
 
-  return thrust::any_of(rmm::exec_policy(stream), begin, end, [d_comp] __device__(auto const idx) {
-    return d_comp(idx, rhs_index_type{0});  // compare haystack[idx] == needle[0].
-  });
+  return thrust::count_if(
+           rmm::exec_policy(stream), begin, end, [d_comp] __device__(auto const idx) {
+             return d_comp(idx, rhs_index_type{0});  // compare haystack[idx] == needle[0].
+           }) > 0;
 }
 
 }  // namespace cudf::detail