Skip to content

Commit

Permalink
Improve performance for cudf::contains when searching for a scalar (#…
Browse files Browse the repository at this point in the history
…11202)

The current implementation of `cudf::contains(column_view, scalar)` uses `thrust::find` and `thrust::any_of` (which also calls `thrust::find_if` under the hood). These thrust APIs are known to have a performance regression (https://github.com/NVIDIA/thrust/issues/1016).

This PR replaces `thrust::find` and `thrust::any_of` in `cudf::contains` with `thrust::count_if`, which improves performance significantly.
Benchmarks show that the run time is reduced by as much as 80% after this change, i.e. up to a 5X speedup.

Closes #3806.

Authors:
  - Nghia Truong (https://github.com/ttnghia)

Approvers:
  - Karthikeyan (https://github.com/karthikeyann)
  - Bradley Dice (https://github.com/bdice)

URL: #11202
  • Loading branch information
ttnghia authored Jul 8, 2022
1 parent bad00d7 commit 4d4632a
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 16 deletions.
1 change: 1 addition & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ ConfigureBench(ITERATOR_BENCH iterator/iterator.cu)
# ##################################################################################################
# * search benchmark ------------------------------------------------------------------------------
ConfigureBench(SEARCH_BENCH search/search.cpp)
ConfigureNVBench(SEARCH_NVBENCH search/contains.cpp)

# ##################################################################################################
# * sort benchmark --------------------------------------------------------------------------------
Expand Down
70 changes: 70 additions & 0 deletions cpp/benchmarks/search/contains.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/rmm_pool_raii.hpp>

#include <cudf/detail/search.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/types.hpp>

#include <nvbench/nvbench.cuh>

namespace {
/**
 * @brief Build a random table in which every column has element type `Type`.
 *
 * The data profile is configured with cardinality 0, a uniform distribution over
 * `[Type{0}, Type{1000}]`, and a null frequency of 0.1 when `has_nulls` is set
 * (no null frequency otherwise).
 *
 * @tparam Type Element type of every generated column
 * @param n_rows Number of rows to generate
 * @param n_cols Number of columns to generate
 * @param has_nulls Whether a null frequency is set on the data profile
 * @return The table produced by `create_random_table`
 */
template <typename Type>
std::unique_ptr<cudf::table> create_table_data(cudf::size_type n_rows,
                                               cudf::size_type n_cols,
                                               bool has_nulls = false)
{
  auto const dtype_id = cudf::type_to_id<Type>();

  // Null frequency stays disengaged unless nulls were requested.
  std::optional<double> null_frequency = std::nullopt;
  if (has_nulls) { null_frequency = 0.1; }

  data_profile table_profile;
  table_profile.set_cardinality(0);
  table_profile.set_null_frequency(null_frequency);
  table_profile.set_distribution_params<Type>(
    dtype_id, distribution_id::UNIFORM, Type{0}, Type{1000});

  // Repeat the single dtype once per requested column.
  auto const column_dtypes = cycle_dtypes({dtype_id}, n_cols);
  return create_random_table(column_dtypes, row_count{n_rows}, table_profile);
}

/**
 * @brief Build a single random column with element type `Type`.
 *
 * Generates a one-column table via `create_table_data` and hands back its only column.
 *
 * @tparam Type Element type of the generated column
 * @param n_rows Number of rows to generate
 * @param has_nulls Forwarded to `create_table_data`
 * @return The first (and only) column released from the generated table
 */
template <typename Type>
std::unique_ptr<cudf::column> create_column_data(cudf::size_type n_rows, bool has_nulls = false)
{
  auto released_columns = create_table_data<Type>(n_rows, 1, has_nulls)->release();
  // `front()` yields an lvalue, so an explicit move is needed to transfer ownership out.
  return std::move(released_columns.front());
}

} // namespace

/**
 * @brief Benchmark body for `cudf::detail::contains(column, scalar)`.
 *
 * Reads the `data_size` and `has_nulls` axes from `state`, builds an `int` haystack column
 * of `data_size` rows and a needle scalar holding `data_size / 2`, then runs the search
 * inside `state.exec`.
 *
 * @param state The nvbench state providing axis values and the execution hook
 */
static void nvbench_contains_scalar(nvbench::state& state)
{
  // NOTE(review): presumably sets up an RMM pool for the lifetime of this function — confirm.
  cudf::rmm_pool_raii pool_raii;
  using Type = int;

  auto const size      = state.get_int64("data_size");
  auto const has_nulls = static_cast<bool>(state.get_int64("has_nulls"));

  // The axis values are 64-bit; make the narrowing to the cudf/scalar types explicit.
  auto const haystack = create_column_data<Type>(static_cast<cudf::size_type>(size), has_nulls);
  auto const needle   = cudf::make_fixed_width_scalar<Type>(static_cast<Type>(size / 2));

  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& bench_launch) {
    // Run the search on the stream handed out by nvbench.
    auto const stream_view = rmm::cuda_stream_view{bench_launch.get_stream()};
    // The result is intentionally unused; only the call itself is of interest here.
    [[maybe_unused]] auto const result = cudf::detail::contains(*haystack, *needle, stream_view);
  });
}

// Register the benchmark under the name "contains_scalar" with two axes:
//  - "data_size": a power-of-two axis built from the listed exponents (10 through 26, step 2);
//  - "has_nulls": 0 or 1, read back as a bool inside nvbench_contains_scalar.
NVBENCH_BENCH(nvbench_contains_scalar)
.set_name("contains_scalar")
.add_int64_power_of_two_axis("data_size", {10, 12, 14, 16, 18, 20, 22, 24, 26})
.add_int64_axis("has_nulls", {0, 1});
37 changes: 30 additions & 7 deletions cpp/src/search/contains.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,14 @@
#include <cudf/dictionary/detail/update_keys.hpp>
#include <cudf/lists/list_view.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/scalar/scalar_device_view.cuh>
#include <cudf/structs/struct_view.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/find.h>
#include <thrust/count.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/pair.h>
#include <thrust/transform.h>
Expand All @@ -40,6 +41,23 @@ namespace detail {

namespace {

/**
 * @brief Extract the value held by a scalar, accessed through its device view.
 *
 * Fixed-point types are read through the view's `rep()` accessor; every other type is
 * read through `value()`.
 *
 * @tparam Type The scalar's value type
 * @tparam ScalarDView Type of the input scalar device view
 * @param d_scalar The input scalar device view
 * @return The result of `d_scalar.rep()` for fixed-point `Type`, otherwise of `d_scalar.value()`
 */
template <typename Type, typename ScalarDView>
__device__ inline auto get_scalar_value(ScalarDView d_scalar)
{
  // Both branches stay under `if constexpr` so only the selected accessor is instantiated.
  if constexpr (!cudf::is_fixed_point<Type>()) {
    return d_scalar.value();
  } else {
    return d_scalar.rep();
  }
}

struct contains_scalar_dispatch {
template <typename Type>
bool operator()(column_view const& haystack,
Expand All @@ -49,22 +67,27 @@ struct contains_scalar_dispatch {
CUDF_EXPECTS(haystack.type() == needle.type(), "scalar and column types must match");

using DType = device_storage_type_t<Type>;
using ScalarType = cudf::scalar_type_t<Type>;
auto const d_haystack = column_device_view::create(haystack, stream);
auto const s = static_cast<ScalarType const*>(&needle);
auto const d_needle =
get_scalar_device_view(static_cast<cudf::scalar_type_t<Type>&>(const_cast<scalar&>(needle)));

if (haystack.has_nulls()) {
auto const begin = d_haystack->pair_begin<DType, true>();
auto const end = d_haystack->pair_end<DType, true>();
auto const val = thrust::make_pair(s->value(stream), true);

return thrust::find(rmm::exec_policy(stream), begin, end, val) != end;
return thrust::count_if(
rmm::exec_policy(stream), begin, end, [d_needle] __device__(auto const val_pair) {
auto const needle_pair = thrust::make_pair(get_scalar_value<Type>(d_needle), true);
return val_pair == needle_pair;
}) > 0;
} else {
auto const begin = d_haystack->begin<DType>();
auto const end = d_haystack->end<DType>();
auto const val = s->value(stream);

return thrust::find(rmm::exec_policy(stream), begin, end, val) != end;
return thrust::count_if(
rmm::exec_policy(stream), begin, end, [d_needle] __device__(auto const val) {
return val == get_scalar_value<Type>(d_needle);
}) > 0;
}
}
};
Expand Down
22 changes: 13 additions & 9 deletions cpp/src/search/contains_nested.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/logical.h>
#include <thrust/count.h>

namespace cudf::detail {

Expand All @@ -47,16 +47,20 @@ bool contains_nested_element(column_view const& haystack,
auto const haystack_cdv_ptr = column_device_view::create(haystack, stream);
auto const haystack_valid_it = cudf::detail::make_validity_iterator<false>(*haystack_cdv_ptr);

return thrust::any_of(
rmm::exec_policy(stream), begin, end, [d_comp, haystack_valid_it] __device__(auto const idx) {
if (!haystack_valid_it[static_cast<size_type>(idx)]) { return false; }
return d_comp(idx, rhs_index_type{0}); // compare haystack[idx] == needle[0].
});
return thrust::count_if(rmm::exec_policy(stream),
begin,
end,
[d_comp, haystack_valid_it] __device__(auto const idx) {
if (!haystack_valid_it[static_cast<size_type>(idx)]) { return false; }
return d_comp(
idx, rhs_index_type{0}); // compare haystack[idx] == needle[0].
}) > 0;
}

return thrust::any_of(rmm::exec_policy(stream), begin, end, [d_comp] __device__(auto const idx) {
return d_comp(idx, rhs_index_type{0}); // compare haystack[idx] == needle[0].
});
return thrust::count_if(
rmm::exec_policy(stream), begin, end, [d_comp] __device__(auto const idx) {
return d_comp(idx, rhs_index_type{0}); // compare haystack[idx] == needle[0].
}) > 0;
}

} // namespace cudf::detail

0 comments on commit 4d4632a

Please sign in to comment.