From ac2695365c2b594c44a4aeeedff9899df53c0e90 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Tue, 25 Apr 2023 07:32:01 -0400 Subject: [PATCH] Split up unique_count.cu to improve build time (#13169) Moves the `column_view` overload out of `cpp/src/stream_compaction/unique_count.cu` and into the new file `cpp/src/stream_compaction/unique_count_column.cu` to help improve overall build time. Compiling the two files in parallel speeds up their combined build time by roughly 2x. `unique_count.cu` is one of the files that take over 20 minutes to build in CI. Also cleaned up many of the include statements. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/13169 --- cpp/CMakeLists.txt | 1 + cpp/src/stream_compaction/unique_count.cu | 85 -------------- .../stream_compaction/unique_count_column.cu | 110 ++++++++++++++++++ 3 files changed, 111 insertions(+), 85 deletions(-) create mode 100644 cpp/src/stream_compaction/unique_count_column.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d9cc047a76e..6d9986178d1 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -514,6 +514,7 @@ add_library( src/stream_compaction/stable_distinct.cu src/stream_compaction/unique.cu src/stream_compaction/unique_count.cu + src/stream_compaction/unique_count_column.cu src/strings/attributes.cu src/strings/capitalize.cu src/strings/case.cu diff --git a/cpp/src/stream_compaction/unique_count.cu b/cpp/src/stream_compaction/unique_count.cu index ac9924311c2..19607fe8105 100644 --- a/cpp/src/stream_compaction/unique_count.cu +++ b/cpp/src/stream_compaction/unique_count.cu @@ -14,22 +14,12 @@ * limitations under the License. 
*/ -#include "stream_compaction_common.cuh" -#include "stream_compaction_common.hpp" - -#include -#include -#include -#include -#include #include -#include #include #include #include #include #include -#include #include #include @@ -37,37 +27,10 @@ #include #include #include -#include #include -#include -#include -#include -#include -#include - namespace cudf { namespace detail { -namespace { -/** - * @brief A functor to be used along with device type_dispatcher to check if - * the row `index` of `column_device_view` is `NaN`. - */ -struct check_nan { - // Check if a value is `NaN` for floating point type columns - template >* = nullptr> - __device__ inline bool operator()(column_device_view const& input, size_type index) - { - return std::isnan(input.data()[index]); - } - // Non-floating point type columns can never have `NaN`, so it will always return false. - template >* = nullptr> - __device__ inline bool operator()(column_device_view const&, size_type) - { - return false; - } -}; -} // namespace cudf::size_type unique_count(table_view const& keys, null_equality nulls_equal, @@ -102,56 +65,8 @@ cudf::size_type unique_count(table_view const& keys, } } -cudf::size_type unique_count(column_view const& input, - null_policy null_handling, - nan_policy nan_handling, - rmm::cuda_stream_view stream) -{ - auto const num_rows = input.size(); - - if (num_rows == 0 or num_rows == input.null_count()) { return 0; } - - auto const count_nulls = null_handling == null_policy::INCLUDE; - auto const nan_is_null = nan_handling == nan_policy::NAN_IS_NULL; - auto const should_check_nan = cudf::is_floating_point(input.type()); - auto input_device_view = cudf::column_device_view::create(input, stream); - auto device_view = *input_device_view; - auto input_table_view = table_view{{input}}; - auto table_ptr = cudf::table_device_view::create(input_table_view, stream); - row_equality_comparator comp(nullate::DYNAMIC{cudf::has_nulls(input_table_view)}, - *table_ptr, - *table_ptr, - 
null_equality::EQUAL); - - return thrust::count_if( - rmm::exec_policy(stream), - thrust::counting_iterator(0), - thrust::counting_iterator(num_rows), - [count_nulls, nan_is_null, should_check_nan, device_view, comp] __device__(cudf::size_type i) { - auto const is_null = device_view.is_null(i); - auto const is_nan = nan_is_null and should_check_nan and - cudf::type_dispatcher(device_view.type(), check_nan{}, device_view, i); - if (not count_nulls and (is_null or (nan_is_null and is_nan))) { return false; } - if (i == 0) { return true; } - if (count_nulls and nan_is_null and (is_nan or is_null)) { - auto const prev_is_nan = - should_check_nan and - cudf::type_dispatcher(device_view.type(), check_nan{}, device_view, i - 1); - return not(prev_is_nan or device_view.is_null(i - 1)); - } - return not comp(i, i - 1); - }); -} } // namespace detail -cudf::size_type unique_count(column_view const& input, - null_policy null_handling, - nan_policy nan_handling) -{ - CUDF_FUNC_RANGE(); - return detail::unique_count(input, null_handling, nan_handling, cudf::get_default_stream()); -} - cudf::size_type unique_count(table_view const& input, null_equality nulls_equal) { CUDF_FUNC_RANGE(); diff --git a/cpp/src/stream_compaction/unique_count_column.cu b/cpp/src/stream_compaction/unique_count_column.cu new file mode 100644 index 00000000000..16758b6e3a7 --- /dev/null +++ b/cpp/src/stream_compaction/unique_count_column.cu @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include + +namespace cudf { +namespace detail { +namespace { +/** + * @brief A functor to be used along with device type_dispatcher to check if + * the row `index` of `column_device_view` is `NaN`. + */ +struct check_nan { + // Check if a value is `NaN` for floating point type columns + template >* = nullptr> + __device__ inline bool operator()(column_device_view const& input, size_type index) + { + return std::isnan(input.data()[index]); + } + // Non-floating point type columns can never have `NaN`, so it will always return false. + template >* = nullptr> + __device__ inline bool operator()(column_device_view const&, size_type) + { + return false; + } +}; +} // namespace + +cudf::size_type unique_count(column_view const& input, + null_policy null_handling, + nan_policy nan_handling, + rmm::cuda_stream_view stream) +{ + auto const num_rows = input.size(); + + if (num_rows == 0 or num_rows == input.null_count()) { return 0; } + + auto const count_nulls = null_handling == null_policy::INCLUDE; + auto const nan_is_null = nan_handling == nan_policy::NAN_IS_NULL; + auto const should_check_nan = cudf::is_floating_point(input.type()); + auto input_device_view = cudf::column_device_view::create(input, stream); + auto device_view = *input_device_view; + auto input_table_view = table_view{{input}}; + auto table_ptr = cudf::table_device_view::create(input_table_view, stream); + row_equality_comparator comp(nullate::DYNAMIC{cudf::has_nulls(input_table_view)}, + *table_ptr, + *table_ptr, + null_equality::EQUAL); + + return thrust::count_if( + rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(num_rows), + [count_nulls, nan_is_null, should_check_nan, device_view, comp] 
__device__(cudf::size_type i) { + auto const is_null = device_view.is_null(i); + auto const is_nan = nan_is_null and should_check_nan and + cudf::type_dispatcher(device_view.type(), check_nan{}, device_view, i); + if (not count_nulls and (is_null or (nan_is_null and is_nan))) { return false; } + if (i == 0) { return true; } + if (count_nulls and nan_is_null and (is_nan or is_null)) { + auto const prev_is_nan = + should_check_nan and + cudf::type_dispatcher(device_view.type(), check_nan{}, device_view, i - 1); + return not(prev_is_nan or device_view.is_null(i - 1)); + } + return not comp(i, i - 1); + }); +} +} // namespace detail + +cudf::size_type unique_count(column_view const& input, + null_policy null_handling, + nan_policy nan_handling) +{ + CUDF_FUNC_RANGE(); + return detail::unique_count(input, null_handling, nan_handling, cudf::get_default_stream()); +} + +} // namespace cudf