From 2234554c6cfe2941a30401bed907e409bf4bf56c Mon Sep 17 00:00:00 2001 From: Conor Hoekstra <36027403+codereport@users.noreply.github.com> Date: Tue, 23 Feb 2021 13:52:01 -0500 Subject: [PATCH] Simplify type dispatch with `device_storage_dispatch` (#7419) Resolves https://github.com/rapidsai/cudf/issues/7390 Compile times: ``` // Before real 33m29.842s user 300m0.478s sys 10m46.871s // After real 33m20.127s user 299m24.825s sys 10m35.779s ``` Binary sizes: ``` Before: -rwxr-xr-x 1 rapids rapids 328M Feb 22 15:10 libcudf_base.so After: -rwxr-xr-x 1 rapids rapids 327M Feb 23 07:49 libcudf_base.so ``` Authors: - Conor Hoekstra (@codereport) Approvers: - David (@davidwendt) - Jake Hemstad (@jrhemstad) URL: https://github.com/rapidsai/cudf/pull/7419 --- cpp/include/cudf/detail/gather.cuh | 24 +++---- cpp/include/cudf/detail/scatter.cuh | 24 +++---- .../cudf/utilities/type_dispatcher.hpp | 13 ++++ cpp/src/copying/concatenate.cu | 12 ++-- cpp/src/copying/copy.cu | 60 ++++++++-------- cpp/src/copying/copy_range.cu | 20 +++--- cpp/src/copying/scatter.cu | 24 +++---- cpp/src/copying/shift.cu | 12 ++-- cpp/src/dictionary/search.cu | 17 +++-- cpp/src/merge/merge.cu | 12 ++-- cpp/src/partitioning/partitioning.cu | 32 ++++----- cpp/src/reductions/scan.cu | 69 +++++++++---------- cpp/src/replace/clamp.cu | 34 +++++---- cpp/src/replace/nulls.cu | 14 ++-- cpp/src/replace/replace.cu | 32 ++++----- cpp/src/reshape/interleave_columns.cu | 22 +++--- cpp/src/sort/sort.cu | 14 ++-- cpp/src/sort/sort_column.cu | 27 ++++---- cpp/src/sort/stable_sort_column.cu | 21 +++--- 19 files changed, 229 insertions(+), 254 deletions(-) diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index adae9b76c5b..b8edf5ab4cb 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -176,11 +176,9 @@ struct column_gatherer_impl { auto destination_column = cudf::detail::allocate_like(source_column, num_rows, policy, stream, mr); - using Type = device_storage_type_t; - - gather_helper(source_column.data(), + gather_helper(source_column.data(), source_column.size(), - destination_column->mutable_view().template begin(), + destination_column->mutable_view().template begin(), gather_map_begin, gather_map_end, nullify_out_of_bounds, @@ -633,14 +631,14 @@ std::unique_ptr gather( for (auto const& source_column : source_table) { // The data gather for n columns will be put on the first n streams destination_columns.push_back( - cudf::type_dispatcher(source_column.type(), - column_gatherer{}, - source_column, - gather_map_begin, - gather_map_end, - bounds_policy == out_of_bounds_policy::NULLIFY, - stream, - mr)); + cudf::type_dispatcher(source_column.type(), + column_gatherer{}, + source_column, + gather_map_begin, + gather_map_end, + bounds_policy == out_of_bounds_policy::NULLIFY, + stream, + mr)); } gather_bitmask_op const op = bounds_policy == out_of_bounds_policy::NULLIFY diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index 7d5c3f4d2ee..2cb1cbffc68 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -91,15 +91,13 @@ struct column_scatterer_impl { auto result = std::make_unique(target, stream, mr); auto result_view = result->mutable_view(); - using Type = device_storage_type_t; - // NOTE use source.begin + scatter rows rather than source.end in case the // scatter map is smaller than the number of source rows thrust::scatter(rmm::exec_policy(stream), - source.begin(), - source.begin() + cudf::distance(scatter_map_begin, scatter_map_end), + source.begin(), + source.begin() + cudf::distance(scatter_map_begin, scatter_map_end), scatter_map_begin, - result_view.begin()); + result_view.begin()); return result; } @@ -285,14 +283,14 @@ std::unique_ptr
scatter( target.begin(), result.begin(), [=](auto const& source_col, auto const& target_col) { - return type_dispatcher(source_col.type(), - scatter_functor, - source_col, - updated_scatter_map_begin, - updated_scatter_map_end, - target_col, - stream, - mr); + return type_dispatcher(source_col.type(), + scatter_functor, + source_col, + updated_scatter_map_begin, + updated_scatter_map_end, + target_col, + stream, + mr); }); auto gather_map = scatter_to_gather( diff --git a/cpp/include/cudf/utilities/type_dispatcher.hpp b/cpp/include/cudf/utilities/type_dispatcher.hpp index 271d722396e..26c51d0435a 100644 --- a/cpp/include/cudf/utilities/type_dispatcher.hpp +++ b/cpp/include/cudf/utilities/type_dispatcher.hpp @@ -190,6 +190,19 @@ CUDF_TYPE_MAPPING(numeric::decimal32, type_id::DECIMAL32); CUDF_TYPE_MAPPING(numeric::decimal64, type_id::DECIMAL64); CUDF_TYPE_MAPPING(cudf::struct_view, type_id::STRUCT); +/** + * @brief Use this specialization on `type_dispatcher` whenever you only need to operate on the + * underlying stored type. + * + * For example, `cudf::sort` in sort.cu uses `cudf::type_dispatcher(...)`. + * `cudf::gather` in gather.cuh also uses `cudf::type_dispatcher(...)`. + * However, reductions needs both `data_type` and underlying type, so cannot use this. + */ +template +struct dispatch_storage_type { + using type = device_storage_type_t::type>; +}; + template struct type_to_scalar_type_impl { using ScalarType = cudf::scalar; diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index 57932fb63e1..8cf9db465f3 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -300,13 +300,11 @@ struct concatenate_dispatch { bool const has_nulls = std::any_of(views.cbegin(), views.cend(), [](auto const& col) { return col.has_nulls(); }); - using Type = device_storage_type_t; - // Use a heuristic to guess when the fused kernel will be faster if (use_fused_kernel_heuristic(has_nulls, views.size())) { - return fused_concatenate(views, has_nulls, stream, mr); + return fused_concatenate(views, has_nulls, stream, mr); } else { - return for_each_concatenate(views, has_nulls, stream, mr); + return for_each_concatenate(views, has_nulls, stream, mr); } } }; @@ -409,8 +407,8 @@ std::unique_ptr concatenate(std::vector const& columns_to_c return empty_like(columns_to_concat.front()); } - return type_dispatcher(columns_to_concat.front().type(), - concatenate_dispatch{columns_to_concat, stream, mr}); + return type_dispatcher( + columns_to_concat.front().type(), concatenate_dispatch{columns_to_concat, stream, mr}); } std::unique_ptr
concatenate(std::vector const& tables_to_concat, diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu index a87eedf8412..422fc0821a0 100644 --- a/cpp/src/copying/copy.cu +++ b/cpp/src/copying/copy.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,28 +41,26 @@ struct copy_if_else_functor_impl { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - using Type = device_storage_type_t; - if (left_nullable) { if (right_nullable) { - auto lhs_iter = cudf::detail::make_pair_iterator(lhs); - auto rhs_iter = cudf::detail::make_pair_iterator(rhs); + auto lhs_iter = cudf::detail::make_pair_iterator(lhs); + auto rhs_iter = cudf::detail::make_pair_iterator(rhs); return detail::copy_if_else( true, lhs_iter, lhs_iter + size, rhs_iter, filter, lhs.type(), stream, mr); } - auto lhs_iter = cudf::detail::make_pair_iterator(lhs); - auto rhs_iter = cudf::detail::make_pair_iterator(rhs); + auto lhs_iter = cudf::detail::make_pair_iterator(lhs); + auto rhs_iter = cudf::detail::make_pair_iterator(rhs); return detail::copy_if_else( true, lhs_iter, lhs_iter + size, rhs_iter, filter, lhs.type(), stream, mr); } if (right_nullable) { - auto lhs_iter = cudf::detail::make_pair_iterator(lhs); - auto rhs_iter = cudf::detail::make_pair_iterator(rhs); + auto lhs_iter = cudf::detail::make_pair_iterator(lhs); + auto rhs_iter = cudf::detail::make_pair_iterator(rhs); return detail::copy_if_else( true, lhs_iter, lhs_iter + size, rhs_iter, filter, lhs.type(), stream, mr); } - auto lhs_iter = cudf::detail::make_pair_iterator(lhs); - auto rhs_iter = cudf::detail::make_pair_iterator(rhs); + auto lhs_iter = cudf::detail::make_pair_iterator(lhs); + auto rhs_iter = cudf::detail::make_pair_iterator(rhs); return detail::copy_if_else( false, lhs_iter, lhs_iter + size, rhs_iter, filter, lhs.type(), stream, mr); } @@ -182,30 +180,30 @@ std::unique_ptr copy_if_else(Left const& lhs, auto filter = [bool_mask_device] __device__(cudf::size_type i) { return bool_mask_device.is_valid_nocheck(i) and bool_mask_device.element(i); }; - return cudf::type_dispatcher(lhs.type(), - copy_if_else_functor{}, - lhs, - rhs, - boolean_mask.size(), - left_nullable, - right_nullable, - filter, - stream, - mr); + return cudf::type_dispatcher(lhs.type(), + copy_if_else_functor{}, + lhs, + rhs, + boolean_mask.size(), + left_nullable, + right_nullable, + filter, + stream, + mr); } else { auto filter = [bool_mask_device] __device__(cudf::size_type i) { return bool_mask_device.element(i); }; - return cudf::type_dispatcher(lhs.type(), - copy_if_else_functor{}, - lhs, - rhs, - boolean_mask.size(), - left_nullable, - right_nullable, - filter, - stream, - mr); + return cudf::type_dispatcher(lhs.type(), + copy_if_else_functor{}, + lhs, + rhs, + boolean_mask.size(), + left_nullable, + right_nullable, + filter, + stream, + mr); } } diff --git a/cpp/src/copying/copy_range.cu b/cpp/src/copying/copy_range.cu index 974d03b0fc0..31a8796f950 100644 --- a/cpp/src/copying/copy_range.cu +++ b/cpp/src/copying/copy_range.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -108,9 +108,8 @@ struct out_of_place_copy_range_dispatch { } if (source_end != source_begin) { // otherwise no-op - using Type = cudf::device_storage_type_t; auto ret_view = p_ret->mutable_view(); - in_place_copy_range(source, ret_view, source_begin, source_end, target_begin, stream); + in_place_copy_range(source, ret_view, source_begin, source_end, target_begin, stream); } return p_ret; @@ -261,13 +260,14 @@ std::unique_ptr copy_range(column_view const& source, "Range is out of bounds."); CUDF_EXPECTS(target.type() == source.type(), "Data type mismatch."); - return cudf::type_dispatcher(target.type(), - out_of_place_copy_range_dispatch{source, target}, - source_begin, - source_end, - target_begin, - stream, - mr); + return cudf::type_dispatcher( + target.type(), + out_of_place_copy_range_dispatch{source, target}, + source_begin, + source_end, + target_begin, + stream, + mr); } } // namespace detail diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index 552b6c22a29..cedac96cee6 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -106,10 +106,8 @@ struct column_scalar_scatterer_impl { auto result = std::make_unique(target, stream, mr); auto result_view = result->mutable_view(); - using Type = device_storage_type_t; - // Use permutation iterator with constant index to dereference scalar data - auto scalar_impl = static_cast*>(&source.get()); + auto scalar_impl = static_cast*>(&source.get()); auto scalar_iter = thrust::make_permutation_iterator(scalar_impl->data(), thrust::make_constant_iterator(0)); @@ -117,7 +115,7 @@ struct column_scalar_scatterer_impl { scalar_iter, scalar_iter + scatter_rows, scatter_iter, - result_view.begin()); + result_view.begin()); return result; } @@ -300,14 +298,14 @@ std::unique_ptr
scatter(std::vector> target.begin(), result.begin(), [=](auto const& source_scalar, auto const& target_col) { - return type_dispatcher(target_col.type(), - scatter_functor, - source_scalar, - scatter_iter, - scatter_rows, - target_col, - stream, - mr); + return type_dispatcher(target_col.type(), + scatter_functor, + source_scalar, + scatter_iter, + scatter_rows, + target_col, + stream, + mr); }); scatter_scalar_bitmask(source, scatter_iter, scatter_rows, result, stream, mr); diff --git a/cpp/src/copying/shift.cu b/cpp/src/copying/shift.cu index 2b7a426bb73..cf85bf51e80 100644 --- a/cpp/src/copying/shift.cu +++ b/cpp/src/copying/shift.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -60,8 +60,7 @@ struct shift_functor { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - using Type = device_storage_type_t; - using ScalarType = cudf::scalar_type_t; + using ScalarType = cudf::scalar_type_t; auto& scalar = static_cast(fill_value); auto device_input = column_device_view::create(input); @@ -88,7 +87,7 @@ struct shift_functor { output->set_null_count(std::get<1>(mask_pair)); } - auto data = device_output->data(); + auto data = device_output->data(); // avoid assigning elements we know to be invalid. if (not scalar.is_valid()) { @@ -103,7 +102,7 @@ struct shift_functor { auto func_value = [size, offset, fill = scalar.data(), input = *device_input] __device__(size_type idx) { auto src_idx = idx - offset; - return out_of_bounds(size, src_idx) ? *fill : input.element(src_idx); + return out_of_bounds(size, src_idx) ? *fill : input.element(src_idx); }; thrust::transform(rmm::exec_policy(stream), index_begin, index_end, data, func_value); @@ -128,7 +127,8 @@ std::unique_ptr shift(column_view const& input, if (input.is_empty()) { return empty_like(input); } - return type_dispatcher(input.type(), shift_functor{}, input, offset, fill_value, stream, mr); + return type_dispatcher( + input.type(), shift_functor{}, input, offset, fill_value, stream, mr); } } // namespace detail diff --git a/cpp/src/dictionary/search.cu b/cpp/src/dictionary/search.cu index 56c2d32a71f..0aaf10707f4 100644 --- a/cpp/src/dictionary/search.cu +++ b/cpp/src/dictionary/search.cu @@ -74,18 +74,17 @@ struct find_index_fn { CUDF_EXPECTS(input.keys().type() == key.type(), "search key type must match dictionary keys type"); - using Type = device_storage_type_t; using ScalarType = cudf::scalar_type_t; auto find_key = static_cast(key).value(stream); auto keys_view = column_device_view::create(input.keys(), stream); auto iter = thrust::equal_range(thrust::device, // segfaults: rmm::exec_policy(stream) and // thrust::cuda::par.on(stream) - keys_view->begin(), - keys_view->end(), + keys_view->begin(), + keys_view->end(), find_key); return type_dispatcher(input.indices().type(), dispatch_scalar_index{}, - thrust::distance(keys_view->begin(), iter.first), + thrust::distance(keys_view->begin(), iter.first), (thrust::distance(iter.first, iter.second) > 0), stream, mr); @@ -135,15 +134,14 @@ struct find_insert_index_fn { CUDF_EXPECTS(input.keys().type() == key.type(), "search key type must match dictionary keys type"); - using Type = device_storage_type_t; using ScalarType = cudf::scalar_type_t; auto find_key = static_cast(key).value(stream); auto keys_view = column_device_view::create(input.keys(), stream); auto iter = thrust::lower_bound( - rmm::exec_policy(stream), keys_view->begin(), keys_view->end(), find_key); + rmm::exec_policy(stream), keys_view->begin(), keys_view->end(), find_key); return type_dispatcher(input.indices().type(), dispatch_scalar_index{}, - thrust::distance(keys_view->begin(), iter), + thrust::distance(keys_view->begin(), iter), true, stream, mr); @@ -171,7 +169,8 @@ std::unique_ptr get_index(dictionary_column_view const& dictionary, { if (dictionary.is_empty()) return std::make_unique>(0, false, stream, mr); - return type_dispatcher(dictionary.keys().type(), find_index_fn(), dictionary, key, stream, mr); + return type_dispatcher( + dictionary.keys().type(), find_index_fn(), dictionary, key, stream, mr); } std::unique_ptr get_insert_index(dictionary_column_view const& dictionary, @@ -181,7 +180,7 @@ std::unique_ptr get_insert_index(dictionary_column_view const& dictionar { if (dictionary.is_empty()) return std::make_unique>(0, false, stream, mr); - return type_dispatcher( + return type_dispatcher( dictionary.keys().type(), find_insert_index_fn(), dictionary, key, stream, mr); } diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index 06f1a5dcab7..be13405b469 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -272,12 +272,10 @@ struct column_merger { // merged_col->set_null_count(lcol.null_count() + rcol.null_count()); - using Type = device_storage_type_t; - // to resolve view.data()'s types use: Element // - auto const d_lcol = lcol.data(); - auto const d_rcol = rcol.data(); + auto const d_lcol = lcol.data(); + auto const d_rcol = rcol.data(); // capture lcol, rcol // and "gather" into merged_view.data()[indx_merged] @@ -286,7 +284,7 @@ struct column_merger { thrust::transform(rmm::exec_policy(stream), row_order_.begin(), row_order_.end(), - merged_view.begin(), + merged_view.begin(), [d_lcol, d_rcol] __device__(index_type const& index_pair) { // When C++17, use structure bindings auto side = thrust::get<0>(index_pair); @@ -383,7 +381,7 @@ table_ptr_type merge(cudf::table_view const& left_table, right_table.begin(), std::back_inserter(merged_cols), [&](auto const& left_col, auto const& right_col) { - return cudf::type_dispatcher( + return cudf::type_dispatcher( left_col.type(), merger, left_col, right_col, stream, mr); }); diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index 4d409edac65..46f00ecb75d 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -393,12 +393,10 @@ struct copy_block_partitions_dispatcher { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - using Type = device_storage_type_t; + rmm::device_buffer output(input.size() * sizeof(DataType), stream, mr); - rmm::device_buffer output(input.size() * sizeof(Type), stream, mr); - - copy_block_partitions_impl(input.data(), - static_cast(output.data()), + copy_block_partitions_impl(input.data(), + static_cast(output.data()), input.size(), num_partitions, row_partition_numbers, @@ -569,17 +567,17 @@ std::pair, std::vector> hash_partition_table( // Copy input to output by partition per column std::transform(input.begin(), input.end(), output_cols.begin(), [=](auto const& col) { - return cudf::type_dispatcher(col.type(), - copy_block_partitions_dispatcher{}, - col, - num_partitions, - row_partition_numbers_ptr, - row_partition_offset_ptr, - block_partition_sizes_ptr, - scanned_block_partition_sizes_ptr, - grid_size, - stream, - mr); + return cudf::type_dispatcher(col.type(), + copy_block_partitions_dispatcher{}, + col, + num_partitions, + row_partition_numbers_ptr, + row_partition_offset_ptr, + block_partition_sizes_ptr, + scanned_block_partition_sizes_ptr, + grid_size, + stream, + mr); }); if (has_nulls(input)) { diff --git a/cpp/src/reductions/scan.cu b/cpp/src/reductions/scan.cu index ca9ab82c27c..011b34031fe 100644 --- a/cpp/src/reductions/scan.cu +++ b/cpp/src/reductions/scan.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,7 +40,7 @@ namespace detail { * @tparam Op device binary operator */ template -struct ScanDispatcher { +struct scan_dispatcher { private: template static constexpr bool is_string_supported() @@ -213,12 +213,9 @@ struct ScanDispatcher { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto output = [&] { - using Type = device_storage_type_t; - return inclusive == scan_type::INCLUSIVE - ? inclusive_scan(input, null_handling, stream, mr) - : exclusive_scan(input, null_handling, stream, mr); - }(); + auto output = inclusive == scan_type::INCLUSIVE + ? inclusive_scan(input, null_handling, stream, mr) + : exclusive_scan(input, null_handling, stream, mr); if (null_handling == null_policy::EXCLUDE) { CUDF_EXPECTS(input.null_count() == output->null_count(), @@ -253,40 +250,40 @@ std::unique_ptr scan( switch (agg->kind) { case aggregation::SUM: - return cudf::type_dispatcher(input.type(), - ScanDispatcher(), - input, - inclusive, - null_handling, - stream, - mr); + return cudf::type_dispatcher(input.type(), + scan_dispatcher(), + input, + inclusive, + null_handling, + stream, + mr); case aggregation::MIN: - return cudf::type_dispatcher(input.type(), - ScanDispatcher(), - input, - inclusive, - null_handling, - stream, - mr); + return cudf::type_dispatcher(input.type(), + scan_dispatcher(), + input, + inclusive, + null_handling, + stream, + mr); case aggregation::MAX: - return cudf::type_dispatcher(input.type(), - ScanDispatcher(), - input, - inclusive, - null_handling, - stream, - mr); + return cudf::type_dispatcher(input.type(), + scan_dispatcher(), + input, + inclusive, + null_handling, + stream, + mr); case aggregation::PRODUCT: // a product scan on a decimal type with non-zero scale would result in each element having // a different scale, and because scale is stored once per column, this is not possible if (is_fixed_point(input.type())) CUDF_FAIL("decimal32/64 cannot support product scan"); - return cudf::type_dispatcher(input.type(), - ScanDispatcher(), - input, - inclusive, - null_handling, - stream, - mr); + return cudf::type_dispatcher(input.type(), + scan_dispatcher(), + input, + inclusive, + null_handling, + stream, + mr); default: CUDF_FAIL("Unsupported aggregation operator for scan"); } } diff --git a/cpp/src/replace/clamp.cu b/cpp/src/replace/clamp.cu index f63fe793a04..1f7e0672404 100644 --- a/cpp/src/replace/clamp.cu +++ b/cpp/src/replace/clamp.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -249,14 +249,12 @@ struct dispatch_clamp { { CUDF_EXPECTS(lo.type() == input.type(), "mismatching types of scalar and input"); - using Type = device_storage_type_t; + auto lo_itr = make_pair_iterator(lo); + auto hi_itr = make_pair_iterator(hi); + auto lo_replace_itr = make_pair_iterator(lo_replace); + auto hi_replace_itr = make_pair_iterator(hi_replace); - auto lo_itr = make_pair_iterator(lo); - auto hi_itr = make_pair_iterator(hi); - auto lo_replace_itr = make_pair_iterator(lo_replace); - auto hi_replace_itr = make_pair_iterator(hi_replace); - - return clamp(input, lo_itr, lo_replace_itr, hi_itr, hi_replace_itr, stream, mr); + return clamp(input, lo_itr, lo_replace_itr, hi_itr, hi_replace_itr, stream, mr); } }; @@ -322,15 +320,15 @@ std::unique_ptr dispatch_clamp::operator()( // call clamp with the scalar indexes and the matched indices auto matched_indices = matched_view.get_indices_annotated(); - auto new_indices = cudf::type_dispatcher(matched_indices.type(), - dispatch_clamp{}, - matched_indices, - *lo_index, - *lo_replace_index, - *hi_index, - *hi_replace_index, - stream, - mr); + auto new_indices = cudf::type_dispatcher(matched_indices.type(), + dispatch_clamp{}, + matched_indices, + *lo_index, + *lo_replace_index, + *hi_index, + *hi_replace_index, + stream, + mr); auto const indices_type = new_indices->type(); auto const output_size = new_indices->size(); @@ -387,7 +385,7 @@ std::unique_ptr clamp( CUDF_EXPECTS(hi_replace.is_valid(stream), "hi_replace can't be null if hi is not null"); } - return cudf::type_dispatcher( + return cudf::type_dispatcher( input.type(), dispatch_clamp{}, input, lo, lo_replace, hi, hi_replace, stream, mr); } diff --git a/cpp/src/replace/nulls.cu b/cpp/src/replace/nulls.cu index c78b1c67c04..50b1b7062d9 100644 --- a/cpp/src/replace/nulls.cu +++ b/cpp/src/replace/nulls.cu @@ -308,17 +308,16 @@ struct replace_nulls_scalar_kernel_forwarder { cudf::allocate_like(input, cudf::mask_allocation_policy::NEVER, mr); auto output_view = output->mutable_view(); - using Type = cudf::device_storage_type_t; using ScalarType = cudf::scalar_type_t; auto s1 = static_cast(replacement); auto device_in = cudf::column_device_view::create(input); - auto func = replace_nulls_functor{s1.data()}; + auto func = replace_nulls_functor{s1.data()}; thrust::transform(rmm::exec_policy(stream), - input.data(), - input.data() + input.size(), + input.data(), + input.data() + input.size(), cudf::detail::make_validity_iterator(*device_in), - output_view.data(), + output_view.data(), func); return output; } @@ -424,7 +423,6 @@ std::unique_ptr replace_nulls(cudf::column_view const& input, CUDF_EXPECTS(replacement.size() == input.size(), "Column size mismatch"); if (input.is_empty()) { return cudf::empty_like(input); } - if (!input.has_nulls()) { return std::make_unique(input); } return cudf::type_dispatcher( @@ -437,12 +435,11 @@ std::unique_ptr replace_nulls(cudf::column_view const& input, rmm::mr::device_memory_resource* mr) { if (input.is_empty()) { return cudf::empty_like(input); } - if (!input.has_nulls() || !replacement.is_valid()) { return std::make_unique(input, stream, mr); } - return cudf::type_dispatcher( + return cudf::type_dispatcher( input.type(), replace_nulls_scalar_kernel_forwarder{}, input, replacement, stream, mr); } @@ -452,7 +449,6 @@ std::unique_ptr replace_nulls(cudf::column_view const& input, rmm::mr::device_memory_resource* mr) { if (input.is_empty()) { return cudf::empty_like(input); } - if (!input.has_nulls()) { return std::make_unique(input, stream, mr); } return replace_nulls_policy_impl(input, replace_policy, stream, mr); diff --git a/cpp/src/replace/replace.cu b/cpp/src/replace/replace.cu index 6126b4824e5..783e0b4b1de 100644 --- a/cpp/src/replace/replace.cu +++ b/cpp/src/replace/replace.cu @@ -17,7 +17,7 @@ * limitations under the License. */ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -241,9 +241,7 @@ __global__ void replace_kernel(cudf::column_device_view input, cudf::column_device_view values_to_replace, cudf::column_device_view replacement) { - using Type = cudf::device_storage_type_t; - - Type* __restrict__ output_data = output.data(); + T* __restrict__ output_data = output.data(); cudf::size_type i = blockIdx.x * blockDim.x + threadIdx.x; @@ -260,12 +258,12 @@ __global__ void replace_kernel(cudf::column_device_view input, output_is_valid = input_is_valid; } if (input_is_valid) - thrust::tie(output_data[i], output_is_valid) = get_new_value( + thrust::tie(output_data[i], output_is_valid) = get_new_value( i, - input.data(), - values_to_replace.data(), - values_to_replace.data() + values_to_replace.size(), - replacement.data(), + input.data(), + values_to_replace.data(), + values_to_replace.data() + values_to_replace.size(), + replacement.data(), replacement.null_mask()); /* output valid counts calculations*/ @@ -461,7 +459,7 @@ std::unique_ptr replace_kernel_forwarder::operator()( indices_type, replace_kernel_forwarder{}, matched_view.get_indices_annotated(), @@ -502,13 +500,13 @@ std::unique_ptr find_and_replace_all(cudf::column_view const& inpu return std::make_unique(input_col); } - return cudf::type_dispatcher(input_col.type(), - replace_kernel_forwarder{}, - input_col, - values_to_replace, - replacement_values, - stream, - mr); + return cudf::type_dispatcher(input_col.type(), + replace_kernel_forwarder{}, + input_col, + values_to_replace, + replacement_values, + stream, + mr); } } // namespace detail diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu index 25b245bee6b..3e2cb4ac02f 100644 --- a/cpp/src/reshape/interleave_columns.cu +++ b/cpp/src/reshape/interleave_columns.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -137,16 +137,14 @@ struct interleave_columns_functor { auto index_begin = thrust::make_counting_iterator(0); auto index_end = thrust::make_counting_iterator(output_size); - using Type = device_storage_type_t; - auto func_value = [input = *device_input, divisor = input.num_columns()] __device__(size_type idx) { - return input.column(idx % divisor).element(idx / divisor); + return input.column(idx % divisor).element(idx / divisor); }; if (not create_mask) { thrust::transform( - rmm::exec_policy(stream), index_begin, index_end, device_output->begin(), func_value); + rmm::exec_policy(stream), index_begin, index_end, device_output->begin(), func_value); return output; } @@ -159,7 +157,7 @@ struct interleave_columns_functor { thrust::transform_if(rmm::exec_policy(stream), index_begin, index_end, - device_output->begin(), + device_output->begin(), func_value, func_validity); @@ -193,12 +191,12 @@ std::unique_ptr interleave_columns(table_view const& input, auto const output_needs_mask = std::any_of( std::cbegin(input), std::cend(input), [](auto const& col) { return col.nullable(); }); - return type_dispatcher(dtype, - detail::interleave_columns_functor{}, - input, - output_needs_mask, - rmm::cuda_stream_default, - mr); + return type_dispatcher(dtype, + detail::interleave_columns_functor{}, + input, + output_needs_mask, + rmm::cuda_stream_default, + mr); } } // namespace cudf diff --git a/cpp/src/sort/sort.cu b/cpp/src/sort/sort.cu index c500b8ae49f..b6262a1848d 100644 --- a/cpp/src/sort/sort.cu +++ b/cpp/src/sort/sort.cu @@ -60,17 +60,10 @@ struct inplace_column_sort_fn { void operator()(mutable_column_view& col, bool ascending, rmm::cuda_stream_view stream) const { CUDF_EXPECTS(!col.has_nulls(), "Nulls not supported for in-place sort"); - using DeviceT = device_storage_type_t; if (ascending) { - thrust::sort(rmm::exec_policy(stream), - col.begin(), - col.end(), - thrust::less()); + thrust::sort(rmm::exec_policy(stream), col.begin(), col.end(), thrust::less()); } else { - thrust::sort(rmm::exec_policy(stream), - col.begin(), - col.end(), - thrust::greater()); + thrust::sort(rmm::exec_policy(stream), col.begin(), col.end(), thrust::greater()); } } @@ -95,7 +88,8 @@ std::unique_ptr
sort(table_view input, auto output = std::make_unique(input.column(0), stream, mr); auto view = output->mutable_view(); bool ascending = (column_order.empty() ? true : column_order.front() == order::ASCENDING); - cudf::type_dispatcher(output->type(), inplace_column_sort_fn{}, view, ascending, stream); + cudf::type_dispatcher( + output->type(), inplace_column_sort_fn{}, view, ascending, stream); std::vector> columns; columns.emplace_back(std::move(output)); return std::make_unique
(std::move(columns)); diff --git a/cpp/src/sort/sort_column.cu b/cpp/src/sort/sort_column.cu index 070aa6eae03..74c796e7962 100644 --- a/cpp/src/sort/sort_column.cu +++ b/cpp/src/sort/sort_column.cu @@ -54,19 +54,18 @@ struct column_sorted_order_fn { // But this also requires making a copy of the input data. auto temp_col = column(input, stream); auto d_col = temp_col.mutable_view(); - using DeviceT = device_storage_type_t; if (ascending) { thrust::sort_by_key(rmm::exec_policy(stream), - d_col.begin(), - d_col.end(), + d_col.begin(), + d_col.end(), indices.begin(), - thrust::less()); + thrust::less()); } else { thrust::sort_by_key(rmm::exec_policy(stream), - d_col.begin(), - d_col.end(), + d_col.begin(), + d_col.end(), indices.begin(), - thrust::greater()); + thrust::greater()); } } template ()>* = nullptr> @@ -132,13 +131,13 @@ std::unique_ptr sorted_order(column_view const& input, mutable_column_view indices_view = sorted_indices->mutable_view(); thrust::sequence( rmm::exec_policy(stream), indices_view.begin(), indices_view.end(), 0); - cudf::type_dispatcher(input.type(), - column_sorted_order_fn{}, - input, - indices_view, - column_order == order::ASCENDING, - null_precedence, - stream); + cudf::type_dispatcher(input.type(), + column_sorted_order_fn{}, + input, + indices_view, + column_order == order::ASCENDING, + null_precedence, + stream); return sorted_indices; } diff --git a/cpp/src/sort/stable_sort_column.cu b/cpp/src/sort/stable_sort_column.cu index abeaa7bef76..49aecf52625 100644 --- a/cpp/src/sort/stable_sort_column.cu +++ b/cpp/src/sort/stable_sort_column.cu @@ -35,11 +35,8 @@ struct column_stable_sorted_order_fn { { auto temp_col = column(input, stream); auto d_col = temp_col.mutable_view(); - using DeviceT = device_storage_type_t; - thrust::stable_sort_by_key(rmm::exec_policy(stream), - d_col.begin(), - d_col.end(), - indices.begin()); + thrust::stable_sort_by_key( + rmm::exec_policy(stream), d_col.begin(), d_col.end(), indices.begin()); } template ()>* = nullptr> void faster_stable_sort(column_view const&, mutable_column_view&, rmm::cuda_stream_view) @@ -103,13 +100,13 @@ std::unique_ptr sorted_order(column_view const& input, mutable_column_view indices_view = sorted_indices->mutable_view(); thrust::sequence( rmm::exec_policy(stream), indices_view.begin(), indices_view.end(), 0); - cudf::type_dispatcher(input.type(), - column_stable_sorted_order_fn{}, - input, - indices_view, - column_order == order::ASCENDING, - null_precedence, - stream); + cudf::type_dispatcher(input.type(), + column_stable_sorted_order_fn{}, + input, + indices_view, + column_order == order::ASCENDING, + null_precedence, + stream); return sorted_indices; }